#!/usr/bin/perl -l
use strict;
use warnings;
use LWP::Simple;
use Data::Dumper;
$| = 1; # flush on print so we can tail
print join '|', ('coords',
'ID',
'URL',
'Name',
'City',
'State',
'Loc Label',
'Loc Addr Middle',
'Loc Addr Last',
'Loc CSZ',
'Apps This Year',
'Apps Next Year',
'Program Length',
'Number 1st year spots',
'Avg hrs/wk 1st year',
'Max consec hours 1st year',
'Moonlighting allowed',
'Salary 1st year',
'Program Type');
while (<>) {
next unless m{searchByPgmNbr&pgmNumber=(\d+)\">(.*?)};
#### initial data from summary page
# id, url, name
my @data = ('0,0', $1, make_url($1), $2);
#city
my $line = <>;
chomp $line;
die $line unless ($line =~ m{
(.*?),});
push @data, $1;
#state
$line = <>;
chomp $line;
die $line unless ($line =~ m{\s+(.*?)\s+[\d-]+
});
push @data, $1;
# rest of data from summary
my $count = 0;
while ($line = <>) {
next if ( $line =~ m/\<|\>/ );
next if ( $line =~ m/^\s*$/ );
$line =~ s/\s+/ /g;
$count++;
push @data, $line;
last if (9 == $count);
}
# splice in real address from detail page
splice @data, 6, 0, get_details($data[1]);
print join '|', @data;
}
sub make_url {
my $id = shift;
return "https://freida.ama-assn.org/Freida/user/programSearchDispatch.do?method=searchByPgmNbr&pgmNumber=$id";
}
sub get_details {
my $id = shift;
my $details = get(make_url($id));
if (defined $details and
$details =~ m{(([^<]+
)+[^<]+)}) {
my @nuggets = split /
/, $1;
shift @nuggets; # trash the program director's name
my $loc_name = shift @nuggets; # name of location
my $loc_csz = pop @nuggets; # city state zip
my $loc_last = pop @nuggets; # last line of addr
# all other parts of address
my $loc_other = scalar(@nuggets) ? (join ' ', @nuggets) : '';
return ($loc_name, $loc_other, $loc_last, $loc_csz);
}
return ('','','','');
}