#!/usr/bin/perl -l use strict; use warnings; use LWP::Simple; use Data::Dumper; $| = 1; # flush on print so we can tail print join '|', ('coords', 'ID', 'URL', 'Name', 'City', 'State', 'Loc Label', 'Loc Addr Middle', 'Loc Addr Last', 'Loc CSZ', 'Apps This Year', 'Apps Next Year', 'Program Length', 'Number 1st year spots', 'Avg hrs/wk 1st year', 'Max consec hours 1st year', 'Moonlighting allowed', 'Salary 1st year', 'Program Type'); while (<>) { next unless m{searchByPgmNbr&pgmNumber=(\d+)\">(.*?)}; #### initial data from summary page # id, url, name my @data = ('0,0', $1, make_url($1), $2); #city my $line = <>; chomp $line; die $line unless ($line =~ m{
(.*?),}); push @data, $1; #state $line = <>; chomp $line; die $line unless ($line =~ m{\s+(.*?)\s+[\d-]+
}); push @data, $1; # rest of data from summary my $count = 0; while ($line = <>) { next if ( $line =~ m/\<|\>/ ); next if ( $line =~ m/^\s*$/ ); $line =~ s/\s+/ /g; $count++; push @data, $line; last if (9 == $count); } # splice in real address from detail page splice @data, 6, 0, get_details($data[1]); print join '|', @data; } sub make_url { my $id = shift; return "https://freida.ama-assn.org/Freida/user/programSearchDispatch.do?method=searchByPgmNbr&pgmNumber=$id"; } sub get_details { my $id = shift; my $details = get(make_url($id)); if (defined $details and $details =~ m{(([^<]+
)+[^<]+)
}) { my @nuggets = split //, $1; shift @nuggets; # trash the program director's name my $loc_name = shift @nuggets; # name of location my $loc_csz = pop @nuggets; # city state zip my $loc_last = pop @nuggets; # last line of addr # all other parts of address my $loc_other = scalar(@nuggets) ? (join ' ', @nuggets) : ''; return ($loc_name, $loc_other, $loc_last, $loc_csz); } return ('','','',''); }