X-Git-Url: http://git.freeside.biz/gitweb/?a=blobdiff_plain;f=FS%2FFS%2FMisc%2FGeo.pm;h=4ef10850e35b17c9517bbd9027aed57e8d6bcb61;hb=2d5acabf71d46aa469a6867f294706242c82db98;hp=a93d98f931a1816da89b7229500b0239ca708df4;hpb=5c45c12a64be3e67f8ba82b5a69cc08d5756cef2;p=freeside.git diff --git a/FS/FS/Misc/Geo.pm b/FS/FS/Misc/Geo.pm index a93d98f93..4ef10850e 100644 --- a/FS/FS/Misc/Geo.pm +++ b/FS/FS/Misc/Geo.pm @@ -6,10 +6,12 @@ use vars qw( $DEBUG @EXPORT_OK $conf ); use LWP::UserAgent; use HTTP::Request; use HTTP::Request::Common qw( GET POST ); +use HTTP::Cookies; use HTML::TokeParser; use URI::Escape 3.31; use Data::Dumper; use FS::Conf; +use Locale::Country; FS::UID->install_callback( sub { $conf = new FS::Conf; @@ -40,6 +42,10 @@ sub get_censustract_ffiec { my $location = shift; my $year = shift; + if ( length($location->{country}) and uc($location->{country}) ne 'US' ) { + return ''; + } + warn Dumper($location, $year) if $DEBUG; my $url = 'http://www.ffiec.gov/Geocode/default.aspx'; @@ -47,19 +53,20 @@ sub get_censustract_ffiec { my $return = {}; my $error = ''; - my $ua = new LWP::UserAgent; + my $ua = new LWP::UserAgent('cookie_jar' => HTTP::Cookies->new); my $res = $ua->request( GET( $url ) ); warn $res->as_string if $DEBUG > 2; - unless ($res->code eq '200') { + if (!$res->is_success) { $error = $res->message; } else { my $content = $res->content; + my $p = new HTML::TokeParser \$content; my $viewstate; my $eventvalidation; @@ -73,7 +80,7 @@ sub get_censustract_ffiec { last if $viewstate && $eventvalidation; } - unless ($viewstate && $eventvalidation ) { + if (!$viewstate or !$eventvalidation ) { $error = "either no __VIEWSTATE or __EVENTVALIDATION found"; @@ -85,6 +92,7 @@ sub get_censustract_ffiec { my @ffiec_args = ( __VIEWSTATE => $viewstate, __EVENTVALIDATION => $eventvalidation, + __VIEWSTATEENCRYPTED => '', ddlbYear => $year, txtAddress => $location->{address1}, txtCity => $location->{city}, @@ -410,9 +418,298 @@ sub standardize_ezlocate { \%result; } -=back +sub _tomtom_query { # helper method for the below + my %args = @_; + my $result = Geo::TomTom::Geocoding->query(%args); + die "TomTom geocoding error: ".$result->message."\n" + unless ( $result->is_success ); + my ($match) = $result->locations; + my $type = $match->{type}; + # match levels below "intersection" should not be considered clean + my $clean = ($type eq 'addresspoint' || + $type eq 'poi' || + $type eq 'house' || + $type eq 'intersection' + ) ? 'Y' : ''; + warn "tomtom returned $type match\n" if $DEBUG; + warn Dumper($match) if $DEBUG > 1; + ($match, $clean); +} + +sub standardize_tomtom { + # post-2013 TomTom API + # much better, but incompatible with ezlocate + my $self = shift; + my $location = shift; + eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US"; + die $@ if $@; + + my $key = $conf->config('tomtom-userid') + or die "no tomtom-userid configured\n"; + + my $country = code2country($location->{country}); + my ($address1, $address2) = ($location->{address1}, $location->{address2}); + my $subloc = ''; + + # trim whitespace + $address1 =~ s/^\s+//; + $address1 =~ s/\s+$//; + $address2 =~ s/^\s+//; + $address2 =~ s/\s+$//; + + # try to fix some cases of the address fields being switched + if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) { + $address2 = $address1; + $address1 = $location->{address2}; + } + # parse sublocation part (unit/suite/apartment...) and clean up + # non-sublocation address2 + ($subloc, $address2) = + subloc_address2($address1, $address2, $location->{country}); + # ask TomTom to standardize address1: + my %args = ( + key => $key, + T => $address1, + L => $location->{city}, + AA => $location->{state}, + PC => $location->{zip}, + CC => country2code($country, LOCALE_CODE_ALPHA_3), + ); + + my ($match, $clean) = _tomtom_query(%args); + + if (!$match or !$clean) { + # Then try cleaning up the input; TomTom is picky about junk in the + # address. Any of these can still be a clean match. + my $h = Geo::StreetAddress::US->parse_location($address1); + # First conservatively: + if ( $h->{sec_unit_type} ) { + my $strip = '\s+' . $h->{sec_unit_type}; + $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num}; + $strip .= '$'; + $args{T} =~ s/$strip//; + ($match, $clean) = _tomtom_query(%args); + } + if ( !$match or !$clean ) { + # Then more aggressively: + $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) ); + ($match, $clean) = _tomtom_query(%args); + } + } + + if ( !$match or !$clean ) { # partial matches are not useful + die "Address not found\n"; + } + my $tract = ''; + if ( defined $match->{censusTract} ) { + $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}. + join('.', $match->{censusTract} =~ /(....)(..)/); + } + $address1 = ''; + $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber}); + $address1 .= $match->{street} if $match->{street}; + $address1 .= ' '.$subloc if $subloc; + $address1 = uc($address1); # USPS standards + + return +{ + address1 => $address1, + address2 => $address2, + city => uc($match->{city}), + state => uc($location->{state}), + country => uc($location->{country}), + zip => ($match->{standardPostalCode} || $match->{postcode}), + latitude => $match->{latitude}, + longitude => $match->{longitude}, + censustract => $tract, + addr_clean => $clean, + }; +} + +=iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY + +Given 'address1' and 'address2' strings, extract the sublocation part +(from either one) and return it. If the sublocation was found in ADDRESS1, +also return ADDRESS2 (cleaned up for postal standards) as it's assumed to +contain something relevant. =cut +my %subloc_forms = ( + # Postal Addressing Standards, Appendix C + # (plus correction of "hanger" to "hangar") + US => {qw( + APARTMENT APT + BASEMENT BSMT + BUILDING BLDG + DEPARTMENT DEPT + FLOOR FL + FRONT FRNT + HANGAR HNGR + HANGER HNGR + KEY KEY + LOBBY LBBY + LOT LOT + LOWER LOWR + OFFICE OFC + PENTHOUSE PH + PIER PIER + REAR REAR + ROOM RM + SIDE SIDE + SLIP SLIP + SPACE SPC + STOP STOP + SUITE STE + TRAILER TRLR + UNIT UNIT + UPPER UPPR + )}, + # Canada Post Addressing Guidelines 4.3 + CA => {qw( + APARTMENT APT + APPARTEMENT APP + BUREAU BUREAU + SUITE SUITE + UNIT UNIT + UNITÉ UNITÉ + )}, +); + +sub subloc_address2 { + # Some things seen in the address2 field: + # Whitespace + # The complete address (with address1 containing part of the company name, + # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite + # number, etc.) + + # try to parse sublocation parts from address1; if they are present we'll + # append them back to address1 after standardizing + my $subloc = ''; + my ($addr1, $addr2, $country) = map uc, @_; + my $dict = $subloc_forms{$country} or return('', $addr2); + + my $found_in = 0; # which address is the sublocation + my $h; + foreach my $string ( + # patterns to try to parse + $addr1, + "$addr1 Nullcity, CA" + ) { + $h = Geo::StreetAddress::US->parse_location($addr1); + last if exists($h->{sec_unit_type}); + } + if (exists($h->{sec_unit_type})) { + $found_in = 1 + } else { + foreach my $string ( + # more patterns + $addr2, + "$addr1, $addr2", + "$addr1, $addr2 Nullcity, CA" + ) { + $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2"); + last if exists($h->{sec_unit_type}); + } + if (exists($h->{sec_unit_type})) { + $found_in = 2; + } + } + if ( $found_in ) { + $subloc = $h->{sec_unit_type}; + # special case: do not combine P.O. box sublocs with address1 + if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) { + if ( $found_in == 2 ) { + $addr2 = "PO BOX ".$h->{sec_unit_num}; + } # else it's in addr1, and leave it alone + return ('', $addr2); + } elsif ( exists($dict->{$subloc}) ) { + # substitute the official abbreviation + $subloc = $dict->{$subloc}; + } + $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num}); + } # otherwise $subloc = '' + + if ( $found_in == 2 ) { + # address2 should be fully combined into address1 + return ($subloc, ''); + } + # else address2 is not the canonical sublocation, but do our best to + # clean it up + # + # protect this + $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g? + my @words; + # remove all punctuation and spaces + foreach my $w (split(/\W+/, $addr2)) { + if ( exists($dict->{$w}) ) { + push @words, $dict->{$w}; + } else { + push @words, $w; + } + my $result = join(' ', @words); + # correct spacing of pound sign + number + $result =~ s/NUMBER(\d)/# $1/; + warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1; + $addr2 = $result; + } + $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly + ($subloc, $addr2); +} + +sub standardize_melissa { + my $class = shift; + my $location = shift; + + local $@; + eval "use Geo::Melissa::WebSmart"; + die $@ if $@; + + my $id = $conf->config('melissa-userid') + or die "no melissa-userid configured\n"; + my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0; + + my $request = { + id => $id, + a1 => $location->{address1}, + a2 => $location->{address2}, + city => $location->{city}, + state => $location->{state}, + ctry => $location->{country}, + zip => $location->{zip}, + geocode => $geocode, + }; + my $result = Geo::Melissa::WebSmart->query($request); + if ( $result->code =~ /AS01/ ) { # always present on success + my $addr = $result->address; + warn Dumper $addr if $DEBUG > 1; + my $out = { + address1 => $addr->{Address1}, + address2 => $addr->{Address2}, + city => $addr->{City}->{Name}, + state => $addr->{State}->{Abbreviation}, + country => $addr->{Country}->{Abbreviation}, + zip => $addr->{Zip}, + latitude => $addr->{Latitude}, + longitude => $addr->{Longitude}, + addr_clean => 'Y', + }; + if ( $addr->{Census}->{Tract} ) { + my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract}; + # insert decimal point two digits from the end + $censustract =~ s/(\d\d)$/\.$1/; + $out->{censustract} = $censustract; + $out->{censusyear} = $conf->config('census_year'); + } + # we could do a lot more nuanced reporting of the warning/status codes, + # but the UI doesn't support that yet. + return $out; + } else { + die $result->status_message; + } +} + +=back + +=cut 1;