4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
17 FS::UID->install_callback( sub {
23 @EXPORT_OK = qw( get_district );
27 FS::Misc::Geo - routines to fetch geographic information
33 =item get_censustract_ffiec LOCATION YEAR
35 Given a location hash (see L<FS::location_Mixin>) and a census map year,
36 returns a census tract code (consisting of state, county, and tract
37 codes) or an error message.
41 sub get_censustract_ffiec {
47 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
51 warn Dumper($location, $year) if $DEBUG;
53 # the old FFIEC geocoding service was shut down December 1, 2014.
54 # welcome to the future.
55 my $url = 'https://geomap.ffiec.gov/FFIECGeocMap/GeocodeMap1.aspx/GetGeocodeData';
56 # build the single-line query
57 my $single_line = join(', ', $location->{address1},
61 my $hashref = { sSingleLine => $single_line, iCensusYear => $year };
62 my $request = POST( $url,
63 'Content-Type' => 'application/json; charset=utf-8',
64 'Accept' => 'application/json',
65 'Content' => encode_json($hashref)
68 my $ua = new LWP::UserAgent;
69 my $res = $ua->request( $request );
74 if (!$res->is_success) {
76 die "Census tract lookup error: ".$res->message;
81 my $content = eval { decode_json($res->content) };
82 die "Census tract JSON error: $@\n" if $@;
84 if ( !exists $content->{d}->{sStatus} ) {
85 die "Census tract response is missing a status indicator.\nThis is an FFIEC problem.\n";
87 if ( $content->{d}->{sStatus} eq 'Y' ) {
89 # this also contains the (partial) standardized address, correct zip
90 # code, coordinates, etc., and we could get all of them, but right now
91 # we only want the census tract
92 my $tract = join('', $content->{d}->{sStateCode},
93 $content->{d}->{sCountyCode},
94 $content->{d}->{sTractCode});
99 my $error = $content->{d}->{sMsg}
100 || 'FFIEC lookup failed, but with no status message.';
106 #sub get_district_methods {
108 # 'wa_sales' => 'Washington sales tax',
111 =item get_district LOCATION METHOD
113 For the location hash in LOCATION, using lookup method METHOD, fetch
114 tax district information. Currently the only available method is
115 'wa_sales' (the Washington Department of Revenue sales tax lookup).
117 Returns a hash reference containing the following fields:
122 - exempt_amount (currently zero)
123 - city, county, state, country (from
125 The intent is that you can assign this to an L<FS::cust_main_county>
126 object and insert it if there's not yet a tax rate defined for that
129 get_district will die on error.
137 my $location = shift;
138 my $method = shift or return '';
139 warn Dumper($location, $method) if $DEBUG;
144 my $location = shift;
146 return '' if $location->{state} ne 'WA';
148 my $return = { %$location };
149 $return->{'exempt_amount'} = 0.00;
151 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
152 my $ua = new LWP::UserAgent;
154 my $delim = '<|>'; # yes, <|>
155 my $year = (localtime)[5] + 1900;
156 my $month = (localtime)[4] + 1;
157 my @zip = split('-', $location->{zip});
160 'TaxType=S', #sales; 'P' = property
161 'Src=0', #does something complicated
163 'Addr='.uri_escape($location->{address1}),
164 'City='.uri_escape($location->{city}),
166 'Zip1='.($zip[1] || ''), #optional
173 my $query_string = join($delim, @args );
174 $url .= "?$query_string";
175 warn "\nrequest: $url\n\n" if $DEBUG > 1;
177 my $res = $ua->request( GET( "$url?$query_string" ) );
182 if ($res->code ne '200') {
183 $error = $res->message;
186 my $content = $res->content;
187 my $p = new HTML::TokeParser \$content;
189 while ( my $t = $p->get_tag('script') ) {
190 my $u = $p->get_token; #either enclosed text or the </script> tag
191 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
196 if ( $js ) { #found it
197 # strip down to the quoted string, which contains escaped single quotes.
198 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
199 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
200 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
202 $p = new HTML::TokeParser \$js;
203 TD: while ( my $td = $p->get_tag('td') ) {
204 while ( my $u = $p->get_token ) {
205 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
206 next if $u->[0] ne 'T'; # skip non-text
209 if ( lc($text) eq 'location code' ) {
210 $p->get_tag('td'); # skip to the next column
212 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
213 $return->{'district'} = $u->[1];
215 elsif ( lc($text) eq 'total tax rate' ) {
218 $u = $p->get_token until $u->[0] eq 'T';
219 $return->{'tax'} = $u->[1];
225 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
226 $return->{'tax'} *= 100; #percentage
227 warn Dumper($return) if $DEBUG > 1;
231 $error = 'district code/tax rate not found';
235 $error = "failed to parse document";
238 die "WA tax district lookup error: $error";
241 ###### USPS Standardization ######
243 sub standardize_usps {
246 eval "use Business::US::USPS::WebTools::AddressStandardization";
249 my $location = shift;
250 if ( $location->{country} ne 'US' ) {
252 warn "standardize_usps not for use in country ".$location->{country}."\n";
253 $location->{addr_clean} = '';
256 my $userid = $conf->config('usps_webtools-userid');
257 my $password = $conf->config('usps_webtools-password');
258 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
260 Password => $password,
262 } ) or die "error starting USPS WebTools\n";
264 my($zip5, $zip4) = split('-',$location->{'zip'});
267 FirmName => $location->{company},
268 Address2 => $location->{address1},
269 Address1 => $location->{address2},
270 City => $location->{city},
271 State => $location->{state},
275 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
278 my $hash = $verifier->verify_address( %usps_args );
280 warn $verifier->response
283 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
284 if $verifier->is_error;
286 my $zip = $hash->{Zip5};
287 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
289 { company => $hash->{FirmName},
290 address1 => $hash->{Address2},
291 address2 => $hash->{Address1},
292 city => $hash->{City},
293 state => $hash->{State},
299 ###### U.S. Census Bureau ######
301 sub standardize_uscensus {
303 my $location = shift;
304 my $log = FS::Log->new('FS::Misc::Geo::standardize_uscensus');
305 $log->debug(join("\n", @{$location}{'address1', 'city', 'state', 'zip'}));
307 eval "use Geo::USCensus::Geocoding";
310 if ( $location->{country} ne 'US' ) {
312 warn "standardize_uscensus not for use in country ".$location->{country}."\n";
313 $location->{addr_clean} = '';
318 street => $location->{address1},
319 city => $location->{city},
320 state => $location->{state},
321 zip => $location->{zip},
322 debug => ($DEBUG || 0),
325 my $result = Geo::USCensus::Geocoding->query($request);
326 if ( $result->is_match ) {
327 # unfortunately we get the address back as a single line
328 $log->debug($result->address);
329 if ($result->address =~ /^(.*), (.*), ([A-Z]{2}), (\d{5}.*)$/) {
335 address2 => uc($location->{address2}),
336 latitude => $result->latitude,
337 longitude => $result->longitude,
338 censustract => $result->censustract,
341 die "Geocoding returned '".$result->address."', which does not seem to be a valid address.\n";
343 } elsif ( $result->match_level eq 'Tie' ) {
344 die "Geocoding was not able to identify a unique matching address.\n";
345 } elsif ( $result->match_level ) {
346 die "Geocoding did not find a matching address.\n";
348 $log->error($result->error_message);
349 return; # for internal errors, don't return anything
353 ####### EZLOCATE (obsolete) #######
355 my %ezlocate_error = ( # USA_Geo_002 documentation
356 10 => 'State not found',
357 11 => 'City not found',
358 12 => 'Invalid street address',
359 14 => 'Street name not found',
360 15 => 'Address range does not exist',
361 16 => 'Ambiguous address',
362 17 => 'Intersection not found', #unused?
365 sub standardize_ezlocate {
367 my $location = shift;
369 #if ( $location->{country} eq 'US' ) {
370 # $class = 'USA_Geo_004Tool';
372 #elsif ( $location->{country} eq 'CA' ) {
373 # $class = 'CAN_Geo_001Tool';
375 #else { # shouldn't be a fatal error, just pass through unverified address
376 # warn "standardize_teleatlas: address lookup in '".$location->{country}.
377 # "' not available\n";
380 #my $path = $conf->config('teleatlas-path') || '';
381 #local @INC = (@INC, $path);
384 # die "Loading $class failed:\n$@".
385 # "\nMake sure the TeleAtlas Perl SDK is installed correctly.\n";
388 $class = 'Geo::EZLocate'; # use our own library
389 eval "use $class 0.02"; #Geo::EZLocate 0.02 for error handling
392 my $userid = $conf->config('ezlocate-userid')
393 or die "no ezlocate-userid configured\n";
394 my $password = $conf->config('ezlocate-password')
395 or die "no ezlocate-password configured\n";
397 my $tool = $class->new($userid, $password);
398 my $match = $tool->findAddress(
399 $location->{address1},
402 $location->{zip}, #12345-6789 format is allowed
404 warn "ezlocate returned match:\n".Dumper($match) if $DEBUG > 1;
405 # error handling - B codes indicate success
406 die $ezlocate_error{$match->{MAT_STAT}}."\n"
407 unless $match->{MAT_STAT} =~ /^B\d$/;
410 address1 => $match->{MAT_ADDR},
411 address2 => $location->{address2},
412 city => $match->{MAT_CITY},
413 state => $match->{MAT_ST},
414 country => $location->{country},
415 zip => $match->{MAT_ZIP},
416 latitude => $match->{MAT_LAT},
417 longitude => $match->{MAT_LON},
418 censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}.
419 sprintf('%07.2f',$match->{CEN_TRCT}),
422 if ( $match->{STD_ADDR} ) {
423 # then they have a postal standardized address for us
425 address1 => $match->{STD_ADDR},
426 address2 => $location->{address2},
427 city => $match->{STD_CITY},
428 state => $match->{STD_ST},
429 zip => $match->{STD_ZIP}.'-'.$match->{STD_P4},
436 sub _tomtom_query { # helper method for the below
438 my $result = Geo::TomTom::Geocoding->query(%args);
439 die "TomTom geocoding error: ".$result->message."\n"
440 unless ( $result->is_success );
441 my ($match) = $result->locations;
442 my $type = $match->{type};
443 # match levels below "intersection" should not be considered clean
444 my $clean = ($type eq 'addresspoint' ||
447 $type eq 'intersection'
449 warn "tomtom returned $type match\n" if $DEBUG;
450 warn Dumper($match) if $DEBUG > 1;
454 sub standardize_tomtom {
455 # post-2013 TomTom API
456 # much better, but incompatible with ezlocate
458 my $location = shift;
459 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
462 my $key = $conf->config('tomtom-userid')
463 or die "no tomtom-userid configured\n";
465 my $country = code2country($location->{country});
466 my ($address1, $address2) = ($location->{address1}, $location->{address2});
470 $address1 =~ s/^\s+//;
471 $address1 =~ s/\s+$//;
472 $address2 =~ s/^\s+//;
473 $address2 =~ s/\s+$//;
475 # try to fix some cases of the address fields being switched
476 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
477 $address2 = $address1;
478 $address1 = $location->{address2};
480 # parse sublocation part (unit/suite/apartment...) and clean up
481 # non-sublocation address2
482 ($subloc, $address2) =
483 subloc_address2($address1, $address2, $location->{country});
484 # ask TomTom to standardize address1:
488 L => $location->{city},
489 AA => $location->{state},
490 PC => $location->{zip},
491 CC => country2code($country, LOCALE_CODE_ALPHA_3),
494 my ($match, $clean) = _tomtom_query(%args);
496 if (!$match or !$clean) {
497 # Then try cleaning up the input; TomTom is picky about junk in the
498 # address. Any of these can still be a clean match.
499 my $h = Geo::StreetAddress::US->parse_location($address1);
500 # First conservatively:
501 if ( $h->{sec_unit_type} ) {
502 my $strip = '\s+' . $h->{sec_unit_type};
503 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
505 $args{T} =~ s/$strip//;
506 ($match, $clean) = _tomtom_query(%args);
508 if ( !$match or !$clean ) {
509 # Then more aggressively:
510 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
511 ($match, $clean) = _tomtom_query(%args);
515 if ( !$match or !$clean ) { # partial matches are not useful
516 die "Address not found\n";
519 if ( defined $match->{censusTract} ) {
520 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
521 join('.', $match->{censusTract} =~ /(....)(..)/);
524 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
525 $address1 .= $match->{street} if $match->{street};
526 $address1 .= ' '.$subloc if $subloc;
527 $address1 = uc($address1); # USPS standards
530 address1 => $address1,
531 address2 => $address2,
532 city => uc($match->{city}),
533 state => uc($location->{state}),
534 country => uc($location->{country}),
535 zip => ($match->{standardPostalCode} || $match->{postcode}),
536 latitude => $match->{latitude},
537 longitude => $match->{longitude},
538 censustract => $tract,
539 addr_clean => $clean,
543 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
545 Given 'address1' and 'address2' strings, extract the sublocation part
546 (from either one) and return it. If the sublocation was found in ADDRESS1,
547 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
548 contain something relevant.
553 # Postal Addressing Standards, Appendix C
554 # (plus correction of "hanger" to "hangar")
582 # Canada Post Addressing Guidelines 4.3
593 sub subloc_address2 {
594 # Some things seen in the address2 field:
596 # The complete address (with address1 containing part of the company name,
597 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
600 # try to parse sublocation parts from address1; if they are present we'll
601 # append them back to address1 after standardizing
603 my ($addr1, $addr2, $country) = map uc, @_;
604 my $dict = $subloc_forms{$country} or return('', $addr2);
606 my $found_in = 0; # which address is the sublocation
609 # patterns to try to parse
611 "$addr1 Nullcity, CA"
613 $h = Geo::StreetAddress::US->parse_location($addr1);
614 last if exists($h->{sec_unit_type});
616 if (exists($h->{sec_unit_type})) {
623 "$addr1, $addr2 Nullcity, CA"
625 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
626 last if exists($h->{sec_unit_type});
628 if (exists($h->{sec_unit_type})) {
633 $subloc = $h->{sec_unit_type};
634 # special case: do not combine P.O. box sublocs with address1
635 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
636 if ( $found_in == 2 ) {
637 $addr2 = "PO BOX ".$h->{sec_unit_num};
638 } # else it's in addr1, and leave it alone
640 } elsif ( exists($dict->{$subloc}) ) {
641 # substitute the official abbreviation
642 $subloc = $dict->{$subloc};
644 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
645 } # otherwise $subloc = ''
647 if ( $found_in == 2 ) {
648 # address2 should be fully combined into address1
649 return ($subloc, '');
651 # else address2 is not the canonical sublocation, but do our best to
655 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
657 # remove all punctuation and spaces
658 foreach my $w (split(/\W+/, $addr2)) {
659 if ( exists($dict->{$w}) ) {
660 push @words, $dict->{$w};
664 my $result = join(' ', @words);
665 # correct spacing of pound sign + number
666 $result =~ s/NUMBER(\d)/# $1/;
667 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
670 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
674 sub standardize_melissa {
676 my $location = shift;
679 eval "use Geo::Melissa::WebSmart";
682 my $id = $conf->config('melissa-userid')
683 or die "no melissa-userid configured\n";
684 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
688 a1 => $location->{address1},
689 a2 => $location->{address2},
690 city => $location->{city},
691 state => $location->{state},
692 ctry => $location->{country},
693 zip => $location->{zip},
696 my $result = Geo::Melissa::WebSmart->query($request);
697 if ( $result->code =~ /AS01/ ) { # always present on success
698 my $addr = $result->address;
699 warn Dumper $addr if $DEBUG > 1;
701 address1 => $addr->{Address1},
702 address2 => $addr->{Address2},
703 city => $addr->{City}->{Name},
704 state => $addr->{State}->{Abbreviation},
705 country => $addr->{Country}->{Abbreviation},
707 latitude => $addr->{Latitude},
708 longitude => $addr->{Longitude},
711 if ( $addr->{Census}->{Tract} ) {
712 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
713 # insert decimal point two digits from the end
714 $censustract =~ s/(\d\d)$/\.$1/;
715 $out->{censustract} = $censustract;
716 $out->{censusyear} = $conf->config('census_year');
718 # we could do a lot more nuanced reporting of the warning/status codes,
719 # but the UI doesn't support that yet.
722 die $result->status_message;