4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
15 FS::UID->install_callback( sub {
21 @EXPORT_OK = qw( get_district );
25 FS::Misc::Geo - routines to fetch geographic information
31 =item get_censustract_ffiec LOCATION YEAR
33 Given a location hash (see L<FS::location_Mixin>) and a census map year,
34 returns a census tract code (consisting of state, county, and tract
35 codes) or an error message.
39 sub get_censustract_ffiec {
45 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
49 warn Dumper($location, $year) if $DEBUG;
51 # the old FFIEC geocoding service was shut down December 1, 2014.
52 # welcome to the future.
53 my $url = 'https://geomap.ffiec.gov/FFIECGeocMap/GeocodeMap1.aspx/GetGeocodeData';
54 # build the single-line query
55 my $single_line = join(', ', $location->{address1},
59 my $hashref = { sSingleLine => $single_line, iCensusYear => $year };
60 my $request = POST( $url,
61 'Content-Type' => 'application/json; charset=utf-8',
62 'Accept' => 'application/json',
63 'Content' => encode_json($hashref)
66 my $ua = new LWP::UserAgent;
67 my $res = $ua->request( $request );
72 if (!$res->is_success) {
74 die "Census tract lookup error: ".$res->message;
79 my $content = eval { decode_json($res->content) };
80 die "Census tract JSON error: $@\n" if $@;
82 if ( !exists $content->{d}->{sStatus} ) {
83 die "Census tract response is missing a status indicator.\nThis is an FFIEC problem.\n";
85 if ( $content->{d}->{sStatus} eq 'Y' ) {
87 # this also contains the (partial) standardized address, correct zip
88 # code, coordinates, etc., and we could get all of them, but right now
89 # we only want the census tract
90 my $tract = join('', $content->{d}->{sStateCode},
91 $content->{d}->{sCountyCode},
92 $content->{d}->{sTractCode});
97 my $error = $content->{d}->{sMsg}
98 || 'FFIEC lookup failed, but with no status message.';
104 #sub get_district_methods {
106 # 'wa_sales' => 'Washington sales tax',
109 =item get_district LOCATION METHOD
111 For the location hash in LOCATION, using lookup method METHOD, fetch
112 tax district information. Currently the only available method is
113 'wa_sales' (the Washington Department of Revenue sales tax lookup).
115 Returns a hash reference containing the following fields:
120 - exempt_amount (currently zero)
121 - city, county, state, country (from
123 The intent is that you can assign this to an L<FS::cust_main_county>
124 object and insert it if there's not yet a tax rate defined for that
127 get_district will die on error.
135 my $location = shift;
136 my $method = shift or return '';
137 warn Dumper($location, $method) if $DEBUG;
142 my $location = shift;
144 return '' if $location->{state} ne 'WA';
146 my $return = { %$location };
147 $return->{'exempt_amount'} = 0.00;
149 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
150 my $ua = new LWP::UserAgent;
152 my $delim = '<|>'; # yes, <|>
153 my $year = (localtime)[5] + 1900;
154 my $month = (localtime)[4] + 1;
155 my @zip = split('-', $location->{zip});
158 'TaxType=S', #sales; 'P' = property
159 'Src=0', #does something complicated
161 'Addr='.uri_escape($location->{address1}),
162 'City='.uri_escape($location->{city}),
164 'Zip1='.($zip[1] || ''), #optional
171 my $query_string = join($delim, @args );
172 $url .= "?$query_string";
173 warn "\nrequest: $url\n\n" if $DEBUG > 1;
175 my $res = $ua->request( GET( "$url?$query_string" ) );
180 if ($res->code ne '200') {
181 $error = $res->message;
184 my $content = $res->content;
185 my $p = new HTML::TokeParser \$content;
187 while ( my $t = $p->get_tag('script') ) {
188 my $u = $p->get_token; #either enclosed text or the </script> tag
189 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
194 if ( $js ) { #found it
195 # strip down to the quoted string, which contains escaped single quotes.
196 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
197 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
198 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
200 $p = new HTML::TokeParser \$js;
201 TD: while ( my $td = $p->get_tag('td') ) {
202 while ( my $u = $p->get_token ) {
203 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
204 next if $u->[0] ne 'T'; # skip non-text
207 if ( lc($text) eq 'location code' ) {
208 $p->get_tag('td'); # skip to the next column
210 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
211 $return->{'district'} = $u->[1];
213 elsif ( lc($text) eq 'total tax rate' ) {
216 $u = $p->get_token until $u->[0] eq 'T';
217 $return->{'tax'} = $u->[1];
223 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
224 $return->{'tax'} *= 100; #percentage
225 warn Dumper($return) if $DEBUG > 1;
229 $error = 'district code/tax rate not found';
233 $error = "failed to parse document";
236 die "WA tax district lookup error: $error";
239 ###### USPS Standardization ######
241 sub standardize_usps {
244 eval "use Business::US::USPS::WebTools::AddressStandardization";
247 my $location = shift;
248 if ( $location->{country} ne 'US' ) {
250 warn "standardize_usps not for use in country ".$location->{country}."\n";
251 $location->{addr_clean} = '';
254 my $userid = $conf->config('usps_webtools-userid');
255 my $password = $conf->config('usps_webtools-password');
256 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
258 Password => $password,
260 } ) or die "error starting USPS WebTools\n";
262 my($zip5, $zip4) = split('-',$location->{'zip'});
265 FirmName => $location->{company},
266 Address2 => $location->{address1},
267 Address1 => $location->{address2},
268 City => $location->{city},
269 State => $location->{state},
273 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
276 my $hash = $verifier->verify_address( %usps_args );
278 warn $verifier->response
281 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
282 if $verifier->is_error;
284 my $zip = $hash->{Zip5};
285 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
287 { company => $hash->{FirmName},
288 address1 => $hash->{Address2},
289 address2 => $hash->{Address1},
290 city => $hash->{City},
291 state => $hash->{State},
297 ###### U.S. Census Bureau ######
299 sub standardize_uscensus {
301 my $location = shift;
303 eval "use Geo::USCensus::Geocoding";
306 if ( $location->{country} ne 'US' ) {
308 warn "standardize_uscensus not for use in country ".$location->{country}."\n";
309 $location->{addr_clean} = '';
314 street => $location->{address1},
315 city => $location->{city},
316 state => $location->{state},
317 zip => $location->{zip},
318 debug => ($DEBUG || 0),
321 my $result = Geo::USCensus::Geocoding->query($request);
322 if ( $result->is_match ) {
323 # unfortunately we get the address back as a single line
324 if ($result->address =~ /^(.*), (.*), ([A-Z]{2}), (\d{5}.*)$/) {
330 address2 => uc($location->{address2}),
331 latitude => $result->latitude,
332 longitude => $result->longitude,
333 censustract => $result->censustract,
336 die "can't parse address '".$result->address."'";
339 warn Dumper($result) if $DEBUG;
340 die $result->error_message;
344 ####### EZLOCATE (obsolete) #######
346 my %ezlocate_error = ( # USA_Geo_002 documentation
347 10 => 'State not found',
348 11 => 'City not found',
349 12 => 'Invalid street address',
350 14 => 'Street name not found',
351 15 => 'Address range does not exist',
352 16 => 'Ambiguous address',
353 17 => 'Intersection not found', #unused?
356 sub standardize_ezlocate {
358 my $location = shift;
360 #if ( $location->{country} eq 'US' ) {
361 # $class = 'USA_Geo_004Tool';
363 #elsif ( $location->{country} eq 'CA' ) {
364 # $class = 'CAN_Geo_001Tool';
366 #else { # shouldn't be a fatal error, just pass through unverified address
367 # warn "standardize_teleatlas: address lookup in '".$location->{country}.
368 # "' not available\n";
371 #my $path = $conf->config('teleatlas-path') || '';
372 #local @INC = (@INC, $path);
375 # die "Loading $class failed:\n$@".
376 # "\nMake sure the TeleAtlas Perl SDK is installed correctly.\n";
379 $class = 'Geo::EZLocate'; # use our own library
380 eval "use $class 0.02"; #Geo::EZLocate 0.02 for error handling
383 my $userid = $conf->config('ezlocate-userid')
384 or die "no ezlocate-userid configured\n";
385 my $password = $conf->config('ezlocate-password')
386 or die "no ezlocate-password configured\n";
388 my $tool = $class->new($userid, $password);
389 my $match = $tool->findAddress(
390 $location->{address1},
393 $location->{zip}, #12345-6789 format is allowed
395 warn "ezlocate returned match:\n".Dumper($match) if $DEBUG > 1;
396 # error handling - B codes indicate success
397 die $ezlocate_error{$match->{MAT_STAT}}."\n"
398 unless $match->{MAT_STAT} =~ /^B\d$/;
401 address1 => $match->{MAT_ADDR},
402 address2 => $location->{address2},
403 city => $match->{MAT_CITY},
404 state => $match->{MAT_ST},
405 country => $location->{country},
406 zip => $match->{MAT_ZIP},
407 latitude => $match->{MAT_LAT},
408 longitude => $match->{MAT_LON},
409 censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}.
410 sprintf('%07.2f',$match->{CEN_TRCT}),
413 if ( $match->{STD_ADDR} ) {
414 # then they have a postal standardized address for us
416 address1 => $match->{STD_ADDR},
417 address2 => $location->{address2},
418 city => $match->{STD_CITY},
419 state => $match->{STD_ST},
420 zip => $match->{STD_ZIP}.'-'.$match->{STD_P4},
427 sub _tomtom_query { # helper method for the below
429 my $result = Geo::TomTom::Geocoding->query(%args);
430 die "TomTom geocoding error: ".$result->message."\n"
431 unless ( $result->is_success );
432 my ($match) = $result->locations;
433 my $type = $match->{type};
434 # match levels below "intersection" should not be considered clean
435 my $clean = ($type eq 'addresspoint' ||
438 $type eq 'intersection'
440 warn "tomtom returned $type match\n" if $DEBUG;
441 warn Dumper($match) if $DEBUG > 1;
445 sub standardize_tomtom {
446 # post-2013 TomTom API
447 # much better, but incompatible with ezlocate
449 my $location = shift;
450 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
453 my $key = $conf->config('tomtom-userid')
454 or die "no tomtom-userid configured\n";
456 my $country = code2country($location->{country});
457 my ($address1, $address2) = ($location->{address1}, $location->{address2});
461 $address1 =~ s/^\s+//;
462 $address1 =~ s/\s+$//;
463 $address2 =~ s/^\s+//;
464 $address2 =~ s/\s+$//;
466 # try to fix some cases of the address fields being switched
467 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
468 $address2 = $address1;
469 $address1 = $location->{address2};
471 # parse sublocation part (unit/suite/apartment...) and clean up
472 # non-sublocation address2
473 ($subloc, $address2) =
474 subloc_address2($address1, $address2, $location->{country});
475 # ask TomTom to standardize address1:
479 L => $location->{city},
480 AA => $location->{state},
481 PC => $location->{zip},
482 CC => country2code($country, LOCALE_CODE_ALPHA_3),
485 my ($match, $clean) = _tomtom_query(%args);
487 if (!$match or !$clean) {
488 # Then try cleaning up the input; TomTom is picky about junk in the
489 # address. Any of these can still be a clean match.
490 my $h = Geo::StreetAddress::US->parse_location($address1);
491 # First conservatively:
492 if ( $h->{sec_unit_type} ) {
493 my $strip = '\s+' . $h->{sec_unit_type};
494 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
496 $args{T} =~ s/$strip//;
497 ($match, $clean) = _tomtom_query(%args);
499 if ( !$match or !$clean ) {
500 # Then more aggressively:
501 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
502 ($match, $clean) = _tomtom_query(%args);
506 if ( !$match or !$clean ) { # partial matches are not useful
507 die "Address not found\n";
510 if ( defined $match->{censusTract} ) {
511 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
512 join('.', $match->{censusTract} =~ /(....)(..)/);
515 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
516 $address1 .= $match->{street} if $match->{street};
517 $address1 .= ' '.$subloc if $subloc;
518 $address1 = uc($address1); # USPS standards
521 address1 => $address1,
522 address2 => $address2,
523 city => uc($match->{city}),
524 state => uc($location->{state}),
525 country => uc($location->{country}),
526 zip => ($match->{standardPostalCode} || $match->{postcode}),
527 latitude => $match->{latitude},
528 longitude => $match->{longitude},
529 censustract => $tract,
530 addr_clean => $clean,
534 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
536 Given 'address1' and 'address2' strings, extract the sublocation part
537 (from either one) and return it. If the sublocation was found in ADDRESS1,
538 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
539 contain something relevant.
544 # Postal Addressing Standards, Appendix C
545 # (plus correction of "hanger" to "hangar")
573 # Canada Post Addressing Guidelines 4.3
584 sub subloc_address2 {
585 # Some things seen in the address2 field:
587 # The complete address (with address1 containing part of the company name,
588 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
591 # try to parse sublocation parts from address1; if they are present we'll
592 # append them back to address1 after standardizing
594 my ($addr1, $addr2, $country) = map uc, @_;
595 my $dict = $subloc_forms{$country} or return('', $addr2);
597 my $found_in = 0; # which address is the sublocation
600 # patterns to try to parse
602 "$addr1 Nullcity, CA"
604 $h = Geo::StreetAddress::US->parse_location($addr1);
605 last if exists($h->{sec_unit_type});
607 if (exists($h->{sec_unit_type})) {
614 "$addr1, $addr2 Nullcity, CA"
616 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
617 last if exists($h->{sec_unit_type});
619 if (exists($h->{sec_unit_type})) {
624 $subloc = $h->{sec_unit_type};
625 # special case: do not combine P.O. box sublocs with address1
626 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
627 if ( $found_in == 2 ) {
628 $addr2 = "PO BOX ".$h->{sec_unit_num};
629 } # else it's in addr1, and leave it alone
631 } elsif ( exists($dict->{$subloc}) ) {
632 # substitute the official abbreviation
633 $subloc = $dict->{$subloc};
635 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
636 } # otherwise $subloc = ''
638 if ( $found_in == 2 ) {
639 # address2 should be fully combined into address1
640 return ($subloc, '');
642 # else address2 is not the canonical sublocation, but do our best to
646 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
648 # remove all punctuation and spaces
649 foreach my $w (split(/\W+/, $addr2)) {
650 if ( exists($dict->{$w}) ) {
651 push @words, $dict->{$w};
655 my $result = join(' ', @words);
656 # correct spacing of pound sign + number
657 $result =~ s/NUMBER(\d)/# $1/;
658 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
661 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
665 sub standardize_melissa {
667 my $location = shift;
670 eval "use Geo::Melissa::WebSmart";
673 my $id = $conf->config('melissa-userid')
674 or die "no melissa-userid configured\n";
675 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
679 a1 => $location->{address1},
680 a2 => $location->{address2},
681 city => $location->{city},
682 state => $location->{state},
683 ctry => $location->{country},
684 zip => $location->{zip},
687 my $result = Geo::Melissa::WebSmart->query($request);
688 if ( $result->code =~ /AS01/ ) { # always present on success
689 my $addr = $result->address;
690 warn Dumper $addr if $DEBUG > 1;
692 address1 => $addr->{Address1},
693 address2 => $addr->{Address2},
694 city => $addr->{City}->{Name},
695 state => $addr->{State}->{Abbreviation},
696 country => $addr->{Country}->{Abbreviation},
698 latitude => $addr->{Latitude},
699 longitude => $addr->{Longitude},
702 if ( $addr->{Census}->{Tract} ) {
703 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
704 # insert decimal point two digits from the end
705 $censustract =~ s/(\d\d)$/\.$1/;
706 $out->{censustract} = $censustract;
707 $out->{censusyear} = $conf->config('census_year');
709 # we could do a lot more nuanced reporting of the warning/status codes,
710 # but the UI doesn't support that yet.
713 die $result->status_message;