4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
16 FS::UID->install_callback( sub {
22 @EXPORT_OK = qw( get_district );
26 FS::Misc::Geo - routines to fetch geographic information
32 =item get_censustract_ffiec LOCATION YEAR
34 Given a location hash (see L<FS::location_Mixin>) and a census map year,
35 returns a census tract code (consisting of state, county, and tract
36 codes) or an error message.
40 sub get_censustract_ffiec {
46 if ( length($location->{country}) and uc($location->{country}) ne 'US' ) {
50 warn Dumper($location, $year) if $DEBUG;
52 # the old FFIEC geocoding service was shut down December 1, 2014.
53 # welcome to the future.
54 my $url = 'https://geomap.ffiec.gov/FFIECGeocMap/GeocodeMap1.aspx/GetGeocodeData';
55 # build the single-line query
56 my $single_line = join(', ', $location->{address1},
60 my $hashref = { sSingleLine => $single_line, iCensusYear => $year };
61 my $request = POST( $url,
62 'Content-Type' => 'application/json; charset=utf-8',
63 'Accept' => 'application/json',
64 'Content' => encode_json($hashref)
67 my $ua = new LWP::UserAgent;
68 my $res = $ua->request( $request );
73 if (!$res->is_success) {
75 die "Census tract lookup error: ".$res->message;
80 my $content = eval { decode_json($res->content) };
81 die "Census tract JSON error: $@\n" if $@;
83 if ( !exists $content->{d}->{sStatus} ) {
84 die "Census tract response is missing a status indicator.\nThis is an FFIEC problem.\n";
86 if ( $content->{d}->{sStatus} eq 'Y' ) {
88 # this also contains the (partial) standardized address, correct zip
89 # code, coordinates, etc., and we could get all of them, but right now
90 # we only want the census tract
91 my $tract = join('', $content->{d}->{sStateCode},
92 $content->{d}->{sCountyCode},
93 $content->{d}->{sTractCode});
98 my $error = $content->{d}->{sMsg}
99 || 'FFIEC lookup failed, but with no status message.';
105 #sub get_district_methods {
107 # 'wa_sales' => 'Washington sales tax',
110 =item get_district LOCATION METHOD
112 For the location hash in LOCATION, using lookup method METHOD, fetch
113 tax district information. Currently the only available method is
114 'wa_sales' (the Washington Department of Revenue sales tax lookup).
116 Returns a hash reference containing the following fields:
121 - exempt_amount (currently zero)
122 - city, county, state, country (from
124 The intent is that you can assign this to an L<FS::cust_main_county>
125 object and insert it if there's not yet a tax rate defined for that
128 get_district will die on error.
136 my $location = shift;
137 my $method = shift or return '';
138 warn Dumper($location, $method) if $DEBUG;
143 my $location = shift;
145 return '' if $location->{state} ne 'WA';
147 my $return = { %$location };
148 $return->{'exempt_amount'} = 0.00;
150 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
151 my $ua = new LWP::UserAgent;
153 my $delim = '<|>'; # yes, <|>
154 my $year = (localtime)[5] + 1900;
155 my $month = (localtime)[4] + 1;
156 my @zip = split('-', $location->{zip});
159 'TaxType=S', #sales; 'P' = property
160 'Src=0', #does something complicated
162 'Addr='.uri_escape($location->{address1}),
163 'City='.uri_escape($location->{city}),
165 'Zip1='.($zip[1] || ''), #optional
172 my $query_string = join($delim, @args );
173 $url .= "?$query_string";
174 warn "\nrequest: $url\n\n" if $DEBUG > 1;
176 my $res = $ua->request( GET( "$url?$query_string" ) );
181 if ($res->code ne '200') {
182 $error = $res->message;
185 my $content = $res->content;
186 my $p = new HTML::TokeParser \$content;
188 while ( my $t = $p->get_tag('script') ) {
189 my $u = $p->get_token; #either enclosed text or the </script> tag
190 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
195 if ( $js ) { #found it
196 # strip down to the quoted string, which contains escaped single quotes.
197 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
198 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
199 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
201 $p = new HTML::TokeParser \$js;
202 TD: while ( my $td = $p->get_tag('td') ) {
203 while ( my $u = $p->get_token ) {
204 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
205 next if $u->[0] ne 'T'; # skip non-text
208 if ( lc($text) eq 'location code' ) {
209 $p->get_tag('td'); # skip to the next column
211 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
212 $return->{'district'} = $u->[1];
214 elsif ( lc($text) eq 'total tax rate' ) {
217 $u = $p->get_token until $u->[0] eq 'T';
218 $return->{'tax'} = $u->[1];
224 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
225 $return->{'tax'} *= 100; #percentage
226 warn Dumper($return) if $DEBUG > 1;
230 $error = 'district code/tax rate not found';
234 $error = "failed to parse document";
237 die "WA tax district lookup error: $error";
240 ###### USPS Standardization ######
242 sub standardize_usps {
245 eval "use Business::US::USPS::WebTools::AddressStandardization";
248 my $location = shift;
249 if ( $location->{country} ne 'US' ) {
251 warn "standardize_usps not for use in country ".$location->{country}."\n";
252 $location->{addr_clean} = '';
255 my $userid = $conf->config('usps_webtools-userid');
256 my $password = $conf->config('usps_webtools-password');
257 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
259 Password => $password,
261 } ) or die "error starting USPS WebTools\n";
263 my($zip5, $zip4) = split('-',$location->{'zip'});
266 FirmName => $location->{company},
267 Address2 => $location->{address1},
268 Address1 => $location->{address2},
269 City => $location->{city},
270 State => $location->{state},
274 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
277 my $hash = $verifier->verify_address( %usps_args );
279 warn $verifier->response
282 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
283 if $verifier->is_error;
285 my $zip = $hash->{Zip5};
286 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
288 { company => $hash->{FirmName},
289 address1 => $hash->{Address2},
290 address2 => $hash->{Address1},
291 city => $hash->{City},
292 state => $hash->{State},
298 ###### U.S. Census Bureau ######
300 sub standardize_uscensus {
302 my $location = shift;
304 eval "use Geo::USCensus::Geocoding";
307 if ( $location->{country} ne 'US' ) {
309 warn "standardize_uscensus not for use in country ".$location->{country}."\n";
310 $location->{addr_clean} = '';
315 street => $location->{address1},
316 city => $location->{city},
317 state => $location->{state},
318 zip => $location->{zip},
319 debug => ($DEBUG || 0),
322 my $result = Geo::USCensus::Geocoding->query($request);
323 if ( $result->is_match ) {
324 # unfortunately we get the address back as a single line
325 if ($result->address =~ /^(.*), (.*), ([A-Z]{2}), (\d{5}.*)$/) {
331 address2 => uc($location->{address2}),
332 latitude => $result->latitude,
333 longitude => $result->longitude,
334 censustract => $result->censustract,
337 die "can't parse address '".$result->address."'";
340 warn Dumper($result) if $DEBUG;
341 die $result->error_message;
345 ####### EZLOCATE (obsolete) #######
347 my %ezlocate_error = ( # USA_Geo_002 documentation
348 10 => 'State not found',
349 11 => 'City not found',
350 12 => 'Invalid street address',
351 14 => 'Street name not found',
352 15 => 'Address range does not exist',
353 16 => 'Ambiguous address',
354 17 => 'Intersection not found', #unused?
357 sub standardize_ezlocate {
359 my $location = shift;
361 #if ( $location->{country} eq 'US' ) {
362 # $class = 'USA_Geo_004Tool';
364 #elsif ( $location->{country} eq 'CA' ) {
365 # $class = 'CAN_Geo_001Tool';
367 #else { # shouldn't be a fatal error, just pass through unverified address
368 # warn "standardize_teleatlas: address lookup in '".$location->{country}.
369 # "' not available\n";
372 #my $path = $conf->config('teleatlas-path') || '';
373 #local @INC = (@INC, $path);
376 # die "Loading $class failed:\n$@".
377 # "\nMake sure the TeleAtlas Perl SDK is installed correctly.\n";
380 $class = 'Geo::EZLocate'; # use our own library
381 eval "use $class 0.02"; #Geo::EZLocate 0.02 for error handling
384 my $userid = $conf->config('ezlocate-userid')
385 or die "no ezlocate-userid configured\n";
386 my $password = $conf->config('ezlocate-password')
387 or die "no ezlocate-password configured\n";
389 my $tool = $class->new($userid, $password);
390 my $match = $tool->findAddress(
391 $location->{address1},
394 $location->{zip}, #12345-6789 format is allowed
396 warn "ezlocate returned match:\n".Dumper($match) if $DEBUG > 1;
397 # error handling - B codes indicate success
398 die $ezlocate_error{$match->{MAT_STAT}}."\n"
399 unless $match->{MAT_STAT} =~ /^B\d$/;
402 address1 => $match->{MAT_ADDR},
403 address2 => $location->{address2},
404 city => $match->{MAT_CITY},
405 state => $match->{MAT_ST},
406 country => $location->{country},
407 zip => $match->{MAT_ZIP},
408 latitude => $match->{MAT_LAT},
409 longitude => $match->{MAT_LON},
410 censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}.
411 sprintf('%07.2f',$match->{CEN_TRCT}),
414 if ( $match->{STD_ADDR} ) {
415 # then they have a postal standardized address for us
417 address1 => $match->{STD_ADDR},
418 address2 => $location->{address2},
419 city => $match->{STD_CITY},
420 state => $match->{STD_ST},
421 zip => $match->{STD_ZIP}.'-'.$match->{STD_P4},
428 sub _tomtom_query { # helper method for the below
430 my $result = Geo::TomTom::Geocoding->query(%args);
431 die "TomTom geocoding error: ".$result->message."\n"
432 unless ( $result->is_success );
433 my ($match) = $result->locations;
434 my $type = $match->{type};
435 # match levels below "intersection" should not be considered clean
436 my $clean = ($type eq 'addresspoint' ||
439 $type eq 'intersection'
441 warn "tomtom returned $type match\n" if $DEBUG;
442 warn Dumper($match) if $DEBUG > 1;
446 sub standardize_tomtom {
447 # post-2013 TomTom API
448 # much better, but incompatible with ezlocate
450 my $location = shift;
451 eval "use Geo::TomTom::Geocoding; use Geo::StreetAddress::US";
454 my $key = $conf->config('tomtom-userid')
455 or die "no tomtom-userid configured\n";
457 my $country = code2country($location->{country});
458 my ($address1, $address2) = ($location->{address1}, $location->{address2});
462 $address1 =~ s/^\s+//;
463 $address1 =~ s/\s+$//;
464 $address2 =~ s/^\s+//;
465 $address2 =~ s/\s+$//;
467 # try to fix some cases of the address fields being switched
468 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
469 $address2 = $address1;
470 $address1 = $location->{address2};
472 # parse sublocation part (unit/suite/apartment...) and clean up
473 # non-sublocation address2
474 ($subloc, $address2) =
475 subloc_address2($address1, $address2, $location->{country});
476 # ask TomTom to standardize address1:
480 L => $location->{city},
481 AA => $location->{state},
482 PC => $location->{zip},
483 CC => country2code($country, LOCALE_CODE_ALPHA_3),
486 my ($match, $clean) = _tomtom_query(%args);
488 if (!$match or !$clean) {
489 # Then try cleaning up the input; TomTom is picky about junk in the
490 # address. Any of these can still be a clean match.
491 my $h = Geo::StreetAddress::US->parse_location($address1);
492 # First conservatively:
493 if ( $h->{sec_unit_type} ) {
494 my $strip = '\s+' . $h->{sec_unit_type};
495 $strip .= '\s*' . $h->{sec_unit_num} if $h->{sec_unit_num};
497 $args{T} =~ s/$strip//;
498 ($match, $clean) = _tomtom_query(%args);
500 if ( !$match or !$clean ) {
501 # Then more aggressively:
502 $args{T} = uc( join(' ', @$h{'number', 'street', 'type'}) );
503 ($match, $clean) = _tomtom_query(%args);
507 if ( !$match or !$clean ) { # partial matches are not useful
508 die "Address not found\n";
511 if ( defined $match->{censusTract} ) {
512 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
513 join('.', $match->{censusTract} =~ /(....)(..)/);
516 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
517 $address1 .= $match->{street} if $match->{street};
518 $address1 .= ' '.$subloc if $subloc;
519 $address1 = uc($address1); # USPS standards
522 address1 => $address1,
523 address2 => $address2,
524 city => uc($match->{city}),
525 state => uc($location->{state}),
526 country => uc($location->{country}),
527 zip => ($match->{standardPostalCode} || $match->{postcode}),
528 latitude => $match->{latitude},
529 longitude => $match->{longitude},
530 censustract => $tract,
531 addr_clean => $clean,
535 =iten subloc_address2 ADDRESS1, ADDRESS2, COUNTRY
537 Given 'address1' and 'address2' strings, extract the sublocation part
538 (from either one) and return it. If the sublocation was found in ADDRESS1,
539 also return ADDRESS2 (cleaned up for postal standards) as it's assumed to
540 contain something relevant.
545 # Postal Addressing Standards, Appendix C
546 # (plus correction of "hanger" to "hangar")
574 # Canada Post Addressing Guidelines 4.3
585 sub subloc_address2 {
586 # Some things seen in the address2 field:
588 # The complete address (with address1 containing part of the company name,
589 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
592 # try to parse sublocation parts from address1; if they are present we'll
593 # append them back to address1 after standardizing
595 my ($addr1, $addr2, $country) = map uc, @_;
596 my $dict = $subloc_forms{$country} or return('', $addr2);
598 my $found_in = 0; # which address is the sublocation
601 # patterns to try to parse
603 "$addr1 Nullcity, CA"
605 $h = Geo::StreetAddress::US->parse_location($addr1);
606 last if exists($h->{sec_unit_type});
608 if (exists($h->{sec_unit_type})) {
615 "$addr1, $addr2 Nullcity, CA"
617 $h = Geo::StreetAddress::US->parse_location("$addr1, $addr2");
618 last if exists($h->{sec_unit_type});
620 if (exists($h->{sec_unit_type})) {
625 $subloc = $h->{sec_unit_type};
626 # special case: do not combine P.O. box sublocs with address1
627 if ( $h->{sec_unit_type} =~ /^P *O *BOX/i ) {
628 if ( $found_in == 2 ) {
629 $addr2 = "PO BOX ".$h->{sec_unit_num};
630 } # else it's in addr1, and leave it alone
632 } elsif ( exists($dict->{$subloc}) ) {
633 # substitute the official abbreviation
634 $subloc = $dict->{$subloc};
636 $subloc .= ' ' . $h->{sec_unit_num} if length($h->{sec_unit_num});
637 } # otherwise $subloc = ''
639 if ( $found_in == 2 ) {
640 # address2 should be fully combined into address1
641 return ($subloc, '');
643 # else address2 is not the canonical sublocation, but do our best to
647 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
649 # remove all punctuation and spaces
650 foreach my $w (split(/\W+/, $addr2)) {
651 if ( exists($dict->{$w}) ) {
652 push @words, $dict->{$w};
656 my $result = join(' ', @words);
657 # correct spacing of pound sign + number
658 $result =~ s/NUMBER(\d)/# $1/;
659 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;
662 $addr2 = '' if $addr2 eq $subloc; # if it was entered redundantly
666 sub standardize_melissa {
668 my $location = shift;
671 eval "use Geo::Melissa::WebSmart";
674 my $id = $conf->config('melissa-userid')
675 or die "no melissa-userid configured\n";
676 my $geocode = $conf->exists('melissa-enable_geocoding') ? 1 : 0;
680 a1 => $location->{address1},
681 a2 => $location->{address2},
682 city => $location->{city},
683 state => $location->{state},
684 ctry => $location->{country},
685 zip => $location->{zip},
688 my $result = Geo::Melissa::WebSmart->query($request);
689 if ( $result->code =~ /AS01/ ) { # always present on success
690 my $addr = $result->address;
691 warn Dumper $addr if $DEBUG > 1;
693 address1 => $addr->{Address1},
694 address2 => $addr->{Address2},
695 city => $addr->{City}->{Name},
696 state => $addr->{State}->{Abbreviation},
697 country => $addr->{Country}->{Abbreviation},
699 latitude => $addr->{Latitude},
700 longitude => $addr->{Longitude},
703 if ( $addr->{Census}->{Tract} ) {
704 my $censustract = $addr->{County}->{Fips} . $addr->{Census}->{Tract};
705 # insert decimal point two digits from the end
706 $censustract =~ s/(\d\d)$/\.$1/;
707 $out->{censustract} = $censustract;
708 $out->{censusyear} = $conf->config('census_year');
710 # we could do a lot more nuanced reporting of the warning/status codes,
711 # but the UI doesn't support that yet.
714 die $result->status_message;