4 use base qw( Exporter );
5 use vars qw( $DEBUG @EXPORT_OK $conf );
8 use HTTP::Request::Common qw( GET POST );
15 FS::UID->install_callback( sub {
21 @EXPORT_OK = qw( get_district );
25 FS::Misc::Geo - routines to fetch geographic information
31 =item get_censustract_ffiec LOCATION YEAR
33 Given a location hash (see L<FS::location_Mixin>) and a census map year,
34 returns a census tract code (consisting of state, county, and tract
35 codes). Dies with an error message on failure.
39 sub get_censustract_ffiec {
44 warn Dumper($location, $year) if $DEBUG;
46 my $url = 'http://www.ffiec.gov/Geocode/default.aspx';
51 my $ua = new LWP::UserAgent;
52 my $res = $ua->request( GET( $url ) );
57 unless ($res->code eq '200') {
59 $error = $res->message;
63 my $content = $res->content;
64 my $p = new HTML::TokeParser \$content;
67 while (my $token = $p->get_tag('input') ) {
68 if ($token->[1]->{name} eq '__VIEWSTATE') {
69 $viewstate = $token->[1]->{value};
71 if ($token->[1]->{name} eq '__EVENTVALIDATION') {
72 $eventvalidation = $token->[1]->{value};
74 last if $viewstate && $eventvalidation;
77 unless ($viewstate && $eventvalidation ) {
79 $error = "either no __VIEWSTATE or __EVENTVALIDATION found";
83 my($zip5, $zip4) = split('-',$location->{zip});
87 __VIEWSTATE => $viewstate,
88 __EVENTVALIDATION => $eventvalidation,
90 txtAddress => $location->{address1},
91 txtCity => $location->{city},
92 ddlbState => $location->{state},
94 btnSearch => 'Search',
96 warn join("\n", @ffiec_args )
99 push @{ $ua->requests_redirectable }, 'POST';
100 $res = $ua->request( POST( $url, \@ffiec_args ) );
104 unless ($res->code eq '200') {
106 $error = $res->message;
110 my @id = qw( MSACode StateCode CountyCode TractCode );
111 $content = $res->content;
112 warn $res->content if $DEBUG > 2;
113 $p = new HTML::TokeParser \$content;
114 my $prefix = 'UcGeoResult11_lb';
116 sub { my $t=shift; scalar( grep { lc($t) eq lc("$prefix$_")} @id ) };
118 while (my $token = $p->get_tag('span') ) {
119 next unless ( $token->[1]->{id} && &$compare( $token->[1]->{id} ) );
120 $token->[1]->{id} =~ /^$prefix(\w+)$/;
121 $return->{lc($1)} = $p->get_trimmed_text("/span");
124 unless ( $return->{tractcode} ) {
125 warn "$error: $content ". Dumper($return) if $DEBUG;
126 $error = "No census tract found";
128 $return->{tractcode} .= ' '
129 unless $error || $JSON::VERSION >= 2; #broken JSON 1 workaround
131 } #unless ($res->code eq '200')
133 } #unless ($viewstate)
135 } #unless ($res->code eq '200')
137 die "FFIEC Geocoding error: $error\n" if $error;
139 $return->{'statecode'} . $return->{'countycode'} . $return->{'tractcode'};
142 #sub get_district_methods {
144 # 'wa_sales' => 'Washington sales tax',
147 =item get_district LOCATION METHOD
149 For the location hash in LOCATION, using lookup method METHOD, fetch
150 tax district information. Currently the only available method is
151 'wa_sales' (the Washington Department of Revenue sales tax lookup).
153 Returns a hash reference containing the following fields:
158 - exempt_amount (currently zero)
159 - city, county, state, country (from
161 The intent is that you can assign this to an L<FS::cust_main_county>
162 object and insert it if there's not yet a tax rate defined for that
165 get_district will die on error.
173 my $location = shift;
174 my $method = shift or return '';
175 warn Dumper($location, $method) if $DEBUG;
180 my $location = shift;
182 return '' if $location->{state} ne 'WA';
184 my $return = { %$location };
185 $return->{'exempt_amount'} = 0.00;
187 my $url = 'http://webgis2.dor.wa.gov/TaxRateLookup_AGS/TaxReport.aspx';
188 my $ua = new LWP::UserAgent;
190 my $delim = '<|>'; # yes, <|>
191 my $year = (localtime)[5] + 1900;
192 my $month = (localtime)[4] + 1;
193 my @zip = split('-', $location->{zip});
196 'TaxType=S', #sales; 'P' = property
197 'Src=0', #does something complicated
199 'Addr='.uri_escape($location->{address1}),
200 'City='.uri_escape($location->{city}),
202 'Zip1='.($zip[1] || ''), #optional
209 my $query_string = join($delim, @args ); # DOR expects '<|>'-delimited args
210 $url .= "?$query_string";
211 warn "\nrequest: $url\n\n" if $DEBUG > 1;
213 my $res = $ua->request( GET( $url ) ); # $url already carries the query string; do not append it again
218 if ($res->code ne '200') {
219 $error = $res->message;
222 my $content = $res->content;
223 my $p = new HTML::TokeParser \$content;
225 while ( my $t = $p->get_tag('script') ) {
226 my $u = $p->get_token; #either enclosed text or the </script> tag
227 if ( $u->[0] eq 'T' and $u->[1] =~ /tblSales/ ) {
232 if ( $js ) { #found it
233 # strip down to the quoted string, which contains escaped single quotes.
234 $js =~ s/.*\('tblSales'\);c.innerHTML='//s;
235 $js =~ s/(?<!\\)'.*//s; # (?<!\\) means "not preceded by a backslash"
236 warn "\n\n innerHTML:\n$js\n\n" if $DEBUG > 2;
238 $p = new HTML::TokeParser \$js;
239 TD: while ( my $td = $p->get_tag('td') ) {
240 while ( my $u = $p->get_token ) {
241 next TD if $u->[0] eq 'E' and $u->[1] eq 'td';
242 next if $u->[0] ne 'T'; # skip non-text
245 if ( lc($text) eq 'location code' ) {
246 $p->get_tag('td'); # skip to the next column
248 $u = $p->get_token until $u->[0] eq 'T'; # and then skip non-text
249 $return->{'district'} = $u->[1];
251 elsif ( lc($text) eq 'total tax rate' ) {
254 $u = $p->get_token until $u->[0] eq 'T';
255 $return->{'tax'} = $u->[1];
261 if ( $return->{'district'} =~ /^\d+$/ and $return->{'tax'} =~ /^.\d+$/ ) {
262 $return->{'tax'} *= 100; #percentage
263 warn Dumper($return) if $DEBUG > 1;
267 $error = 'district code/tax rate not found';
271 $error = "failed to parse document";
274 die "WA tax district lookup error: $error";
277 sub standardize_usps {
280 eval "use Business::US::USPS::WebTools::AddressStandardization";
283 my $location = shift;
284 if ( $location->{country} ne 'US' ) {
286 warn "standardize_usps not for use in country ".$location->{country}."\n";
287 $location->{addr_clean} = '';
290 my $userid = $conf->config('usps_webtools-userid');
291 my $password = $conf->config('usps_webtools-password');
292 my $verifier = Business::US::USPS::WebTools::AddressStandardization->new( {
294 Password => $password,
296 } ) or die "error starting USPS WebTools\n";
298 my($zip5, $zip4) = split('-',$location->{'zip'});
301 FirmName => $location->{company},
302 Address2 => $location->{address1},
303 Address1 => $location->{address2},
304 City => $location->{city},
305 State => $location->{state},
309 warn join('', map "$_: $usps_args{$_}\n", keys %usps_args )
312 my $hash = $verifier->verify_address( %usps_args );
314 warn $verifier->response
317 die "USPS WebTools error: ".$verifier->{error}{description} ."\n"
318 if $verifier->is_error;
320 my $zip = $hash->{Zip5};
321 $zip .= '-' . $hash->{Zip4} if $hash->{Zip4} =~ /\d/;
323 { company => $hash->{FirmName},
324 address1 => $hash->{Address2},
325 address2 => $hash->{Address1},
326 city => $hash->{City},
327 state => $hash->{State},
333 my %ezlocate_error = ( # USA_Geo_002 documentation
334 10 => 'State not found',
335 11 => 'City not found',
336 12 => 'Invalid street address',
337 14 => 'Street name not found',
338 15 => 'Address range does not exist',
339 16 => 'Ambiguous address',
340 17 => 'Intersection not found', #unused?
343 sub standardize_ezlocate {
345 my $location = shift;
347 #if ( $location->{country} eq 'US' ) {
348 # $class = 'USA_Geo_004Tool';
350 #elsif ( $location->{country} eq 'CA' ) {
351 # $class = 'CAN_Geo_001Tool';
353 #else { # shouldn't be a fatal error, just pass through unverified address
354 # warn "standardize_teleatlas: address lookup in '".$location->{country}.
355 # "' not available\n";
358 #my $path = $conf->config('teleatlas-path') || '';
359 #local @INC = (@INC, $path);
362 # die "Loading $class failed:\n$@".
363 # "\nMake sure the TeleAtlas Perl SDK is installed correctly.\n";
366 $class = 'Geo::EZLocate'; # use our own library
367 eval "use $class 0.02"; #Geo::EZLocate 0.02 for error handling
370 my $userid = $conf->config('ezlocate-userid')
371 or die "no ezlocate-userid configured\n";
372 my $password = $conf->config('ezlocate-password')
373 or die "no ezlocate-password configured\n";
375 my $tool = $class->new($userid, $password);
376 my $match = $tool->findAddress(
377 $location->{address1},
380 $location->{zip}, #12345-6789 format is allowed
382 warn "ezlocate returned match:\n".Dumper($match) if $DEBUG > 1;
383 # error handling - B codes indicate success
384 die $ezlocate_error{$match->{MAT_STAT}}."\n"
385 unless $match->{MAT_STAT} =~ /^B\d$/;
388 address1 => $match->{MAT_ADDR},
389 address2 => $location->{address2},
390 city => $match->{MAT_CITY},
391 state => $match->{MAT_ST},
392 country => $location->{country},
393 zip => $match->{MAT_ZIP},
394 latitude => $match->{MAT_LAT},
395 longitude => $match->{MAT_LON},
396 censustract => $match->{FIPS_ST}.$match->{FIPS_CTY}.
397 sprintf('%07.2f',$match->{CEN_TRCT}),
400 if ( $match->{STD_ADDR} ) {
401 # then they have a postal standardized address for us
403 address1 => $match->{STD_ADDR},
404 address2 => $location->{address2},
405 city => $match->{STD_CITY},
406 state => $match->{STD_ST},
407 zip => $match->{STD_ZIP}.'-'.$match->{STD_P4},
414 sub standardize_tomtom {
415 # post-2013 TomTom API
416 # much better, but incompatible with ezlocate
418 my $location = shift;
419 my $class = 'Geo::TomTom::Geocoding';
423 my $key = $conf->config('tomtom-userid')
424 or die "no tomtom-userid configured\n";
426 my $country = code2country($location->{country});
427 my ($address1, $address2) = ($location->{address1}, $location->{address2});
428 # try to fix some cases of the address fields being switched
429 if ( $address2 =~ /^\d/ and $address1 !~ /^\d/ ) {
430 $address2 = $address1;
431 $address1 = $location->{address2};
433 my $result = $class->query(
436 L => $location->{city},
437 AA => $location->{state},
438 PC => $location->{zip},
439 CC => country2code($country, LOCALE_CODE_ALPHA_3),
441 unless ( $result->is_success ) {
442 die "TomTom geocoding error: ".$result->message."\n";
444 my ($match) = $result->locations;
446 die "Location not found.\n";
448 my $type = $match->{type};
449 warn "tomtom returned $type match\n" if $DEBUG;
450 warn Dumper($match) if $DEBUG > 1;
452 if ( defined $match->{censusTract} ) {
453 $tract = $match->{censusStateCode}. $match->{censusFipsCountyCode}.
454 join('.', $match->{censusTract} =~ /(....)(..)/);
456 # match levels below "intersection" should not be considered clean
457 my $clean = ($type eq 'addresspoint' ||
460 $type eq 'intersection'
463 $address2 = normalize_address2($address2, $location->{country});
466 $address1 = $match->{houseNumber} . ' ' if length($match->{houseNumber});
467 $address1 .= $match->{street} if $match->{street};
470 address1 => $address1,
471 address2 => $address2,
472 city => $match->{city},
473 state => $location->{state}, # this will never change
474 country => $location->{country}, # ditto
475 zip => ($match->{standardPostalCode} || $match->{postcode}),
476 latitude => $match->{latitude},
477 longitude => $match->{longitude},
478 censustract => $tract,
479 addr_clean => $clean,
483 =item normalize_address2 STRING, COUNTRY
485 Given an 'address2' STRING, normalize it for COUNTRY postal standards.
486 Currently only works for US and CA.
490 # XXX really ought to be a separate module
491 my %address2_forms = (
492 # Postal Addressing Standards, Appendix C
493 # (plus correction of "hanger" to "hangar")
521 # Canada Post Addressing Guidelines 4.3
532 sub normalize_address2 {
533 # Some things seen in the address2 field:
535 # The complete address (with address1 containing part of the company name,
536 # or an ATTN or DBA line, or P.O. Box, or department name, or building/suite
538 my ($addr2, $country) = @_;
540 if ( exists($address2_forms{$country}) ) {
541 my $dict = $address2_forms{$country};
543 $addr2 =~ s/#\s*(\d)/NUMBER$1/; # /g?
545 # remove all punctuation and spaces
546 foreach my $w (split(/\W+/, $addr2)) {
547 if ( exists($dict->{$w}) ) {
548 push @words, $dict->{$w};
553 my $result = join(' ', @words);
554 # correct spacing of pound sign + number
555 $result =~ s/NUMBER(\d)/# $1/;
556 warn "normalizing '$addr2' to '$result'\n" if $DEBUG > 1;