X-Git-Url: http://git.freeside.biz/gitweb/?a=blobdiff_plain;f=rt%2Flib%2FRT%2FI18N.pm;h=11cd5f1201c060496e909c0f7d800e7c49ba4024;hb=ed1f84b4e8f626245995ecda5afcf83092c153b2;hp=cadf7cc7c4a83dbbac4aa68d17468f41e59f8067;hpb=85e677b86fc37c54e6de2b06340351a28f5a5916;p=freeside.git diff --git a/rt/lib/RT/I18N.pm b/rt/lib/RT/I18N.pm index cadf7cc7c..11cd5f120 100644 --- a/rt/lib/RT/I18N.pm +++ b/rt/lib/RT/I18N.pm @@ -2,7 +2,7 @@ # # COPYRIGHT: # -# This software is Copyright (c) 1996-2012 Best Practical Solutions, LLC +# This software is Copyright (c) 1996-2014 Best Practical Solutions, LLC # # # (Except where explicitly superseded by other copyright notices) @@ -62,7 +62,6 @@ use Locale::Maketext 1.04; use Locale::Maketext::Lexicon 0.25; use base 'Locale::Maketext::Fuzzy'; -use Encode; use MIME::Entity; use MIME::Head; use File::Glob; @@ -209,33 +208,43 @@ sub SetMIMEEntityToEncoding { # do the same for parts first of all SetMIMEEntityToEncoding( $_, $enc, $preserve_words ) foreach $entity->parts; - my $charset = _FindOrGuessCharset($entity) or return; + my $head = $entity->head; + + my $charset = _FindOrGuessCharset($entity); + if ( $charset ) { + unless( Encode::find_encoding($charset) ) { + $RT::Logger->warning("Encoding '$charset' is not supported"); + $charset = undef; + } + } + unless ( $charset ) { + $head->replace( "X-RT-Original-Content-Type" => $head->mime_attr('Content-Type') ); + $head->mime_attr('Content-Type' => 'application/octet-stream'); + return; + } SetMIMEHeadToEncoding( - $entity->head, + $head, _FindOrGuessCharset($entity, 1) => $enc, $preserve_words ); - my $head = $entity->head; - # If this is a textual entity, we'd need to preserve its original encoding - $head->replace( "X-RT-Original-Encoding" => $charset ) + $head->replace( "X-RT-Original-Encoding" => Encode::encode( "UTF-8", $charset ) ) if $head->mime_attr('content-type.charset') or IsTextualContentType($head->mime_type); return unless IsTextualContentType($head->mime_type); my $body = $entity->bodyhandle; - if ( $enc ne $charset && $body ) { + if ( $body && ($enc ne $charset || $enc =~ /^utf-?8(?:-strict)?$/i) ) { my $string = $body->as_string or return; + RT::Util::assert_bytes($string); $RT::Logger->debug( "Converting '$charset' to '$enc' for " . $head->mime_type . " - " - . ( $head->get('subject') || 'Subjectless message' ) ); + . ( Encode::decode("UTF-8",$head->get('subject')) || 'Subjectless message' ) ); - # NOTE:: see the comments at the end of the sub. - Encode::_utf8_off($string); Encode::from_to( $string, $charset => $enc ); my $new_body = MIME::Body::InCore->new($string); @@ -248,30 +257,11 @@ sub SetMIMEEntityToEncoding { } } -# NOTES: Why Encode::_utf8_off before Encode::from_to -# -# All the strings in RT are utf-8 now. Quotes from Encode POD: -# -# [$length =] from_to($octets, FROM_ENC, TO_ENC [, CHECK]) -# ... The data in $octets must be encoded as octets and not as -# characters in Perl's internal format. ... -# -# Not turning off the UTF-8 flag in the string will prevent the string -# from conversion. - - - =head2 DecodeMIMEWordsToUTF8 $raw An utility method which mimics MIME::Words::decode_mimewords, but only -limited functionality. This function returns an utf-8 string. - -It returns the decoded string, or the original string if it's not -encoded. Since the subroutine converts specified string into utf-8 -charset, it should not alter a subject written in English. - -Why not use MIME::Words directly? Because it fails in RT when I -tried. Maybe it's ok now. +limited functionality. Despite its name, this function returns the +bytes of the string, in UTF-8. =cut @@ -293,18 +283,30 @@ sub DecodeMIMEWordsToEncoding { $str = MIME::Field::ParamVal->parse($str)->stringify; } + # Pre-parse by removing all whitespace between encoded words + my $encoded_word = qr/ + =\? # =? + ([^?]+?) # charset + (?:\*[^?]+)? # optional '*language' + \? # ? + ([QqBb]) # encoding + \? # ? + ([^?]+) # encoded string + \?= # ?= + /x; + $str =~ s/($encoded_word)\s+(?=$encoded_word)/$1/g; + + # Also merge quoted-printable sections together, in case multiple + # octets of a single encoded character were split between chunks. + # Though not valid according to RFC 2047, this has been seen in the + # wild. + 1 while $str =~ s/(=\?[^?]+\?[Qq]\?)([^?]+)\?=\1([^?]+)\?=/$1$2$3?=/i; + # XXX TODO: use decode('MIME-Header', ...) and Encode::Alias to replace our # custom MIME word decoding and charset canonicalization. We can't do this # until we parse before decode, instead of the other way around. my @list = $str =~ m/(.*?) # prefix - =\? # =? - ([^?]+?) # charset - (?:\*[^?]+)? # optional '*language' - \? # ? - ([QqBb]) # encoding - \? # ? - ([^?]+) # encoded string - \?= # ?= + $encoded_word ([^=]*) # trailing /xgcs; @@ -335,8 +337,15 @@ sub DecodeMIMEWordsToEncoding { } # now we have got a decoded subject, try to convert into the encoding - unless ( $charset eq $to_charset ) { - Encode::from_to( $enc_str, $charset, $to_charset ); + if ( $charset ne $to_charset || $charset =~ /^utf-?8(?:-strict)?$/i ) { + if ( Encode::find_encoding($charset) ) { + Encode::from_to( $enc_str, $charset, $to_charset ); + } else { + $RT::Logger->warning("Charset '$charset' is not supported"); + $enc_str =~ s/[^[:print:]]/\357\277\275/g; + Encode::from_to( $enc_str, 'UTF-8', $to_charset ) + unless $to_charset eq 'utf-8'; + } } # XXX TODO: RT doesn't currently do the right thing with mime-encoded headers @@ -358,6 +367,7 @@ sub DecodeMIMEWordsToEncoding { $enc_str = qq{"$enc_str"} if $enc_str =~ /[,;]/ and $enc_str !~ /^".*"$/ + and $prefix !~ /"$/ and $trailing !~ /^"/ and (!$field || $field =~ /^(?:To$|From$|B?Cc$|Content-)/i); $str .= $prefix . $enc_str . $trailing; @@ -532,13 +542,13 @@ sub SetMIMEHeadToEncoding { return if $charset eq $enc and $preserve_words; + RT::Util::assert_bytes( $head->as_string ); foreach my $tag ( $head->tags ) { next unless $tag; # seen in wild: headers with no name my @values = $head->get_all($tag); $head->delete($tag); foreach my $value (@values) { - if ( $charset ne $enc ) { - Encode::_utf8_off($value); + if ( $charset ne $enc || $enc =~ /^utf-?8(?:-strict)?$/i ) { Encode::from_to( $value, $charset => $enc ); } $value = DecodeMIMEWordsToEncoding( $value, $enc, $tag )