diff -Nru libencode-perl-2.82/Changes libencode-perl-2.84/Changes --- libencode-perl-2.82/Changes 2016-02-06 20:17:29.000000000 +0000 +++ libencode-perl-2.84/Changes 2016-04-11 07:17:04.000000000 +0000 @@ -1,8 +1,30 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 2.82 2016/02/06 20:17:24 dankogai Exp dankogai $ +# $Id: Changes,v 2.84 2016/04/11 07:17:02 dankogai Exp dankogai $ # -$Revision: 2.82 $ $Date: 2016/02/06 20:17:24 $ +$Revision: 2.84 $ $Date: 2016/04/11 07:17:02 $ +! lib/Encode/MIME/Header.pm + Pulled: Encode::MIME::Header: + Update description that this module is only for unstructured header + https://github.com/dankogai/p5-encode/pull/53 +! lib/Encode/MIME/Header.pm t/mime-header.t + Pulled: Encode::MIME::Header: Fix valid_q_chars, '-' needs to be escaped + https://github.com/dankogai/p5-encode/pull/52 + +2.83 2016/03/24 07:49:54 +! lib/Encode/MIME/Header.pm t/mime-header.t + Both decoder and encoder are rewritten by Pali Rohár. + Encoder should be now fully compliant of RFC 2047. + Decoder is less strict to be able to decode + strings generated by old versions of this module. + https://github.com/dankogai/p5-encode/pull/51 +! t/mime-header.t + Add more test vectors from RFC2047, pp.11-12 +! lib/Encode/Supported.pod + merge: Autrijus -> Audrey + https://github.com/dankogai/p5-encode/pull/50 + +2.82 2016/02/06 20:17:24 ! lib/Encode/MIME/Header.pm lib/Encode/MIME/Header/ISO_2022_JP.pm t/mime-header.t diff -Nru libencode-perl-2.82/debian/changelog libencode-perl-2.84/debian/changelog --- libencode-perl-2.82/debian/changelog 2016-02-07 14:13:56.000000000 +0000 +++ libencode-perl-2.84/debian/changelog 2016-04-11 17:13:24.000000000 +0000 @@ -1,3 +1,19 @@ +libencode-perl (2.84-1) unstable; urgency=medium + + * Team upload. + * Imported upstream version 2.84. + + -- Niko Tyni Mon, 11 Apr 2016 20:09:46 +0300 + +libencode-perl (2.83-1) unstable; urgency=medium + + * Team upload. + * Import upstream version 2.83. + + rewritten RFC 2047 implementation fixes whitespace issues + (Closes: #819155) + + -- Niko Tyni Sat, 26 Mar 2016 14:15:43 +0200 + libencode-perl (2.82-1) unstable; urgency=medium [ Salvatore Bonaccorso ] diff -Nru libencode-perl-2.82/Encode.pm libencode-perl-2.84/Encode.pm --- libencode-perl-2.82/Encode.pm 2016-02-06 20:17:29.000000000 +0000 +++ libencode-perl-2.84/Encode.pm 2016-04-11 07:17:04.000000000 +0000 @@ -1,10 +1,10 @@ # -# $Id: Encode.pm,v 2.82 2016/02/06 20:16:42 dankogai Exp $ +# $Id: Encode.pm,v 2.84 2016/04/11 07:16:52 dankogai Exp $ # package Encode; use strict; use warnings; -our $VERSION = sprintf "%d.%02d", q$Revision: 2.82 $ =~ /(\d+)/g; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.84 $ =~ /(\d+)/g; use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; use XSLoader (); XSLoader::load( __PACKAGE__, $VERSION ); diff -Nru libencode-perl-2.82/lib/Encode/MIME/Header/ISO_2022_JP.pm libencode-perl-2.84/lib/Encode/MIME/Header/ISO_2022_JP.pm --- libencode-perl-2.82/lib/Encode/MIME/Header/ISO_2022_JP.pm 2016-02-06 20:17:34.000000000 +0000 +++ libencode-perl-2.84/lib/Encode/MIME/Header/ISO_2022_JP.pm 2016-04-11 07:17:06.000000000 +0000 @@ -6,7 +6,7 @@ use parent qw(Encode::MIME::Header); $Encode::Encoding{'MIME-Header-ISO_2022_JP'} = - bless { encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } => + bless { decode_b => '1', decode_q => '1', encode => 'B', bpl => 76, Name => 'MIME-Header-ISO_2022_JP' } => __PACKAGE__; use constant HEAD => '=?ISO-2022-JP?B?'; @@ -14,7 +14,7 @@ use Encode::CJKConstants qw(%RE); -our $VERSION = do { my @r = ( q$Revision: 1.6 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 1.7 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; # I owe the below codes totally to # Jcode by Dan Kogai & http://www.din.or.jp/~ohzaki/perl.htm#JP_Base64 diff -Nru libencode-perl-2.82/lib/Encode/MIME/Header.pm libencode-perl-2.84/lib/Encode/MIME/Header.pm --- libencode-perl-2.82/lib/Encode/MIME/Header.pm 2016-02-06 20:17:33.000000000 +0000 +++ libencode-perl-2.84/lib/Encode/MIME/Header.pm 2016-04-11 07:17:06.000000000 +0000 @@ -3,7 +3,7 @@ use warnings; no warnings 'redefine'; -our $VERSION = do { my @r = ( q$Revision: 2.21 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.23 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use Encode qw(find_encoding encode_utf8 decode_utf8); use MIME::Base64; use Carp; @@ -26,7 +26,7 @@ $Encode::Encoding{'MIME-Q'} = bless { %seed, - decode_q => 1, + decode_b => 0, encode => 'Q', Name => 'MIME-Q', } => __PACKAGE__; @@ -36,47 +36,74 @@ sub needs_lines { 1 } sub perlio_ok { 0 } +# RFC 2047 and RFC 2231 grammar +my $re_charset = qr/[-0-9A-Za-z_]+/; +my $re_language = qr/[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*/; +my $re_encoding = qr/[QqBb]/; +my $re_encoded_text = qr/[^\?\s]*/; +my $re_encoded_word = qr/=\?$re_charset(?:\*$re_language)?\?$re_encoding\?$re_encoded_text\?=/; +my $re_capture_encoded_word = qr/=\?($re_charset)((?:\*$re_language)?)\?($re_encoding)\?($re_encoded_text)\?=/; + +our $STRICT_DECODE = 0; + sub decode($$;$) { use utf8; my ( $obj, $str, $chk ) = @_; - # zap spaces between encoded words - $str =~ s/\?=\s+=\?/\?==\?/gos; - + # multi-line header to single line - $str =~ s/(?:\r\n|[\r\n])[ \t]//gos; + $str =~ s/(?:\r\n|[\r\n])([ \t])/$1/gos; - 1 while ( $str =~ - s/(=\?[-0-9A-Za-z_]+\?[Qq]\?)([^?]*?)\?=\1([^?]*?\?=)/$1$2$3/ ) - ; # Concat consecutive QP encoded mime headers - # Fixes breaking inside multi-byte characters - - $str =~ s{ - =\? # begin encoded word - ([-0-9A-Za-z_]+) # charset (encoding) - (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231) - \?([QqBb])\? # delimiter - (.*?) # Base64-encodede contents - \?= # end encoded word - }{ - if (uc($2) eq 'B'){ - $obj->{decode_b} or croak qq(MIME "B" unsupported); - decode_b($1, $3, $chk); - } elsif (uc($2) eq 'Q'){ - $obj->{decode_q} or croak qq(MIME "Q" unsupported); - decode_q($1, $3, $chk); - } else { - croak qq(MIME "$2" encoding is nonexistent!); - } - }egox; - $_[1] = $str if $chk; - return $str; + # decode each line separately + my @input = split /(\r\n|\r|\n)/o, $str; + my $output = substr($str, 0, 0); # to propagate taintedness + + while ( @input ) { + + my $line = shift @input; + my $sep = shift @input; + + # in strict mode encoded words must be always separated by spaces or tabs + # except in comments when separator between words and comment round brackets can be omitted + my $re_word_begin = $STRICT_DECODE ? qr/(?:[ \t\n]|\A)\(?/ : qr//; + my $re_word_sep = $STRICT_DECODE ? qr/[ \t]+/ : qr/\s*/; + my $re_word_end = $STRICT_DECODE ? qr/\)?(?:[ \t\n]|\z)/ : qr//; + + # concat consecutive encoded mime words with same charset, language and encoding + # fixes breaking inside multi-byte characters + 1 while $line =~ s/($re_word_begin)$re_capture_encoded_word$re_word_sep=\?\2\3\?\4\?($re_encoded_text)\?=(?=$re_word_end)/$1=\?$2$3\?$4\?$5$6\?=/; + + $line =~ s{($re_word_begin)((?:$re_encoded_word$re_word_sep)*$re_encoded_word)(?=$re_word_end)}{ + my $begin = $1; + my $words = $2; + $words =~ s{$re_capture_encoded_word$re_word_sep?}{ + if (uc($3) eq 'B') { + $obj->{decode_b} or croak qq(MIME "B" unsupported); + decode_b($1, $4, $chk); + } elsif (uc($3) eq 'Q') { + $obj->{decode_q} or croak qq(MIME "Q" unsupported); + decode_q($1, $4, $chk); + } else { + croak qq(MIME "$3" encoding is nonexistent!); + } + }eg; + $begin . $words; + }eg; + + $output .= $line; + $output .= $sep if defined $sep; + + } + + $_[1] = '' if $chk; # empty the input string in the stack so perlio is ok + return $output; } sub decode_b { - my $enc = shift; - my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc"); - my $db64 = decode_base64(shift); - my $chk = shift; + my ( $enc, $b, $chk ) = @_; + my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc"); + # MIME::Base64::decode_base64 ignores everything after a '=' padding character + # split string after each sequence of padding characters and decode each substring + my $db64 = join('', map { decode_base64($_) } split /(?<==)(?=[^=])/, $b); return $d->name eq 'utf8' ? Encode::decode_utf8($db64) : $d->decode( $db64, $chk || Encode::FB_PERLQQ ); @@ -92,102 +119,92 @@ : $d->decode( $q, $chk || Encode::FB_PERLQQ ); } -my $especials = - join( '|' => map { quotemeta( chr($_) ) } - unpack( "C*", qq{()<>,;:"'/[]?=} ) ); - -my $re_encoded_word = qr{ - =\? # begin encoded word - (?:[-0-9A-Za-z_]+) # charset (encoding) - (?:\*[A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*)? # language (RFC 2231) - \?(?:[QqBb])\? # delimiter - (?:.*?) # Base64-encodede contents - \?= # end encoded word -}xo; - -my $re_especials = qr{$re_encoded_word|$especials}xo; - -# cf: -# https://rt.cpan.org/Ticket/Display.html?id=88717 -# https://www.ietf.org/rfc/rfc0822.txt -my $re_linear_white_space = qr{(?:[ \t]|\r\n?)}; - sub encode($$;$) { my ( $obj, $str, $chk ) = @_; - my @line = (); - for my $line ( split /\r\n|[\r\n]/o, $str ) { - my ( @word, @subline ); - if ($line =~ /\A([\w\-]+:\s+)(.*)\z/o) { - push @word, $1, $obj->_encode($2); # "X-Header-Name: ..." + $_[1] = '' if $chk; # empty the input string in the stack so perlio is ok + return $obj->_fold_line($obj->_encode_line($str)); +} + +sub _fold_line { + my ( $obj, $line ) = @_; + my $bpl = $obj->{bpl}; + my $output = substr($line, 0, 0); # to propagate taintedness + + while ( length $line ) { + if ( $line =~ s/^(.{0,$bpl})(\s|\z)// ) { + $output .= $1; + $output .= "\r\n" . $2 if length $line; + } elsif ( $line =~ s/(\s)(.*)$// ) { + $output .= $line; + $line = $2; + $output .= "\r\n" . $1 if length $line; } else { - push @word, $obj->_encode($line); # anything else - } - my $subline = ''; - for my $word (@word) { - use bytes (); - if ( bytes::length($subline) + bytes::length($word) > - $obj->{bpl} - 1 ) - { - push @subline, $subline; - $subline = ''; - } - $subline .= ' ' if ($subline =~ /\?=$/ and $word =~ /^=\?/); - $subline .= $word; + $output .= $line; + last; } - length($subline) and push @subline, $subline; - push @line, join( "\n " => grep !/^$/, @subline ); } - $_[1] = '' if $chk; - return (substr($str, 0, 0) . join( "\n", @line )); + + return $output; } use constant HEAD => '=?UTF-8?'; use constant TAIL => '?='; -use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, }; +use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, B_len => \&_encode_b_len, Q_len => \&_encode_q_len }; -sub _encode { +sub _encode_line { my ( $o, $str ) = @_; my $enc = $o->{encode}; + my $enc_len = $enc . '_len'; my $llen = ( $o->{bpl} - length(HEAD) - 2 - length(TAIL) ); - # to coerce a floating-point arithmetics, the following contains - # .0 in numbers -- dankogai - $llen *= $enc eq 'B' ? 3.0 / 4.0 : 1.0 / 3.0; my @result = (); my $chunk = ''; while ( length( my $chr = substr( $str, 0, 1, '' ) ) ) { - use bytes (); - if ( bytes::length($chunk) + bytes::length($chr) > $llen ) { + if ( SINGLE->{$enc_len}($chunk . $chr) > $llen ) { push @result, SINGLE->{$enc}($chunk); $chunk = ''; } $chunk .= $chr; } length($chunk) and push @result, SINGLE->{$enc}($chunk); - return @result; + return join(' ', @result); } sub _encode_b { HEAD . 'B?' . encode_base64( encode_utf8(shift), '' ) . TAIL; } +sub _encode_b_len { + my ( $chunk ) = @_; + use bytes (); + return bytes::length($chunk) * 4 / 3; +} + +my $valid_q_chars = '0-9A-Za-z !*+\-/'; + sub _encode_q { - my $chunk = shift; + my ( $chunk ) = @_; $chunk = encode_utf8($chunk); - $chunk =~ s{ - ([^0-9A-Za-z]) - }{ - join("" => map {sprintf "=%02X", $_} unpack("C*", $1)) - }egox; + $chunk =~ s{([^$valid_q_chars])}{ + join("" => map {sprintf "=%02X", $_} unpack("C*", $1)) + }egox; + $chunk =~ s/ /_/go; return HEAD . 'Q?' . $chunk . TAIL; } +sub _encode_q_len { + my ( $chunk ) = @_; + use bytes (); + my $valid_count =()= $chunk =~ /[$valid_q_chars]/sgo; + return ( bytes::length($chunk) - $valid_count ) * 3 + $valid_count; +} + 1; __END__ =head1 NAME -Encode::MIME::Header -- MIME 'B' and 'Q' header encoding +Encode::MIME::Header -- MIME 'B' and 'Q' encoding for unstructured header =head1 SYNOPSIS @@ -197,7 +214,8 @@ =head1 ABSTRACT -This module implements RFC 2047 Mime Header Encoding. There are 3 +This module implements RFC 2047 MIME encoding for unstructured header. +It cannot be used for structured headers like From or To. There are 3 variant encoding names; C, C and C. The difference is described below @@ -222,6 +240,25 @@ =head1 BUGS +Before version 2.83 this module had broken both decoder and encoder. +Encoder inserted additional spaces, incorrectly encoded input data +and produced invalid MIME strings. Decoder lot of times discarded +white space characters, incorrectly interpreted data or decoded +Base64 string as Quoted-Printable. + +As of version 2.83 encoder should be fully compliant of RFC 2047. +Due to bugs in previous versions of encoder, decoder is by default in +less strict compatible mode. It should be able to decode strings +encoded by pre 2.83 version of this module. But this default mode is +not correct according to RFC 2047. + +In default mode decoder try to decode every substring which looks like +MIME encoded data. So it means that MIME data does not need to be +separated by white space. To enforce correct strict mode, set package +variable $Encode::MIME::Header::STRICT_DECODE to 1, e.g. by localizing: + +C + It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP? and =?ISO-8859-1?= but that makes the implementation too complicated. These days major mail agents all support =?UTF-8? so I think it is diff -Nru libencode-perl-2.82/lib/Encode/Supported.pod libencode-perl-2.84/lib/Encode/Supported.pod --- libencode-perl-2.82/lib/Encode/Supported.pod 2015-09-17 15:58:47.000000000 +0000 +++ libencode-perl-2.84/lib/Encode/Supported.pod 2016-03-11 05:13:47.000000000 +0000 @@ -367,7 +367,7 @@ Not very popular. Needs CNS 11643-1 and -2 which are not available in this module. CNS 11643 is supported (via euc-tw) in Encode::HanExtra. -Autrijus Tang may add support for this encoding in his module in future. +Audrey Tang may add support for this encoding in her module in future. =item Various HP-UX encodings diff -Nru libencode-perl-2.82/META.json libencode-perl-2.84/META.json --- libencode-perl-2.82/META.json 2016-02-06 20:18:01.000000000 +0000 +++ libencode-perl-2.84/META.json 2016-04-11 07:18:17.000000000 +0000 @@ -43,6 +43,6 @@ "url" : "https://github.com/dankogai/p5-encode" } }, - "version" : "2.82", + "version" : "2.84", "x_serialization_backend" : "JSON::PP version 2.27300" } diff -Nru libencode-perl-2.82/META.yml libencode-perl-2.84/META.yml --- libencode-perl-2.82/META.yml 2016-02-06 20:18:01.000000000 +0000 +++ libencode-perl-2.84/META.yml 2016-04-11 07:18:17.000000000 +0000 @@ -22,5 +22,5 @@ parent: '0.221' resources: repository: https://github.com/dankogai/p5-encode -version: '2.82' +version: '2.84' x_serialization_backend: 'CPAN::Meta::YAML version 0.018' diff -Nru libencode-perl-2.82/t/encoding-locale.t libencode-perl-2.84/t/encoding-locale.t --- libencode-perl-2.82/t/encoding-locale.t 2015-04-02 07:17:14.000000000 +0000 +++ libencode-perl-2.84/t/encoding-locale.t 2016-02-26 04:28:44.000000000 +0000 @@ -14,8 +14,9 @@ my $locale_encoding = encoding::_get_locale_encoding; SKIP: { - is(ref $locale_encoding, '', '_get_locale_encoding returns a scalar value') - or skip 'no locale encoding found', 1; + defined $locale_encoding or skip 'no locale encoding found', 3; + + is(ref $locale_encoding, '', '_get_locale_encoding returns a scalar value'); my $enc = find_encoding($locale_encoding); ok(defined $enc, 'encoding returned is supported') diff -Nru libencode-perl-2.82/t/mime-header.t libencode-perl-2.84/t/mime-header.t --- libencode-perl-2.82/t/mime-header.t 2016-02-06 20:17:36.000000000 +0000 +++ libencode-perl-2.84/t/mime-header.t 2016-04-11 07:17:07.000000000 +0000 @@ -1,5 +1,5 @@ # -# $Id: mime-header.t,v 2.10 2016/02/06 20:17:24 dankogai Exp dankogai $ +# $Id: mime-header.t,v 2.12 2016/04/11 07:17:02 dankogai Exp dankogai $ # This script is written in utf8 # BEGIN { @@ -19,130 +19,128 @@ $| = 1; } -no utf8; - use strict; -#use Test::More qw(no_plan); -use Test::More tests => 21; -use_ok("Encode::MIME::Header"); - -my $eheader =<<'EOS'; -From: =?US-ASCII?Q?Keith_Moore?= -To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= -CC: =?ISO-8859-1?Q?Andr=E9?= Pirard -Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= - =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= -EOS - -my $dheader=<<"EOS"; -From: Keith Moore -To: Keld J\xF8rn Simonsen -CC: Andr\xE9 Pirard -Subject: If you can read this you understand the example. -EOS - -is(Encode::decode('MIME-Header', $eheader), $dheader, "decode ASCII (RFC2047)"); use utf8; +use charnames ":full"; -my $uheader =<<'EOS'; -From: =?US-ASCII?Q?Keith_Moore?= -To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= -CC: =?ISO-8859-1?Q?Andr=E9?= Pirard -Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= - =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= -EOS - -is(Encode::decode('MIME-Header', $uheader), $dheader, "decode UTF-8 (RFC2047)"); - -my $lheader =<<'EOS'; -From: =?US-ASCII*en-US?Q?Keith_Moore?= -To: =?ISO-8859-1*da-DK?Q?Keld_J=F8rn_Simonsen?= -CC: =?ISO-8859-1*fr-BE?Q?Andr=E9?= Pirard -Subject: =?ISO-8859-1*en?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= - =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= -EOS - -is(Encode::decode('MIME-Header', $lheader), $dheader, "decode language tag (RFC2231)"); - - -$dheader=<<'EOS'; -From: 小飼 弾 -To: dankogai@dan.co.jp (小飼=Kogai, 弾=Dan) -Subject: 漢字、カタカナ、ひらがなを含む、非常に長いタイトル行が一体全体どのようにしてEncodeされるのか? -EOS - -my $bheader =<<'EOS'; -From: =?UTF-8?B?5bCP6aO8IOW8viA8ZGFua29nYWlAZGFuLmNvLmpwPg==?= -To: =?UTF-8?B?ZGFua29nYWlAZGFuLmNvLmpwICjlsI/po7w9S29nYWksIOW8vj1EYW4p?= -Subject: - =?UTF-8?B?5ryi5a2X44CB44Kr44K/44Kr44OK44CB44Gy44KJ44GM44Gq44KS5ZCr44KA?= - =?UTF-8?B?44CB6Z2e5bi444Gr6ZW344GE44K/44Kk44OI44Or6KGM44GM5LiA5L2T5YWo?= - =?UTF-8?B?5L2T44Gp44Gu44KI44GG44Gr44GX44GmRW5jb2Rl44GV44KM44KL44Gu44GL?= - =?UTF-8?B?77yf?= -EOS - -my $qheader=<<'EOS'; -From: =?UTF-8?Q?=E5=B0=8F=E9=A3=BC=20=E5=BC=BE=20=3Cdankogai=40?= - =?UTF-8?Q?dan=2Eco=2Ejp=3E?= -To: =?UTF-8?Q?dankogai=40dan=2Eco=2Ejp=20=28?= - =?UTF-8?Q?=E5=B0=8F=E9=A3=BC=3DKogai=2C=20=E5=BC=BE=3DDan?= =?UTF-8?Q?=29?= -Subject: - =?UTF-8?Q?=E6=BC=A2=E5=AD=97=E3=80=81=E3=82=AB=E3=82=BF=E3=82=AB=E3=83=8A?= - =?UTF-8?Q?=E3=80=81=E3=81=B2=E3=82=89=E3=81=8C=E3=81=AA=E3=82=92=E5=90=AB?= - =?UTF-8?Q?=E3=82=80=E3=80=81=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84?= - =?UTF-8?Q?=E3=82=BF=E3=82=A4=E3=83=88=E3=83=AB=E8=A1=8C=E3=81=8C=E4=B8=80?= - =?UTF-8?Q?=E4=BD=93=E5=85=A8=E4=BD=93=E3=81=A9=E3=81=AE=E3=82=88=E3=81=86?= - =?UTF-8?Q?=E3=81=AB=E3=81=97=E3=81=A6Encode=E3=81=95=E3=82=8C?= - =?UTF-8?Q?=E3=82=8B=E3=81=AE=E3=81=8B=EF=BC=9F?= -EOS - -is(Encode::decode('MIME-Header', $bheader), $dheader, "decode B"); -is(Encode::decode('MIME-Header', $qheader), $dheader, "decode Q"); -is(Encode::encode('MIME-B', $dheader)."\n", $bheader, "encode B"); -is(Encode::encode('MIME-Q', $dheader)."\n", $qheader, "encode Q"); - -$dheader = "What is =?UTF-8?B?w4RwZmVs?= ?"; -$bheader = "=?UTF-8?B?V2hhdCBpcyA9P1VURi04P0I/dzRSd1ptVnM/PSA/?="; -$qheader = "=?UTF-8?Q?What=20is=20=3D=3FUTF=2D8=3FB=3Fw4R?=" - . "\n " . "=?UTF-8?Q?wZmVs=3F=3D=20=3F?="; -is(Encode::encode('MIME-B', $dheader), $bheader, "Double decode B"); -is(Encode::encode('MIME-Q', $dheader), $qheader, "Double decode Q"); -{ - # From: Dave Evans - # Subject: Bug in Encode::MIME::Header - # Message-Id: <3F43440B.7060606@rudolf.org.uk> - use charnames ":full"; - my $pound_1024 = "\N{POUND SIGN}1024"; - is(Encode::encode('MIME-Q' => $pound_1024), '=?UTF-8?Q?=C2=A31024?=', - 'pound 1024'); +use Test::More tests => 130; +use_ok("Encode::MIME::Header"); + +my @decode_tests = ( + # RFC2047 p.5 + "=?iso-8859-1?q?this=20is=20some=20text?=" => "this is some text", + # RFC2047 p.10 + "=?US-ASCII?Q?Keith_Moore?=" => "Keith Moore", + "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=" => "Keld Jørn Simonsen", + "=?ISO-8859-1?Q?Andr=E9?= Pirard" => "André Pirard", + "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=\r\n =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=" => "If you can read this you understand the example.", + "=?ISO-8859-1?Q?Olle_J=E4rnefors?=" => "Olle Järnefors", + "=?ISO-8859-1?Q?Patrik_F=E4ltstr=F6m?=" => "Patrik Fältström", + # RFC2047 p.11 + "(=?iso-8859-8?b?7eXs+SDv4SDp7Oj08A==?=)" => "(םולש ןב ילטפנ)", + "(=?ISO-8859-1?Q?a?=)" => "(a)", + "(=?ISO-8859-1?Q?a?= b)" => "(a b)", + "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)" => "(ab)", + "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)" => "(ab)", + "(=?ISO-8859-1?Q?a?=\r\n\t=?ISO-8859-1?Q?b?=)" => "(ab)", + # RFC2047 p.12 + "(=?ISO-8859-1?Q?a_b?=)" => '(a b)', + "(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)" => "(a b)", + # RFC2231 p.6 + "=?US-ASCII*EN?Q?Keith_Moore?=" => "Keith Moore", + # others + "=?US-ASCII*en-US?Q?Keith_Moore?=" => "Keith Moore", + "=?ISO-8859-1*da-DK?Q?Keld_J=F8rn_Simonsen?=" => "Keld Jørn Simonsen", + "=?ISO-8859-1*fr-BE?Q?Andr=E9?= Pirard" => "André Pirard", + "=?ISO-8859-1*en?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=" => "If you can read this you understand the example.", + # RT67569 + "foo =?us-ascii?q?bar?=" => "foo bar", + "foo\r\n =?us-ascii?q?bar?=" => "foo bar", + "=?us-ascii?q?foo?= bar" => "foo bar", + "=?us-ascii?q?foo?=\r\n bar" => "foo bar", + "foo bar" => "foo bar", + "foo\r\n bar" => "foo bar", + "=?us-ascii?q?foo?= =?us-ascii?q?bar?=" => "foobar", + "=?us-ascii?q?foo?=\r\n =?us-ascii?q?bar?=" => "foobar", + "=?us-ascii?q?foo bar?=" => "=?us-ascii?q?foo bar?=", + "=?us-ascii?q?foo\r\n bar?=" => "=?us-ascii?q?foo bar?=", + # RT40027 + "a: b\r\n c" => "a: b c", + # RT104422 + "=?utf-8?Q?pre?= =?utf-8?B?IGZvbw==?=\r\n =?utf-8?Q?bar?=" => "pre foobar", +); + +my @decode_default_tests = ( + @decode_tests, + '=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?=' => 'foo bar', + '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="' => '"foo bar"', + "=?us-ascii?q?foo?==?us-ascii?q?bar?=" => "foobar", + "foo=?us-ascii?q?bar?=" => "foobar", + "foo =?us-ascii?q?=20?==?us-ascii?q?bar?=" => "foo bar", + # Encode::MIME::Header pre 2.83 + "[=?UTF-8?B?ZsOzcnVt?=]=?UTF-8?B?IHNwcsOhdmE=?=" => "[fórum] správa", + "test:=?UTF-8?B?IHNwcsOhdmE=?=" => "test: správa", + "=?UTF-8?B?dMOpc3Q=?=:=?UTF-8?B?IHNwcsOhdmE=?=", "tést: správa", +); + +my @decode_strict_tests = ( + @decode_tests, + '=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?=' => 'foo bar', + '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="' => '"=?us-ascii?q?foo=20=3cbar=40baz=2efoo=3e=20bar?="', +); + +my @encode_tests = ( + "小飼 弾" => "=?UTF-8?B?5bCP6aO8IOW8vg==?=", "=?UTF-8?Q?=E5=B0=8F=E9=A3=BC_=E5=BC=BE?=", + "漢字、カタカナ、ひらがなを含む、非常に長いタイトル行が一体全体どのようにしてEncodeされるのか?" => "=?UTF-8?B?5ryi5a2X44CB44Kr44K/44Kr44OK44CB44Gy44KJ44GM44Gq44KS5ZCr44KA?=\r\n =?UTF-8?B?44CB6Z2e5bi444Gr6ZW344GE44K/44Kk44OI44Or6KGM44GM5LiA5L2T5YWo?=\r\n =?UTF-8?B?5L2T44Gp44Gu44KI44GG44Gr44GX44GmRW5jb2Rl44GV44KM44KL44Gu44GL?=\r\n =?UTF-8?B?77yf?=", "=?UTF-8?Q?=E6=BC=A2=E5=AD=97=E3=80=81=E3=82=AB=E3=82=BF=E3=82=AB=E3=83=8A?=\r\n =?UTF-8?Q?=E3=80=81=E3=81=B2=E3=82=89=E3=81=8C=E3=81=AA=E3=82=92=E5=90=AB?=\r\n =?UTF-8?Q?=E3=82=80=E3=80=81=E9=9D=9E=E5=B8=B8=E3=81=AB=E9=95=B7=E3=81=84?=\r\n =?UTF-8?Q?=E3=82=BF=E3=82=A4=E3=83=88=E3=83=AB=E8=A1=8C=E3=81=8C=E4=B8=80?=\r\n =?UTF-8?Q?=E4=BD=93=E5=85=A8=E4=BD=93=E3=81=A9=E3=81=AE=E3=82=88=E3=81=86?=\r\n =?UTF-8?Q?=E3=81=AB=E3=81=97=E3=81=A6Encode=E3=81=95=E3=82=8C=E3=82=8B?=\r\n =?UTF-8?Q?=E3=81=AE=E3=81=8B=EF=BC=9F?=", + # double encode + "What is =?UTF-8?B?w4RwZmVs?= ?" => "=?UTF-8?B?V2hhdCBpcyA9P1VURi04P0I/dzRSd1ptVnM/PSA/?=", "=?UTF-8?Q?What_is_=3D=3FUTF-8=3FB=3Fw4RwZmVs=3F=3D_=3F?=", + # pound 1024 + "\N{POUND SIGN}1024" => "=?UTF-8?B?wqMxMDI0?=", "=?UTF-8?Q?=C2=A31024?=", + # latin1 characters + "\x{fc}" => "=?UTF-8?B?w7w=?=", "=?UTF-8?Q?=C3=BC?=", + # RT42627 + Encode::decode_utf8("\x{c2}\x{a3}xxxxxxxxxxxxxxxxxxx0") => "=?UTF-8?B?wqN4eHh4eHh4eHh4eHh4eHh4eHh4MA==?=", "=?UTF-8?Q?=C2=A3xxxxxxxxxxxxxxxxxxx0?=", + # RT87831 + "0" => "=?UTF-8?B?MA==?=", "=?UTF-8?Q?0?=", + # RT88717 + "Hey foo\x{2024}bar:whee" => "=?UTF-8?B?SGV5IGZvb+KApGJhcjp3aGVl?=", "=?UTF-8?Q?Hey_foo=E2=80=A4bar=3Awhee?=", + # valid q chars + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz !*+-/" => "=?UTF-8?B?MDEyMzQ1Njc4OUFCQ0RFRkdISUpLTE1OT1BRUlNUVVZXWFlaYWJjZGVmZ2hpams=?=\r\n =?UTF-8?B?bG1ub3BxcnN0dXZ3eHl6ICEqKy0v?=", "=?UTF-8?Q?0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_?=\r\n =?UTF-8?Q?!*+-/?=", + # invalid q chars + "." => "=?UTF-8?B?Lg==?=", "=?UTF-8?Q?=2E?=", + "," => "=?UTF-8?B?LA==?=", "=?UTF-8?Q?=2C?=", +); + +sub info { + my ($str) = @_; + $str = Encode::encode_utf8($str); + $str =~ s/\r/\\r/gs; + $str =~ s/\n/\\n/gs; + return $str; } -is(Encode::encode('MIME-Q', "\x{fc}"), '=?UTF-8?Q?=C3=BC?=', 'Encode latin1 characters'); +my @splice; -# RT42627 +@splice = @encode_tests; +while (my ($d, $b, $q) = splice @splice, 0, 3) { + is Encode::encode('MIME-Header', $d) => $b, info("encode default: $d => $b"); + is Encode::encode('MIME-B', $d) => $b, info("encode base64: $d => $b"); + is Encode::encode('MIME-Q', $d) => $q, info("encode qp: $d => $q"); + is Encode::decode('MIME-B', $b) => $d, info("decode base64: $b => $d"); + is Encode::decode('MIME-Q', $q) => $d, info("decode qp: $b => $d"); +} -my $rt42627 = Encode::decode_utf8("\x{c2}\x{a3}xxxxxxxxxxxxxxxxxxx0"); -is(Encode::encode('MIME-Q', $rt42627), - '=?UTF-8?Q?=C2=A3xxxxxxxxxxxxxxxxxxx?= =?UTF-8?Q?0?=', - 'MIME-Q encoding does not truncate trailing zeros'); +@splice = @decode_default_tests; +while (my ($e, $d) = splice @splice, 0, 2) { + is Encode::decode('MIME-Header', $e) => $d, info("decode default: $e => $d"); +} -# RT87831 -is(Encode::encode('MIME-Header', '0'), '=?UTF-8?B?MA==?=', 'RT87831'); +local $Encode::MIME::Header::STRICT_DECODE = 1; -# More from RFC2047 pp.11-12 -my @rfc2047 = ( - # RFC2047 p.11 - "(=?ISO-8859-1?Q?a?=)" => '(a)', - "(=?ISO-8859-1?Q?a?= b)" => '(a b)', - "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)" => '(ab)', - "(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)" => '(ab)', - "(=?ISO-8859-1?Q?a?=\r\n\t=?ISO-8859-1?Q?b?=)" => '(ab)', - # RFC2047 p.12 - "(=?ISO-8859-1?Q?a_b?=)" => '(a b)', - "(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)" => '(a b)' - ); -while (my ($e, $d) = splice @rfc2047, 0, 2) { - is Encode::decode('MIME-Header', $e) => $d, "rfc2047: $e => $d"; +@splice = @decode_strict_tests; +while (my ($e, $d) = splice @splice, 0, 2) { + is Encode::decode('MIME-Header', $e) => $d, info("decode strict: $e => $d"); } + __END__