diff -Nru libencode-perl-2.63/bin/enc2xs libencode-perl-2.73/bin/enc2xs --- libencode-perl-2.63/bin/enc2xs 2014-10-19 07:02:48.000000000 +0000 +++ libencode-perl-2.73/bin/enc2xs 2015-04-15 23:14:01.000000000 +0000 @@ -10,7 +10,7 @@ use Getopt::Std; use Config; my @orig_ARGV = @ARGV; -our $VERSION = do { my @r = (q$Revision: 2.14 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; +our $VERSION = do { my @r = (q$Revision: 2.17 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; # These may get re-ordered. # RAW is a do_now as inserted by &enter @@ -133,10 +133,18 @@ # -o to specify the output file name (else it's the first arg) # -f to give a file with a list of input files (else use the args) # -n to name the encoding (else use the basename of the input file. -getopts('CM:SQqOo:f:n:',\%opt); +getopts('CM:SQqOo:f:n:v',\%opt); $opt{M} and make_makefile_pl($opt{M}, @ARGV); $opt{C} and make_configlocal_pm($opt{C}, @ARGV); +$opt{v} ||= $ENV{ENC2XS_VERBOSE}; + +sub verbose { + print STDERR @_ if $opt{v}; +} +sub verbosef { + printf STDERR @_ if $opt{v}; +} # This really should go first, else the die here causes empty (non-erroneous) # output files to be written. @@ -252,7 +260,7 @@ if ($doC) { - print STDERR "Writing compiled form\n"; + verbose "Writing compiled form\n"; foreach my $name (sort cmp_name keys %encoding) { my ($e2u,$u2e,$erep,$min_el,$max_el) = @{$encoding{$name}}; @@ -272,8 +280,9 @@ # push(@{$encoding{$name}},outstring(\*C,$e2u->{Cname}.'_def',$erep)); } my $cpp = ($Config{d_cplusplus} || '') eq 'define'; - my $exta = $cpp ? 'extern "C" ' : "static"; - my $extb = $cpp ? 'extern "C" ' : ""; + my $ext = $cpp ? 'extern "C"' : "extern"; + my $exta = $cpp ? 'extern "C"' : "static"; + my $extb = $cpp ? 
'extern "C"' : ""; foreach my $enc (sort cmp_name keys %encoding) { # my ($e2u,$u2e,$rep,$min_el,$max_el,$rsym) = @{$encoding{$enc}}; @@ -300,7 +309,7 @@ { my $sym = "${enc}_encoding"; $sym =~ s/\W+/_/g; - print H "extern encode_t $sym;\n"; + print H "${ext} encode_t $sym;\n"; print D " Encode_XSEncoding(aTHX_ &$sym);\n"; } @@ -347,10 +356,10 @@ my $perc_saved = $saved/($strings + $saved) * 100; my $perc_subsaved = $subsave/($strings + $subsave) * 100; - printf STDERR "%d bytes in string tables\n",$strings; - printf STDERR "%d bytes (%.3g%%) saved spotting duplicates\n", + verbosef "%d bytes in string tables\n",$strings; + verbosef "%d bytes (%.3g%%) saved spotting duplicates\n", $saved, $perc_saved if $saved; - printf STDERR "%d bytes (%.3g%%) saved using substrings\n", + verbosef "%d bytes (%.3g%%) saved using substrings\n", $subsave, $perc_subsaved if $subsave; } elsif ($doEnc) @@ -701,7 +710,12 @@ my $cpp = ($Config{d_cplusplus} || '') eq 'define'; my $var = $^O eq 'MacOS' || $cpp ? 'extern' : 'static'; my $const = $cpp ? '' : 'const'; - print $fh "$var $const encpage_t $name\[",scalar(@{$a->{'Entries'}}),"];\n"; + my $ccflags = $Config{ccflags}; + if (defined $Config{ccwarnflags}) { + $ccflags .= " " . $Config{ccwarnflags}; + } + my $count = $ccflags =~ /-Wc\+\+-compat/ ? 
'' : scalar(@{$a->{'Entries'}}); + print $fh "$var $const encpage_t $name\[$count];\n"; } $a->{'DoneStrings'} = 1; foreach my $b (@{$a->{'Entries'}}) @@ -1001,9 +1015,9 @@ $mod =~ s/.*\bEncode\b/Encode/o; $mod =~ s/\.pm\z//o; $mod =~ s,/,::,og; - warn qq{ require $mod;\n}; eval qq{ require $mod; }; - $@ and die "Can't require $mod: $@\n"; + return if $@; + warn qq{ require $mod;\n}; for my $enc ( Encode->encodings() ) { no warnings; $in_core{$enc} and next; @@ -1017,7 +1031,7 @@ $_ModLines .= qq(\$Encode::ExtModule{'$enc'} = "$LocalMod{$enc}";\n); } - warn $_ModLines; + warn $_ModLines if $_ModLines; $_LocalVer = _mkversion(); $_E2X = find_e2x(); $_Inc = $INC{"Encode.pm"}; diff -Nru libencode-perl-2.63/bin/encguess libencode-perl-2.73/bin/encguess --- libencode-perl-2.63/bin/encguess 1970-01-01 00:00:00.000000000 +0000 +++ libencode-perl-2.73/bin/encguess 2015-04-15 23:14:33.000000000 +0000 @@ -0,0 +1,145 @@ +#!./perl +use 5.008001; +use strict; +use warnings; +use Encode; +use Getopt::Std; +use Carp; +use Encode::Guess; +$Getopt::Std::STANDARD_HELP_VERSION = 1; + +my %opt; +getopts( "huSs:", \%opt ); +my @suspect_list; +list_valid_suspects() and exit if $opt{S}; +@suspect_list = split /[:,]/, $opt{s} if $opt{s}; # -s names are separated by ':' or ',' +HELP_MESSAGE() if $opt{h}; +HELP_MESSAGE() unless @ARGV; +do_guess($_) for @ARGV; + +sub read_file { + my $filename = shift; + local $/; + open my $fh, '<:raw', $filename or croak "$filename:$!"; + my $content = <$fh>; + close $fh; + return $content; +} + +sub do_guess { + my $filename = shift; + my $data = read_file($filename); + my $enc = guess_encoding( $data, @suspect_list ); + if ( !ref($enc) && $opt{u} ) { + return 1; + } + print "$filename\t"; + if ( ref($enc) ) { + print $enc->mime_name(); + } + else { + print "unknown"; + } + print "\n"; + return 1; +} + +sub list_valid_suspects { + print join( "\n", Encode->encodings(":all") ); + print "\n"; + return 1; +} + +sub HELP_MESSAGE { + exec 'pod2usage', $0 or die "pod2usage: $!" 
+} +__END__ +=head1 NAME + +encguess - guess character encodings of files + +=head1 VERSION + +$Id: encguess,v 0.1 2015/02/05 10:34:19 dankogai Exp $ + +=head1 SYNOPSIS + + encguess [switches] filename... + +=head2 SWITCHES + +=over 2 + +=item -h + +show this message and exit. + +=item -s + +specify a list of "suspect encoding types" to test, +separated by either C<:> or C<,> + +=item -S + +output a list of all acceptable encoding types that can be used with +the -s param + +=item -u + +suppress display of unidentified types + +=back + +=head2 EXAMPLES: + +=over 2 + +=item * + +Guess encoding of a file named C<test.txt>, using only the default +suspect types. + + encguess test.txt + +=item * + +Guess the encoding type of a file named C<test.txt>, using the suspect +types C<euc-jp,shiftjis,7bit-jis>. + + encguess -s euc-jp,shiftjis,7bit-jis test.txt + encguess -s euc-jp:shiftjis:7bit-jis test.txt + +=item * + +Guess the encoding type of several files, do not display results for +unidentified files. + + encguess -us euc-jp,shiftjis,7bit-jis test*.txt + +=back + +=head1 DESCRIPTION + +The encoding identification is done by checking one encoding type at a +time until all but the right type are eliminated. The set of encoding +types to try is defined by the -s parameter and defaults to ascii, +utf8 and UTF-16/32 with BOM. This can be overridden by passing one or +more encoding types via the -s parameter. If you need to pass in +multiple suspect encoding types, separate them with either C<:> or +C<,>, as shown in the examples above. + +=head1 SEE ALSO + +L, L + +=head1 LICENSE AND COPYRIGHT + +Copyright 2015 Michael LaGrasta and Dan Kogai. + +This program is free software; you can redistribute it and/or modify it +under the terms of the Artistic License (2.0). 
You may obtain a +copy of the full license at: + +L + +=cut diff -Nru libencode-perl-2.63/Changes libencode-perl-2.73/Changes --- libencode-perl-2.63/Changes 2014-10-19 07:02:47.000000000 +0000 +++ libencode-perl-2.73/Changes 2015-04-15 23:14:25.000000000 +0000 @@ -1,8 +1,105 @@ # Revision history for Perl extension Encode. # -# $Id: Changes,v 2.63 2014/10/19 07:01:15 dankogai Exp $ +# $Id: Changes,v 2.73 2015/04/15 23:14:01 dankogai Exp dankogai $ # -$Revision: 2.63 $ $Date: 2014/10/19 07:01:15 $ +$Revision: 2.73 $ $Date: 2015/04/15 23:14:01 $ +! MANIFEST ++ t/isa.t +! Encode.pm + Addressed RT#103253: Encode::XS does not inherit from Encode::Encoding + https://rt.cpan.org/Public/Bug/Display.html?id=103253 +! encoding.pm ++ t/encoding-locale.t + Pulled: Rewrite of encoding::_get_locale_encoding for more portability #40 +! encoding.pm + Pulled: encoding.pm: more inlining #39 + https://github.com/dankogai/p5-encode/pull/39 + +2.72 2015/03/14 02:44:39 +! encoding.pm + Copied from bleadperl to be in sync with it again. + http://www.nntp.perl.org/group/perl.perl5.porters/2015/03/msg226576.html + +2.71 2015/03/12 00:03:52 +! encoding.pm + Pulled: Don't fail 'no encoding' on EBCDIC + https://github.com/dankogai/p5-encode/pull/38 +! lib/Encode/Alias.pm t/Aliases.t + Add cp65000 => UTF-7 and cp65001 => utf-8-strict + https://github.com/dankogai/p5-encode/issues/37 +! encoding.pm + Sync w/ bleadperl + https://github.com/dankogai/p5-encode/pull/36 +! bin/encguess + Pulled: show encguess example per #33 + https://github.com/dankogai/p5-encode/pull/34 + +2.70 2015/02/05 10:53:00 +! Makefile.PL + add bin/encguess to EXE_FILES + +2.69 2015/02/05 10:35:11 +! bin/encguess + Refactored so that + * does not depend on non-core module (File::Slurp in particular) + * PODified document + * -s "encA encB" to -s encA,encB which is more shell-friendly + * and more +! MANIFEST ++ bin/encguess + Pulled: Added CLI wrapper for Encode::Guess + https://github.com/dankogai/p5-encode/pull/32 +! 
Unicode/Unicode.pm + Pulled: Bump $VERSION in module changed since Encode-2.60 + https://github.com/dankogai/p5-encode/pull/31 + +2.68 2015/01/22 10:17:32 +! Pulled: Fix C++ build on Windows with VC++ + https://github.com/dankogai/p5-encode/pull/30 + https://rt.cpan.org/Public/Bug/Display.html?id=82897 +! lib/Encode/MIME/Header.pm t/taint.t + Pulled: maintain taint flag when encoding MIME on old perl + https://github.com/dankogai/p5-encode/pull/29 +! Encode.pm + POD fixes + https://github.com/dankogai/p5-encode/pull/27 +! bin/enc2xs + Addressed: RT#101345: [PATCH] reduce compiler warnings and stderr noise + enc2xs no longer emits verbose messages to STDERR + unless -v switch or $ENV{ENC2XS_VERBOSE} is set. + https://rt.cpan.org/Public/Bug/Display.html?id=101345 + +2.67 2014/12/04 20:13:00 +! t/taint.t + Now skips nonexistent methods like Encode::Detect->encode() should + that be installed. This resolves RT#100105. + https://rt.cpan.org/Ticket/Display.html?id=100105 + +2.66 2014/12/02 23:30:34 $ +! bin/enc2xs + Resolved RT#100656: enc2xs -C fails if URL::Encode::XS is installed + https://rt.cpan.org/Ticket/Display.html?id=100656 + +2.65 2014/11/27 14:08:33 +! Changes Encode.xs bin/enc2xs + Applied 3 patches from jhi: + 0001-For-C-don-t-use-the-array-size-in-forward-declaratiotion + 0002-Unused-variables + 0003-1-needs-casting-to-STRLEN + Message-Id: <54753674.6070909@iki.fi> + +2.64 2014/10/29 15:37:54 +! t/utf8warnings.t MANIFEST + Retouch pull #26 so it works with perl < 5.14 +! Encode.pm ++ t/utf8warnings.t + Pulled: Catch and re-issue utf8 warnings at a higher level + https://github.com/dankogai/p5-encode/pull/26 +! Encode.xs + Pulled: Validate continuations in the incremental UTF-X decoder + https://github.com/dankogai/p5-encode/pull/25 + +2.63 2014/10/19 07:01:15 ! 
Encode.xs Applied: RT #99264: call_pv() can reallocate the stack https://rt.cpan.org/Ticket/Display.html?id=99264 diff -Nru libencode-perl-2.63/debian/changelog libencode-perl-2.73/debian/changelog --- libencode-perl-2.63/debian/changelog 2014-10-20 18:21:35.000000000 +0000 +++ libencode-perl-2.73/debian/changelog 2015-05-04 12:21:23.000000000 +0000 @@ -1,3 +1,22 @@ +libencode-perl (2.73-1) unstable; urgency=medium + + * New upstream release + * Upload to unstable + + -- Dominic Hargreaves Mon, 04 May 2015 13:21:06 +0100 + +libencode-perl (2.72-1) experimental; urgency=medium + + [ Anuradha Weeraman ] + * Team upload + * New upstream release + + [ Dominic Hargreaves ] + * New upstream release + * Update copyright for new encguess script + + -- Dominic Hargreaves Sun, 29 Mar 2015 18:40:12 +0100 + libencode-perl (2.63-1) unstable; urgency=medium [ Salvatore Bonaccorso ] diff -Nru libencode-perl-2.63/debian/copyright libencode-perl-2.73/debian/copyright --- libencode-perl-2.63/debian/copyright 2014-10-20 18:21:35.000000000 +0000 +++ libencode-perl-2.73/debian/copyright 2015-05-04 12:18:55.000000000 +0000 @@ -7,6 +7,10 @@ Copyright: 2002-2014, Dan Kogai License: Artistic or GPL-1+ +Files: bin/encguess +Copyright: 2015 Michael LaGrasta and Dan Kogai +License: Artistic + Files: ucm/* Copyright: 1991-2013 Unicode, Inc. All rights reserved. Distributed under the Terms of Use in http://www.unicode.org/copyright.html. 
diff -Nru libencode-perl-2.63/Encode.pm libencode-perl-2.73/Encode.pm --- libencode-perl-2.63/Encode.pm 2014-10-19 07:02:47.000000000 +0000 +++ libencode-perl-2.73/Encode.pm 2015-04-15 23:14:27.000000000 +0000 @@ -1,10 +1,10 @@ # -# $Id: Encode.pm,v 2.63 2014/10/19 07:02:18 dankogai Exp $ +# $Id: Encode.pm,v 2.73 2015/04/15 23:14:01 dankogai Exp dankogai $ # package Encode; use strict; use warnings; -our $VERSION = sprintf "%d.%02d", q$Revision: 2.63 $ =~ /(\d+)/g; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.73 $ =~ /(\d+)/g; use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; use XSLoader (); XSLoader::load( __PACKAGE__, $VERSION ); @@ -156,7 +156,20 @@ require Carp; Carp::croak("Unknown encoding '$name'"); } - my $octets = $enc->encode( $string, $check ); + # For Unicode, warnings need to be caught and re-issued at this level + # so that callers can disable utf8 warnings lexically. + my $octets; + if ( ref($enc) eq 'Encode::Unicode' ) { + my $warn = ''; + { + local $SIG{__WARN__} = sub { $warn = shift }; + $octets = $enc->encode( $string, $check ); + } + warnings::warnif('utf8', $warn) if length $warn; + } + else { + $octets = $enc->encode( $string, $check ); + } $_[1] = $string if $check and !ref $check and !( $check & LEAVE_SRC() ); return $octets; } @@ -172,7 +185,20 @@ require Carp; Carp::croak("Unknown encoding '$name'"); } - my $string = $enc->decode( $octets, $check ); + # For Unicode, warnings need to be caught and re-issued at this level + # so that callers can disable utf8 warnings lexically. 
+ my $string; + if ( ref($enc) eq 'Encode::Unicode' ) { + my $warn = ''; + { + local $SIG{__WARN__} = sub { $warn = shift }; + $string = $enc->decode( $octets, $check ); + } + warnings::warnif('utf8', $warn) if length $warn; + } + else { + $string = $enc->decode( $octets, $check ); + } $_[1] = $octets if $check and !ref $check and !( $check & LEAVE_SRC() ); return $string; } @@ -285,7 +311,11 @@ $Encode::Encoding{Unicode} = bless { Name => "Internal" } => "Encode::Internal"; } - + { + # https://rt.cpan.org/Public/Bug/Display.html?id=103253 + package Encode::XS; + push @Encode::XS::ISA, 'Encode::Encoding'; + } { # was in Encode::utf8 @@ -457,7 +487,7 @@ This function returns the string that results from decoding the scalar value I, assumed to be a sequence of octets in I, into -Perl's internal form. The returns the resulting string. As with encode(), +Perl's internal form. As with encode(), I can be either a canonical name or an alias. For encoding names and aliases, see L; for I, see L. @@ -547,7 +577,7 @@ from_to($octets, $from, $to, $check); -is equivalent t:o +is equivalent to: $octets = encode($to, decode($from, $octets), $check); @@ -674,7 +704,7 @@ handle the conversion. In the second, you explicitly translate from one encoding to the other. -Unfortunately, it may be that encodings are C-savvy. You can check +Unfortunately, it may be that encodings are not C-savvy. You can check to see whether your encoding is supported by C by invoking the C method on it: @@ -810,7 +840,7 @@ Even the fallback for C must return octets, which are then decoded with the character encoding that C accepts. 
So for -example if you wish to decode octests as UTF-8, and use ISO-8859-15 as +example if you wish to decode octets as UTF-8, and use ISO-8859-15 as a fallback for bytes that are not valid UTF-8, you could write $str = decode 'UTF-8', $octets, sub { diff -Nru libencode-perl-2.63/Encode.xs libencode-perl-2.73/Encode.xs --- libencode-perl-2.63/Encode.xs 2014-10-19 07:02:47.000000000 +0000 +++ libencode-perl-2.73/Encode.xs 2015-04-15 23:14:25.000000000 +0000 @@ -1,5 +1,5 @@ /* - $Id: Encode.xs,v 2.30 2014/10/19 07:01:15 dankogai Exp $ + $Id: Encode.xs,v 2.33 2015/01/22 10:17:32 dankogai Exp $ */ #define PERL_NO_GET_CONTEXT @@ -7,6 +7,7 @@ #include "perl.h" #include "XSUB.h" #include "encode.h" +#include "def_t.h" # define PERLIO_MODNAME "PerlIO::encoding" # define PERLIO_FILENAME "PerlIO/encoding.pm" @@ -21,6 +22,8 @@ #define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) { \ Perl_croak_nocontext("panic_unimplemented"); \ + PERL_UNUSED_VAR(sv); \ + PERL_UNUSED_VAR(encoding); \ return (y)0; /* fool picky compilers */ \ } /**/ @@ -67,6 +70,10 @@ call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) { /* Exists for breakpointing */ + PERL_UNUSED_VAR(routine); + PERL_UNUSED_VAR(done); + PERL_UNUSED_VAR(dest); + PERL_UNUSED_VAR(orig); } @@ -343,10 +350,14 @@ if (UTF8_IS_START(*s)) { U8 skip = UTF8SKIP(s); if ((s + skip) > e) { - /* Partial character */ - /* XXX could check that rest of bytes are UTF8_IS_CONTINUATION(ch) */ - if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) + if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) { + const U8 *p = s + 1; + for (; p < e; p++) { + if (!UTF8_IS_CONTINUATION(*p)) + goto malformed_byte; + } break; + } goto malformed_byte; } @@ -359,11 +370,11 @@ if (strict && uv > PERL_UNICODE_MAX) ulen = (STRLEN) -1; #endif - if (ulen == -1) { + if (ulen == (STRLEN) -1) { if (strict) { uv = utf8n_to_uvuni(s, e - s, &ulen, UTF8_CHECK_ONLY | UTF8_ALLOW_NONSTRICT); - if (ulen == -1) + if (ulen == (STRLEN) -1) goto malformed_byte; 
goto malformed; } @@ -503,7 +514,6 @@ U8 *s; U8 *e; SV *dst; - bool renewed = 0; int check; CODE: { @@ -564,6 +574,7 @@ SV * obj CODE: { + PERL_UNUSED_VAR(obj); XSRETURN(1); } @@ -572,6 +583,7 @@ SV * obj CODE: RETVAL = 0; + PERL_UNUSED_VAR(obj); OUTPUT: RETVAL @@ -673,6 +685,7 @@ CODE: { /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ + PERL_UNUSED_VAR(obj); ST(0) = &PL_sv_no; XSRETURN(1); } @@ -685,6 +698,7 @@ /* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ /* require_pv(PERLIO_FILENAME); */ + PERL_UNUSED_VAR(obj); eval_pv("require PerlIO::encoding", 0); SPAGAIN; @@ -989,6 +1003,5 @@ BOOT: { -#include "def_t.h" #include "def_t.exh" } diff -Nru libencode-perl-2.63/encoding.pm libencode-perl-2.73/encoding.pm --- libencode-perl-2.63/encoding.pm 2014-10-19 07:02:47.000000000 +0000 +++ libencode-perl-2.73/encoding.pm 2015-04-15 23:14:33.000000000 +0000 @@ -1,12 +1,16 @@ -# $Id: encoding.pm,v 2.12 2013/04/26 18:30:46 dankogai Exp $ +# $Id: encoding.pm,v 2.15 2015/04/15 23:14:01 dankogai Exp dankogai $ package encoding; -our $VERSION = sprintf "%d.%02d", q$Revision: 2.12 $ =~ /(\d+)/g; +our $VERSION = sprintf "%d.%02d", q$Revision: 2.15 $ =~ /(\d+)/g; use Encode; use strict; use warnings; -use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; +use constant { + DEBUG => !!$ENV{PERL_ENCODE_DEBUG}, + HAS_PERLIO => eval { require PerlIO::encoding; PerlIO::encoding->VERSION(0.02) }, + PERL_5_21_7 => $^V && $^V ge v5.21.7, +}; BEGIN { if ( ord("A") == 193 ) { @@ -15,12 +19,6 @@ } } -our $HAS_PERLIO = 0; -eval { require PerlIO::encoding }; -unless ($@) { - $HAS_PERLIO = ( PerlIO::encoding->VERSION >= 0.02 ); -} - sub _exception { my $name = shift; $] > 5.008 and return 0; # 5.8.1 or higher then no @@ -39,64 +37,79 @@ sub _get_locale_encoding { my $locale_encoding; + if ($^O eq 'MSWin32') { + my @tries = ( + # First try to get the OutputCP. 
This will work only if we + # are attached to a console + 'Win32.pm' => 'Win32::GetConsoleOutputCP', + 'Win32/Console.pm' => 'Win32::Console::OutputCP', + # If above failed, this means that we are a GUI app + # Let's assume that the ANSI codepage is what matters + 'Win32.pm' => 'Win32::GetACP', + ); + while (@tries) { + my $cp = eval { + require $tries[0]; + no strict 'refs'; + &{$tries[1]}() + }; + if ($cp) { + if ($cp == 65001) { # Code page for UTF-8 + $locale_encoding = 'UTF-8'; + } else { + $locale_encoding = 'cp' . $cp; + } + return $locale_encoding; + } + splice(@tries, 0, 2) + } + } + # I18N::Langinfo isn't available everywhere - eval { + $locale_encoding = eval { require I18N::Langinfo; - I18N::Langinfo->import(qw(langinfo CODESET)); - $locale_encoding = langinfo( CODESET() ); + find_encoding( + I18N::Langinfo::langinfo( I18N::Langinfo::CODESET() ) + )->name }; + return $locale_encoding if defined $locale_encoding; - my $country_language; - - no warnings 'uninitialized'; - - if ( (not $locale_encoding) && in_locale() ) { - if ( $ENV{LC_ALL} =~ /^([^.]+)\.([^.@]+)(@.*)?$/ ) { - ( $country_language, $locale_encoding ) = ( $1, $2 ); - } - elsif ( $ENV{LANG} =~ /^([^.]+)\.([^.@]+)(@.*)?$/ ) { + eval { + require POSIX; + # Get the current locale + # Remember that MSVCRT impl is quite different from Unixes + my $locale = POSIX::setlocale(POSIX::LC_CTYPE()); + if ( $locale =~ /^([^.]+)\.([^.@]+)(?:@.*)?$/ ) { + my $country_language; ( $country_language, $locale_encoding ) = ( $1, $2 ); - } - # LANGUAGE affects only LC_MESSAGES only on glibc - } - elsif ( not $locale_encoding ) { - if ( $ENV{LC_ALL} =~ /\butf-?8\b/i - || $ENV{LANG} =~ /\butf-?8\b/i ) - { - $locale_encoding = 'utf8'; - } - - # Could do more heuristics based on the country and language - # parts of LC_ALL and LANG (the parts before the dot (if any)), - # since we have Locale::Country and Locale::Language available. 
- # TODO: get a database of Language -> Encoding mappings - # (the Estonian database at http://www.eki.ee/letter/ - # would be excellent!) --jhi - } - if ( defined $locale_encoding - && lc($locale_encoding) eq 'euc' - && defined $country_language ) - { - if ( $country_language =~ /^ja_JP|japan(?:ese)?$/i ) { - $locale_encoding = 'euc-jp'; - } - elsif ( $country_language =~ /^ko_KR|korean?$/i ) { - $locale_encoding = 'euc-kr'; - } - elsif ( $country_language =~ /^zh_CN|chin(?:a|ese)$/i ) { - $locale_encoding = 'euc-cn'; - } - elsif ( $country_language =~ /^zh_TW|taiwan(?:ese)?$/i ) { - $locale_encoding = 'euc-tw'; - } - else { - require Carp; - Carp::croak( - "encoding: Locale encoding '$locale_encoding' too ambiguous" - ); + # Could do more heuristics based on the country and language + # since we have Locale::Country and Locale::Language available. + # TODO: get a database of Language -> Encoding mappings + # (the Estonian database at http://www.eki.ee/letter/ + # would be excellent!) --jhi + if (lc($locale_encoding) eq 'euc') { + if ( $country_language =~ /^ja_JP|japan(?:ese)?$/i ) { + $locale_encoding = 'euc-jp'; + } + elsif ( $country_language =~ /^ko_KR|korean?$/i ) { + $locale_encoding = 'euc-kr'; + } + elsif ( $country_language =~ /^zh_CN|chin(?:a|ese)$/i ) { + $locale_encoding = 'euc-cn'; + } + elsif ( $country_language =~ /^zh_TW|taiwan(?:ese)?$/i ) { + $locale_encoding = 'euc-tw'; + } + else { + require Carp; + Carp::croak( + "encoding: Locale encoding '$locale_encoding' too ambiguous" + ); + } + } } - } + }; return $locale_encoding; } @@ -131,11 +144,23 @@ $name = $enc->name; # canonize unless ( $arg{Filter} ) { DEBUG and warn "_exception($name) = ", _exception($name); - _exception($name) or ${^ENCODING} = $enc; - $HAS_PERLIO or return 1; + if (! 
_exception($name)) { + if (!PERL_5_21_7) { + ${^ENCODING} = $enc; + } + else { + # Starting with 5.21.7, this pragma uses a shadow variable + # designed explicitly for it, ${^E_NCODING}, to enforce + # lexical scope; instead of ${^ENCODING}. + $^H{'encoding'} = 1; + ${^E_NCODING} = $enc; + } + } + HAS_PERLIO or return 1; } else { defined( ${^ENCODING} ) and undef ${^ENCODING}; + undef ${^E_NCODING} if PERL_5_21_7; # implicitly 'use utf8' require utf8; # to fetch $utf8::hint_bits; @@ -185,7 +210,8 @@ sub unimport { no warnings; undef ${^ENCODING}; - if ($HAS_PERLIO) { + undef ${^E_NCODING} if PERL_5_21_7; + if (HAS_PERLIO) { binmode( STDIN, ":raw" ); binmode( STDOUT, ":raw" ); } @@ -205,20 +231,12 @@ =head1 NAME -encoding - allows you to write your script in non-ascii or non-utf8 +encoding - allows you to write your script in non-ASCII and non-UTF-8 =head1 WARNING -This module is deprecated under perl 5.18. It uses a mechanism provided by -perl that is deprecated under 5.18 and higher, and may be removed in a -future version. - -The easiest and the best alternative is to write your script in UTF-8 -and declear: - - use utf8; # not use encoding ':utf8'; - -See L and L for details. +This module has been deprecated since perl v5.18. See L and +L. =head1 SYNOPSIS @@ -235,68 +253,84 @@ # A simple euc-cn => utf-8 converter use encoding "euc-cn", STDOUT => "utf8"; while(<>){print}; - # "no encoding;" supported (but not scoped!) + # "no encoding;" supported no encoding; # an alternate way, Filter use encoding "euc-jp", Filter=>1; # now you can use kanji identifiers -- in euc-jp! 
- # switch on locale - - # note that this probably means that unless you have a complete control - # over the environments the application is ever going to be run, you should - # NOT use the feature of encoding pragma allowing you to write your script - # in any recognized encoding because changing locale settings will wreck - # the script; you can of course still use the other features of the pragma. + # encode based on the current locale - specialized purposes only; + # fraught with danger!! use encoding ':locale'; -=head1 ABSTRACT +=head1 DESCRIPTION -Let's start with a bit of history: Perl 5.6.0 introduced Unicode -support. You could apply C and regexes even to complex CJK -characters -- so long as the script was written in UTF-8. But back -then, text editors that supported UTF-8 were still rare and many users -instead chose to write scripts in legacy encodings, giving up a whole -new feature of Perl 5.6. - -Rewind to the future: starting from perl 5.8.0 with the B -pragma, you can write your script in any encoding you like (so long -as the C module supports it) and still enjoy Unicode support. -This pragma achieves that by doing the following: +This pragma is used to enable a Perl script to be written in encodings that +aren't strictly ASCII nor UTF-8. It translates all or portions of the Perl +program script from a given encoding into UTF-8, and changes the PerlIO layers +of C and C to the encoding specified. + +This pragma dates from the days when UTF-8-enabled editors were uncommon. But +that was long ago, and the need for it is greatly diminished. That, coupled +with the fact that it doesn't work with threads, along with other problems, +(see L) have led to its being deprecated. It is planned to remove this +pragma in a future Perl version. New code should be written in UTF-8, and the +C pragma used instead (see L and L for details). 
+Old code should be converted to UTF-8, via something like the recipe in the +L (though this simple approach may require manual adjustments +afterwards). + +The only legitimate use of this pragma is almost certainly just one per file, +near the top, with file scope, as the file is likely going to only be written +in one encoding. Further restrictions apply in Perls before v5.22 (see +L). -=over - -=item * - -Internally converts all literals (C) from -the encoding specified to utf8. In Perl 5.8.1 and later, literals in -C and C pseudo-filehandle are also converted. +There are two basic modes of operation (plus turning it off): -=item * +=over 4 -Changing PerlIO layers of C and C to the encoding - specified. +=item C'] ;> -=back +This is the normal operation. It translates various literals encountered in +the Perl source file from the encoding I into UTF-8, and similarly +converts character code points. This is used when the script is a combination +of ASCII (for the variable names and punctuation, I), but the literal +data is in the specified encoding. + +I is optional. If omitted, the encoding specified in the environment +variable L|perlrun/PERL_ENCODING> is used. If this isn't +set, or the resolved-to encoding is not known to C>, the error +C'> will be thrown. + +Starting in Perl v5.8.6 (C version 2.0.1), I may be the +name C<:locale>. This is for very specialized applications, and is documented +in L sub-pragma> below. + +The literals that are converted are C, and +starting in v5.8.1, C. Operations that do conversions include C, +C, C (but not C), and C. -=head2 Literal Conversions +Also starting in v5.8.1, the C pseudo-filehandle is translated from the +encoding into UTF-8. 
-You can write code in EUC-JP as follows: +For example, you can write code in EUC-JP as follows: my $Rakuda = "\xF1\xD1\xF1\xCC"; # Camel in Kanji #<-char-><-char-> # 4 octets s/\bCamel\b/$Rakuda/; And with C in effect, it is the same thing as -the code in UTF-8: +that code in UTF-8: my $Rakuda = "\x{99F1}\x{99DD}"; # two Unicode Characters s/\bCamel\b/$Rakuda/; -=head2 PerlIO layers for C +See L below for a more complete example. -The B pragma also modifies the filehandle layers of -STDIN and STDOUT to the specified encoding. Therefore, +Unless C<${^UNICODE}> (available starting in v5.8.2) exists and is non-zero, the +PerlIO layers of C and C are set to "C<:encoding(I)>". +Therefore, use encoding "euc-jp"; my $message = "Camel is the symbol of perl.\n"; @@ -304,183 +338,145 @@ $message =~ s/\bCamel\b/$Rakuda/; print $message; -Will print "\xF1\xD1\xF1\xCC is the symbol of perl.\n", -not "\x{99F1}\x{99DD} is the symbol of perl.\n". +will print -You can override this by giving extra arguments; see below. - -=head2 Implicit upgrading for byte strings + "\xF1\xD1\xF1\xCC is the symbol of perl.\n" -By default, if strings operating under byte semantics and strings -with Unicode character data are concatenated, the new string will -be created by decoding the byte strings as I. +not -The B pragma changes this to use the specified encoding -instead. For example: + "\x{99F1}\x{99DD} is the symbol of perl.\n" - use encoding 'utf8'; - my $string = chr(20000); # a Unicode string - utf8::encode($string); # now it's a UTF-8 encoded byte string - # concatenate with another Unicode string - print length($string . chr(20000)); - -Will print C<2>, because C<$string> is upgraded as UTF-8. Without -C, it will print C<4> instead, since C<$string> -is three octets when interpreted as Latin-1. 
- -=head2 Side effects - -If the C pragma is in scope then the lengths returned are -calculated from the length of C<$/> in Unicode characters, which is not -always the same as the length of C<$/> in the native encoding. +You can override this by giving extra arguments; see below. -This pragma affects utf8::upgrade, but not utf8::downgrade. +Note that C WILL NOT be changed, regardless. -=head1 FEATURES THAT REQUIRE 5.8.1 +Also note that non-STD file handles remain unaffected. Use C or C to change the layers of those. -Some of the features offered by this pragma requires perl 5.8.1. Most -of these are done by Inaba Hiroto. Any other features and changes -are good for 5.8.0. +=item C Filter=E1;> -=over +This operates as above, but the C argument with a non-zero +value causes the entire script, and not just literals, to be translated from +the encoding into UTF-8. This allows identifiers in the source to be in that +encoding as well. (Problems may occur if the encoding is not a superset of +ASCII; imagine all your semi-colons being translated into something +different.) One can use this form to make -=item "NON-EUC" doublebyte encodings + ${"\x{4eba}"}++ -Because perl needs to parse script before applying this pragma, such -encodings as Shift_JIS and Big-5 that may contain '\' (BACKSLASH; -\x5c) in the second byte fails because the second byte may -accidentally escape the quoting character that follows. Perl 5.8.1 -or later fixes this problem. +work. (This is equivalent to C<$I++>, where I is a single Han +ideograph). -=item tr// +This effectively means that your source code behaves as if it were written in +UTF-8 with C<'use utf8>' in effect. So even if your editor only supports +Shift_JIS, for example, you can still try examples in Chapter 15 of +C. -C was overlooked by Perl 5 porters when they released perl 5.8.0 -See the section below for details. +This option is significantly slower than the other one. 
-=item DATA pseudo-filehandle +=item C -Another feature that was overlooked was C. +Unsets the script encoding. The layers of C, C are +reset to "C<:raw>" (the default unprocessed raw stream of bytes). =back -=head1 USAGE +=head1 OPTIONS -=over 4 +=head2 Setting C and/or C individually -=item use encoding [I] ; +The encodings of C and C are individually settable by parameters to +the pragma: -Sets the script encoding to I. And unless ${^UNICODE} -exists and non-zero, PerlIO layers of STDIN and STDOUT are set to -":encoding(I)". + use encoding 'euc-tw', STDIN => 'greek' ...; -Note that STDERR WILL NOT be changed. +In this case, you cannot omit the first I. C<< STDIN => undef >> +turns the I/O transcoding completely off for that filehandle. -Also note that non-STD file handles remain unaffected. Use C or C to change layers of those. +When C<${^UNICODE}> (available starting in v5.8.2) exists and is non-zero, +these options will be completely ignored. See L> and +L<"C<-C>" in perlrun|perlrun/-C [numberElist]> for details. -If no encoding is specified, the environment variable L -is consulted. If no encoding can be found, the error C'> will be thrown. +=head2 The C<:locale> sub-pragma -=item use encoding I [ STDIN =E I ...] ; +Starting in v5.8.6, the encoding name may be C<:locale>. This means that the +encoding is taken from the current locale, and not hard-coded by the pragma. +Since a script really can only be encoded in exactly one encoding, this option +is dangerous. It makes sense only if the script itself is written in ASCII, +and all the possible locales that will be in use when the script is executed +are supersets of ASCII. That means that the script itself doesn't get +changed, but the I/O handles have the specified encoding added, and the +operations like C and C use that encoding. -You can also individually set encodings of STDIN and STDOUT via the -C<< STDIN => I >> form. In this case, you cannot omit the -first I. 
C<< STDIN => undef >> turns the IO transcoding -completely off. +The logic of finding which locale C<:locale> uses is as follows: -When ${^UNICODE} exists and non-zero, these options will completely -ignored. ${^UNICODE} is a variable introduced in perl 5.8.1. See -L see L and L for -details (perl 5.8.1 and later). +=over 4 -=item use encoding I Filter=E1; +=item 1. -This turns the encoding pragma into a source filter. While the -default approach just decodes interpolated literals (in qq() and -qr()), this will apply a source filter to the entire source code. See -L below for details. +If the platform supports the C interface, the codeset +returned is used as the default encoding for the open pragma. -=item no encoding; +=item 2. -Unsets the script encoding. The layers of STDIN, STDOUT are -reset to ":raw" (the default unprocessed raw stream of bytes). +If 1. didn't work but we are under the locale pragma, the environment +variables C and C (in that order) are matched for encodings +(the part after "C<.>", if any), and if any found, that is used +as the default encoding for the open pragma. -=back +=item 3. -=head1 The Filter Option +If 1. and 2. didn't work, the environment variables C and C +(in that order) are matched for anything looking like UTF-8, and if +any found, C<:utf8> is used as the default encoding for the open +pragma. -The magic of C is not applied to the names of -identifiers. In order to make C<${"\x{4eba}"}++> ($human++, where human -is a single Han ideograph) work, you still need to write your script -in UTF-8 -- or use a source filter. That's what 'Filter=>1' does. +=back -What does this mean? Your source code behaves as if it is written in -UTF-8 with 'use utf8' in effect. So even if your editor only supports -Shift_JIS, for example, you can still try examples in Chapter 15 of -C. For instance, you can use UTF-8 -identifiers. 
+If your locale environment variables (C, C, C) +contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching), +the default encoding of your C, C, and C, and of +B, is UTF-8. -This option is significantly slower and (as of this writing) non-ASCII -identifiers are not very stable WITHOUT this option and with the -source code written in UTF-8. +=head1 CAVEATS -=head2 Filter-related changes at Encode version 1.87 +=head2 SIDE EFFECTS =over =item * -The Filter option now sets STDIN and STDOUT like non-filter options. -And C<< STDIN=>I >> and C<< STDOUT=>I >> work like -non-filter version. +If the C pragma is in scope then the lengths returned are +calculated from the length of C<$/> in Unicode characters, which is not +always the same as the length of C<$/> in the native encoding. =item * -C is implicitly declared so you no longer have to C to C<${"\x{4eba}"}++>. - -=back - -=head1 CAVEATS - -=head2 NOT SCOPED - -The pragma is a per script, not a per block lexical. Only the last -C or C matters, and it affects -B. However, the pragma is supported and -B can appear as many times as you want in a given script. -The multiple use of this pragma is discouraged. - -By the same reason, the use this pragma inside modules is also -discouraged (though not as strongly discouraged as the case above. -See below). - -If you still have to write a module with this pragma, be very careful -of the load order. See the codes below; +Without this pragma, if strings operating under byte semantics and strings +with Unicode character data are concatenated, the new string will +be created by decoding the byte strings as I. - # called module - package Module_IN_BAR; - use encoding "bar"; - # stuff in "bar" encoding here - 1; +The B pragma changes this to use the specified encoding +instead. For example: - # caller script - use encoding "foo" - use Module_IN_BAR; - # surprise! use encoding "bar" is in effect. 
+ use encoding 'utf8'; + my $string = chr(20000); # a Unicode string + utf8::encode($string); # now it's a UTF-8 encoded byte string + # concatenate with another Unicode string + print length($string . chr(20000)); -The best way to avoid this oddity is to use this pragma RIGHT AFTER -other modules are loaded. i.e. +Will print C<2>, because C<$string> is upgraded as UTF-8. Without +C, it will print C<4> instead, since C<$string> +is three octets when interpreted as Latin-1. - use Module_IN_BAR; - use encoding "foo"; +=back =head2 DO NOT MIX MULTIPLE ENCODINGS Notice that only literals (string or regular expression) having only legacy code points are affected: if you mix data like this + \x{100}\xDF \xDF\x{100} the data is assumed to be in (Latin 1 and) Unicode, not in your native @@ -509,10 +505,70 @@ So feel free to put your strings in your encoding in quotes and regexes. -=head2 tr/// with ranges +=head2 Prior to Perl v5.22 + +The pragma was a per script, not a per block lexical. Only the last +C or C mattered, and it affected +B. However, the C pragma was supported and +C could appear as many times as you want in a given script +(though only the last was effective). + +Since the scope wasn't lexical, other modules' use of C, C, I +were affected. This leads to spooky, incorrect action at a distance that is +hard to debug. + +This means you would have to be very careful of the load order: + + # called module + package Module_IN_BAR; + use encoding "bar"; + # stuff in "bar" encoding here + 1; + + # caller script + use encoding "foo" + use Module_IN_BAR; + # surprise! use encoding "bar" is in effect. + +The best way to avoid this oddity is to use this pragma RIGHT AFTER +other modules are loaded. i.e. + + use Module_IN_BAR; + use encoding "foo"; + +=head2 Prior to Encode version 1.87 + +=over + +=item * + +C and C were not set under the filter option. +And C<< STDIN=>I >> and C<< STDOUT=>I >> didn't work like +non-filter version. 
+ +=item * + +C wasn't implicitly declared so you have to C to do + + ${"\x{4eba}"}++ + +=back + +=head2 Prior to Perl v5.8.1 + +=over + +=item "NON-EUC" doublebyte encodings + +Because perl needs to parse the script before applying this pragma, such +encodings as Shift_JIS and Big-5 that may contain C<'\'> (BACKSLASH; +C<\x5c>) in the second byte fail because the second byte may +accidentally escape the quoting character that follows. + +=item C The B pragma works by decoding string literals in -C and so forth. In perl 5.8.0, this +C and so forth. In perl v5.8.0, this does not apply to C. Therefore, use encoding 'euc-jp'; @@ -537,25 +593,21 @@ =back -This counterintuitive behavior has been fixed in perl 5.8.1. - -=head3 workaround to tr///; +This counterintuitive behavior has been fixed in perl v5.8.1. -In perl 5.8.0, you can work around as follows; +In perl v5.8.0, you can work around this as follows; use encoding 'euc-jp'; # .... eval qq{ \$kana =~ tr/\xA4\xA1-\xA4\xF3/\xA5\xA1-\xA5\xF3/ }; Note the C expression is surrounded by C. The idea behind -is the same as classic idiom that makes C 'interpolate'. +this is the same as the classic idiom that makes C 'interpolate': tr/$from/$to/; # wrong! eval qq{ tr/$from/$to/ }; # workaround. -Nevertheless, in case of B pragma even C is affected so -C not being decoded was obviously against the will of Perl5 -Porters so it has been fixed in Perl 5.8.1 or later. +=back =head1 EXAMPLE - Greekperl @@ -590,10 +642,24 @@ print "zetta\n" if unpack("C", (pack("C", 0xdf))) == 0xdf; -=head1 KNOWN PROBLEMS +=head1 BUGS =over +=item Thread safety + +C is not thread-safe (i.e., do not use in threaded +applications). + +=item Can't be used by more than one module in a single program. + +Only one encoding is allowed. If you combine modules in a program that have +different encodings, only one will be actually used. + +=item Other modules using C and C get the encoded stream + +They may be expecting something completely different. 
+ =item literals in regex that are longer than 127 bytes For native multibyte encodings (either fixed or variable length), @@ -603,13 +669,11 @@ =item EBCDIC The encoding pragma is not supported on EBCDIC platforms. -(Porters who are willing and able to remove this limitation are -welcome.) -=item format +=item C -This pragma doesn't work well with format because PerlIO does not -get along very well with it. When format contains non-ascii +This pragma doesn't work well with C because PerlIO does not +get along very well with it. When C contains non-ASCII characters it prints funny or gets "wide character warnings". To understand it, try the code below. @@ -628,56 +692,19 @@ Without binmode this happens to work but without binmode, print() fails instead of write(). -At any rate, the very use of format is questionable when it comes to +At any rate, the very use of C is questionable when it comes to unicode characters since you have to consider such things as character width (i.e. double-width for ideographs) and directions (i.e. BIDI for Arabic and Hebrew). -=item Thread safety - -C is not thread-safe (i.e., do not use in threaded -applications). +=item See also L =back -=head2 The Logic of :locale - -The logic of C<:locale> is as follows: - -=over 4 - -=item 1. - -If the platform supports the langinfo(CODESET) interface, the codeset -returned is used as the default encoding for the open pragma. - -=item 2. - -If 1. didn't work but we are under the locale pragma, the environment -variables LC_ALL and LANG (in that order) are matched for encodings -(the part after C<.>, if any), and if any found, that is used -as the default encoding for the open pragma. - -=item 3. - -If 1. and 2. didn't work, the environment variables LC_ALL and LANG -(in that order) are matched for anything looking like UTF-8, and if -any found, C<:utf8> is used as the default encoding for the open -pragma. 
- -=back - -If your locale environment variables (LC_ALL, LC_CTYPE, LANG) -contain the strings 'UTF-8' or 'UTF8' (case-insensitive matching), -the default encoding of your STDIN, STDOUT, and STDERR, and of -B, is UTF-8. - =head1 HISTORY -This pragma first appeared in Perl 5.8.0. For features that require -5.8.1 and better, see above. - -The C<:locale> subpragma was implemented in 2.01, or Perl 5.8.6. +This pragma first appeared in Perl v5.8.0. It has been enhanced in later +releases as specified above. =head1 SEE ALSO diff -Nru libencode-perl-2.63/lib/Encode/Alias.pm libencode-perl-2.73/lib/Encode/Alias.pm --- libencode-perl-2.63/lib/Encode/Alias.pm 2014-10-19 07:02:48.000000000 +0000 +++ libencode-perl-2.73/lib/Encode/Alias.pm 2015-04-15 23:14:33.000000000 +0000 @@ -2,7 +2,7 @@ use strict; use warnings; no warnings 'redefine'; -our $VERSION = do { my @r = ( q$Revision: 2.18 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.19 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; use Exporter 'import'; @@ -256,6 +256,10 @@ define_alias( qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"' ); } + # https://github.com/dankogai/p5-encode/issues/37 + define_alias(qr/cp65000/i => '"UTF-7"'); + define_alias(qr/cp65001/i => '"utf-8-strict"'); + # utf8 is blessed :) define_alias( qr/\bUTF-8$/i => '"utf-8-strict"' ); diff -Nru libencode-perl-2.63/lib/Encode/MIME/Header.pm libencode-perl-2.73/lib/Encode/MIME/Header.pm --- libencode-perl-2.63/lib/Encode/MIME/Header.pm 2014-10-19 07:02:48.000000000 +0000 +++ libencode-perl-2.73/lib/Encode/MIME/Header.pm 2015-04-15 23:14:30.000000000 +0000 @@ -3,7 +3,7 @@ use warnings; no warnings 'redefine'; -our $VERSION = do { my @r = ( q$Revision: 2.15 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; +our $VERSION = do { my @r = ( q$Revision: 2.16 $ =~ /\d+/g ); sprintf "%d." . 
"%02d" x $#r, @r }; use Encode qw(find_encoding encode_utf8 decode_utf8); use MIME::Base64; use Carp; @@ -139,7 +139,7 @@ push @line, join( "\n " => @subline ); } $_[1] = '' if $chk; - return join( "\n", @line ); + return (substr($str, 0, 0) . join( "\n", @line )); } use constant HEAD => '=?UTF-8?'; diff -Nru libencode-perl-2.63/Makefile.PL libencode-perl-2.73/Makefile.PL --- libencode-perl-2.63/Makefile.PL 2014-10-19 07:02:48.000000000 +0000 +++ libencode-perl-2.73/Makefile.PL 2015-04-15 23:14:30.000000000 +0000 @@ -1,5 +1,5 @@ # -# $Id: Makefile.PL,v 2.12 2013/09/14 07:51:59 dankogai Exp $ +# $Id: Makefile.PL,v 2.13 2015/02/05 10:53:00 dankogai Exp $ # use 5.007003; use strict; @@ -23,6 +23,7 @@ my @exe_files = qw(bin/enc2xs bin/piconv + bin/encguess ); my @more_exe_files = qw( unidump diff -Nru libencode-perl-2.63/MANIFEST libencode-perl-2.73/MANIFEST --- libencode-perl-2.63/MANIFEST 2014-10-19 07:03:35.000000000 +0000 +++ libencode-perl-2.73/MANIFEST 2015-04-15 23:15:42.000000000 +0000 @@ -31,6 +31,7 @@ Unicode/Unicode.pm Encode extension Unicode/Unicode.xs Encode extension bin/enc2xs Encode module generator +bin/encguess Guess the encoding of file(s) bin/piconv iconv by perl bin/ucm2table Table Generator for testing bin/ucmlint A UCM Lint utility @@ -82,6 +83,7 @@ t/grow.t test script t/gsm0338.t test script t/guess.t test script +t/isa.t test script t/jis7-fallback.t test script t/jisx0201.enc test data t/jisx0201.utf test data @@ -102,6 +104,7 @@ t/unibench.pl benchmark script t/utf8ref.t test script t/utf8strict.t test script +t/utf8warnings.t test script ucm/8859-1.ucm Unicode Character Map ucm/8859-10.ucm Unicode Character Map ucm/8859-11.ucm Unicode Character Map diff -Nru libencode-perl-2.63/META.json libencode-perl-2.73/META.json --- libencode-perl-2.63/META.json 2014-10-19 07:03:34.000000000 +0000 +++ libencode-perl-2.73/META.json 2015-04-15 23:15:42.000000000 +0000 @@ -4,7 +4,7 @@ "unknown" ], "dynamic_config" : 1, - "generated_by" : 
"ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.142060", + "generated_by" : "ExtUtils::MakeMaker version 7.02, CPAN::Meta::Converter version 2.143240", "license" : [ "perl_5" ], @@ -43,5 +43,5 @@ "url" : "https://github.com/dankogai/p5-encode" } }, - "version" : "2.63" + "version" : "2.73" } diff -Nru libencode-perl-2.63/META.yml libencode-perl-2.73/META.yml --- libencode-perl-2.63/META.yml 2014-10-19 07:03:34.000000000 +0000 +++ libencode-perl-2.73/META.yml 2015-04-15 23:15:41.000000000 +0000 @@ -7,7 +7,7 @@ configure_requires: ExtUtils::MakeMaker: '0' dynamic_config: 1 -generated_by: 'ExtUtils::MakeMaker version 6.98, CPAN::Meta::Converter version 2.142060' +generated_by: 'ExtUtils::MakeMaker version 7.02, CPAN::Meta::Converter version 2.143240' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html @@ -22,4 +22,4 @@ parent: '0.221' resources: repository: https://github.com/dankogai/p5-encode -version: '2.63' +version: '2.73' diff -Nru libencode-perl-2.63/t/Aliases.t libencode-perl-2.73/t/Aliases.t --- libencode-perl-2.63/t/Aliases.t 2011-11-11 15:43:59.000000000 +0000 +++ libencode-perl-2.73/t/Aliases.t 2015-03-09 12:29:53.000000000 +0000 @@ -81,6 +81,8 @@ 'jis0208-raw' => $ON_EBCDIC ? '' : 'jis0208-raw', 'jis0212-raw' => $ON_EBCDIC ? '' : 'jis0212-raw', 'ksc5601-raw' => $ON_EBCDIC ? 
'' : 'ksc5601-raw', + 'cp65000' => 'UTF-7', + 'cp65001' => 'utf-8-strict', ); for my $i (1..11,13..16){ diff -Nru libencode-perl-2.63/t/isa.t libencode-perl-2.73/t/isa.t --- libencode-perl-2.63/t/isa.t 1970-01-01 00:00:00.000000000 +0000 +++ libencode-perl-2.73/t/isa.t 2015-04-15 23:14:01.000000000 +0000 @@ -0,0 +1,10 @@ +# +# $Id: isa.t,v 1.1 2015/04/02 12:08:24 dankogai Exp $ +# +use strict; +use Encode qw/find_encoding/; +use Test::More; +my @enc = Encode->encodings(":all"); +plan tests => 0+@enc; +isa_ok find_encoding($_), "Encode::Encoding" for @enc; + diff -Nru libencode-perl-2.63/t/taint.t libencode-perl-2.73/t/taint.t --- libencode-perl-2.63/t/taint.t 2013-04-26 18:02:59.000000000 +0000 +++ libencode-perl-2.73/t/taint.t 2015-01-22 01:06:02.000000000 +0000 @@ -3,13 +3,26 @@ use Encode qw(encode decode); use Scalar::Util qw(tainted); use Test::More; - -my $str = "abc" . substr($ENV{PATH},0,0); # tainted string +my $taint = substr($ENV{PATH},0,0); +my $str = "dan\x{5f3e}" . $taint; # tainted string to encode +my $bin = encode('UTF-8', $str); # tainted binary to decode my @names = Encode->encodings(':all'); plan tests => 2 * @names; -for my $name (@names){ - my $e = encode($name, $str); - ok tainted($e), "encode $name"; - my $d = decode($name, $e); - ok tainted($d), "decode $name"; +for my $name (@names) { + my ($d, $e, $s); + eval { + $e = encode($name, $str); + }; + SKIP: { + skip $@, 1 if $@; + ok tainted($e), "encode $name"; + } + $bin = $e.$taint if $e; + eval { + $d = decode($name, $bin); + }; + SKIP: { + skip $@, 1 if $@; + ok tainted($d), "decode $name"; + } } diff -Nru libencode-perl-2.63/t/utf8warnings.t libencode-perl-2.73/t/utf8warnings.t --- libencode-perl-2.63/t/utf8warnings.t 1970-01-01 00:00:00.000000000 +0000 +++ libencode-perl-2.73/t/utf8warnings.t 2014-10-29 15:09:15.000000000 +0000 @@ -0,0 +1,66 @@ +use strict; +use warnings; +BEGIN { + if ($] < 5.014){ + print "1..0 # Skip: Perl 5.14.0 or later required\n"; + exit 0; + } +} + +use Encode; 
+use Test::More tests => 7; + +my $valid = "\x61\x00\x00\x00"; +my $invalid = "\x78\x56\x34\x12"; + +my @warnings; +$SIG{__WARN__} = sub {push @warnings, "@_"}; + +my $enc = find_encoding("UTF32-LE"); + +{ + @warnings = (); + my $ret = Encode::Unicode::decode( $enc, $valid ); + is("@warnings", "", "Calling decode in Encode::Unicode on valid string produces no warnings"); +} + +{ + @warnings = (); + my $ret = Encode::Unicode::decode( $enc, $invalid ); + like("@warnings", qr/is not Unicode/, "Calling decode in Encode::Unicode on invalid string warns"); +} + +{ + no warnings 'utf8'; + @warnings = (); + my $ret = Encode::Unicode::decode( $enc, $invalid ); + is("@warnings", "", "Warning from decode in Encode::Unicode can be silenced via no warnings 'utf8'"); +} + +{ + no warnings; + @warnings = (); + my $ret = Encode::Unicode::decode( $enc, $invalid ); + is("@warnings", "", "Warning from decode in Encode::Unicode can be silenced via no warnings"); +} + +{ + @warnings = (); + my $ret = Encode::decode( $enc, $invalid ); + like("@warnings", qr/is not Unicode/, "Calling decode in Encode on invalid string warns"); +} + +{ + no warnings 'utf8'; + @warnings = (); + my $ret = Encode::decode( $enc, $invalid ); + is("@warnings", "", "Warning from decode in Encode can be silenced via no warnings 'utf8'"); +}; + +{ + no warnings; + @warnings = (); + my $ret = Encode::decode( $enc, $invalid ); + is("@warnings", "", "Warning from decode in Encode can be silenced via no warnings 'utf8'"); +}; +