diff -Nru libhtml-html5-parser-perl-0.103/bin/html2xhtml libhtml-html5-parser-perl-0.107/bin/html2xhtml --- libhtml-html5-parser-perl-0.103/bin/html2xhtml 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/bin/html2xhtml 2011-10-20 20:22:52.000000000 +0000 @@ -0,0 +1,23 @@ +#!/usr/bin/perl + +use 5.010; +use HTML::HTML5::Parser; + +my $input = shift // '-'; +my $output = shift // '-'; + +my $parser = HTML::HTML5::Parser->new; +my $dom = ($input eq '-') + ? $parser->parse_string(do { local $/ = }) + : $parser->parse_file($input); + +if ($output eq '-') +{ + print $dom->toString; +} +else +{ + open my($fh), '>:encoding(UTF-8)', $output; + print $fh $dom->toString; + close $fh; +} \ No newline at end of file diff -Nru libhtml-html5-parser-perl-0.103/bin/html5debug libhtml-html5-parser-perl-0.107/bin/html5debug --- libhtml-html5-parser-perl-0.103/bin/html5debug 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/bin/html5debug 2011-10-07 08:36:18.000000000 +0000 @@ -0,0 +1,87 @@ +#!/usr/bin/perl + +use Getopt::Long; +use HTML::HTML5::Parser; + +my $output = $ENV{HTML_OUTPUT} || 'debug:json'; +my $help; +GetOptions( + 'output|o=s' => \$output, + 'help|usage|h' => \$help, + ); + +if ($help) +{ + my $name = $0; + print <new; +my $h = join '', <>; +my $hash; + +if ($output =~ /debug/i) +{ + load('XML::LibXML::Debugging'); + $hash = $p->parse_string($h)->toDebuggingHash; +} +elsif ($output =~ /clark/i) +{ + load('XML::LibXML::Debugging'); + print $p->parse_string($h)->toClarkML; +} +elsif ($output =~ /html/i) +{ + load('HTML::HTML5::Writer'); + print HTML::HTML5::Writer->new->document($p->parse_string($h)); +} +elsif ($output =~ /parser/i) +{ + $p->parse_string($h); + $hash = $p; +} +elsif ($output =~ /err/i) +{ + $p->parse_string($h); + print "$_\n" foreach $p->errors; +} +else +{ + print $p->parse_string($h)->toString; +} + +if (defined $hash and $output =~ /json/) +{ + load('JSON'); + print to_json($hash, 
{pretty=>1,canonical=>1}); +} +elsif (defined $hash) +{ + load('Data::Dumper'); + print Dumper($hash); +} diff -Nru libhtml-html5-parser-perl-0.103/Changes libhtml-html5-parser-perl-0.107/Changes --- libhtml-html5-parser-perl-0.103/Changes 2011-02-09 14:22:19.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/Changes 2011-10-20 20:28:41.000000000 +0000 @@ -1,21 +1,48 @@ -############################################################################ -## Changes for HTML::HTML5::Parser ######################################### -############################################################################ - HTML-HTML5-Parser ================= Created: 2009-11-26 -Home page: +Home page: Bug tracker: Maintainer: Toby Inkster -0.103 [2011-02-09] +0.107 2011-10-20 + + - (Addition) html2xhtml now reads from STDIN by default. + - (Addition) html2xhtml can output to a file. + - (Bugfix) parse_file wasn't accepting relative file names + +0.106 2011-10-10 + + - (Bugfix) Tokenizer.pm was still trying to require NamedEntityList.pm. + +0.105 2011-10-07 + + - HTML::HTML5::Parser::Error overloads stringification. + - use HTML::HTML5::Entities + - (Addition) Bundle 'html5debug' script. + - (Packaging) Module::Package::RDF. + +0.104 2011-09-22 + + - (Update) Catch up to revision d81fcb920a1a3c351149cd66a64bf1b8ae14a172 + (2011-08-21) upstream. + - (Addition) Some error handling stuff. + - (Addition) Support element. + +0.103 2011-02-09 + + - (Documentation Update) Copyright 2011. - (Bugfix) TagSoupParser.pm called a method that is renamed between this distribution and upstream using its upstream name. - - (Update) Copyright 2011. -0.102 [2011-01-19] +0.102 2011-01-19 + + - (Addition) Support element. + - (Update) Catch up to revision f2c921a886ab0b3dfb8d21b82525e98a4a921ad4 + (2010-10-11) upstream. + - (Bugfix) Fix source_line method. + - (Addition) Support
element. - (Addition) Allow element to appear in if document has an HTML4 doctype. This is a willful violation of the HTML5 parsing algorithm. (The may have elements as children, as well @@ -25,34 +52,38 @@ looser than the HTML 4 spec which says only should be used, but stricter than the HTML 4 DTD which allows pretty much anything in there!) - - (Addition) Support
element. - - (Addition) Support element. - - (Bugfix) Fix source_line method. - - (Update) Catch up to revision f2c921a886ab0b3dfb8d21b82525e98a4a921ad4 - (2010-10-11) upstream. -0.101 [2010-06-30] +0.101 2010-06-30 + - (Bugfix) UTF-8 fix. -0.100 [2010-06-23] +0.100 2010-06-23 + - (Bugfix) Minor bugfixes. -0.04 [2010-04-21] +0.04 2010-04-21 + - (Update) Catch up to revision cf2c0df8a6dfb50fee923dfb21b14c83f282ccdc (2010-02-28) upstream. -0.03 [2010-01-15] +0.03 2010-01-15 + + - (Documentation Update) Copyright 2010. + - (Packaging Update) Upgrade distribution to my new packaging regime + (auto-generated changelogs, etc) - (Bugfix) Module didn't use URI::file properly. - - (Update) Copyright 2010. - - (Update) Upgrade distribution to my new packaging regime (auto-generated - changelogs, etc) -0.02 [2009-12-16] +0.02 2009-12-16 + - Replace Inline::Python encoding detection with weaker, but native Perl HTML::Encoding package. - - (Addition) Bundle the html2xhtml tool. + - (Addition Packaging) Bundle the html2xhtml tool. + +0.01 2009-12-03 +# Original version + -0.01 [2009-12-03] # Original version +0.00_01 2009-12-01 +# Developer preview -0.00_01 [2009-12-01] # Developer preview diff -Nru libhtml-html5-parser-perl-0.103/Changes.ttl libhtml-html5-parser-perl-0.107/Changes.ttl --- libhtml-html5-parser-perl-0.103/Changes.ttl 2011-02-09 14:21:48.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/Changes.ttl 1970-01-01 00:00:00.000000000 +0000 @@ -1,171 +0,0 @@ -############################################################# - -@prefix : . -@prefix author: . -@prefix dbug: . -@prefix dcs: . -@prefix dc: . -@prefix foaf: . -@prefix my: . -@prefix rdfs: . -@prefix toby: . -@prefix xsd: . - -############################################################# - -<> - - dc:title "Changes for HTML::HTML5::Parser" ; - dc:description "Revision history for Perl extension HTML::HTML5::Parser."@en ; - dc:subject my:project ; - dc:creator toby:i . 
- -############################################################# - -my:v_0-00_01 - - a :Version ; - dc:issued "2009-12-01"^^xsd:date ; - :revision "0.00_01"^^xsd:string ; - :file-release ; - rdfs:label "Developer preview"@en . - -my:v_0-01 - - a :Version ; - dc:issued "2009-12-03"^^xsd:date ; - :revision "0.01"^^xsd:string ; - :file-release ; - rdfs:label "Original version"@en . - -my:v_0-02 - - a :Version ; - dc:issued "2009-12-16"^^xsd:date ; - :revision "0.02"^^xsd:string ; - :file-release ; - dcs:changeset [ - dcs:versus my:v_0-01 ; - dcs:item - [ rdfs:label "Replace Inline::Python encoding detection with weaker, but native Perl HTML::Encoding package."@en ] , - [ rdfs:label "Bundle the html2xhtml tool."@en ; a dcs:Addition , dcs:Packaging ] - ] . - -my:v_0-03 - - a :Version ; - dc:issued "2010-01-15"^^xsd:date ; - :revision "0.03"^^xsd:string ; - :file-release ; - dcs:changeset [ - dcs:versus my:v_0-02 ; - dcs:item - [ rdfs:label "Module didn't use URI::file properly."@en ; a dcs:Bugfix ; rdfs:comment "Thanks shellac" ] , - [ rdfs:label "Upgrade distribution to my new packaging regime (auto-generated changelogs, etc)"@en ; a dcs:Update , dcs:Packaging ] , - [ rdfs:label "Copyright 2010."@en ; a dcs:Update , dcs:Documentation ] - ] . - -my:v_0-04 - - a :Version ; - dc:issued "2010-04-21"^^xsd:date ; - :revision "0.04"^^xsd:string ; - :file-release ; - dcs:changeset [ - dcs:versus my:v_0-03 ; - dcs:item - [ rdfs:label "Catch up to revision cf2c0df8a6dfb50fee923dfb21b14c83f282ccdc (2010-02-28) upstream."@en ; a dcs:Update ] - ] . - -my:v_0-100 - - a :Version ; - dc:issued "2010-06-23"^^xsd:date ; - :revision "0.100"^^xsd:string ; - :file-release ; - dcs:changeset [ - dcs:versus my:v_0-04 ; - dcs:item - [ rdfs:label "Minor bugfixes."@en ; a dcs:Bugfix ] - ] . 
- -my:v_0-101 - - a :Version ; - dc:issued "2010-06-30"^^xsd:date ; - :revision "0.101"^^xsd:string ; - :file-release ; - dcs:changeset [ - dcs:versus my:v_0-100 ; - dcs:item - [ rdfs:label "UTF-8 fix."@en ; a dcs:Bugfix ; dcs:fixes [ rdfs:label "Wide characters in DOM tree."@en ; dbug:reporter author:gwilliams ] ] - ] . - -my:v_0-102 - - a :Version ; - dc:issued "2011-01-19"^^xsd:date ; - :revision "0.102"^^xsd:string ; - :file-release ; - dcs:changeset [ - dcs:versus my:v_0-101 ; - dcs:item - [ rdfs:label "Fix source_line method."@en ; a dcs:Bugfix ] , - [ rdfs:label "Catch up to revision f2c921a886ab0b3dfb8d21b82525e98a4a921ad4 (2010-10-11) upstream."@en ; a dcs:Update ] , - [ rdfs:label "Allow element to appear in if document has an HTML4 doctype. This is a willful violation of the HTML5 parsing algorithm. (The may have elements as children, as well as any children that would normally be allowed in the of the document, such as ; any other content is treated as the beginning of the , and thus closes and . That's slightly looser than the HTML 4 spec which says only should be used, but stricter than the HTML 4 DTD which allows pretty much anything in there!)"@en ; a dcs:Addition ] , - [ rdfs:label "Support
element."@en ; a dcs:Addition ] , - [ rdfs:label "Support element."@en ; a dcs:Addition ] - ] . - -my:v_0-103 - - a :Version ; - dc:issued "2011-02-09"^^xsd:date ; - :revision "0.103"^^xsd:string ; - :file-release ; - dcs:changeset [ - dcs:versus my:v_0-101 ; - dcs:item - [ rdfs:label "Copyright 2011."@en ; a dcs:Update , dcs:Documentation ] , - [ rdfs:label "TagSoupParser.pm called a method that is renamed between this distribution and upstream using its upstream name."@en ; a dcs:Bugfix ] - ] . - -############################################################# - -my:project - - a :Project ; - :name "HTML-HTML5-Parser" ; - :shortdesc "parse HTML reliably"@en ; - :programming-language "Perl" ; - :homepage ; - :download-page ; - :bug-database ; - :repository [ a :SVNRepository ; :browse ] ; - :maintainer toby:i ; - :developer toby:i , my:dev-wakaba ; - :documenter toby:i ; - :tester toby:i ; - :created "2009-11-26"^^xsd:date ; - :license ; - :release my:v_0-00_01 , my:v_0-01 , my:v_0-02 , my:v_0-03 , my:v_0-04 , - my:v_0-100 , my:v_0-101 , my:v_0-102 , my:v_0-103 . - -############################################################# - -toby:i - - a foaf:Person ; - foaf:name "Toby Inkster" ; - foaf:homepage ; - foaf:page ; - foaf:mbox ; - author:tobyink . - -my:dev-wakaba - - a foaf:Person ; - foaf:name "Wakaba" ; - foaf:page . - -############################################################# diff -Nru libhtml-html5-parser-perl-0.103/Changes.xml libhtml-html5-parser-perl-0.107/Changes.xml --- libhtml-html5-parser-perl-0.103/Changes.xml 2011-02-09 14:22:26.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/Changes.xml 1970-01-01 00:00:00.000000000 +0000 @@ -1,220 +0,0 @@ - - - - - - - - - Replace Inline::Python encoding detection with weaker, but native Perl HTML::Encoding package. - - - - Minor bugfixes. - - - - - - - - - UTF-8 fix. - - - - Wide characters in DOM tree. - - - - - - - - - - - - Fix source_line method. 
- - - - Catch up to revision f2c921a886ab0b3dfb8d21b82525e98a4a921ad4 (2010-10-11) upstream. - - - - Allow <object> element to appear in <head> if document has an HTML4 doctype. This is a willful violation of the HTML5 parsing algorithm. (The <object> may have <param> elements as children, as well as any children that would normally be allowed in the <head> of the document, such as <meta>; any other content is treated as the beginning of the <body>, and thus closes <object> and <head>. That's slightly looser than the HTML 4 spec which says only <param> should be used, but stricter than the HTML 4 DTD which allows pretty much anything in there!) - - - - Support <figcaption> element. - - - - Support <summary> element. - - - - - Bundle the html2xhtml tool. - - - - - - - - - - Copyright 2011. - - - - TagSoupParser.pm called a method that is renamed between this distribution and upstream using its upstream name. - - - - - - - - - - - - - - Thanks shellac - Module didn't use URI::file properly. - - - - - Upgrade distribution to my new packaging regime (auto-generated changelogs, etc) - - - - - Copyright 2010. - - - - - - - - Catch up to revision cf2c0df8a6dfb50fee923dfb21b14c83f282ccdc (2010-02-28) upstream. - - - - - - - - Revision history for Perl extension HTML::HTML5::Parser. 
- - Changes for HTML::HTML5::Parser - - - - Wakaba - - - - - 2009-11-26 - - - - - - - - HTML-HTML5-Parser - Perl - - - - - - - - - - - parse HTML reliably - - - - - 2009-12-01 - - 0.00_01 - - Developer preview - - - 2009-12-03 - - 0.01 - - Original version - - - - 2009-12-16 - - 0.02 - - - - - 2010-01-15 - - 0.03 - - - - - 2010-04-21 - - 0.04 - - - - - 2010-06-23 - - 0.100 - - - - - 2010-06-30 - - 0.101 - - - - - 2011-01-19 - - 0.102 - - - - - 2011-02-09 - - 0.103 - - - - - - - - Toby Inkster - - - diff -Nru libhtml-html5-parser-perl-0.103/debian/changelog libhtml-html5-parser-perl-0.107/debian/changelog --- libhtml-html5-parser-perl-0.103/debian/changelog 2011-05-08 18:27:26.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/debian/changelog 2011-12-18 22:22:04.000000000 +0000 @@ -1,3 +1,24 @@ +libhtml-html5-parser-perl (0.107-1) unstable; urgency=low + + [ Florian Schlichting ] + * Imported Upstream version 0.107. + * Dropped 2001_disable_verification.patch, modified file was removed. + * Bumped copyright years, adjusted DEP-5 headers and added/deleted stanzas + for new and removed files. + * Added dependency on libhtml-html5-entities-perl, dropped dependency on + libmodule-signature-perl. + * Added myself to uploaders and copyright. + + [ Jonas Smedegaard ] + * Update copyright file: Improve references for convenience copy of + Module::Install. + + [ gregor herrmann ] + * Remove debian/source/local-options; abort-on-upstream-changes and + unapply-patches are default in dpkg-source since 1.16.1. 
+ + -- Florian Schlichting Sun, 18 Dec 2011 23:18:51 +0100 + libhtml-html5-parser-perl (0.103-2) unstable; urgency=low * Improve package relations: diff -Nru libhtml-html5-parser-perl-0.103/debian/control libhtml-html5-parser-perl-0.107/debian/control --- libhtml-html5-parser-perl-0.103/debian/control 2011-05-08 18:26:54.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/debian/control 2011-12-18 22:22:04.000000000 +0000 @@ -8,11 +8,12 @@ dh-buildinfo, liberror-perl, libhtml-encoding-perl, + libhtml-html5-entities-perl, libwww-perl, - libxml-libxml-perl, - libmodule-signature-perl (>= 0.66) + libxml-libxml-perl Maintainer: Debian Perl Group -Uploaders: Jonas Smedegaard +Uploaders: Jonas Smedegaard , + Florian Schlichting Standards-Version: 3.9.2 Vcs-Git: git://git.debian.org/git/pkg-perl/packages/libhtml-html5-parser-perl Vcs-Browser: http://git.debian.org/?p=pkg-perl/packages/libhtml-html5-parser-perl.git diff -Nru libhtml-html5-parser-perl-0.103/debian/copyright libhtml-html5-parser-perl-0.107/debian/copyright --- libhtml-html5-parser-perl-0.103/debian/copyright 2011-05-08 18:24:23.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/debian/copyright 2011-12-18 22:22:04.000000000 +0000 @@ -1,10 +1,10 @@ -Format: http://svn.debian.org/wsvn/dep/web/deps/dep5.mdwn?rev=174 -Upstream-Name: HTML::HTML5::Parser -Upstream-Contact: Toby Inkster +Format-Specification: http://anonscm.debian.org/viewvc/dep/web/deps/dep5.mdwn?view=markup&pathrev=135 +Name: HTML::HTML5::Parser +Maintainer: Toby Inkster Source: http://search.cpan.org/dist/HTML-HTML5-Parser/ Files: * -Copyright: 2007-2010, Wakaba +Copyright: 2007-2011, Wakaba 2009-2011, Toby Inkster License: Artistic or GPL-1+ This library is free software; you can redistribute it and/or modify it @@ -14,28 +14,38 @@ Perl 5 version 5.8.1 is licensed under either the Artistic license or the GNU General Public License, version 1 or later. 
-Files: inc/Module/Install.pm -Copyright: 2008-2010, Adam Kennedy +Files: inc/Module/* +Copyright: 2002-2011, Adam Kennedy + 2002-2011, Audrey Tang + 2002-2011, Brian Ingerson License: Artistic or GPL-1+ + This program is free software; you can redistribute it and/or modify it + under the same terms as Perl itself. Comment: - Code lack licensing, but is apparently a convenience copy of - Module::Install which has standard Perl licensing. -Comment: - Perl 5 is licensed under either the Artistic license or the GNU General - Public License, version 1 or later. + Code lacks licensing, but is clearly a mangled convenience copy of + lib/Module/* from Module::Install (also contained, similarly mangled, + in that project below inc/Module/*), containing above copyright and + licensing. -Files: lib/HTML/HTML5/Parser/NamedEntityList.pm -Copyright: 2004-2007, Apple Computer, Inc. - 2004-2007, Mozilla Foundation - 2004-2007, Opera Software ASA - 2007-2010, Wakaba - 2009-2011, Toby Inkster -License: - You are granted a license to use, reproduce and create derivative works - of this document. +Files: inc/Module/Package* +Copyright: 2011, Ingy döt Net +License: Artistic or GPL-1+ + +Files: inc/Scalar/Util* +Copyright: 1997-2010, Graham Barr +License: Artistic or GPL-1+ + +Files: inc/YAML/Tiny.pm +Copyright: 2006-2011, Adam Kennedy +License: Artistic or GPL-1+ +Comment: + Code lacks licensing, but is clearly a mangled convenience copy of + lib/YAML/Tiny.pm from YAML::Tiny, containing above copyright and + licensing. Files: debian/* Copyright: 2011, Jonas Smedegaard + 2011, Florian Schlichting License: GPL-2+ License: Artistic @@ -60,7 +70,7 @@ General Public License for more details. Comment: On Debian systems the GNU General Public License (GPL) version 2 is - located in '/usr/share/common-licenses/GPL'. + located in '/usr/share/common-licenses/GPL-2'. . You should have received a copy of the GNU General Public License along with this program. If not, see . 
diff -Nru libhtml-html5-parser-perl-0.103/debian/patches/2001_disable_verification.patch libhtml-html5-parser-perl-0.107/debian/patches/2001_disable_verification.patch --- libhtml-html5-parser-perl-0.103/debian/patches/2001_disable_verification.patch 2011-03-05 07:15:19.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/debian/patches/2001_disable_verification.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ ---- a/t/00sig.t -+++ b/t/00sig.t -@@ -1,4 +1,4 @@ - use lib 'inc'; - use Test::More tests => 1; - use Test::Signature; --signature_ok(); -+ok(1, "Skipping SIGNATURE test."); diff -Nru libhtml-html5-parser-perl-0.103/debian/patches/series libhtml-html5-parser-perl-0.107/debian/patches/series --- libhtml-html5-parser-perl-0.103/debian/patches/series 2011-03-05 07:11:25.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/debian/patches/series 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -2001_disable_verification.patch diff -Nru libhtml-html5-parser-perl-0.103/debian/rules libhtml-html5-parser-perl-0.107/debian/rules --- libhtml-html5-parser-perl-0.103/debian/rules 2011-03-06 16:03:28.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/debian/rules 2011-12-18 22:22:04.000000000 +0000 @@ -27,10 +27,7 @@ DEB_UPSTREAM_TARBALL_MD5 = b27f05f071d89a47f82bb952e0f684be # Needed both by upstream build process and at runtime -common-depends = liberror-perl, libhtml-encoding-perl, libwww-perl, libxml-libxml-perl +common-depends = liberror-perl, libhtml-encoding-perl, libhtml-html5-entities-perl, libwww-perl, libxml-libxml-perl CDBS_BUILD_DEPENDS += , $(common-depends) CDBS_DEPENDS_ALL = $(common-depends) - -# Needed by upstream tests -CDBS_BUILD_DEPENDS += , libmodule-signature-perl (>= 0.66) diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/AutoInstall.pm libhtml-html5-parser-perl-0.107/inc/Module/AutoInstall.pm --- libhtml-html5-parser-perl-0.103/inc/Module/AutoInstall.pm 2011-02-09 14:22:27.000000000 +0000 +++ 
libhtml-html5-parser-perl-0.107/inc/Module/AutoInstall.pm 2011-10-20 20:28:42.000000000 +0000 @@ -17,11 +17,14 @@ ); # various lexical flags -my ( @Missing, @Existing, %DisabledTests, $UnderCPAN, $HasCPANPLUS ); +my ( @Missing, @Existing, %DisabledTests, $UnderCPAN, $InstallDepsTarget, $HasCPANPLUS ); my ( - $Config, $CheckOnly, $SkipInstall, $AcceptDefault, $TestOnly, $AllDeps + $Config, $CheckOnly, $SkipInstall, $AcceptDefault, $TestOnly, $AllDeps, + $UpgradeDeps ); -my ( $PostambleActions, $PostambleUsed ); +my ( $PostambleActions, $PostambleActionsNoTest, $PostambleActionsUpgradeDeps, + $PostambleActionsUpgradeDepsNoTest, $PostambleActionsListDeps, + $PostambleActionsListAllDeps, $PostambleUsed, $NoTest); # See if it's a testing or non-interactive session _accept_default( $ENV{AUTOMATED_TESTING} or ! -t STDIN ); @@ -31,6 +34,10 @@ $AcceptDefault = shift; } +sub _installdeps_target { + $InstallDepsTarget = shift; +} + sub missing_modules { return @Missing; } @@ -63,6 +70,11 @@ __PACKAGE__->install( $Config, @Missing = split( /,/, $1 ) ); exit 0; } + elsif ( $arg =~ /^--upgradedeps=(.*)$/ ) { + $UpgradeDeps = 1; + __PACKAGE__->install( $Config, @Missing = split( /,/, $1 ) ); + exit 0; + } elsif ( $arg =~ /^--default(?:deps)?$/ ) { $AcceptDefault = 1; } @@ -125,7 +137,7 @@ # check entirely since we don't want to have to load (and configure) # an old CPAN just for a cosmetic message - $UnderCPAN = _check_lock(1) unless $SkipInstall; + $UnderCPAN = _check_lock(1) unless $SkipInstall || $InstallDepsTarget; while ( my ( $feature, $modules ) = splice( @args, 0, 2 ) ) { my ( @required, @tests, @skiptests ); @@ -207,6 +219,7 @@ $CheckOnly or ($mandatory and $UnderCPAN) or $AllDeps + or $InstallDepsTarget or _prompt( qq{==> Auto-install the } . 
( @required / 2 ) @@ -237,10 +250,17 @@ } } - if ( @Missing and not( $CheckOnly or $UnderCPAN ) ) { + if ( @Missing and not( $CheckOnly or $UnderCPAN) ) { require Config; - print -"*** Dependencies will be installed the next time you type '$Config::Config{make}'.\n"; + my $make = $Config::Config{make}; + if ($InstallDepsTarget) { + print +"*** To install dependencies type '$make installdeps' or '$make installdeps_notest'.\n"; + } + else { + print +"*** Dependencies will be installed the next time you type '$make'.\n"; + } # make an educated guess of whether we'll need root permission. print " (You may need to do that as the 'root' user.)\n" @@ -271,6 +291,10 @@ sub _check_lock { return unless @Missing or @_; + if ($ENV{PERL5_CPANM_IS_RUNNING}) { + return _running_under('cpanminus'); + } + my $cpan_env = $ENV{PERL5_CPAN_IS_RUNNING}; if ($ENV{PERL5_CPANPLUS_IS_RUNNING}) { @@ -332,6 +356,11 @@ } } + if ($UpgradeDeps) { + push @modules, @installed; + @installed = (); + } + return @installed unless @modules; # nothing to do return @installed if _check_lock(); # defer to the CPAN shell @@ -463,6 +492,11 @@ } else { die "*** Cannot convert option $key = '$value' to CPANPLUS version.\n"; } + push @config, 'prereqs', $value; + } elsif ( $key eq 'force' ) { + push @config, $key, $value; + } elsif ( $key eq 'notest' ) { + push @config, 'skiptest', $value; } else { die "*** Cannot convert option $key to CPANPLUS version.\n"; } @@ -497,10 +531,14 @@ # set additional options while ( my ( $opt, $arg ) = splice( @config, 0, 2 ) ) { ( $args{$opt} = $arg, next ) - if $opt =~ /^force$/; # pseudo-option + if $opt =~ /^(?:force|notest)$/; # pseudo-option $CPAN::Config->{$opt} = $arg; } + if ($args{notest} && (not CPAN::Shell->can('notest'))) { + die "Your version of CPAN is too old to support the 'notest' pragma"; + } + local $CPAN::Config->{prerequisites_policy} = 'follow'; while ( my ( $pkg, $ver ) = splice( @modules, 0, 2 ) ) { @@ -519,8 +557,16 @@ delete $INC{$inc}; } - my $rv = 
$args{force} ? CPAN::Shell->force( install => $pkg ) - : CPAN::Shell->install($pkg); + my $rv = do { + if ($args{force}) { + CPAN::Shell->force( install => $pkg ) + } elsif ($args{notest}) { + CPAN::Shell->notest( install => $pkg ) + } else { + CPAN::Shell->install($pkg) + } + }; + $rv ||= eval { $CPAN::META->instance( 'CPAN::Distribution', $obj->cpan_file, ) ->{install} @@ -763,6 +809,35 @@ : "\$(NOECHO) \$(NOOP)" ); + my $deps_list = join( ',', @Missing, @Existing ); + + $PostambleActionsUpgradeDeps = + "\$(PERL) $0 --config=$config --upgradedeps=$deps_list"; + + my $config_notest = + join( ',', (UNIVERSAL::isa( $Config, 'HASH' ) ? %{$Config} : @{$Config}), + 'notest', 1 ) + if $Config; + + $PostambleActionsNoTest = ( + ($missing and not $UnderCPAN) + ? "\$(PERL) $0 --config=$config_notest --installdeps=$missing" + : "\$(NOECHO) \$(NOOP)" + ); + + $PostambleActionsUpgradeDepsNoTest = + "\$(PERL) $0 --config=$config_notest --upgradedeps=$deps_list"; + + $PostambleActionsListDeps = + '@$(PERL) -le "print for @ARGV" ' + . join(' ', map $Missing[$_], grep $_ % 2 == 0, 0..$#Missing); + + my @all = (@Missing, @Existing); + + $PostambleActionsListAllDeps = + '@$(PERL) -le "print for @ARGV" ' + . 
join(' ', map $all[$_], grep $_ % 2 == 0, 0..$#all); + return %args; } @@ -797,11 +872,15 @@ sub postamble { $PostambleUsed = 1; + my $fragment; - return <<"END_MAKE"; + $fragment .= <<"AUTO_INSTALL" if !$InstallDepsTarget; config :: installdeps \t\$(NOECHO) \$(NOOP) +AUTO_INSTALL + + $fragment .= <<"END_MAKE"; checkdeps :: \t\$(PERL) $0 --checkdeps @@ -809,12 +888,28 @@ installdeps :: \t$PostambleActions +installdeps_notest :: +\t$PostambleActionsNoTest + +upgradedeps :: +\t$PostambleActionsUpgradeDeps + +upgradedeps_notest :: +\t$PostambleActionsUpgradeDepsNoTest + +listdeps :: +\t$PostambleActionsListDeps + +listalldeps :: +\t$PostambleActionsListAllDeps + END_MAKE + return $fragment; } 1; __END__ -#line 1071 +#line 1178 diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install/AutoInstall.pm libhtml-html5-parser-perl-0.107/inc/Module/Install/AutoInstall.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Install/AutoInstall.pm 2011-02-09 14:22:27.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install/AutoInstall.pm 2011-10-20 20:28:42.000000000 +0000 @@ -6,7 +6,7 @@ use vars qw{$VERSION @ISA $ISCORE}; BEGIN { - $VERSION = '1.00'; + $VERSION = '1.02'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } @@ -73,6 +73,17 @@ ); } +sub installdeps_target { + my ($self, @args) = @_; + + $self->include('Module::AutoInstall'); + require Module::AutoInstall; + + Module::AutoInstall::_installdeps_target(1); + + $self->auto_install(@args); +} + sub auto_install_now { my $self = shift; $self->auto_install(@_); diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install/AutoManifest.pm libhtml-html5-parser-perl-0.107/inc/Module/Install/AutoManifest.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Install/AutoManifest.pm 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install/AutoManifest.pm 2011-10-20 20:28:42.000000000 +0000 @@ -0,0 +1,45 @@ +#line 1 +use strict; +use warnings; + +package Module::Install::AutoManifest; + +use 
Module::Install::Base; + +BEGIN { + our $VERSION = '0.003'; + our $ISCORE = 1; + our @ISA = qw(Module::Install::Base); +} + +sub auto_manifest { + my ($self) = @_; + + return unless $Module::Install::AUTHOR; + + die "auto_manifest requested, but no MANIFEST.SKIP exists\n" + unless -e "MANIFEST.SKIP"; + + if (-e "MANIFEST") { + unlink('MANIFEST') or die "Can't remove MANIFEST: $!"; + } + + $self->postamble(<<"END"); +create_distdir: manifest_clean manifest + +distclean :: manifest_clean + +manifest_clean: +\t\$(RM_F) MANIFEST +END + +} + +1; +__END__ + +#line 48 + +#line 131 + +1; # End of Module::Install::AutoManifest diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install/Base.pm libhtml-html5-parser-perl-0.107/inc/Module/Install/Base.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Install/Base.pm 2011-02-09 14:22:08.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install/Base.pm 2011-10-20 20:28:35.000000000 +0000 @@ -4,7 +4,7 @@ use strict 'vars'; use vars qw{$VERSION}; BEGIN { - $VERSION = '1.00'; + $VERSION = '1.02'; } # Suspend handler for "redefined" warnings diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install/Can.pm libhtml-html5-parser-perl-0.107/inc/Module/Install/Can.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Install/Can.pm 2011-02-09 14:22:28.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install/Can.pm 2011-10-20 20:28:43.000000000 +0000 @@ -9,7 +9,7 @@ use vars qw{$VERSION @ISA $ISCORE}; BEGIN { - $VERSION = '1.00'; + $VERSION = '1.02'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install/DOAPChangeSets.pm libhtml-html5-parser-perl-0.107/inc/Module/Install/DOAPChangeSets.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Install/DOAPChangeSets.pm 2011-02-09 14:22:08.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install/DOAPChangeSets.pm 1970-01-01 00:00:00.000000000 +0000 @@ -1,23 +0,0 @@ -#line 1 -package 
Module::Install::DOAPChangeSets; - -use 5.008; -use base qw(Module::Install::Base); -use strict; - -our $VERSION = '0.101'; - -sub write_doap_changes { - my $self = shift; - $self->admin->write_doap_changes(@_) if $self->is_admin; -} - -sub write_doap_changes_xml { - my $self = shift; - $self->admin->write_doap_changes_xml(@_) if $self->is_admin; -} - -1; - -__END__ -#line 76 diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install/Fetch.pm libhtml-html5-parser-perl-0.107/inc/Module/Install/Fetch.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Install/Fetch.pm 2011-02-09 14:22:28.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install/Fetch.pm 2011-10-20 20:28:43.000000000 +0000 @@ -6,7 +6,7 @@ use vars qw{$VERSION @ISA $ISCORE}; BEGIN { - $VERSION = '1.00'; + $VERSION = '1.02'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install/Include.pm libhtml-html5-parser-perl-0.107/inc/Module/Install/Include.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Install/Include.pm 2011-02-09 14:22:27.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install/Include.pm 2011-10-20 20:28:35.000000000 +0000 @@ -6,7 +6,7 @@ use vars qw{$VERSION @ISA $ISCORE}; BEGIN { - $VERSION = '1.00'; + $VERSION = '1.02'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install/Makefile.pm libhtml-html5-parser-perl-0.107/inc/Module/Install/Makefile.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Install/Makefile.pm 2011-02-09 14:22:08.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install/Makefile.pm 2011-10-20 20:28:39.000000000 +0000 @@ -8,7 +8,7 @@ use vars qw{$VERSION @ISA $ISCORE}; BEGIN { - $VERSION = '1.00'; + $VERSION = '1.02'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install/Metadata.pm libhtml-html5-parser-perl-0.107/inc/Module/Install/Metadata.pm --- 
libhtml-html5-parser-perl-0.103/inc/Module/Install/Metadata.pm 2011-02-09 14:22:08.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install/Metadata.pm 2011-10-20 20:28:35.000000000 +0000 @@ -6,7 +6,7 @@ use vars qw{$VERSION @ISA $ISCORE}; BEGIN { - $VERSION = '1.00'; + $VERSION = '1.02'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } @@ -170,7 +170,7 @@ # Normalize the version $version = $self->_perl_version($version); - # We don't support the reall old versions + # We don't support the really old versions unless ( $version >= 5.005 ) { die "Module::Install only supports 5.005 or newer (use ExtUtils::MakeMaker)\n"; } @@ -515,6 +515,7 @@ 'GNU Free Documentation license' => 'unrestricted', 1, 'GNU Affero General Public License' => 'open_source', 1, '(?:Free)?BSD license' => 'bsd', 1, + 'Artistic license 2\.0' => 'artistic_2', 1, 'Artistic license' => 'artistic', 1, 'Apache (?:Software )?license' => 'apache', 1, 'GPL' => 'gpl', 1, @@ -550,9 +551,9 @@ sub _extract_bugtracker { my @links = $_[0] =~ m#L<( - \Qhttp://rt.cpan.org/\E[^>]+| - \Qhttp://github.com/\E[\w_]+/[\w_]+/issues| - \Qhttp://code.google.com/p/\E[\w_\-]+/issues/list + https?\Q://rt.cpan.org/\E[^>]+| + https?\Q://github.com/\E[\w_]+/[\w_]+/issues| + https?\Q://code.google.com/p/\E[\w_\-]+/issues/list )>#gx; my %links; @links{@links}=(); @@ -581,7 +582,7 @@ sub requires_from { my $self = shift; my $content = Module::Install::_readperl($_[0]); - my @requires = $content =~ m/^use\s+([^\W\d]\w*(?:::\w+)*)\s+([\d\.]+)/mg; + my @requires = $content =~ m/^use\s+([^\W\d]\w*(?:::\w+)*)\s+(v?[\d\.]+)/mg; while ( @requires ) { my $module = shift @requires; my $version = shift @requires; diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install/Package.pm libhtml-html5-parser-perl-0.107/inc/Module/Install/Package.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Install/Package.pm 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install/Package.pm 2011-10-20 
20:28:35.000000000 +0000 @@ -0,0 +1,323 @@ +#line 1 +## +# name: Module::Install::Package +# abstract: Module::Install support for Module::Package +# author: Ingy döt Net +# license: perl +# copyright: 2011 +# see: +# - Module::Package + +# This module contains the Module::Package logic that must be available to +# both the Author and the End User. Author-only logic goes in a +# Module::Package::Plugin subclass. +package Module::Install::Package; +use strict; +use Module::Install::Base; +use vars qw'@ISA $VERSION'; +@ISA = 'Module::Install::Base'; +$VERSION = '0.30'; + +#-----------------------------------------------------------------------------# +# XXX BOOTBUGHACK +# This is here to try to get us out of Module-Package-0.11 cpantesters hell... +# Remove this when the situation has blown over. +sub pkg { + *inc::Module::Package::VERSION = sub { $VERSION }; + my $self = shift; + $self->module_package_internals_init($@); +} + +#-----------------------------------------------------------------------------# +# We allow the author to specify key/value options after the plugin. These +# options need to be available both at author time and install time. +#-----------------------------------------------------------------------------# +# OO accessor for command line options: +sub package_options { + @_>1?($_[0]->{package_options}=$_[1]):$_[0]->{package_options}} + +my $default_options = { + deps_list => 1, + install_bin => 1, + install_share => 1, + manifest_skip => 1, + requires_from => 1, +}; + +#-----------------------------------------------------------------------------# +# Module::Install plugin directives. Use long, ugly names to not pollute the +# Module::Install plugin namespace. These are only intended to be called from +# Module::Package. 
+#-----------------------------------------------------------------------------# + +# Module::Package starts off life as a normal call to this Module::Install +# plugin directive: +my $module_install_plugin; +my $module_package_plugin; +my $module_package_dist_plugin; +# XXX ARGVHACK This @argv thing is a temporary fix for an ugly bug somewhere in the +# Wikitext module usage. +my @argv; +sub module_package_internals_init { + my $self = $module_install_plugin = shift; + my ($plugin_spec, %options) = @_; + $self->package_options({%$default_options, %options}); + + if ($module_install_plugin->is_admin) { + $module_package_plugin = $self->_load_plugin($plugin_spec); + $module_package_plugin->mi($module_install_plugin); + $module_package_plugin->version_check($VERSION); + } + else { + $module_package_dist_plugin = $self->_load_dist_plugin($plugin_spec); + $module_package_dist_plugin->mi($module_install_plugin) if ref $module_package_dist_plugin; + } + # NOTE - This is the point in time where the body of Makefile.PL runs... + return; + + sub INIT { + return unless $module_install_plugin; + return if $Module::Package::ERROR; + eval { + if ($module_install_plugin->is_admin) { + $module_package_plugin->initial(); + $module_package_plugin->main(); + } + else { + $module_install_plugin->_initial(); + $module_package_dist_plugin->_initial() if ref $module_package_dist_plugin; + $module_install_plugin->_main(); + $module_package_dist_plugin->_main() if ref $module_package_dist_plugin; + } + }; + if ($@) { + $Module::Package::ERROR = $@; + die $@; + } + @argv = @ARGV; # XXX ARGVHACK + } + + # If this Module::Install plugin was used (by Module::Package) then wrap + # up any loose ends. This will get called after Makefile.PL has completed. + sub END { + @ARGV = @argv; # XXX ARGVHACK + return unless $module_install_plugin; + return if $Module::Package::ERROR; + $module_package_plugin + ? 
do { + $module_package_plugin->final; + $module_package_plugin->replicate_module_package; + } + : do { + $module_install_plugin->_final; + $module_package_dist_plugin->_final() if ref $module_package_dist_plugin; + } + } +} + +# Module::Package, Module::Install::Package and Module::Package::Plugin +# must all have the same version. Seems wise. +sub module_package_internals_version_check { + my ($self, $version) = @_; + return if $version < 0.1800001; # XXX BOOTBUGHACK!! + die <<"..." unless $version == $VERSION; + +Error! Something has gone awry: + Module::Package version=$version is using + Module::Install::Package version=$VERSION +If you are the author of this module, try upgrading Module::Package. +Otherwise, please notify the author of this error. + +... +} + +# Find and load the author side plugin: +sub _load_plugin { + my ($self, $spec, $namespace) = @_; + $spec ||= ''; + $namespace ||= 'Module::Package'; + my $version = ''; + $Module::Package::plugin_version = 0; + if ($spec =~ s/\s+(\S+)\s*//) { + $version = $1; + $Module::Package::plugin_version = $version; + } + my ($module, $plugin) = + not($spec) ? ('Plugin', "Plugin::basic") : + ($spec =~ /^\w(\w|::)*$/) ? ($spec, $spec) : + ($spec =~ /^:(\w+)$/) ? ('Plugin', "Plugin::$1") : + ($spec =~ /^(\S*\w):(\w+)$/) ? ($1, "$1::$2") : + die "$spec is invalid"; + $module = "${namespace}::${module}"; + $plugin = "${namespace}::${plugin}"; + eval "use $module $version (); 1" or die $@; + return $plugin->new(); +} + +# Find and load the user side plugin: +sub _load_dist_plugin { + my ($self, $spec, $namespace) = @_; + $spec ||= ''; + $namespace ||= 'Module::Package::Dist'; + my $r = eval { $self->_load_plugin($spec, $namespace); }; + return $r if ref $r; + return; +} + +#-----------------------------------------------------------------------------# +# These are the user side analogs to the author side plugin API calls. +# Prefix with '_' to not pollute Module::Install plugin space. 
+#-----------------------------------------------------------------------------# +sub _initial { + my ($self) = @_; +} + +sub _main { + my ($self) = @_; +} + +# NOTE These must match Module::Package::Plugin::final. +sub _final { + my ($self) = @_; + $self->_all_from; + $self->_requires_from; + $self->_install_bin; + $self->_install_share; + $self->_WriteAll; +} + +#-----------------------------------------------------------------------------# +# This section is where all the useful code bits go. These bits are needed by +# both Author and User side runs. +#-----------------------------------------------------------------------------# + +my $all_from = 0; +sub _all_from { + my $self = shift; + return if $all_from++; + return if $self->name; + my $file = shift || "$main::PM" or die "all_from has no file"; + $self->all_from($file); +} + +my $requires_from = 0; +sub _requires_from { + my $self = shift; + return if $requires_from++; + return unless $self->package_options->{requires_from}; + my $file = shift || "$main::PM" or die "requires_from has no file"; + $self->requires_from($main::PM) +} + +my $install_bin = 0; +sub _install_bin { + my $self = shift; + return if $install_bin++; + return unless $self->package_options->{install_bin}; + return unless -d 'bin'; + my @bin; + File::Find::find(sub { + return unless -f $_; + push @bin, $File::Find::name; + }, 'bin'); + $self->install_script($_) for @bin; +} + +my $install_share = 0; +sub _install_share { + my $self = shift; + return if $install_share++; + return unless $self->package_options->{install_share}; + return unless -d 'share'; + $self->install_share; +} + +my $WriteAll = 0; +sub _WriteAll { + my $self = shift; + return if $WriteAll++; + $self->WriteAll(@_); +} + +# Base package for Module::Package plugin distributed components. +package Module::Package::Dist; + +sub new { + my ($class, %args) = @_; + bless \%args, $class; +} + +sub mi { + @_ > 1 ? 
($_[0]->{mi}=$_[1]) : $_[0]->{mi}; +} + +sub _initial { + my ($self) = @_; +} + +sub _main { + my ($self) = @_; +} + +sub _final { + my ($self) = @_; +} + +1; + +#-----------------------------------------------------------------------------# +# Take a guess at the primary .pm and .pod files for 'all_from', and friends. +# Put them in global magical vars in the main:: namespace. +#-----------------------------------------------------------------------------# +package Module::Package::PM; +use overload '""' => sub { + $_[0]->guess_pm unless @{$_[0]}; + return $_[0]->[0]; +}; +sub set { $_[0]->[0] = $_[1] } +sub guess_pm { + my $pm = ''; + my $self = shift; + if (-e 'META.yml') { + open META, 'META.yml' or die "Can't open 'META.yml' for input:\n$!"; + my $meta = do { local $/; }; + close META; + $meta =~ /^module_name: (\S+)$/m + or die "Can't get module_name from META.yml"; + $pm = $1; + $pm =~ s!::!/!g; + $pm = "lib/$pm.pm"; + } + else { + require File::Find; + my @array = (); + File::Find::find(sub { + return unless /\.pm$/; + my $name = $File::Find::name; + my $num = ($name =~ s!/+!/!g); + my $ary = $array[$num] ||= []; + push @$ary, $name; + }, 'lib'); + shift @array while @array and not defined $array[0]; + die "Can't guess main module" unless @array; + (($pm) = sort @{$array[0]}) or + die "Can't guess main module"; + } + my $pmc = $pm . 'c'; + $pm = $pmc if -e $pmc; + $self->set($pm); +} +$main::PM = bless [$main::PM ? ($main::PM) : ()], __PACKAGE__; + +package Module::Package::POD; +use overload '""' => sub { + return $_[0]->[0] if @{$_[0]}; + (my $pod = "$main::PM") =~ s/\.pm/.pod/ + or die "Module::Package's \$main::PM value should end in '.pm'"; + return -e $pod ? $pod : ''; +}; +sub set { $_[0][0] = $_[1] } +$main::POD = bless [$main::POD ? 
($main::POD) : ()], __PACKAGE__; + +1; + diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install/ReadmeFromPod.pm libhtml-html5-parser-perl-0.107/inc/Module/Install/ReadmeFromPod.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Install/ReadmeFromPod.pm 2011-02-09 14:22:08.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install/ReadmeFromPod.pm 1970-01-01 00:00:00.000000000 +0000 @@ -1,36 +0,0 @@ -#line 1 -package Module::Install::ReadmeFromPod; - -use strict; -use warnings; -use base qw(Module::Install::Base); -use vars qw($VERSION); - -$VERSION = '0.06'; - -sub readme_from { - my $self = shift; - return unless $Module::Install::AUTHOR; - my $file = shift || return; - my $clean = shift; - require Pod::Text; - my $parser = Pod::Text->new(); - open README, '> README' or die "$!\n"; - $parser->output_fh( *README ); - $parser->parse_file( $file ); - return 1 unless $clean; - $self->postamble(<<"END"); -distclean :: license_clean - -license_clean: -\t\$(RM_F) README -END - return 1; -} - -'Readme!'; - -__END__ - -#line 89 - diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install/Scripts.pm libhtml-html5-parser-perl-0.107/inc/Module/Install/Scripts.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Install/Scripts.pm 2011-02-09 14:22:26.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install/Scripts.pm 2011-10-20 20:28:39.000000000 +0000 @@ -6,7 +6,7 @@ use vars qw{$VERSION @ISA $ISCORE}; BEGIN { - $VERSION = '1.00'; + $VERSION = '1.02'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install/TrustMetaYml.pm libhtml-html5-parser-perl-0.107/inc/Module/Install/TrustMetaYml.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Install/TrustMetaYml.pm 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install/TrustMetaYml.pm 2011-10-20 20:28:35.000000000 +0000 @@ -0,0 +1,52 @@ +#line 1 +package Module::Install::TrustMetaYml; + +use 5.008; +use constant { 
FALSE => 0, TRUE => 1 }; +use strict; +use utf8; + +BEGIN { + $Module::Install::TrustMetaYml::AUTHORITY = 'cpan:TOBYINK'; +} +BEGIN { + $Module::Install::TrustMetaYml::VERSION = '0.001'; +} + +use base qw(Module::Install::Base); + +sub trust_meta_yml +{ + my ($self, $where) = @_; + $where ||= 'META.yml'; + + $self->perl_version('5.006') unless defined $self->perl_version; + + $self->include_deps('YAML::Tiny', 0); + return $self if $self->is_admin; + + require YAML::Tiny; + my $data = YAML::Tiny::LoadFile($where); + + $self->perl_version($data->{requires}{perl} || '5.006'); + + KEY: foreach my $key (qw(requires recommends build_requires)) + { + next KEY unless ref $data->{$key} eq 'HASH'; + my %deps = %{$data->{$key}}; + DEP: while (my ($pkg, $ver) = each %deps) + { + next if $pkg eq 'perl'; + $self->$key($pkg, $ver); + } + } + + return $self; +} + +*trust_meta_yaml = \&trust_meta_yml; + +TRUE; + +__END__ + diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install/Win32.pm libhtml-html5-parser-perl-0.107/inc/Module/Install/Win32.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Install/Win32.pm 2011-02-09 14:22:28.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install/Win32.pm 2011-10-20 20:28:43.000000000 +0000 @@ -6,7 +6,7 @@ use vars qw{$VERSION @ISA $ISCORE}; BEGIN { - $VERSION = '1.00'; + $VERSION = '1.02'; @ISA = 'Module::Install::Base'; $ISCORE = 1; } diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install/WriteAll.pm libhtml-html5-parser-perl-0.107/inc/Module/Install/WriteAll.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Install/WriteAll.pm 2011-02-09 14:22:28.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install/WriteAll.pm 2011-10-20 20:28:43.000000000 +0000 @@ -6,7 +6,7 @@ use vars qw{$VERSION @ISA $ISCORE}; BEGIN { - $VERSION = '1.00'; + $VERSION = '1.02'; @ISA = qw{Module::Install::Base}; $ISCORE = 1; } diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Install.pm 
libhtml-html5-parser-perl-0.107/inc/Module/Install.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Install.pm 2011-02-09 14:22:03.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Install.pm 2011-10-20 20:28:29.000000000 +0000 @@ -31,7 +31,7 @@ # This is not enforced yet, but will be some time in the next few # releases once we can make sure it won't clash with custom # Module::Install extensions. - $VERSION = '1.00'; + $VERSION = '1.02'; # Storage for the pseudo-singleton $MAIN = undef; @@ -467,4 +467,4 @@ 1; -# Copyright 2008 - 2010 Adam Kennedy. +# Copyright 2008 - 2011 Adam Kennedy. diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Package/Dist/RDF.pm libhtml-html5-parser-perl-0.107/inc/Module/Package/Dist/RDF.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Package/Dist/RDF.pm 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Package/Dist/RDF.pm 2011-10-20 20:28:39.000000000 +0000 @@ -0,0 +1,21 @@ +#line 1 +package Module::Package::Dist::RDF; + +use 5.008003; +our $VERSION = '0.001'; + +package Module::Package::Dist::RDF::standard; + +use 5.008003; +use strict; +use base qw[Module::Package::Dist]; +our $VERSION = '0.001'; + +sub _main +{ + my ($self) = @_; + $self->mi->trust_meta_yml; + $self->mi->auto_install; +} + +1; \ No newline at end of file diff -Nru libhtml-html5-parser-perl-0.103/inc/Module/Package.pm libhtml-html5-parser-perl-0.107/inc/Module/Package.pm --- libhtml-html5-parser-perl-0.103/inc/Module/Package.pm 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Module/Package.pm 2011-10-20 20:28:44.000000000 +0000 @@ -0,0 +1,71 @@ +#line 1 +## +# name: Module::Package +# abstract: Postmodern Perl Module Packaging +# author: Ingy döt Net +# license: perl +# copyright: 2011 +# see: +# - Module::Package::Plugin +# - Module::Install::Package +# - Module::Package::Tutorial + +package Module::Package; +use 5.005; +use strict; + +BEGIN { + $Module::Package::VERSION = '0.30'; + 
$inc::Module::Package::VERSION ||= $Module::Package::VERSION; + @inc::Module::Package::ISA = __PACKAGE__; +} + +sub import { + my $class = shift; + $INC{'inc/Module/Install.pm'} = __FILE__; + unshift @INC, 'inc' unless $INC[0] eq 'inc'; + eval "use Module::Install 1.01 (); 1" or $class->error($@); + + package main; + Module::Install->import(); + eval { + module_package_internals_version_check($Module::Package::VERSION); + module_package_internals_init(@_); + }; + if ($@) { + $Module::Package::ERROR = $@; + die $@; + } +} + +# XXX Remove this when things are stable. +sub error { + my ($class, $error) = @_; + if (-e 'inc' and not -e 'inc/.author') { + require Data::Dumper; + $Data::Dumper::Sortkeys = 1; + my $dump1 = Data::Dumper::Dumper(\%INC); + my $dump2 = Data::Dumper::Dumper(\@INC); + die <<"..."; +This should not have happened. Hopefully this dump will explain the problem: + +inc::Module::Package: $inc::Module::Package::VERSION +Module::Package: $Module::Package::VERSION +inc::Module::Install: $inc::Module::Install::VERSION +Module::Install: $Module::Install::VERSION + +Error: $error + +%INC: +$dump1 +\@INC: +$dump2 +... + } + else { + die $error; + } +} + +1; + diff -Nru libhtml-html5-parser-perl-0.103/inc/Scalar/Util/PP.pm libhtml-html5-parser-perl-0.107/inc/Scalar/Util/PP.pm --- libhtml-html5-parser-perl-0.103/inc/Scalar/Util/PP.pm 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Scalar/Util/PP.pm 2011-10-20 20:28:36.000000000 +0000 @@ -0,0 +1,110 @@ +#line 1 +# Scalar::Util::PP.pm +# +# Copyright (c) 1997-2009 Graham Barr . All rights reserved. +# This program is free software; you can redistribute it and/or +# modify it under the same terms as Perl itself. 
+# +# This module is normally only loaded if the XS module is not available + +package Scalar::Util::PP; + +use strict; +use warnings; +use vars qw(@ISA @EXPORT $VERSION $recurse); +require Exporter; +use B qw(svref_2object); + +@ISA = qw(Exporter); +@EXPORT = qw(blessed reftype tainted readonly refaddr looks_like_number); +$VERSION = "1.21"; +$VERSION = eval $VERSION; + +sub blessed ($) { + return undef unless length(ref($_[0])); + my $b = svref_2object($_[0]); + return undef unless $b->isa('B::PVMG'); + my $s = $b->SvSTASH; + return $s->isa('B::HV') ? $s->NAME : undef; +} + +sub refaddr($) { + return undef unless length(ref($_[0])); + + my $addr; + if(defined(my $pkg = blessed($_[0]))) { + $addr .= bless $_[0], 'Scalar::Util::Fake'; + bless $_[0], $pkg; + } + else { + $addr .= $_[0] + } + + $addr =~ /0x(\w+)/; + local $^W; + hex($1); +} + +{ + my %tmap = qw( + B::HV HASH + B::AV ARRAY + B::CV CODE + B::IO IO + B::NULL SCALAR + B::NV SCALAR + B::PV SCALAR + B::GV GLOB + B::RV REF + B::REGEXP REGEXP + ); + + sub reftype ($) { + my $r = shift; + + return undef unless length(ref($r)); + + my $t = ref(svref_2object($r)); + + return + exists $tmap{$t} ? $tmap{$t} + : length(ref($$r)) ? 'REF' + : 'SCALAR'; + } +} + +sub tainted { + local($@, $SIG{__DIE__}, $SIG{__WARN__}); + local $^W = 0; + no warnings; + eval { kill 0 * $_[0] }; + $@ =~ /^Insecure/; +} + +sub readonly { + return 0 if tied($_[0]) || (ref(\($_[0])) ne "SCALAR"); + + local($@, $SIG{__DIE__}, $SIG{__WARN__}); + my $tmp = $_[0]; + + !eval { $_[0] = $tmp; 1 }; +} + +sub looks_like_number { + local $_ = shift; + + # checks from perlfaq4 + return 0 if !defined($_); + if (ref($_)) { + require overload; + return overload::Overloaded($_) ? 
defined(0 + $_) : 0; + } + return 1 if (/^[+-]?\d+$/); # is a +/- integer + return 1 if (/^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/); # a C float + return 1 if ($] >= 5.008 and /^(Inf(inity)?|NaN)$/i) or ($] >= 5.006001 and /^Inf$/i); + + 0; +} + + +1; diff -Nru libhtml-html5-parser-perl-0.103/inc/Scalar/Util.pm libhtml-html5-parser-perl-0.107/inc/Scalar/Util.pm --- libhtml-html5-parser-perl-0.103/inc/Scalar/Util.pm 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Scalar/Util.pm 2011-10-20 20:28:36.000000000 +0000 @@ -0,0 +1,71 @@ +#line 1 +# Scalar::Util.pm +# +# Copyright (c) 1997-2007 Graham Barr . All rights reserved. +# This program is free software; you can redistribute it and/or +# modify it under the same terms as Perl itself. + +package Scalar::Util; + +use strict; +use vars qw(@ISA @EXPORT_OK $VERSION @EXPORT_FAIL); +require Exporter; +require List::Util; # List::Util loads the XS + +@ISA = qw(Exporter); +@EXPORT_OK = qw(blessed dualvar reftype weaken isweak tainted readonly openhandle refaddr isvstring looks_like_number set_prototype); +$VERSION = "1.21"; +$VERSION = eval $VERSION; + +unless (defined &dualvar) { + # Load Pure Perl version if XS not loaded + require Scalar::Util::PP; + Scalar::Util::PP->import; + push @EXPORT_FAIL, qw(weaken isweak dualvar isvstring set_prototype); +} + +sub export_fail { + if (grep { /dualvar/ } @EXPORT_FAIL) { # no XS loaded + my $pat = join("|", @EXPORT_FAIL); + if (my ($err) = grep { /^($pat)$/ } @_ ) { + require Carp; + Carp::croak("$err is only available with the XS version of Scalar::Util"); + } + } + + if (grep { /^(weaken|isweak)$/ } @_ ) { + require Carp; + Carp::croak("Weak references are not implemented in the version of perl"); + } + + if (grep { /^(isvstring)$/ } @_ ) { + require Carp; + Carp::croak("Vstrings are not implemented in the version of perl"); + } + + @_; +} + +sub openhandle ($) { + my $fh = shift; + my $rt = reftype($fh) || ''; + + return defined(fileno($fh)) ? 
$fh : undef + if $rt eq 'IO'; + + if (reftype(\$fh) eq 'GLOB') { # handle openhandle(*DATA) + $fh = \(my $tmp=$fh); + } + elsif ($rt ne 'GLOB') { + return undef; + } + + (tied(*$fh) or defined(fileno($fh))) + ? $fh : undef; +} + +1; + +__END__ + +#line 283 diff -Nru libhtml-html5-parser-perl-0.103/inc/Test/Signature.pm libhtml-html5-parser-perl-0.107/inc/Test/Signature.pm --- libhtml-html5-parser-perl-0.103/inc/Test/Signature.pm 2011-02-09 14:22:27.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/Test/Signature.pm 1970-01-01 00:00:00.000000000 +0000 @@ -1,58 +0,0 @@ -#line 1 -package Test::Signature; - -use 5.004; -use strict; -use vars qw( $VERSION @ISA @EXPORT @EXPORT_OK ); -use Exporter; -use Test::Builder; - -BEGIN { - $VERSION = '1.10'; - @ISA = qw( Exporter ); - @EXPORT = qw( signature_ok ); - @EXPORT_OK = qw( signature_force_ok ); -} - -my $test = Test::Builder->new(); - -#line 53 - -#line 77 - -sub action_skip { $test->skip( $_[0] ) } -sub action_ok { $test->ok( 0, $_[0] ) } - -sub signature_ok { - my $name = shift || 'Valid signature'; - my $force = shift || 0; - my $action = $force ? \&action_ok : \&action_skip; - SKIP: { - if ( !-s 'SIGNATURE' ) { - $action->("No SIGNATURE file found."); - } - elsif ( !eval { require Module::Signature; 1 } ) { - $action->( - "Next time around, consider installing Module::Signature, " - . "so you can verify the integrity of this distribution." 
); - } - elsif ( !eval { require Socket; Socket::inet_aton('pgp.mit.edu') } ) { - $action->("Cannot connect to the keyserver."); - } - else { - $test->ok( Module::Signature::verify() == - Module::Signature::SIGNATURE_OK() => $name ); - } - } -} - -#line 118 - -sub signature_force_ok { - signature_ok( $_[0] || undef, 1 ); -} - -1; -__END__ - -#line 297 diff -Nru libhtml-html5-parser-perl-0.103/inc/YAML/Tiny.pm libhtml-html5-parser-perl-0.107/inc/YAML/Tiny.pm --- libhtml-html5-parser-perl-0.103/inc/YAML/Tiny.pm 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/inc/YAML/Tiny.pm 2011-10-20 20:28:36.000000000 +0000 @@ -0,0 +1,622 @@ +#line 1 +package YAML::Tiny; + +use strict; +use Carp 'croak'; + +# UTF Support? +sub HAVE_UTF8 () { $] >= 5.007003 } +BEGIN { + if ( HAVE_UTF8 ) { + # The string eval helps hide this from Test::MinimumVersion + eval "require utf8;"; + die "Failed to load UTF-8 support" if $@; + } + + # Class structure + require 5.004; + require Exporter; + $YAML::Tiny::VERSION = '1.41'; + @YAML::Tiny::ISA = qw{ Exporter }; + @YAML::Tiny::EXPORT = qw{ Load Dump }; + @YAML::Tiny::EXPORT_OK = qw{ LoadFile DumpFile freeze thaw }; + + # Error storage + $YAML::Tiny::errstr = ''; +} + +# The character class of all characters we need to escape +# NOTE: Inlined, since it's only used once +# my $RE_ESCAPE = '[\\x00-\\x08\\x0b-\\x0d\\x0e-\\x1f\"\n]'; + +# Printed form of the unprintable characters in the lowest range +# of ASCII characters, listed by ASCII ordinal position. 
+my @UNPRINTABLE = qw( + z x01 x02 x03 x04 x05 x06 a + x08 t n v f r x0e x0f + x10 x11 x12 x13 x14 x15 x16 x17 + x18 x19 x1a e x1c x1d x1e x1f +); + +# Printable characters for escapes +my %UNESCAPES = ( + z => "\x00", a => "\x07", t => "\x09", + n => "\x0a", v => "\x0b", f => "\x0c", + r => "\x0d", e => "\x1b", '\\' => '\\', +); + +# Special magic boolean words +my %QUOTE = map { $_ => 1 } qw{ + null Null NULL + y Y yes Yes YES n N no No NO + true True TRUE false False FALSE + on On ON off Off OFF +}; + + + + + +##################################################################### +# Implementation + +# Create an empty YAML::Tiny object +sub new { + my $class = shift; + bless [ @_ ], $class; +} + +# Create an object from a file +sub read { + my $class = ref $_[0] ? ref shift : shift; + + # Check the file + my $file = shift or return $class->_error( 'You did not specify a file name' ); + return $class->_error( "File '$file' does not exist" ) unless -e $file; + return $class->_error( "'$file' is a directory, not a file" ) unless -f _; + return $class->_error( "Insufficient permissions to read '$file'" ) unless -r _; + + # Slurp in the file + local $/ = undef; + local *CFG; + unless ( open(CFG, $file) ) { + return $class->_error("Failed to open file '$file': $!"); + } + my $contents = ; + unless ( close(CFG) ) { + return $class->_error("Failed to close file '$file': $!"); + } + + $class->read_string( $contents ); +} + +# Create an object from a string +sub read_string { + my $class = ref $_[0] ? 
ref shift : shift; + my $self = bless [], $class; + my $string = $_[0]; + unless ( defined $string ) { + return $self->_error("Did not provide a string to load"); + } + + # Byte order marks + # NOTE: Keeping this here to educate maintainers + # my %BOM = ( + # "\357\273\277" => 'UTF-8', + # "\376\377" => 'UTF-16BE', + # "\377\376" => 'UTF-16LE', + # "\377\376\0\0" => 'UTF-32LE' + # "\0\0\376\377" => 'UTF-32BE', + # ); + if ( $string =~ /^(?:\376\377|\377\376|\377\376\0\0|\0\0\376\377)/ ) { + return $self->_error("Stream has a non UTF-8 BOM"); + } else { + # Strip UTF-8 bom if found, we'll just ignore it + $string =~ s/^\357\273\277//; + } + + # Try to decode as utf8 + utf8::decode($string) if HAVE_UTF8; + + # Check for some special cases + return $self unless length $string; + unless ( $string =~ /[\012\015]+\z/ ) { + return $self->_error("Stream does not end with newline character"); + } + + # Split the file into lines + my @lines = grep { ! /^\s*(?:\#.*)?\z/ } + split /(?:\015{1,2}\012|\015|\012)/, $string; + + # Strip the initial YAML header + @lines and $lines[0] =~ /^\%YAML[: ][\d\.]+.*\z/ and shift @lines; + + # A nibbling parser + while ( @lines ) { + # Do we have a document header? + if ( $lines[0] =~ /^---\s*(?:(.+)\s*)?\z/ ) { + # Handle scalar documents + shift @lines; + if ( defined $1 and $1 !~ /^(?:\#.+|\%YAML[: ][\d\.]+)\z/ ) { + push @$self, $self->_read_scalar( "$1", [ undef ], \@lines ); + next; + } + } + + if ( ! 
@lines or $lines[0] =~ /^(?:---|\.\.\.)/ ) { + # A naked document + push @$self, undef; + while ( @lines and $lines[0] !~ /^---/ ) { + shift @lines; + } + + } elsif ( $lines[0] =~ /^\s*\-/ ) { + # An array at the root + my $document = [ ]; + push @$self, $document; + $self->_read_array( $document, [ 0 ], \@lines ); + + } elsif ( $lines[0] =~ /^(\s*)\S/ ) { + # A hash at the root + my $document = { }; + push @$self, $document; + $self->_read_hash( $document, [ length($1) ], \@lines ); + + } else { + croak("YAML::Tiny failed to classify the line '$lines[0]'"); + } + } + + $self; +} + +# Deparse a scalar string to the actual scalar +sub _read_scalar { + my ($self, $string, $indent, $lines) = @_; + + # Trim trailing whitespace + $string =~ s/\s*\z//; + + # Explitic null/undef + return undef if $string eq '~'; + + # Single quote + if ( $string =~ /^\'(.*?)\'\z/ ) { + return '' unless defined $1; + $string = $1; + $string =~ s/\'\'/\'/g; + return $string; + } + + # Double quote. + # The commented out form is simpler, but overloaded the Perl regex + # engine due to recursion and backtracking problems on strings + # larger than 32,000ish characters. Keep it for reference purposes. + # if ( $string =~ /^\"((?:\\.|[^\"])*)\"\z/ ) { + if ( $string =~ /^\"([^\\"]*(?:\\.[^\\"]*)*)\"\z/ ) { + # Reusing the variable is a little ugly, + # but avoids a new variable and a string copy. 
+ $string = $1; + $string =~ s/\\"/"/g; + $string =~ s/\\([never\\fartz]|x([0-9a-fA-F]{2}))/(length($1)>1)?pack("H2",$2):$UNESCAPES{$1}/gex; + return $string; + } + + # Special cases + if ( $string =~ /^[\'\"!&]/ ) { + croak("YAML::Tiny does not support a feature in line '$lines->[0]'"); + } + return {} if $string eq '{}'; + return [] if $string eq '[]'; + + # Regular unquoted string + return $string unless $string =~ /^[>|]/; + + # Error + croak("YAML::Tiny failed to find multi-line scalar content") unless @$lines; + + # Check the indent depth + $lines->[0] =~ /^(\s*)/; + $indent->[-1] = length("$1"); + if ( defined $indent->[-2] and $indent->[-1] <= $indent->[-2] ) { + croak("YAML::Tiny found bad indenting in line '$lines->[0]'"); + } + + # Pull the lines + my @multiline = (); + while ( @$lines ) { + $lines->[0] =~ /^(\s*)/; + last unless length($1) >= $indent->[-1]; + push @multiline, substr(shift(@$lines), length($1)); + } + + my $j = (substr($string, 0, 1) eq '>') ? ' ' : "\n"; + my $t = (substr($string, 1, 1) eq '-') ? '' : "\n"; + return join( $j, @multiline ) . 
$t; +} + +# Parse an array +sub _read_array { + my ($self, $array, $indent, $lines) = @_; + + while ( @$lines ) { + # Check for a new document + if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) { + while ( @$lines and $lines->[0] !~ /^---/ ) { + shift @$lines; + } + return 1; + } + + # Check the indent level + $lines->[0] =~ /^(\s*)/; + if ( length($1) < $indent->[-1] ) { + return 1; + } elsif ( length($1) > $indent->[-1] ) { + croak("YAML::Tiny found bad indenting in line '$lines->[0]'"); + } + + if ( $lines->[0] =~ /^(\s*\-\s+)[^\'\"]\S*\s*:(?:\s+|$)/ ) { + # Inline nested hash + my $indent2 = length("$1"); + $lines->[0] =~ s/-/ /; + push @$array, { }; + $self->_read_hash( $array->[-1], [ @$indent, $indent2 ], $lines ); + + } elsif ( $lines->[0] =~ /^\s*\-(\s*)(.+?)\s*\z/ ) { + # Array entry with a value + shift @$lines; + push @$array, $self->_read_scalar( "$2", [ @$indent, undef ], $lines ); + + } elsif ( $lines->[0] =~ /^\s*\-\s*\z/ ) { + shift @$lines; + unless ( @$lines ) { + push @$array, undef; + return 1; + } + if ( $lines->[0] =~ /^(\s*)\-/ ) { + my $indent2 = length("$1"); + if ( $indent->[-1] == $indent2 ) { + # Null array entry + push @$array, undef; + } else { + # Naked indenter + push @$array, [ ]; + $self->_read_array( $array->[-1], [ @$indent, $indent2 ], $lines ); + } + + } elsif ( $lines->[0] =~ /^(\s*)\S/ ) { + push @$array, { }; + $self->_read_hash( $array->[-1], [ @$indent, length("$1") ], $lines ); + + } else { + croak("YAML::Tiny failed to classify line '$lines->[0]'"); + } + + } elsif ( defined $indent->[-2] and $indent->[-1] == $indent->[-2] ) { + # This is probably a structure like the following... + # --- + # foo: + # - list + # bar: value + # + # ... 
so lets return and let the hash parser handle it + return 1; + + } else { + croak("YAML::Tiny failed to classify line '$lines->[0]'"); + } + } + + return 1; +} + +# Parse an array +sub _read_hash { + my ($self, $hash, $indent, $lines) = @_; + + while ( @$lines ) { + # Check for a new document + if ( $lines->[0] =~ /^(?:---|\.\.\.)/ ) { + while ( @$lines and $lines->[0] !~ /^---/ ) { + shift @$lines; + } + return 1; + } + + # Check the indent level + $lines->[0] =~ /^(\s*)/; + if ( length($1) < $indent->[-1] ) { + return 1; + } elsif ( length($1) > $indent->[-1] ) { + croak("YAML::Tiny found bad indenting in line '$lines->[0]'"); + } + + # Get the key + unless ( $lines->[0] =~ s/^\s*([^\'\" ][^\n]*?)\s*:(\s+|$)// ) { + if ( $lines->[0] =~ /^\s*[?\'\"]/ ) { + croak("YAML::Tiny does not support a feature in line '$lines->[0]'"); + } + croak("YAML::Tiny failed to classify line '$lines->[0]'"); + } + my $key = $1; + + # Do we have a value? + if ( length $lines->[0] ) { + # Yes + $hash->{$key} = $self->_read_scalar( shift(@$lines), [ @$indent, undef ], $lines ); + } else { + # An indent + shift @$lines; + unless ( @$lines ) { + $hash->{$key} = undef; + return 1; + } + if ( $lines->[0] =~ /^(\s*)-/ ) { + $hash->{$key} = []; + $self->_read_array( $hash->{$key}, [ @$indent, length($1) ], $lines ); + } elsif ( $lines->[0] =~ /^(\s*)./ ) { + my $indent2 = length("$1"); + if ( $indent->[-1] >= $indent2 ) { + # Null hash entry + $hash->{$key} = undef; + } else { + $hash->{$key} = {}; + $self->_read_hash( $hash->{$key}, [ @$indent, length($1) ], $lines ); + } + } + } + } + + return 1; +} + +# Save an object to a file +sub write { + my $self = shift; + my $file = shift or return $self->_error('No file name provided'); + + # Write it to the file + open( CFG, '>' . $file ) or return $self->_error( + "Failed to open file '$file' for writing: $!" 
+ ); + print CFG $self->write_string; + close CFG; + + return 1; +} + +# Save an object to a string +sub write_string { + my $self = shift; + return '' unless @$self; + + # Iterate over the documents + my $indent = 0; + my @lines = (); + foreach my $cursor ( @$self ) { + push @lines, '---'; + + # An empty document + if ( ! defined $cursor ) { + # Do nothing + + # A scalar document + } elsif ( ! ref $cursor ) { + $lines[-1] .= ' ' . $self->_write_scalar( $cursor, $indent ); + + # A list at the root + } elsif ( ref $cursor eq 'ARRAY' ) { + unless ( @$cursor ) { + $lines[-1] .= ' []'; + next; + } + push @lines, $self->_write_array( $cursor, $indent, {} ); + + # A hash at the root + } elsif ( ref $cursor eq 'HASH' ) { + unless ( %$cursor ) { + $lines[-1] .= ' {}'; + next; + } + push @lines, $self->_write_hash( $cursor, $indent, {} ); + + } else { + croak("Cannot serialize " . ref($cursor)); + } + } + + join '', map { "$_\n" } @lines; +} + +sub _write_scalar { + my $string = $_[1]; + return '~' unless defined $string; + return "''" unless length $string; + if ( $string =~ /[\x00-\x08\x0b-\x0d\x0e-\x1f\"\'\n]/ ) { + $string =~ s/\\/\\\\/g; + $string =~ s/"/\\"/g; + $string =~ s/\n/\\n/g; + $string =~ s/([\x00-\x1f])/\\$UNPRINTABLE[ord($1)]/g; + return qq|"$string"|; + } + if ( $string =~ /(?:^\W|\s)/ or $QUOTE{$string} ) { + return "'$string'"; + } + return $string; +} + +sub _write_array { + my ($self, $array, $indent, $seen) = @_; + if ( $seen->{refaddr($array)}++ ) { + die "YAML::Tiny does not support circular references"; + } + my @lines = (); + foreach my $el ( @$array ) { + my $line = (' ' x $indent) . '-'; + my $type = ref $el; + if ( ! $type ) { + $line .= ' ' . 
$self->_write_scalar( $el, $indent + 1 ); + push @lines, $line; + + } elsif ( $type eq 'ARRAY' ) { + if ( @$el ) { + push @lines, $line; + push @lines, $self->_write_array( $el, $indent + 1, $seen ); + } else { + $line .= ' []'; + push @lines, $line; + } + + } elsif ( $type eq 'HASH' ) { + if ( keys %$el ) { + push @lines, $line; + push @lines, $self->_write_hash( $el, $indent + 1, $seen ); + } else { + $line .= ' {}'; + push @lines, $line; + } + + } else { + die "YAML::Tiny does not support $type references"; + } + } + + @lines; +} + +sub _write_hash { + my ($self, $hash, $indent, $seen) = @_; + if ( $seen->{refaddr($hash)}++ ) { + die "YAML::Tiny does not support circular references"; + } + my @lines = (); + foreach my $name ( sort keys %$hash ) { + my $el = $hash->{$name}; + my $line = (' ' x $indent) . "$name:"; + my $type = ref $el; + if ( ! $type ) { + $line .= ' ' . $self->_write_scalar( $el, $indent + 1 ); + push @lines, $line; + + } elsif ( $type eq 'ARRAY' ) { + if ( @$el ) { + push @lines, $line; + push @lines, $self->_write_array( $el, $indent + 1, $seen ); + } else { + $line .= ' []'; + push @lines, $line; + } + + } elsif ( $type eq 'HASH' ) { + if ( keys %$el ) { + push @lines, $line; + push @lines, $self->_write_hash( $el, $indent + 1, $seen ); + } else { + $line .= ' {}'; + push @lines, $line; + } + + } else { + die "YAML::Tiny does not support $type references"; + } + } + + @lines; +} + +# Set error +sub _error { + $YAML::Tiny::errstr = $_[1]; + undef; +} + +# Retrieve error +sub errstr { + $YAML::Tiny::errstr; +} + + + + + +##################################################################### +# YAML Compatibility + +sub Dump { + YAML::Tiny->new(@_)->write_string; +} + +sub Load { + my $self = YAML::Tiny->read_string(@_); + unless ( $self ) { + croak("Failed to load YAML document from string"); + } + if ( wantarray ) { + return @$self; + } else { + # To match YAML.pm, return the last document + return $self->[-1]; + } +} + +BEGIN { + *freeze = 
*Dump; + *thaw = *Load; +} + +sub DumpFile { + my $file = shift; + YAML::Tiny->new(@_)->write($file); +} + +sub LoadFile { + my $self = YAML::Tiny->read($_[0]); + unless ( $self ) { + croak("Failed to load YAML document from '" . ($_[0] || '') . "'"); + } + if ( wantarray ) { + return @$self; + } else { + # Return only the last document to match YAML.pm, + return $self->[-1]; + } +} + + + + + +##################################################################### +# Use Scalar::Util if possible, otherwise emulate it + +BEGIN { + eval { + require Scalar::Util; + }; + if ( $@ ) { + # Failed to load Scalar::Util + eval <<'END_PERL'; +sub refaddr { + my $pkg = ref($_[0]) or return undef; + if (!!UNIVERSAL::can($_[0], 'can')) { + bless $_[0], 'Scalar::Util::Fake'; + } else { + $pkg = undef; + } + "$_[0]" =~ /0x(\w+)/; + my $i = do { local $^W; hex $1 }; + bless $_[0], $pkg if defined $pkg; + $i; +} +END_PERL + } else { + Scalar::Util->import('refaddr'); + } +} + +1; + +__END__ + +#line 1132 diff -Nru libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Charset/DecodeHandle.pm libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Charset/DecodeHandle.pm --- libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Charset/DecodeHandle.pm 2011-02-09 14:19:10.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Charset/DecodeHandle.pm 2011-10-20 20:26:19.000000000 +0000 @@ -1,7 +1,7 @@ package HTML::HTML5::Parser::Charset::DecodeHandle; use strict; -our $VERSION = '0.103'; +our $VERSION = '0.107'; ## NOTE: |Message::Charset::Info| uses this module without calling ## the constructor. 
diff -Nru libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Charset/Info.pm libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Charset/Info.pm --- libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Charset/Info.pm 2011-02-09 14:19:10.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Charset/Info.pm 2011-10-20 20:26:19.000000000 +0000 @@ -1,6 +1,6 @@ package HTML::HTML5::Parser::Charset::Info; use strict; -our $VERSION='0.103'; +our $VERSION='0.107'; ## TODO: Certain encodings MUST NOT be implemented [HTML5]. diff -Nru libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Charset/UnicodeChecker.pm libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Charset/UnicodeChecker.pm --- libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Charset/UnicodeChecker.pm 2011-02-09 14:19:10.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Charset/UnicodeChecker.pm 2011-10-20 20:26:19.000000000 +0000 @@ -1,7 +1,7 @@ package HTML::HTML5::Parser::Charset::UnicodeChecker; use strict; -our $VERSION = '0.103'; +our $VERSION = '0.107'; ## NOTE: For more information (including rationals of checks performed ## in this module), see diff -Nru libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Charset/UniversalCharDet.pm libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Charset/UniversalCharDet.pm --- libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Charset/UniversalCharDet.pm 2011-02-09 14:19:10.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Charset/UniversalCharDet.pm 2011-10-20 20:26:19.000000000 +0000 @@ -3,7 +3,7 @@ use strict; use HTML::Encoding qw(encoding_from_first_chars encoding_from_html_document); -our $VERSION='0.103'; +our $VERSION='0.107'; our $DEBUG; sub _detect { @@ -30,7 +30,7 @@ } } # detect_byte_string -#Copyright 2007-2010 Wakaba +#Copyright 2007-2011 Wakaba #Copyright 2009-2011 Toby Inkster # #This library is free software; you can redistribute it diff -Nru 
libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Charset/WebLatin1.pm libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Charset/WebLatin1.pm --- libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Charset/WebLatin1.pm 2011-02-09 14:19:10.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Charset/WebLatin1.pm 2011-10-20 20:26:19.000000000 +0000 @@ -1,7 +1,7 @@ #!/usr/bin/perl package HTML::HTML5::Parser::Charset::WebLatin1; use strict; -our $VERSION='0.103'; +our $VERSION='0.107'; ## NOTE: This module does not expect that its standalone uses. ## See Message::Charset::Info for how it is used. diff -Nru libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Charset/WebThai.pm libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Charset/WebThai.pm --- libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Charset/WebThai.pm 2011-02-09 14:19:10.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Charset/WebThai.pm 2011-10-20 20:26:19.000000000 +0000 @@ -1,7 +1,7 @@ #!/usr/bin/perl package HTML::HTML5::Parser::Charset::WebThai; use strict; -our $VERSION='0.103'; +our $VERSION='0.107'; ## NOTE: This module does not expect that its standalone uses. ## See Message::Charset::Info for how it is used. 
diff -Nru libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Error.pm libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Error.pm --- libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Error.pm 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Error.pm 2011-10-20 20:26:19.000000000 +0000 @@ -0,0 +1,180 @@ +package HTML::HTML5::Parser::Error; + +=head1 NAME + +HTML::HTML5::Parser::Error - an error that occurred during parsing + +=cut + +use 5.008001; +use strict; +use warnings; + +our $VERSION = '0.107'; + +use overload '""' => \&to_string; + +sub new +{ + my ($class, %args) = @_; + bless \%args, $class; +} + +=head1 DESCRIPTION + +Note that L<HTML::HTML5::Parser> is not a validation tool, and there are many +classes of error that it does not care about, so will not raise. + +The C<error_handler> and C<errors> methods of C<HTML::HTML5::Parser> generate +C<HTML::HTML5::Parser::Error> objects. + +C<HTML::HTML5::Parser::Error> overloads stringification, so can be printed, +matched against regular expressions, etc. + +=head2 Constructor + +=over + +=item C<< new(level=>$level, type=>$type, token=>$token, ...) >> + +Constructs a new C<HTML::HTML5::Parser::Error> object. + +=back + +=head2 Methods + +=over + +=item C<level> + +Returns the level of error. ('MUST', 'SHOULD', 'WARN', 'INFO' or undef.) + +=cut + +sub level +{ + my $self = shift; + return { + m => 'MUST', + s => 'SHOULD', + w => 'WARN', + i => 'INFO', + u => undef, + }->{$self->{level}} || undef; +} + +=item C<layer> + +Returns the parsing layer involved, often undef. e.g. 'encode'. + +=cut + +sub layer +{ + my $self = shift; + return $self->{layer} || undef; +} + +=item C<type> + +Returns the type of error as a string. + +=cut + +sub type +{ + my $self = shift; + return $self->{type}||undef; +} + +=item C<tag_name> + +Returns the tag name (if any). 
+ +=cut + +sub tag_name +{ + my $self = shift; + return undef unless $self->{token} && exists $self->{token}{tag_name}; + return $self->{token}{tag_name}; +} + +=item C<source_line> + + ($line, $col) = $error->source_line(); + $line = $error->source_line; + +In scalar context, C<source_line> returns the line number of the +source code that triggered the error. + +In list context, returns a line/column pair. (Tab characters count as +one column, not eight.) + +=cut + +sub source_line +{ + my $self = shift; + + if (wantarray) + { + return ($self->{line}, $self->{column}); + } + else + { + return $self->{line}; + } +} + +=item C<to_string> + +Returns a friendly error string. + +=cut + +sub to_string +{ + my $self = shift; + + my $msg = $self->type; + my $level = $self->level; + my $tag = $self->tag_name; + my ($l, $c) = $self->source_line; + + my @details; + push @details, sprintf('compliance: %s', $level) if defined $level; + push @details, sprintf('line: %d', $l) if defined $l; + push @details, sprintf('column: %d', $c) if defined $c; + push @details, sprintf('tag: %s', $tag) if defined $tag; + + if (@details) + { + $msg .= " ["; + $msg .= join '; ', @details; + $msg .= "]"; + } + + return $msg; +} + +1; + +=back + +=head1 SEE ALSO + +L<HTML::HTML5::Parser>. + +=head1 AUTHOR + +Toby Inkster, E<lt>tobyink@cpan.orgE<gt> + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2011 by Toby Inkster + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.8.1 or, +at your option, any later version of Perl 5 you may have available. 
+ diff -Nru libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/NamedEntityList.pm libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/NamedEntityList.pm --- libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/NamedEntityList.pm 2011-02-09 14:19:10.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/NamedEntityList.pm 1970-01-01 00:00:00.000000000 +0000 @@ -1,2273 +0,0 @@ -package HTML::HTML5::Parser::NamedEntityList; - -our $VERSION = '0.103'; - -$HTML::HTML5::Parser::TagSoupParser::EntityChar = { - "AElig" => "\306", - "AElig;" => "\306", - "AMP" => "&", - "AMP;" => "&", - "Aacute" => "\301", - "Aacute;" => "\301", - "Abreve;" => "\x{102}", - "Acirc" => "\302", - "Acirc;" => "\302", - "Acy;" => "\x{410}", - "Afr;" => "\x{1d504}", - "Agrave" => "\300", - "Agrave;" => "\300", - "Alpha;" => "\x{391}", - "Amacr;" => "\x{100}", - "And;" => "\x{2a53}", - "Aogon;" => "\x{104}", - "Aopf;" => "\x{1d538}", - "ApplyFunction;" => "\x{2061}", - "Aring" => "\305", - "Aring;" => "\305", - "Ascr;" => "\x{1d49c}", - "Assign;" => "\x{2254}", - "Atilde" => "\303", - "Atilde;" => "\303", - "Auml" => "\304", - "Auml;" => "\304", - "Backslash;" => "\x{2216}", - "Barv;" => "\x{2ae7}", - "Barwed;" => "\x{2306}", - "Bcy;" => "\x{411}", - "Because;" => "\x{2235}", - "Bernoullis;" => "\x{212c}", - "Beta;" => "\x{392}", - "Bfr;" => "\x{1d505}", - "Bopf;" => "\x{1d539}", - "Breve;" => "\x{2d8}", - "Bscr;" => "\x{212c}", - "Bumpeq;" => "\x{224e}", - "CHcy;" => "\x{427}", - "COPY" => "\251", - "COPY;" => "\251", - "Cacute;" => "\x{106}", - "Cap;" => "\x{22d2}", - "CapitalDifferentialD;" => "\x{2145}", - "Cayleys;" => "\x{212d}", - "Ccaron;" => "\x{10c}", - "Ccedil" => "\307", - "Ccedil;" => "\307", - "Ccirc;" => "\x{108}", - "Cconint;" => "\x{2230}", - "Cdot;" => "\x{10a}", - "Cedilla;" => "\270", - "CenterDot;" => "\267", - "Cfr;" => "\x{212d}", - "Chi;" => "\x{3a7}", - "CircleDot;" => "\x{2299}", - "CircleMinus;" => "\x{2296}", - "CirclePlus;" => "\x{2295}", - 
"CircleTimes;" => "\x{2297}", - "ClockwiseContourIntegral;" => "\x{2232}", - "CloseCurlyDoubleQuote;" => "\x{201d}", - "CloseCurlyQuote;" => "\x{2019}", - "Colon;" => "\x{2237}", - "Colone;" => "\x{2a74}", - "Congruent;" => "\x{2261}", - "Conint;" => "\x{222f}", - "ContourIntegral;" => "\x{222e}", - "Copf;" => "\x{2102}", - "Coproduct;" => "\x{2210}", - "CounterClockwiseContourIntegral;" => "\x{2233}", - "Cross;" => "\x{2a2f}", - "Cscr;" => "\x{1d49e}", - "Cup;" => "\x{22d3}", - "CupCap;" => "\x{224d}", - "DD;" => "\x{2145}", - "DDotrahd;" => "\x{2911}", - "DJcy;" => "\x{402}", - "DScy;" => "\x{405}", - "DZcy;" => "\x{40f}", - "Dagger;" => "\x{2021}", - "Darr;" => "\x{21a1}", - "Dashv;" => "\x{2ae4}", - "Dcaron;" => "\x{10e}", - "Dcy;" => "\x{414}", - "Del;" => "\x{2207}", - "Delta;" => "\x{394}", - "Dfr;" => "\x{1d507}", - "DiacriticalAcute;" => "\264", - "DiacriticalDot;" => "\x{2d9}", - "DiacriticalDoubleAcute;" => "\x{2dd}", - "DiacriticalGrave;" => "`", - "DiacriticalTilde;" => "\x{2dc}", - "Diamond;" => "\x{22c4}", - "DifferentialD;" => "\x{2146}", - "Dopf;" => "\x{1d53b}", - "Dot;" => "\250", - "DotDot;" => "\x{20dc}", - "DotEqual;" => "\x{2250}", - "DoubleContourIntegral;" => "\x{222f}", - "DoubleDot;" => "\250", - "DoubleDownArrow;" => "\x{21d3}", - "DoubleLeftArrow;" => "\x{21d0}", - "DoubleLeftRightArrow;" => "\x{21d4}", - "DoubleLeftTee;" => "\x{2ae4}", - "DoubleLongLeftArrow;" => "\x{27f8}", - "DoubleLongLeftRightArrow;" => "\x{27fa}", - "DoubleLongRightArrow;" => "\x{27f9}", - "DoubleRightArrow;" => "\x{21d2}", - "DoubleRightTee;" => "\x{22a8}", - "DoubleUpArrow;" => "\x{21d1}", - "DoubleUpDownArrow;" => "\x{21d5}", - "DoubleVerticalBar;" => "\x{2225}", - "DownArrow;" => "\x{2193}", - "DownArrowBar;" => "\x{2913}", - "DownArrowUpArrow;" => "\x{21f5}", - "DownBreve;" => "\x{311}", - "DownLeftRightVector;" => "\x{2950}", - "DownLeftTeeVector;" => "\x{295e}", - "DownLeftVector;" => "\x{21bd}", - "DownLeftVectorBar;" => "\x{2956}", - "DownRightTeeVector;" 
=> "\x{295f}", - "DownRightVector;" => "\x{21c1}", - "DownRightVectorBar;" => "\x{2957}", - "DownTee;" => "\x{22a4}", - "DownTeeArrow;" => "\x{21a7}", - "Downarrow;" => "\x{21d3}", - "Dscr;" => "\x{1d49f}", - "Dstrok;" => "\x{110}", - "ENG;" => "\x{14a}", - "ETH" => "\320", - "ETH;" => "\320", - "Eacute" => "\311", - "Eacute;" => "\311", - "Ecaron;" => "\x{11a}", - "Ecirc" => "\312", - "Ecirc;" => "\312", - "Ecy;" => "\x{42d}", - "Edot;" => "\x{116}", - "Efr;" => "\x{1d508}", - "Egrave" => "\310", - "Egrave;" => "\310", - "Element;" => "\x{2208}", - "Emacr;" => "\x{112}", - "EmptySmallSquare;" => "\x{25fb}", - "EmptyVerySmallSquare;" => "\x{25ab}", - "Eogon;" => "\x{118}", - "Eopf;" => "\x{1d53c}", - "Epsilon;" => "\x{395}", - "Equal;" => "\x{2a75}", - "EqualTilde;" => "\x{2242}", - "Equilibrium;" => "\x{21cc}", - "Escr;" => "\x{2130}", - "Esim;" => "\x{2a73}", - "Eta;" => "\x{397}", - "Euml" => "\313", - "Euml;" => "\313", - "Exists;" => "\x{2203}", - "ExponentialE;" => "\x{2147}", - "Fcy;" => "\x{424}", - "Ffr;" => "\x{1d509}", - "FilledSmallSquare;" => "\x{25fc}", - "FilledVerySmallSquare;" => "\x{25aa}", - "Fopf;" => "\x{1d53d}", - "ForAll;" => "\x{2200}", - "Fouriertrf;" => "\x{2131}", - "Fscr;" => "\x{2131}", - "GJcy;" => "\x{403}", - "GT" => ">", - "GT;" => ">", - "Gamma;" => "\x{393}", - "Gammad;" => "\x{3dc}", - "Gbreve;" => "\x{11e}", - "Gcedil;" => "\x{122}", - "Gcirc;" => "\x{11c}", - "Gcy;" => "\x{413}", - "Gdot;" => "\x{120}", - "Gfr;" => "\x{1d50a}", - "Gg;" => "\x{22d9}", - "Gopf;" => "\x{1d53e}", - "GreaterEqual;" => "\x{2265}", - "GreaterEqualLess;" => "\x{22db}", - "GreaterFullEqual;" => "\x{2267}", - "GreaterGreater;" => "\x{2aa2}", - "GreaterLess;" => "\x{2277}", - "GreaterSlantEqual;" => "\x{2a7e}", - "GreaterTilde;" => "\x{2273}", - "Gscr;" => "\x{1d4a2}", - "Gt;" => "\x{226b}", - "HARDcy;" => "\x{42a}", - "Hacek;" => "\x{2c7}", - "Hat;" => "^", - "Hcirc;" => "\x{124}", - "Hfr;" => "\x{210c}", - "HilbertSpace;" => "\x{210b}", - "Hopf;" => 
"\x{210d}", - "HorizontalLine;" => "\x{2500}", - "Hscr;" => "\x{210b}", - "Hstrok;" => "\x{126}", - "HumpDownHump;" => "\x{224e}", - "HumpEqual;" => "\x{224f}", - "IEcy;" => "\x{415}", - "IJlig;" => "\x{132}", - "IOcy;" => "\x{401}", - "Iacute" => "\315", - "Iacute;" => "\315", - "Icirc" => "\316", - "Icirc;" => "\316", - "Icy;" => "\x{418}", - "Idot;" => "\x{130}", - "Ifr;" => "\x{2111}", - "Igrave" => "\314", - "Igrave;" => "\314", - "Im;" => "\x{2111}", - "Imacr;" => "\x{12a}", - "ImaginaryI;" => "\x{2148}", - "Implies;" => "\x{21d2}", - "Int;" => "\x{222c}", - "Integral;" => "\x{222b}", - "Intersection;" => "\x{22c2}", - "InvisibleComma;" => "\x{2063}", - "InvisibleTimes;" => "\x{2062}", - "Iogon;" => "\x{12e}", - "Iopf;" => "\x{1d540}", - "Iota;" => "\x{399}", - "Iscr;" => "\x{2110}", - "Itilde;" => "\x{128}", - "Iukcy;" => "\x{406}", - "Iuml" => "\317", - "Iuml;" => "\317", - "Jcirc;" => "\x{134}", - "Jcy;" => "\x{419}", - "Jfr;" => "\x{1d50d}", - "Jopf;" => "\x{1d541}", - "Jscr;" => "\x{1d4a5}", - "Jsercy;" => "\x{408}", - "Jukcy;" => "\x{404}", - "KHcy;" => "\x{425}", - "KJcy;" => "\x{40c}", - "Kappa;" => "\x{39a}", - "Kcedil;" => "\x{136}", - "Kcy;" => "\x{41a}", - "Kfr;" => "\x{1d50e}", - "Kopf;" => "\x{1d542}", - "Kscr;" => "\x{1d4a6}", - "LJcy;" => "\x{409}", - "LT" => "<", - "LT;" => "<", - "Lacute;" => "\x{139}", - "Lambda;" => "\x{39b}", - "Lang;" => "\x{27ea}", - "Laplacetrf;" => "\x{2112}", - "Larr;" => "\x{219e}", - "Lcaron;" => "\x{13d}", - "Lcedil;" => "\x{13b}", - "Lcy;" => "\x{41b}", - "LeftAngleBracket;" => "\x{27e8}", - "LeftArrow;" => "\x{2190}", - "LeftArrowBar;" => "\x{21e4}", - "LeftArrowRightArrow;" => "\x{21c6}", - "LeftCeiling;" => "\x{2308}", - "LeftDoubleBracket;" => "\x{27e6}", - "LeftDownTeeVector;" => "\x{2961}", - "LeftDownVector;" => "\x{21c3}", - "LeftDownVectorBar;" => "\x{2959}", - "LeftFloor;" => "\x{230a}", - "LeftRightArrow;" => "\x{2194}", - "LeftRightVector;" => "\x{294e}", - "LeftTee;" => "\x{22a3}", - "LeftTeeArrow;" 
=> "\x{21a4}", - "LeftTeeVector;" => "\x{295a}", - "LeftTriangle;" => "\x{22b2}", - "LeftTriangleBar;" => "\x{29cf}", - "LeftTriangleEqual;" => "\x{22b4}", - "LeftUpDownVector;" => "\x{2951}", - "LeftUpTeeVector;" => "\x{2960}", - "LeftUpVector;" => "\x{21bf}", - "LeftUpVectorBar;" => "\x{2958}", - "LeftVector;" => "\x{21bc}", - "LeftVectorBar;" => "\x{2952}", - "Leftarrow;" => "\x{21d0}", - "Leftrightarrow;" => "\x{21d4}", - "LessEqualGreater;" => "\x{22da}", - "LessFullEqual;" => "\x{2266}", - "LessGreater;" => "\x{2276}", - "LessLess;" => "\x{2aa1}", - "LessSlantEqual;" => "\x{2a7d}", - "LessTilde;" => "\x{2272}", - "Lfr;" => "\x{1d50f}", - "Ll;" => "\x{22d8}", - "Lleftarrow;" => "\x{21da}", - "Lmidot;" => "\x{13f}", - "LongLeftArrow;" => "\x{27f5}", - "LongLeftRightArrow;" => "\x{27f7}", - "LongRightArrow;" => "\x{27f6}", - "Longleftarrow;" => "\x{27f8}", - "Longleftrightarrow;" => "\x{27fa}", - "Longrightarrow;" => "\x{27f9}", - "Lopf;" => "\x{1d543}", - "LowerLeftArrow;" => "\x{2199}", - "LowerRightArrow;" => "\x{2198}", - "Lscr;" => "\x{2112}", - "Lsh;" => "\x{21b0}", - "Lstrok;" => "\x{141}", - "Lt;" => "\x{226a}", - "Map;" => "\x{2905}", - "Mcy;" => "\x{41c}", - "MediumSpace;" => "\x{205f}", - "Mellintrf;" => "\x{2133}", - "Mfr;" => "\x{1d510}", - "MinusPlus;" => "\x{2213}", - "Mopf;" => "\x{1d544}", - "Mscr;" => "\x{2133}", - "Mu;" => "\x{39c}", - "NJcy;" => "\x{40a}", - "Nacute;" => "\x{143}", - "Ncaron;" => "\x{147}", - "Ncedil;" => "\x{145}", - "Ncy;" => "\x{41d}", - "NegativeMediumSpace;" => "\x{200b}", - "NegativeThickSpace;" => "\x{200b}", - "NegativeThinSpace;" => "\x{200b}", - "NegativeVeryThinSpace;" => "\x{200b}", - "NestedGreaterGreater;" => "\x{226b}", - "NestedLessLess;" => "\x{226a}", - "NewLine;" => "\n", - "Nfr;" => "\x{1d511}", - "NoBreak;" => "\x{2060}", - "NonBreakingSpace;" => "\240", - "Nopf;" => "\x{2115}", - "Not;" => "\x{2aec}", - "NotCongruent;" => "\x{2262}", - "NotCupCap;" => "\x{226d}", - "NotDoubleVerticalBar;" => "\x{2226}", 
- "NotElement;" => "\x{2209}", - "NotEqual;" => "\x{2260}", - "NotEqualTilde;" => "\x{2242}\x{338}", - "NotExists;" => "\x{2204}", - "NotGreater;" => "\x{226f}", - "NotGreaterEqual;" => "\x{2271}", - "NotGreaterFullEqual;" => "\x{2267}\x{338}", - "NotGreaterGreater;" => "\x{226b}\x{338}", - "NotGreaterLess;" => "\x{2279}", - "NotGreaterSlantEqual;" => "\x{2a7e}\x{338}", - "NotGreaterTilde;" => "\x{2275}", - "NotHumpDownHump;" => "\x{224e}\x{338}", - "NotHumpEqual;" => "\x{224f}\x{338}", - "NotLeftTriangle;" => "\x{22ea}", - "NotLeftTriangleBar;" => "\x{29cf}\x{338}", - "NotLeftTriangleEqual;" => "\x{22ec}", - "NotLess;" => "\x{226e}", - "NotLessEqual;" => "\x{2270}", - "NotLessGreater;" => "\x{2278}", - "NotLessLess;" => "\x{226a}\x{338}", - "NotLessSlantEqual;" => "\x{2a7d}\x{338}", - "NotLessTilde;" => "\x{2274}", - "NotNestedGreaterGreater;" => "\x{2aa2}\x{338}", - "NotNestedLessLess;" => "\x{2aa1}\x{338}", - "NotPrecedes;" => "\x{2280}", - "NotPrecedesEqual;" => "\x{2aaf}\x{338}", - "NotPrecedesSlantEqual;" => "\x{22e0}", - "NotReverseElement;" => "\x{220c}", - "NotRightTriangle;" => "\x{22eb}", - "NotRightTriangleBar;" => "\x{29d0}\x{338}", - "NotRightTriangleEqual;" => "\x{22ed}", - "NotSquareSubset;" => "\x{228f}\x{338}", - "NotSquareSubsetEqual;" => "\x{22e2}", - "NotSquareSuperset;" => "\x{2290}\x{338}", - "NotSquareSupersetEqual;" => "\x{22e3}", - "NotSubset;" => "\x{2282}\x{20d2}", - "NotSubsetEqual;" => "\x{2288}", - "NotSucceeds;" => "\x{2281}", - "NotSucceedsEqual;" => "\x{2ab0}\x{338}", - "NotSucceedsSlantEqual;" => "\x{22e1}", - "NotSucceedsTilde;" => "\x{227f}\x{338}", - "NotSuperset;" => "\x{2283}\x{20d2}", - "NotSupersetEqual;" => "\x{2289}", - "NotTilde;" => "\x{2241}", - "NotTildeEqual;" => "\x{2244}", - "NotTildeFullEqual;" => "\x{2247}", - "NotTildeTilde;" => "\x{2249}", - "NotVerticalBar;" => "\x{2224}", - "Nscr;" => "\x{1d4a9}", - "Ntilde" => "\321", - "Ntilde;" => "\321", - "Nu;" => "\x{39d}", - "OElig;" => "\x{152}", - "Oacute" => "\323", 
- "Oacute;" => "\323", - "Ocirc" => "\324", - "Ocirc;" => "\324", - "Ocy;" => "\x{41e}", - "Odblac;" => "\x{150}", - "Ofr;" => "\x{1d512}", - "Ograve" => "\322", - "Ograve;" => "\322", - "Omacr;" => "\x{14c}", - "Omega;" => "\x{3a9}", - "Omicron;" => "\x{39f}", - "Oopf;" => "\x{1d546}", - "OpenCurlyDoubleQuote;" => "\x{201c}", - "OpenCurlyQuote;" => "\x{2018}", - "Or;" => "\x{2a54}", - "Oscr;" => "\x{1d4aa}", - "Oslash" => "\330", - "Oslash;" => "\330", - "Otilde" => "\325", - "Otilde;" => "\325", - "Otimes;" => "\x{2a37}", - "Ouml" => "\326", - "Ouml;" => "\326", - "OverBar;" => "\x{203e}", - "OverBrace;" => "\x{23de}", - "OverBracket;" => "\x{23b4}", - "OverParenthesis;" => "\x{23dc}", - "PartialD;" => "\x{2202}", - "Pcy;" => "\x{41f}", - "Pfr;" => "\x{1d513}", - "Phi;" => "\x{3a6}", - "Pi;" => "\x{3a0}", - "PlusMinus;" => "\261", - "Poincareplane;" => "\x{210c}", - "Popf;" => "\x{2119}", - "Pr;" => "\x{2abb}", - "Precedes;" => "\x{227a}", - "PrecedesEqual;" => "\x{2aaf}", - "PrecedesSlantEqual;" => "\x{227c}", - "PrecedesTilde;" => "\x{227e}", - "Prime;" => "\x{2033}", - "Product;" => "\x{220f}", - "Proportion;" => "\x{2237}", - "Proportional;" => "\x{221d}", - "Pscr;" => "\x{1d4ab}", - "Psi;" => "\x{3a8}", - "QUOT" => "\"", - "QUOT;" => "\"", - "Qfr;" => "\x{1d514}", - "Qopf;" => "\x{211a}", - "Qscr;" => "\x{1d4ac}", - "RBarr;" => "\x{2910}", - "REG" => "\256", - "REG;" => "\256", - "Racute;" => "\x{154}", - "Rang;" => "\x{27eb}", - "Rarr;" => "\x{21a0}", - "Rarrtl;" => "\x{2916}", - "Rcaron;" => "\x{158}", - "Rcedil;" => "\x{156}", - "Rcy;" => "\x{420}", - "Re;" => "\x{211c}", - "ReverseElement;" => "\x{220b}", - "ReverseEquilibrium;" => "\x{21cb}", - "ReverseUpEquilibrium;" => "\x{296f}", - "Rfr;" => "\x{211c}", - "Rho;" => "\x{3a1}", - "RightAngleBracket;" => "\x{27e9}", - "RightArrow;" => "\x{2192}", - "RightArrowBar;" => "\x{21e5}", - "RightArrowLeftArrow;" => "\x{21c4}", - "RightCeiling;" => "\x{2309}", - "RightDoubleBracket;" => "\x{27e7}", - 
"RightDownTeeVector;" => "\x{295d}", - "RightDownVector;" => "\x{21c2}", - "RightDownVectorBar;" => "\x{2955}", - "RightFloor;" => "\x{230b}", - "RightTee;" => "\x{22a2}", - "RightTeeArrow;" => "\x{21a6}", - "RightTeeVector;" => "\x{295b}", - "RightTriangle;" => "\x{22b3}", - "RightTriangleBar;" => "\x{29d0}", - "RightTriangleEqual;" => "\x{22b5}", - "RightUpDownVector;" => "\x{294f}", - "RightUpTeeVector;" => "\x{295c}", - "RightUpVector;" => "\x{21be}", - "RightUpVectorBar;" => "\x{2954}", - "RightVector;" => "\x{21c0}", - "RightVectorBar;" => "\x{2953}", - "Rightarrow;" => "\x{21d2}", - "Ropf;" => "\x{211d}", - "RoundImplies;" => "\x{2970}", - "Rrightarrow;" => "\x{21db}", - "Rscr;" => "\x{211b}", - "Rsh;" => "\x{21b1}", - "RuleDelayed;" => "\x{29f4}", - "SHCHcy;" => "\x{429}", - "SHcy;" => "\x{428}", - "SOFTcy;" => "\x{42c}", - "Sacute;" => "\x{15a}", - "Sc;" => "\x{2abc}", - "Scaron;" => "\x{160}", - "Scedil;" => "\x{15e}", - "Scirc;" => "\x{15c}", - "Scy;" => "\x{421}", - "Sfr;" => "\x{1d516}", - "ShortDownArrow;" => "\x{2193}", - "ShortLeftArrow;" => "\x{2190}", - "ShortRightArrow;" => "\x{2192}", - "ShortUpArrow;" => "\x{2191}", - "Sigma;" => "\x{3a3}", - "SmallCircle;" => "\x{2218}", - "Sopf;" => "\x{1d54a}", - "Sqrt;" => "\x{221a}", - "Square;" => "\x{25a1}", - "SquareIntersection;" => "\x{2293}", - "SquareSubset;" => "\x{228f}", - "SquareSubsetEqual;" => "\x{2291}", - "SquareSuperset;" => "\x{2290}", - "SquareSupersetEqual;" => "\x{2292}", - "SquareUnion;" => "\x{2294}", - "Sscr;" => "\x{1d4ae}", - "Star;" => "\x{22c6}", - "Sub;" => "\x{22d0}", - "Subset;" => "\x{22d0}", - "SubsetEqual;" => "\x{2286}", - "Succeeds;" => "\x{227b}", - "SucceedsEqual;" => "\x{2ab0}", - "SucceedsSlantEqual;" => "\x{227d}", - "SucceedsTilde;" => "\x{227f}", - "SuchThat;" => "\x{220b}", - "Sum;" => "\x{2211}", - "Sup;" => "\x{22d1}", - "Superset;" => "\x{2283}", - "SupersetEqual;" => "\x{2287}", - "Supset;" => "\x{22d1}", - "THORN" => "\336", - "THORN;" => "\336", - "TRADE;" 
=> "\x{2122}", - "TSHcy;" => "\x{40b}", - "TScy;" => "\x{426}", - "Tab;" => "\t", - "Tau;" => "\x{3a4}", - "Tcaron;" => "\x{164}", - "Tcedil;" => "\x{162}", - "Tcy;" => "\x{422}", - "Tfr;" => "\x{1d517}", - "Therefore;" => "\x{2234}", - "Theta;" => "\x{398}", - "ThickSpace;" => "\x{205f}\x{200a}", - "ThinSpace;" => "\x{2009}", - "Tilde;" => "\x{223c}", - "TildeEqual;" => "\x{2243}", - "TildeFullEqual;" => "\x{2245}", - "TildeTilde;" => "\x{2248}", - "Topf;" => "\x{1d54b}", - "TripleDot;" => "\x{20db}", - "Tscr;" => "\x{1d4af}", - "Tstrok;" => "\x{166}", - "Uacute" => "\332", - "Uacute;" => "\332", - "Uarr;" => "\x{219f}", - "Uarrocir;" => "\x{2949}", - "Ubrcy;" => "\x{40e}", - "Ubreve;" => "\x{16c}", - "Ucirc" => "\333", - "Ucirc;" => "\333", - "Ucy;" => "\x{423}", - "Udblac;" => "\x{170}", - "Ufr;" => "\x{1d518}", - "Ugrave" => "\331", - "Ugrave;" => "\331", - "Umacr;" => "\x{16a}", - "UnderBar;" => "_", - "UnderBrace;" => "\x{23df}", - "UnderBracket;" => "\x{23b5}", - "UnderParenthesis;" => "\x{23dd}", - "Union;" => "\x{22c3}", - "UnionPlus;" => "\x{228e}", - "Uogon;" => "\x{172}", - "Uopf;" => "\x{1d54c}", - "UpArrow;" => "\x{2191}", - "UpArrowBar;" => "\x{2912}", - "UpArrowDownArrow;" => "\x{21c5}", - "UpDownArrow;" => "\x{2195}", - "UpEquilibrium;" => "\x{296e}", - "UpTee;" => "\x{22a5}", - "UpTeeArrow;" => "\x{21a5}", - "Uparrow;" => "\x{21d1}", - "Updownarrow;" => "\x{21d5}", - "UpperLeftArrow;" => "\x{2196}", - "UpperRightArrow;" => "\x{2197}", - "Upsi;" => "\x{3d2}", - "Upsilon;" => "\x{3a5}", - "Uring;" => "\x{16e}", - "Uscr;" => "\x{1d4b0}", - "Utilde;" => "\x{168}", - "Uuml" => "\334", - "Uuml;" => "\334", - "VDash;" => "\x{22ab}", - "Vbar;" => "\x{2aeb}", - "Vcy;" => "\x{412}", - "Vdash;" => "\x{22a9}", - "Vdashl;" => "\x{2ae6}", - "Vee;" => "\x{22c1}", - "Verbar;" => "\x{2016}", - "Vert;" => "\x{2016}", - "VerticalBar;" => "\x{2223}", - "VerticalLine;" => "|", - "VerticalSeparator;" => "\x{2758}", - "VerticalTilde;" => "\x{2240}", - "VeryThinSpace;" 
=> "\x{200a}", - "Vfr;" => "\x{1d519}", - "Vopf;" => "\x{1d54d}", - "Vscr;" => "\x{1d4b1}", - "Vvdash;" => "\x{22aa}", - "Wcirc;" => "\x{174}", - "Wedge;" => "\x{22c0}", - "Wfr;" => "\x{1d51a}", - "Wopf;" => "\x{1d54e}", - "Wscr;" => "\x{1d4b2}", - "Xfr;" => "\x{1d51b}", - "Xi;" => "\x{39e}", - "Xopf;" => "\x{1d54f}", - "Xscr;" => "\x{1d4b3}", - "YAcy;" => "\x{42f}", - "YIcy;" => "\x{407}", - "YUcy;" => "\x{42e}", - "Yacute" => "\335", - "Yacute;" => "\335", - "Ycirc;" => "\x{176}", - "Ycy;" => "\x{42b}", - "Yfr;" => "\x{1d51c}", - "Yopf;" => "\x{1d550}", - "Yscr;" => "\x{1d4b4}", - "Yuml;" => "\x{178}", - "ZHcy;" => "\x{416}", - "Zacute;" => "\x{179}", - "Zcaron;" => "\x{17d}", - "Zcy;" => "\x{417}", - "Zdot;" => "\x{17b}", - "ZeroWidthSpace;" => "\x{200b}", - "Zeta;" => "\x{396}", - "Zfr;" => "\x{2128}", - "Zopf;" => "\x{2124}", - "Zscr;" => "\x{1d4b5}", - "aacute" => "\341", - "aacute;" => "\341", - "abreve;" => "\x{103}", - "ac;" => "\x{223e}", - "acE;" => "\x{223e}\x{333}", - "acd;" => "\x{223f}", - "acirc" => "\342", - "acirc;" => "\342", - "acute" => "\264", - "acute;" => "\264", - "acy;" => "\x{430}", - "aelig" => "\346", - "aelig;" => "\346", - "af;" => "\x{2061}", - "afr;" => "\x{1d51e}", - "agrave" => "\340", - "agrave;" => "\340", - "alefsym;" => "\x{2135}", - "aleph;" => "\x{2135}", - "alpha;" => "\x{3b1}", - "amacr;" => "\x{101}", - "amalg;" => "\x{2a3f}", - "amp" => "&", - "amp;" => "&", - "and;" => "\x{2227}", - "andand;" => "\x{2a55}", - "andd;" => "\x{2a5c}", - "andslope;" => "\x{2a58}", - "andv;" => "\x{2a5a}", - "ang;" => "\x{2220}", - "ange;" => "\x{29a4}", - "angle;" => "\x{2220}", - "angmsd;" => "\x{2221}", - "angmsdaa;" => "\x{29a8}", - "angmsdab;" => "\x{29a9}", - "angmsdac;" => "\x{29aa}", - "angmsdad;" => "\x{29ab}", - "angmsdae;" => "\x{29ac}", - "angmsdaf;" => "\x{29ad}", - "angmsdag;" => "\x{29ae}", - "angmsdah;" => "\x{29af}", - "angrt;" => "\x{221f}", - "angrtvb;" => "\x{22be}", - "angrtvbd;" => "\x{299d}", - "angsph;" => "\x{2222}", 
- "angst;" => "\305", - "angzarr;" => "\x{237c}", - "aogon;" => "\x{105}", - "aopf;" => "\x{1d552}", - "ap;" => "\x{2248}", - "apE;" => "\x{2a70}", - "apacir;" => "\x{2a6f}", - "ape;" => "\x{224a}", - "apid;" => "\x{224b}", - "apos;" => "'", - "approx;" => "\x{2248}", - "approxeq;" => "\x{224a}", - "aring" => "\345", - "aring;" => "\345", - "ascr;" => "\x{1d4b6}", - "ast;" => "*", - "asymp;" => "\x{2248}", - "asympeq;" => "\x{224d}", - "atilde" => "\343", - "atilde;" => "\343", - "auml" => "\344", - "auml;" => "\344", - "awconint;" => "\x{2233}", - "awint;" => "\x{2a11}", - "bNot;" => "\x{2aed}", - "backcong;" => "\x{224c}", - "backepsilon;" => "\x{3f6}", - "backprime;" => "\x{2035}", - "backsim;" => "\x{223d}", - "backsimeq;" => "\x{22cd}", - "barvee;" => "\x{22bd}", - "barwed;" => "\x{2305}", - "barwedge;" => "\x{2305}", - "bbrk;" => "\x{23b5}", - "bbrktbrk;" => "\x{23b6}", - "bcong;" => "\x{224c}", - "bcy;" => "\x{431}", - "bdquo;" => "\x{201e}", - "becaus;" => "\x{2235}", - "because;" => "\x{2235}", - "bemptyv;" => "\x{29b0}", - "bepsi;" => "\x{3f6}", - "bernou;" => "\x{212c}", - "beta;" => "\x{3b2}", - "beth;" => "\x{2136}", - "between;" => "\x{226c}", - "bfr;" => "\x{1d51f}", - "bigcap;" => "\x{22c2}", - "bigcirc;" => "\x{25ef}", - "bigcup;" => "\x{22c3}", - "bigodot;" => "\x{2a00}", - "bigoplus;" => "\x{2a01}", - "bigotimes;" => "\x{2a02}", - "bigsqcup;" => "\x{2a06}", - "bigstar;" => "\x{2605}", - "bigtriangledown;" => "\x{25bd}", - "bigtriangleup;" => "\x{25b3}", - "biguplus;" => "\x{2a04}", - "bigvee;" => "\x{22c1}", - "bigwedge;" => "\x{22c0}", - "bkarow;" => "\x{290d}", - "blacklozenge;" => "\x{29eb}", - "blacksquare;" => "\x{25aa}", - "blacktriangle;" => "\x{25b4}", - "blacktriangledown;" => "\x{25be}", - "blacktriangleleft;" => "\x{25c2}", - "blacktriangleright;" => "\x{25b8}", - "blank;" => "\x{2423}", - "blk12;" => "\x{2592}", - "blk14;" => "\x{2591}", - "blk34;" => "\x{2593}", - "block;" => "\x{2588}", - "bne;" => "=\x{20e5}", - "bnequiv;" => 
"\x{2261}\x{20e5}", - "bnot;" => "\x{2310}", - "bopf;" => "\x{1d553}", - "bot;" => "\x{22a5}", - "bottom;" => "\x{22a5}", - "bowtie;" => "\x{22c8}", - "boxDL;" => "\x{2557}", - "boxDR;" => "\x{2554}", - "boxDl;" => "\x{2556}", - "boxDr;" => "\x{2553}", - "boxH;" => "\x{2550}", - "boxHD;" => "\x{2566}", - "boxHU;" => "\x{2569}", - "boxHd;" => "\x{2564}", - "boxHu;" => "\x{2567}", - "boxUL;" => "\x{255d}", - "boxUR;" => "\x{255a}", - "boxUl;" => "\x{255c}", - "boxUr;" => "\x{2559}", - "boxV;" => "\x{2551}", - "boxVH;" => "\x{256c}", - "boxVL;" => "\x{2563}", - "boxVR;" => "\x{2560}", - "boxVh;" => "\x{256b}", - "boxVl;" => "\x{2562}", - "boxVr;" => "\x{255f}", - "boxbox;" => "\x{29c9}", - "boxdL;" => "\x{2555}", - "boxdR;" => "\x{2552}", - "boxdl;" => "\x{2510}", - "boxdr;" => "\x{250c}", - "boxh;" => "\x{2500}", - "boxhD;" => "\x{2565}", - "boxhU;" => "\x{2568}", - "boxhd;" => "\x{252c}", - "boxhu;" => "\x{2534}", - "boxminus;" => "\x{229f}", - "boxplus;" => "\x{229e}", - "boxtimes;" => "\x{22a0}", - "boxuL;" => "\x{255b}", - "boxuR;" => "\x{2558}", - "boxul;" => "\x{2518}", - "boxur;" => "\x{2514}", - "boxv;" => "\x{2502}", - "boxvH;" => "\x{256a}", - "boxvL;" => "\x{2561}", - "boxvR;" => "\x{255e}", - "boxvh;" => "\x{253c}", - "boxvl;" => "\x{2524}", - "boxvr;" => "\x{251c}", - "bprime;" => "\x{2035}", - "breve;" => "\x{2d8}", - "brvbar" => "\246", - "brvbar;" => "\246", - "bscr;" => "\x{1d4b7}", - "bsemi;" => "\x{204f}", - "bsim;" => "\x{223d}", - "bsime;" => "\x{22cd}", - "bsol;" => "\\", - "bsolb;" => "\x{29c5}", - "bsolhsub;" => "\x{27c8}", - "bull;" => "\x{2022}", - "bullet;" => "\x{2022}", - "bump;" => "\x{224e}", - "bumpE;" => "\x{2aae}", - "bumpe;" => "\x{224f}", - "bumpeq;" => "\x{224f}", - "cacute;" => "\x{107}", - "cap;" => "\x{2229}", - "capand;" => "\x{2a44}", - "capbrcup;" => "\x{2a49}", - "capcap;" => "\x{2a4b}", - "capcup;" => "\x{2a47}", - "capdot;" => "\x{2a40}", - "caps;" => "\x{2229}\x{fe00}", - "caret;" => "\x{2041}", - "caron;" => "\x{2c7}", 
- "ccaps;" => "\x{2a4d}", - "ccaron;" => "\x{10d}", - "ccedil" => "\347", - "ccedil;" => "\347", - "ccirc;" => "\x{109}", - "ccups;" => "\x{2a4c}", - "ccupssm;" => "\x{2a50}", - "cdot;" => "\x{10b}", - "cedil" => "\270", - "cedil;" => "\270", - "cemptyv;" => "\x{29b2}", - "cent" => "\242", - "cent;" => "\242", - "centerdot;" => "\267", - "cfr;" => "\x{1d520}", - "chcy;" => "\x{447}", - "check;" => "\x{2713}", - "checkmark;" => "\x{2713}", - "chi;" => "\x{3c7}", - "cir;" => "\x{25cb}", - "cirE;" => "\x{29c3}", - "circ;" => "\x{2c6}", - "circeq;" => "\x{2257}", - "circlearrowleft;" => "\x{21ba}", - "circlearrowright;" => "\x{21bb}", - "circledR;" => "\256", - "circledS;" => "\x{24c8}", - "circledast;" => "\x{229b}", - "circledcirc;" => "\x{229a}", - "circleddash;" => "\x{229d}", - "cire;" => "\x{2257}", - "cirfnint;" => "\x{2a10}", - "cirmid;" => "\x{2aef}", - "cirscir;" => "\x{29c2}", - "clubs;" => "\x{2663}", - "clubsuit;" => "\x{2663}", - "colon;" => ":", - "colone;" => "\x{2254}", - "coloneq;" => "\x{2254}", - "comma;" => ",", - "commat;" => "\@", - "comp;" => "\x{2201}", - "compfn;" => "\x{2218}", - "complement;" => "\x{2201}", - "complexes;" => "\x{2102}", - "cong;" => "\x{2245}", - "congdot;" => "\x{2a6d}", - "conint;" => "\x{222e}", - "copf;" => "\x{1d554}", - "coprod;" => "\x{2210}", - "copy" => "\251", - "copy;" => "\251", - "copysr;" => "\x{2117}", - "crarr;" => "\x{21b5}", - "cross;" => "\x{2717}", - "cscr;" => "\x{1d4b8}", - "csub;" => "\x{2acf}", - "csube;" => "\x{2ad1}", - "csup;" => "\x{2ad0}", - "csupe;" => "\x{2ad2}", - "ctdot;" => "\x{22ef}", - "cudarrl;" => "\x{2938}", - "cudarrr;" => "\x{2935}", - "cuepr;" => "\x{22de}", - "cuesc;" => "\x{22df}", - "cularr;" => "\x{21b6}", - "cularrp;" => "\x{293d}", - "cup;" => "\x{222a}", - "cupbrcap;" => "\x{2a48}", - "cupcap;" => "\x{2a46}", - "cupcup;" => "\x{2a4a}", - "cupdot;" => "\x{228d}", - "cupor;" => "\x{2a45}", - "cups;" => "\x{222a}\x{fe00}", - "curarr;" => "\x{21b7}", - "curarrm;" => "\x{293c}", - 
"curlyeqprec;" => "\x{22de}", - "curlyeqsucc;" => "\x{22df}", - "curlyvee;" => "\x{22ce}", - "curlywedge;" => "\x{22cf}", - "curren" => "\244", - "curren;" => "\244", - "curvearrowleft;" => "\x{21b6}", - "curvearrowright;" => "\x{21b7}", - "cuvee;" => "\x{22ce}", - "cuwed;" => "\x{22cf}", - "cwconint;" => "\x{2232}", - "cwint;" => "\x{2231}", - "cylcty;" => "\x{232d}", - "dArr;" => "\x{21d3}", - "dHar;" => "\x{2965}", - "dagger;" => "\x{2020}", - "daleth;" => "\x{2138}", - "darr;" => "\x{2193}", - "dash;" => "\x{2010}", - "dashv;" => "\x{22a3}", - "dbkarow;" => "\x{290f}", - "dblac;" => "\x{2dd}", - "dcaron;" => "\x{10f}", - "dcy;" => "\x{434}", - "dd;" => "\x{2146}", - "ddagger;" => "\x{2021}", - "ddarr;" => "\x{21ca}", - "ddotseq;" => "\x{2a77}", - "deg" => "\260", - "deg;" => "\260", - "delta;" => "\x{3b4}", - "demptyv;" => "\x{29b1}", - "dfisht;" => "\x{297f}", - "dfr;" => "\x{1d521}", - "dharl;" => "\x{21c3}", - "dharr;" => "\x{21c2}", - "diam;" => "\x{22c4}", - "diamond;" => "\x{22c4}", - "diamondsuit;" => "\x{2666}", - "diams;" => "\x{2666}", - "die;" => "\250", - "digamma;" => "\x{3dd}", - "disin;" => "\x{22f2}", - "div;" => "\367", - "divide" => "\367", - "divide;" => "\367", - "divideontimes;" => "\x{22c7}", - "divonx;" => "\x{22c7}", - "djcy;" => "\x{452}", - "dlcorn;" => "\x{231e}", - "dlcrop;" => "\x{230d}", - "dollar;" => "\$", - "dopf;" => "\x{1d555}", - "dot;" => "\x{2d9}", - "doteq;" => "\x{2250}", - "doteqdot;" => "\x{2251}", - "dotminus;" => "\x{2238}", - "dotplus;" => "\x{2214}", - "dotsquare;" => "\x{22a1}", - "doublebarwedge;" => "\x{2306}", - "downarrow;" => "\x{2193}", - "downdownarrows;" => "\x{21ca}", - "downharpoonleft;" => "\x{21c3}", - "downharpoonright;" => "\x{21c2}", - "drbkarow;" => "\x{2910}", - "drcorn;" => "\x{231f}", - "drcrop;" => "\x{230c}", - "dscr;" => "\x{1d4b9}", - "dscy;" => "\x{455}", - "dsol;" => "\x{29f6}", - "dstrok;" => "\x{111}", - "dtdot;" => "\x{22f1}", - "dtri;" => "\x{25bf}", - "dtrif;" => "\x{25be}", - "duarr;" 
=> "\x{21f5}", - "duhar;" => "\x{296f}", - "dwangle;" => "\x{29a6}", - "dzcy;" => "\x{45f}", - "dzigrarr;" => "\x{27ff}", - "eDDot;" => "\x{2a77}", - "eDot;" => "\x{2251}", - "eacute" => "\351", - "eacute;" => "\351", - "easter;" => "\x{2a6e}", - "ecaron;" => "\x{11b}", - "ecir;" => "\x{2256}", - "ecirc" => "\352", - "ecirc;" => "\352", - "ecolon;" => "\x{2255}", - "ecy;" => "\x{44d}", - "edot;" => "\x{117}", - "ee;" => "\x{2147}", - "efDot;" => "\x{2252}", - "efr;" => "\x{1d522}", - "eg;" => "\x{2a9a}", - "egrave" => "\350", - "egrave;" => "\350", - "egs;" => "\x{2a96}", - "egsdot;" => "\x{2a98}", - "el;" => "\x{2a99}", - "elinters;" => "\x{23e7}", - "ell;" => "\x{2113}", - "els;" => "\x{2a95}", - "elsdot;" => "\x{2a97}", - "emacr;" => "\x{113}", - "empty;" => "\x{2205}", - "emptyset;" => "\x{2205}", - "emptyv;" => "\x{2205}", - "emsp13;" => "\x{2004}", - "emsp14;" => "\x{2005}", - "emsp;" => "\x{2003}", - "eng;" => "\x{14b}", - "ensp;" => "\x{2002}", - "eogon;" => "\x{119}", - "eopf;" => "\x{1d556}", - "epar;" => "\x{22d5}", - "eparsl;" => "\x{29e3}", - "eplus;" => "\x{2a71}", - "epsi;" => "\x{3b5}", - "epsilon;" => "\x{3b5}", - "epsiv;" => "\x{3f5}", - "eqcirc;" => "\x{2256}", - "eqcolon;" => "\x{2255}", - "eqsim;" => "\x{2242}", - "eqslantgtr;" => "\x{2a96}", - "eqslantless;" => "\x{2a95}", - "equals;" => "=", - "equest;" => "\x{225f}", - "equiv;" => "\x{2261}", - "equivDD;" => "\x{2a78}", - "eqvparsl;" => "\x{29e5}", - "erDot;" => "\x{2253}", - "erarr;" => "\x{2971}", - "escr;" => "\x{212f}", - "esdot;" => "\x{2250}", - "esim;" => "\x{2242}", - "eta;" => "\x{3b7}", - "eth" => "\360", - "eth;" => "\360", - "euml" => "\353", - "euml;" => "\353", - "euro;" => "\x{20ac}", - "excl;" => "!", - "exist;" => "\x{2203}", - "expectation;" => "\x{2130}", - "exponentiale;" => "\x{2147}", - "fallingdotseq;" => "\x{2252}", - "fcy;" => "\x{444}", - "female;" => "\x{2640}", - "ffilig;" => "\x{fb03}", - "fflig;" => "\x{fb00}", - "ffllig;" => "\x{fb04}", - "ffr;" => "\x{1d523}", 
- "filig;" => "\x{fb01}", - "fjlig;" => "fj", - "flat;" => "\x{266d}", - "fllig;" => "\x{fb02}", - "fltns;" => "\x{25b1}", - "fnof;" => "\x{192}", - "fopf;" => "\x{1d557}", - "forall;" => "\x{2200}", - "fork;" => "\x{22d4}", - "forkv;" => "\x{2ad9}", - "fpartint;" => "\x{2a0d}", - "frac12" => "\275", - "frac12;" => "\275", - "frac13;" => "\x{2153}", - "frac14" => "\274", - "frac14;" => "\274", - "frac15;" => "\x{2155}", - "frac16;" => "\x{2159}", - "frac18;" => "\x{215b}", - "frac23;" => "\x{2154}", - "frac25;" => "\x{2156}", - "frac34" => "\276", - "frac34;" => "\276", - "frac35;" => "\x{2157}", - "frac38;" => "\x{215c}", - "frac45;" => "\x{2158}", - "frac56;" => "\x{215a}", - "frac58;" => "\x{215d}", - "frac78;" => "\x{215e}", - "frasl;" => "\x{2044}", - "frown;" => "\x{2322}", - "fscr;" => "\x{1d4bb}", - "gE;" => "\x{2267}", - "gEl;" => "\x{2a8c}", - "gacute;" => "\x{1f5}", - "gamma;" => "\x{3b3}", - "gammad;" => "\x{3dd}", - "gap;" => "\x{2a86}", - "gbreve;" => "\x{11f}", - "gcirc;" => "\x{11d}", - "gcy;" => "\x{433}", - "gdot;" => "\x{121}", - "ge;" => "\x{2265}", - "gel;" => "\x{22db}", - "geq;" => "\x{2265}", - "geqq;" => "\x{2267}", - "geqslant;" => "\x{2a7e}", - "ges;" => "\x{2a7e}", - "gescc;" => "\x{2aa9}", - "gesdot;" => "\x{2a80}", - "gesdoto;" => "\x{2a82}", - "gesdotol;" => "\x{2a84}", - "gesl;" => "\x{22db}\x{fe00}", - "gesles;" => "\x{2a94}", - "gfr;" => "\x{1d524}", - "gg;" => "\x{226b}", - "ggg;" => "\x{22d9}", - "gimel;" => "\x{2137}", - "gjcy;" => "\x{453}", - "gl;" => "\x{2277}", - "glE;" => "\x{2a92}", - "gla;" => "\x{2aa5}", - "glj;" => "\x{2aa4}", - "gnE;" => "\x{2269}", - "gnap;" => "\x{2a8a}", - "gnapprox;" => "\x{2a8a}", - "gne;" => "\x{2a88}", - "gneq;" => "\x{2a88}", - "gneqq;" => "\x{2269}", - "gnsim;" => "\x{22e7}", - "gopf;" => "\x{1d558}", - "grave;" => "`", - "gscr;" => "\x{210a}", - "gsim;" => "\x{2273}", - "gsime;" => "\x{2a8e}", - "gsiml;" => "\x{2a90}", - "gt" => ">", - "gt;" => ">", - "gtcc;" => "\x{2aa7}", - "gtcir;" => 
"\x{2a7a}", - "gtdot;" => "\x{22d7}", - "gtlPar;" => "\x{2995}", - "gtquest;" => "\x{2a7c}", - "gtrapprox;" => "\x{2a86}", - "gtrarr;" => "\x{2978}", - "gtrdot;" => "\x{22d7}", - "gtreqless;" => "\x{22db}", - "gtreqqless;" => "\x{2a8c}", - "gtrless;" => "\x{2277}", - "gtrsim;" => "\x{2273}", - "gvertneqq;" => "\x{2269}\x{fe00}", - "gvnE;" => "\x{2269}\x{fe00}", - "hArr;" => "\x{21d4}", - "hairsp;" => "\x{200a}", - "half;" => "\275", - "hamilt;" => "\x{210b}", - "hardcy;" => "\x{44a}", - "harr;" => "\x{2194}", - "harrcir;" => "\x{2948}", - "harrw;" => "\x{21ad}", - "hbar;" => "\x{210f}", - "hcirc;" => "\x{125}", - "hearts;" => "\x{2665}", - "heartsuit;" => "\x{2665}", - "hellip;" => "\x{2026}", - "hercon;" => "\x{22b9}", - "hfr;" => "\x{1d525}", - "hksearow;" => "\x{2925}", - "hkswarow;" => "\x{2926}", - "hoarr;" => "\x{21ff}", - "homtht;" => "\x{223b}", - "hookleftarrow;" => "\x{21a9}", - "hookrightarrow;" => "\x{21aa}", - "hopf;" => "\x{1d559}", - "horbar;" => "\x{2015}", - "hscr;" => "\x{1d4bd}", - "hslash;" => "\x{210f}", - "hstrok;" => "\x{127}", - "hybull;" => "\x{2043}", - "hyphen;" => "\x{2010}", - "iacute" => "\355", - "iacute;" => "\355", - "ic;" => "\x{2063}", - "icirc" => "\356", - "icirc;" => "\356", - "icy;" => "\x{438}", - "iecy;" => "\x{435}", - "iexcl" => "\241", - "iexcl;" => "\241", - "iff;" => "\x{21d4}", - "ifr;" => "\x{1d526}", - "igrave" => "\354", - "igrave;" => "\354", - "ii;" => "\x{2148}", - "iiiint;" => "\x{2a0c}", - "iiint;" => "\x{222d}", - "iinfin;" => "\x{29dc}", - "iiota;" => "\x{2129}", - "ijlig;" => "\x{133}", - "imacr;" => "\x{12b}", - "image;" => "\x{2111}", - "imagline;" => "\x{2110}", - "imagpart;" => "\x{2111}", - "imath;" => "\x{131}", - "imof;" => "\x{22b7}", - "imped;" => "\x{1b5}", - "in;" => "\x{2208}", - "incare;" => "\x{2105}", - "infin;" => "\x{221e}", - "infintie;" => "\x{29dd}", - "inodot;" => "\x{131}", - "int;" => "\x{222b}", - "intcal;" => "\x{22ba}", - "integers;" => "\x{2124}", - "intercal;" => "\x{22ba}", - 
"intlarhk;" => "\x{2a17}", - "intprod;" => "\x{2a3c}", - "iocy;" => "\x{451}", - "iogon;" => "\x{12f}", - "iopf;" => "\x{1d55a}", - "iota;" => "\x{3b9}", - "iprod;" => "\x{2a3c}", - "iquest" => "\277", - "iquest;" => "\277", - "iscr;" => "\x{1d4be}", - "isin;" => "\x{2208}", - "isinE;" => "\x{22f9}", - "isindot;" => "\x{22f5}", - "isins;" => "\x{22f4}", - "isinsv;" => "\x{22f3}", - "isinv;" => "\x{2208}", - "it;" => "\x{2062}", - "itilde;" => "\x{129}", - "iukcy;" => "\x{456}", - "iuml" => "\357", - "iuml;" => "\357", - "jcirc;" => "\x{135}", - "jcy;" => "\x{439}", - "jfr;" => "\x{1d527}", - "jmath;" => "\x{237}", - "jopf;" => "\x{1d55b}", - "jscr;" => "\x{1d4bf}", - "jsercy;" => "\x{458}", - "jukcy;" => "\x{454}", - "kappa;" => "\x{3ba}", - "kappav;" => "\x{3f0}", - "kcedil;" => "\x{137}", - "kcy;" => "\x{43a}", - "kfr;" => "\x{1d528}", - "kgreen;" => "\x{138}", - "khcy;" => "\x{445}", - "kjcy;" => "\x{45c}", - "kopf;" => "\x{1d55c}", - "kscr;" => "\x{1d4c0}", - "lAarr;" => "\x{21da}", - "lArr;" => "\x{21d0}", - "lAtail;" => "\x{291b}", - "lBarr;" => "\x{290e}", - "lE;" => "\x{2266}", - "lEg;" => "\x{2a8b}", - "lHar;" => "\x{2962}", - "lacute;" => "\x{13a}", - "laemptyv;" => "\x{29b4}", - "lagran;" => "\x{2112}", - "lambda;" => "\x{3bb}", - "lang;" => "\x{27e8}", - "langd;" => "\x{2991}", - "langle;" => "\x{27e8}", - "lap;" => "\x{2a85}", - "laquo" => "\253", - "laquo;" => "\253", - "larr;" => "\x{2190}", - "larrb;" => "\x{21e4}", - "larrbfs;" => "\x{291f}", - "larrfs;" => "\x{291d}", - "larrhk;" => "\x{21a9}", - "larrlp;" => "\x{21ab}", - "larrpl;" => "\x{2939}", - "larrsim;" => "\x{2973}", - "larrtl;" => "\x{21a2}", - "lat;" => "\x{2aab}", - "latail;" => "\x{2919}", - "late;" => "\x{2aad}", - "lates;" => "\x{2aad}\x{fe00}", - "lbarr;" => "\x{290c}", - "lbbrk;" => "\x{2772}", - "lbrace;" => "{", - "lbrack;" => "[", - "lbrke;" => "\x{298b}", - "lbrksld;" => "\x{298f}", - "lbrkslu;" => "\x{298d}", - "lcaron;" => "\x{13e}", - "lcedil;" => "\x{13c}", - "lceil;" => 
"\x{2308}", - "lcub;" => "{", - "lcy;" => "\x{43b}", - "ldca;" => "\x{2936}", - "ldquo;" => "\x{201c}", - "ldquor;" => "\x{201e}", - "ldrdhar;" => "\x{2967}", - "ldrushar;" => "\x{294b}", - "ldsh;" => "\x{21b2}", - "le;" => "\x{2264}", - "leftarrow;" => "\x{2190}", - "leftarrowtail;" => "\x{21a2}", - "leftharpoondown;" => "\x{21bd}", - "leftharpoonup;" => "\x{21bc}", - "leftleftarrows;" => "\x{21c7}", - "leftrightarrow;" => "\x{2194}", - "leftrightarrows;" => "\x{21c6}", - "leftrightharpoons;" => "\x{21cb}", - "leftrightsquigarrow;" => "\x{21ad}", - "leftthreetimes;" => "\x{22cb}", - "leg;" => "\x{22da}", - "leq;" => "\x{2264}", - "leqq;" => "\x{2266}", - "leqslant;" => "\x{2a7d}", - "les;" => "\x{2a7d}", - "lescc;" => "\x{2aa8}", - "lesdot;" => "\x{2a7f}", - "lesdoto;" => "\x{2a81}", - "lesdotor;" => "\x{2a83}", - "lesg;" => "\x{22da}\x{fe00}", - "lesges;" => "\x{2a93}", - "lessapprox;" => "\x{2a85}", - "lessdot;" => "\x{22d6}", - "lesseqgtr;" => "\x{22da}", - "lesseqqgtr;" => "\x{2a8b}", - "lessgtr;" => "\x{2276}", - "lesssim;" => "\x{2272}", - "lfisht;" => "\x{297c}", - "lfloor;" => "\x{230a}", - "lfr;" => "\x{1d529}", - "lg;" => "\x{2276}", - "lgE;" => "\x{2a91}", - "lhard;" => "\x{21bd}", - "lharu;" => "\x{21bc}", - "lharul;" => "\x{296a}", - "lhblk;" => "\x{2584}", - "ljcy;" => "\x{459}", - "ll;" => "\x{226a}", - "llarr;" => "\x{21c7}", - "llcorner;" => "\x{231e}", - "llhard;" => "\x{296b}", - "lltri;" => "\x{25fa}", - "lmidot;" => "\x{140}", - "lmoust;" => "\x{23b0}", - "lmoustache;" => "\x{23b0}", - "lnE;" => "\x{2268}", - "lnap;" => "\x{2a89}", - "lnapprox;" => "\x{2a89}", - "lne;" => "\x{2a87}", - "lneq;" => "\x{2a87}", - "lneqq;" => "\x{2268}", - "lnsim;" => "\x{22e6}", - "loang;" => "\x{27ec}", - "loarr;" => "\x{21fd}", - "lobrk;" => "\x{27e6}", - "longleftarrow;" => "\x{27f5}", - "longleftrightarrow;" => "\x{27f7}", - "longmapsto;" => "\x{27fc}", - "longrightarrow;" => "\x{27f6}", - "looparrowleft;" => "\x{21ab}", - "looparrowright;" => "\x{21ac}", - 
"lopar;" => "\x{2985}", - "lopf;" => "\x{1d55d}", - "loplus;" => "\x{2a2d}", - "lotimes;" => "\x{2a34}", - "lowast;" => "\x{2217}", - "lowbar;" => "_", - "loz;" => "\x{25ca}", - "lozenge;" => "\x{25ca}", - "lozf;" => "\x{29eb}", - "lpar;" => "(", - "lparlt;" => "\x{2993}", - "lrarr;" => "\x{21c6}", - "lrcorner;" => "\x{231f}", - "lrhar;" => "\x{21cb}", - "lrhard;" => "\x{296d}", - "lrm;" => "\x{200e}", - "lrtri;" => "\x{22bf}", - "lsaquo;" => "\x{2039}", - "lscr;" => "\x{1d4c1}", - "lsh;" => "\x{21b0}", - "lsim;" => "\x{2272}", - "lsime;" => "\x{2a8d}", - "lsimg;" => "\x{2a8f}", - "lsqb;" => "[", - "lsquo;" => "\x{2018}", - "lsquor;" => "\x{201a}", - "lstrok;" => "\x{142}", - "lt" => "<", - "lt;" => "<", - "ltcc;" => "\x{2aa6}", - "ltcir;" => "\x{2a79}", - "ltdot;" => "\x{22d6}", - "lthree;" => "\x{22cb}", - "ltimes;" => "\x{22c9}", - "ltlarr;" => "\x{2976}", - "ltquest;" => "\x{2a7b}", - "ltrPar;" => "\x{2996}", - "ltri;" => "\x{25c3}", - "ltrie;" => "\x{22b4}", - "ltrif;" => "\x{25c2}", - "lurdshar;" => "\x{294a}", - "luruhar;" => "\x{2966}", - "lvertneqq;" => "\x{2268}\x{fe00}", - "lvnE;" => "\x{2268}\x{fe00}", - "mDDot;" => "\x{223a}", - "macr" => "\257", - "macr;" => "\257", - "male;" => "\x{2642}", - "malt;" => "\x{2720}", - "maltese;" => "\x{2720}", - "map;" => "\x{21a6}", - "mapsto;" => "\x{21a6}", - "mapstodown;" => "\x{21a7}", - "mapstoleft;" => "\x{21a4}", - "mapstoup;" => "\x{21a5}", - "marker;" => "\x{25ae}", - "mcomma;" => "\x{2a29}", - "mcy;" => "\x{43c}", - "mdash;" => "\x{2014}", - "measuredangle;" => "\x{2221}", - "mfr;" => "\x{1d52a}", - "mho;" => "\x{2127}", - "micro" => "\265", - "micro;" => "\265", - "mid;" => "\x{2223}", - "midast;" => "*", - "midcir;" => "\x{2af0}", - "middot" => "\267", - "middot;" => "\267", - "minus;" => "\x{2212}", - "minusb;" => "\x{229f}", - "minusd;" => "\x{2238}", - "minusdu;" => "\x{2a2a}", - "mlcp;" => "\x{2adb}", - "mldr;" => "\x{2026}", - "mnplus;" => "\x{2213}", - "models;" => "\x{22a7}", - "mopf;" => 
"\x{1d55e}", - "mp;" => "\x{2213}", - "mscr;" => "\x{1d4c2}", - "mstpos;" => "\x{223e}", - "mu;" => "\x{3bc}", - "multimap;" => "\x{22b8}", - "mumap;" => "\x{22b8}", - "nGg;" => "\x{22d9}\x{338}", - "nGt;" => "\x{226b}\x{20d2}", - "nGtv;" => "\x{226b}\x{338}", - "nLeftarrow;" => "\x{21cd}", - "nLeftrightarrow;" => "\x{21ce}", - "nLl;" => "\x{22d8}\x{338}", - "nLt;" => "\x{226a}\x{20d2}", - "nLtv;" => "\x{226a}\x{338}", - "nRightarrow;" => "\x{21cf}", - "nVDash;" => "\x{22af}", - "nVdash;" => "\x{22ae}", - "nabla;" => "\x{2207}", - "nacute;" => "\x{144}", - "nang;" => "\x{2220}\x{20d2}", - "nap;" => "\x{2249}", - "napE;" => "\x{2a70}\x{338}", - "napid;" => "\x{224b}\x{338}", - "napos;" => "\x{149}", - "napprox;" => "\x{2249}", - "natur;" => "\x{266e}", - "natural;" => "\x{266e}", - "naturals;" => "\x{2115}", - "nbsp" => "\240", - "nbsp;" => "\240", - "nbump;" => "\x{224e}\x{338}", - "nbumpe;" => "\x{224f}\x{338}", - "ncap;" => "\x{2a43}", - "ncaron;" => "\x{148}", - "ncedil;" => "\x{146}", - "ncong;" => "\x{2247}", - "ncongdot;" => "\x{2a6d}\x{338}", - "ncup;" => "\x{2a42}", - "ncy;" => "\x{43d}", - "ndash;" => "\x{2013}", - "ne;" => "\x{2260}", - "neArr;" => "\x{21d7}", - "nearhk;" => "\x{2924}", - "nearr;" => "\x{2197}", - "nearrow;" => "\x{2197}", - "nedot;" => "\x{2250}\x{338}", - "nequiv;" => "\x{2262}", - "nesear;" => "\x{2928}", - "nesim;" => "\x{2242}\x{338}", - "nexist;" => "\x{2204}", - "nexists;" => "\x{2204}", - "nfr;" => "\x{1d52b}", - "ngE;" => "\x{2267}\x{338}", - "nge;" => "\x{2271}", - "ngeq;" => "\x{2271}", - "ngeqq;" => "\x{2267}\x{338}", - "ngeqslant;" => "\x{2a7e}\x{338}", - "nges;" => "\x{2a7e}\x{338}", - "ngsim;" => "\x{2275}", - "ngt;" => "\x{226f}", - "ngtr;" => "\x{226f}", - "nhArr;" => "\x{21ce}", - "nharr;" => "\x{21ae}", - "nhpar;" => "\x{2af2}", - "ni;" => "\x{220b}", - "nis;" => "\x{22fc}", - "nisd;" => "\x{22fa}", - "niv;" => "\x{220b}", - "njcy;" => "\x{45a}", - "nlArr;" => "\x{21cd}", - "nlE;" => "\x{2266}\x{338}", - "nlarr;" => 
"\x{219a}", - "nldr;" => "\x{2025}", - "nle;" => "\x{2270}", - "nleftarrow;" => "\x{219a}", - "nleftrightarrow;" => "\x{21ae}", - "nleq;" => "\x{2270}", - "nleqq;" => "\x{2266}\x{338}", - "nleqslant;" => "\x{2a7d}\x{338}", - "nles;" => "\x{2a7d}\x{338}", - "nless;" => "\x{226e}", - "nlsim;" => "\x{2274}", - "nlt;" => "\x{226e}", - "nltri;" => "\x{22ea}", - "nltrie;" => "\x{22ec}", - "nmid;" => "\x{2224}", - "nopf;" => "\x{1d55f}", - "not" => "\254", - "not;" => "\254", - "notin;" => "\x{2209}", - "notinE;" => "\x{22f9}\x{338}", - "notindot;" => "\x{22f5}\x{338}", - "notinva;" => "\x{2209}", - "notinvb;" => "\x{22f7}", - "notinvc;" => "\x{22f6}", - "notni;" => "\x{220c}", - "notniva;" => "\x{220c}", - "notnivb;" => "\x{22fe}", - "notnivc;" => "\x{22fd}", - "npar;" => "\x{2226}", - "nparallel;" => "\x{2226}", - "nparsl;" => "\x{2afd}\x{20e5}", - "npart;" => "\x{2202}\x{338}", - "npolint;" => "\x{2a14}", - "npr;" => "\x{2280}", - "nprcue;" => "\x{22e0}", - "npre;" => "\x{2aaf}\x{338}", - "nprec;" => "\x{2280}", - "npreceq;" => "\x{2aaf}\x{338}", - "nrArr;" => "\x{21cf}", - "nrarr;" => "\x{219b}", - "nrarrc;" => "\x{2933}\x{338}", - "nrarrw;" => "\x{219d}\x{338}", - "nrightarrow;" => "\x{219b}", - "nrtri;" => "\x{22eb}", - "nrtrie;" => "\x{22ed}", - "nsc;" => "\x{2281}", - "nsccue;" => "\x{22e1}", - "nsce;" => "\x{2ab0}\x{338}", - "nscr;" => "\x{1d4c3}", - "nshortmid;" => "\x{2224}", - "nshortparallel;" => "\x{2226}", - "nsim;" => "\x{2241}", - "nsime;" => "\x{2244}", - "nsimeq;" => "\x{2244}", - "nsmid;" => "\x{2224}", - "nspar;" => "\x{2226}", - "nsqsube;" => "\x{22e2}", - "nsqsupe;" => "\x{22e3}", - "nsub;" => "\x{2284}", - "nsubE;" => "\x{2ac5}\x{338}", - "nsube;" => "\x{2288}", - "nsubset;" => "\x{2282}\x{20d2}", - "nsubseteq;" => "\x{2288}", - "nsubseteqq;" => "\x{2ac5}\x{338}", - "nsucc;" => "\x{2281}", - "nsucceq;" => "\x{2ab0}\x{338}", - "nsup;" => "\x{2285}", - "nsupE;" => "\x{2ac6}\x{338}", - "nsupe;" => "\x{2289}", - "nsupset;" => "\x{2283}\x{20d2}", - 
"nsupseteq;" => "\x{2289}", - "nsupseteqq;" => "\x{2ac6}\x{338}", - "ntgl;" => "\x{2279}", - "ntilde" => "\361", - "ntilde;" => "\361", - "ntlg;" => "\x{2278}", - "ntriangleleft;" => "\x{22ea}", - "ntrianglelefteq;" => "\x{22ec}", - "ntriangleright;" => "\x{22eb}", - "ntrianglerighteq;" => "\x{22ed}", - "nu;" => "\x{3bd}", - "num;" => "#", - "numero;" => "\x{2116}", - "numsp;" => "\x{2007}", - "nvDash;" => "\x{22ad}", - "nvHarr;" => "\x{2904}", - "nvap;" => "\x{224d}\x{20d2}", - "nvdash;" => "\x{22ac}", - "nvge;" => "\x{2265}\x{20d2}", - "nvgt;" => ">\x{20d2}", - "nvinfin;" => "\x{29de}", - "nvlArr;" => "\x{2902}", - "nvle;" => "\x{2264}\x{20d2}", - "nvlt;" => "<\x{20d2}", - "nvltrie;" => "\x{22b4}\x{20d2}", - "nvrArr;" => "\x{2903}", - "nvrtrie;" => "\x{22b5}\x{20d2}", - "nvsim;" => "\x{223c}\x{20d2}", - "nwArr;" => "\x{21d6}", - "nwarhk;" => "\x{2923}", - "nwarr;" => "\x{2196}", - "nwarrow;" => "\x{2196}", - "nwnear;" => "\x{2927}", - "oS;" => "\x{24c8}", - "oacute" => "\363", - "oacute;" => "\363", - "oast;" => "\x{229b}", - "ocir;" => "\x{229a}", - "ocirc" => "\364", - "ocirc;" => "\364", - "ocy;" => "\x{43e}", - "odash;" => "\x{229d}", - "odblac;" => "\x{151}", - "odiv;" => "\x{2a38}", - "odot;" => "\x{2299}", - "odsold;" => "\x{29bc}", - "oelig;" => "\x{153}", - "ofcir;" => "\x{29bf}", - "ofr;" => "\x{1d52c}", - "ogon;" => "\x{2db}", - "ograve" => "\362", - "ograve;" => "\362", - "ogt;" => "\x{29c1}", - "ohbar;" => "\x{29b5}", - "ohm;" => "\x{3a9}", - "oint;" => "\x{222e}", - "olarr;" => "\x{21ba}", - "olcir;" => "\x{29be}", - "olcross;" => "\x{29bb}", - "oline;" => "\x{203e}", - "olt;" => "\x{29c0}", - "omacr;" => "\x{14d}", - "omega;" => "\x{3c9}", - "omicron;" => "\x{3bf}", - "omid;" => "\x{29b6}", - "ominus;" => "\x{2296}", - "oopf;" => "\x{1d560}", - "opar;" => "\x{29b7}", - "operp;" => "\x{29b9}", - "oplus;" => "\x{2295}", - "or;" => "\x{2228}", - "orarr;" => "\x{21bb}", - "ord;" => "\x{2a5d}", - "order;" => "\x{2134}", - "orderof;" => "\x{2134}", - 
"ordf" => "\252", - "ordf;" => "\252", - "ordm" => "\272", - "ordm;" => "\272", - "origof;" => "\x{22b6}", - "oror;" => "\x{2a56}", - "orslope;" => "\x{2a57}", - "orv;" => "\x{2a5b}", - "oscr;" => "\x{2134}", - "oslash" => "\370", - "oslash;" => "\370", - "osol;" => "\x{2298}", - "otilde" => "\365", - "otilde;" => "\365", - "otimes;" => "\x{2297}", - "otimesas;" => "\x{2a36}", - "ouml" => "\366", - "ouml;" => "\366", - "ovbar;" => "\x{233d}", - "par;" => "\x{2225}", - "para" => "\266", - "para;" => "\266", - "parallel;" => "\x{2225}", - "parsim;" => "\x{2af3}", - "parsl;" => "\x{2afd}", - "part;" => "\x{2202}", - "pcy;" => "\x{43f}", - "percnt;" => "%", - "period;" => ".", - "permil;" => "\x{2030}", - "perp;" => "\x{22a5}", - "pertenk;" => "\x{2031}", - "pfr;" => "\x{1d52d}", - "phi;" => "\x{3c6}", - "phiv;" => "\x{3c5}", - "phmmat;" => "\x{2133}", - "phone;" => "\x{260e}", - "pi;" => "\x{3c0}", - "pitchfork;" => "\x{22d4}", - "piv;" => "\x{3d6}", - "planck;" => "\x{210f}", - "planckh;" => "\x{210e}", - "plankv;" => "\x{210f}", - "plus;" => "+", - "plusacir;" => "\x{2a23}", - "plusb;" => "\x{229e}", - "pluscir;" => "\x{2a22}", - "plusdo;" => "\x{2214}", - "plusdu;" => "\x{2a25}", - "pluse;" => "\x{2a72}", - "plusmn" => "\261", - "plusmn;" => "\261", - "plussim;" => "\x{2a26}", - "plustwo;" => "\x{2a27}", - "pm;" => "\261", - "pointint;" => "\x{2a15}", - "popf;" => "\x{1d561}", - "pound" => "\243", - "pound;" => "\243", - "pr;" => "\x{227a}", - "prE;" => "\x{2ab3}", - "prap;" => "\x{2ab7}", - "prcue;" => "\x{227c}", - "pre;" => "\x{2aaf}", - "prec;" => "\x{227a}", - "precapprox;" => "\x{2ab7}", - "preccurlyeq;" => "\x{227c}", - "preceq;" => "\x{2aaf}", - "precnapprox;" => "\x{2ab9}", - "precneqq;" => "\x{2ab5}", - "precnsim;" => "\x{22e8}", - "precsim;" => "\x{227e}", - "prime;" => "\x{2032}", - "primes;" => "\x{2119}", - "prnE;" => "\x{2ab5}", - "prnap;" => "\x{2ab9}", - "prnsim;" => "\x{22e8}", - "prod;" => "\x{220f}", - "profalar;" => "\x{232e}", - "profline;" => 
"\x{2312}", - "profsurf;" => "\x{2313}", - "prop;" => "\x{221d}", - "propto;" => "\x{221d}", - "prsim;" => "\x{227e}", - "prurel;" => "\x{22b0}", - "pscr;" => "\x{1d4c5}", - "psi;" => "\x{3c8}", - "puncsp;" => "\x{2008}", - "qfr;" => "\x{1d52e}", - "qint;" => "\x{2a0c}", - "qopf;" => "\x{1d562}", - "qprime;" => "\x{2057}", - "qscr;" => "\x{1d4c6}", - "quaternions;" => "\x{210d}", - "quatint;" => "\x{2a16}", - "quest;" => "?", - "questeq;" => "\x{225f}", - "quot" => "\"", - "quot;" => "\"", - "rAarr;" => "\x{21db}", - "rArr;" => "\x{21d2}", - "rAtail;" => "\x{291c}", - "rBarr;" => "\x{290f}", - "rHar;" => "\x{2964}", - "race;" => "\x{223d}\x{331}", - "racute;" => "\x{155}", - "radic;" => "\x{221a}", - "raemptyv;" => "\x{29b3}", - "rang;" => "\x{27e9}", - "rangd;" => "\x{2992}", - "range;" => "\x{29a5}", - "rangle;" => "\x{27e9}", - "raquo" => "\273", - "raquo;" => "\273", - "rarr;" => "\x{2192}", - "rarrap;" => "\x{2975}", - "rarrb;" => "\x{21e5}", - "rarrbfs;" => "\x{2920}", - "rarrc;" => "\x{2933}", - "rarrfs;" => "\x{291e}", - "rarrhk;" => "\x{21aa}", - "rarrlp;" => "\x{21ac}", - "rarrpl;" => "\x{2945}", - "rarrsim;" => "\x{2974}", - "rarrtl;" => "\x{21a3}", - "rarrw;" => "\x{219d}", - "ratail;" => "\x{291a}", - "ratio;" => "\x{2236}", - "rationals;" => "\x{211a}", - "rbarr;" => "\x{290d}", - "rbbrk;" => "\x{2773}", - "rbrace;" => "}", - "rbrack;" => "]", - "rbrke;" => "\x{298c}", - "rbrksld;" => "\x{298e}", - "rbrkslu;" => "\x{2990}", - "rcaron;" => "\x{159}", - "rcedil;" => "\x{157}", - "rceil;" => "\x{2309}", - "rcub;" => "}", - "rcy;" => "\x{440}", - "rdca;" => "\x{2937}", - "rdldhar;" => "\x{2969}", - "rdquo;" => "\x{201d}", - "rdquor;" => "\x{201d}", - "rdsh;" => "\x{21b3}", - "real;" => "\x{211c}", - "realine;" => "\x{211b}", - "realpart;" => "\x{211c}", - "reals;" => "\x{211d}", - "rect;" => "\x{25ad}", - "reg" => "\256", - "reg;" => "\256", - "rfisht;" => "\x{297d}", - "rfloor;" => "\x{230b}", - "rfr;" => "\x{1d52f}", - "rhard;" => "\x{21c1}", - "rharu;" 
=> "\x{21c0}", - "rharul;" => "\x{296c}", - "rho;" => "\x{3c1}", - "rhov;" => "\x{3f1}", - "rightarrow;" => "\x{2192}", - "rightarrowtail;" => "\x{21a3}", - "rightharpoondown;" => "\x{21c1}", - "rightharpoonup;" => "\x{21c0}", - "rightleftarrows;" => "\x{21c4}", - "rightleftharpoons;" => "\x{21cc}", - "rightrightarrows;" => "\x{21c9}", - "rightsquigarrow;" => "\x{219d}", - "rightthreetimes;" => "\x{22cc}", - "ring;" => "\x{2da}", - "risingdotseq;" => "\x{2253}", - "rlarr;" => "\x{21c4}", - "rlhar;" => "\x{21cc}", - "rlm;" => "\x{200f}", - "rmoust;" => "\x{23b1}", - "rmoustache;" => "\x{23b1}", - "rnmid;" => "\x{2aee}", - "roang;" => "\x{27ed}", - "roarr;" => "\x{21fe}", - "robrk;" => "\x{27e7}", - "ropar;" => "\x{2986}", - "ropf;" => "\x{1d563}", - "roplus;" => "\x{2a2e}", - "rotimes;" => "\x{2a35}", - "rpar;" => ")", - "rpargt;" => "\x{2994}", - "rppolint;" => "\x{2a12}", - "rrarr;" => "\x{21c9}", - "rsaquo;" => "\x{203a}", - "rscr;" => "\x{1d4c7}", - "rsh;" => "\x{21b1}", - "rsqb;" => "]", - "rsquo;" => "\x{2019}", - "rsquor;" => "\x{2019}", - "rthree;" => "\x{22cc}", - "rtimes;" => "\x{22ca}", - "rtri;" => "\x{25b9}", - "rtrie;" => "\x{22b5}", - "rtrif;" => "\x{25b8}", - "rtriltri;" => "\x{29ce}", - "ruluhar;" => "\x{2968}", - "rx;" => "\x{211e}", - "sacute;" => "\x{15b}", - "sbquo;" => "\x{201a}", - "sc;" => "\x{227b}", - "scE;" => "\x{2ab4}", - "scap;" => "\x{2ab8}", - "scaron;" => "\x{161}", - "sccue;" => "\x{227d}", - "sce;" => "\x{2ab0}", - "scedil;" => "\x{15f}", - "scirc;" => "\x{15d}", - "scnE;" => "\x{2ab6}", - "scnap;" => "\x{2aba}", - "scnsim;" => "\x{22e9}", - "scpolint;" => "\x{2a13}", - "scsim;" => "\x{227f}", - "scy;" => "\x{441}", - "sdot;" => "\x{22c5}", - "sdotb;" => "\x{22a1}", - "sdote;" => "\x{2a66}", - "seArr;" => "\x{21d8}", - "searhk;" => "\x{2925}", - "searr;" => "\x{2198}", - "searrow;" => "\x{2198}", - "sect" => "\247", - "sect;" => "\247", - "semi;" => ";", - "seswar;" => "\x{2929}", - "setminus;" => "\x{2216}", - "setmn;" => 
"\x{2216}", - "sext;" => "\x{2736}", - "sfr;" => "\x{1d530}", - "sfrown;" => "\x{2322}", - "sharp;" => "\x{266f}", - "shchcy;" => "\x{449}", - "shcy;" => "\x{448}", - "shortmid;" => "\x{2223}", - "shortparallel;" => "\x{2225}", - "shy" => "\255", - "shy;" => "\255", - "sigma;" => "\x{3c3}", - "sigmaf;" => "\x{3c2}", - "sigmav;" => "\x{3c2}", - "sim;" => "\x{223c}", - "simdot;" => "\x{2a6a}", - "sime;" => "\x{2243}", - "simeq;" => "\x{2243}", - "simg;" => "\x{2a9e}", - "simgE;" => "\x{2aa0}", - "siml;" => "\x{2a9d}", - "simlE;" => "\x{2a9f}", - "simne;" => "\x{2246}", - "simplus;" => "\x{2a24}", - "simrarr;" => "\x{2972}", - "slarr;" => "\x{2190}", - "smallsetminus;" => "\x{2216}", - "smashp;" => "\x{2a33}", - "smeparsl;" => "\x{29e4}", - "smid;" => "\x{2223}", - "smile;" => "\x{2323}", - "smt;" => "\x{2aaa}", - "smte;" => "\x{2aac}", - "smtes;" => "\x{2aac}\x{fe00}", - "softcy;" => "\x{44c}", - "sol;" => "/", - "solb;" => "\x{29c4}", - "solbar;" => "\x{233f}", - "sopf;" => "\x{1d564}", - "spades;" => "\x{2660}", - "spadesuit;" => "\x{2660}", - "spar;" => "\x{2225}", - "sqcap;" => "\x{2293}", - "sqcaps;" => "\x{2293}\x{fe00}", - "sqcup;" => "\x{2294}", - "sqcups;" => "\x{2294}\x{fe00}", - "sqsub;" => "\x{228f}", - "sqsube;" => "\x{2291}", - "sqsubset;" => "\x{228f}", - "sqsubseteq;" => "\x{2291}", - "sqsup;" => "\x{2290}", - "sqsupe;" => "\x{2292}", - "sqsupset;" => "\x{2290}", - "sqsupseteq;" => "\x{2292}", - "squ;" => "\x{25a1}", - "square;" => "\x{25a1}", - "squarf;" => "\x{25aa}", - "squf;" => "\x{25aa}", - "srarr;" => "\x{2192}", - "sscr;" => "\x{1d4c8}", - "ssetmn;" => "\x{2216}", - "ssmile;" => "\x{2323}", - "sstarf;" => "\x{22c6}", - "star;" => "\x{2606}", - "starf;" => "\x{2605}", - "straightepsilon;" => "\x{3f5}", - "straightphi;" => "\x{3d5}", - "strns;" => "\257", - "sub;" => "\x{2282}", - "subE;" => "\x{2ac5}", - "subdot;" => "\x{2abd}", - "sube;" => "\x{2286}", - "subedot;" => "\x{2ac3}", - "submult;" => "\x{2ac1}", - "subnE;" => "\x{2acb}", - "subne;" 
=> "\x{228a}", - "subplus;" => "\x{2abf}", - "subrarr;" => "\x{2979}", - "subset;" => "\x{2282}", - "subseteq;" => "\x{2286}", - "subseteqq;" => "\x{2ac5}", - "subsetneq;" => "\x{228a}", - "subsetneqq;" => "\x{2acb}", - "subsim;" => "\x{2ac7}", - "subsub;" => "\x{2ad5}", - "subsup;" => "\x{2ad3}", - "succ;" => "\x{227b}", - "succapprox;" => "\x{2ab8}", - "succcurlyeq;" => "\x{227d}", - "succeq;" => "\x{2ab0}", - "succnapprox;" => "\x{2aba}", - "succneqq;" => "\x{2ab6}", - "succnsim;" => "\x{22e9}", - "succsim;" => "\x{227f}", - "sum;" => "\x{2211}", - "sung;" => "\x{266a}", - "sup1" => "\271", - "sup1;" => "\271", - "sup2" => "\262", - "sup2;" => "\262", - "sup3" => "\263", - "sup3;" => "\263", - "sup;" => "\x{2283}", - "supE;" => "\x{2ac6}", - "supdot;" => "\x{2abe}", - "supdsub;" => "\x{2ad8}", - "supe;" => "\x{2287}", - "supedot;" => "\x{2ac4}", - "suphsol;" => "\x{27c9}", - "suphsub;" => "\x{2ad7}", - "suplarr;" => "\x{297b}", - "supmult;" => "\x{2ac2}", - "supnE;" => "\x{2acc}", - "supne;" => "\x{228b}", - "supplus;" => "\x{2ac0}", - "supset;" => "\x{2283}", - "supseteq;" => "\x{2287}", - "supseteqq;" => "\x{2ac6}", - "supsetneq;" => "\x{228b}", - "supsetneqq;" => "\x{2acc}", - "supsim;" => "\x{2ac8}", - "supsub;" => "\x{2ad4}", - "supsup;" => "\x{2ad6}", - "swArr;" => "\x{21d9}", - "swarhk;" => "\x{2926}", - "swarr;" => "\x{2199}", - "swarrow;" => "\x{2199}", - "swnwar;" => "\x{292a}", - "szlig" => "\337", - "szlig;" => "\337", - "target;" => "\x{2316}", - "tau;" => "\x{3c4}", - "tbrk;" => "\x{23b4}", - "tcaron;" => "\x{165}", - "tcedil;" => "\x{163}", - "tcy;" => "\x{442}", - "tdot;" => "\x{20db}", - "telrec;" => "\x{2315}", - "tfr;" => "\x{1d531}", - "there4;" => "\x{2234}", - "therefore;" => "\x{2234}", - "theta;" => "\x{3b8}", - "thetasym;" => "\x{3d1}", - "thetav;" => "\x{3d1}", - "thickapprox;" => "\x{2248}", - "thicksim;" => "\x{223c}", - "thinsp;" => "\x{2009}", - "thkap;" => "\x{2248}", - "thksim;" => "\x{223c}", - "thorn" => "\376", - "thorn;" => 
"\376", - "tilde;" => "\x{2dc}", - "times" => "\327", - "times;" => "\327", - "timesb;" => "\x{22a0}", - "timesbar;" => "\x{2a31}", - "timesd;" => "\x{2a30}", - "tint;" => "\x{222d}", - "toea;" => "\x{2928}", - "top;" => "\x{22a4}", - "topbot;" => "\x{2336}", - "topcir;" => "\x{2af1}", - "topf;" => "\x{1d565}", - "topfork;" => "\x{2ada}", - "tosa;" => "\x{2929}", - "tprime;" => "\x{2034}", - "trade;" => "\x{2122}", - "triangle;" => "\x{25b5}", - "triangledown;" => "\x{25bf}", - "triangleleft;" => "\x{25c3}", - "trianglelefteq;" => "\x{22b4}", - "triangleq;" => "\x{225c}", - "triangleright;" => "\x{25b9}", - "trianglerighteq;" => "\x{22b5}", - "tridot;" => "\x{25ec}", - "trie;" => "\x{225c}", - "triminus;" => "\x{2a3a}", - "triplus;" => "\x{2a39}", - "trisb;" => "\x{29cd}", - "tritime;" => "\x{2a3b}", - "trpezium;" => "\x{23e2}", - "tscr;" => "\x{1d4c9}", - "tscy;" => "\x{446}", - "tshcy;" => "\x{45b}", - "tstrok;" => "\x{167}", - "twixt;" => "\x{226c}", - "twoheadleftarrow;" => "\x{219e}", - "twoheadrightarrow;" => "\x{21a0}", - "uArr;" => "\x{21d1}", - "uHar;" => "\x{2963}", - "uacute" => "\372", - "uacute;" => "\372", - "uarr;" => "\x{2191}", - "ubrcy;" => "\x{45e}", - "ubreve;" => "\x{16d}", - "ucirc" => "\373", - "ucirc;" => "\373", - "ucy;" => "\x{443}", - "udarr;" => "\x{21c5}", - "udblac;" => "\x{171}", - "udhar;" => "\x{296e}", - "ufisht;" => "\x{297e}", - "ufr;" => "\x{1d532}", - "ugrave" => "\371", - "ugrave;" => "\371", - "uharl;" => "\x{21bf}", - "uharr;" => "\x{21be}", - "uhblk;" => "\x{2580}", - "ulcorn;" => "\x{231c}", - "ulcorner;" => "\x{231c}", - "ulcrop;" => "\x{230f}", - "ultri;" => "\x{25f8}", - "umacr;" => "\x{16b}", - "uml" => "\250", - "uml;" => "\250", - "uogon;" => "\x{173}", - "uopf;" => "\x{1d566}", - "uparrow;" => "\x{2191}", - "updownarrow;" => "\x{2195}", - "upharpoonleft;" => "\x{21bf}", - "upharpoonright;" => "\x{21be}", - "uplus;" => "\x{228e}", - "upsi;" => "\x{3c5}", - "upsih;" => "\x{3d2}", - "upsilon;" => "\x{3c5}", - 
"upuparrows;" => "\x{21c8}", - "urcorn;" => "\x{231d}", - "urcorner;" => "\x{231d}", - "urcrop;" => "\x{230e}", - "uring;" => "\x{16f}", - "urtri;" => "\x{25f9}", - "uscr;" => "\x{1d4ca}", - "utdot;" => "\x{22f0}", - "utilde;" => "\x{169}", - "utri;" => "\x{25b5}", - "utrif;" => "\x{25b4}", - "uuarr;" => "\x{21c8}", - "uuml" => "\374", - "uuml;" => "\374", - "uwangle;" => "\x{29a7}", - "vArr;" => "\x{21d5}", - "vBar;" => "\x{2ae8}", - "vBarv;" => "\x{2ae9}", - "vDash;" => "\x{22a8}", - "vangrt;" => "\x{299c}", - "varepsilon;" => "\x{3f5}", - "varkappa;" => "\x{3f0}", - "varnothing;" => "\x{2205}", - "varphi;" => "\x{3d5}", - "varpi;" => "\x{3d6}", - "varpropto;" => "\x{221d}", - "varr;" => "\x{2195}", - "varrho;" => "\x{3f1}", - "varsigma;" => "\x{3c2}", - "varsubsetneq;" => "\x{228a}\x{fe00}", - "varsubsetneqq;" => "\x{2acb}\x{fe00}", - "varsupsetneq;" => "\x{228b}\x{fe00}", - "varsupsetneqq;" => "\x{2acc}\x{fe00}", - "vartheta;" => "\x{3d1}", - "vartriangleleft;" => "\x{22b2}", - "vartriangleright;" => "\x{22b3}", - "vcy;" => "\x{432}", - "vdash;" => "\x{22a2}", - "vee;" => "\x{2228}", - "veebar;" => "\x{22bb}", - "veeeq;" => "\x{225a}", - "vellip;" => "\x{22ee}", - "verbar;" => "|", - "vert;" => "|", - "vfr;" => "\x{1d533}", - "vltri;" => "\x{22b2}", - "vnsub;" => "\x{2282}\x{20d2}", - "vnsup;" => "\x{2283}\x{20d2}", - "vopf;" => "\x{1d567}", - "vprop;" => "\x{221d}", - "vrtri;" => "\x{22b3}", - "vscr;" => "\x{1d4cb}", - "vsubnE;" => "\x{2acb}\x{fe00}", - "vsubne;" => "\x{228a}\x{fe00}", - "vsupnE;" => "\x{2acc}\x{fe00}", - "vsupne;" => "\x{228b}\x{fe00}", - "vzigzag;" => "\x{299a}", - "wcirc;" => "\x{175}", - "wedbar;" => "\x{2a5f}", - "wedge;" => "\x{2227}", - "wedgeq;" => "\x{2259}", - "weierp;" => "\x{2118}", - "wfr;" => "\x{1d534}", - "wopf;" => "\x{1d568}", - "wp;" => "\x{2118}", - "wr;" => "\x{2240}", - "wreath;" => "\x{2240}", - "wscr;" => "\x{1d4cc}", - "xcap;" => "\x{22c2}", - "xcirc;" => "\x{25ef}", - "xcup;" => "\x{22c3}", - "xdtri;" => "\x{25bd}", - 
"xfr;" => "\x{1d535}", - "xhArr;" => "\x{27fa}", - "xharr;" => "\x{27f7}", - "xi;" => "\x{3be}", - "xlArr;" => "\x{27f8}", - "xlarr;" => "\x{27f5}", - "xmap;" => "\x{27fc}", - "xnis;" => "\x{22fb}", - "xodot;" => "\x{2a00}", - "xopf;" => "\x{1d569}", - "xoplus;" => "\x{2a01}", - "xotime;" => "\x{2a02}", - "xrArr;" => "\x{27f9}", - "xrarr;" => "\x{27f6}", - "xscr;" => "\x{1d4cd}", - "xsqcup;" => "\x{2a06}", - "xuplus;" => "\x{2a04}", - "xutri;" => "\x{25b3}", - "xvee;" => "\x{22c1}", - "xwedge;" => "\x{22c0}", - "yacute" => "\375", - "yacute;" => "\375", - "yacy;" => "\x{44f}", - "ycirc;" => "\x{177}", - "ycy;" => "\x{44b}", - "yen" => "\245", - "yen;" => "\245", - "yfr;" => "\x{1d536}", - "yicy;" => "\x{457}", - "yopf;" => "\x{1d56a}", - "yscr;" => "\x{1d4ce}", - "yucy;" => "\x{44e}", - "yuml" => "\377", - "yuml;" => "\377", - "zacute;" => "\x{17a}", - "zcaron;" => "\x{17e}", - "zcy;" => "\x{437}", - "zdot;" => "\x{17c}", - "zeetrf;" => "\x{2128}", - "zeta;" => "\x{3b6}", - "zfr;" => "\x{1d537}", - "zhcy;" => "\x{436}", - "zigrarr;" => "\x{21dd}", - "zopf;" => "\x{1d56b}", - "zscr;" => "\x{1d4cf}", - "zwj;" => "\x{200d}", - "zwnj;" => "\x{200c}", - }; - -1; - -__DATA__ - -=head1 NAME - -NamedEntityList.pm - A named entity list for HTML parser - -=head1 DESCRIPTION - -The C file contains a list of named entities -used in HTML documents, both conforming and non-conforming. -It is referenced by C. - -=head1 SEE ALSO - -L. - -Web Applications 1.0 - Named character references -L. - -=head1 LICENSE - -(C) Copyright 2004-2007 Apple Computer, Inc., Mozilla Foundation, -and Opera Software ASA. - -Copyright 2007-2010 Wakaba . - -Copyright 2009-2011 Toby Inkster . - -You are granted a license to use, reproduce and create derivative works -of this document. 
- -=cut - diff -Nru libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/TagSoupParser.pm libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/TagSoupParser.pm --- libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/TagSoupParser.pm 2011-02-09 14:19:10.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/TagSoupParser.pm 2011-10-20 20:26:19.000000000 +0000 @@ -4,26 +4,29 @@ # on manakai, and towards CPAN and XML::LibXML. # http://suika.fam.cx/gate/git/wi/manakai.git/history/HEAD:/lib/Whatpm/HTML.pm -# CAUGHT UP TO f2c921a886ab0b3dfb8d21b82525e98a4a921ad4 +# CAUGHT UP TO d81fcb920a1a3c351149cd66a64bf1b8ae14a172 (2011-08-21) use 5.008001; use strict; #use warnings; -our $VERSION='0.103'; +our $VERSION = '0.107'; + use Error qw(:try); +use IO::Handle; +use HTML::HTML5::Parser::Tokenizer; +use Scalar::Util qw(refaddr); BEGIN { *XML::LibXML::Element::appendTextFromUnicode = sub { my ($element, $parser, $text) = @_; + $text = $parser unless (defined $text or ref $parser); utf8::encode($text); return $element->appendText($text); }; } -use Scalar::Util qw(refaddr); - our $DATA; sub DATA { @@ -43,8 +46,6 @@ return $DATA->{$oaddr}; } -use HTML::HTML5::Parser::Tokenizer; - ## NOTE: This module don't check all HTML5 parse errors; character ## encoding related parse errors are expected to be handled by relevant ## modules. 
@@ -61,8 +62,6 @@ ## doc.write (''); ## alert (doc.compatMode); -require IO::Handle; - ## Namespace URLs sub HTML_NS () { q } @@ -760,7 +759,7 @@ $self->{char_buffer_pos} = 0; my $count = $input->manakai_read_until - ($self->{char_buffer}, qr/[^\x00\x0A\x0D\x{D800}-\x{DFFF}]/, $self->{char_buffer_pos}); + ($self->{char_buffer}, qr/[^\x0A\x0D]/, $self->{char_buffer_pos}); if ($count) { $self->{line_prev} = $self->{line}; $self->{column_prev} = $self->{column}; @@ -796,22 +795,14 @@ $self->{nc} = 0x000A; # LF # MUST $self->{line}++; $self->{column} = 0; - } elsif ($self->{nc} == 0x0000) { # NULL - - $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); - $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST - } elsif (0xD800 <= $self->{nc} and $self->{nc} <= 0xDFFF) { - - $self->{parse_error}->(level => $self->{level}->{must}, type => 'surrogate'); ## XXX documentation - $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST - } + } }; $self->{read_until} = sub { #my ($scalar, $specials_range, $offset) = @_; return 0 if defined $self->{next_nc}; - my $pattern = qr/[^$_[1]\x00\x0A\x0D\x{D800}-\x{DFFF}]/; + my $pattern = qr/[^$_[1]\x0A\x0D]/; my $offset = $_[2] || 0; if ($self->{char_buffer_pos} < length $self->{char_buffer}) { @@ -922,10 +913,6 @@ sub BODY_AFTER_IMS () { 0b100000000 } sub FRAME_IMS () { 0b1000000000 } sub SELECT_IMS () { 0b10000000000 } -#sub IN_FOREIGN_CONTENT_IM () { 0b100000000000 } # see HTML::HTML5::Parser::Tokenizer - ## NOTE: "in foreign content" insertion mode is special; it is combined - ## with the secondary insertion mode. In this parser, they are stored - ## together in the bit-or'ed form. sub IN_CDATA_RCDATA_IM () { 0b1000000000000 } ## NOTE: "in CDATA/RCDATA" insertion mode is also special; it is ## combined with the original insertion mode. 
In thie parser, @@ -1061,7 +1048,7 @@ # TOBYINK DATA($self->{'document'}, isHTML4 => 1) - if ($token->{pubid} =~ /html 4/i or $token->{sysid} =~ /html4/i); + if (($token->{pubid}||'') =~ /html 4/i or ($token->{sysid}||'') =~ /html4/i); if ($token->{quirks} or $doctype_name ne 'html') { @@ -1377,15 +1364,9 @@ } } - ## Step 4..14 + ## Step 4..13 my $new_mode; - if ($node->[1] & SVG_EL or $node->[1] & MML_EL) { - - $new_mode = IN_FOREIGN_CONTENT_IM | IN_BODY_IM; - } elsif ($node->[1] & FOREIGN_EL) { - - # - } elsif ($node->[1] == TABLE_CELL_EL) { + if ($node->[1] == TABLE_CELL_EL) { if ($last) { # @@ -1393,6 +1374,8 @@ $new_mode = IN_CELL_IM; } + } elsif ($node->[1] & FOREIGN_EL) { + # } else { $new_mode = { @@ -1413,7 +1396,7 @@ } $self->{insertion_mode} = $new_mode and last LOOP if defined $new_mode; - ## Step 15 + ## Step 14 if ($node->[1] == HTML_EL) { ## NOTE: Commented out in the spec (HTML5 revision 3894). #unless (defined $self->{head_element}) { @@ -1429,18 +1412,18 @@ } - ## Step 16 + ## Step 15 if ($last) { $self->{insertion_mode} = IN_BODY_IM; last LOOP; } - - ## Step 17 + + ## Step 16 $i--; $node = $self->{open_elements}->[$i]; - ## Step 18 + ## Step 17 redo LOOP; } # LOOP @@ -1536,14 +1519,77 @@ $token = $self->_get_next_token; }; # $script_start_tag + sub push_afe ($$) + { + my ($item => $afes) = @_; + my $item_token = $item->[2]; + + my $depth = 0; + OUTER: for my $i (reverse 0..$#$afes) + { + my $afe = $afes->[$i]; + if ($afe->[0] eq '#marker') + { + last OUTER; + } + else + { + my $token = $afe->[2]; + ## Both |$token| and |$item_token| should be start tag tokens. 
+ if ($token->{tag_name} eq $item_token->{tag_name}) + { + if ((keys %{$token->{attributes}}) != + (keys %{$item_token->{attributes}})) + { + next OUTER; + } + for my $attr_name (keys %{$item_token->{attributes}}) + { + next OUTER unless $token->{attributes}->{$attr_name}; + next OUTER unless + $token->{attributes}->{$attr_name}->{value} eq + $item_token->{attributes}->{$attr_name}->{value}; + } + $depth++; + if ($depth == 3) + { + splice @$afes, $i, 1 => (); + last OUTER; + } + } + + ## We don't have to check namespaces of elements and attributes, + ## nevertheless the spec requires it, because |$afes| could + ## never contain a non-HTML element at the time of writing. In + ## addition, scripted changes would never change the original + ## start tag token. + } + } # OUTER + + push @$afes, $item; + } # push_afe + + my $formatting_end_tag = sub { my ($self, $active_formatting_elements, $open_tables, $end_tag_token) = @_; my $tag_name = $end_tag_token->{tag_name}; ## NOTE: The adoption agency algorithm (AAA). 
- FET: { ## Step 1 + my $outer_loop_counter = 0; + + OUTER: { + if ($outer_loop_counter >= 8) + { + $token = $self->_get_next_token; + last OUTER; + } + + ## Step 3 + $outer_loop_counter++; + + ## Step 4 my $formatting_element; my $formatting_element_i_in_active; AFE: for (reverse 0..$#$active_formatting_elements) { @@ -1606,7 +1652,7 @@ token => $end_tag_token); } - ## Step 2 + ## Step 5 my $furthest_block; my $furthest_block_i_in_open; OE: for (reverse 0..$#{$self->{open_elements}}) { @@ -1621,7 +1667,7 @@ } } # OE - ## Step 3 + ## Step 6 unless (defined $furthest_block) { # MUST splice @{$self->{open_elements}}, $formatting_element_i_in_open; @@ -1630,24 +1676,37 @@ return; } - ## Step 4 + ## Step 7 my $common_ancestor_node = $self->{open_elements}->[$formatting_element_i_in_open - 1]; - ## Step 5 + ## Step 8 my $bookmark_prev_el = $active_formatting_elements->[$formatting_element_i_in_active - 1] ->[0]; - ## Step 6 + ## Step 9 my $node = $furthest_block; my $node_i_in_open = $furthest_block_i_in_open; my $last_node = $furthest_block; - S7: { - ## Step 6.1 + + ## Step 9.1 + my $inner_loop_counter = 0; + + INNER: { + ## Step 9.2 + if ($inner_loop_counter >= 3) { + $token = $self->_get_next_token; + last OUTER; + } + + ## Step 9.3 + $inner_loop_counter++; + + ## Step 9.4 $node_i_in_open--; $node = $self->{open_elements}->[$node_i_in_open]; - ## Step 6.2 + ## Step 9.5 my $node_i_in_active; my $node_token; S7S2: { @@ -1660,13 +1719,13 @@ } } splice @{$self->{open_elements}}, $node_i_in_open, 1; - redo S7; + redo INNER; } # S7S2 - ## Step 6.3 - last S7 if $node->[0] eq $formatting_element->[0]; + ## Step 9.6 + last INNER if $node->[0] eq $formatting_element->[0]; - ## Step 6.4 + ## Step 9.7 if ($node->[0]->hasChildNodes ()) { my $new_element = []; @@ -1694,23 +1753,23 @@ $node = $new_element; } - ## Step 6.5 + ## Step 9.8 if ($last_node->[0] eq $furthest_block->[0]) { $bookmark_prev_el = $node->[0]; } - ## Step 6.6 + ## Step 9.9 $node->[0]->appendChild 
($last_node->[0]); - ## Step 6.7 + ## Step 9.10 $last_node = $node; - ## Step 6.8 - redo S7; - } # S7 + ## Step 9.11 + redo INNER; + } # INNER - ## Step 7 + ## Step 10 if ($common_ancestor_node->[1] & TABLE_ROWS_EL) { ## Foster parenting. my $foster_parent_element; @@ -1734,7 +1793,7 @@ $common_ancestor_node->[0]->appendChild ($last_node->[0]); } - ## Step 8 + ## Step 11 my $new_element = []; $new_element->[0] = $self->{document}->createElementNS((HTML_NS), $formatting_element->[2]->{tag_name}); @@ -1756,14 +1815,14 @@ $new_element->[1] = $formatting_element->[1]; $new_element->[2] = $formatting_element->[2]; - ## Step 9 + ## Step 12 my @cn = $furthest_block->[0]->childNodes; $new_element->[0]->appendChild($_) for @cn; - ## Step 10 + ## Step 13 $furthest_block->[0]->appendChild ($new_element->[0]); - ## Step 11 + ## Step 14 my $i; AFE: for (reverse 0..$#$active_formatting_elements) { if ($active_formatting_elements->[$_]->[0] eq $formatting_element->[0]) { @@ -1777,7 +1836,7 @@ } # AFE splice @$active_formatting_elements, $i + 1, 0, $new_element; - ## Step 12 + ## Step 15 undef $i; OE: for (reverse 0..$#{$self->{open_elements}}) { if ($self->{open_elements}->[$_]->[0] eq $formatting_element->[0]) { @@ -1791,9 +1850,9 @@ } # OE splice @{$self->{open_elements}}, $i + 1, 0, $new_element; - ## Step 13 - redo FET; - } # FET + ## Step 16 + redo OUTER; + } # OUTER }; # $formatting_end_tag my $reconstruct_active_formatting_elements = sub ($$$$) { # MUST @@ -1853,7 +1912,7 @@ S7: { ## Step 8 - my $clone = [$entry->[0]->cloneNode (0), $entry->[1]]; + my $clone = [$entry->[0]->cloneNode(0), $entry->[1], $entry->[2]]; ## Step 9 $insert->($self, $clone->[0], $open_tables); @@ -1963,314 +2022,89 @@ ## calling |manakai_append_text| method. B: while (1) { - - ## The "in table text" insertion mode. 
- if ($self->{insertion_mode} & TABLE_IMS and - not $self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and - not $self->{insertion_mode} & IN_CDATA_RCDATA_IM) { - C: { - my $s; - if ($token->{type} == CHARACTER_TOKEN) { - - $self->{pending_chars} ||= []; - push @{$self->{pending_chars}}, $token; - $token = $self->_get_next_token; - next B; - } else { - if ($self->{pending_chars}) { - $s = join '', map { $_->{data} } @{$self->{pending_chars}}; - delete $self->{pending_chars}; - if ($s =~ /[^\x09\x0A\x0C\x0D\x20]/) { - - # - } else { - - $self->{open_elements}->[-1]->[0]->appendTextFromUnicode($self,$s); - last C; - } - } else { - - last C; - } - } - - ## "in table" insertion mode, "Anything else". - - ## Foster parenting. - $self->{parse_error}->(level => $self->{level}->{must}, type => 'in table:#text', token => $token); - - ## NOTE: As if in body, but insert into the foster parent element. - $reconstruct_active_formatting_elements - ->($self, $insert_to_foster, $active_formatting_elements, $open_tables); - - if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) { - # MUST - my $foster_parent_element; - my $next_sibling; - OE: for (reverse 0..$#{$self->{open_elements}}) { - if ($self->{open_elements}->[$_]->[1] == TABLE_EL) { - - $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0]; - $next_sibling = $self->{open_elements}->[$_]->[0]; - undef $next_sibling - unless $next_sibling->parentNode eq $foster_parent_element; - last OE; - } - } # OE - $foster_parent_element ||= $self->{open_elements}->[0]->[0]; - - - $foster_parent_element->insertBefore - ($self->{document}->createTextNode ($s), $next_sibling); - - $open_tables->[-1]->[1] = 1; # tainted - $open_tables->[-1]->[2] = 1; # ~node inserted - } else { - ## NOTE: Fragment case or in a foster parent'ed element - ## (e.g. |a|). 
In fragment case, whether the - ## character is appended to existing node or a new node is - ## created is irrelevant, since the foster parent'ed nodes - ## are discarded and fragment parsing does not invoke any - ## script. - - $self->{open_elements}->[-1]->[0]->appendTextFromUnicode($self, $s); - } - } # C - } # TABLE_IMS - - if ($token->{type} == DOCTYPE_TOKEN) { - - $self->{parse_error}->(level => $self->{level}->{must}, type => 'in html:#DOCTYPE', token => $token); - ## Ignore the token - ## Stay in the phase + if ($token->{n}++ == 100) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'parser impl error', # XXXtest + token => $token); + require Data::Dumper; + warn "====== HTML Parser Error ======\n"; + warn join (' ', map { $_->[0]->tagName } @{$self->{open_elements}}) . ' #' . $self->{insertion_mode} . "\n"; + warn Data::Dumper::Dumper ($token); $token = $self->_get_next_token; next B; - } elsif ($token->{type} == START_TAG_TOKEN and - $token->{tag_name} eq 'html') { - if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { - - $self->{parse_error}->(level => $self->{level}->{must}, type => 'after html', text => 'html', token => $token); - $self->{insertion_mode} = AFTER_BODY_IM; - } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) { - - $self->{parse_error}->(level => $self->{level}->{must}, type => 'after html', text => 'html', token => $token); - $self->{insertion_mode} = AFTER_FRAMESET_IM; - } else { - - } - - - $self->{parse_error}->(level => $self->{level}->{must}, type => 'not first start tag', token => $token); - my $top_el = $self->{open_elements}->[0]->[0]; - for my $attr_name (keys %{$token->{attributes}}) { - eval { - unless ($top_el->hasAttributeNS (undef, $attr_name)) { - - $top_el->setAttributeNS - (undef, $attr_name, $token->{attributes}->{$attr_name}->{value}); - } - }; - } + } + ## + if ( + (not @{$self->{open_elements}}) or + (not $self->{open_elements}->[-1]->[1] & FOREIGN_EL) or ## HTML element + 
($self->{open_elements}->[-1]->[1] == MML_TEXT_INTEGRATION_EL and + (($token->{type} == START_TAG_TOKEN and + $token->{tag_name} ne 'mglyph' and + $token->{tag_name} ne 'malignmark') or + $token->{type} == CHARACTER_TOKEN)) or + ($self->{open_elements}->[-1]->[1] & MML_AXML_EL and + $token->{type} == START_TAG_TOKEN and + $token->{tag_name} eq 'svg') or + ( ## If the current node is an HTML integration point (other + ## than |annotation-xml|). + $self->{open_elements}->[-1]->[1] == SVG_INTEGRATION_EL and + ($token->{type} == START_TAG_TOKEN or + $token->{type} == CHARACTER_TOKEN)) or + ( ## If the current node is an |annotation-xml| whose |encoding| + ## is |text/html| or |application/xhtml+xml| (HTML integration + ## point). + $self->{open_elements}->[-1]->[1] == MML_AXML_EL and + ($token->{type} == START_TAG_TOKEN or + $token->{type} == CHARACTER_TOKEN) and + do { + my $encoding = $self->{open_elements}->[-1]->[0]->get_attribute_ns (undef, 'encoding') || ''; + $encoding =~ tr/A-Z/a-z/; ## ASCII case-insensitive. + if ($encoding eq 'text/html' or + $encoding eq 'application/xhtml+xml') { + 1; + } else { + 0; + } + }) or + ($token->{type} == END_OF_FILE_TOKEN)) { - $token = $self->_get_next_token; - next B; - } elsif ($token->{type} == COMMENT_TOKEN) { - my $comment = $self->{document}->createComment ($token->{data}); - if ($self->{insertion_mode} & AFTER_HTML_IMS) { - - $self->{document}->appendChild ($comment); - } elsif ($self->{insertion_mode} == AFTER_BODY_IM) { - - $self->{open_elements}->[0]->[0]->appendChild ($comment); - } else { - - $self->{open_elements}->[-1]->[0]->appendChild ($comment); - $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted - } - $token = $self->_get_next_token; - next B; - } elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) { + ## Use the rules for the current insertion mode in HTML content. + # + } else { + ## Use the rules for the foreign content. 
if ($token->{type} == CHARACTER_TOKEN) { - $token->{data} =~ s/^\x0A// if $self->{ignore_newline}; - delete $self->{ignore_newline}; - - if (length $token->{data}) { - - $self->{open_elements}->[-1]->[0]->appendText #TODO - check - ($token->{data}); - } else { - + ## "In foreign content", character tokens. + my $data = $token->{data}; + while ($data =~ s/\x00/\x{FFFD}/) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL', token => $token); + } + $self->{open_elements}->[-1]->[0]->appendTextFromUnicode($self, $data); + if ($data =~ /[^\x09\x0A\x0C\x0D\x20]/) { + delete $self->{frameset_ok}; } - $token = $self->_get_next_token; + + $token = $self->_get_next_token; next B; - } elsif ($token->{type} == END_TAG_TOKEN) { - delete $self->{ignore_newline}; - - if ($token->{tag_name} eq 'script') { - - - ## Para 1-2 - my $script = pop @{$self->{open_elements}}; - - ## Para 3 - $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM; - - ## Para 4 - ## TODO: $old_insertion_point = $current_insertion_point; - ## TODO: $current_insertion_point = just before $self->{nc}; - - ## Para 5 - ## TODO: Run the $script->[0]. - - ## Para 6 - ## TODO: $current_insertion_point = $old_insertion_point; - - ## Para 7 - ## TODO: if ($pending_external_script) { - ## TODO: ... - ## TODO: } - - $token = $self->_get_next_token; - next B; - } else { - - - pop @{$self->{open_elements}}; - - $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM; - $token = $self->_get_next_token; - next B; - - } - } elsif ($token->{type} == END_OF_FILE_TOKEN) { - delete $self->{ignore_newline}; - - - $self->{parse_error}->(level => $self->{level}->{must}, type => 'not closed', - text => $self->{open_elements}->[-1]->[0] - ->tagName, - token => $token); - - #if ($self->{open_elements}->[-1]->[1] == SCRIPT_EL) { - # ## TODO: Mark as "already executed" - #} - - pop @{$self->{open_elements}}; - - $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM; - ## Reprocess. 
- next B; - } else { - die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type"; - } - - } elsif ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) { - if ($token->{type} == CHARACTER_TOKEN) { - ## "In foreign content" insertion mode, character token. - - if ( - ( ## If the current node is an HTML element. - not $self->{open_elements}->[-1]->[1] & FOREIGN_EL - ) or - ( ## If the current node is an HTML integration point (other - ## than |annotation-xml|). - $self->{open_elements}->[-1]->[1] == SVG_INTEGRATION_EL - ) or - ( ## If the current node is an |annotation-xml| whose - ## |encoding| is |text/html| or |application/xhtml+xml| - ## (HTML integration point). - $self->{open_elements}->[-1]->[1] == MML_AXML_EL and - do { - my $encoding = $self->{open_elements}->[-1]->[0]->getAttributeNS(undef, 'encoding') || ''; - $encoding =~ tr/A-Z/a-z/; ## ASCII case-insensitive. - if ($encoding eq 'text/html' or - $encoding eq 'application/xhtml+xml') { - 1; - } else { - 0; - } - } - ) - ) { - ## I.e., if the current node is an HTML element, or if the - ## current node is an HTML integration point. - - ## Process the token "using the rules for" the "in body" - ## insertion mode, then goto |continue|. - # ... - } else { - $self->{open_elements}->[-1]->[0]->manakai_append_text ($token->{data}); - - if ($token->{data} =~ /[^\x09\x0A\x0C\x0D\x20]/) { - delete $self->{frameset_ok}; - } - - $token = $self->_get_next_token; - next B; - } - } elsif ($token->{type} == START_TAG_TOKEN) { - ## "In foreign content" insertion mode, start tag token. + ## "In foreign content", start tag token. if ( - ( ## Start tag, if the current node is an HTML element. - not $self->{open_elements}->[-1]->[1] & FOREIGN_EL - ) or - ( ## Non-"mglyph" non-"malignmark" start tag, if the current - ## node is a MathML text integration point; Start tag, if - ## the current node is an HTML integration point (other - ## than |annotation-xml|). 
- $self->{open_elements}->[-1]->[1] & FOREIGN_FLOW_CONTENT_EL and - $self->{open_elements}->[-1]->[1] != MML_AXML_EL and - ( - $self->{open_elements}->[-1]->[1] & SVG_EL or - not {mglyph => 1, malignmark => 1}->{$token->{tag_name}} - ) - ) or - ( ## "svg" start tag, if the current node is an - ## |annotation-xml| element; Start tag, if the current - ## node is an |annotation-xml| whose |encoding| is - ## |text/html| or |application/xhtml+xml| (HTML - ## integration point). - $self->{open_elements}->[-1]->[1] == MML_AXML_EL and - ( - $token->{tag_name} eq 'svg' or - do { - my $encoding = $self->{open_elements}->[-1]->[0]->get_attribute_ns (undef, 'encoding') || ''; - $encoding =~ tr/A-Z/a-z/; ## ASCII case-insensitive. - if ($encoding eq 'text/html' or - $encoding eq 'application/xhtml+xml') { - 1; - } else { - 0; - } - } - ) - ) - ) { - - ## Process the token "using the rules for" the "in body" - ## insertion mode, then goto |continue|. - # ... - - } elsif ({ - b => 1, big => 1, blockquote => 1, body => 1, br => 1, - center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1, - em => 1, embed => 1, h1 => 1, h2 => 1, h3 => 1, - h4 => 1, h5 => 1, h6 => 1, head => 1, hr => 1, i => 1, - img => 1, li => 1, listing => 1, menu => 1, meta => 1, - nobr => 1, ol => 1, p => 1, pre => 1, ruby => 1, s => 1, - small => 1, span => 1, strong => 1, strike => 1, sub => 1, - sup => 1, table => 1, tt => 1, u => 1, ul => 1, var => 1, - }->{$token->{tag_name}} or - ($token->{tag_name} eq 'font' and - ($token->{attributes}->{color} or - $token->{attributes}->{face} or - $token->{attributes}->{size}))) { - ## "In foreign content" insertion mode, HTML-only start - ## tags. 
- + { + b => 1, big => 1, blockquote => 1, body => 1, br => 1, + center => 1, code => 1, dd => 1, div => 1, dl => 1, dt => 1, + em => 1, embed => 1, h1 => 1, h2 => 1, h3 => 1, h4 => 1, + h5 => 1, h6 => 1, head => 1, hr => 1, i => 1, img => 1, li => 1, + listing => 1, menu => 1, meta => 1, nobr => 1, ol => 1, + p => 1, pre => 1, ruby => 1, s => 1, small => 1, span => 1, + strong => 1, strike => 1, sub => 1, sup => 1, table => 1, + tt => 1, u => 1, ul => 1, var => 1, + }->{$token->{tag_name}} or + ($token->{tag_name} eq 'font' and + ($token->{attributes}->{color} or + $token->{attributes}->{face} or + $token->{attributes}->{size})) + ) { + ## "In foreign content", HTML-only start tag. $self->{parse_error}->(level => $self->{level}->{must}, type => 'not closed', text => $self->{open_elements}->[-1]->[0] ->localname, @@ -2303,11 +2137,11 @@ redo V; } - ## Reprocess. - next B; # goto |continue| + ## Reprocess the token. + next B; } else { - ## "In foreign content" insertion mode, foreign start tags. + ## "In foreign content", foreign start tag. my $nsuri = $self->{open_elements}->[-1]->[0]->namespaceURI; my $tag_name = $token->{tag_name}; if ($nsuri eq (SVG_NS)) { @@ -2433,10 +2267,12 @@ } } elsif ($token->{type} == END_TAG_TOKEN) { + ## "In foreign content", end tag. + if ($token->{tag_name} eq 'script' and $self->{open_elements}->[-1]->[1] == SVG_SCRIPT_EL) { - ## "In foreign content" insertion mode, "script" end tag, if - ## the current node is an SVG |script| element. + ## "In foreign content", "script" end tag, if the current + ## node is an SVG |script| element. pop @{$self->{open_elements}}; @@ -2444,17 +2280,8 @@ $token = $self->_get_next_token; next B; - } elsif (not $self->{open_elements}->[-1]->[1] & FOREIGN_EL) { - ## "In foreign content" insertion mode, an end tag, if the - ## current node is an HTML element. - - ## Process the token "using the rules for" the "in body" - ## insertion mode, then goto |continue|. - # ... 
- } else { - ## "In foreign content" insertion mode, an end tag, if the - ## current node is a foreign element. + ## "In foreign content", end tag. ## 1. my $i = -1; @@ -2475,6 +2302,8 @@ $tag_name =~ tr/A-Z/a-z/; ## ASCII case-insensitive. if ($tag_name eq $token->{tag_name}) { splice @{$self->{open_elements}}, $i, -$i, (); + $token = $self->_get_next_token; + next B; } ## 4. @@ -2487,29 +2316,234 @@ } } # LOOP - ## Steps 6. and 7. is done in the |continue| block. + ## Step 6 (Use the current insertion mode in HTML content) + # + } + + } elsif ($token->{type} == COMMENT_TOKEN) { + ## "In foreign content", comment token. + my $comment = $self->{document}->createComment ($token->{data}); + $self->{open_elements}->[-1]->[0]->appendChild ($comment); + $token = $self->_get_next_token; + next B; + } elsif ($token->{type} == DOCTYPE_TOKEN) { + + ## "In foreign content", DOCTYPE token. + $self->{parse_error}->(level => $self->{level}->{must}, type => 'in html:#DOCTYPE', token => $token); + ## Ignore the token. + $token = $self->_get_next_token; + next B; + } else { + die "$0: $token->{type}: Unknown token type"; + } + } # foreign + + ## The "in table text" insertion mode. + if ($self->{insertion_mode} & TABLE_IMS and + not $self->{insertion_mode} & IN_CDATA_RCDATA_IM) { + C: { + my $s; + if ($token->{type} == CHARACTER_TOKEN) { + + $self->{pending_chars} ||= []; + push @{$self->{pending_chars}}, $token; $token = $self->_get_next_token; next B; + } else { + ## There is an "insert pending chars" code clone. 
+ if ($self->{pending_chars}) { + $s = join '', map { $_->{data} } @{$self->{pending_chars}}; + delete $self->{pending_chars}; + while ($s =~ s/\x00//) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL', token => $token); + } + if ($s eq '') { + last C; + } elsif ($s =~ /[^\x09\x0A\x0C\x0D\x20]/) { + # + } else { + + $self->{open_elements}->[-1]->[0]->appendTextFromUnicode($self, $s); + last C; + } + } else { + + last C; + } + } + + ## "in table" insertion mode, "Anything else". + + ## Foster parenting. + $self->{parse_error}->(level => $self->{level}->{must}, type => 'in table:#text', token => $token); + + ## NOTE: As if in body, but insert into the foster parent element. + $reconstruct_active_formatting_elements + ->($self, $insert_to_foster, $active_formatting_elements, + $open_tables); + + if ($self->{open_elements}->[-1]->[1] & TABLE_ROWS_EL) { + # MUST + my $foster_parent_element; + my $next_sibling; + OE: for (reverse 0..$#{$self->{open_elements}}) { + if ($self->{open_elements}->[$_]->[1] == TABLE_EL) { + + $foster_parent_element = $self->{open_elements}->[$_ - 1]->[0]; + $next_sibling = $self->{open_elements}->[$_]->[0]; + undef $next_sibling + unless $next_sibling->parentNode eq $foster_parent_element; + last OE; + } + } # OE + $foster_parent_element ||= $self->{open_elements}->[0]->[0]; + + $foster_parent_element->insertBefore + ($self->{document}->createTextNode ($s), $next_sibling); + + $open_tables->[-1]->[1] = 1; # tainted + $open_tables->[-1]->[2] = 1; # ~node inserted + } else { + ## NOTE: Fragment case or in a foster parent'ed element + ## (e.g. |
a|). In fragment case, whether the + ## character is appended to existing node or a new node is + ## created is irrelevant, since the foster parent'ed nodes + ## are discarded and fragment parsing does not invoke any + ## script. + + $self->{open_elements}->[-1]->[0]->appendTextFromUnicode($self, $s); + } + } # C + } # TABLE_IMS + + if ($token->{type} == DOCTYPE_TOKEN) { + + $self->{parse_error}->(level => $self->{level}->{must}, type => 'in html:#DOCTYPE', token => $token); + ## Ignore the token + ## Stay in the phase + $token = $self->_get_next_token; + next B; + } elsif ($token->{type} == START_TAG_TOKEN and + $token->{tag_name} eq 'html') { + if ($self->{insertion_mode} == AFTER_HTML_BODY_IM) { + + $self->{parse_error}->(level => $self->{level}->{must}, type => 'after html', text => 'html', token => $token); + $self->{insertion_mode} = AFTER_BODY_IM; + } elsif ($self->{insertion_mode} == AFTER_HTML_FRAMESET_IM) { + + $self->{parse_error}->(level => $self->{level}->{must}, type => 'after html', text => 'html', token => $token); + $self->{insertion_mode} = AFTER_FRAMESET_IM; + } else { + + } + + + $self->{parse_error}->(level => $self->{level}->{must}, type => 'not first start tag', token => $token); + my $top_el = $self->{open_elements}->[0]->[0]; + for my $attr_name (keys %{$token->{attributes}}) { + unless ($top_el->hasAttributeNS(undef, $attr_name)) { + $top_el->set_attribute_ns + (undef, [undef, $attr_name], + $token->{attributes}->{$attr_name}->{value}); } + } + + $token = $self->_get_next_token; + next B; + } elsif ($token->{type} == COMMENT_TOKEN) { + my $comment = $self->{document}->createComment ($token->{data}); + if ($self->{insertion_mode} & AFTER_HTML_IMS) { + $self->{document}->appendChild ($comment); + } elsif ($self->{insertion_mode} == AFTER_BODY_IM) { + + $self->{open_elements}->[0]->[0]->appendChild($comment); + } else { + $self->{open_elements}->[-1]->[0]->appendChild($comment); + $open_tables->[-1]->[2] = 0 if @$open_tables; # ~node inserted 
+ } + $token = $self->_get_next_token; + next B; + } elsif ($self->{insertion_mode} & IN_CDATA_RCDATA_IM) { + if ($token->{type} == CHARACTER_TOKEN) { + $token->{data} =~ s/^\x0A// if $self->{ignore_newline}; + delete $self->{ignore_newline}; + + if (length $token->{data}) { + + ## NOTE: NULLs are replaced into U+FFFDs in tokenizer. + $self->{open_elements}->[-1]->[0]->appendTextFromUnicode + ($self, $token->{data}); + } else { + + } + $token = $self->_get_next_token; + next B; + } elsif ($token->{type} == END_TAG_TOKEN) { + delete $self->{ignore_newline}; + + if ($token->{tag_name} eq 'script') { + + + ## Para 1-2 + my $script = pop @{$self->{open_elements}}; + + ## Para 3 + $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM; + + ## Para 4 + ## TODO: $old_insertion_point = $current_insertion_point; + ## TODO: $current_insertion_point = just before $self->{nc}; + + ## Para 5 + ## TODO: Run the $script->[0]. + + ## Para 6 + ## TODO: $current_insertion_point = $old_insertion_point; + + ## Para 7 + ## TODO: if ($pending_external_script) { + ## TODO: ... + ## TODO: } + + $token = $self->_get_next_token; + next B; + } else { + + + pop @{$self->{open_elements}}; + + $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM; + $token = $self->_get_next_token; + next B; + } } elsif ($token->{type} == END_OF_FILE_TOKEN) { - ## "In foreign content" insertion mode, an end-of-file token. + delete $self->{ignore_newline}; - ## Process the token "using the rules for" the "in body" - ## insertion mode, then goto |continue|. - #... + $self->{parse_error}->(level => $self->{level}->{must}, type => 'not closed', + text => $self->{open_elements}->[-1]->[0] + ->manakai_local_name, + token => $token); + + #if ($self->{open_elements}->[-1]->[1] == SCRIPT_EL) { + # ## TODO: Mark as "already executed" + #} + + pop @{$self->{open_elements}}; + + $self->{insertion_mode} &= ~ IN_CDATA_RCDATA_IM; + ## Reprocess. 
+ next B; } else { - die "$0: $token->{type}: Unknown token type"; + die "$0: $token->{type}: In CDATA/RCDATA: Unknown token type"; } - - # ... } # insertion_mode # BEGIN:TOBYINK if ($self->{insertion_mode} == IN_HEAD_IM and - $token->{tag_name} eq 'object' and + ($token->{tag_name}||'') eq 'object' and $token->{type} == END_TAG_TOKEN and DATA($self->{'document'}, 'isHTML4')) { @@ -3367,16 +3401,24 @@ } elsif ($self->{insertion_mode} & BODY_IMS) { if ($token->{type} == CHARACTER_TOKEN) { ## "In body" insertion mode, character token. It is also used - ## for character tokens in "in foreign content" insertion + ## for character tokens "in foreign content" insertion ## mode, for certain cases. - + + while ($token->{data} =~ s/\x00//g) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL', token => $token); + } + if ($token->{data} eq '') { + $token = $self->_get_next_token; + next B; + } + $reconstruct_active_formatting_elements ->($self, $insert_to_current, $active_formatting_elements, $open_tables); $self->{open_elements}->[-1]->[0]->appendTextFromUnicode($self, $token->{data}); if ($self->{frameset_ok} and - $token->{data} =~ /[^\x09\x0A\x0C\x0D\x20\x{FFFD}]/) { + $token->{data} =~ /[^\x09\x0A\x0C\x0D\x20]/) { delete $self->{frameset_ok}; } @@ -4650,7 +4692,12 @@ } elsif ($self->{insertion_mode} & SELECT_IMS) { if ($token->{type} == CHARACTER_TOKEN) { - $self->{open_elements}->[-1]->[0]->appendTextFromUnicode($self, $token->{data}); + my $data = $token->{data}; + while ($data =~ s/\x00//) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL', token => $token); + } + $self->{open_elements}->[-1]->[0]->appendTextFromUnicode($self, $data) + if $data ne ''; $token = $self->_get_next_token; next B; } elsif ($token->{type} == START_TAG_TOKEN) { @@ -6293,8 +6340,6 @@ text => $self->{open_elements}->[-1]->[0] ->tagName, token => $token); - pop @{$self->{open_elements}} - while not $self->{open_elements}->[-1]->[1] == RUBY_EL; } 
last INSCOPE; } elsif ($node->[1] & SCOPING_EL) { @@ -6302,8 +6347,6 @@ last INSCOPE; } } # INSCOPE - - ## TODO: is not allowed. { @@ -6413,7 +6456,6 @@ delete $self->{self_closing}; } else { - $self->{insertion_mode} = IN_FOREIGN_CONTENT_IM | IN_BODY_IM; } $token = $self->_get_next_token; @@ -6432,7 +6474,8 @@ $token = $self->_get_next_token; next B; } elsif ($token->{tag_name} eq 'param' or - $token->{tag_name} eq 'source') { + $token->{tag_name} eq 'source' or + $token->{tag_name} eq 'track') { { my $el; @@ -6527,7 +6570,21 @@ } elsif ($token->{tag_name} eq 'input') { ## TODO: associate with $self->{form_element} if defined + pop @{$self->{open_elements}}; + + if ($token->{attributes}->{type}) { + my $type = $token->{attributes}->{type}->{value}; + $type =~ tr/A-Z/a-z/; ## ASCII case-insensitive. + if ($type eq 'hidden') { + # + } else { + delete $self->{frameset_ok}; + } + } else { + delete $self->{frameset_ok}; + } + delete $self->{self_closing}; } elsif ({ area => 1, br => 1, embed => 1, img => 1, wbr => 1, keygen => 1, @@ -6968,10 +7025,6 @@ } } next B; - } continue { # B - if ($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM) { - $self->_reset_insertion_mode; - } } # B ## Stop parsing # MUST @@ -7089,10 +7142,6 @@ $p->{line}++; $p->{column} = 0; - } elsif ($self->{nc} == 0x0000) { # NULL - - $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); - $self->{nc} = 0xFFFD; # REPLACEMENT CHARACTER # MUST } elsif (0xD800 <= $self->{nc} and $self->{nc} <= 0xDFFF) { $self->{parse_error}->(level => $self->{level}->{must}, type => 'surrogate'); ## XXX documentation diff -Nru libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Tokenizer.pm libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Tokenizer.pm --- libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser/Tokenizer.pm 2011-02-09 14:19:10.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser/Tokenizer.pm 2011-10-20 20:26:19.000000000 +0000 @@ -1,6 +1,6 @@ package 
HTML::HTML5::Parser::Tokenizer; # -*- Perl -*- use strict; -our $VERSION='0.103'; +our $VERSION='0.107'; ## This module implements the tokenization phase of both HTML5 and ## XML5. Notes like this are usually based on the latest HTML @@ -83,6 +83,8 @@ BEGIN { HTML::HTML5::Parser::Tokenizer->import (':token') } +use HTML::HTML5::Entities qw[%entity2char]; + ## ------ Tokenizer states ------ sub DATA_STATE () { 0 } @@ -228,7 +230,6 @@ ## of tokenization state constants. See Whatpm::HTML for the full ## list and the descriptions for constants. -sub IN_FOREIGN_CONTENT_IM () { 0b100000000000 } sub FOREIGN_EL () { 0b1_00000000000 } ## ------ Character reference mappings ------ @@ -394,10 +395,15 @@ emit => END_OF_FILE_TOKEN, reconsume => 1, }; +$Action->[DATA_STATE]->[0x0000] = { + name => 'data null', + emit => CHARACTER_TOKEN, + error => 'NULL', +}; $Action->[DATA_STATE]->[KEY_ELSE_CHAR] = { name => 'data else', emit => CHARACTER_TOKEN, - emit_data_read_until => q{<&}, + emit_data_read_until => qq{\x00<&}, }; $XMLAction->[DATA_STATE]->[0x005D] = { # ] name => 'data ]', @@ -407,7 +413,7 @@ $XMLAction->[DATA_STATE]->[KEY_ELSE_CHAR] = { name => 'data else xml', emit => CHARACTER_TOKEN, - emit_data_read_until => q{<&\]}, + emit_data_read_until => qq{\x00<&\]}, }; $Action->[RCDATA_STATE]->[0x0026] = { name => 'rcdata &', @@ -419,32 +425,41 @@ state => RCDATA_LT_STATE, }; $Action->[RCDATA_STATE]->[KEY_EOF_CHAR] = $Action->[DATA_STATE]->[KEY_EOF_CHAR]; +$Action->[RCDATA_STATE]->[0x0000] = { + name => 'rcdata null', + emit => CHARACTER_TOKEN, + emit_data => "\x{FFFD}", + error => 'NULL', +}; $Action->[RCDATA_STATE]->[KEY_ELSE_CHAR] = { name => 'rcdata else', emit => CHARACTER_TOKEN, - emit_data_read_until => q{<&}, + emit_data_read_until => qq{\x00<&}, }; $Action->[RAWTEXT_STATE]->[0x003C] = { name => 'rawtext <', state => RAWTEXT_LT_STATE, }; $Action->[RAWTEXT_STATE]->[KEY_EOF_CHAR] = $Action->[DATA_STATE]->[KEY_EOF_CHAR]; +$Action->[RAWTEXT_STATE]->[0x0000] = 
$Action->[RCDATA_STATE]->[0x0000]; $Action->[RAWTEXT_STATE]->[KEY_ELSE_CHAR] = { name => 'rawtext else', emit => CHARACTER_TOKEN, - emit_data_read_until => q{<}, + emit_data_read_until => qq{\x00<}, }; $Action->[SCRIPT_DATA_STATE]->[0x003C] = { name => 'script data <', state => SCRIPT_DATA_LT_STATE, }; $Action->[SCRIPT_DATA_STATE]->[KEY_EOF_CHAR] = $Action->[DATA_STATE]->[KEY_EOF_CHAR]; +$Action->[SCRIPT_DATA_STATE]->[0x0000] = $Action->[RAWTEXT_STATE]->[0x0000]; $Action->[SCRIPT_DATA_STATE]->[KEY_ELSE_CHAR] = $Action->[RAWTEXT_STATE]->[KEY_ELSE_CHAR]; $Action->[PLAINTEXT_STATE]->[KEY_EOF_CHAR] = $Action->[DATA_STATE]->[KEY_EOF_CHAR]; +$Action->[PLAINTEXT_STATE]->[0x0000] = $Action->[RAWTEXT_STATE]->[0x0000]; $Action->[PLAINTEXT_STATE]->[KEY_ELSE_CHAR] = { name => 'plaintext else', emit => CHARACTER_TOKEN, - emit_data_read_until => q{}, + emit_data_read_until => qq{\x00}, }; # "Tag open state" is known as "tag state" in XML5. $Action->[TAG_OPEN_STATE]->[0x0021] = { @@ -508,6 +523,16 @@ emit_delta => 1, }; $Action->[TAG_OPEN_STATE]->[KEY_ELSE_CHAR] = $Action->[TAG_OPEN_STATE]->[0x003E]; + $XMLAction->[TAG_OPEN_STATE]->[0x0000] = { + name => 'tag open null xml', + ct => { + type => START_TAG_TOKEN, + delta => 1, + append_tag_name => 0xFFFD, + }, + error => 'NULL', + state => TAG_NAME_STATE, + }; ## XML5: "<:" has a parse error. $XMLAction->[TAG_OPEN_STATE]->[KEY_ELSE_CHAR] = { name => 'tag open else xml', @@ -628,7 +653,7 @@ ## not XML5. ## NOTE: A short end tag token. - + $XMLAction->[CLOSE_TAG_OPEN_STATE]->[0x003E] = { name => 'end tag open > xml', error => 'empty end tag', @@ -666,6 +691,16 @@ ## the |data| of the comment token generated from the bogus end tag, ## as defined in the "bogus comment state" entry. }; + $XMLAction->[CLOSE_TAG_OPEN_STATE]->[0x0000] = { + name => 'end tag open null xml', + ct => { + type => END_TAG_TOKEN, + delta => 2, + append_tag_name => 0xFFFD, + }, + error => 'NULL', + state => TAG_NAME_STATE, ## XML5: "end tag name state". 
+ }; ## XML5: "[CLOSE_TAG_OPEN_STATE]->[KEY_ELSE_CHAR] = { name => 'end tag open else xml', @@ -711,6 +746,13 @@ name => 'tag name /', state => SELF_CLOSING_START_TAG_STATE, }; +$Action->[TAG_NAME_STATE]->[0x0000] = { + name => 'tag name null', + ct => { + append_tag_name => 0xFFFD, + }, + error => 'NULL', +}; $Action->[TAG_NAME_STATE]->[KEY_ELSE_CHAR] = { name => 'tag name else', ct => { @@ -817,6 +859,18 @@ state => DATA_STATE, reconsume => 1, }; +$Action->[SCRIPT_DATA_ESCAPED_STATE]->[0x0000] = +$Action->[SCRIPT_DATA_ESCAPED_DASH_STATE]->[0x0000] = +$Action->[SCRIPT_DATA_ESCAPED_DASH_DASH_STATE]->[0x0000] = +$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_STATE]->[0x0000] = +$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_DASH_STATE]->[0x0000] = +$Action->[SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH_STATE]->[0x0000] = { + name => 'script data escaped null', + emit => CHARACTER_TOKEN, + emit_data => "\x{FFFD}", + error => 'NULL', + state => SCRIPT_DATA_ESCAPED_STATE, +}; $Action->[SCRIPT_DATA_ESCAPED_STATE]->[KEY_ELSE_CHAR] = { name => 'script data escaped else', emit => CHARACTER_TOKEN, @@ -961,6 +1015,12 @@ ca => {set_name => 0x0000}, state => ATTRIBUTE_NAME_STATE, }; +$Action->[BEFORE_ATTRIBUTE_NAME_STATE]->[0x0000] = { + name => 'before attr name null', + ca => {set_name => 0xFFFD}, + error => 'NULL', + state => ATTRIBUTE_NAME_STATE, +}; ## XML5: ":" raises a parse error and is ignored. $Action->[BEFORE_ATTRIBUTE_NAME_STATE]->[KEY_ELSE_CHAR] = { name => 'before attr name else', @@ -1025,6 +1085,11 @@ error => 'bad attribute name', ## XML5: Not a parse error. 
ca => {name => 0x0000}, }; +$Action->[ATTRIBUTE_NAME_STATE]->[0x0000] = { + name => 'attr name null', + ca => {name => 0xFFFD}, + error => 'NULL', +}; $Action->[ATTRIBUTE_NAME_STATE]->[KEY_ELSE_CHAR] = { name => 'attr name else', ca => {name => 0x0000}, @@ -1082,6 +1147,13 @@ ca => {set_name => 0x0000}, state => ATTRIBUTE_NAME_STATE, }; +$Action->[AFTER_ATTRIBUTE_NAME_STATE]->[0x0000] = { + name => q[after attr name else], + ca => {set_name => 0xFFFD}, + error => 'NULL', + #error2(xml) => 'no attr value', ## XML5: Not a parse error. + state => ATTRIBUTE_NAME_STATE, +}; $Action->[AFTER_ATTRIBUTE_NAME_STATE]->[KEY_ELSE_CHAR] = { name => q[after attr name else], ca => {set_name => 0x0000}, @@ -1136,6 +1208,13 @@ ca => {value => 1}, state => ATTRIBUTE_VALUE_UNQUOTED_STATE, }; +$Action->[BEFORE_ATTRIBUTE_VALUE_STATE]->[0x0000] = { + name => 'before attr value null', + ca => {value => "\x{FFFD}"}, + error => 'NULL', + #error2(xml) => 'unquoted attr value', ## XML5: Not a parse error. + state => ATTRIBUTE_VALUE_UNQUOTED_STATE, +}; $XMLAction->[BEFORE_ATTRIBUTE_VALUE_STATE]->[KEY_ELSE_CHAR] = { name => 'before attr value else xml', error => 'unquoted attr value', ## XML5: Not a parse error. # XXXdocumentation @@ -1284,7 +1363,7 @@ if (my $aca = $action->{ca}) { if ($aca->{value}) { - $self->{ca}->{value} .= chr $nc; + $self->{ca}->{value} .= $aca->{value} ne '1' ? 
$aca->{value} : chr $nc; } elsif (defined $aca->{name}) { $self->{ca}->{name} .= chr ($nc + $aca->{name}); } elsif (defined $aca->{set_name}) { @@ -1650,6 +1729,22 @@ } else { die "$0: $self->{ct}->{type}: Unknown token type"; } + } elsif ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + $self->{ca}->{value} .= "\x{FFFD}"; + ## Stay in the state + + if ($self->{char_buffer_pos} < length $self->{char_buffer}) { + $self->{line_prev} = $self->{line}; + $self->{column_prev} = $self->{column}; + $self->{column}++; + $self->{nc} + = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); + } else { + $self->{set_nc}->($self); + } + + redo A; } else { ## XML5 [ATTLIST]: Not defined yet. if ($self->{is_xml} and $nc == 0x003C) { # < @@ -1661,7 +1756,7 @@ } $self->{ca}->{value} .= chr ($nc); $self->{read_until}->($self->{ca}->{value}, - qq["&<\x09\x0C\x20], + qq[\x00"&<\x09\x0C\x20], length $self->{ca}->{value}); ## Stay in the state @@ -1787,6 +1882,22 @@ } else { die "$0: $self->{ct}->{type}: Unknown token type"; } + } elsif ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + $self->{ca}->{value} .= "\x{FFFD}"; + ## Stay in the state + + if ($self->{char_buffer_pos} < length $self->{char_buffer}) { + $self->{line_prev} = $self->{line}; + $self->{column_prev} = $self->{column}; + $self->{column}++; + $self->{nc} + = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); + } else { + $self->{set_nc}->($self); + } + + redo A; } else { ## XML5 [ATTLIST]: Not defined yet. 
if ($self->{is_xml} and $nc == 0x003C) { # < @@ -1798,7 +1909,7 @@ } $self->{ca}->{value} .= chr ($nc); $self->{read_until}->($self->{ca}->{value}, - qq['&<\x09\x0C\x20], + qq[\x00'&<\x09\x0C\x20], length $self->{ca}->{value}); ## Stay in the state @@ -1969,6 +2080,22 @@ } else { die "$0: $self->{ct}->{type}: Unknown token type"; } + } elsif ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + $self->{ca}->{value} .= "\x{FFFD}"; + ## Stay in the state + + if ($self->{char_buffer_pos} < length $self->{char_buffer}) { + $self->{line_prev} = $self->{line}; + $self->{column_prev} = $self->{column}; + $self->{column}++; + $self->{nc} + = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); + } else { + $self->{set_nc}->($self); + } + + redo A; } else { if ({ 0x0022 => 1, # " @@ -1985,7 +2112,7 @@ } $self->{ca}->{value} .= chr ($nc); $self->{read_until}->($self->{ca}->{value}, - qq["'=&` \x09\x0C<>], + qq[\x00"'=&` \x09\x0C<>], length $self->{ca}->{value}); ## Stay in the state @@ -2081,11 +2208,26 @@ return ($self->{ct}); # comment redo A; + } elsif ($nc == 0x0000) { + $self->{ct}->{data} .= "\x{FFFD}"; # comment + ## Stay in the state. + + if ($self->{char_buffer_pos} < length $self->{char_buffer}) { + $self->{line_prev} = $self->{line}; + $self->{column_prev} = $self->{column}; + $self->{column}++; + $self->{nc} + = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); + } else { + $self->{set_nc}->($self); + } + + redo A; } else { $self->{ct}->{data} .= chr ($nc); # comment $self->{read_until}->($self->{ct}->{data}, - q[>], + qq[\x00>], length $self->{ct}->{data}); ## Stay in the state. 
@@ -2138,10 +2280,7 @@ } redo A; - } elsif ((($self->{insertion_mode} & IN_FOREIGN_CONTENT_IM and - $self->{open_elements}->[-1]->[1] & FOREIGN_EL) or - $self->{is_xml}) and - $nc == 0x005B) { # [ +# $nc == 0x005B) { # [ $self->{state} = MD_CDATA_STATE; $self->{kwd} = '['; @@ -2375,6 +2514,22 @@ return ($self->{ct}); # comment redo A; + } elsif ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + $self->{ct}->{data} .= "\x{FFFD}"; # comment + $self->{state} = COMMENT_STATE; + + if ($self->{char_buffer_pos} < length $self->{char_buffer}) { + $self->{line_prev} = $self->{line}; + $self->{column_prev} = $self->{column}; + $self->{column}++; + $self->{nc} + = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); + } else { + $self->{set_nc}->($self); + } + + redo A; } else { $self->{ct}->{data} # comment @@ -2447,6 +2602,22 @@ return ($self->{ct}); # comment redo A; + } elsif ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + $self->{ct}->{data} .= "-\x{FFFD}"; # comment + $self->{state} = COMMENT_STATE; + + if ($self->{char_buffer_pos} < length $self->{char_buffer}) { + $self->{line_prev} = $self->{line}; + $self->{column_prev} = $self->{column}; + $self->{column}++; + $self->{nc} + = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); + } else { + $self->{set_nc}->($self); + } + + redo A; } else { $self->{ct}->{data} # comment @@ -2497,11 +2668,26 @@ return ($self->{ct}); # comment redo A; + } elsif ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + $self->{ct}->{data} .= "\x{FFFD}"; # comment + + if ($self->{char_buffer_pos} < length $self->{char_buffer}) { + $self->{line_prev} = $self->{line}; + $self->{column_prev} = $self->{column}; + $self->{column}++; + $self->{nc} + = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); + } else { + $self->{set_nc}->($self); + } + + redo A; } else { 
$self->{ct}->{data} .= chr ($nc); # comment $self->{read_until}->($self->{ct}->{data}, - q[-], + qq[-\x00], length $self->{ct}->{data}); ## Stay in the state @@ -2550,6 +2736,22 @@ return ($self->{ct}); # comment redo A; + } elsif ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + $self->{ct}->{data} .= "-\x{FFFD}"; # comment + $self->{state} = COMMENT_STATE; + + if ($self->{char_buffer_pos} < length $self->{char_buffer}) { + $self->{line_prev} = $self->{line}; + $self->{column_prev} = $self->{column}; + $self->{column}++; + $self->{nc} + = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); + } else { + $self->{set_nc}->($self); + } + + redo A; } else { $self->{ct}->{data} .= '-' . chr ($nc); # comment @@ -2652,6 +2854,26 @@ return ($self->{ct}); # comment redo A; + } elsif ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + if ($state == COMMENT_END_BANG_STATE) { + $self->{ct}->{data} .= "--!\x{FFFD}"; # comment + } else { + $self->{ct}->{data} .= "--\x{FFFD}"; # comment + } + $self->{state} = COMMENT_STATE; + + if ($self->{char_buffer_pos} < length $self->{char_buffer}) { + $self->{line_prev} = $self->{line}; + $self->{column_prev} = $self->{column}; + $self->{column}++; + $self->{nc} + = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); + } else { + $self->{set_nc}->($self); + } + + redo A; } else { if ($state == COMMENT_END_BANG_STATE) { @@ -2791,6 +3013,22 @@ return ($self->{ct}); # DOCTYPE redo A; + } elsif ($nc == 0x0000) { + $self->{ct}->{name} = "\x{FFFD}"; + delete $self->{ct}->{quirks}; + $self->{state} = DOCTYPE_NAME_STATE; + + if ($self->{char_buffer_pos} < length $self->{char_buffer}) { + $self->{line_prev} = $self->{line}; + $self->{column_prev} = $self->{column}; + $self->{column}++; + $self->{nc} + = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); + } else { + $self->{set_nc}->($self); + } + + redo A; } else { 
$self->{ct}->{name} = chr $nc; @@ -2812,8 +3050,6 @@ } elsif ($state == DOCTYPE_NAME_STATE) { ## XML5: "DOCTYPE root name state". - ## ISSUE: Redundant "First," in the spec. - if ($is_space->{$nc}) { $self->{state} = AFTER_DOCTYPE_NAME_STATE; @@ -2893,6 +3129,21 @@ return ($self->{ct}); # DOCTYPE redo A; + } elsif ($nc == 0x0000) { + $self->{ct}->{name} .= "\x{FFFD}"; # DOCTYPE + ## Stay in the state. + + if ($self->{char_buffer_pos} < length $self->{char_buffer}) { + $self->{line_prev} = $self->{line}; + $self->{column_prev} = $self->{column}; + $self->{column}++; + $self->{nc} + = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); + } else { + $self->{set_nc}->($self); + } + + redo A; } else { $self->{ct}->{name} .= chr ($nc); # DOCTYPE @@ -3432,13 +3683,28 @@ ## Reconsume. return ($self->{ct}); # DOCTYPE redo A; + } elsif ($nc == 0x0000) { + $self->{ct}->{pubid} .= "\x{FFFD}"; # DOCTYPE/ENTITY/NOTATION + ## Stay in the state. + + if ($self->{char_buffer_pos} < length $self->{char_buffer}) { + $self->{line_prev} = $self->{line}; + $self->{column_prev} = $self->{column}; + $self->{column}++; + $self->{nc} + = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); + } else { + $self->{set_nc}->($self); + } + + redo A; } else { $self->{ct}->{pubid} .= chr $nc; # DOCTYPE/ENTITY/NOTATION - $self->{read_until}->($self->{ct}->{pubid}, q[">], + $self->{read_until}->($self->{ct}->{pubid}, qq[\x00">], length $self->{ct}->{pubid}); - ## Stay in the state + ## Stay in the state. if ($self->{char_buffer_pos} < length $self->{char_buffer}) { $self->{line_prev} = $self->{line}; @@ -3508,10 +3774,25 @@ ## reconsume return ($self->{ct}); # DOCTYPE/ENTITY/NOTATION redo A; + } elsif ($nc == 0x0000) { + $self->{ct}->{pubid} .= "\x{FFFD}"; # DOCTYPE/ENTITY/NOTATION + ## Stay in the state. 
+ + if ($self->{char_buffer_pos} < length $self->{char_buffer}) { + $self->{line_prev} = $self->{line}; + $self->{column_prev} = $self->{column}; + $self->{column}++; + $self->{nc} + = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); + } else { + $self->{set_nc}->($self); + } + + redo A; } else { $self->{ct}->{pubid} .= chr $nc; # DOCTYPE/ENTITY/NOTATION - $self->{read_until}->($self->{ct}->{pubid}, q['>], + $self->{read_until}->($self->{ct}->{pubid}, qq[\x00'>], length $self->{ct}->{pubid}); ## Stay in the state @@ -3884,10 +4165,25 @@ ## reconsume return ($self->{ct}); # DOCTYPE/ENTITY/NOTATION redo A; + } elsif ($nc == 0x0000) { + $self->{ct}->{sysid} .= "\x{FFFD}"; # DOCTYPE/ENTITY/NOTATION + ## Stay in the state. + + if ($self->{char_buffer_pos} < length $self->{char_buffer}) { + $self->{line_prev} = $self->{line}; + $self->{column_prev} = $self->{column}; + $self->{column}++; + $self->{nc} + = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); + } else { + $self->{set_nc}->($self); + } + + redo A; } else { $self->{ct}->{sysid} .= chr $nc; # DOCTYPE/ENTITY/NOTATION - $self->{read_until}->($self->{ct}->{sysid}, q[">], + $self->{read_until}->($self->{ct}->{sysid}, qq[\x00">], length $self->{ct}->{sysid}); ## Stay in the state @@ -3956,10 +4252,25 @@ ## reconsume return ($self->{ct}); # DOCTYPE/ENTITY/NOTATION redo A; + } elsif ($nc == 0x0000) { + $self->{ct}->{sysid} .= "\x{FFFD}"; # DOCTYPE/ENTITY/NOTATION + ## Stay in the state. 
+ + if ($self->{char_buffer_pos} < length $self->{char_buffer}) { + $self->{line_prev} = $self->{line}; + $self->{column_prev} = $self->{column}; + $self->{column}++; + $self->{nc} + = ord substr ($self->{char_buffer}, $self->{char_buffer_pos}++, 1); + } else { + $self->{set_nc}->($self); + } + + redo A; } else { $self->{ct}->{sysid} .= chr $nc; # DOCTYPE/ENTITY/NOTATION - $self->{read_until}->($self->{ct}->{sysid}, q['>], + $self->{read_until}->($self->{ct}->{sysid}, qq[\x00'>], length $self->{ct}->{sysid}); ## Stay in the state @@ -4279,8 +4590,11 @@ $self->{ct}->{data} .= chr $nc; $self->{read_until}->($self->{ct}->{data}, - q<]>, + qq<\x00]>, length $self->{ct}->{data}); + ## NOTE: NULLs are left as is (see spec's comment). However, + ## a token cannot contain more than one U+0000 NULL character + ## for the ease of processing in the tree constructor. ## Stay in the state. @@ -4424,7 +4738,7 @@ (0x0061 <= $nc and $nc <= 0x007A)) { # a..z - require HTML::HTML5::Parser::NamedEntityList; + #require HTML::HTML5::Parser::NamedEntityList; $self->{state} = ENTITY_NAME_STATE; $self->{kwd} = chr $nc; $self->{entity__value} = $self->{kwd}; @@ -4808,9 +5122,9 @@ ## This is redundant for the same reason. $self->{entity_add} => 1, }->{$nc}))) { - our $EntityChar; + #local %entity2char; $self->{kwd} .= chr $nc; ## Bare entity name. - if (defined $EntityChar->{$self->{kwd}} or ## HTML charrefs. + if (defined $entity2char{$self->{kwd}} or ## HTML charrefs. $self->{ge}->{$self->{kwd}}) { ## XML general entities. if ($nc == 0x003B) { # ; if (defined $self->{ge}->{$self->{kwd}}) { @@ -4848,7 +5162,7 @@ } else { } - $self->{entity__value} = $EntityChar->{$self->{kwd}}; + $self->{entity__value} = $entity2char{$self->{kwd}}; } $self->{entity__match} = 1; ## Matched exactly with ";" entity. 
@@ -4865,7 +5179,7 @@ # } else { - $self->{entity__value} = $EntityChar->{$self->{kwd}}; + $self->{entity__value} = $entity2char{$self->{kwd}}; $self->{entity__match} = -1; ## Exactly matched to non-";" entity. ## Stay in the state. @@ -5010,7 +5324,7 @@ redo A; } - ## XML-only states + ## ========== XML-only states ========== } elsif ($state == PI_STATE) { ## XML5: "Pi state" and "DOCTYPE pi state". @@ -5037,8 +5351,11 @@ redo A; } else { ## XML5: "DOCTYPE pi state": Stay in the state. + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } $self->{ct} = {type => PI_TOKEN, - target => chr $nc, + target => $nc == 0x0000 ? "\x{FFFD}" : chr $nc, data => '', line => $self->{line_prev}, column => $self->{column_prev} - 1, @@ -5103,7 +5420,10 @@ redo A; } else { ## XML5: typo ("tag name" -> "target") - $self->{ct}->{target} .= chr $nc; # pi + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + $self->{ct}->{target} .= $nc == 0x0000 ? "\x{FFFD}" : chr $nc; # pi if ($self->{char_buffer_pos} < length $self->{char_buffer}) { $self->{line_prev} = $self->{line}; @@ -5169,8 +5489,11 @@ column => $self->{ct}->{column}}); redo A; } else { - $self->{ct}->{data} .= chr $nc; # pi - $self->{read_until}->($self->{ct}->{data}, q[?], + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + $self->{ct}->{data} .= $nc == 0x0000 ? "\x{FFFD}" : chr $nc; # pi + $self->{read_until}->($self->{ct}->{data}, qq[\x00?], length $self->{ct}->{data}); ## Stay in the state. @@ -5299,7 +5622,7 @@ } elsif ($nc == 0x0025) { # % ## XML5: Not defined yet. - ## TODO: + ## TODO: parameter entity expansion if (not $self->{stop_processing} and not $self->{document}->xml_standalone) { @@ -6025,7 +6348,10 @@ redo A; } else { ## XML5: [ATTLIST] Not defined yet. 
- $self->{ct}->{name} .= chr $nc; + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + $self->{ct}->{name} .= $nc == 0x0000 ? "\x{FFFD}" : chr $nc; $self->{state} = MD_NAME_STATE; if ($self->{char_buffer_pos} < length $self->{char_buffer}) { @@ -6137,7 +6463,10 @@ redo A; } else { ## XML5: [ATTLIST] Not defined yet. - $self->{ct}->{name} .= chr $nc; + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + $self->{ct}->{name} .= $nc == 0x0000 ? "\x{FFFD}" : chr $nc; ## Stay in the state. if ($self->{char_buffer_pos} < length $self->{char_buffer}) { @@ -6190,7 +6519,10 @@ redo A; } else { ## XML5: Not defined yet. - $self->{ca} = {name => chr ($nc), # attrdef + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + $self->{ca} = {name => $nc == 0x0000 ? "\x{FFFD}" : chr $nc, # attrdef tokens => [], line => $self->{line}, column => $self->{column}}; $self->{state} = DOCTYPE_ATTLIST_ATTRIBUTE_NAME_STATE; @@ -6274,7 +6606,10 @@ redo A; } else { ## XML5: Not defined yet. - $self->{ca}->{name} .= chr $nc; + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + $self->{ca}->{name} .= $nc == 0x0000 ? "\x{FFFD}" : chr $nc; ## Stay in the state. if ($self->{char_buffer_pos} < length $self->{char_buffer}) { @@ -6699,7 +7034,10 @@ ## Discard the current token. redo A; } else { - push @{$self->{ca}->{tokens}}, chr $nc; + if ($nc == 0x000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + push @{$self->{ca}->{tokens}}, $nc == 0x0000 ? "\x{FFFD}" : chr $nc; $self->{state} = ALLOWED_TOKEN_STATE; if ($self->{char_buffer_pos} < length $self->{char_buffer}) { @@ -6791,7 +7129,10 @@ ## Discard the current token. 
redo A; } else { - $self->{ca}->{tokens}->[-1] .= chr $nc; + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + $self->{ca}->{tokens}->[-1] .= $nc == 0x0000 ? "\x{FFFD}" : chr $nc; ## Stay in the state. if ($self->{char_buffer_pos} < length $self->{char_buffer}) { @@ -6886,7 +7227,10 @@ $self->{parse_error}->(level => $self->{level}->{must}, type => 'space in allowed token', ## TODO: type line => $self->{line_prev}, column => $self->{column_prev}); - $self->{ca}->{tokens}->[-1] .= ' ' . chr $nc; + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + $self->{ca}->{tokens}->[-1] .= ' ' . ($nc == 0x0000 ? "\x{FFFD}" : chr $nc); $self->{state} = ALLOWED_TOKEN_STATE; if ($self->{char_buffer_pos} < length $self->{char_buffer}) { @@ -7562,7 +7906,10 @@ ## Discard the current token. redo A; } else { - $self->{ct}->{notation} = chr $nc; # ENTITY + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + $self->{ct}->{notation} = $nc == 0x0000 ? "\x{FFFD}" : chr $nc; # ENTITY $self->{state} = NOTATION_NAME_STATE; if ($self->{char_buffer_pos} < length $self->{char_buffer}) { @@ -7624,7 +7971,10 @@ ## The current token. redo A; } else { - $self->{ct}->{notation} .= chr $nc; # ENTITY + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + $self->{ct}->{notation} .= $nc == 0x0000 ? "\x{FFFD}" : chr $nc; # ENTITY ## Stay in the state. if ($self->{char_buffer_pos} < length $self->{char_buffer}) { @@ -7678,7 +8028,10 @@ ## Discard the current token. redo A; } else { - $self->{ct}->{value} .= chr $nc; # ENTITY + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + $self->{ct}->{value} .= $nc == 0x0000 ? 
"\x{FFFD}" : chr $nc; # ENTITY if ($self->{char_buffer_pos} < length $self->{char_buffer}) { $self->{line_prev} = $self->{line}; @@ -7731,7 +8084,10 @@ ## Discard the current token. redo A; } else { - $self->{ct}->{value} .= chr $nc; # ENTITY + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + $self->{ct}->{value} .= $nc == 0x0000 ? "\x{FFFD}" : chr $nc; # ENTITY if ($self->{char_buffer_pos} < length $self->{char_buffer}) { $self->{line_prev} = $self->{line}; @@ -7846,7 +8202,10 @@ ## Discard the current token. redo A; } else { - $self->{ct}->{content} = [chr $nc]; + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + $self->{ct}->{content} = [$nc == 0x0000 ? "\x{FFFD}" : chr $nc]; $self->{state} = CONTENT_KEYWORD_STATE; if ($self->{char_buffer_pos} < length $self->{char_buffer}) { @@ -7908,7 +8267,10 @@ ## Discard the current token. redo A; } else { - $self->{ct}->{content}->[-1] .= chr $nc; # ELEMENT + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + $self->{ct}->{content}->[-1] .= $nc == 0x0000 ? "\x{FFFD}" : chr $nc; # ELEMENT ## Stay in the state. if ($self->{char_buffer_pos} < length $self->{char_buffer}) { @@ -8022,7 +8384,10 @@ ## Discard the current token. redo A; } else { - push @{$self->{ct}->{content}}, chr $nc; + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + push @{$self->{ct}->{content}}, $nc == 0x0000 ? "\x{FFFD}" : chr $nc; $self->{state} = CM_ELEMENT_NAME_STATE; if ($self->{char_buffer_pos} < length $self->{char_buffer}) { @@ -8136,7 +8501,10 @@ ## Discard the token. redo A; } else { - $self->{ct}->{content}->[-1] .= chr $nc; + if ($nc == 0x0000) { + $self->{parse_error}->(level => $self->{level}->{must}, type => 'NULL'); + } + $self->{ct}->{content}->[-1] .= $nc == 0x0000 ? "\x{FFFD}" : chr $nc; ## Stay in the state. 
if ($self->{char_buffer_pos} < length $self->{char_buffer}) { @@ -8456,7 +8824,7 @@ 1; -# Copyright 2007-2010 Wakaba . +# Copyright 2007-2011 Wakaba . # # This library is free software; you can redistribute it and/or modify # it under the same terms as Perl itself. diff -Nru libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser.pm libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser.pm --- libhtml-html5-parser-perl-0.103/lib/HTML/HTML5/Parser.pm 2011-02-09 14:19:43.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/lib/HTML/HTML5/Parser.pm 2011-10-20 20:26:19.000000000 +0000 @@ -26,11 +26,13 @@ use warnings; our $AUTOLOAD; -our $VERSION = '0.103'; +our $VERSION = '0.107'; use Carp; +use HTML::HTML5::Parser::Error; use HTML::HTML5::Parser::TagSoupParser; use LWP::UserAgent; +use Scalar::Util qw(blessed); use URI::file; use XML::LibXML; @@ -111,16 +113,12 @@ my $file = shift; my $opts = shift || {}; - unless (UNIVERSAL::isa($file, 'URI')) + unless (blessed($file) and $file->isa('URI')) { if ($file =~ /^[a-z0-9_\.-]+:\S+$/i) - { - $file = URI->new($file); - } + { $file = URI->new($file); } else - { - $file = URI::file->new($file); - } + { $file = URI::file->new_abs($file); } } my $ua; @@ -215,20 +213,23 @@ my $text = shift; my $opts = shift || {}; + $self->{'errors'} = []; $opts->{'parser_used'} = 'HTML::HTML5::Parser'; my $dom = XML::LibXML::Document->createDocument; if (defined $opts->{'encoding'} || 1) { HTML::HTML5::Parser::TagSoupParser->parse_byte_string($opts->{'encoding'}, $text, $dom, sub{ - my $err = \@_; + my $err = HTML::HTML5::Parser::Error->new(@_); + $self->{error_handler}->($err) if $self->{error_handler}; push @{$self->{'errors'}}, $err; }); } else { HTML::HTML5::Parser::TagSoupParser->parse_char_string($text, $dom, sub{ - my $err = \@_; + my $err = HTML::HTML5::Parser::Error->new(@_); + $self->{error_handler}->($err) if $self->{error_handler}; push @{$self->{'errors'}}, $err; }); } @@ -290,6 +291,7 @@ return 0; } + carp "HTML::HTML5::Parser doesn't 
understand '$func'." if length $func; } =head2 Additional Methods @@ -299,6 +301,36 @@ =over 8 +=item C + +Get/set an error handling function. Must be set to a coderef or undef. + +The error handling function will be called with a single parameter, a +L object. + +=cut + +sub error_handler +{ + my $self = shift; + $self->{error_handler} = shift if @_; + return $self->{error_handler}; +} + +=item C + +Returns a list of errors that occurred during the last parse. + +See L. + +=cut + +sub errors +{ + my $self = shift; + return @{ $self->{errors} }; +} + =item C $mode = $parser->compat_mode( $doc ); @@ -383,6 +415,7 @@ } } +sub DESTROY {} =back @@ -401,7 +434,7 @@ =head1 COPYRIGHT AND LICENSE -Copyright (C) 2007-2010 by Wakaba +Copyright (C) 2007-2011 by Wakaba Copyright (C) 2009-2011 by Toby Inkster diff -Nru libhtml-html5-parser-perl-0.103/LICENSE libhtml-html5-parser-perl-0.107/LICENSE --- libhtml-html5-parser-perl-0.103/LICENSE 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/LICENSE 2011-10-20 20:28:42.000000000 +0000 @@ -0,0 +1,377 @@ +This software is copyright (c) 2011 by Toby Inkster , Wakaba. + +This is free software; you can redistribute it and/or modify it under +the same terms as the Perl 5 programming language system itself. + +Terms of the Perl programming language system itself + +a) the GNU General Public License as published by the Free + Software Foundation; either version 1, or (at your option) any + later version, or +b) the "Artistic License" + +--- The GNU General Public License, Version 1, February 1989 --- + +This software is Copyright (c) 2011 by Toby Inkster , Wakaba. + +This is free software, licensed under: + + The GNU General Public License, Version 1, February 1989 + + GNU GENERAL PUBLIC LICENSE + Version 1, February 1989 + + Copyright (C) 1989 Free Software Foundation, Inc. 
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The license agreements of most software companies try to keep users +at the mercy of those companies. By contrast, our General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. The +General Public License applies to the Free Software Foundation's +software and to any other program whose authors commit to using it. +You can use it for your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Specifically, the General Public License is designed to make +sure that you have the freedom to give away or sell copies of free +software, that you receive source code or can get it if you want it, +that you can change the software or use pieces of it in new free +programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of a such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must tell them their rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. 
If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any program or other work which +contains a notice placed by the copyright holder saying it may be +distributed under the terms of this General Public License. The +"Program", below, refers to any such program or work, and a "work based +on the Program" means either the Program or any work containing the +Program or a portion of it, either verbatim or with modifications. Each +licensee is addressed as "you". + + 1. You may copy and distribute verbatim copies of the Program's source +code as you receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice and +disclaimer of warranty; keep intact all the notices that refer to this +General Public License and to the absence of any warranty; and give any +other recipients of the Program a copy of this General Public License +along with the Program. You may charge a fee for the physical act of +transferring a copy. + + 2. 
You may modify your copy or copies of the Program or any portion of +it, and copy and distribute such modifications under the terms of Paragraph +1 above, provided that you also do the following: + + a) cause the modified files to carry prominent notices stating that + you changed the files and the date of any change; and + + b) cause the whole of any work that you distribute or publish, that + in whole or in part contains the Program or any part thereof, either + with or without modifications, to be licensed at no charge to all + third parties under the terms of this General Public License (except + that you may choose to grant warranty protection to some or all + third parties, at your option). + + c) If the modified program normally reads commands interactively when + run, you must cause it, when started running for such interactive use + in the simplest and most usual way, to print or display an + announcement including an appropriate copyright notice and a notice + that there is no warranty (or else, saying that you provide a + warranty) and that users may redistribute the program under these + conditions, and telling the user how to view a copy of this General + Public License. + + d) You may charge a fee for the physical act of transferring a + copy, and you may at your option offer warranty protection in + exchange for a fee. + +Mere aggregation of another independent work with the Program (or its +derivative) on a volume of a storage or distribution medium does not bring +the other work under the scope of these terms. + + 3. 
You may copy and distribute the Program (or a portion or derivative of +it, under Paragraph 2) in object code or executable form under the terms of +Paragraphs 1 and 2 above provided that you also do one of the following: + + a) accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of + Paragraphs 1 and 2 above; or, + + b) accompany it with a written offer, valid for at least three + years, to give any third party free (except for a nominal charge + for the cost of distribution) a complete machine-readable copy of the + corresponding source code, to be distributed under the terms of + Paragraphs 1 and 2 above; or, + + c) accompany it with the information you received as to where the + corresponding source code may be obtained. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form alone.) + +Source code for a work means the preferred form of the work for making +modifications to it. For an executable file, complete source code means +all the source code for all modules it contains; but, as a special +exception, it need not include source code for modules which are standard +libraries that accompany the operating system on which the executable +file runs, or for standard header files or definitions files that +accompany that operating system. + + 4. You may not copy, modify, sublicense, distribute or transfer the +Program except as expressly provided under this General Public License. +Any attempt otherwise to copy, modify, sublicense, distribute or transfer +the Program is void, and will automatically terminate your rights to use +the Program under this License. However, parties who have received +copies, or rights to use copies, from you under this General Public +License will not have their licenses terminated so long as such parties +remain in full compliance. + + 5. 
By copying, distributing or modifying the Program (or any work based +on the Program) you indicate your acceptance of this license to do so, +and all its terms and conditions. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the original +licensor to copy, distribute or modify the Program subject to these +terms and conditions. You may not impose any further restrictions on the +recipients' exercise of the rights granted herein. + + 7. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of the license which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +the license, you may choose any version ever published by the Free Software +Foundation. + + 8. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to humanity, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + + To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively convey +the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 1, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19xx name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the +appropriate parts of the General Public License. Of course, the +commands you use may be called something other than `show w' and `show +c'; they could even be mouse-clicks or menu items--whatever suits your +program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + program `Gnomovision' (a program to direct compilers to make passes + at assemblers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +That's all there is to it! + + +--- The Artistic License 1.0 --- + +This software is Copyright (c) 2011 by Toby Inkster , Wakaba. 
+ +This is free software, licensed under: + + The Artistic License 1.0 + +The Artistic License + +Preamble + +The intent of this document is to state the conditions under which a Package +may be copied, such that the Copyright Holder maintains some semblance of +artistic control over the development of the package, while giving the users of +the package the right to use and distribute the Package in a more-or-less +customary fashion, plus the right to make reasonable modifications. + +Definitions: + + - "Package" refers to the collection of files distributed by the Copyright + Holder, and derivatives of that collection of files created through + textual modification. + - "Standard Version" refers to such a Package if it has not been modified, + or has been modified in accordance with the wishes of the Copyright + Holder. + - "Copyright Holder" is whoever is named in the copyright or copyrights for + the package. + - "You" is you, if you're thinking about copying or distributing this Package. + - "Reasonable copying fee" is whatever you can justify on the basis of media + cost, duplication charges, time of people involved, and so on. (You will + not be required to justify it to the Copyright Holder, but only to the + computing community at large as a market that must bear the fee.) + - "Freely Available" means that no fee is charged for the item itself, though + there may be fees involved in handling the item. It also means that + recipients of the item may redistribute it under the same conditions they + received it. + +1. You may make and give away verbatim copies of the source form of the +Standard Version of this Package without restriction, provided that you +duplicate all of the original copyright notices and associated disclaimers. + +2. You may apply bug fixes, portability fixes and other modifications derived +from the Public Domain or from the Copyright Holder. A Package modified in such +a way shall still be considered the Standard Version. + +3. 
You may otherwise modify your copy of this Package in any way, provided that +you insert a prominent notice in each changed file stating how and when you +changed that file, and provided that you do at least ONE of the following: + + a) place your modifications in the Public Domain or otherwise make them + Freely Available, such as by posting said modifications to Usenet or an + equivalent medium, or placing the modifications on a major archive site + such as ftp.uu.net, or by allowing the Copyright Holder to include your + modifications in the Standard Version of the Package. + + b) use the modified Package only within your corporation or organization. + + c) rename any non-standard executables so the names do not conflict with + standard executables, which must also be provided, and provide a separate + manual page for each non-standard executable that clearly documents how it + differs from the Standard Version. + + d) make other distribution arrangements with the Copyright Holder. + +4. You may distribute the programs of this Package in object code or executable +form, provided that you do at least ONE of the following: + + a) distribute a Standard Version of the executables and library files, + together with instructions (in the manual page or equivalent) on where to + get the Standard Version. + + b) accompany the distribution with the machine-readable source of the Package + with your modifications. + + c) accompany any non-standard executables with their corresponding Standard + Version executables, giving the non-standard executables non-standard + names, and clearly documenting the differences in manual pages (or + equivalent), together with instructions on where to get the Standard + Version. + + d) make other distribution arrangements with the Copyright Holder. + +5. You may charge a reasonable copying fee for any distribution of this +Package. You may charge any fee you choose for support of this Package. 
You +may not charge a fee for this Package itself. However, you may distribute this +Package in aggregate with other (possibly commercial) programs as part of a +larger (possibly commercial) software distribution provided that you do not +advertise this Package as a product of your own. + +6. The scripts and library files supplied as input to or produced as output +from the programs of this Package do not automatically fall under the copyright +of this Package, but belong to whomever generated them, and may be sold +commercially, and may be aggregated with this Package. + +7. C or perl subroutines supplied by you and linked into this Package shall not +be considered part of this Package. + +8. The name of the Copyright Holder may not be used to endorse or promote +products derived from this software without specific prior written permission. + +9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ +The End + diff -Nru libhtml-html5-parser-perl-0.103/Makefile.PL libhtml-html5-parser-perl-0.107/Makefile.PL --- libhtml-html5-parser-perl-0.103/Makefile.PL 2011-01-19 23:42:50.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/Makefile.PL 2011-10-07 09:01:11.000000000 +0000 @@ -1,41 +1,2 @@ -use strict; -use warnings; - -use inc::Module::Install; - -my $dist = 'HTML-HTML5-Parser'; -my $fn = "lib/$dist.pm"; $fn =~ s#-#/#g; - -name $dist; -perl_version_from $fn; -version_from $fn; -abstract_from $fn; -readme_from $fn; -author 'Toby Inkster '; -license 'perl'; - -requires 'Error' => 0; -requires 'HTML::Encoding' => '0.55'; -requires 'LWP::UserAgent' => 0; -test_requires 'Module::Signature' => '0.66'; -test_requires 'Test::More' => '0.61'; -requires 'XML::LibXML' => '1.60'; - -resources( - 'homepage' => "http://search.cpan.org/dist/$dist/", - 'repository' => "http://goddamn.co.uk/viewvc/perlmods/$dist/", - 'bugtracker' => "http://rt.cpan.org/Dist/Display.html?Queue=$dist", - ); - -write_doap_changes; -write_doap_changes_xml; - -install_script 'html2xhtml'; - -include 'Test::Signature'; -auto_install; -WriteAll( - 'meta' => 1, - 'sign' => 1, - ); +use inc::Module::Package 'RDF:standard'; diff -Nru libhtml-html5-parser-perl-0.103/MANIFEST libhtml-html5-parser-perl-0.107/MANIFEST --- libhtml-html5-parser-perl-0.103/MANIFEST 2010-04-09 16:16:53.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/MANIFEST 2011-10-20 20:28:59.000000000 +0000 @@ -1,41 +1,45 @@ +bin/html2xhtml +bin/html5debug Changes -Changes.ttl -Changes.xml -Makefile.PL -MANIFEST -MANIFEST.SKIP -README -META.yml -SIGNATURE - -lib/HTML/HTML5/Parser/Charset/DecodeHandle.pm -lib/HTML/HTML5/Parser/Charset/Info.pm -lib/HTML/HTML5/Parser/Charset/UnicodeChecker.pm -lib/HTML/HTML5/Parser/Charset/UniversalCharDet.pm -lib/HTML/HTML5/Parser/Charset/WebLatin1.pm -lib/HTML/HTML5/Parser/Charset/WebThai.pm -lib/HTML/HTML5/Parser/NamedEntityList.pm -lib/HTML/HTML5/Parser.pm -lib/HTML/HTML5/Parser/TagSoupParser.pm 
-lib/HTML/HTML5/Parser/Tokenizer.pm - -script/html2xhtml - -t/00sig.t -t/01basic.t - inc/Module/AutoInstall.pm +inc/Module/Install.pm inc/Module/Install/AutoInstall.pm +inc/Module/Install/AutoManifest.pm inc/Module/Install/Base.pm inc/Module/Install/Can.pm -inc/Module/Install/DOAPChangeSets.pm inc/Module/Install/Fetch.pm inc/Module/Install/Include.pm inc/Module/Install/Makefile.pm inc/Module/Install/Metadata.pm -inc/Module/Install.pm -inc/Module/Install/ReadmeFromPod.pm +inc/Module/Install/Package.pm inc/Module/Install/Scripts.pm +inc/Module/Install/TrustMetaYml.pm inc/Module/Install/Win32.pm inc/Module/Install/WriteAll.pm -inc/Test/Signature.pm +inc/Module/Package.pm +inc/Module/Package/Dist/RDF.pm +inc/Scalar/Util.pm +inc/Scalar/Util/PP.pm +inc/YAML/Tiny.pm +lib/HTML/HTML5/Parser.pm +lib/HTML/HTML5/Parser/Charset/DecodeHandle.pm +lib/HTML/HTML5/Parser/Charset/Info.pm +lib/HTML/HTML5/Parser/Charset/UnicodeChecker.pm +lib/HTML/HTML5/Parser/Charset/UniversalCharDet.pm +lib/HTML/HTML5/Parser/Charset/WebLatin1.pm +lib/HTML/HTML5/Parser/Charset/WebThai.pm +lib/HTML/HTML5/Parser/Error.pm +lib/HTML/HTML5/Parser/TagSoupParser.pm +lib/HTML/HTML5/Parser/Tokenizer.pm +LICENSE +Makefile.PL +MANIFEST This list of files +META.yml +meta/changes.ttl +meta/doap.ttl +meta/makefile.ttl +README +t/01basic.t +t/02html_4.t +TODO +SIGNATURE Public-key signature (added by MakeMaker) diff -Nru libhtml-html5-parser-perl-0.103/MANIFEST.SKIP libhtml-html5-parser-perl-0.107/MANIFEST.SKIP --- libhtml-html5-parser-perl-0.103/MANIFEST.SKIP 2011-01-19 23:38:03.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/MANIFEST.SKIP 1970-01-01 00:00:00.000000000 +0000 @@ -1,8 +0,0 @@ -^HTML-HTML5-Parser-.\...\.tar\.gz$ -^Makefile$ -^blib/ -^pm_to_blib -^blibdirs -\.svn -^example.*\.(pl|html)$ -^MYMETA\. 
diff -Nru libhtml-html5-parser-perl-0.103/meta/changes.ttl libhtml-html5-parser-perl-0.107/meta/changes.ttl --- libhtml-html5-parser-perl-0.103/meta/changes.ttl 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/meta/changes.ttl 2011-10-20 20:18:36.000000000 +0000 @@ -0,0 +1,172 @@ +@prefix : . +@prefix author: . +@prefix dbug: . +@prefix dcs: . +@prefix dc: . +@prefix foaf: . +@prefix my: . +@prefix rdfs: . +@prefix toby: . +@prefix xsd: . + +my:project :release my:v_0-00_01 . +my:v_0-00_01 + a :Version ; + dc:issued "2009-12-01"^^xsd:date ; + :revision "0.00_01"^^xsd:string ; + :file-release ; + rdfs:label "Developer preview"@en . + +my:project :release my:v_0-01 . +my:v_0-01 + a :Version ; + dc:issued "2009-12-03"^^xsd:date ; + :revision "0.01"^^xsd:string ; + :file-release ; + rdfs:label "Original version"@en . + +my:project :release my:v_0-02 . +my:v_0-02 + a :Version ; + dc:issued "2009-12-16"^^xsd:date ; + :revision "0.02"^^xsd:string ; + :file-release ; + dcs:changeset [ + dcs:versus my:v_0-01 ; + dcs:item + [ rdfs:label "Replace Inline::Python encoding detection with weaker, but native Perl HTML::Encoding package."@en ] , + [ rdfs:label "Bundle the html2xhtml tool."@en ; a dcs:Addition , dcs:Packaging ] + ] . + +my:project :release my:v_0-03 . +my:v_0-03 + a :Version ; + dc:issued "2010-01-15"^^xsd:date ; + :revision "0.03"^^xsd:string ; + :file-release ; + dcs:changeset [ + dcs:versus my:v_0-02 ; + dcs:item + [ rdfs:label "Module didn't use URI::file properly."@en ; a dcs:Bugfix ; rdfs:comment "Thanks shellac" ] , + [ rdfs:label "Upgrade distribution to my new packaging regime (auto-generated changelogs, etc)"@en ; a dcs:Update , dcs:Packaging ] , + [ rdfs:label "Copyright 2010."@en ; a dcs:Update , dcs:Documentation ] + ] . + +my:project :release my:v_0-04 . 
+my:v_0-04 + a :Version ; + dc:issued "2010-04-21"^^xsd:date ; + :revision "0.04"^^xsd:string ; + :file-release ; + dcs:changeset [ + dcs:versus my:v_0-03 ; + dcs:item + [ rdfs:label "Catch up to revision cf2c0df8a6dfb50fee923dfb21b14c83f282ccdc (2010-02-28) upstream."@en ; a dcs:Update ] + ] . + +my:project :release my:v_0-100 . +my:v_0-100 + a :Version ; + dc:issued "2010-06-23"^^xsd:date ; + :revision "0.100"^^xsd:string ; + :file-release ; + dcs:changeset [ + dcs:versus my:v_0-04 ; + dcs:item + [ rdfs:label "Minor bugfixes."@en ; a dcs:Bugfix ] + ] . + +my:project :release my:v_0-101 . +my:v_0-101 + a :Version ; + dc:issued "2010-06-30"^^xsd:date ; + :revision "0.101"^^xsd:string ; + :file-release ; + dcs:changeset [ + dcs:versus my:v_0-100 ; + dcs:item + [ rdfs:label "UTF-8 fix."@en ; a dcs:Bugfix ; dcs:fixes [ rdfs:label "Wide characters in DOM tree."@en ; dbug:reporter author:gwilliams ] ] + ] . + +my:project :release my:v_0-102 . +my:v_0-102 + a :Version ; + dc:issued "2011-01-19"^^xsd:date ; + :revision "0.102"^^xsd:string ; + :file-release ; + dcs:changeset [ + dcs:versus my:v_0-101 ; + dcs:item + [ rdfs:label "Fix source_line method."@en ; a dcs:Bugfix ] , + [ rdfs:label "Catch up to revision f2c921a886ab0b3dfb8d21b82525e98a4a921ad4 (2010-10-11) upstream."@en ; a dcs:Update ] , + [ rdfs:label "Allow element to appear in if document has an HTML4 doctype. This is a willful violation of the HTML5 parsing algorithm. (The may have elements as children, as well as any children that would normally be allowed in the of the document, such as ; any other content is treated as the beginning of the , and thus closes and . That's slightly looser than the HTML 4 spec which says only should be used, but stricter than the HTML 4 DTD which allows pretty much anything in there!)"@en ; a dcs:Addition ] , + [ rdfs:label "Support
element."@en ; a dcs:Addition ] , + [ rdfs:label "Support element."@en ; a dcs:Addition ] + ] . + +my:project :release my:v_0-103 . +my:v_0-103 + a :Version ; + dc:issued "2011-02-09"^^xsd:date ; + :revision "0.103"^^xsd:string ; + :file-release ; + dcs:changeset [ + dcs:versus my:v_0-101 ; + dcs:item + [ rdfs:label "Copyright 2011."@en ; a dcs:Update , dcs:Documentation ] , + [ rdfs:label "TagSoupParser.pm called a method that is renamed between this distribution and upstream using its upstream name."@en ; a dcs:Bugfix ] + ] . + +my:project :release my:v_0-104 . +my:v_0-104 + a :Version ; + dc:issued "2011-09-22"^^xsd:date ; + :revision "0.104"^^xsd:string ; + :file-release ; + dcs:changeset [ + dcs:versus my:v_0-103 ; + dcs:item + [ rdfs:label "Catch up to revision d81fcb920a1a3c351149cd66a64bf1b8ae14a172 (2011-08-21) upstream."@en ; a dcs:Update ] , + [ rdfs:label "Support element."@en ; a dcs:Addition ] , + [ rdfs:label "Some error handling stuff."@en ; a dcs:Addition ] + ] . + +my:project :release my:v_0-105 . +my:v_0-105 + a :Version ; + dc:issued "2011-10-07"^^xsd:date ; + :revision "0.105"^^xsd:string ; + :file-release ; + dcs:changeset [ + dcs:versus my:v_0-104 ; + dcs:item + [ rdfs:label "HTML::HTML5::Parser::Error overloads stringification."@en ] , + [ rdfs:label "Module::Package::RDF."@en ; a dcs:Packaging ] , + [ rdfs:label "Bundle 'html5debug' script."@en ; a dcs:Addition ] , + [ rdfs:label "use HTML::HTML5::Entities"@en ] + ] . + +my:project :release my:v_0-106 . +my:v_0-106 + a :Version ; + dc:issued "2011-10-10"^^xsd:date ; + :revision "0.106"^^xsd:string ; + :file-release ; + dcs:changeset [ + dcs:versus my:v_0-105 ; + dcs:item + [ rdfs:label "Tokenizer.pm was still trying to require NamedEntityList.pm."@en ; a dcs:Bugfix ] + ] . + +my:project :release my:v_0-107 . 
+my:v_0-107 + a :Version ; + dc:issued "2011-10-20"^^xsd:date ; + :revision "0.107"^^xsd:string ; + :file-release ; + dcs:changeset [ + dcs:versus my:v_0-106 ; + dcs:item [ rdfs:label "parse_file wasn't accepting relative file names"@en ; a dcs:Bugfix ] ; + dcs:item [ rdfs:label "html2xhtml now reads from STDIN by default."@en ; a dcs:Addition ] ; + dcs:item [ rdfs:label "html2xhtml can output to a file."@en ; a dcs:Addition ] + ] . diff -Nru libhtml-html5-parser-perl-0.103/meta/doap.ttl libhtml-html5-parser-perl-0.107/meta/doap.ttl --- libhtml-html5-parser-perl-0.103/meta/doap.ttl 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/meta/doap.ttl 2011-10-07 08:45:13.000000000 +0000 @@ -0,0 +1,43 @@ +@prefix : . +@prefix author: . +@prefix dbug: . +@prefix dcs: . +@prefix dc: . +@prefix foaf: . +@prefix my: . +@prefix rdfs: . +@prefix toby: . +@prefix xsd: . + +my:project + a :Project ; + :name "HTML-HTML5-Parser" ; + :shortdesc "parse HTML reliably"@en ; + :programming-language "Perl" ; + :homepage ; + :download-page ; + :bug-database ; + :repository [ a :SVNRepository ; :browse ] ; + :maintainer toby:i ; + :developer toby:i , my:dev-wakaba ; + :documenter toby:i ; + :tester toby:i ; + :created "2009-11-26"^^xsd:date ; + :license ; + :category + [ rdfs:label "Web"@en ] , + [ rdfs:label "HTML"@en ] , + [ rdfs:label "HTML5"@en ] . + +toby:i + a foaf:Person ; + foaf:name "Toby Inkster" ; + foaf:homepage ; + foaf:page ; + foaf:mbox ; + author:tobyink . + +my:dev-wakaba + a foaf:Person ; + foaf:name "Wakaba" ; + foaf:page . diff -Nru libhtml-html5-parser-perl-0.103/meta/makefile.ttl libhtml-html5-parser-perl-0.107/meta/makefile.ttl --- libhtml-html5-parser-perl-0.103/meta/makefile.ttl 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/meta/makefile.ttl 2011-10-20 20:25:16.000000000 +0000 @@ -0,0 +1,20 @@ +# This file provides instructions for packaging. + +@prefix : . 
+ + + :perl_version_from _:main ; + :version_from _:main ; + :readme_from _:main ; + :test_requires "Test::More 0.61" ; + :requires "Error" ; + :requires "HTML::Encoding 0.55" ; + :requires "HTML::HTML5::Entities 0.001" ; + :requires "LWP::UserAgent" ; + :requires "XML::LibXML 1.60" ; + :requires "Scalar::Util" ; + :install_script _:html5debug , _:html2xhtml . + +_:main "lib/HTML/HTML5/Parser.pm" . +_:html5debug "bin/html5debug" . +_:html2xhtml "bin/html2xhtml" . diff -Nru libhtml-html5-parser-perl-0.103/META.yml libhtml-html5-parser-perl-0.107/META.yml --- libhtml-html5-parser-perl-0.103/META.yml 2011-02-09 14:22:29.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/META.yml 2011-10-20 20:28:44.000000000 +0000 @@ -2,32 +2,40 @@ abstract: 'parse HTML reliably' author: - 'Toby Inkster ' + - Wakaba build_requires: ExtUtils::MakeMaker: 6.42 - Module::Signature: 0.66 Test::More: 0.61 configure_requires: ExtUtils::MakeMaker: 6.42 distribution_type: module -generated_by: 'Module::Install version 1.00' +generated_by: 'Module::Install version 1.02' +keywords: + - HTML + - HTML5 + - Web license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 +module_name: HTML::HTML5::Parser name: HTML-HTML5-Parser no_index: directory: - inc - t + - xt requires: Error: 0 HTML::Encoding: 0.55 + HTML::HTML5::Entities: 0.001 LWP::UserAgent: 0 + Scalar::Util: 0 XML::LibXML: 1.60 perl: 5.8.1 resources: bugtracker: http://rt.cpan.org/Dist/Display.html?Queue=HTML-HTML5-Parser - homepage: http://search.cpan.org/dist/HTML-HTML5-Parser/ + homepage: https://metacpan.org/release/HTML-HTML5-Parser license: http://dev.perl.org/licenses/ - repository: http://goddamn.co.uk/viewvc/perlmods/HTML-HTML5-Parser/ -version: 0.103 + repository: http://goddamn.co.uk/svn-web/perlmods/browse/HTML-HTML5-Parser/ +version: 0.107 diff -Nru libhtml-html5-parser-perl-0.103/README libhtml-html5-parser-perl-0.107/README --- libhtml-html5-parser-perl-0.103/README 2011-02-09 
14:22:08.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/README 2011-10-20 20:28:39.000000000 +0000 @@ -88,6 +88,18 @@ The module provides a few additional methods to obtain additional, non-DOM data from DOM nodes. + "error_handler" + Get/set an error handling function. Must be set to a coderef or + undef. + + The error handling function will be called with a single + parameter, a HTML::HTML5::Parser::Error object. + + "errors" + Returns a list of errors that occurred during the last parse. + + See HTML::HTML5::Parser::Error. + "compat_mode" $mode = $parser->compat_mode( $doc ); @@ -125,7 +137,7 @@ Toby Inkster, COPYRIGHT AND LICENSE - Copyright (C) 2007-2010 by Wakaba + Copyright (C) 2007-2011 by Wakaba Copyright (C) 2009-2011 by Toby Inkster diff -Nru libhtml-html5-parser-perl-0.103/script/html2xhtml libhtml-html5-parser-perl-0.107/script/html2xhtml --- libhtml-html5-parser-perl-0.103/script/html2xhtml 2010-04-09 16:16:53.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/script/html2xhtml 1970-01-01 00:00:00.000000000 +0000 @@ -1,11 +0,0 @@ -#!/usr/bin/perl - -use HTML::HTML5::Parser; - -my $uri = shift @ARGV - or die "Please specify an input URL or filename.\n"; - -my $parser = HTML::HTML5::Parser->new; -my $dom = $parser->parse_file($uri); - -print $dom->toString; \ No newline at end of file diff -Nru libhtml-html5-parser-perl-0.103/SIGNATURE libhtml-html5-parser-perl-0.107/SIGNATURE --- libhtml-html5-parser-perl-0.103/SIGNATURE 2011-02-09 14:22:35.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/SIGNATURE 2011-10-20 20:28:59.000000000 +0000 @@ -14,46 +14,54 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -SHA1 1ba5ede3776da2eea6f6032f929a22fc731361eb Changes -SHA1 8f6d6a5d4254c3d9fb1c32ab3e928a7f951cdb86 Changes.ttl -SHA1 5a466c862110557e3be6d65bfef0bf029c453230 Changes.xml -SHA1 c2de647e692d08ec96e390793af3aa490bed0aef MANIFEST -SHA1 e5bd966755a8ec604b39a51b3fb23b38a8f99d7a MANIFEST.SKIP -SHA1 49799c739febf8a1737853cfc6a699f42c285cd7 META.yml -SHA1 
c839ccf0cc9bcaf64211b7888ac5f422c6277f7d Makefile.PL -SHA1 0306177a716df0d481ab51c21f2541f607d51958 README -SHA1 20c73697e1713638140c719d8eaa19a275ed43a5 inc/Module/AutoInstall.pm -SHA1 7305dbe2904416e28decb05396988a5d51d578be inc/Module/Install.pm -SHA1 ca13d9875e1249f6e84f7070be8152c34837955e inc/Module/Install/AutoInstall.pm -SHA1 129960509127732258570c122042bc48615222e1 inc/Module/Install/Base.pm -SHA1 cf3356ed9a5bd2f732527ef9e7bc5ef4458c8a93 inc/Module/Install/Can.pm -SHA1 de189984fc99741cc060d1964f513e45bf6d5c74 inc/Module/Install/DOAPChangeSets.pm -SHA1 bf0a3e1977effc2832d7a813a76dce3f31b437b6 inc/Module/Install/Fetch.pm -SHA1 b501b0df59a5cd235cca473889f82c3d3429f39e inc/Module/Install/Include.pm -SHA1 b721c93ca5bc9a6aa863b49af15f1b1de6125935 inc/Module/Install/Makefile.pm -SHA1 026cc0551a0ad399d195e395b46bdf842e115192 inc/Module/Install/Metadata.pm -SHA1 a615bb050ca0b2d79bfa0224addcb83019a53b09 inc/Module/Install/ReadmeFromPod.pm -SHA1 d3a4c720c6ec7f8fce7df37002fbc9177fb77050 inc/Module/Install/Scripts.pm -SHA1 5457015ea5a50e93465bf2dafa29feebd547f85b inc/Module/Install/Win32.pm -SHA1 051e7fa8063908befa3440508d0584a2497b97db inc/Module/Install/WriteAll.pm -SHA1 1e6399a0585817abef500c34fce676fb9b0545f0 inc/Test/Signature.pm -SHA1 c6f636ccec77a31431d54f9f12a892ba7cb4dcf3 lib/HTML/HTML5/Parser.pm -SHA1 901d7674cc3080da204e7b37f6c894c21bf3bf66 lib/HTML/HTML5/Parser/Charset/DecodeHandle.pm -SHA1 4beb617d7d4c8684a162e93dadb23757def1a229 lib/HTML/HTML5/Parser/Charset/Info.pm -SHA1 f841655905e225deba08abba1a0fb213df09513f lib/HTML/HTML5/Parser/Charset/UnicodeChecker.pm -SHA1 6953f731978fae4cf5a32f6d93ce3474800c49cb lib/HTML/HTML5/Parser/Charset/UniversalCharDet.pm -SHA1 cacb52f1e2610248c68d11d989bf260950228be2 lib/HTML/HTML5/Parser/Charset/WebLatin1.pm -SHA1 5fb616de53eadfd16d56a8ec7fff2a18eb5ab3f6 lib/HTML/HTML5/Parser/Charset/WebThai.pm -SHA1 ff1d07455dc0d489f5629e9d8c3187f72e526667 lib/HTML/HTML5/Parser/NamedEntityList.pm -SHA1 
a1a7d92372d69593767558582bd90ce363e9d158 lib/HTML/HTML5/Parser/TagSoupParser.pm -SHA1 b47306873857033aeb43e35ec4fdff5431d87cf0 lib/HTML/HTML5/Parser/Tokenizer.pm -SHA1 2321f5a2cf05d9e066534afa4c35e124cdb35519 script/html2xhtml -SHA1 bb128dce2db8ad397dbeb957d79d084cc5d2391a t/00sig.t -SHA1 b8a5638837df2324dae3df17dcc005c2ac192a68 t/01basic.t +SHA1 95d134e1bd71206f1c77865e7c782607cc8885a9 Changes +SHA1 25a45e6a71f4964b941bd02ad2494b73b69fb8f0 LICENSE +SHA1 e8bdf8f7bd6f16dec356889108d6f95d2bddd684 MANIFEST +SHA1 c4da673cb0bb45e48909997635cfa67f85c4b40f META.yml +SHA1 7150e5e086ef493e1e527a1eeec44a8344b80db6 Makefile.PL +SHA1 514417ea59e5863c1913dc56620d0090622bc277 README +SHA1 aef0d4d3e351df0490c3575397791fb8067417d6 TODO +SHA1 36e39ced0c6857bfa999ab5a30efb1d8a742b468 bin/html2xhtml +SHA1 0255ba1cefd913be82c96d54693a3311533963fa bin/html5debug +SHA1 5d94bc10deff1dd74e4bc5dfa6fc015e39271f15 inc/Module/AutoInstall.pm +SHA1 40106479d4e07f379cb82ca1d69fca92e3a40f47 inc/Module/Install.pm +SHA1 34a24a530ecf0365cc02e4150b06c9bed702a441 inc/Module/Install/AutoInstall.pm +SHA1 c04f94f91fa97b9f8cfb5a36071098ab0e6c78e3 inc/Module/Install/AutoManifest.pm +SHA1 ae8aa01a73cb83da31c39e8eed1120c59cb530a1 inc/Module/Install/Base.pm +SHA1 5c87d2d0e2c08b5173259006c88ad81c24303f9d inc/Module/Install/Can.pm +SHA1 98daf9d8c50b4b7e8988cf1fa2b86044ad219533 inc/Module/Install/Fetch.pm +SHA1 f3e008113f7f49b0625083b6cc358a312854f613 inc/Module/Install/Include.pm +SHA1 e67589fcbacdda6c98ff34d8e26a004ab0467bdc inc/Module/Install/Makefile.pm +SHA1 209ea405d4ab94475661bb450d0ea042d2ec25b5 inc/Module/Install/Metadata.pm +SHA1 3b9281ddf7dd6d6f5de0a9642c69333023193c80 inc/Module/Install/Package.pm +SHA1 87011c8a6c6fd2070ddf05c1b9a6c2eb9e074a34 inc/Module/Install/Scripts.pm +SHA1 b86d0385e10881db680d28bde94f275e49e34a27 inc/Module/Install/TrustMetaYml.pm +SHA1 1326052d1df1065debee74f9d8583a734b9b3d00 inc/Module/Install/Win32.pm +SHA1 bb607f3715c40fc3bc1c46496587cdb215bc4fa2 
inc/Module/Install/WriteAll.pm +SHA1 26d58a041cd6b3d21db98b32e8fd1841aae21204 inc/Module/Package.pm +SHA1 4db02ca3854a0d95bf38139e9b714cc85b618189 inc/Module/Package/Dist/RDF.pm +SHA1 775bd24f2fdbb6dce51a8b5b0cdb01fccfce83c4 inc/Scalar/Util.pm +SHA1 b1b664983568bf822c327599eb88e223483ae96a inc/Scalar/Util/PP.pm +SHA1 eef6bff62046bff2ce08ba132d0b58fba30f40b4 inc/YAML/Tiny.pm +SHA1 d3a86bbfc960e8ac1c501ec1b78085f2cbaf0179 lib/HTML/HTML5/Parser.pm +SHA1 1e3e368c8449996ac046fa840b0686e9c36a82fa lib/HTML/HTML5/Parser/Charset/DecodeHandle.pm +SHA1 10132cb995136fd9aac48c3a6fd518f748b930ca lib/HTML/HTML5/Parser/Charset/Info.pm +SHA1 dd708491a230290eadf8edaf01b3c6aa68a1f72e lib/HTML/HTML5/Parser/Charset/UnicodeChecker.pm +SHA1 44e7bfc74467b357699ae42eaeb9947ddc7f6242 lib/HTML/HTML5/Parser/Charset/UniversalCharDet.pm +SHA1 798a2160e4ae9171d0daa21dfaf9ff61905f0be8 lib/HTML/HTML5/Parser/Charset/WebLatin1.pm +SHA1 9dc191a323cddf1328bf7ec8d2457a34d7cb2879 lib/HTML/HTML5/Parser/Charset/WebThai.pm +SHA1 74558142b136b56d523282175a9aa3709740be78 lib/HTML/HTML5/Parser/Error.pm +SHA1 0380159ccdd3586b1612dbde28c4b4439602ee4c lib/HTML/HTML5/Parser/TagSoupParser.pm +SHA1 e0b71c1582dc2007fb07389d08cdc2db66265127 lib/HTML/HTML5/Parser/Tokenizer.pm +SHA1 0228a00c035dab9ec239984e907205e805a4455c meta/changes.ttl +SHA1 225231ee459879a71bd63497c73a92cf51a6c36d meta/doap.ttl +SHA1 4ec77981db4ef6e38aa6b0871b6bec0092ad1186 meta/makefile.ttl +SHA1 b56593d4d41f4761ee8ade2ea0759670c504f549 t/01basic.t +SHA1 16920a5e9e70b8d96ddbdc46be5a578746307983 t/02html_4.t -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.10 (GNU/Linux) -iEYEARECAAYFAk1SoyoACgkQzr+BKGoqfTmzZgCfQgm0jrr24iiK+Kl4slhWxetF -twIAnRKutRhhRmjpz5ZQA8NB5hLKKl0P -=KVpo +iEYEARECAAYFAk6ghIsACgkQzr+BKGoqfTlghACgpg+Ge5a38RM++Z1oUHyt2ufJ +4PcAoLULX6axWS+a9t6FT2dvS5YnbE4Q +=Cex3 -----END PGP SIGNATURE----- diff -Nru libhtml-html5-parser-perl-0.103/t/00sig.t libhtml-html5-parser-perl-0.107/t/00sig.t --- libhtml-html5-parser-perl-0.103/t/00sig.t 
2010-04-09 16:16:53.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/t/00sig.t 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ -use lib 'inc'; -use Test::More tests => 1; -use Test::Signature; -signature_ok(); diff -Nru libhtml-html5-parser-perl-0.103/t/01basic.t libhtml-html5-parser-perl-0.107/t/01basic.t --- libhtml-html5-parser-perl-0.103/t/01basic.t 2010-04-09 16:16:53.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/t/01basic.t 2011-10-07 07:58:39.000000000 +0000 @@ -7,7 +7,7 @@ foo

Foo -

  • Bart +
    • Bart< The inequality is 2<3 .

    Baz


    @@ -23,4 +23,4 @@ my @italics = $dom->getElementsByTagName('i'); my $lone_letter = $italics[1]; -is($lone_letter->textContent, 't', "parsing seems to follow HTML5 rules"); +is($lone_letter->textContent, 't<', "parsing seems to follow HTML5 rules"); diff -Nru libhtml-html5-parser-perl-0.103/t/02html_4.t libhtml-html5-parser-perl-0.107/t/02html_4.t --- libhtml-html5-parser-perl-0.103/t/02html_4.t 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/t/02html_4.t 2011-03-23 20:54:31.000000000 +0000 @@ -0,0 +1,19 @@ +use Test::More tests => 2; +use HTML::HTML5::Parser; + +my $parser = HTML::HTML5::Parser->new; + +my $html = <foo + +

    foo

    +HTML + +my $dom_4 = $parser->parse_string(''.$html); +my $dom_5 = $parser->parse_string(''.$html); + +my ($object_4) = $dom_4->getElementsByTagName('object'); +my ($object_5) = $dom_5->getElementsByTagName('object'); + +is($object_4->parentNode->tagName, 'head', 'HTML 4 allows in .'); +is($object_5->parentNode->tagName, 'body', 'HTML 5 disallows in .'); diff -Nru libhtml-html5-parser-perl-0.103/TODO libhtml-html5-parser-perl-0.107/TODO --- libhtml-html5-parser-perl-0.103/TODO 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-html5-parser-perl-0.107/TODO 2011-10-07 09:01:30.000000000 +0000 @@ -0,0 +1 @@ +* Nothing in particular