--- libhtml-parser-perl-3.56.orig/util.c +++ libhtml-parser-perl-3.56/util.c @@ -95,14 +95,14 @@ ent_start = s; repl = 0; - if (*s == '#') { + if (s < end && *s == '#') { UV num = 0; UV prev = 0; int ok = 0; s++; - if (*s == 'x' || *s == 'X') { + if (s < end && (*s == 'x' || *s == 'X')) { s++; - while (*s) { + while (s < end) { char *tmp = strchr(PL_hexdigit, *s); if (!tmp) break; @@ -118,7 +118,7 @@ } } else { - while (isDIGIT(*s)) { + while (s < end && isDIGIT(*s)) { num = num * 10 + (*s - '0'); if (prev && num < prev) { /* overflow */ @@ -181,7 +181,7 @@ } else { char *ent_name = s; - while (isALNUM(*s)) + while (s < end && isALNUM(*s)) s++; if (ent_name != s && entity2char) { SV** svp; @@ -217,7 +217,7 @@ if (repl) { char *repl_allocated = 0; - if (*s == ';') + if (s < end && *s == ';') s++; t--; /* '&' already copied, undo it */ --- libhtml-parser-perl-3.56.orig/Parser.pm +++ libhtml-parser-perl-3.56/Parser.pm @@ -1048,9 +1048,10 @@ $p->parse_file(shift || die) || die $!; print "\n"; -More examples are found in the F<eg/> directory of the C<HTML-Parser> -distribution: the program C<hrefsub> shows how you can edit all links -found in a document; the program C<htextsub> shows how to edit the text only; the +On a Debian box, more examples can be found in the +/usr/share/doc/libhtml-parser-perl/examples directory. +The program C<hrefsub> shows how you can edit all links +found in a document and C<htextsub> how to edit the text only; the program C<hstrip> shows how you can strip out certain tags/elements and/or attributes; and the program C<htext> show how to obtain the plain text, but not any script/style content. 
--- libhtml-parser-perl-3.56.orig/t/entities.t +++ libhtml-parser-perl-3.56/t/entities.t @@ -1,6 +1,6 @@ use HTML::Entities qw(decode_entities encode_entities encode_entities_numeric); -use Test::More tests => 12; +use Test::More tests => 13; $a = "Våre norske tegn bør æres"; @@ -66,6 +66,8 @@ is(decode_entities("'"), "'"); is(encode_entities("'", "'"), "'"); +is(decode_entities("Attention Homeοωnөrs...1ѕt Tімe Eνөг"), + "Attention Home\x{3BF}\x{3C9}n\x{4E9}rs...1\x{455}t T\x{456}\x{43C}e E\x{3BD}\x{4E9}\x{433}"); __END__ # Quoted from rfc1866.txt --- libhtml-parser-perl-3.56.orig/eg/hform +++ libhtml-parser-perl-3.56/eg/hform @@ -4,7 +4,7 @@ use HTML::PullParser (); use HTML::Entities qw(decode_entities); -use Data::Dump qw(dump); +use Data::Dumper qw(Dumper); my @FORM_TAGS = qw(form input textarea button select option); @@ -46,7 +46,7 @@ while (defined(my $t = $p->get_token)) { next unless ref $t; # skip text last if $t->[0] eq "/select"; - #print "select ", dump($t), "\n"; + #print "select ", Dumper($t), "\n"; if ($t->[0] eq "option") { my $value = $t->[1]->{value}; my $text = get_text($p, "/option"); @@ -80,4 +80,4 @@ } } -print dump(\@forms), "\n"; +print Dumper(\@forms), "\n"; --- libhtml-parser-perl-3.56.orig/eg/htextsub +++ libhtml-parser-perl-3.56/eg/htextsub @@ -1,5 +1,7 @@ #!/usr/bin/perl -w +# Shows how to edit text in an HTML document. + use strict; my $code = shift || usage(); $code = 'sub edit_print { local $_ = shift; ' . $code . '; print }'; --- libhtml-parser-perl-3.56.orig/debian/copyright +++ libhtml-parser-perl-3.56/debian/copyright @@ -0,0 +1,26 @@ +This is Debian GNU/Linux's prepackaged version of HTML::Parser. This +is a set of perl modules which provide access to the world wide web +via HTTP and HTML tools. + +It is currently maintained by Kenneth J. Pronovici , +and was originally packaged by Michael Alan Dorman . + +The original sources should always be available from the Comprehensive Perl +Archive Network (CPAN). 
Visit to find a CPAN +site near you. + +The libhtml-parser-perl copyright is as follows: + + COPYRIGHT + + C 1995-2004 Gisle Aas. All rights reserved. + C 1999-2000 Michael A. Chase. All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the same terms as Perl itself. + +Perl is distributed under your choice of the GNU General Public License or +the Artistic License. On Debian GNU/Linux systems, the complete text of the +GNU General Public License can be found in `/usr/share/common-licenses/GPL' +and the Artistic Licence in `/usr/share/common-licenses/Artistic'. + --- libhtml-parser-perl-3.56.orig/debian/rules +++ libhtml-parser-perl-3.56/debian/rules @@ -0,0 +1,57 @@ +#!/usr/bin/make -f + +DEB_HOST_ARCH := $(shell dpkg-architecture -qDEB_HOST_ARCH) + +PERL=/usr/bin/perl + +binary: binary-arch + +binary-arch: build install + dh_testdir + dh_testroot + dh_installdocs TODO + chmod a+x eg/hform + dh_installexamples eg/hanchors eg/hform eg/hrefsub eg/hstrip eg/htext eg/htextsub eg/htitle + dh_installchangelogs Changes + dh_strip + dh_fixperms + dh_compress + dh_installdeb + dh_perl + dh_shlibdeps + dh_gencontrol + dh_md5sums + dh_builddeb + +binary-indep: + +install: build + dh_testdir + dh_testroot + dh_installdirs + $(MAKE) install INSTALLDIRS=vendor PREFIX=$(CURDIR)/debian/libhtml-parser-perl/usr + rm -rf debian/libhtml-parser-perl/usr/share/perl5 + +build: configure-stamp build-stamp +build-stamp: + dh_testdir + $(MAKE) OPTIMIZE="-O2 -g -Wall" +ifneq ($(DEB_HOST_ARCH),hppa) + $(MAKE) test +endif + touch build-stamp + +configure: configure-stamp +configure-stamp: + dh_testdir + $(PERL) Makefile.PL INSTALLDIRS=vendor + touch configure-stamp + +clean: + dh_testdir + dh_testroot + if [ -e Makefile ]; then $(MAKE) -i distclean; fi + rm -f build-stamp configure-stamp + dh_clean + +.PHONY: binary binary-arch binary-indep install build configure clean --- libhtml-parser-perl-3.56.orig/debian/changelog +++ 
libhtml-parser-perl-3.56/debian/changelog @@ -0,0 +1,463 @@ +libhtml-parser-perl (3.56-1ubuntu2.1) intrepid-security; urgency=low + + * SECURITY UPDATE: denial of service via incomplete SGML numeric + character reference + - util.c: check for end + - t/entities.t: add test + - http://github.com/gisle/html-parser/commit/b9aae1e43eb2c8e989510187cff0ba3e996f9a4c + - CVE-2009-3627 + + -- Marc Deslauriers Thu, 05 Nov 2009 10:15:19 -0500 + +libhtml-parser-perl (3.56-1ubuntu2) intrepid; urgency=low + + * Restore changes from 3.56-1build1. + + -- Colin Watson Sat, 14 Jun 2008 12:06:43 +0100 + +libhtml-parser-perl (3.56-1ubuntu1) intrepid; urgency=low + + * Disable the test suite on hppa for now; the threads test deadlocks. + (I've filed bug 239978.) + + -- Colin Watson Sat, 14 Jun 2008 12:04:53 +0100 + +libhtml-parser-perl (3.56-1build1) intrepid; urgency=low + + * Rebuild for perl 5.10. + + -- Matthias Klose Wed, 07 May 2008 14:17:43 +0200 + +libhtml-parser-perl (3.56-1) unstable; urgency=low + + * New upstream release + + -- Krzysztof Krzyzaniak (eloy) Thu, 18 Jan 2007 11:50:21 +0100 + +libhtml-parser-perl (3.55-1) unstable; urgency=low + + * New upstream release + * debian/compat: + + increased to 5 + * debian/control: + + Standards-Version: upgraded to 3.7.2.1 without additional changes + + -- Krzysztof Krzyzaniak (eloy) Thu, 13 Jul 2006 10:24:58 +0200 + +libhtml-parser-perl (3.54-1) unstable; urgency=low + + * New upstream release (bugfix) + * debian/control: + - Standards-Version: upgraded to 3.7.0 without additional changes + + -- Krzysztof Krzyzaniak (eloy) Thu, 4 May 2006 10:03:39 +0200 + +libhtml-parser-perl (3.52-1) unstable; urgency=low + + * New upstream release + + -- Krzysztof Krzyzaniak (eloy) Thu, 27 Apr 2006 13:28:01 +0200 + +libhtml-parser-perl (3.51-1) unstable; urgency=low + + * New upstream release + + -- Krzysztof Krzyzaniak (eloy) Thu, 23 Mar 2006 11:08:33 +0100 + +libhtml-parser-perl (3.50-1) unstable; urgency=low + + * New upstream release + + -- 
Krzysztof Krzyzaniak (eloy) Fri, 17 Feb 2006 10:10:06 +0100 + +libhtml-parser-perl (3.49-1) unstable; urgency=low + + * New upstream release + * debian/watched updated + * debian/control - added liburi-perl to dependencies + + -- Krzysztof Krzyzaniak (eloy) Wed, 8 Feb 2006 22:53:55 +0100 + +libhtml-parser-perl (3.48-1) unstable; urgency=low + + * New upstream release + + -- Krzysztof Krzyzaniak (eloy) Mon, 5 Dec 2005 09:36:55 +0100 + +libhtml-parser-perl (3.47-1) unstable; urgency=low + + * New upstream release + + -- Krzysztof Krzyzaniak (eloy) Wed, 23 Nov 2005 22:40:52 +0100 + +libhtml-parser-perl (3.46-1) unstable; urgency=low + + [ Krzysztof Krzyzaniak ] + * New upstream release (closes: #336002) + * chmod on hform example (closes: #330090) + + -- Krzysztof Krzyzaniak (eloy) Fri, 28 Oct 2005 12:16:53 +0200 + +libhtml-parser-perl (3.45-4) unstable; urgency=low + + * debian/rules: Install todo using dh_installdocs. + * debian/control: Added me to Uploaders. + + -- Florian Ragwitz Fri, 30 Sep 2005 02:33:10 +0200 + +libhtml-parser-perl (3.45-3) unstable; urgency=low + + [ Krzysztof Krzyzaniak ] + * debian/control - catalyst team takes package + * Bumped standards version to 3.6.2 + * Don't install README & TODO files since they are for install purposes + + -- Krzysztof Krzyzaniak (eloy) Thu, 29 Sep 2005 10:13:48 +0200 + +libhtml-parser-perl (3.45-2) unstable; urgency=low + + * Add comment saying what example/htextsub program does (closes: #299853). + - Asked submitter for suggestions on what should be added. + - Ended up taking very brief explanation from manpage. + - Open to adding something better if someone has a suggestion. + + -- Kenneth J. Pronovici Wed, 23 Mar 2005 17:27:44 -0600 + +libhtml-parser-perl (3.45-1) unstable; urgency=low + + * New upstream release. + + -- Kenneth J. Pronovici Sun, 9 Jan 2005 11:45:23 -0600 + +libhtml-parser-perl (3.44-1) unstable; urgency=low + + * New upstream release. + + -- Kenneth J. 
Pronovici Tue, 4 Jan 2005 10:51:59 -0600 + +libhtml-parser-perl (3.43-1) unstable; urgency=low + + * New upstream release (release early, release often, once again). + + -- Kenneth J. Pronovici Mon, 6 Dec 2004 17:01:38 -0600 + +libhtml-parser-perl (3.41-1) unstable; urgency=low + + * New upstream release. + + -- Kenneth J. Pronovici Thu, 2 Dec 2004 10:40:08 -0600 + +libhtml-parser-perl (3.40-1) unstable; urgency=low + + * New upstream release. + * Removed Debian-specific code from Makefile.PL; no longer needed + because Unicode support is no longer optional. + + -- Kenneth J. Pronovici Tue, 30 Nov 2004 12:41:01 -0600 + +libhtml-parser-perl (3.38-1) unstable; urgency=low + + * New upstream release. + + -- Kenneth J. Pronovici Thu, 11 Nov 2004 23:32:00 -0600 + +libhtml-parser-perl (3.36-1) unstable; urgency=low + + * New upstream release. + + -- Kenneth J. Pronovici Mon, 5 Apr 2004 18:49:29 -0500 + +libhtml-parser-perl (3.35-1) unstable; urgency=low + + * New upstream release. + + -- Kenneth J. Pronovici Sat, 13 Dec 2003 10:26:24 -0600 + +libhtml-parser-perl (3.34-1) unstable; urgency=low + + * New upstream release. + - Includes fix to segfault problem exposed by XMLTV (closes: #217616). + + -- Kenneth J. Pronovici Mon, 27 Oct 2003 17:48:36 -0600 + +libhtml-parser-perl (3.33-1) unstable; urgency=low + + * New upstream release. + + -- Kenneth J. Pronovici Sat, 18 Oct 2003 13:17:45 -0500 + +libhtml-parser-perl (3.32-1) unstable; urgency=low + + * New upstream release. + + -- Kenneth J. Pronovici Sun, 12 Oct 2003 13:43:50 -0500 + +libhtml-parser-perl (3.31-4) unstable; urgency=low + + * Add build dependency on perl (>= 5.8.1) to complete fix for #213529. + * Updated standards version to 3.6.1 per PTS recommendation. + + -- Kenneth J. Pronovici Wed, 1 Oct 2003 18:37:05 -0500 + +libhtml-parser-perl (3.31-3) unstable; urgency=low + + * Rebuild with Perl 5.8.1 to fix libwww-perl problem (closes: #213529). + + -- Kenneth J. 
Pronovici Wed, 1 Oct 2003 13:46:08 -0500 + +libhtml-parser-perl (3.31-2) unstable; urgency=low + + * Apply patch to fix 'hform' example (closes: #206546). + * Make hform example executable by default. + + -- Kenneth J. Pronovici Thu, 21 Aug 2003 21:36:01 -0500 + +libhtml-parser-perl (3.31-1) unstable; urgency=low + + * New upstream release (release early, release often... wow). + + -- Kenneth J. Pronovici Wed, 20 Aug 2003 10:59:21 -0500 + +libhtml-parser-perl (3.30-1) unstable; urgency=low + + * New upstream release. + + -- Kenneth J. Pronovici Mon, 18 Aug 2003 11:44:02 -0500 + +libhtml-parser-perl (3.29-1) unstable; urgency=low + + * New upstream release. + - The patch closing #195500 has been applied by upstream. + + -- Kenneth J. Pronovici Fri, 15 Aug 2003 08:33:34 -0500 + +libhtml-parser-perl (3.28-3) unstable; urgency=low + + * Fix for out-of-memory problem on 64-bit architectures (closes: #195500). + - Debian-only, since upstream has never replied to the CPAN bug report or my emails. + - I applied the patch directly from the Debian bug report, since it looked sensible. + - All of the regression tests still pass, at least on i386 and alpha. + + -- Kenneth J. Pronovici Sat, 12 Jul 2003 16:12:13 -0500 + +libhtml-parser-perl (3.28-2) unstable; urgency=low + + * Now reference "examples/" directory rather than "eg/" directory + in Parser.pm's POD manpage documentation (closes: #199707). + + -- Kenneth J. Pronovici Mon, 7 Jul 2003 13:21:56 -0500 + +libhtml-parser-perl (3.28-1) unstable; urgency=low + + * New upstream release, made separately from new maintainer release for clarity. + * Updated year in debian/copyright, to be consistent with README. + + -- Kenneth J. Pronovici Tue, 1 Jul 2003 21:05:51 -0500 + +libhtml-parser-perl (3.26-1) unstable; urgency=low + + * New maintainer. + * Updated debian/copyright. + * Changed section from 'interpreters' to 'perl'. + * Bumped standards version to 3.5.9. 
+ * Added debian/compat file at level 4; now require debhelper (>= 4.1.0). + * Got rid of duplicated section/priority in debian/control. + * Reworked debian/rules to standardize with my other packages. + * Added OPTIMIZE="-O2 -g -Wall" to build rule. + * Removed "." from description synopsis to quiet Lintian. + * Fix typo in Makefile.PL (closes: #166806). + * Added debian/watch file. + + -- Kenneth J. Pronovici Tue, 1 Jul 2003 18:23:14 -0500 + +libhtml-parser-perl (3.26-0.1) unstable; urgency=low + + * NMU (inactive maintainer) + * New upstream version (closes: Bug#168084) + * Add pedantic boilerplate to debian/copyright (closes: Bug#157593) + * Add examples to doc/examples (closes: Bug#155776, Bug#138132) + + -- Ivan Kohler Wed, 6 Nov 2002 11:07:36 -0800 + +libhtml-parser-perl (3.25-1.1) unstable; urgency=low + + * NMU for perl 5.8. No changes except build-dep on perl 5.8. + * Corrected case of Build-Depends line. + + -- Joey Hess Wed, 31 Jul 2002 05:12:35 +0000 + +libhtml-parser-perl (3.25-1) unstable; urgency=low + + * New upstream release, fixes build problem. 
+ + -- Michael Alan Dorman Sat, 12 May 2001 22:15:58 -0400 + +libhtml-parser-perl (3.24-1) unstable; urgency=low + + * New upstream release + + -- Michael Alan Dorman Wed, 9 May 2001 11:45:46 -0400 + +libhtml-parser-perl (3.23-1) unstable; urgency=low + + * New upstream release + + -- Michael Alan Dorman Tue, 8 May 2001 09:51:12 -0400 + +libhtml-parser-perl (3.22-1) unstable; urgency=low + + * New upstream release + + -- Michael Alan Dorman Wed, 18 Apr 2001 08:13:01 -0400 + +libhtml-parser-perl (3.21-1) unstable; urgency=low + + * New upstream release + + -- Michael Alan Dorman Fri, 13 Apr 2001 10:01:43 -0400 + +libhtml-parser-perl (3.20-1) unstable; urgency=low + + * New upstream release + * Close some old bugs (closes: bug#81351, bug#86442) + + -- Michael Alan Dorman Wed, 4 Apr 2001 18:07:09 -0400 + +libhtml-parser-perl (3.19-1) unstable; urgency=low + + * New upstream release + + -- Michael Alan Dorman Mon, 12 Mar 2001 08:11:48 -0500 + +libhtml-parser-perl (3.18-1) unstable; urgency=low + + * New upstream release + + -- Michael Alan Dorman Fri, 9 Mar 2001 16:15:06 -0500 + +libhtml-parser-perl (3.17-2) unstable; urgency=low + + * Damn, forgot to depend on debhelper (closes: bug#88394) + + -- Michael Alan Dorman Mon, 5 Mar 2001 04:54:54 -0800 + +libhtml-parser-perl (3.17-1) unstable; urgency=low + + * New upstream release + + -- Michael Alan Dorman Sat, 24 Feb 2001 14:35:22 -0800 + +libhtml-parser-perl (3.15-3) unstable; urgency=low + + * Fix build-depends and depends. + + -- Michael Alan Dorman Wed, 24 Jan 2001 16:54:19 -0500 + +libhtml-parser-perl (3.15-2) unstable; urgency=low + + * Fix debian/rules + * Recompiled with 5.6. Sorry to anyone using an embedded 5.005. 
(closes: bug#80707, bug#82651, bug#79601, bug#82942) + + -- Michael Alan Dorman Sun, 21 Jan 2001 20:23:07 -0500 + +libhtml-parser-perl (3.15-1) unstable; urgency=low + + * New upstream release + + -- Michael Alan Dorman Wed, 27 Dec 2000 09:43:39 -0500 + +libhtml-parser-perl (3.14-1) unstable; urgency=low + + * New upstream release + * Rebuild makes it 5.6 compatible (closes: bug#76847) + + -- Michael Alan Dorman Fri, 8 Dec 2000 09:24:05 -0500 + +libhtml-parser-perl (3.13-1) unstable; urgency=low + + * New upstream release + + -- Michael Alan Dorman Mon, 18 Sep 2000 07:58:13 -0400 + +libhtml-parser-perl (3.12-1) unstable; urgency=low + + * New upstream release + + -- Michael Alan Dorman Fri, 15 Sep 2000 08:31:35 -0400 + +libhtml-parser-perl (3.11-1) unstable; urgency=low + + * New upstream release + * Added new dependency on libhtml-tagset-perl + + -- Michael Alan Dorman Tue, 22 Aug 2000 09:43:43 -0400 + +libhtml-parser-perl (3.10-1) unstable; urgency=low + + * New upstream release + + -- Michael Alan Dorman Mon, 10 Jul 2000 14:30:50 -0400 + +libhtml-parser-perl (3.08-1) unstable; urgency=low + + * New upstream release + + -- Michael Alan Dorman Mon, 29 May 2000 11:48:14 -0400 + +libhtml-parser-perl (3.07-1) unstable; urgency=low + + * New upstream version. + * End of comment bug has been fixed upstream for a few versions (closes: bug#60678) + + -- Michael Alan Dorman Tue, 28 Mar 2000 11:03:02 -0500 + +libhtml-parser-perl (3.05-1) unstable; urgency=low + + * New upstream version. + + -- Michael Alan Dorman Sun, 23 Jan 2000 16:47:57 -0500 + +libhtml-parser-perl (3.04-1) unstable; urgency=low + + * Should have checked for a new version first... + + -- Michael Alan Dorman Sun, 16 Jan 2000 10:06:20 -0500 + +libhtml-parser-perl (3.02-2) unstable; urgency=low + + * Oops, forgot that it was changing to be arch-dep (closes: bug#55254) + + -- Michael Alan Dorman Sun, 16 Jan 2000 09:59:28 -0500 + +libhtml-parser-perl (3.02-1) unstable; urgency=low + + * New upstream version. 
Now a much faster XS module. + + -- Michael Alan Dorman Wed, 12 Jan 2000 20:19:39 -0500 + +libhtml-parser-perl (2.23-2) unstable; urgency=low + + * Modified for new perl packages. + * Modified to use debhelper. + + -- Michael Alan Dorman Sun, 4 Jul 1999 22:32:25 +0000 + +libhtml-parser-perl (2.23-1) unstable; urgency=low + + * New upstream release. + + -- Michael Alan Dorman Sat, 12 Jun 1999 16:24:24 -0400 + +libhtml-parser-perl (2.22-1) unstable; urgency=low + + * Changed Conflicts to Replaces to accommodate dpkg-http + + -- Michael Alan Dorman Fri, 26 Feb 1999 12:37:13 -0500 + +libhtml-parser-perl (2.20-1) unstable; urgency=low + + * New upstream release. + * Create new libhtml-parser-perl and libhtml-tree-perl to reflect + upstream split from libwww-perl. + + -- Michael Alan Dorman Fri, 7 Aug 1998 10:26:09 -0500 --- libhtml-parser-perl-3.56.orig/debian/control +++ libhtml-parser-perl-3.56/debian/control @@ -0,0 +1,19 @@ +Source: libhtml-parser-perl +Maintainer: Ubuntu Core Developers +XSBC-Original-Maintainer: Debian Catalyst Maintainers +Uploaders: Krzysztof Krzyzaniak (eloy) , Kenneth J. Pronovici , Florian Ragwitz +Section: perl +Priority: optional +Build-Depends: debhelper (>= 5.0.0), perl (>= 5.10), libhtml-tagset-perl, libtest-pod-perl, liburi-perl +Standards-Version: 3.7.2.1 + +Package: libhtml-parser-perl +Architecture: any +Depends: ${perl:Depends}, libhtml-tagset-perl, ${shlibs:Depends}, liburi-perl +Enhances: libwww-perl +Replaces: libwww-perl (<<5.36-0) +Conflicts: libwww-perl (<<5.36-0), libhtml-tree-perl (<<0.61-0) +Description: A collection of modules that parse HTML text documents + This is a collection of modules that parse HTML text documents. These + modules used to be part of the libwww-perl distribution, but are now + unbundled in order to facilitate a separate development track. 
--- libhtml-parser-perl-3.56.orig/debian/compat +++ libhtml-parser-perl-3.56/debian/compat @@ -0,0 +1 @@ +5 --- libhtml-parser-perl-3.56.orig/debian/watch +++ libhtml-parser-perl-3.56/debian/watch @@ -0,0 +1,2 @@ +version=2 +http://www.cpan.org/modules/by-module/HTML/HTML-Parser-([\d\.]+)\.tar\.gz