diff -Nru libhtml-scrubber-perl-0.15/Build.PL libhtml-scrubber-perl-0.17/Build.PL --- libhtml-scrubber-perl-0.15/Build.PL 2015-10-10 14:01:34.000000000 +0000 +++ libhtml-scrubber-perl-0.17/Build.PL 1970-01-01 00:00:00.000000000 +0000 @@ -1,79 +0,0 @@ - -# This file was automatically generated by Dist::Zilla::Plugin::ModuleBuild v5.039. -use strict; -use warnings; - -use Module::Build 0.28; - - -my %module_build_args = ( - "build_requires" => { - "Module::Build" => "0.28", - "Test::CPAN::Meta" => 0, - "Test::EOL" => 0, - "Test::NoTabs" => 0 - }, - "configure_requires" => { - "Module::Build" => "0.28" - }, - "dist_abstract" => "Perl extension for scrubbing/sanitizing html", - "dist_author" => [ - "Ruslan Zakirov ", - "Nigel Metheringham ", - "D. H. " - ], - "dist_name" => "HTML-Scrubber", - "dist_version" => "0.15", - "license" => "perl", - "module_name" => "HTML::Scrubber", - "recursive_test_files" => 1, - "requires" => { - "HTML::Entities" => 0, - "HTML::Parser" => "3.47", - "Scalar::Util" => 0, - "perl" => "5.008", - "strict" => 0, - "warnings" => 0 - }, - "test_requires" => { - "Carp" => 0, - "File::Spec" => 0, - "File::Temp" => 0, - "IO::Handle" => 0, - "IPC::Open3" => 0, - "Test" => 0, - "Test::Memory::Cycle" => 0, - "Test::More" => "0.94", - "blib" => "1.01", - "utf8" => 0 - } -); - - -my %fallback_build_requires = ( - "Carp" => 0, - "File::Spec" => 0, - "File::Temp" => 0, - "IO::Handle" => 0, - "IPC::Open3" => 0, - "Module::Build" => "0.28", - "Test" => 0, - "Test::CPAN::Meta" => 0, - "Test::EOL" => 0, - "Test::Memory::Cycle" => 0, - "Test::More" => "0.94", - "Test::NoTabs" => 0, - "blib" => "1.01", - "utf8" => 0 -); - - -unless ( eval { Module::Build->VERSION(0.4004) } ) { - delete $module_build_args{test_requires}; - $module_build_args{build_requires} = \%fallback_build_requires; -} - -my $build = Module::Build->new(%module_build_args); - - -$build->create_build_script; diff -Nru libhtml-scrubber-perl-0.15/Changes libhtml-scrubber-perl-0.17/Changes --- 
libhtml-scrubber-perl-0.15/Changes 2015-10-10 14:01:34.000000000 +0000 +++ libhtml-scrubber-perl-0.17/Changes 2017-06-27 13:03:51.000000000 +0000 @@ -1,5 +1,19 @@ Revision history for Perl extension HTML::Scrubber. +0.17 2017-06-27 14:03:47+01:00 Europe/London + +0.16 2017-06-25 20:30:15+01:00 Europe/London (TRIAL RELEASE) + - Add missing testing prereqs (github pr#9 paultcochrane) + - Extend list of Perls in Travis config (github pr#10 paultcochrane) + - Avoid pod-spell test failure from ABSTRACT text (github pr#11 paultcochrane) + - Minor documentation fixes (github pr#12 paultcochrane) + - Purge trailing whitespace in Travis config (github pr#13 paultcochrane) + - Fix perlcritic issues (github pr#14 paultcochrane) + - Fix stale URLs (github pr#15 paultcochrane) + - Remove invalid end tags for empty elements (RT120384) + (github pr#16 paultcochrane) + - Rework Dist::Zilla config to be more portable + 0.15 2015-10-10 15:01:31+01:00 Europe/London - Minor spelling check avoidance tweaks - Enforce comments to be well formed, to resolve potential security issue diff -Nru libhtml-scrubber-perl-0.15/debian/changelog libhtml-scrubber-perl-0.17/debian/changelog --- libhtml-scrubber-perl-0.15/debian/changelog 2015-10-31 20:02:55.000000000 +0000 +++ libhtml-scrubber-perl-0.17/debian/changelog 2017-11-11 13:34:29.000000000 +0000 @@ -1,3 +1,25 @@ +libhtml-scrubber-perl (0.17-1) unstable; urgency=medium + + * Team upload + + [ Salvatore Bonaccorso ] + * debian/control: Use HTTPS transport protocol for Vcs-Git URI + + [ gregor herrmann ] + * debian/copyright: change Copyright-Format 1.0 URL to HTTPS. + * debian/upstream/metadata: change GitHub/CPAN URL(s) to HTTPS. + * debian/upstream/metadata: use HTTPS for GitHub URLs. 
+ + [ Florian Schlichting ] + * Import upstream version 0.17 + * Update upstream metadata + * Drop build-dependency on M::B after switch to EU::MM + * Bump copyright years + * Add build-dependency on Test::Differences + * Declare compliance with Debian Policy 4.1.1 + + -- Florian Schlichting Sat, 11 Nov 2017 14:34:29 +0100 + libhtml-scrubber-perl (0.15-1) unstable; urgency=medium * Team upload. diff -Nru libhtml-scrubber-perl-0.15/debian/control libhtml-scrubber-perl-0.17/debian/control --- libhtml-scrubber-perl-0.15/debian/control 2015-10-31 20:02:55.000000000 +0000 +++ libhtml-scrubber-perl-0.17/debian/control 2017-11-11 13:33:58.000000000 +0000 @@ -4,17 +4,17 @@ Section: perl Priority: optional Build-Depends: debhelper (>= 9), - libmodule-build-perl, perl Build-Depends-Indep: libhtml-parser-perl, libtest-cpan-meta-perl, + libtest-differences-perl, libtest-eol-perl, libtest-memory-cycle-perl, libtest-notabs-perl, perl (>= 5.11.1) | libtest-simple-perl (>= 0.94) -Standards-Version: 3.9.6 +Standards-Version: 4.1.1 Vcs-Browser: https://anonscm.debian.org/cgit/pkg-perl/packages/libhtml-scrubber-perl.git -Vcs-Git: git://anonscm.debian.org/pkg-perl/packages/libhtml-scrubber-perl.git +Vcs-Git: https://anonscm.debian.org/git/pkg-perl/packages/libhtml-scrubber-perl.git Homepage: https://metacpan.org/release/HTML-Scrubber Testsuite: autopkgtest-pkg-perl diff -Nru libhtml-scrubber-perl-0.15/debian/copyright libhtml-scrubber-perl-0.17/debian/copyright --- libhtml-scrubber-perl-0.15/debian/copyright 2015-10-31 20:02:55.000000000 +0000 +++ libhtml-scrubber-perl-0.17/debian/copyright 2017-11-10 14:37:48.000000000 +0000 @@ -1,4 +1,4 @@ -Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: HTML-Scrubber Upstream-Contact: Nigel Metheringham Source: https://metacpan.org/release/HTML-Scrubber @@ -6,7 +6,7 @@ Files: * Copyright: 2003-2004, D. H. 
2011-2013, Nigel Metheringham - 2013-2015, Ruslan Zakirov + 2013-2017, Ruslan Zakirov License: Artistic or GPL-1+ Files: debian/* diff -Nru libhtml-scrubber-perl-0.15/debian/upstream/metadata libhtml-scrubber-perl-0.17/debian/upstream/metadata --- libhtml-scrubber-perl-0.15/debian/upstream/metadata 2015-10-31 20:02:55.000000000 +0000 +++ libhtml-scrubber-perl-0.17/debian/upstream/metadata 2017-11-10 14:34:07.000000000 +0000 @@ -1,9 +1,7 @@ --- Archive: CPAN -Bug-Database: http://rt.cpan.org/Public/Dist/Display.html?Name=HTML-Scrubber -Bug-Submit: bug-HTML-Scrubber@rt.cpan.org Contact: Ruslan Zakirov , Nigel Metheringham , D. H. Name: HTML-Scrubber -Repository: git://github.com/nigelm/html-scrubber.git -Repository-Browse: http://github.com/nigelm/html-scrubber +Repository: https://github.com/nigelm/html-scrubber.git +Repository-Browse: https://github.com/nigelm/html-scrubber diff -Nru libhtml-scrubber-perl-0.15/dist.ini libhtml-scrubber-perl-0.17/dist.ini --- libhtml-scrubber-perl-0.15/dist.ini 1970-01-01 00:00:00.000000000 +0000 +++ libhtml-scrubber-perl-0.17/dist.ini 2017-06-27 13:03:51.000000000 +0000 @@ -0,0 +1,99 @@ +name = HTML-Scrubber +author = Ruslan Zakirov +author = Nigel Metheringham +author = D. H. +license = Perl_5 +copyright_holder = Ruslan Zakirov, Nigel Metheringham, 2003-2004 D. H. 
+copyright_year = 2017 + +[AutoPrereqs] +skip = ^blib + +[Prereqs / TestRequires] +Test::NoTabs = 0 +Pod::Coverage::TrustPod = 0 +Test::CPAN::Meta = 0 +Test::Differences = 0 +Test::EOL = 0 +Test::Kwalitee = 1.21 +Test::Memory::Cycle = 0 +Test::More = 0.88 +Test::PAUSE::Permissions = 0 +Test::Pod = 1.41 +Test::Pod::Coverage = 1.08 + +;; -- Declare additional author deps for Dist::Zilla +; authordep Pod::Elemental::Transformer::List +; authordep Pod::Weaver::Plugin::WikiDoc +; authordep Pod::Weaver::PluginBundle::Default +; authordep Pod::Weaver::Section::Contributors +; authordep Pod::Weaver::Section::Support + +[NextRelease] ; Mark up the next release in changes + +[@Git] +changelog = Changes +allow_dirty = dist.ini +allow_dirty = Changes +allow_dirty = README.md +commit_msg = v%v%n%n%c +tag_format = release/%v +tag_message = %v +push_to = origin + +;; -- Additional git +[Git::GatherDir] +[Git::NextVersion] ; Get the next version tag from git +version_regexp = ^release/(\d+.\d+)$ +[Git::CheckFor::CorrectBranch] ; ensure on master branch for release +[Git::Remote::Check] ; ensure our branch is ahead of remote +[Git::Contributors] ; add contributors from the git logs +[Git::CommitBuild] +branch = +release_branch = cpan + +;; -- Sets of additional tests we want to do as part of release +[Test::Perl::Critic] +[MetaTests] +[PodCoverageTests] +[OurPkgVersion] +[Test::Kwalitee] +[Test::EOL] +[Test::PAUSE::Permissions] ; if doing a release make sure we have PAUSE perms +[Test::NoTabs] + +;; -- Additional information +[GithubMeta] ; Grab the repo metadata +[PodWeaver] ; Mangle the pod a bit +[CheckChangeLog] ; Make sure we have a change set +[Authority] +authority = cpan:NIGELM +do_metadata = 1 +locate_comment = 1 + +[@Starter] +-remove = GatherDir ; we use [Git::GatherDir] instead + +; -- Put an autogenerated Markdown readme into the repo +[ReadmeAnyFromPod / Markdown_Readme] +type = markdown +filename = README.md +location = root ; do not include pod readmes in the build! 
+ +[Meta::Contributors] + +;; -- Add decoration to the github readme +[GitHubREADME::Badge] +badges = travis +;badges = coveralls +;badges = gitter +badges = cpants +badges = issues +badges = github_tag +badges = license +badges = version +;badges = codecov +;badges = gitlab_ci +;badges = gitlab_cover + +;; - end diff -Nru libhtml-scrubber-perl-0.15/INSTALL libhtml-scrubber-perl-0.17/INSTALL --- libhtml-scrubber-perl-0.15/INSTALL 2015-10-10 14:01:34.000000000 +0000 +++ libhtml-scrubber-perl-0.17/INSTALL 1970-01-01 00:00:00.000000000 +0000 @@ -1,43 +0,0 @@ -This is the Perl distribution HTML-Scrubber. - -Installing HTML-Scrubber is straightforward. - -## Installation with cpanm - -If you have cpanm, you only need one line: - - % cpanm HTML::Scrubber - -If you are installing into a system-wide directory, you may need to pass the -"-S" flag to cpanm, which uses sudo to install the module: - - % cpanm -S HTML::Scrubber - -## Installing with the CPAN shell - -Alternatively, if your CPAN shell is set up, you should just be able to do: - - % cpan HTML::Scrubber - -## Manual installation - -As a last resort, you can manually install it. Download the tarball, untar it, -then build it: - - % perl Build.PL - % ./Build && ./Build test - -Then install it: - - % ./Build install - -If you are installing into a system-wide directory, you may need to run: - - % sudo ./Build install - -## Documentation - -HTML-Scrubber documentation is available as POD. 
-You can run perldoc from a shell to read the documentation: - - % perldoc HTML::Scrubber diff -Nru libhtml-scrubber-perl-0.15/lib/HTML/Scrubber.pm libhtml-scrubber-perl-0.17/lib/HTML/Scrubber.pm --- libhtml-scrubber-perl-0.15/lib/HTML/Scrubber.pm 2015-10-10 14:01:34.000000000 +0000 +++ libhtml-scrubber-perl-0.17/lib/HTML/Scrubber.pm 2017-06-27 13:03:51.000000000 +0000 @@ -1,6 +1,6 @@ package HTML::Scrubber; -# ABSTRACT: Perl extension for scrubbing/sanitizing html +# ABSTRACT: Perl extension for scrubbing/sanitizing HTML use 5.008; # enforce minimum perl version of 5.8 @@ -9,10 +9,11 @@ use HTML::Parser 3.47 (); use HTML::Entities; use Scalar::Util ('weaken'); +use List::Util qw(any); our ( @_scrub, @_scrub_fh ); -our $VERSION = '0.15'; # VERSION +our $VERSION = '0.17'; # VERSION our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY # my my my my, these here to prevent foolishness like @@ -20,6 +21,7 @@ (@_scrub) = ( \&_scrub, "self, event, tagname, attr, attrseq, text" ); (@_scrub_fh) = ( \&_scrub_fh, "self, event, tagname, attr, attrseq, text" ); + sub new { my $package = shift; my $p = HTML::Parser->new( @@ -263,6 +265,12 @@ } } elsif ( $e eq 'end' ) { + + # empty tags list taken from + # https://developer.mozilla.org/en/docs/Glossary/empty_element + my @empty_tags = qw(area base br col embed hr img input link meta param source track wbr); + return "" if $text ne '' && any { $t eq $_ } @empty_tags; # skip false closing empty tags + my $place = 0; if ( exists $s->{_rules}->{$t} ) { $place = 1 if $s->{_rules}->{$t}; @@ -290,7 +298,7 @@ $outstr .= $text if $s->{_process}; } elsif ( $e eq 'text' or $e eq 'default' ) { - $text =~ s//>/g; $outstr .= $text; @@ -365,15 +373,17 @@ =pod -=for stopwords html cpan callback homepage Perlbrew perltidy respository +=encoding UTF-8 =head1 NAME -HTML::Scrubber - Perl extension for scrubbing/sanitizing html +HTML::Scrubber - Perl extension for scrubbing/sanitizing HTML =head1 VERSION -version 0.15 +version 0.17 + +=for stopwords html cpan 
callback homepage Perlbrew perltidy repository =head1 SYNOPSIS @@ -405,20 +415,43 @@ If you want to "scrub" or "sanitize" html input in a reliable and flexible fashion, then this module is for you. -I wasn't satisfied with HTML::Sanitizer because it is based on -HTML::TreeBuilder, so I thought I'd write something similar that works directly -with HTML::Parser. +I wasn't satisfied with L<HTML::Sanitizer> because it is based on +L<HTML::TreeBuilder>, so I thought I'd write something similar that works +directly with L<HTML::Parser>. =head1 METHODS First a note on documentation: just study the L below. It's -all the documentation you could need +all the documentation you could need. Also, be sure to read all the comments as well as L. If you're new to perl, good luck to you. +=head2 new + + my $scrubber = HTML::Scrubber->new( allow => [ qw[ p b i u hr br ] ] ); + +Build a new L<HTML::Scrubber>. The arguments are the initial values for the +following directives:- + +=over 4 + +=item * default + +=item * allow + +=item * deny + +=item * rules + +=item * process + +=item * comment + +=back + =head2 comment warn "comments are ", $p->comment ? 'allowed' : 'not allowed'; @@ -435,9 +468,9 @@ if $p->script; # off by default $p->script( 0 || 1 ); -B<**> Please note that this is implemented using HTML::Parser's ignore_elements -function, so if C