diff -Nru libhtml-scrubber-perl-0.15/Build.PL libhtml-scrubber-perl-0.17/Build.PL
--- libhtml-scrubber-perl-0.15/Build.PL 2015-10-10 14:01:34.000000000 +0000
+++ libhtml-scrubber-perl-0.17/Build.PL 1970-01-01 00:00:00.000000000 +0000
@@ -1,79 +0,0 @@
-
-# This file was automatically generated by Dist::Zilla::Plugin::ModuleBuild v5.039.
-use strict;
-use warnings;
-
-use Module::Build 0.28;
-
-
-my %module_build_args = (
- "build_requires" => {
- "Module::Build" => "0.28",
- "Test::CPAN::Meta" => 0,
- "Test::EOL" => 0,
- "Test::NoTabs" => 0
- },
- "configure_requires" => {
- "Module::Build" => "0.28"
- },
- "dist_abstract" => "Perl extension for scrubbing/sanitizing html",
- "dist_author" => [
- "Ruslan Zakirov ",
- "Nigel Metheringham ",
- "D. H. "
- ],
- "dist_name" => "HTML-Scrubber",
- "dist_version" => "0.15",
- "license" => "perl",
- "module_name" => "HTML::Scrubber",
- "recursive_test_files" => 1,
- "requires" => {
- "HTML::Entities" => 0,
- "HTML::Parser" => "3.47",
- "Scalar::Util" => 0,
- "perl" => "5.008",
- "strict" => 0,
- "warnings" => 0
- },
- "test_requires" => {
- "Carp" => 0,
- "File::Spec" => 0,
- "File::Temp" => 0,
- "IO::Handle" => 0,
- "IPC::Open3" => 0,
- "Test" => 0,
- "Test::Memory::Cycle" => 0,
- "Test::More" => "0.94",
- "blib" => "1.01",
- "utf8" => 0
- }
-);
-
-
-my %fallback_build_requires = (
- "Carp" => 0,
- "File::Spec" => 0,
- "File::Temp" => 0,
- "IO::Handle" => 0,
- "IPC::Open3" => 0,
- "Module::Build" => "0.28",
- "Test" => 0,
- "Test::CPAN::Meta" => 0,
- "Test::EOL" => 0,
- "Test::Memory::Cycle" => 0,
- "Test::More" => "0.94",
- "Test::NoTabs" => 0,
- "blib" => "1.01",
- "utf8" => 0
-);
-
-
-unless ( eval { Module::Build->VERSION(0.4004) } ) {
- delete $module_build_args{test_requires};
- $module_build_args{build_requires} = \%fallback_build_requires;
-}
-
-my $build = Module::Build->new(%module_build_args);
-
-
-$build->create_build_script;
diff -Nru libhtml-scrubber-perl-0.15/Changes libhtml-scrubber-perl-0.17/Changes
--- libhtml-scrubber-perl-0.15/Changes 2015-10-10 14:01:34.000000000 +0000
+++ libhtml-scrubber-perl-0.17/Changes 2017-06-27 13:03:51.000000000 +0000
@@ -1,5 +1,19 @@
Revision history for Perl extension HTML::Scrubber.
+0.17 2017-06-27 14:03:47+01:00 Europe/London
+
+0.16 2017-06-25 20:30:15+01:00 Europe/London (TRIAL RELEASE)
+ - Add missing testing prereqs (github pr#9 paultcochrane)
+ - Extend list of Perls in Travis config (github pr#10 paultcochrane)
+ - Avoid pod-spell test failure from ABSTRACT text (github pr#11 paultcochrane)
+ - Minor documentation fixes (github pr#12 paultcochrane)
+ - Purge trailing whitespace in Travis config (github pr#13 paultcochrane)
+ - Fix perlcritic issues (github pr#14 paultcochrane)
+ - Fix stale URLs (github pr#15 paultcochrane)
+ - Remove invalid end tags for empty elements (RT120384)
+ (github pr#16 paultcochrane)
+ - Rework Dist::Zilla config to be more portable
+
0.15 2015-10-10 15:01:31+01:00 Europe/London
- Minor spelling check avoidance tweaks
- Enforce comments to be well formed, to resolve potential security issue
diff -Nru libhtml-scrubber-perl-0.15/debian/changelog libhtml-scrubber-perl-0.17/debian/changelog
--- libhtml-scrubber-perl-0.15/debian/changelog 2015-10-31 20:02:55.000000000 +0000
+++ libhtml-scrubber-perl-0.17/debian/changelog 2017-11-11 13:34:29.000000000 +0000
@@ -1,3 +1,25 @@
+libhtml-scrubber-perl (0.17-1) unstable; urgency=medium
+
+ * Team upload
+
+ [ Salvatore Bonaccorso ]
+ * debian/control: Use HTTPS transport protocol for Vcs-Git URI
+
+ [ gregor herrmann ]
+ * debian/copyright: change Copyright-Format 1.0 URL to HTTPS.
+ * debian/upstream/metadata: change GitHub/CPAN URL(s) to HTTPS.
+ * debian/upstream/metadata: use HTTPS for GitHub URLs.
+
+ [ Florian Schlichting ]
+ * Import upstream version 0.17
+ * Update upstream metadata
+ * Drop build-dependency on M::B after switch to EU::MM
+ * Bump copyright years
+ * Add build-dependency on Test::Differences
+ * Declare compliance with Debian Policy 4.1.1
+
+ -- Florian Schlichting Sat, 11 Nov 2017 14:34:29 +0100
+
libhtml-scrubber-perl (0.15-1) unstable; urgency=medium
* Team upload.
diff -Nru libhtml-scrubber-perl-0.15/debian/control libhtml-scrubber-perl-0.17/debian/control
--- libhtml-scrubber-perl-0.15/debian/control 2015-10-31 20:02:55.000000000 +0000
+++ libhtml-scrubber-perl-0.17/debian/control 2017-11-11 13:33:58.000000000 +0000
@@ -4,17 +4,17 @@
Section: perl
Priority: optional
Build-Depends: debhelper (>= 9),
- libmodule-build-perl,
perl
Build-Depends-Indep: libhtml-parser-perl,
libtest-cpan-meta-perl,
+ libtest-differences-perl,
libtest-eol-perl,
libtest-memory-cycle-perl,
libtest-notabs-perl,
perl (>= 5.11.1) | libtest-simple-perl (>= 0.94)
-Standards-Version: 3.9.6
+Standards-Version: 4.1.1
Vcs-Browser: https://anonscm.debian.org/cgit/pkg-perl/packages/libhtml-scrubber-perl.git
-Vcs-Git: git://anonscm.debian.org/pkg-perl/packages/libhtml-scrubber-perl.git
+Vcs-Git: https://anonscm.debian.org/git/pkg-perl/packages/libhtml-scrubber-perl.git
Homepage: https://metacpan.org/release/HTML-Scrubber
Testsuite: autopkgtest-pkg-perl
diff -Nru libhtml-scrubber-perl-0.15/debian/copyright libhtml-scrubber-perl-0.17/debian/copyright
--- libhtml-scrubber-perl-0.15/debian/copyright 2015-10-31 20:02:55.000000000 +0000
+++ libhtml-scrubber-perl-0.17/debian/copyright 2017-11-10 14:37:48.000000000 +0000
@@ -1,4 +1,4 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: HTML-Scrubber
Upstream-Contact: Nigel Metheringham
Source: https://metacpan.org/release/HTML-Scrubber
@@ -6,7 +6,7 @@
Files: *
Copyright: 2003-2004, D. H.
2011-2013, Nigel Metheringham
- 2013-2015, Ruslan Zakirov
+ 2013-2017, Ruslan Zakirov
License: Artistic or GPL-1+
Files: debian/*
diff -Nru libhtml-scrubber-perl-0.15/debian/upstream/metadata libhtml-scrubber-perl-0.17/debian/upstream/metadata
--- libhtml-scrubber-perl-0.15/debian/upstream/metadata 2015-10-31 20:02:55.000000000 +0000
+++ libhtml-scrubber-perl-0.17/debian/upstream/metadata 2017-11-10 14:34:07.000000000 +0000
@@ -1,9 +1,7 @@
---
Archive: CPAN
-Bug-Database: http://rt.cpan.org/Public/Dist/Display.html?Name=HTML-Scrubber
-Bug-Submit: bug-HTML-Scrubber@rt.cpan.org
Contact: Ruslan Zakirov , Nigel Metheringham ,
D. H.
Name: HTML-Scrubber
-Repository: git://github.com/nigelm/html-scrubber.git
-Repository-Browse: http://github.com/nigelm/html-scrubber
+Repository: https://github.com/nigelm/html-scrubber.git
+Repository-Browse: https://github.com/nigelm/html-scrubber
diff -Nru libhtml-scrubber-perl-0.15/dist.ini libhtml-scrubber-perl-0.17/dist.ini
--- libhtml-scrubber-perl-0.15/dist.ini 1970-01-01 00:00:00.000000000 +0000
+++ libhtml-scrubber-perl-0.17/dist.ini 2017-06-27 13:03:51.000000000 +0000
@@ -0,0 +1,99 @@
+name = HTML-Scrubber
+author = Ruslan Zakirov
+author = Nigel Metheringham
+author = D. H.
+license = Perl_5
+copyright_holder = Ruslan Zakirov, Nigel Metheringham, 2003-2004 D. H.
+copyright_year = 2017
+
+[AutoPrereqs]
+skip = ^blib
+
+[Prereqs / TestRequires]
+Test::NoTabs = 0
+Pod::Coverage::TrustPod = 0
+Test::CPAN::Meta = 0
+Test::Differences = 0
+Test::EOL = 0
+Test::Kwalitee = 1.21
+Test::Memory::Cycle = 0
+Test::More = 0.88
+Test::PAUSE::Permissions = 0
+Test::Pod = 1.41
+Test::Pod::Coverage = 1.08
+
+;; -- Declare additional author deps for Dist::Zilla
+; authordep Pod::Elemental::Transformer::List
+; authordep Pod::Weaver::Plugin::WikiDoc
+; authordep Pod::Weaver::PluginBundle::Default
+; authordep Pod::Weaver::Section::Contributors
+; authordep Pod::Weaver::Section::Support
+
+[NextRelease] ; Mark up the next release in changes
+
+[@Git]
+changelog = Changes
+allow_dirty = dist.ini
+allow_dirty = Changes
+allow_dirty = README.md
+commit_msg = v%v%n%n%c
+tag_format = release/%v
+tag_message = %v
+push_to = origin
+
+;; -- Additional git
+[Git::GatherDir]
+[Git::NextVersion] ; Get the next version tag from git
+version_regexp = ^release/(\d+.\d+)$
+[Git::CheckFor::CorrectBranch] ; ensure on master branch for release
+[Git::Remote::Check] ; ensure our branch is ahead of remote
+[Git::Contributors] ; add contributors from the git logs
+[Git::CommitBuild]
+branch =
+release_branch = cpan
+
+;; -- Sets of additional tests we want to do as part of release
+[Test::Perl::Critic]
+[MetaTests]
+[PodCoverageTests]
+[OurPkgVersion]
+[Test::Kwalitee]
+[Test::EOL]
+[Test::PAUSE::Permissions] ; if doing a release make sure we have PAUSE perms
+[Test::NoTabs]
+
+;; -- Additional information
+[GithubMeta] ; Grab the repo metadata
+[PodWeaver] ; Mangle the pod a bit
+[CheckChangeLog] ; Make sure we have a change set
+[Authority]
+authority = cpan:NIGELM
+do_metadata = 1
+locate_comment = 1
+
+[@Starter]
+-remove = GatherDir ; we use [Git::GatherDir] instead
+
+; -- Put an autogenerated Markdown readme into the repo
+[ReadmeAnyFromPod / Markdown_Readme]
+type = markdown
+filename = README.md
+location = root ; do not include pod readmes in the build!
+
+[Meta::Contributors]
+
+;; -- Add decoration to the github readme
+[GitHubREADME::Badge]
+badges = travis
+;badges = coveralls
+;badges = gitter
+badges = cpants
+badges = issues
+badges = github_tag
+badges = license
+badges = version
+;badges = codecov
+;badges = gitlab_ci
+;badges = gitlab_cover
+
+;; - end
diff -Nru libhtml-scrubber-perl-0.15/INSTALL libhtml-scrubber-perl-0.17/INSTALL
--- libhtml-scrubber-perl-0.15/INSTALL 2015-10-10 14:01:34.000000000 +0000
+++ libhtml-scrubber-perl-0.17/INSTALL 1970-01-01 00:00:00.000000000 +0000
@@ -1,43 +0,0 @@
-This is the Perl distribution HTML-Scrubber.
-
-Installing HTML-Scrubber is straightforward.
-
-## Installation with cpanm
-
-If you have cpanm, you only need one line:
-
- % cpanm HTML::Scrubber
-
-If you are installing into a system-wide directory, you may need to pass the
-"-S" flag to cpanm, which uses sudo to install the module:
-
- % cpanm -S HTML::Scrubber
-
-## Installing with the CPAN shell
-
-Alternatively, if your CPAN shell is set up, you should just be able to do:
-
- % cpan HTML::Scrubber
-
-## Manual installation
-
-As a last resort, you can manually install it. Download the tarball, untar it,
-then build it:
-
- % perl Build.PL
- % ./Build && ./Build test
-
-Then install it:
-
- % ./Build install
-
-If you are installing into a system-wide directory, you may need to run:
-
- % sudo ./Build install
-
-## Documentation
-
-HTML-Scrubber documentation is available as POD.
-You can run perldoc from a shell to read the documentation:
-
- % perldoc HTML::Scrubber
diff -Nru libhtml-scrubber-perl-0.15/lib/HTML/Scrubber.pm libhtml-scrubber-perl-0.17/lib/HTML/Scrubber.pm
--- libhtml-scrubber-perl-0.15/lib/HTML/Scrubber.pm 2015-10-10 14:01:34.000000000 +0000
+++ libhtml-scrubber-perl-0.17/lib/HTML/Scrubber.pm 2017-06-27 13:03:51.000000000 +0000
@@ -1,6 +1,6 @@
package HTML::Scrubber;
-# ABSTRACT: Perl extension for scrubbing/sanitizing html
+# ABSTRACT: Perl extension for scrubbing/sanitizing HTML
use 5.008; # enforce minimum perl version of 5.8
@@ -9,10 +9,11 @@
use HTML::Parser 3.47 ();
use HTML::Entities;
use Scalar::Util ('weaken');
+use List::Util qw(any);
our ( @_scrub, @_scrub_fh );
-our $VERSION = '0.15'; # VERSION
+our $VERSION = '0.17'; # VERSION
our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY
# my my my my, these here to prevent foolishness like
@@ -20,6 +21,7 @@
(@_scrub) = ( \&_scrub, "self, event, tagname, attr, attrseq, text" );
(@_scrub_fh) = ( \&_scrub_fh, "self, event, tagname, attr, attrseq, text" );
+
sub new {
my $package = shift;
my $p = HTML::Parser->new(
@@ -263,6 +265,12 @@
}
}
elsif ( $e eq 'end' ) {
+
+ # empty tags list taken from
+ # https://developer.mozilla.org/en/docs/Glossary/empty_element
+ my @empty_tags = qw(area base br col embed hr img input link meta param source track wbr);
+ return "" if $text ne '' && any { $t eq $_ } @empty_tags; # skip false closing empty tags
+
my $place = 0;
if ( exists $s->{_rules}->{$t} ) {
$place = 1 if $s->{_rules}->{$t};
@@ -290,7 +298,7 @@
$outstr .= $text if $s->{_process};
}
elsif ( $e eq 'text' or $e eq 'default' ) {
- $text =~ s/</&lt;/g; #https://rt.cpan.org/Ticket/Attachment/8716/10332/scrubber.patch
+ $text =~ s/</&lt;/g; #https://rt.cpan.org/Public/Ticket/Attachment/83958/10332/scrubber.patch
 $text =~ s/>/&gt;/g;
$outstr .= $text;
@@ -365,15 +373,17 @@
=pod
-=for stopwords html cpan callback homepage Perlbrew perltidy respository
+=encoding UTF-8
=head1 NAME
-HTML::Scrubber - Perl extension for scrubbing/sanitizing html
+HTML::Scrubber - Perl extension for scrubbing/sanitizing HTML
=head1 VERSION
-version 0.15
+version 0.17
+
+=for stopwords html cpan callback homepage Perlbrew perltidy repository
=head1 SYNOPSIS
@@ -405,20 +415,43 @@
If you want to "scrub" or "sanitize" html input in a reliable and flexible
fashion, then this module is for you.
-I wasn't satisfied with HTML::Sanitizer because it is based on
-HTML::TreeBuilder, so I thought I'd write something similar that works directly
-with HTML::Parser.
+I wasn't satisfied with L<HTML::Sanitizer> because it is based on
+L<HTML::TreeBuilder>, so I thought I'd write something similar that works
+directly with L<HTML::Parser>.
=head1 METHODS
First a note on documentation: just study the L<EXAMPLE|"EXAMPLE"> below. It's
-all the documentation you could need
+all the documentation you could need.
Also, be sure to read all the comments as well as L<How does it work?|"How does it work?">.
If you're new to perl, good luck to you.
+=head2 new
+
+ my $scrubber = HTML::Scrubber->new( allow => [ qw[ p b i u hr br ] ] );
+
+Build a new L<HTML::Scrubber>. The arguments are the initial values for the
+following directives:-
+
+=over 4
+
+=item * default
+
+=item * allow
+
+=item * deny
+
+=item * rules
+
+=item * process
+
+=item * comment
+
+=back
+
=head2 comment
warn "comments are ", $p->comment ? 'allowed' : 'not allowed';
@@ -435,9 +468,9 @@
if $p->script; # off by default
$p->script( 0 || 1 );
-B<**> Please note that this is implemented using HTML::Parser's ignore_elements
-function, so if C