diff -Nru pypdf2-1.23+git20141008/CHANGELOG pypdf2-1.25/CHANGELOG --- pypdf2-1.23+git20141008/CHANGELOG 2014-10-25 20:55:38.000000000 +0000 +++ pypdf2-1.25/CHANGELOG 2015-07-16 10:58:34.000000000 +0000 @@ -1,3 +1,98 @@ +Version 1.25, 2015-07-07 +------------------------ + +BUGFIXES: + + - Added Python 3 algorithm for ASCII85Decode. Fixes issue when + reading reportlab-generated files with Py 3 (jerickbixly) + + - Recognize more escape sequence which would otherwise throw an + exception (manuelzs, robertsoakes) + + - Fixed overflow error in generic.py. Occurred + when reading a too-large int in Python 2 (by Raja Jamwal) + + - Allow access to files which were encrypted with an empty + password. Previously threw a "File has not been decrypted" + exception (Elena Williams) + + - Do not attempt to decode an empty data stream. Previously + would cause an error in decode algorithms (vladir) + + - Fixed some type issues specific to Py 2 or Py 3 + + - Fix issue when stream data begins with whitespace (soloma83) + + - Recognize abbreviated filter names (AlmightyOatmeal and + Matthew Weiss) + + - Copy decryption key from PdfFileReader to PdfFileMerger. + Allows usage of PdfFileMerger with encrypted files (twolfson) + + - Fixed bug which occurred when a NameObject is present at end + of a file stream. Threw a "Stream has ended unexpectedly" + exception (speedplane) + +FEATURES: + + - Initial work on a test suite; to be expanded in future. + Tests and Resources directory added, README updated (robertsoakes) + + - Added document cloning methods to PdfFileWriter: + appendPagesFromReader, cloneReaderDocumentRoot, and + cloneDocumentFromReader. See official documentation (robertsoakes) + + - Added method for writing to form fields: updatePageFormFieldValues. + This will be enhanced in the future. See official documentation + (robertsoakes) + + - New addAttachment method. See documentation. Support for adding + and extracting embedded files to be enhanced in the future + (moshekaplan) + + - Added methods to get page number of given PageObject or + Destination: getPageNumber and getDestinationPageNumber. + See documentation (mozbugbox) + +OTHER ENHANCEMENTS: + + - Enhanced type handling (Brent Amrhein) + + - Enhanced exception handling in NameObject (sbywater) + + - Enhanced extractText method output (peircej) + + - Better exception handling + + - Enhanced regex usage in NameObject class (speedplane) + + +Version 1.24, 2014-12-31 +------------------------ + + - Bugfixes for reading files in Python 3 (by Anthony Tuininga and + pqqp) + + - Appropriate errors are now raised instead of infinite loops (by + naure and Cyrus Vafadari) + + - Bugfix for parsing number tokens with leading spaces (by Maxim + Kamenkov) + + - Don't crash on bad /Outlines reference (by eshellman) + + - Conform tabs/spaces and blank lines to PEP 8 standards + + - Utilize the readUntilRegex method when reading Number Objects + (by Brendan Jurd) + + - More bugfixes for Python 3 and clearer exception handling + + - Fixed encoding issue in merger (with eshellman) + + - Created separate folder for scripts + + Version 1.23, 2014-08-11 ------------------------ @@ -100,13 +195,13 @@ a custom implementation. - Fix NumberObject and NameObject constructors for compatibility with PyPy - (Rdiger Jungbeck, Xavier Dupr, shezadkhan137, Steven Witham) + (Rüdiger Jungbeck, Xavier Dupré, shezadkhan137, Steven Witham) - Utilize utils.Str in pdf.py and pagerange.py to resolve type issues (by egbutter) - Improvements in implementing StringIO for Python 2 and BytesIO for - Python 3 (by Xavier Dupr) + Python 3 (by Xavier Dupré) - Added /x00 to Whitespaces, defined utils.WHITESPACES to clarify code (by Maxim Kamenkov) @@ -149,7 +244,7 @@ - Bugfix to handle nested bookmarks correctly (by Jamie Lentin) - New methods removeImages() and removeText() available for PdfFileWriter - (by Tien Ha) + (by Tien Haï) - Exception handling for illegal characters in Name Objects diff -Nru pypdf2-1.23+git20141008/debian/changelog pypdf2-1.25/debian/changelog --- pypdf2-1.23+git20141008/debian/changelog 2014-10-25 21:00:12.000000000 +0000 +++ pypdf2-1.25/debian/changelog 2015-07-16 11:00:55.000000000 +0000 @@ -1,3 +1,9 @@ +pypdf2 (1.25-1) trusty; urgency=medium + + * New upstream release. + + -- REMnux Distribution (https://REMnux.org/) Thu, 16 Jul 2015 07:00:00 -0400 + pypdf2 (1.23+git20141008-1) unstable; urgency=low * Upstream snapshot with various bug fixes. diff -Nru pypdf2-1.23+git20141008/debian/control pypdf2-1.25/debian/control --- pypdf2-1.23+git20141008/debian/control 2014-10-25 21:00:18.000000000 +0000 +++ pypdf2-1.25/debian/control 2015-07-16 11:01:55.000000000 +0000 @@ -12,8 +12,8 @@ Architecture: all Depends: ${misc:Depends}, ${python:Depends} Provides: ${python:Provides} -Breaks: python-pypdf (<< 1.23-3.1~) -Replaces: python-pypdf (<< 1.23-3.1~) +Breaks: python-pypdf (<< 1.23+git20141008-1~) +Replaces: python-pypdf (<< 1.23+git20141008-1~) Description: Pure-Python library built as a PDF toolkit (Python 2) A Pure-Python library built as a PDF toolkit. It is capable of: - extracting document information (title, author, ...), diff -Nru pypdf2-1.23+git20141008/.git/config pypdf2-1.25/.git/config --- pypdf2-1.23+git20141008/.git/config 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/config 2015-07-16 10:58:34.000000000 +0000 @@ -0,0 +1,11 @@ +[core] + repositoryformatversion = 0 + filemode = true + bare = false + logallrefupdates = true +[remote "origin"] + url = https://github.com/mstamy2/PyPDF2.git + fetch = +refs/heads/*:refs/remotes/origin/* +[branch "master"] + remote = origin + merge = refs/heads/master diff -Nru pypdf2-1.23+git20141008/.git/description pypdf2-1.25/.git/description --- pypdf2-1.23+git20141008/.git/description 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/description 2015-07-16 10:58:30.000000000 +0000 @@ -0,0 +1 @@ +Unnamed repository; edit this file 'description' to name the repository. diff -Nru pypdf2-1.23+git20141008/.git/HEAD pypdf2-1.25/.git/HEAD --- pypdf2-1.23+git20141008/.git/HEAD 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/HEAD 2015-07-16 10:58:34.000000000 +0000 @@ -0,0 +1 @@ +ref: refs/heads/master diff -Nru pypdf2-1.23+git20141008/.git/hooks/applypatch-msg.sample pypdf2-1.25/.git/hooks/applypatch-msg.sample --- pypdf2-1.23+git20141008/.git/hooks/applypatch-msg.sample 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/hooks/applypatch-msg.sample 2015-07-16 10:58:30.000000000 +0000 @@ -0,0 +1,15 @@ +#!/bin/sh +# +# An example hook script to check the commit log message taken by +# applypatch from an e-mail message. +# +# The hook should exit with non-zero status after issuing an +# appropriate message if it wants to stop the commit. The hook is +# allowed to edit the commit message file. +# +# To enable this hook, rename this file to "applypatch-msg". + +. git-sh-setup +test -x "$GIT_DIR/hooks/commit-msg" && + exec "$GIT_DIR/hooks/commit-msg" ${1+"$@"} +: diff -Nru pypdf2-1.23+git20141008/.git/hooks/commit-msg.sample pypdf2-1.25/.git/hooks/commit-msg.sample --- pypdf2-1.23+git20141008/.git/hooks/commit-msg.sample 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/hooks/commit-msg.sample 2015-07-16 10:58:30.000000000 +0000 @@ -0,0 +1,24 @@ +#!/bin/sh +# +# An example hook script to check the commit log message. +# Called by "git commit" with one argument, the name of the file +# that has the commit message. The hook should exit with non-zero +# status after issuing an appropriate message if it wants to stop the +# commit. The hook is allowed to edit the commit message file. +# +# To enable this hook, rename this file to "commit-msg". + +# Uncomment the below to add a Signed-off-by line to the message. +# Doing this in a hook is a bad idea in general, but the prepare-commit-msg +# hook is more suited to it. +# +# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p') +# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1" + +# This example catches duplicate Signed-off-by lines. + +test "" = "$(grep '^Signed-off-by: ' "$1" | + sort | uniq -c | sed -e '/^[ ]*1[ ]/d')" || { + echo >&2 Duplicate Signed-off-by lines. + exit 1 +} diff -Nru pypdf2-1.23+git20141008/.git/hooks/post-update.sample pypdf2-1.25/.git/hooks/post-update.sample --- pypdf2-1.23+git20141008/.git/hooks/post-update.sample 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/hooks/post-update.sample 2015-07-16 10:58:30.000000000 +0000 @@ -0,0 +1,8 @@ +#!/bin/sh +# +# An example hook script to prepare a packed repository for use over +# dumb transports. +# +# To enable this hook, rename this file to "post-update". + +exec git update-server-info diff -Nru pypdf2-1.23+git20141008/.git/hooks/pre-applypatch.sample pypdf2-1.25/.git/hooks/pre-applypatch.sample --- pypdf2-1.23+git20141008/.git/hooks/pre-applypatch.sample 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/hooks/pre-applypatch.sample 2015-07-16 10:58:30.000000000 +0000 @@ -0,0 +1,14 @@ +#!/bin/sh +# +# An example hook script to verify what is about to be committed +# by applypatch from an e-mail message. +# +# The hook should exit with non-zero status after issuing an +# appropriate message if it wants to stop the commit. +# +# To enable this hook, rename this file to "pre-applypatch". + +. git-sh-setup +test -x "$GIT_DIR/hooks/pre-commit" && + exec "$GIT_DIR/hooks/pre-commit" ${1+"$@"} +: diff -Nru pypdf2-1.23+git20141008/.git/hooks/pre-commit.sample pypdf2-1.25/.git/hooks/pre-commit.sample --- pypdf2-1.23+git20141008/.git/hooks/pre-commit.sample 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/hooks/pre-commit.sample 2015-07-16 10:58:30.000000000 +0000 @@ -0,0 +1,49 @@ +#!/bin/sh +# +# An example hook script to verify what is about to be committed. +# Called by "git commit" with no arguments. The hook should +# exit with non-zero status after issuing an appropriate message if +# it wants to stop the commit. +# +# To enable this hook, rename this file to "pre-commit". + +if git rev-parse --verify HEAD >/dev/null 2>&1 +then + against=HEAD +else + # Initial commit: diff against an empty tree object + against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 +fi + +# If you want to allow non-ASCII filenames set this variable to true. +allownonascii=$(git config --bool hooks.allownonascii) + +# Redirect output to stderr. +exec 1>&2 + +# Cross platform projects tend to avoid non-ASCII filenames; prevent +# them from being added to the repository. We exploit the fact that the +# printable range starts at the space character and ends with tilde. +if [ "$allownonascii" != "true" ] && + # Note that the use of brackets around a tr range is ok here, (it's + # even required, for portability to Solaris 10's /usr/bin/tr), since + # the square bracket bytes happen to fall in the designated range. + test $(git diff --cached --name-only --diff-filter=A -z $against | + LC_ALL=C tr -d '[ -~]\0' | wc -c) != 0 +then + cat <<\EOF +Error: Attempt to add a non-ASCII file name. + +This can cause problems if you want to work with people on other platforms. + +To be portable it is advisable to rename the file. + +If you know what you are doing you can disable this check using: + + git config hooks.allownonascii true +EOF + exit 1 +fi + +# If there are whitespace errors, print the offending file names and fail. +exec git diff-index --check --cached $against -- diff -Nru pypdf2-1.23+git20141008/.git/hooks/prepare-commit-msg.sample pypdf2-1.25/.git/hooks/prepare-commit-msg.sample --- pypdf2-1.23+git20141008/.git/hooks/prepare-commit-msg.sample 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/hooks/prepare-commit-msg.sample 2015-07-16 10:58:30.000000000 +0000 @@ -0,0 +1,36 @@ +#!/bin/sh +# +# An example hook script to prepare the commit log message. +# Called by "git commit" with the name of the file that has the +# commit message, followed by the description of the commit +# message's source. The hook's purpose is to edit the commit +# message file. If the hook fails with a non-zero status, +# the commit is aborted. +# +# To enable this hook, rename this file to "prepare-commit-msg". + +# This hook includes three examples. The first comments out the +# "Conflicts:" part of a merge commit. +# +# The second includes the output of "git diff --name-status -r" +# into the message, just before the "git status" output. It is +# commented because it doesn't cope with --amend or with squashed +# commits. +# +# The third example adds a Signed-off-by line to the message, that can +# still be edited. This is rarely a good idea. + +case "$2,$3" in + merge,) + /usr/bin/perl -i.bak -ne 's/^/# /, s/^# #/#/ if /^Conflicts/ .. /#/; print' "$1" ;; + +# ,|template,) +# /usr/bin/perl -i.bak -pe ' +# print "\n" . `git diff --cached --name-status -r` +# if /^#/ && $first++ == 0' "$1" ;; + + *) ;; +esac + +# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p') +# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1" diff -Nru pypdf2-1.23+git20141008/.git/hooks/pre-push.sample pypdf2-1.25/.git/hooks/pre-push.sample --- pypdf2-1.23+git20141008/.git/hooks/pre-push.sample 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/hooks/pre-push.sample 2015-07-16 10:58:30.000000000 +0000 @@ -0,0 +1,54 @@ +#!/bin/sh + +# An example hook script to verify what is about to be pushed. Called by "git +# push" after it has checked the remote status, but before anything has been +# pushed. If this script exits with a non-zero status nothing will be pushed. +# +# This hook is called with the following parameters: +# +# $1 -- Name of the remote to which the push is being done +# $2 -- URL to which the push is being done +# +# If pushing without using a named remote those arguments will be equal. +# +# Information about the commits which are being pushed is supplied as lines to +# the standard input in the form: +# +# +# +# This sample shows how to prevent push of commits where the log message starts +# with "WIP" (work in progress). + +remote="$1" +url="$2" + +z40=0000000000000000000000000000000000000000 + +IFS=' ' +while read local_ref local_sha remote_ref remote_sha +do + if [ "$local_sha" = $z40 ] + then + # Handle delete + : + else + if [ "$remote_sha" = $z40 ] + then + # New branch, examine all commits + range="$local_sha" + else + # Update to existing branch, examine new commits + range="$remote_sha..$local_sha" + fi + + # Check for WIP commit + commit=`git rev-list -n 1 --grep '^WIP' "$range"` + if [ -n "$commit" ] + then + echo "Found WIP commit in $local_ref, not pushing" + exit 1 + fi + fi +done + +exit 0 diff -Nru pypdf2-1.23+git20141008/.git/hooks/pre-rebase.sample pypdf2-1.25/.git/hooks/pre-rebase.sample --- pypdf2-1.23+git20141008/.git/hooks/pre-rebase.sample 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/hooks/pre-rebase.sample 2015-07-16 10:58:30.000000000 +0000 @@ -0,0 +1,169 @@ +#!/bin/sh +# +# Copyright (c) 2006, 2008 Junio C Hamano +# +# The "pre-rebase" hook is run just before "git rebase" starts doing +# its job, and can prevent the command from running by exiting with +# non-zero status. +# +# The hook is called with the following parameters: +# +# $1 -- the upstream the series was forked from. +# $2 -- the branch being rebased (or empty when rebasing the current branch). +# +# This sample shows how to prevent topic branches that are already +# merged to 'next' branch from getting rebased, because allowing it +# would result in rebasing already published history. + +publish=next +basebranch="$1" +if test "$#" = 2 +then + topic="refs/heads/$2" +else + topic=`git symbolic-ref HEAD` || + exit 0 ;# we do not interrupt rebasing detached HEAD +fi + +case "$topic" in +refs/heads/??/*) + ;; +*) + exit 0 ;# we do not interrupt others. + ;; +esac + +# Now we are dealing with a topic branch being rebased +# on top of master. Is it OK to rebase it? + +# Does the topic really exist? +git show-ref -q "$topic" || { + echo >&2 "No such branch $topic" + exit 1 +} + +# Is topic fully merged to master? +not_in_master=`git rev-list --pretty=oneline ^master "$topic"` +if test -z "$not_in_master" +then + echo >&2 "$topic is fully merged to master; better remove it." + exit 1 ;# we could allow it, but there is no point. +fi + +# Is topic ever merged to next? If so you should not be rebasing it. +only_next_1=`git rev-list ^master "^$topic" ${publish} | sort` +only_next_2=`git rev-list ^master ${publish} | sort` +if test "$only_next_1" = "$only_next_2" +then + not_in_topic=`git rev-list "^$topic" master` + if test -z "$not_in_topic" + then + echo >&2 "$topic is already up-to-date with master" + exit 1 ;# we could allow it, but there is no point. + else + exit 0 + fi +else + not_in_next=`git rev-list --pretty=oneline ^${publish} "$topic"` + /usr/bin/perl -e ' + my $topic = $ARGV[0]; + my $msg = "* $topic has commits already merged to public branch:\n"; + my (%not_in_next) = map { + /^([0-9a-f]+) /; + ($1 => 1); + } split(/\n/, $ARGV[1]); + for my $elem (map { + /^([0-9a-f]+) (.*)$/; + [$1 => $2]; + } split(/\n/, $ARGV[2])) { + if (!exists $not_in_next{$elem->[0]}) { + if ($msg) { + print STDERR $msg; + undef $msg; + } + print STDERR " $elem->[1]\n"; + } + } + ' "$topic" "$not_in_next" "$not_in_master" + exit 1 +fi + +<<\DOC_END + +This sample hook safeguards topic branches that have been +published from being rewound. + +The workflow assumed here is: + + * Once a topic branch forks from "master", "master" is never + merged into it again (either directly or indirectly). + + * Once a topic branch is fully cooked and merged into "master", + it is deleted. If you need to build on top of it to correct + earlier mistakes, a new topic branch is created by forking at + the tip of the "master". This is not strictly necessary, but + it makes it easier to keep your history simple. + + * Whenever you need to test or publish your changes to topic + branches, merge them into "next" branch. + +The script, being an example, hardcodes the publish branch name +to be "next", but it is trivial to make it configurable via +$GIT_DIR/config mechanism. + +With this workflow, you would want to know: + +(1) ... if a topic branch has ever been merged to "next". Young + topic branches can have stupid mistakes you would rather + clean up before publishing, and things that have not been + merged into other branches can be easily rebased without + affecting other people. But once it is published, you would + not want to rewind it. + +(2) ... if a topic branch has been fully merged to "master". + Then you can delete it. More importantly, you should not + build on top of it -- other people may already want to + change things related to the topic as patches against your + "master", so if you need further changes, it is better to + fork the topic (perhaps with the same name) afresh from the + tip of "master". + +Let's look at this example: + + o---o---o---o---o---o---o---o---o---o "next" + / / / / + / a---a---b A / / + / / / / + / / c---c---c---c B / + / / / \ / + / / / b---b C \ / + / / / / \ / + ---o---o---o---o---o---o---o---o---o---o---o "master" + + +A, B and C are topic branches. + + * A has one fix since it was merged up to "next". + + * B has finished. It has been fully merged up to "master" and "next", + and is ready to be deleted. + + * C has not merged to "next" at all. + +We would want to allow C to be rebased, refuse A, and encourage +B to be deleted. + +To compute (1): + + git rev-list ^master ^topic next + git rev-list ^master next + + if these match, topic has not merged in next at all. + +To compute (2): + + git rev-list master..topic + + if this is empty, it is fully merged to "master". + +DOC_END diff -Nru pypdf2-1.23+git20141008/.git/hooks/update.sample pypdf2-1.25/.git/hooks/update.sample --- pypdf2-1.23+git20141008/.git/hooks/update.sample 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/hooks/update.sample 2015-07-16 10:58:30.000000000 +0000 @@ -0,0 +1,128 @@ +#!/bin/sh +# +# An example hook script to blocks unannotated tags from entering. +# Called by "git receive-pack" with arguments: refname sha1-old sha1-new +# +# To enable this hook, rename this file to "update". +# +# Config +# ------ +# hooks.allowunannotated +# This boolean sets whether unannotated tags will be allowed into the +# repository. By default they won't be. +# hooks.allowdeletetag +# This boolean sets whether deleting tags will be allowed in the +# repository. By default they won't be. +# hooks.allowmodifytag +# This boolean sets whether a tag may be modified after creation. By default +# it won't be. +# hooks.allowdeletebranch +# This boolean sets whether deleting branches will be allowed in the +# repository. By default they won't be. +# hooks.denycreatebranch +# This boolean sets whether remotely creating branches will be denied +# in the repository. By default this is allowed. +# + +# --- Command line +refname="$1" +oldrev="$2" +newrev="$3" + +# --- Safety check +if [ -z "$GIT_DIR" ]; then + echo "Don't run this script from the command line." >&2 + echo " (if you want, you could supply GIT_DIR then run" >&2 + echo " $0 )" >&2 + exit 1 +fi + +if [ -z "$refname" -o -z "$oldrev" -o -z "$newrev" ]; then + echo "usage: $0 " >&2 + exit 1 +fi + +# --- Config +allowunannotated=$(git config --bool hooks.allowunannotated) +allowdeletebranch=$(git config --bool hooks.allowdeletebranch) +denycreatebranch=$(git config --bool hooks.denycreatebranch) +allowdeletetag=$(git config --bool hooks.allowdeletetag) +allowmodifytag=$(git config --bool hooks.allowmodifytag) + +# check for no description +projectdesc=$(sed -e '1q' "$GIT_DIR/description") +case "$projectdesc" in +"Unnamed repository"* | "") + echo "*** Project description file hasn't been set" >&2 + exit 1 + ;; +esac + +# --- Check types +# if $newrev is 0000...0000, it's a commit to delete a ref. +zero="0000000000000000000000000000000000000000" +if [ "$newrev" = "$zero" ]; then + newrev_type=delete +else + newrev_type=$(git cat-file -t $newrev) +fi + +case "$refname","$newrev_type" in + refs/tags/*,commit) + # un-annotated tag + short_refname=${refname##refs/tags/} + if [ "$allowunannotated" != "true" ]; then + echo "*** The un-annotated tag, $short_refname, is not allowed in this repository" >&2 + echo "*** Use 'git tag [ -a | -s ]' for tags you want to propagate." >&2 + exit 1 + fi + ;; + refs/tags/*,delete) + # delete tag + if [ "$allowdeletetag" != "true" ]; then + echo "*** Deleting a tag is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/tags/*,tag) + # annotated tag + if [ "$allowmodifytag" != "true" ] && git rev-parse $refname > /dev/null 2>&1 + then + echo "*** Tag '$refname' already exists." >&2 + echo "*** Modifying a tag is not allowed in this repository." >&2 + exit 1 + fi + ;; + refs/heads/*,commit) + # branch + if [ "$oldrev" = "$zero" -a "$denycreatebranch" = "true" ]; then + echo "*** Creating a branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/heads/*,delete) + # delete branch + if [ "$allowdeletebranch" != "true" ]; then + echo "*** Deleting a branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/remotes/*,commit) + # tracking branch + ;; + refs/remotes/*,delete) + # delete tracking branch + if [ "$allowdeletebranch" != "true" ]; then + echo "*** Deleting a tracking branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + *) + # Anything else (is there anything else?) + echo "*** Update hook: unknown type of update to ref $refname of type $newrev_type" >&2 + exit 1 + ;; +esac + +# --- Finished +exit 0 Binary files /tmp/wtOLKNVVyZ/pypdf2-1.23+git20141008/.git/index and /tmp/jbeSlZ7Eef/pypdf2-1.25/.git/index differ diff -Nru pypdf2-1.23+git20141008/.git/info/exclude pypdf2-1.25/.git/info/exclude --- pypdf2-1.23+git20141008/.git/info/exclude 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/info/exclude 2015-07-16 10:58:30.000000000 +0000 @@ -0,0 +1,6 @@ +# git ls-files --others --exclude-from=.git/info/exclude +# Lines that start with '#' are comments. +# For a project mostly in C, the following would be a good set of +# exclude patterns (uncomment them if you want to use them): +# *.[oa] +# *~ diff -Nru pypdf2-1.23+git20141008/.git/logs/HEAD pypdf2-1.25/.git/logs/HEAD --- pypdf2-1.23+git20141008/.git/logs/HEAD 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/logs/HEAD 2015-07-16 10:58:34.000000000 +0000 @@ -0,0 +1 @@ +0000000000000000000000000000000000000000 fc05b046c05d9cf5c9cbbe67b12ffef7501babc7 REMnux 1437044314 -0400 clone: from https://github.com/mstamy2/PyPDF2.git diff -Nru pypdf2-1.23+git20141008/.git/logs/refs/heads/master pypdf2-1.25/.git/logs/refs/heads/master --- pypdf2-1.23+git20141008/.git/logs/refs/heads/master 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/logs/refs/heads/master 2015-07-16 10:58:34.000000000 +0000 @@ -0,0 +1 @@ +0000000000000000000000000000000000000000 fc05b046c05d9cf5c9cbbe67b12ffef7501babc7 REMnux 1437044314 -0400 clone: from https://github.com/mstamy2/PyPDF2.git diff -Nru pypdf2-1.23+git20141008/.git/logs/refs/remotes/origin/HEAD pypdf2-1.25/.git/logs/refs/remotes/origin/HEAD --- pypdf2-1.23+git20141008/.git/logs/refs/remotes/origin/HEAD 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/logs/refs/remotes/origin/HEAD 2015-07-16 10:58:34.000000000 +0000 @@ -0,0 +1 @@ +0000000000000000000000000000000000000000 fc05b046c05d9cf5c9cbbe67b12ffef7501babc7 REMnux 1437044314 -0400 clone: from https://github.com/mstamy2/PyPDF2.git Binary files /tmp/wtOLKNVVyZ/pypdf2-1.23+git20141008/.git/objects/pack/pack-820270944cb8a5a8228cc6b9ce15c1ba34fe286b.idx and /tmp/jbeSlZ7Eef/pypdf2-1.25/.git/objects/pack/pack-820270944cb8a5a8228cc6b9ce15c1ba34fe286b.idx differ Binary files /tmp/wtOLKNVVyZ/pypdf2-1.23+git20141008/.git/objects/pack/pack-820270944cb8a5a8228cc6b9ce15c1ba34fe286b.pack and /tmp/jbeSlZ7Eef/pypdf2-1.25/.git/objects/pack/pack-820270944cb8a5a8228cc6b9ce15c1ba34fe286b.pack differ diff -Nru pypdf2-1.23+git20141008/.git/packed-refs pypdf2-1.25/.git/packed-refs --- pypdf2-1.23+git20141008/.git/packed-refs 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/packed-refs 2015-07-16 10:58:34.000000000 +0000 @@ -0,0 +1,18 @@ +# pack-refs with: peeled fully-peeled +947bfb0f5eba88949af2cc70886d8a31e0b59b5b refs/remotes/origin/gh-pages +fc05b046c05d9cf5c9cbbe67b12ffef7501babc7 refs/remotes/origin/master +563540b2a7f8c913d3861e67c4c9d50bf40a3ab5 refs/tags/v1.18 +^448018023efc31bce15ed9ae66acaae5d249f3d4 +c0de9e42f603b54894d8f00603678eca6e47d4ea refs/tags/v1.19 +^83bbfc2dbe74c0ad802efc9a35e73b05a3df711b +b6f665066f5b28166a94070833bf90be93cdb4ba refs/tags/v1.20 +^a5a3038b9013a4a709eccb03cae7250e6f1b2eaa +08a69ac377a537649ba8b86f8f7317fda9e25626 refs/tags/v1.21 +^37e3090559fc89957671bfa13fb803bd7247bc2d +58b85b21c0ebf093ec0b28d752846f8d238bee6d refs/tags/v1.22 +^86b4ebc23da6e7da42e4c2593f82c70801c63ab6 +1b5218e758f603659d420bc0374ca366c3f6c54d refs/tags/v1.23 +^2f499c55d575be976c0984a5e1047f99a6f7667f +da1b9d833e6c40f8484c4a60ab60687cf87ab78f refs/tags/v1.24 +^41d90b4d141d0b019d145748f53ea556efcb47d1 +e87538baf138296680c62d93332f4274090f4724 refs/tags/v1.25 diff -Nru pypdf2-1.23+git20141008/.git/refs/heads/master pypdf2-1.25/.git/refs/heads/master --- pypdf2-1.23+git20141008/.git/refs/heads/master 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/refs/heads/master 2015-07-16 10:58:34.000000000 +0000 @@ -0,0 +1 @@ +fc05b046c05d9cf5c9cbbe67b12ffef7501babc7 diff -Nru pypdf2-1.23+git20141008/.git/refs/remotes/origin/HEAD pypdf2-1.25/.git/refs/remotes/origin/HEAD --- pypdf2-1.23+git20141008/.git/refs/remotes/origin/HEAD 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.git/refs/remotes/origin/HEAD 2015-07-16 10:58:34.000000000 +0000 @@ -0,0 +1 @@ +ref: refs/remotes/origin/master diff -Nru pypdf2-1.23+git20141008/.gitignore pypdf2-1.25/.gitignore --- pypdf2-1.23+git20141008/.gitignore 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/.gitignore 2015-07-16 10:58:34.000000000 +0000 @@ -0,0 +1,4 @@ +*.pyc +*.swp +.DS_Store +build diff -Nru pypdf2-1.23+git20141008/PyPDF2/filters.py pypdf2-1.25/PyPDF2/filters.py --- pypdf2-1.23+git20141008/PyPDF2/filters.py 2014-10-25 20:55:38.000000000 +0000 +++ pypdf2-1.25/PyPDF2/filters.py 2015-07-16 10:58:34.000000000 +0000 @@ -40,28 +40,35 @@ from cStringIO import StringIO else: from io import StringIO + import struct try: import zlib + def decompress(data): return zlib.decompress(data) + def compress(data): return zlib.compress(data) + except ImportError: # Unable to import zlib. Attempt to use the System.IO.Compression # library from the .NET framework. (IronPython only) import System from System import IO, Collections, Array + def _string_to_bytearr(buf): retval = Array.CreateInstance(System.Byte, len(buf)) for i in range(len(buf)): retval[i] = ord(buf[i]) return retval + def _bytearr_to_string(bytes): retval = "" for i in range(bytes.Length): retval += chr(bytes[i]) return retval + def _read_bytes(stream): ms = IO.MemoryStream() buf = Array.CreateInstance(System.Byte, 2048) @@ -74,6 +81,7 @@ retval = ms.ToArray() ms.Close() return retval + def decompress(data): bytes = _string_to_bytearr(data) ms = IO.MemoryStream() @@ -84,6 +92,7 @@ retval = _bytearr_to_string(bytes) gz.Close() return retval + def compress(data): bytes = _string_to_bytearr(data) ms = IO.MemoryStream() @@ -106,7 +115,7 @@ predictor = decodeParms.get("/Predictor", 1) except AttributeError: pass # usually an array with a null object was read - + # predictor 1 == no predictor if predictor != 1: columns = decodeParms["/Columns"] @@ -144,6 +153,7 @@ return compress(data) encode = staticmethod(encode) + class ASCIIHexDecode(object): def decode(data, decodeParms=None): retval = "" @@ -165,6 +175,7 @@ return retval decode = staticmethod(decode) + class LZWDecode(object): """Taken from: http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm @@ -184,7 +195,6 @@ def resetDict(self): self.dictlen=258 self.bitspercode=9 - def nextCode(self): fillbits=self.bitspercode @@ -196,8 +206,8 @@ bitsfromhere=8-self.bitpos if bitsfromhere>fillbits: bitsfromhere=fillbits - value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) & - (0xff >> (8-bitsfromhere))) << + value |= (((nextbits >> (8-self.bitpos-bitsfromhere)) & + (0xff >> (8-bitsfromhere))) << (fillbits-bitsfromhere)) fillbits -= bitsfromhere self.bitpos += bitsfromhere @@ -235,70 +245,93 @@ baos+=p self.dict[self.dictlen] = p; self.dictlen+=1 - if (self.dictlen >= (1 << self.bitspercode) - 1 and + if (self.dictlen >= (1 << self.bitspercode) - 1 and self.bitspercode < 12): self.bitspercode+=1 return baos - - @staticmethod def decode(data,decodeParams=None): return LZWDecode.decoder(data).decode() + class ASCII85Decode(object): def decode(data, decodeParms=None): - retval = "" - group = [] - x = 0 - hitEod = False - # remove all whitespace from data - data = [y for y in data if not (y in ' \n\r\t')] - while not hitEod: - c = data[x] - if len(retval) == 0 and c == "<" and data[x+1] == "~": - x += 2 - continue - #elif c.isspace(): - # x += 1 - # continue - elif c == 'z': - assert len(group) == 0 - retval += '\x00\x00\x00\x00' - x += 1 - continue - elif c == "~" and data[x+1] == ">": - if len(group) != 0: - # cannot have a final group of just 1 char - assert len(group) > 1 - cnt = len(group) - 1 - group += [ 85, 85, 85 ] - hitEod = cnt + if version_info < ( 3, 0 ): + retval = "" + group = [] + x = 0 + hitEod = False + # remove all whitespace from data + data = [y for y in data if not (y in ' \n\r\t')] + while not hitEod: + c = data[x] + if len(retval) == 0 and c == "<" and data[x+1] == "~": + x += 2 + continue + #elif c.isspace(): + # x += 1 + # continue + elif c == 'z': + assert len(group) == 0 + retval += '\x00\x00\x00\x00' + x += 1 + continue + elif c == "~" and data[x+1] == ">": + if len(group) != 0: + # cannot have a final group of just 1 char + assert len(group) > 1 + cnt = len(group) - 1 + group += [ 85, 85, 85 ] + hitEod = cnt + else: + break else: + c = ord(c) - 33 + assert c >= 0 and c < 85 + group += [ c ] + if len(group) >= 5: + b = group[0] * (85**4) + \ + group[1] * (85**3) + \ + group[2] * (85**2) + \ + group[3] * 85 + \ + group[4] + assert b < (2**32 - 1) + c4 = chr((b >> 0) % 256) + c3 = chr((b >> 8) % 256) + c2 = chr((b >> 16) % 256) + c1 = chr(b >> 24) + retval += (c1 + c2 + c3 + c4) + if hitEod: + retval = retval[:-4+hitEod] + group = [] + x += 1 + return retval + else: + if isinstance(data, str): + data = data.encode('ascii') + n = b = 0 + out = bytearray() + for c in data: + if ord('!') <= c and c <= ord('u'): + n += 1 + b = b*85+(c-33) + if n == 5: + out += struct.pack(b'>L',b) + n = b = 0 + elif c == ord('z'): + assert n == 0 + out += b'\0\0\0\0' + elif c == ord('~'): + if n: + for _ in range(5-n): + b = b*85+84 + out += struct.pack(b'>L',b)[:n-1] break - else: - c = ord(c) - 33 - assert c >= 0 and c < 85 - group += [ c ] - if len(group) >= 5: - b = group[0] * (85**4) + \ - group[1] * (85**3) + \ - group[2] * (85**2) + \ - group[3] * 85 + \ - group[4] - assert b < (2**32 - 1) - c4 = chr((b >> 0) % 256) - c3 = chr((b >> 8) % 256) - c2 = chr((b >> 16) % 256) - c1 = chr(b >> 24) - retval += (c1 + c2 + c3 + c4) - if hitEod: - retval = retval[:-4+hitEod] - group = [] - x += 1 - return retval + return bytes(out) decode = staticmethod(decode) + def decodeStreamData(stream): from .generic import NameObject filters = stream.get("/Filter", ()) @@ -306,22 +339,24 @@ # we have a single filter instance filters = (filters,) data = stream._data - for filterType in filters: - if filterType == "/FlateDecode": - data = FlateDecode.decode(data, stream.get("/DecodeParms")) - elif filterType == "/ASCIIHexDecode": - data = ASCIIHexDecode.decode(data) - elif filterType == "/LZWDecode": - data = LZWDecode.decode(data, stream.get("/DecodeParms")) - elif filterType == "/ASCII85Decode": - data = ASCII85Decode.decode(data) - elif filterType == "/Crypt": - decodeParams = stream.get("/DecodeParams", {}) - if "/Name" not in decodeParams and "/Type" not in decodeParams: - pass + # If there is not data to decode we should not try to decode the data. + if data: + for filterType in filters: + if filterType == "/FlateDecode" or filterType == "/Fl": + data = FlateDecode.decode(data, stream.get("/DecodeParms")) + elif filterType == "/ASCIIHexDecode" or filterType == "/AHx": + data = ASCIIHexDecode.decode(data) + elif filterType == "/LZWDecode" or filterType == "/LZW": + data = LZWDecode.decode(data, stream.get("/DecodeParms")) + elif filterType == "/ASCII85Decode" or filterType == "/A85": + data = ASCII85Decode.decode(data) + elif filterType == "/Crypt": + decodeParams = stream.get("/DecodeParams", {}) + if "/Name" not in decodeParams and "/Type" not in decodeParams: + pass + else: + raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet") else: - raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet") - else: - # unsupported filter - raise NotImplementedError("unsupported filter %s" % filterType) + # unsupported filter + raise NotImplementedError("unsupported filter %s" % filterType) return data diff -Nru pypdf2-1.23+git20141008/PyPDF2/generic.py pypdf2-1.25/PyPDF2/generic.py --- pypdf2-1.23+git20141008/PyPDF2/generic.py 2014-10-25 20:55:38.000000000 +0000 +++ pypdf2-1.25/PyPDF2/generic.py 2015-07-16 10:58:34.000000000 +0000 @@ -43,11 +43,14 @@ from . import utils import decimal import codecs +import sys #import debugging ObjectPrefix = b_('/<[tf(n%') NumberSigns = b_('+-') IndirectPattern = re.compile(b_(r"(\d+)\s+(\d+)\s+R[^a-zA-Z]")) + + def readObject(stream, pdf): tok = stream.read(1) stream.seek(-1, 1) # reset to start @@ -94,6 +97,7 @@ else: return NumberObject.readFromStream(stream) + class PdfObject(object): def getObject(self): """Resolves indirect references.""" @@ -225,6 +229,7 @@ return decimal.Decimal.__new__(cls, utils.str_(value), context) except: return decimal.Decimal.__new__(cls, str(value)) + def __repr__(self): if self == self.to_integral(): return str(self.quantize(decimal.Decimal(1))) @@ -244,7 +249,11 @@ ByteDot = b_(".") def __new__(cls, value): - return int.__new__(cls, value) + val = int(value) + try: + return int.__new__(cls, val) + except OverflowError: + return int.__new__(cls, 0) def as_numeric(self): return int(b_(repr(self))) @@ -253,16 +262,7 @@ stream.write(b_(repr(self))) def readFromStream(stream): - num = b_("") - while True: - tok = stream.read(16) - m = NumberObject.NumberPattern.search(tok) - if m is not None: - stream.seek(m.start() - len(tok), 1) - num += tok[:m.start()] - break - - num += tok + num = utils.readUntilRegex(stream, NumberObject.NumberPattern) if num.find(NumberObject.ByteDot) != -1: return FloatObject(num) else: @@ -345,13 +345,18 @@ tok = b_("\b") elif tok == b_("f"): tok = b_("\f") + elif tok == b_("c"): + tok = b_("\c") elif tok == b_("("): tok = b_("(") elif tok == b_(")"): tok = b_(")") + elif tok == b_("/"): + tok = b_("/") elif tok == b_("\\"): tok = b_("\\") - elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["), b_("]")): + elif tok in (b_(" "), b_("/"), b_("%"), b_("<"), b_(">"), b_("["), + b_("]"), b_("#"), b_("_"), b_("&"), b_('$')): # odd/unnessecary escape sequences we have encountered tok = b_(tok) elif tok.isdigit(): @@ -378,7 +383,7 @@ # line break was escaped: tok = b_('') else: - raise utils.PdfReadError("Unexpected escaped string") + raise utils.PdfReadError(r"Unexpected escaped string: %s" % tok) txt += tok return createStringObject(txt) @@ -456,7 +461,7 @@ class NameObject(str, PdfObject): - delimiterPattern = re.compile(b_("\s+|[()<>[\]{}/%]")) + delimiterPattern = re.compile(b_(r"\s+|[\(\)<>\[\]{}/%]")) surfix = b_("/") def writeToStream(self, stream, encryption_key): @@ -468,11 +473,12 @@ name = stream.read(1) if name != NameObject.surfix: raise utils.PdfReadError("name read error") - name += utils.readUntilRegex(stream, NameObject.delimiterPattern) + name += utils.readUntilRegex(stream, NameObject.delimiterPattern, + ignore_eof=True) if debug: print(name) try: return NameObject(name.decode('utf-8')) - except UnicodeDecodeError as e: + except (UnicodeEncodeError, UnicodeDecodeError) as e: # Name objects should represent irregular characters # with a '#' followed by the symbol's hex number if not pdf.strict: @@ -630,6 +636,7 @@ return retval readFromStream = staticmethod(readFromStream) + class TreeObject(DictionaryObject): def __init__(self): DictionaryObject.__init__(self) @@ -726,7 +733,6 @@ found = True break - prevRef = curRef prev = cur if NameObject('/Next') in cur: @@ -938,6 +944,7 @@ in (x,y) form. """ + class Field(TreeObject): """ A class representing a field dictionary. This class is accessed through @@ -1009,6 +1016,7 @@ See Section 8.5.2 of the PDF 1.7 reference. """ + class Destination(TreeObject): """ A class representing a destination within a PDF file. @@ -1157,6 +1165,7 @@ "does not exist in translation table") return retval + def decode_pdfdocencoding(byte_array): retval = u_('') for b in byte_array: @@ -1211,4 +1220,3 @@ continue assert char not in _pdfDocEncoding_rev _pdfDocEncoding_rev[char] = i - diff -Nru pypdf2-1.23+git20141008/PyPDF2/merger.py pypdf2-1.25/PyPDF2/merger.py --- pypdf2-1.23+git20141008/PyPDF2/merger.py 2014-10-25 20:55:38.000000000 +0000 +++ pypdf2-1.25/PyPDF2/merger.py 2015-07-16 10:58:34.000000000 +0000 @@ -28,7 +28,7 @@ # POSSIBILITY OF SUCH DAMAGE. from .generic import * -from .utils import string_type +from .utils import isString, str_ from .pdf import PdfFileReader, PdfFileWriter from .pagerange import PageRange from sys import version_info @@ -40,6 +40,7 @@ from io import FileIO as file StreamIO = BytesIO + class _MergedPage(object): """ _MergedPage is used internally by PdfFileMerger to collect necessary @@ -50,13 +51,14 @@ self.pagedata = pagedata self.out_pagedata = None self.id = id - + + class PdfFileMerger(object): """ Initializes a PdfFileMerger object. PdfFileMerger merges multiple PDFs into a single PDF. It can concatenate, slice, insert, or any combination of the above. - + See the functions :meth:`merge()` (or :meth:`append()`) and :meth:`write()` for usage information. @@ -64,7 +66,7 @@ problems and also causes some correctable problems to be fatal. Defaults to ``True``. """ - + def __init__(self, strict=True): self.inputs = [] self.pages = [] @@ -73,7 +75,7 @@ self.named_dests = [] self.id_count = 0 self.strict = strict - + def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True): """ Merges the pages from the given file into the output file at the @@ -85,29 +87,30 @@ :param fileobj: A File Object or an object that supports the standard read and seek methods similar to a File Object. Could also be a string representing a path to a PDF file. - + :param str bookmark: Optionally, you may specify a bookmark to be applied at the beginning of the included file by supplying the text of the bookmark. :param pages: can be a :ref:`Page Range ` or a ``(start, stop[, step])`` tuple to merge only the specified range of pages from the source document into the output document. - + :param bool import_bookmarks: You may prevent the source document's bookmarks from being imported by specifying this as ``False``. """ - + # This parameter is passed to self.inputs.append and means # that the stream used was created in this method. my_file = False - + # If the fileobj parameter is a string, assume it is a path # and create a file object at that location. If it is a file, - # copy the file's contents into a BytesIO (or StreamIO) stream object; if - # it is a PdfFileReader, copy that reader's stream into a + # copy the file's contents into a BytesIO (or StreamIO) stream object; if + # it is a PdfFileReader, copy that reader's stream into a # BytesIO (or StreamIO) stream. # If fileobj is none of the above types, it is not modified - if type(fileobj) == string_type: + decryption_key = None + if isString(fileobj): fileobj = file(fileobj, 'rb') my_file = True elif isinstance(fileobj, file): @@ -116,17 +119,21 @@ fileobj = StreamIO(filecontent) my_file = True elif isinstance(fileobj, PdfFileReader): - orig_tell = fileobj.stream.tell() + orig_tell = fileobj.stream.tell() fileobj.stream.seek(0) filecontent = StreamIO(fileobj.stream.read()) fileobj.stream.seek(orig_tell) # reset the stream to its original location fileobj = filecontent + if hasattr(fileobj, '_decryption_key'): + decryption_key = fileobj._decryption_key my_file = True - + # Create a new PdfFileReader instance using the stream # (either file or BytesIO or StringIO) created above pdfr = PdfFileReader(fileobj, strict=self.strict) - + if decryption_key is not None: + pdfr._decryption_key = decryption_key + # Find the range of pages to merge. if pages == None: pages = (0, pdfr.getNumPages()) @@ -134,47 +141,45 @@ pages = pages.indices(pdfr.getNumPages()) elif not isinstance(pages, tuple): raise TypeError('"pages" must be a tuple of (start, stop[, step])') - + srcpages = [] if bookmark: bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit')) - + outline = [] if import_bookmarks: outline = pdfr.getOutlines() outline = self._trim_outline(pdfr, outline, pages) - + if bookmark: self.bookmarks += [bookmark, outline] else: self.bookmarks += outline - + dests = pdfr.namedDestinations dests = self._trim_dests(pdfr, dests, pages) self.named_dests += dests - + # Gather all the pages that are going to be merged for i in range(*pages): pg = pdfr.getPage(i) - + id = self.id_count self.id_count += 1 - + mp = _MergedPage(pg, pdfr, id) - + srcpages.append(mp) self._associate_dests_to_pages(srcpages) self._associate_bookmarks_to_pages(srcpages) - - + # Slice to insert the pages at the specified position self.pages[position:position] = srcpages - + # Keep track of our input files so we can close them later self.inputs.append((fileobj, pdfr, my_file)) - - + def append(self, fileobj, bookmark=None, pages=None, import_bookmarks=True): """ Identical to the :meth:`merge()` method, but assumes you want to concatenate @@ -183,7 +188,7 @@ :param fileobj: A File Object or an object that supports the standard read and seek methods similar to a File Object. Could also be a string representing a path to a PDF file. - + :param str bookmark: Optionally, you may specify a bookmark to be applied at the beginning of the included file by supplying the text of the bookmark. @@ -194,10 +199,9 @@ :param bool import_bookmarks: You may prevent the source document's bookmarks from being imported by specifying this as ``False``. """ - + self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks) - - + def write(self, fileobj): """ Writes all data that has been merged to the given output file. @@ -206,11 +210,10 @@ file-like object. """ my_file = False - if type(fileobj) in (str, str): + if isString(fileobj): fileobj = file(fileobj, 'wb') my_file = True - # Add pages to the PdfFileWriter # The commented out line below was replaced with the two lines below it to allow PdfFileMerger to work with PyPdf 1.13 for page in self.pages: @@ -222,15 +225,13 @@ # Once all pages are added, create bookmarks to point at those pages self._write_dests() self._write_bookmarks() - - # Write the output to the file + + # Write the output to the file self.output.write(fileobj) - + if my_file: fileobj.close() - - def close(self): """ Shuts all file descriptors (input and output) and clears all memory @@ -240,7 +241,7 @@ for fo, pdfr, mine in self.inputs: if mine: fo.close() - + self.inputs = [] self.output = None @@ -253,7 +254,7 @@ Example: ``{u'/Title': u'My title'}`` """ self.output.addMetadata(infos) - + def setPageLayout(self, layout): """ Set the page layout @@ -289,7 +290,7 @@ def _trim_dests(self, pdf, dests, pages): """ - Removes any named destinations that are not a part of the specified + Removes any named destinations that are not a part of the specified page set. """ new_dests = [] @@ -298,14 +299,14 @@ for j in range(*pages): if pdf.getPage(j).getObject() == o['/Page'].getObject(): o[NameObject('/Page')] = o['/Page'].getObject() - assert str(k) == str(o['/Title']) + assert str_(k) == str_(o['/Title']) new_dests.append(o) break return new_dests - + def _trim_outline(self, pdf, outline, pages): """ - Removes any outline/bookmark entries that are not a part of the + Removes any outline/bookmark entries that are not a part of the specified page set. """ new_outline = [] @@ -326,10 +327,10 @@ prev_header_added = True break return new_outline - + def _write_dests(self): dests = self.named_dests - + for v in dests: pageno = None pdf = None @@ -342,19 +343,18 @@ break if pageno != None: self.output.addNamedDestinationObject(v) - + def _write_bookmarks(self, bookmarks=None, parent=None): - + if bookmarks == None: bookmarks = self.bookmarks - last_added = None for b in bookmarks: if isinstance(b, list): self._write_bookmarks(b, last_added) continue - + pageno = None pdf = None if '/Page' in b: @@ -410,31 +410,31 @@ del b['/Left'], b['/Right'], b['/Bottom'], b['/Top'] b[NameObject('/A')] = DictionaryObject({NameObject('/S'): NameObject('/GoTo'), NameObject('/D'): ArrayObject(args)}) - + pageno = i pdf = p.src break if pageno != None: del b['/Page'], b['/Type'] - last_added = self.output.addBookmarkDict(b, parent) + last_added = self.output.addBookmarkDict(b, parent) def _associate_dests_to_pages(self, pages): for nd in self.named_dests: pageno = None np = nd['/Page'] - + if isinstance(np, NumberObject): continue - + for p in pages: if np.getObject() == p.pagedata.getObject(): pageno = p.id - + if pageno != None: nd[NameObject('/Page')] = NumberObject(pageno) else: raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],)) - + def _associate_bookmarks_to_pages(self, pages, bookmarks=None): if bookmarks == None: bookmarks = self.bookmarks @@ -443,35 +443,35 @@ if isinstance(b, list): self._associate_bookmarks_to_pages(pages, b) continue - + pageno = None bp = b['/Page'] - + if isinstance(bp, NumberObject): continue - + for p in pages: if bp.getObject() == p.pagedata.getObject(): pageno = p.id - + if pageno != None: b[NameObject('/Page')] = NumberObject(pageno) else: raise ValueError("Unresolved bookmark '%s'" % (b['/Title'],)) - + def findBookmark(self, bookmark, root=None): - if root == None: - root = self.bookmarks - - for i, b in enumerate(root): - if isinstance(b, list): - res = self.findBookmark(bookmark, b) - if res: - return [i] + res - elif b == bookmark or b['/Title'] == bookmark: - return [i] - - return None + if root == None: + root = self.bookmarks + + for i, b in enumerate(root): + if isinstance(b, list): + res = self.findBookmark(bookmark, b) + if res: + return [i] + res + elif b == bookmark or b['/Title'] == bookmark: + return [i] + + return None def addBookmark(self, title, pagenum, parent=None): """ @@ -483,28 +483,27 @@ bookmarks. """ if parent == None: - iloc = [len(self.bookmarks)-1] + iloc = [len(self.bookmarks)-1] elif isinstance(parent, list): - iloc = parent + iloc = parent else: - iloc = self.findBookmark(parent) - + iloc = self.findBookmark(parent) + dest = Bookmark(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826)) - + if parent == None: - self.bookmarks.append(dest) + self.bookmarks.append(dest) else: - bmparent = self.bookmarks - for i in iloc[:-1]: - bmparent = bmparent[i] - npos = iloc[-1]+1 - if npos < len(bmparent) and isinstance(bmparent[npos], list): - bmparent[npos].append(dest) - else: - bmparent.insert(npos, [dest]) + bmparent = self.bookmarks + for i in iloc[:-1]: + bmparent = bmparent[i] + npos = iloc[-1]+1 + if npos < len(bmparent) and isinstance(bmparent[npos], list): + bmparent[npos].append(dest) + else: + bmparent.insert(npos, [dest]) return dest - - + def addNamedDestination(self, title, pagenum): """ Add a destination to the output. @@ -512,7 +511,7 @@ :param str title: Title to use :param int pagenum: Page number this destination points at. """ - + dest = Destination(TextStringObject(title), NumberObject(pagenum), NameObject('/FitH'), NumberObject(826)) self.named_dests.append(dest) @@ -523,12 +522,12 @@ self.tree = tree self.pdf = pdf self.parent = parent - + def remove(self, index): obj = self[index] del self[index] self.tree.removeChild(obj) - + def add(self, title, pagenum): pageRef = self.pdf.getObject(self.pdf._pages)['/Kids'][pagenum] action = DictionaryObject() @@ -547,7 +546,7 @@ self.pdf._addObject(bookmark) self.tree.addChild(bookmark) - + def removeAll(self): for child in [x for x in self.tree.children()]: self.tree.removeChild(child) diff -Nru pypdf2-1.23+git20141008/PyPDF2/pagerange.py pypdf2-1.25/PyPDF2/pagerange.py --- pypdf2-1.23+git20141008/PyPDF2/pagerange.py 2014-10-25 20:55:38.000000000 +0000 +++ pypdf2-1.25/PyPDF2/pagerange.py 2015-07-16 10:58:34.000000000 +0000 @@ -8,7 +8,7 @@ """ import re -from .utils import Str +from .utils import isString _INT_RE = r"(0|-?[1-9]\d*)" # A decimal int, don't allow "-0". PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE) @@ -32,11 +32,11 @@ ::-1 all pages in reverse order. """ - + class PageRange(object): - """ + """ A slice-like representation of a range of page indices, - i.e. page numbers, only starting at zero. + i.e. page numbers, only starting at zero. The syntax is like what you would put between brackets [ ]. The slice is one of the few Python types that can't be subclassed, but this class converts to and from slices, and allows similar use. @@ -46,7 +46,7 @@ o str() and repr() allow printing. o indices(n) is like slice.indices(n). """ - + def __init__(self, arg): """ Initialize with either a slice -- giving the equivalent page range, @@ -67,8 +67,8 @@ if isinstance(arg, PageRange): self._slice = arg.to_slice() return - - m = isinstance(arg, Str) and re.match(PAGE_RANGE_RE, arg) + + m = isString(arg) and re.match(PAGE_RANGE_RE, arg) if not m: raise ParseError(arg) elif m.group(2): @@ -77,25 +77,25 @@ stop = start + 1 if start != -1 else None self._slice = slice(start, stop) else: - self._slice = slice(*[int(g) if g else None + self._slice = slice(*[int(g) if g else None for g in m.group(4, 6, 8)]) - + # Just formatting this when there is __doc__ for __init__ if __init__.__doc__: __init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP) - + @staticmethod def valid(input): """ True if input is a valid initializer for a PageRange. """ return isinstance(input, slice) or \ isinstance(input, PageRange) or \ - (isinstance(input, Str) + (isString(input) and bool(re.match(PAGE_RANGE_RE, input))) def to_slice(self): """ Return the slice equivalent of this page range. """ return self._slice - + def __str__(self): """ A string like "1:2:3". """ s = self._slice @@ -127,7 +127,7 @@ """ Given a list of filenames and page ranges, return a list of (filename, page_range) pairs. - First arg must be a filename; other ags are filenames, page-range + First arg must be a filename; other ags are filenames, page-range expressions, slice objects, or PageRange objects. A filename not followed by a page range indicates all pages of the file. """ @@ -146,7 +146,7 @@ # New filename or end of list--do all of the previous file? if pdf_filename and not did_page_range: pairs.append( (pdf_filename, PAGE_RANGE_ALL) ) - + pdf_filename = arg did_page_range = False return pairs diff -Nru pypdf2-1.23+git20141008/PyPDF2/pdf.py pypdf2-1.25/PyPDF2/pdf.py --- pypdf2-1.23+git20141008/PyPDF2/pdf.py 2014-10-25 20:55:38.000000000 +0000 +++ pypdf2-1.25/PyPDF2/pdf.py 2015-07-16 10:58:34.000000000 +0000 @@ -63,7 +63,7 @@ import codecs from .generic import * from .utils import readNonWhitespace, readUntilWhitespace, ConvertFunctionsToVirtualList -from .utils import Str, b_, u_, ord_, chr_, str_, string_type, formatWarning +from .utils import isString, b_, u_, ord_, chr_, str_, formatWarning if version_info < ( 2, 4 ): from sets import ImmutableSet as frozenset @@ -74,6 +74,7 @@ from hashlib import md5 import uuid + class PdfFileWriter(object): """ This class supports writing PDF files out, given pages produced by another @@ -228,6 +229,157 @@ NameObject("/OpenAction"): self._addObject(js) }) + def addAttachment(self, fname, fdata): + """ + Embed a file inside the PDF. + + :param str fname: The filename to display. + :param str fdata: The data in the file. + + Reference: + https://www.adobe.com/content/dam/Adobe/en/devnet/acrobat/pdfs/PDF32000_2008.pdf + Section 7.11.3 + """ + + # We need 3 entries: + # * The file's data + # * The /Filespec entry + # * The file's name, which goes in the Catalog + + + # The entry for the file + """ Sample: + 8 0 obj + << + /Length 12 + /Type /EmbeddedFile + >> + stream + Hello world! + endstream + endobj + """ + file_entry = DecodedStreamObject() + file_entry.setData(fdata) + file_entry.update({ + NameObject("/Type"): NameObject("/EmbeddedFile") + }) + + # The Filespec entry + """ Sample: + 7 0 obj + << + /Type /Filespec + /F (hello.txt) + /EF << /F 8 0 R >> + >> + """ + efEntry = DictionaryObject() + efEntry.update({ NameObject("/F"):file_entry }) + + filespec = DictionaryObject() + filespec.update({ + NameObject("/Type"): NameObject("/Filespec"), + NameObject("/F"): createStringObject(fname), # Perhaps also try TextStringObject + NameObject("/EF"): efEntry + }) + + # Then create the entry for the root, as it needs a reference to the Filespec + """ Sample: + 1 0 obj + << + /Type /Catalog + /Outlines 2 0 R + /Pages 3 0 R + /Names << /EmbeddedFiles << /Names [(hello.txt) 7 0 R] >> >> + >> + endobj + + """ + embeddedFilesNamesDictionary = DictionaryObject() + embeddedFilesNamesDictionary.update({ + NameObject("/Names"): ArrayObject([createStringObject(fname), filespec]) + }) + + embeddedFilesDictionary = DictionaryObject() + embeddedFilesDictionary.update({ + NameObject("/EmbeddedFiles"): embeddedFilesNamesDictionary + }) + # Update the root + self._root_object.update({ + NameObject("/Names"): embeddedFilesDictionary + }) + + def appendPagesFromReader(self, reader, after_page_append=None): + """ + Copy pages from reader to writer. Includes an optional callback parameter + which is invoked after pages are appended to the writer. + + :param reader: a PdfFileReader object from which to copy page + annotations to this writer object. The writer's annots + will then be updated + :callback after_page_append (function): Callback function that is invoked after + each page is appended to the writer. Callback signature: + + :param writer_pageref (PDF page reference): Reference to the page + appended to the writer. + """ + # Get page count from writer and reader + reader_num_pages = reader.getNumPages() + writer_num_pages = self.getNumPages() + + # Copy pages from reader to writer + for rpagenum in range(0, reader_num_pages): + reader_page = reader.getPage(rpagenum) + self.addPage(reader_page) + writer_page = self.getPage(writer_num_pages+rpagenum) + # Trigger callback, pass writer page as parameter + if callable(after_page_append): after_page_append(writer_page) + + def updatePageFormFieldValues(self, page, fields): + ''' + Update the form field values for a given page from a fields dictionary. + Copy field texts and values from fields to page. + + :param page: Page reference from PDF writer where the annotations + and field data will be updated. + :param fields: a Python dictionary of field names (/T) and text + values (/V) + ''' + # Iterate through pages, update field values + for j in range(0, len(page['/Annots'])): + writer_annot = page['/Annots'][j].getObject() + for field in fields: + if writer_annot.get('/T') == field: + writer_annot.update({ + NameObject("/V"): TextStringObject(fields[field]) + }) + + def cloneReaderDocumentRoot(self, reader): + ''' + Copy the reader document root to the writer. + + :param reader: PdfFileReader from the document root should be copied. + :callback after_page_append + ''' + self._root_object = reader.trailer['/Root'] + + def cloneDocumentFromReader(self, reader, after_page_append=None): + ''' + Create a copy (clone) of a document from a PDF file reader + + :param reader: PDF file reader instance from which the clone + should be created. + :callback after_page_append (function): Callback function that is invoked after + each page is appended to the writer. Signature includes a reference to the + appended page (delegates to appendPagesFromReader). Callback signature: + + :param writer_pageref (PDF page reference): Reference to the page just + appended to the document. + ''' + self.cloneReaderDocumentRoot(reader) + self.appendPagesFromReader(reader, after_page_append) + def encrypt(self, user_pwd, owner_pwd = None, use_128bit = True): """ Encrypt this PDF file with the PDF Standard encryption handler. @@ -516,7 +668,6 @@ return bookmarkRef - def addBookmark(self, title, pagenum, parent=None, color=None, bold=False, italic=False, fit='/Fit', *args): """ Add a bookmark to this PDF file. @@ -553,7 +704,6 @@ if parent == None: parent = outlineRef - bookmark = TreeObject() bookmark.update({ @@ -759,7 +909,7 @@ else: borderArr = [NumberObject(0)] * 3 - if isinstance(rect, Str): + if isString(rect): rect = NameObject(rect) elif isinstance(rect, RectangleObject): pass @@ -871,6 +1021,7 @@ """Read and write property accessing the :meth:`getPageMode()` and :meth:`setPageMode()` methods.""" + class PdfFileReader(object): """ Initializes a PdfFileReader object. This operation can take some time, as @@ -904,9 +1055,10 @@ self.flattenedPages = None self.resolvedObjects = {} self.xrefIndex = 0 + self._pageId2Num = None # map page IndirectRef number to Page Number if hasattr(stream, 'mode') and 'b' not in stream.mode: warnings.warn("PdfFileReader stream/file object is not in binary mode. It may not be read correctly.", utils.PdfReadWarning) - if type(stream) in (string_type, str): + if isString(stream): fileobj = open(stream, 'rb') stream = BytesIO(b_(fileobj.read())) fileobj.close() @@ -973,6 +1125,7 @@ if self.isEncrypted: try: self._override_encryption = True + self.decrypt('') return self.trailer["/Root"]["/Pages"]["/Count"] except: raise utils.PdfReadError("File has not been decrypted") @@ -1160,7 +1313,14 @@ # get the outline dictionary and named destinations if "/Outlines" in catalog: - lines = catalog["/Outlines"] + try: + lines = catalog["/Outlines"] + except utils.PdfReadError: + # this occurs if the /Outlines object reference is incorrect + # for an example of such a file, see https://unglueit-files.s3.amazonaws.com/ebf/7552c42e9280b4476e59e77acc0bc812.pdf + # so continue to load the file without the Bookmarks + return outlines + if "/First" in lines: node = lines["/First"] self._namedDests = self.getNamedDestinations() @@ -1187,6 +1347,49 @@ return outlines + def _getPageNumberByIndirect(self, indirectRef): + """Generate _pageId2Num""" + if self._pageId2Num is None: + id2num = {} + for i, x in enumerate(self.pages): + id2num[x.indirectRef.idnum] = i + self._pageId2Num = id2num + + if isinstance(indirectRef, int): + idnum = indirectRef + else: + idnum = indirectRef.idnum + + ret = self._pageId2Num.get(idnum, -1) + return ret + + def getPageNumber(self, page): + """ + Retrieve page number of a given PageObject + + :param PageObject page: The page to get page number. Should be + an instance of :class:`PageObject` + :return: the page number or -1 if page not found + :rtype: int + """ + indirectRef = page.indirectRef + ret = self._getPageNumberByIndirect(indirectRef) + return ret + + def getDestinationPageNumber(self, destination): + """ + Retrieve page number of a given Destination object + + :param Destination destination: The destination to get page number. + Should be an instance of + :class:`Destination` + :return: the page number or -1 if page not found + :rtype: int + """ + indirectRef = destination.page + ret = self._getPageNumberByIndirect(indirectRef) + return ret + def _buildDestination(self, title, array): page, typ = array[0:2] array = array[2:] @@ -1210,7 +1413,7 @@ if dest: if isinstance(dest, ArrayObject): outline = self._buildDestination(title, dest) - elif isinstance(dest, Str) and dest in self._namedDests: + elif isString(dest) and dest in self._namedDests: outline = self._namedDests[dest] outline[NameObject("/Title")] = title else: @@ -1310,6 +1513,8 @@ assert idx < objStm['/N'] streamData = BytesIO(b_(objStm.getData())) for i in range(objStm['/N']): + readNonWhitespace(streamData) + streamData.seek(-1, 1) objnum = NumberObject.readFromStream(streamData) readNonWhitespace(streamData) streamData.seek(-1, 1) @@ -1347,7 +1552,6 @@ if self.strict: raise utils.PdfReadError("This is a fatal error in strict mode.") return NullObject() - def getObject(self, indirectReference): debug = False if debug: print(("looking at:", indirectReference.idnum, indirectReference.generation)) @@ -1580,6 +1784,7 @@ assert len(entrySizes) >= 3 if self.strict and len(entrySizes) > 3: raise utils.PdfReadError("Too many entry sizes: %s" %entrySizes) + def getEntry(i): # Reads the correct number of bytes for each entry. See the # discussion of the W parameter in PDF spec table 17. @@ -1664,8 +1869,7 @@ if found: continue # no xref table found at specified location - assert False - break + raise utils.PdfReadError("Could not find xref table at specified location") #if not zero-indexed, verify that the table is correct; change it if necessary if self.xrefIndex and not self.strict: loc = stream.tell() @@ -1683,7 +1887,6 @@ #if not, then either it's just plain wrong, or the non-zero-index is actually correct stream.seek(loc, 0) #return to where it was - def _zeroXref(self, generation): self.xref[generation] = dict( (k-self.xrefIndex, v) for (k, v) in list(self.xref[generation].items()) ) @@ -1700,8 +1903,13 @@ if debug: print(">>readNextEndLine") line = b_("") while True: + # Prevent infinite loops in malformed PDFs + if stream.tell() == 0: + raise utils.PdfReadError("Could not read malformed PDF file") x = stream.read(1) if debug: print((" x:", x, "%x"%ord(x))) + if stream.tell() < 2: + raise utils.PdfReadError("EOL marker not found") stream.seek(-2, 1) if x == b_('\n') or x == b_('\r'): ## \n = LF; \r = CR crlf = False @@ -1713,6 +1921,8 @@ if x == b_('\n') or x == b_('\r'): # account for CR+LF stream.seek(-1, 1) crlf = True + if stream.tell() < 2: + raise utils.PdfReadError("EOL marker not found") stream.seek(-2, 1) stream.seek(2 if crlf else 1, 1) #if using CR+LF, go back 2 bytes, else 1 break @@ -1827,14 +2037,17 @@ setRectangle(self, name, retval) return retval + def setRectangle(self, name, value): if not isinstance(name, NameObject): name = NameObject(name) self[name] = value + def deleteRectangle(self, name): del self[name] + def createRectangleAccessor(name, fallback): return \ property( @@ -1843,6 +2056,7 @@ lambda self: deleteRectangle(self, name) ) + class PageObject(DictionaryObject): """ This class represents a single page within a PDF file. Typically this @@ -2374,6 +2588,7 @@ for i in operands[0]: if isinstance(i, TextStringObject): text += i + text += "\n" return text mediaBox = createRectangleAccessor("/MediaBox", ()) @@ -2412,6 +2627,7 @@ page's creator. """ + class ContentStream(DecodedStreamObject): def __init__(self, stream, pdf): self.pdf = pdf @@ -2440,22 +2656,22 @@ if peek.isalpha() or peek == b_("'") or peek == b_('"'): operator = utils.readUntilRegex(stream, NameObject.delimiterPattern, True) - if operator == "BI": + if operator == b_("BI"): # begin inline image - a completely different parsing # mechanism is required, of course... thanks buddy... assert operands == [] ii = self._readInlineImage(stream) - self.operations.append((ii, "INLINE IMAGE")) + self.operations.append((ii, b_("INLINE IMAGE"))) else: self.operations.append((operands, operator)) operands = [] - elif peek == '%': + elif peek == b_('%'): # If we encounter a comment in the content stream, we have to # handle it here. Typically, readObject will handle # encountering a comment -- but readObject assumes that # following the comment must be the object we're trying to # read. In this case, it could be an operator instead. - while peek not in ('\r', '\n'): + while peek not in (b_('\r'), b_('\n')): peek = stream.read(1) else: operands.append(readObject(stream, None)) @@ -2467,7 +2683,7 @@ while True: tok = readNonWhitespace(stream) stream.seek(-1, 1) - if tok == "I": + if tok == b_("I"): # "ID" - begin of image data break key = readObject(stream, self.pdf) @@ -2477,28 +2693,32 @@ settings[key] = value # left at beginning of ID tmp = stream.read(3) - assert tmp[:2] == "ID" - data = "" + assert tmp[:2] == b_("ID") + data = b_("") while True: + # Read the inline image, while checking for EI (End Image) operator. tok = stream.read(1) - if tok == "E": + if tok == b_("E"): # Check for End Image - next1 = stream.read(1) - if next1 == "I": - next2 = readNonWhitespace(stream) - if next2 == 'Q': + tok2 = stream.read(1) + if tok2 == b_("I"): + # Sometimes that data will contain EI, so check for the Q operator. + tok3 = stream.read(1) + info = tok + tok2 + while tok3 in utils.WHITESPACES: + info += tok3 + tok3 = stream.read(1) + if tok3 == b_("Q"): stream.seek(-1, 1) break else: - stream.seek(-2, 1) - data += tok + stream.seek(-1,1) + data += info else: stream.seek(-1, 1) data += tok else: data += tok - x = readNonWhitespace(stream) - stream.seek(-1, 1) return {"settings": settings, "data": data} def _getData(self): @@ -2525,6 +2745,7 @@ _data = property(_getData, _setData) + class DocumentInformation(DictionaryObject): """ A class representing the basic document metadata provided in a PDF File. @@ -2588,6 +2809,7 @@ producer_raw = property(lambda self: self.get("/Producer")) """The "raw" version of producer; can return a ``ByteStringObject``.""" + def convertToInt(d, size): if size > 8: raise utils.PdfReadError("invalid size in convertToInt") @@ -2600,6 +2822,7 @@ b_('\xff\xfa\x01\x08\x2e\x2e\x00\xb6\xd0\x68\x3e\x80\x2f\x0c') + \ b_('\xa9\xfe\x64\x53\x69\x7a') + # Implementation of algorithm 3.2 of the PDF standard security handler, # section 3.5.2 of the PDF 1.6 reference. def _alg32(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt=True): @@ -2643,6 +2866,7 @@ # entry. return md5_hash[:keylen] + # Implementation of algorithm 3.3 of the PDF standard security handler, # section 3.5.2 of the PDF 1.6 reference. def _alg33(owner_pwd, user_pwd, rev, keylen): @@ -2670,6 +2894,7 @@ # the /O entry in the encryption dictionary. return val + # Steps 1-4 of algorithm 3.3 def _alg33_1(password, rev, keylen): # 1. Pad or truncate the owner password string as described in step 1 of @@ -2692,6 +2917,7 @@ key = md5_hash[:keylen] return key + # Implementation of algorithm 3.4 of the PDF standard security handler, # section 3.5.2 of the PDF 1.6 reference. def _alg34(password, owner_entry, p_entry, id1_entry): @@ -2706,6 +2932,7 @@ # encryption dictionary. return U, key + # Implementation of algorithm 3.4 of the PDF standard security handler, # section 3.5.2 of the PDF 1.6 reference. def _alg35(password, rev, keylen, owner_entry, p_entry, id1_entry, metadata_encrypt): diff -Nru pypdf2-1.23+git20141008/PyPDF2/utils.py pypdf2-1.25/PyPDF2/utils.py --- pypdf2-1.23+git20141008/PyPDF2/utils.py 2014-10-25 20:55:38.000000000 +0000 +++ pypdf2-1.25/PyPDF2/utils.py 2015-07-16 10:58:34.000000000 +0000 @@ -33,25 +33,35 @@ import sys -# "Str" maintains compatibility with Python 2.x. -# The next line is obfuscated like this so 2to3 won't change it. + try: import __builtin__ as builtins except ImportError: # Py3 import builtins -if sys.version_info[0] < 3: - string_type = unicode - bytes_type = str - int_types = (int, long) -else: - string_type = str - bytes_type = bytes - int_types = (int,) +xrange_fn = getattr(builtins, "xrange", range) +_basestring = getattr(builtins, "basestring", str) + +bytes_type = type(bytes()) # Works the same in Python 2.X and 3.X +string_type = getattr(builtins, "unicode", str) +int_types = (int, long) if sys.version_info[0] < 3 else (int,) + + +# Make basic type tests more consistent +def isString(s): + """Test if arg is a string. Compatible with Python 2 and 3.""" + return isinstance(s, _basestring) + + +def isInt(n): + """Test if arg is an int. Compatible with Python 2 and 3.""" + return isinstance(n, int_types) -Xrange = getattr(builtins, "xrange", range) -Str = getattr(builtins, "basestring", str) + +def isBytes(b): + """Test if arg is a bytes instance. Compatible with Python 2 and 3.""" + return isinstance(b, bytes_type) #custom implementation of warnings.formatwarning @@ -59,6 +69,7 @@ file = filename.replace("/", "\\").rsplit("\\", 1)[1] # find the file name return "%s: %s [%s:%s]\n" % (category.__name__, message, file, lineno) + def readUntilWhitespace(stream, maxchars=None): """ Reads non-whitespace characters and returns them. @@ -74,6 +85,7 @@ break return txt + def readNonWhitespace(stream): """ Finds and reads the next non-whitespace character (ignores whitespace). @@ -83,6 +95,7 @@ tok = stream.read(1) return tok + def skipOverWhitespace(stream): """ Similar to readNonWhitespace, but returns a Boolean if more than @@ -95,6 +108,7 @@ cnt+=1 return (cnt > 1) + def skipOverComment(stream): tok = stream.read(1) stream.seek(-1, 1) @@ -102,6 +116,7 @@ while tok not in (b_('\n'), b_('\r')): tok = stream.read(1) + def readUntilRegex(stream, regex, ignore_eof=False): """ Reads until the regular expression pattern matched (ignore the match) @@ -125,6 +140,7 @@ name += tok return name + class ConvertFunctionsToVirtualList(object): def __init__(self, lengthFunction, getFunction): self.lengthFunction = lengthFunction @@ -135,10 +151,10 @@ def __getitem__(self, index): if isinstance(index, slice): - indices = Xrange(*index.indices(len(self))) + indices = xrange_fn(*index.indices(len(self))) cls = type(self) return cls(indices.__len__, lambda idx: self[indices[idx]]) - if not isinstance(index, int_types): + if not isInt(index): raise TypeError("sequence indices must be integers") len_self = len(self) if index < 0: @@ -148,6 +164,7 @@ raise IndexError("sequence index out of range") return self.getFunction(index) + def RC4_encrypt(key, plaintext): S = [i for i in range(256)] j = 0 @@ -164,12 +181,14 @@ retval += b_(chr(ord_(plaintext[x]) ^ t)) return retval + def matrixMultiply(a, b): return [[sum([float(i)*float(j) for i, j in zip(row, col)] ) for col in zip(*b)] for row in a] + def markLocation(stream): """Creates text file showing current location in context.""" # Mainly for debugging @@ -182,18 +201,23 @@ outputDoc.close() stream.seek(-RADIUS, 1) + class PyPdfError(Exception): pass + class PdfReadError(PyPdfError): pass + class PageSizeNotDefinedError(PyPdfError): pass + class PdfReadWarning(UserWarning): pass + class PdfStreamError(PdfReadError): pass @@ -203,6 +227,7 @@ return s else: B_CACHE = {} + def b_(s): bc = B_CACHE if s in bc: @@ -214,6 +239,8 @@ if len(s) < 2: bc[s] = r return r + + def u_(s): if sys.version_info[0] < 3: return unicode(s, 'unicode_escape') @@ -230,24 +257,28 @@ else: return b + def ord_(b): if sys.version_info[0] < 3 or type(b) == str: return ord(b) else: return b + def chr_(c): if sys.version_info[0] < 3: return c else: return chr(c) + def barray(b): if sys.version_info[0] < 3: return b else: return bytearray(b) + def hexencode(b): if sys.version_info[0] < 3: return b.encode('hex') @@ -256,6 +287,7 @@ coder = codecs.getencoder('hex_codec') return coder(b)[0] + def hexStr(num): return hex(num).replace('L', '') diff -Nru pypdf2-1.23+git20141008/PyPDF2/_version.py pypdf2-1.25/PyPDF2/_version.py --- pypdf2-1.23+git20141008/PyPDF2/_version.py 2014-10-25 20:55:38.000000000 +0000 +++ pypdf2-1.25/PyPDF2/_version.py 2015-07-16 10:58:34.000000000 +0000 @@ -1,2 +1 @@ -__version__ = '1.23' - +__version__ = '1.25.1-a' diff -Nru pypdf2-1.23+git20141008/PyPDF2/xmp.py pypdf2-1.25/PyPDF2/xmp.py --- pypdf2-1.23+git20141008/PyPDF2/xmp.py 2014-10-25 20:55:38.000000000 +0000 +++ pypdf2-1.25/PyPDF2/xmp.py 2015-07-16 10:58:34.000000000 +0000 @@ -50,6 +50,7 @@ )? """, re.VERBOSE) + class XmpInformation(PdfObject): """ An object that represents Adobe XMP metadata. @@ -355,5 +356,3 @@ :return: a dictionary of key/value items for custom metadata properties. :rtype: dict """ - - diff -Nru pypdf2-1.23+git20141008/README.md pypdf2-1.25/README.md --- pypdf2-1.23+git20141008/README.md 2014-10-25 20:55:38.000000000 +0000 +++ pypdf2-1.25/README.md 2015-07-16 10:58:34.000000000 +0000 @@ -23,3 +23,12 @@ ##FAQ Please see http://mstamy2.github.io/PyPDF2/FAQ.html + + +##Tests +PyPDF2 includes a test suite built on the unittest framework. All tests are located in the "Tests" folder. +Tests can be run from the command line by: + +```bash +python -m unittest Tests.tests +``` \ No newline at end of file Binary files /tmp/wtOLKNVVyZ/pypdf2-1.23+git20141008/Resources/crazyones.pdf and /tmp/jbeSlZ7Eef/pypdf2-1.25/Resources/crazyones.pdf differ diff -Nru pypdf2-1.23+git20141008/Resources/crazyones.txt pypdf2-1.25/Resources/crazyones.txt --- pypdf2-1.23+git20141008/Resources/crazyones.txt 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/Resources/crazyones.txt 2015-07-16 10:58:34.000000000 +0000 @@ -0,0 +1 @@ +TheCrazyOnesOctober14,1998Herestothecrazyones.Themis˝ts.Therebels.Thetroublemakers.Theroundpegsinthesquareholes.Theoneswhoseethingsdi˙erently.Theyrenotfondofrules.Andtheyhavenorespectforthestatusquo.Youcanquotethem,disagreewiththem,glorifyorvilifythem.Abouttheonlythingyoucantdoisignorethem.Becausetheychangethings.Theyinvent.Theyimagine.Theyheal.Theyexplore.Theycreate.Theyinspire.Theypushthehumanraceforward.Maybetheyhavetobecrazy.Howelsecanyoustareatanemptycanvasandseeaworkofart?Orsitinsilenceandhearasongthatsneverbeenwritten?Orgazeataredplanetandseealaboratoryonwheels?Wemaketoolsforthesekindsofpeople.Whilesomeseethemasthecrazyones,weseegenius.Becausethepeoplewhoarecrazyenoughtothinktheycanchangetheworld,aretheoneswhodo. \ No newline at end of file diff -Nru pypdf2-1.23+git20141008/Sample_Code/2-up.py pypdf2-1.25/Sample_Code/2-up.py --- pypdf2-1.23+git20141008/Sample_Code/2-up.py 2014-10-25 20:55:38.000000000 +0000 +++ pypdf2-1.25/Sample_Code/2-up.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,26 +0,0 @@ -from PyPDF2 import PdfFileWriter, PdfFileReader -import sys -import math - -def main(): - if (len(sys.argv) != 3): - print("usage: python 2-up.py input_file output_file") - sys.exit(1) - print ("2-up input " + sys.argv[1]) - input1 = PdfFileReader(open(sys.argv[1], "rb")) - output = PdfFileWriter() - for iter in range (0, input1.getNumPages()-1, 2): - lhs = input1.getPage(iter) - rhs = input1.getPage(iter+1) - lhs.mergeTranslatedPage(rhs, lhs.mediaBox.getUpperRight_x(),0, True) - output.addPage(lhs) - print (str(iter) + " "), - sys.stdout.flush() - - print("writing " + sys.argv[2]) - outputStream = file(sys.argv[2], "wb") - output.write(outputStream) - print("done.") - -if __name__ == "__main__": - main() diff -Nru pypdf2-1.23+git20141008/Sample_Code/basic_merging.py pypdf2-1.25/Sample_Code/basic_merging.py --- pypdf2-1.23+git20141008/Sample_Code/basic_merging.py 2014-10-25 20:55:38.000000000 +0000 +++ pypdf2-1.25/Sample_Code/basic_merging.py 2015-07-16 10:58:34.000000000 +0000 @@ -1,7 +1,7 @@ from PyPDF2 import PdfFileMerger merger = PdfFileMerger() - + input1 = open("document1.pdf", "rb") input2 = open("document2.pdf", "rb") input3 = open("document3.pdf", "rb") diff -Nru pypdf2-1.23+git20141008/Sample_Code/makesimple.py pypdf2-1.25/Sample_Code/makesimple.py --- pypdf2-1.23+git20141008/Sample_Code/makesimple.py 2014-10-25 20:55:38.000000000 +0000 +++ pypdf2-1.25/Sample_Code/makesimple.py 2015-07-16 10:58:34.000000000 +0000 @@ -14,12 +14,13 @@ a wonderful file created with Sample_Code/makesimple.py""" + def make_pdf_file(output_filename, np): title = output_filename c = canvas.Canvas(output_filename, pagesize=(8.5 * inch, 11 * inch)) c.setStrokeColorRGB(0,0,0) c.setFillColorRGB(0,0,0) - c.setFont("Helvetica", 12 * point) + c.setFont("Helvetica", 12 * point) for pn in range(1, np + 1): v = 10 * inch for subtline in (TEXT % (output_filename, pn, np)).split( '\n' ): @@ -27,7 +28,7 @@ v -= 12 * point c.showPage() c.save() - + if __name__ == "__main__": nps = [None, 5, 11, 17] for i, np in enumerate(nps): diff -Nru pypdf2-1.23+git20141008/Sample_Code/pdfcat pypdf2-1.25/Sample_Code/pdfcat --- pypdf2-1.23+git20141008/Sample_Code/pdfcat 2014-10-25 20:55:38.000000000 +0000 +++ pypdf2-1.25/Sample_Code/pdfcat 1970-01-01 00:00:00.000000000 +0000 @@ -1,80 +0,0 @@ -#!/usr/bin/env python -""" -Concatenate pages from pdf files into a single pdf file. - -Page ranges refer to the previously-named file. -A file not followed by a page range means all the pages of the file. - -PAGE RANGES are like Python slices. - {page_range_help} -EXAMPLES - pdfcat -o output.pdf head.pdf content.pdf :6 7: tail.pdf -1 - Concatenate all of head.pdf, all but page seven of content.pdf, - and the last page of tail.pdf, producing output.pdf. - - pdfcat chapter*.pdf >book.pdf - You can specify the output file by redirection. - - pdfcat chapter?.pdf chapter10.pdf >book.pdf - In case you don't want chapter 10 before chapter 2. -""" -# Copyright (c) 2014, Steve Witham . -# All rights reserved. This software is available under a BSD license; -# see https://github.com/mstamy2/PyPDF2/LICENSE - -from __future__ import print_function -import argparse -from PyPDF2.pagerange import PAGE_RANGE_HELP - -def parse_args(): - parser = argparse.ArgumentParser( - description=__doc__.format(page_range_help=PAGE_RANGE_HELP), - formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument("-o", "--output", - metavar="output_file") - parser.add_argument("-v", "--verbose", action="store_true", - help="show page ranges as they are being read") - parser.add_argument("first_filename", nargs=1, - metavar="filename [page range...]") - # argparse chokes on page ranges like "-2:" unless caught like this: - parser.add_argument("fn_pgrgs", nargs=argparse.REMAINDER, - metavar="filenames and/or page ranges") - args = parser.parse_args() - args.fn_pgrgs.insert(0, args.first_filename[0]) - return args - - -from sys import stderr, stdout, exit -import os -import traceback -from collections import defaultdict - -from PyPDF2 import PdfFileMerger, parse_filename_page_ranges - - -if __name__ == "__main__": - args = parse_args() - filename_page_ranges = parse_filename_page_ranges(args.fn_pgrgs) - if args.output: - output = open(args.output, "wb") - else: - stdout.flush() - output = os.fdopen(stdout.fileno(), "wb") - - merger = PdfFileMerger() - in_fs = dict() - try: - for (filename, page_range) in filename_page_ranges: - if args.verbose: - print(filename, page_range, file=stderr) - if filename not in in_fs: - in_fs[filename] = open(filename, "rb") - merger.append(in_fs[filename], pages=page_range) - except: - print(traceback.format_exc(), file=stderr) - print("Error while reading " + filename, file=stderr) - exit(1) - merger.write(output) - # In 3.0, input files must stay open until output is written. - # Not closing the in_fs because this script exits now. - diff -Nru pypdf2-1.23+git20141008/Scripts/2-up.py pypdf2-1.25/Scripts/2-up.py --- pypdf2-1.23+git20141008/Scripts/2-up.py 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/Scripts/2-up.py 2015-07-16 10:58:34.000000000 +0000 @@ -0,0 +1,54 @@ +from PyPDF2 import PdfFileWriter, PdfFileReader +import sys +import math + + +def main(): + if (len(sys.argv) != 3): + print("usage: python 2-up.py input_file output_file") + sys.exit(1) + print ("2-up input " + sys.argv[1]) + input1 = PdfFileReader(open(sys.argv[1], "rb")) + output = PdfFileWriter() + for iter in range (0, input1.getNumPages()-1, 2): + lhs = input1.getPage(iter) + rhs = input1.getPage(iter+1) + lhs.mergeTranslatedPage(rhs, lhs.mediaBox.getUpperRight_x(),0, True) + output.addPage(lhs) + print (str(iter) + " "), + sys.stdout.flush() + + print("writing " + sys.argv[2]) + outputStream = file(sys.argv[2], "wb") + output.write(outputStream) + print("done.") + +if __name__ == "__main__": + main() +from PyPDF2 import PdfFileWriter, PdfFileReader +import sys +import math + + +def main(): + if (len(sys.argv) != 3): + print("usage: python 2-up.py input_file output_file") + sys.exit(1) + print ("2-up input " + sys.argv[1]) + input1 = PdfFileReader(open(sys.argv[1], "rb")) + output = PdfFileWriter() + for iter in range (0, input1.getNumPages()-1, 2): + lhs = input1.getPage(iter) + rhs = input1.getPage(iter+1) + lhs.mergeTranslatedPage(rhs, lhs.mediaBox.getUpperRight_x(),0, True) + output.addPage(lhs) + print (str(iter) + " "), + sys.stdout.flush() + + print("writing " + sys.argv[2]) + outputStream = open(sys.argv[2], "wb") + output.write(outputStream) + print("done.") + +if __name__ == "__main__": + main() diff -Nru pypdf2-1.23+git20141008/Scripts/pdfcat pypdf2-1.25/Scripts/pdfcat --- pypdf2-1.23+git20141008/Scripts/pdfcat 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/Scripts/pdfcat 2015-07-16 10:58:34.000000000 +0000 @@ -0,0 +1,80 @@ +#!/usr/bin/env python +""" +Concatenate pages from pdf files into a single pdf file. + +Page ranges refer to the previously-named file. +A file not followed by a page range means all the pages of the file. + +PAGE RANGES are like Python slices. + {page_range_help} +EXAMPLES + pdfcat -o output.pdf head.pdf content.pdf :6 7: tail.pdf -1 + Concatenate all of head.pdf, all but page seven of content.pdf, + and the last page of tail.pdf, producing output.pdf. + + pdfcat chapter*.pdf >book.pdf + You can specify the output file by redirection. + + pdfcat chapter?.pdf chapter10.pdf >book.pdf + In case you don't want chapter 10 before chapter 2. +""" +# Copyright (c) 2014, Steve Witham . +# All rights reserved. This software is available under a BSD license; +# see https://github.com/mstamy2/PyPDF2/LICENSE + +from __future__ import print_function +import argparse +from PyPDF2.pagerange import PAGE_RANGE_HELP + + +def parse_args(): + parser = argparse.ArgumentParser( + description=__doc__.format(page_range_help=PAGE_RANGE_HELP), + formatter_class=argparse.RawDescriptionHelpFormatter) + parser.add_argument("-o", "--output", + metavar="output_file") + parser.add_argument("-v", "--verbose", action="store_true", + help="show page ranges as they are being read") + parser.add_argument("first_filename", nargs=1, + metavar="filename [page range...]") + # argparse chokes on page ranges like "-2:" unless caught like this: + parser.add_argument("fn_pgrgs", nargs=argparse.REMAINDER, + metavar="filenames and/or page ranges") + args = parser.parse_args() + args.fn_pgrgs.insert(0, args.first_filename[0]) + return args + + +from sys import stderr, stdout, exit +import os +import traceback +from collections import defaultdict + +from PyPDF2 import PdfFileMerger, parse_filename_page_ranges + + +if __name__ == "__main__": + args = parse_args() + filename_page_ranges = parse_filename_page_ranges(args.fn_pgrgs) + if args.output: + output = open(args.output, "wb") + else: + stdout.flush() + output = os.fdopen(stdout.fileno(), "wb") + + merger = PdfFileMerger() + in_fs = dict() + try: + for (filename, page_range) in filename_page_ranges: + if args.verbose: + print(filename, page_range, file=stderr) + if filename not in in_fs: + in_fs[filename] = open(filename, "rb") + merger.append(in_fs[filename], pages=page_range) + except: + print(traceback.format_exc(), file=stderr) + print("Error while reading " + filename, file=stderr) + exit(1) + merger.write(output) + # In 3.0, input files must stay open until output is written. + # Not closing the in_fs because this script exits now. diff -Nru pypdf2-1.23+git20141008/setup.py pypdf2-1.25/setup.py --- pypdf2-1.23+git20141008/setup.py 2014-10-25 20:55:38.000000000 +0000 +++ pypdf2-1.25/setup.py 2015-07-16 10:58:34.000000000 +0000 @@ -5,7 +5,7 @@ long_description = """ A Pure-Python library built as a PDF toolkit. It is capable of: - + - extracting document information (title, author, ...) - splitting documents page by page - merging documents page by page @@ -25,9 +25,9 @@ VSRE = r"^__version__ = ['\"]([^'\"]*)['\"]" mo = re.search(VSRE, verstrline, re.M) if mo: - verstr = mo.group(1) + verstr = mo.group(1) else: - raise RuntimeError("Unable to find version string in %s." % (VERSIONFILE)) + raise RuntimeError("Unable to find version string in %s." % (VERSIONFILE)) setup( name="PyPDF2", @@ -50,4 +50,3 @@ ], packages=["PyPDF2"], ) - diff -Nru pypdf2-1.23+git20141008/Tests/tests.py pypdf2-1.25/Tests/tests.py --- pypdf2-1.23+git20141008/Tests/tests.py 1970-01-01 00:00:00.000000000 +0000 +++ pypdf2-1.25/Tests/tests.py 2015-07-16 10:58:34.000000000 +0000 @@ -0,0 +1,35 @@ +import os, sys, unittest + +# Configure path environment +TESTS_ROOT = os.path.abspath(os.path.dirname(__file__)) +PROJECT_ROOT = os.path.dirname(TESTS_ROOT) +RESOURCE_ROOT = os.path.join(PROJECT_ROOT, 'Resources') + +sys.path.append(PROJECT_ROOT) + +# Test imports +import unittest +from PyPDF2 import PdfFileReader + + +class PdfReaderTestCases(unittest.TestCase): + + def test_PdfReaderFileLoad(self): + ''' Test loading and parsing of a file. Extract text of the file and compare to expected + textual output. Expected outcome: file loads, text matches expected. + ''' + with open(os.path.join(RESOURCE_ROOT, 'crazyones.pdf'), 'rb') as inputfile: + + # Load PDF file from file + ipdf = PdfFileReader(inputfile) + ipdf_p1 = ipdf.getPage(0) + + # Retrieve the text of the PDF + pdftext_file = open(os.path.join(RESOURCE_ROOT, 'crazyones.txt'), 'r') + pdftext = pdftext_file.read() + ipdf_p1_text = ipdf_p1.extractText() + + # Compare the text of the PDF to a known source + self.assertEqual(ipdf_p1_text.encode('utf-8', errors='ignore'), pdftext, + msg='PDF extracted text differs from expected value.\n\nExpected:\n\n%r\n\nExtracted:\n\n%r\n\n' + % (pdftext, ipdf_p1_text.encode('utf-8', errors='ignore'))) \ No newline at end of file