diff -Nru python-internetarchive-1.9.0/debian/changelog python-internetarchive-1.9.3/debian/changelog --- python-internetarchive-1.9.0/debian/changelog 2020-01-31 19:56:30.000000000 +0000 +++ python-internetarchive-1.9.3/debian/changelog 2020-05-11 03:23:59.000000000 +0000 @@ -1,3 +1,10 @@ +python-internetarchive (1.9.3-1) unstable; urgency=medium + + * new upstream release + * remove patches: all merged upstream + + -- Antoine Beaupré Sun, 10 May 2020 23:23:59 -0400 + python-internetarchive (1.9.0-3) unstable; urgency=medium * hotfix: close file after getting md5 (Closes: #950289) diff -Nru python-internetarchive-1.9.0/debian/patches/0001-close-file-after-getting-md5.patch python-internetarchive-1.9.3/debian/patches/0001-close-file-after-getting-md5.patch --- python-internetarchive-1.9.0/debian/patches/0001-close-file-after-getting-md5.patch 2020-01-31 19:56:30.000000000 +0000 +++ python-internetarchive-1.9.3/debian/patches/0001-close-file-after-getting-md5.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -From 086e2e65fc840fd827b02e1022fad084ee700d7c Mon Sep 17 00:00:00 2001 -From: kpcyrd -Date: Fri, 31 Jan 2020 14:53:05 -0500 -Subject: [PATCH] close file after getting md5 - -I've tried to upload to archive.org and noticed ia crashes on -large folders. - - $ ulimit -n - 1024 - $ ia upload asdf ./folder-with-more-than-1024-files/ - [...] - OSError: [Errno 24] Too many open files - [...] - $ - -The bug is present in src:python-internetarchive, I found a patch that -resolves the issue from 2018 that was never applied. You can find a -patch that cleanly applies to the current debian/sid below. The original -author is github.com/Arkiver2. - -Upstream patch: -https://github.com/jjjake/internetarchive/commit/4e4120f07c98ea98c61791293835df2797bfee61 - -Debian Bug: #950289 ---- - internetarchive/utils.py | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/internetarchive/utils.py b/internetarchive/utils.py -index db8412a..2f3e04e 100644 ---- a/internetarchive/utils.py -+++ b/internetarchive/utils.py -@@ -235,14 +235,16 @@ def recursive_file_count(files, item=None, checksum=False): - is_dir = False - if is_dir: - for x, _ in iter_directory(f): -- lmd5 = get_md5(open(x, 'rb')) -+ with open(x, 'rb') as f_: -+ lmd5 = get_md5(f_) - if lmd5 in md5s: - continue - else: - total_files += 1 - else: - try: -- lmd5 = get_md5(open(f, 'rb')) -+ with open(f, 'rb') as f_: -+ lmd5 = get_md5(f_) - except TypeError: - # Support file-like objects. - lmd5 = get_md5(f) --- -2.20.1 - diff -Nru python-internetarchive-1.9.0/debian/patches/0001-remove-backports-requirement-for-newer-python-releas.patch python-internetarchive-1.9.3/debian/patches/0001-remove-backports-requirement-for-newer-python-releas.patch --- python-internetarchive-1.9.0/debian/patches/0001-remove-backports-requirement-for-newer-python-releas.patch 2020-01-31 19:56:30.000000000 +0000 +++ python-internetarchive-1.9.3/debian/patches/0001-remove-backports-requirement-for-newer-python-releas.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ -From 17a728511d74edce40eb31f8a50f8b1bff590f42 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= -Date: Mon, 9 Dec 2019 17:09:37 -0500 -Subject: [PATCH] remove backports requirement for newer python releases - -The backports.csv requirement makes no sense in Python releases after -3.4: it's part of the standard library and works fine. Remove that -requirement from 3.5 and later, as it breaks the install on Debian and -probably other platforms. 
-
-Closes: #295
----
- setup.py | 2 --
- 1 file changed, 2 deletions(-)
-
-diff --git a/setup.py b/setup.py
-index b813025..844fce8 100644
---- a/setup.py
-+++ b/setup.py
-@@ -45,8 +45,6 @@
-         'schema>=0.4.0',
-         'backports.csv < 1.07;python_version<"2.7"',
-         'backports.csv < 1.07;python_version<"3.4"',
--        'backports.csv;python_version>="2.7"',
--        'backports.csv;python_version>="3.4"',
-         'total-ordering;python_version<"2.7"',
-     ],
-     classifiers=[
---
-2.20.1
-
diff -Nru python-internetarchive-1.9.0/debian/patches/series python-internetarchive-1.9.3/debian/patches/series
--- python-internetarchive-1.9.0/debian/patches/series 2020-01-31 19:56:30.000000000 +0000
+++ python-internetarchive-1.9.3/debian/patches/series 1970-01-01 00:00:00.000000000 +0000
@@ -1,2 +0,0 @@
-0001-close-file-after-getting-md5.patch
-0001-remove-backports-requirement-for-newer-python-releas.patch
diff -Nru python-internetarchive-1.9.0/HISTORY.rst python-internetarchive-1.9.3/HISTORY.rst
--- python-internetarchive-1.9.0/HISTORY.rst 2019-12-05 23:00:37.000000000 +0000
+++ python-internetarchive-1.9.3/HISTORY.rst 2020-04-07 19:32:01.000000000 +0000
@@ -3,8 +3,46 @@
 Release History
 ---------------
 
-1.9.0 (?)
-+++++++++
+1.9.3 (2020-04-07)
+++++++++++++++++++
+
+**Features and Improvements**
+
+- Added support for removing items from simplelists as if they were collections.
+- Added ``Item.derive()`` method for deriving items.
+- Added ``Item.fixer()`` method for submitting fixer tasks.
+- Added ``--task-args`` to ``ia tasks`` for submitting task args to the Tasks API.
+
+**Bugfixes**
+
+- Minor bug fix in ``ia tasks`` to support tasks that do not require a ``--comment`` option.
+
+1.9.2 (2020-03-15)
+++++++++++++++++++
+
+**Features and Improvements**
+
+- Switched to ``tqdm`` for progress bar (``clint`` is no longer maintained).
+- Added ``Item.identifier_available()`` method for calling check_identifier.php.
+- Added support for opening details page in default browser after upload.
+- Added support for using ``item`` or ``identifier`` as column header in spreadsheet mode.
+- Added ``ArchiveSession.get_my_catalog()`` method for retrieving running/queued tasks.
+- Removed backports.csv requirement for newer Python releases.
+- Authorization header is now used for metadata reads, to support privileged access to /metadata.
+- ``ia download`` no longer downloads history dir by default.
+- Added ``ignore_history_dir`` to ``Item.download()``. The default is ``False``.
+
+**Bugfixes**
+
+- Fixed bug in ``ia copy`` and ``ia move`` where filenames weren't being encoded/quoted correctly.
+- Fixed bug in ``Item.get_all_item_tasks()`` where all calls would fail unless a dict was provided to ``params``.
+- Read from ~/.config/ia.ini with fallback to ~/.ia, regardless of the existence of ~/.config.
+- Fixed S3 overload message always mentioning the total maximum number of retries, not the remaining ones.
+- Fixed bug where a KeyError exception would be raised on most calls to dark items.
+- Fixed bug where md5 was being calculated for every upload.
+ +1.9.0 (2019-12-05) +++++++++++++++++++ **Features and Improvements** diff -Nru python-internetarchive-1.9.0/internetarchive/catalog.py python-internetarchive-1.9.3/internetarchive/catalog.py --- python-internetarchive-1.9.0/internetarchive/catalog.py 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/internetarchive/catalog.py 2020-04-07 19:32:01.000000000 +0000 @@ -114,14 +114,14 @@ if identifier: params['identifier'] = identifier params.update(dict(summary=1, history=0, catalog=0)) - r = self.make_tasks_reqeust(params) + r = self.make_tasks_request(params) j = r.json() if j.get('success') is True: return j['value']['summary'] else: return j - def make_tasks_reqeust(self, params): + def make_tasks_request(self, params): """Make a GET request to the `Tasks API `_ @@ -158,7 +158,7 @@ :rtype: collections.Iterable[CatalogTask] """ while True: - r = self.make_tasks_reqeust(params) + r = self.make_tasks_request(params) j = r.json() for row in j.get('value', dict()).get('catalog', list()): yield CatalogTask(row, self) @@ -191,7 +191,7 @@ params.update(dict(limit=0)) if not params.get('summary'): params['summary'] = 0 - r = self.make_tasks_reqeust(params) + r = self.make_tasks_request(params) line = '' tasks = list() for c in r.iter_content(): diff -Nru python-internetarchive-1.9.0/internetarchive/cli/ia_copy.py python-internetarchive-1.9.3/internetarchive/cli/ia_copy.py --- python-internetarchive-1.9.0/internetarchive/cli/ia_copy.py 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/internetarchive/cli/ia_copy.py 2020-04-07 19:32:01.000000000 +0000 @@ -28,12 +28,17 @@ -m, --metadata=... Metadata to add to your new item, if you are moving the file to a new item. -H, --header=... S3 HTTP headers to send with your request. + +examples: + # Turn off backups + ia copy / / -H x-archive-keep-old-version:0 """ from __future__ import print_function, absolute_import import sys from docopt import docopt, printable_usage from schema import Schema, Use, Or, And, SchemaError +from six.moves.urllib import parse import internetarchive as ia from internetarchive.cli.argparser import get_args_dict @@ -91,14 +96,13 @@ print('{0}\n{1}'.format(str(exc), usage), file=sys.stderr) sys.exit(1) - args['--header']['x-amz-copy-source'] = '/{}'.format(src_path) + args['--header']['x-amz-copy-source'] = '/{}'.format(parse.quote(src_path)) args['--header']['x-amz-metadata-directive'] = 'COPY' - args['--header'] # Add keep-old-version by default. if 'x-archive-keep-old-version' not in args['--header']: args['--header']['x-archive-keep-old-version'] = '1' - url = '{}//s3.us.archive.org/{}'.format(session.protocol, dest_path) + url = '{}//s3.us.archive.org/{}'.format(session.protocol, parse.quote(dest_path)) req = ia.iarequest.S3Request(url=url, method='PUT', metadata=args['--metadata'], diff -Nru python-internetarchive-1.9.0/internetarchive/cli/ia_delete.py python-internetarchive-1.9.3/internetarchive/cli/ia_delete.py --- python-internetarchive-1.9.0/internetarchive/cli/ia_delete.py 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/internetarchive/cli/ia_delete.py 2020-04-07 19:32:01.000000000 +0000 @@ -39,6 +39,9 @@ -f, --format=... Only only delete files matching the specified format(s). -R, --retries= Number of times to retry if S3 returns a 503 SlowDown error [default: 2]. 
+
+examples:
+    ia delete <identifier> <file> -H x-archive-keep-old-version:0  # Turn off backups
 """
 from __future__ import absolute_import, print_function, unicode_literals
@@ -97,7 +100,7 @@
     if args['--all']:
         files = [f for f in item.get_files()]
-        args['--cacade'] = True
+        args['--cascade'] = True
     elif args['--glob']:
         files = item.get_files(glob_pattern=args['--glob'])
     elif args['--format']:
diff -Nru python-internetarchive-1.9.0/internetarchive/cli/ia_download.py python-internetarchive-1.9.3/internetarchive/cli/ia_download.py
--- python-internetarchive-1.9.0/internetarchive/cli/ia_download.py 2019-12-05 23:00:37.000000000 +0000
+++ python-internetarchive-1.9.3/internetarchive/cli/ia_download.py 2020-04-07 19:32:01.000000000 +0000
@@ -33,7 +33,7 @@
     -d, --dry-run              Print URLs to stdout and exit.
     -i, --ignore-existing      Clobber files already downloaded.
     -C, --checksum             Skip files based on checksum [default: False].
-    -R, --retries=<retries>    Set number of retries to <retries> [default: 5]
+    -R, --retries=<retries>    Set number of retries to <retries> [default: 5].
     -I, --itemlist=<file>      Download items from a specified file. Itemlists should
                                be a plain text file with one identifier per line.
     -S, --search=<query>       Download items returned from a specified search query.
@@ -58,6 +58,7 @@
     --no-change-timestamp      Don't change the timestamp of downloaded files to reflect
                                the source material.
     -p, --parameters=<key:value>...  Parameters to send with your query (e.g. `cnt=0`).
+    -a, --download-history     Also download files from the history directory.
 """
 from __future__ import print_function, absolute_import
 import os
@@ -98,6 +99,7 @@
         '--search-parameters': Use(lambda x: get_args_dict(x, query_string=True)),
         '--on-the-fly': Use(bool),
         '--no-change-timestamp': Use(bool),
+        '--download-history': Use(bool),
         '--parameters': Use(lambda x: get_args_dict(x, query_string=True)),
     })
 
@@ -193,6 +195,7 @@
                 continue
 
         # Otherwise, download the entire item.
+        ignore_history_dir = True if not args['--download-history'] else False
         _errors = item.download(
             files=files,
             formats=args['--format'],
@@ -209,7 +212,8 @@
             ignore_errors=True,
             on_the_fly=args['--on-the-fly'],
             no_change_timestamp=args['--no-change-timestamp'],
-            params=args['--parameters']
+            params=args['--parameters'],
+            ignore_history_dir=ignore_history_dir,
         )
         if _errors:
             errors.append(_errors)
diff -Nru python-internetarchive-1.9.0/internetarchive/cli/ia_metadata.py python-internetarchive-1.9.3/internetarchive/cli/ia_metadata.py
--- python-internetarchive-1.9.0/internetarchive/cli/ia_metadata.py 2019-12-05 23:00:37.000000000 +0000
+++ python-internetarchive-1.9.3/internetarchive/cli/ia_metadata.py 2020-04-07 19:32:01.000000000 +0000
@@ -61,6 +61,7 @@
 import six
 
 from internetarchive.cli.argparser import get_args_dict, get_args_dict_many_write
+from internetarchive.exceptions import ItemLocateError
 
 # Only import backports.csv for Python2 (in support of FreeBSD port).
PY2 = sys.version_info[0] == 2
@@ -73,8 +74,12 @@
 def modify_metadata(item, metadata, args):
     append = True if args['--append'] else False
     append_list = True if args['--append-list'] else False
-    r = item.modify_metadata(metadata, target=args['--target'], append=append,
-                             priority=args['--priority'], append_list=append_list)
+    try:
+        r = item.modify_metadata(metadata, target=args['--target'], append=append,
+                                 priority=args['--priority'], append_list=append_list)
+    except ItemLocateError as exc:
+        print('{} - error: {}'.format(item.identifier, str(exc)), file=sys.stderr)
+        sys.exit(1)
     if not r.json()['success']:
         error_msg = r.json()['error']
         if 'no changes' in r.content.decode('utf-8'):
@@ -96,6 +101,20 @@
                 print('{0}/metadata/{1} does not exist, skipping.'.format(
                     item.identifier, key), file=sys.stderr)
                 continue
+            elif key == 'collection' and metadata[key] not in src_md:
+                r = item.remove_from_simplelist(metadata[key], 'holdings')
+                j = r.json()
+                if j.get('success'):
+                    print('{} - success: {} no longer in {}'.format(
+                        item.identifier, item.identifier, metadata[key]))
+                    sys.exit(0)
+                elif j.get('error', '').startswith('no row to delete for'):
+                    print('{} - success: {} no longer in {}'.format(
+                        item.identifier, item.identifier, metadata[key]))
+                    sys.exit(0)
+                else:
+                    print('{} - error: {}'.format(item.identifier, j.get('error')))
+                    sys.exit()
             elif not isinstance(src_md, list):
                 if key == 'subject':
                     src_md = src_md.split(';')
diff -Nru python-internetarchive-1.9.0/internetarchive/cli/ia_move.py python-internetarchive-1.9.3/internetarchive/cli/ia_move.py
--- python-internetarchive-1.9.0/internetarchive/cli/ia_move.py 2019-12-05 23:00:37.000000000 +0000
+++ python-internetarchive-1.9.3/internetarchive/cli/ia_move.py 2020-04-07 19:32:01.000000000 +0000
@@ -28,6 +28,10 @@
     -m, --metadata=<key:value>...  Metadata to add to your new item, if you are
                                    moving the file to a new item.
     -H, --header=<key:value>...    S3 HTTP headers to send with your request.
+
+examples:
+    # Turn off backups
+    ia move <src-identifier>/<src-file> <dest-identifier>/<dest-file> -H x-archive-keep-old-version:0
 """
 from __future__ import print_function, absolute_import
 import sys
diff -Nru python-internetarchive-1.9.0/internetarchive/cli/ia_tasks.py python-internetarchive-1.9.3/internetarchive/cli/ia_tasks.py
--- python-internetarchive-1.9.0/internetarchive/cli/ia_tasks.py 2019-12-05 23:00:37.000000000 +0000
+++ python-internetarchive-1.9.3/internetarchive/cli/ia_tasks.py 2020-04-07 19:32:01.000000000 +0000
@@ -28,8 +28,9 @@
     ia tasks [--task=<task_id>...] [--get-task-log=<task_id>]
              [--parameter=<k:v>...] [--tab-output]
     ia tasks <identifier> [--parameter=<k:v>...] [--tab-output]
-    ia tasks <identifier> --cmd=<command> --comment=<comment>
-             [--data=<k:v>...] [--tab-output]
+    ia tasks <identifier> --cmd=<command> [--comment=<comment>]
+             [--task-args=<k:v>...] [--data=<k:v>...]
+             [--tab-output]
     ia tasks --help
 
 options:
@@ -41,6 +42,7 @@
     -C, --comment=<comment>     A reasonable explanation for why a task is being submitted.
     -T, --tab-output            Output task info in tab-delimited columns.
+    -a, --task-args=<k:v>...    Args to submit to the Tasks API.
     -d, --data=<k:v>...         Additional data to send when submitting a task.
@@ -53,6 +55,8 @@
     ia tasks <identifier> --cmd make_undark.php --comment '<comment>'  # undark item
     ia tasks <identifier> --cmd make_dark.php --comment '<comment>'  # dark item
+    ia tasks <identifier> --cmd fixer.php --task-args noop:1  # submit a noop fixer.php task
+    ia tasks <identifier> --cmd fixer.php --task-args 'noop:1;asr:1'  # submit multiple fixer ops
 """
 from __future__ import absolute_import, print_function
 import sys
@@ -70,6 +74,8 @@
 
     # Tasks write API.
if args['--cmd']:
        data = get_args_dict(args['--data'], query_string=True)
+        task_args = get_args_dict(args['--task-args'], query_string=True)
+        data['args'] = task_args
         r = session.submit_task(args['<identifier>'],
                                 args['--cmd'],
                                 comment=args['--comment'],
@@ -111,10 +117,11 @@
 
         if not args['<identifier>'] \
                 and not params.get('task_id'):
-            params.update(dict(catalog=1, history=0))
+            _params = dict(catalog=1, history=0)
+            _params.update(params)
+            params = _params
         if not any(x in params for x in queryable_params):
-            # TODO: WTF
             _params = dict(submitter=session.user_email, catalog=1, history=0, summary=0)
             _params.update(params)
             params = _params
diff -Nru python-internetarchive-1.9.0/internetarchive/cli/ia_upload.py python-internetarchive-1.9.3/internetarchive/cli/ia_upload.py
--- python-internetarchive-1.9.0/internetarchive/cli/ia_upload.py 2019-12-05 23:00:37.000000000 +0000
+++ python-internetarchive-1.9.3/internetarchive/cli/ia_upload.py 2020-04-07 19:32:01.000000000 +0000
@@ -50,6 +50,11 @@
                                    [default: 30].
     --status-check                 Check if S3 is accepting requests to the given item.
     --no-collection-check          Skip collection exists check [default: False].
+    -o, --open-after-upload        Open the details page for an item after upload
+                                   [default: False].
+
+examples:
+    ia upload <identifier> <file> -H x-archive-keep-old-version:0  # Turn off backups
 """
 from __future__ import absolute_import, unicode_literals, print_function
 
@@ -58,6 +63,7 @@
 import sys
 from tempfile import TemporaryFile
 from copy import deepcopy
+import webbrowser
 
 import six
 from docopt import docopt, printable_usage
@@ -226,6 +232,10 @@
                     break
             if (not _r.status_code) or (not _r.ok):
                 ERRORS = True
+            else:
+                if args['--open-after-upload']:
+                    webbrowser.open_new_tab('{}//{}/details/{}'.format(
+                        session.protocol, session.host, item.identifier))
 
     # Bulk upload using spreadsheet.
else: @@ -236,9 +246,15 @@ for row in spreadsheet: upload_kwargs_copy = deepcopy(upload_kwargs) local_file = row['file'] - identifier = row['identifier'] + identifier = row.get('item', row.get('identifier')) + if not identifier: + print('error: no identifier column on spreadsheet!') + sys.exit(1) del row['file'] - del row['identifier'] + if 'identifier' in row: + del row['identifier'] + elif 'item' in row: + del row['item'] if (not identifier) and (prev_identifier): identifier = prev_identifier item = session.get_item(identifier) @@ -254,6 +270,10 @@ break if (not _r) or (not _r.ok): ERRORS = True + else: + if args['--open-after-upload']: + webbrowser.open_new_tab('{}//{}/details/{}'.format( + session.protocol, session.host, identifier)) prev_identifier = identifier if ERRORS: diff -Nru python-internetarchive-1.9.0/internetarchive/config.py python-internetarchive-1.9.3/internetarchive/config.py --- python-internetarchive-1.9.0/internetarchive/config.py 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/internetarchive/config.py 2020-04-07 19:32:01.000000000 +0000 @@ -103,11 +103,9 @@ config = configparser.RawConfigParser() if not config_file: - config_dir = os.path.expanduser('~/.config') - if not os.path.isdir(config_dir): + config_file = os.path.expanduser('~/.config/ia.ini') + if not os.path.isfile(config_file): config_file = os.path.expanduser('~/.ia') - else: - config_file = '{0}/ia.ini'.format(config_dir) config.read(config_file) if not config.has_section('s3'): diff -Nru python-internetarchive-1.9.0/internetarchive/exceptions.py python-internetarchive-1.9.3/internetarchive/exceptions.py --- python-internetarchive-1.9.0/internetarchive/exceptions.py 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/internetarchive/exceptions.py 2020-04-07 19:32:01.000000000 +0000 @@ -28,3 +28,12 @@ class AuthenticationError(Exception): """Authentication Failed""" + + +class ItemLocateError(Exception): + def __init__(self, *args, **kwargs): + default_message = 'Item cannot be located because it is dark or does not exist.' 
+ if args or kwargs: + super().__init__(*args, **kwargs) + else: + super().__init__(default_message) diff -Nru python-internetarchive-1.9.0/internetarchive/files.py python-internetarchive-1.9.3/internetarchive/files.py --- python-internetarchive-1.9.0/internetarchive/files.py 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/internetarchive/files.py 2020-04-07 19:32:01.000000000 +0000 @@ -351,7 +351,7 @@ url = '{0}//s3.us.archive.org/{1}/{2}'.format(self.item.session.protocol, self.identifier, - self.name) + urllib.parse.quote(self.name)) self.item.session.mount_http_adapter(max_retries=max_retries, status_forcelist=[503], host='s3.us.archive.org') diff -Nru python-internetarchive-1.9.0/internetarchive/iarequest.py python-internetarchive-1.9.3/internetarchive/iarequest.py --- python-internetarchive-1.9.0/internetarchive/iarequest.py 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/internetarchive/iarequest.py 2020-04-07 19:32:01.000000000 +0000 @@ -42,6 +42,7 @@ from internetarchive import auth, __version__ from internetarchive.utils import needs_quote, delete_items_from_dict +from internetarchive.exceptions import ItemLocateError logger = logging.getLogger(__name__) @@ -125,10 +126,11 @@ prepared_metadata = prepare_metadata(metadata) headers['x-archive-auto-make-bucket'] = '1' - if queue_derive is False: - headers['x-archive-queue-derive'] = '0' - else: - headers['x-archive-queue-derive'] = '1' + if 'x-archive-queue-derive' not in headers: + if queue_derive is False: + headers['x-archive-queue-derive'] = '0' + else: + headers['x-archive-queue-derive'] = '1' for meta_key, meta_value in prepared_metadata.items(): # Encode arrays into JSON strings because Archive.org does not @@ -252,10 +254,13 @@ metadata = {target: metadata} for key in metadata: if key == 'metadata': - patch = prepare_patch(metadata[key], - source_metadata['metadata'], - append, - append_list) + try: + patch = prepare_patch(metadata[key], + source_metadata['metadata'], + append, + append_list) + except KeyError: + raise ItemLocateError elif key.startswith('files'): patch = prepare_files_patch(metadata[key], source_metadata['files'], @@ -276,8 +281,11 @@ else: if not target or 'metadata' in target: target = 'metadata' - patch = prepare_patch(metadata, source_metadata['metadata'], append, - append_list) + try: + patch = prepare_patch(metadata, source_metadata['metadata'], append, + append_list) + except KeyError: + raise ItemLocateError elif 'files' in target: patch = prepare_files_patch(metadata, source_metadata['files'], append, target, append_list) diff -Nru python-internetarchive-1.9.0/internetarchive/__init__.py python-internetarchive-1.9.3/internetarchive/__init__.py --- python-internetarchive-1.9.0/internetarchive/__init__.py 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/internetarchive/__init__.py 2020-04-07 19:32:01.000000000 +0000 @@ -37,7 +37,7 @@ from __future__ import absolute_import __title__ = 'internetarchive' -__version__ = '1.9.0' +__version__ = '1.9.3' __author__ = 'Jacob M. 
Johnson' __license__ = 'AGPL 3' __copyright__ = 'Copyright (C) 2012-2019 Internet Archive' diff -Nru python-internetarchive-1.9.0/internetarchive/item.py python-internetarchive-1.9.3/internetarchive/item.py --- python-internetarchive-1.9.0/internetarchive/item.py 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/internetarchive/item.py 2020-04-07 19:32:01.000000000 +0000 @@ -31,6 +31,8 @@ from fnmatch import fnmatch from logging import getLogger from time import sleep +import math +from xml.dom.minidom import parseString try: from functools import total_ordering @@ -42,7 +44,7 @@ from six import string_types from six.moves import urllib from requests import Response -from clint.textui import progress +from tqdm import tqdm from requests.exceptions import HTTPError from internetarchive.utils import IdentifierListAsItems, get_md5, chunk_generator, \ @@ -216,6 +218,26 @@ item_metadata = self.session.get_metadata(self.identifier, **kwargs) self.load(item_metadata) + def identifier_available(self): + """Check if the item identifier is available for creating a + new item. + + :rtype: bool + :return: `True` if identifier is available, or `False` if it is + not available. + """ + url = '{}//{}/services/check_identifier.php'.format(self.session.protocol, + self.session.host) + params = dict(identifier=self.identifier) + r = self.session.get(url, params=params) + p = parseString(r.text) + result = p.getElementsByTagName('result')[0] + available = result.attributes['code'].value + if available == 'not_available': + return False + else: + return True + def get_task_summary(self, params=None, request_kwargs=None): """Get a summary of the item's pending tasks. @@ -252,6 +274,7 @@ :rtype: List[CatalogTask] """ + params = dict() if not params else params params.update(dict(catalog=1, history=1)) return self.session.get_tasks(self.identifier, params, request_kwargs) @@ -287,6 +310,52 @@ catalog.append(t) return catalog + def derive(self, priority=None, request_kwargs=None): + """Derive an item. + + :rtype: :class:`requests.Response` + """ + r = self.session.submit_task(self.identifier, + 'derive.php', + priority=priority, + request_kwargs=request_kwargs) + r.raise_for_status() + return r + + def fixer(self, ops=None, priority=None, data=None, request_kwargs=None): + """Submit a fixer task on an item. + + :type ops: str or list + :param ops: (optional) The fixer operation(s) to run on the item + [default: noop]. + + :type priority: str or int + :param priority: (optional) The task priority. + + :type data: dict + :param data: (optional) Additional parameters to submit with + the task. + + :rtype: :class:`requests.Response` + """ + data = dict() if not data else data + if not ops: + ops = ['noop'] + if not isinstance(ops, (list, tuple, set)): + ops = [ops] + if not data.get('args'): + data['args'] = dict() + for op in ops: + data['args'][op] = '1' + + r = self.session.submit_task(self.identifier, + 'fixer.php', + priority=priority, + data=data, + request_kwargs=request_kwargs) + r.raise_for_status() + return r + def undark(self, comment, priority=None, data=None, request_kwargs=None): """Undark the item. @@ -406,6 +475,7 @@ on_the_fly=None, return_responses=None, no_change_timestamp=None, + ignore_history_dir=None, params=None): """Download files from an item. @@ -472,6 +542,10 @@ :param params: (optional) URL parameters to send with download request (e.g. `cnt=0`). + :type ignore_history_dir: bool + :param ignore_history_dir: (optional) Do not download any files from the history + dir. 
This param defaults to ``False``.
+
         :rtype: bool
         :returns: True if all files have been downloaded successfully.
         """
@@ -484,6 +558,7 @@
         no_directory = False if no_directory is None else no_directory
         return_responses = False if not return_responses else True
         no_change_timestamp = False if not no_change_timestamp else no_change_timestamp
+        ignore_history_dir = False if ignore_history_dir is None else ignore_history_dir
         params = None if not params else params
 
         if not dry_run:
@@ -536,6 +611,9 @@
         responses = list()
 
         for f in files:
+            if ignore_history_dir is True:
+                if f.name.startswith('history/'):
+                    continue
             if no_directory:
                 path = f.name
             else:
@@ -645,6 +723,23 @@
         self.refresh()
         return resp
 
+    def remove_from_simplelist(self, parent, list):
+        """Remove item from a simplelist.
+
+        :rtype: :class:`requests.Response`
+        """
+        patch = dict(
+            op='delete',
+            parent=parent,
+            list=list,
+        )
+        data = {
+            '-patch': json.dumps(patch),
+            '-target': 'simplelists',
+        }
+        r = self.session.post(self.urls.metadata, data=data)
+        return r
+
     def upload_file(self, body,
                     key=None,
                     metadata=None,
@@ -720,7 +815,7 @@
         secret_key = self.session.secret_key if secret_key is None else secret_key
         queue_derive = True if queue_derive is None else queue_derive
         verbose = False if verbose is None else verbose
-        verify = True if verify is None else verify
+        verify = False if not verify else verify
         delete = False if delete is None else delete
         # Set checksum after delete.
         checksum = True if delete else checksum
@@ -794,12 +889,13 @@
                     raise Exception
 
                 chunk_size = 1048576
-                expected_size = size / chunk_size + 1
+                expected_size = math.ceil(size / chunk_size)
                 chunks = chunk_generator(body, chunk_size)
-                progress_generator = progress.bar(
-                    chunks,
-                    expected_size=expected_size,
-                    label=' uploading {f}: '.format(f=key))
+                progress_generator = tqdm(chunks,
+                                          desc=' uploading {}'.format(key),
+                                          dynamic_ncols=True,
+                                          total=expected_size,
+                                          unit='MiB')
                 data = IterableToFileAdapter(progress_generator, size)
             except:
                 print(' uploading {f}'.format(f=key))
@@ -824,10 +920,10 @@
                 return prepared_request
             else:
                 try:
-                    error_msg = ('s3 is overloaded, sleeping for '
-                                 '{0} seconds and retrying. '
-                                 '{1} retries left.'.format(retries_sleep, retries))
                     while True:
+                        error_msg = ('s3 is overloaded, sleeping for '
+                                     '{0} seconds and retrying. '
+                                     '{1} retries left.'.format(retries_sleep, retries))
                         if retries > 0:
                             if self.session.s3_is_overloaded(access_key):
                                 sleep(retries_sleep)
diff -Nru python-internetarchive-1.9.0/internetarchive/session.py python-internetarchive-1.9.3/internetarchive/session.py
--- python-internetarchive-1.9.0/internetarchive/session.py 2019-12-05 23:00:37.000000000 +0000
+++ python-internetarchive-1.9.3/internetarchive/session.py 2020-04-07 19:32:01.000000000 +0000
@@ -46,7 +46,7 @@
 from requests.packages.urllib3 import Retry
 from six.moves.urllib.parse import urlparse, unquote
 
-from internetarchive import __version__
+from internetarchive import __version__, auth
 from internetarchive.config import get_config
 from internetarchive.item import Item, Collection
 from internetarchive.search import Search
@@ -271,7 +271,11 @@
         if 'timeout' not in request_kwargs:
             request_kwargs['timeout'] = 12
         try:
-            resp = self.get(url, **request_kwargs)
+            if self.access_key and self.secret_key:
+                s3_auth = auth.S3Auth(self.access_key, self.secret_key)
+            else:
+                s3_auth = None
+            resp = self.get(url, auth=s3_auth, **request_kwargs)
             resp.raise_for_status()
         except Exception as exc:
             error_msg = 'Error retrieving metadata from {0}, {1}'.format(url, exc)
@@ -462,6 +466,26 @@
         c = Catalog(self, request_kwargs)
         return c.get_tasks(identifier=identifier, params=params)
 
+    def get_my_catalog(self, params=None, request_kwargs=None):
+        """Get all queued or running tasks.
+
+        :type params: dict
+        :param params: (optional) Query parameters, refer to
+                       `Tasks API
+                       <https://archive.org/services/docs/api/tasks.html>`_
+                       for available parameters.
+
+        :type request_kwargs: dict
+        :param request_kwargs: (optional) Keyword arguments to be used in
+                               :meth:`requests.sessions.Session.get` request.
+
+        :rtype: List[CatalogTask]
+        """
+        params = dict() if not params else params
+        _params = dict(submitter=self.user_email, catalog=1, history=0, summary=0)
+        params.update(_params)
+        return self.get_tasks(params=params, request_kwargs=request_kwargs)
+
     def get_task_log(self, task_id, request_kwargs=None):
         """Get a task log.
 
diff -Nru python-internetarchive-1.9.0/internetarchive/utils.py python-internetarchive-1.9.3/internetarchive/utils.py
--- python-internetarchive-1.9.0/internetarchive/utils.py 2019-12-05 23:00:37.000000000 +0000
+++ python-internetarchive-1.9.3/internetarchive/utils.py 2020-04-07 19:32:01.000000000 +0000
@@ -235,21 +235,23 @@
             is_dir = False
         if is_dir:
             for x, _ in iter_directory(f):
-                lmd5 = get_md5(open(x, 'rb'))
+                if checksum is True:
+                    with open(x, 'rb') as fh:
+                        lmd5 = get_md5(fh)
+                    if lmd5 in md5s:
+                        continue
+                total_files += 1
+        else:
+            if checksum is True:
+                try:
+                    with open(f, 'rb') as fh:
+                        lmd5 = get_md5(fh)
+                except TypeError:
+                    # Support file-like objects.
+                    lmd5 = get_md5(f)
                 if lmd5 in md5s:
                     continue
-                else:
-                    total_files += 1
-        else:
-            try:
-                lmd5 = get_md5(open(f, 'rb'))
-            except TypeError:
-                # Support file-like objects.
- lmd5 = get_md5(f) - if lmd5 in md5s: - continue - else: - total_files += 1 + total_files += 1 return total_files diff -Nru python-internetarchive-1.9.0/pex-requirements.txt python-internetarchive-1.9.3/pex-requirements.txt --- python-internetarchive-1.9.0/pex-requirements.txt 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/pex-requirements.txt 2020-04-07 19:32:01.000000000 +0000 @@ -1,7 +1,7 @@ requests>=2.9.1,<3.0.0 jsonpatch>=0.4 docopt>=0.6.0,<0.7.0 -clint>=0.4.0,<0.6.0 +tqdm>=4.0.0 six>=1.0.0,<2.0.0 schema>=0.4.0 total-ordering diff -Nru python-internetarchive-1.9.0/setup.cfg python-internetarchive-1.9.3/setup.cfg --- python-internetarchive-1.9.0/setup.cfg 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/setup.cfg 2020-04-07 19:32:01.000000000 +0000 @@ -9,7 +9,9 @@ ia_upload.py E128 tests/* E402 internetarchive/cli/ia_download.py E501 - internetarchive/cli/ia_copy.py E128 + internetarchive/cli/ia_copy.py E128 E501 + internetarchive/cli/ia_copy.py E501 + internetarchive/cli/ia_move.py E501 internetarchive/cli/ia_move.py E128 pep8maxlinelength = 90 flakes-ignore = diff -Nru python-internetarchive-1.9.0/setup.py python-internetarchive-1.9.3/setup.py --- python-internetarchive-1.9.0/setup.py 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/setup.py 2020-04-07 19:32:01.000000000 +0000 @@ -40,13 +40,11 @@ 'requests>=2.9.1,<3.0.0', 'jsonpatch>=0.4', 'docopt>=0.6.0,<0.7.0', - 'clint>=0.4.0,<0.6.0', + 'tqdm>=4.0.0', 'six>=1.0.0,<2.0.0', 'schema>=0.4.0', 'backports.csv < 1.07;python_version<"2.7"', 'backports.csv < 1.07;python_version<"3.4"', - 'backports.csv;python_version>="2.7"', - 'backports.csv;python_version>="3.4"', 'total-ordering;python_version<"2.7"', ], classifiers=[ diff -Nru python-internetarchive-1.9.0/tests/test_api.py python-internetarchive-1.9.3/tests/test_api.py --- python-internetarchive-1.9.0/tests/test_api.py 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/tests/test_api.py 2020-04-07 19:32:01.000000000 +0000 @@ -198,7 +198,6 @@ 'x-archive-queue-derive': '1', 'x-archive-meta00-scanner': 'uri(Internet%20Archive%20Python%20library', 'x-archive-size-hint': '7557', - 'content-md5': '6f1834f5c70c0eabf93dea675ccf90c4', 'x-archive-auto-make-bucket': '1', 'authorization': 'LOW test_access:test_secret', } diff -Nru python-internetarchive-1.9.0/tests/test_item.py python-internetarchive-1.9.3/tests/test_item.py --- python-internetarchive-1.9.0/tests/test_item.py 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/tests/test_item.py 2020-04-07 19:32:01.000000000 +0000 @@ -31,7 +31,6 @@ 'x-archive-queue-derive': '1', 'x-archive-meta00-scanner': 'uri(Internet%20Archive%20Python%20library', 'x-archive-size-hint': '7557', - 'content-md5': '6f1834f5c70c0eabf93dea675ccf90c4', 'x-archive-auto-make-bucket': '1', 'authorization': 'LOW a:b', 'accept': '*/*', @@ -495,6 +494,7 @@ _expected_headers = deepcopy(EXPECTED_S3_HEADERS) del _expected_headers['x-archive-meta00-scanner'] + _expected_headers['content-md5'] = '6f1834f5c70c0eabf93dea675ccf90c4' test_file = os.path.join(str(tmpdir), 'checksum_test.txt') with open(test_file, 'wb') as fh: diff -Nru python-internetarchive-1.9.0/tox.ini python-internetarchive-1.9.3/tox.ini --- python-internetarchive-1.9.0/tox.ini 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/tox.ini 2020-04-07 19:32:01.000000000 +0000 @@ -1,14 +1,9 @@ [tox] -envlist = - py27, - py35, - py36, - py37, +envlist = py27,py35,py36,py37,py38, [testenv] commands = 
py.test --pep8 {posargs} -deps = - -rtests/requirements.txt +deps = -r tests/requirements.txt [testenv:py27] basepython=python2.7 @@ -21,3 +16,6 @@ [testenv:py37] basepython=python3.7 + +[testenv:py38] +basepython=python3.8 diff -Nru python-internetarchive-1.9.0/.travis.yml python-internetarchive-1.9.3/.travis.yml --- python-internetarchive-1.9.0/.travis.yml 2019-12-05 23:00:37.000000000 +0000 +++ python-internetarchive-1.9.3/.travis.yml 2020-04-07 19:32:01.000000000 +0000 @@ -6,4 +6,5 @@ - "3.5" - "3.6" - "3.7" + - "3.8" script: tox
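
For reference, a minimal sketch of the new task-related API surface listed in the
HISTORY.rst entries above (``Item.identifier_available()``, ``Item.derive()``,
``Item.fixer()``, ``ArchiveSession.get_my_catalog()``). It assumes internetarchive
1.9.3 with credentials configured in ~/.config/ia.ini (or ~/.ia); 'my-test-item' is
a placeholder identifier, and the snippet is an illustration, not part of the diff:

    # Sketch of the 1.9.2/1.9.3 additions described in HISTORY.rst above.
    from internetarchive import get_session

    session = get_session()
    item = session.get_item('my-test-item')  # placeholder identifier

    # check_identifier.php wrapper (new in 1.9.2):
    if item.identifier_available():
        print('identifier is still available; no item exists under it yet')
    else:
        # Tasks API wrappers (new in 1.9.3); both raise on HTTP errors.
        item.derive()           # queue a derive.php task
        item.fixer(ops='noop')  # queue a fixer.php task with args {'noop': '1'}

    # Queued/running tasks for the configured user (new in 1.9.2):
    for task in session.get_my_catalog():
        print(task)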