diff -Nru swift-2.17.0/AUTHORS swift-2.18.0/AUTHORS --- swift-2.17.0/AUTHORS 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/AUTHORS 2018-05-30 10:17:09.000000000 +0000 @@ -58,6 +58,7 @@ Arnaud JOST (arnaud.jost@ovh.net) Atsushi Sakai (sakaia@jp.fujitsu.com) Azhagu Selvan SP (tamizhgeek@gmail.com) +baiwenteng (baiwenteng@inspur.com) Ben Keller (bjkeller@us.ibm.com) Ben Martin (blmartin@us.ibm.com) bhavani.cr (bhavani.r@nectechnologies.in) @@ -109,8 +110,10 @@ Dan Hersam (dan.hersam@hp.com) Dan Prince (dprince@redhat.com) dangming (dangming@unitedstack.com) +Daniele Pizzolli (dpizzolli@fbk.eu) Daniele Valeriani (daniele@dvaleriani.net) Darrell Bishop (darrell@swiftstack.com) +Darryl Tam (dtam@swiftstack.com) David Goetz (david.goetz@rackspace.com) David Hadas (davidh@il.ibm.com) David Liu (david.liu@cn.ibm.com) @@ -161,6 +164,7 @@ Gaurav B. Gangalwar (gaurav@gluster.com) gecong1973 (ge.cong@zte.com.cn) gengchc2 (geng.changcai2@zte.com.cn) +Gerard Gine (ggine@swiftstack.com) Gerry Drudy (gerry.drudy@hpe.com) Gil Vernik (gilv@il.ibm.com) Gonéri Le Bouder (goneri.lebouder@enovance.com) @@ -244,6 +248,7 @@ M V P Nitesh (m.nitesh@nectechnologies.in) Madhuri Kumari (madhuri.rai07@gmail.com) Mahati Chamarthy (mahati.chamarthy@gmail.com) +malei (malei@maleideMacBook-Pro.local) maoshuai (fwsakura@163.com) Marcelo Martins (btorch@gmail.com) Maria Malyarova (savoreux69@gmail.com) @@ -252,6 +257,7 @@ Martin Geisler (martin@geisler.net) Martin Kletzander (mkletzan@redhat.com) Maru Newby (mnewby@internap.com) +Masaki Tsukuda (tsukuda.masaki@po.ntts.co.jp) Mathias Bjoerkqvist (mbj@zurich.ibm.com) Matt Kassawara (mkassawara@gmail.com) Matt Riedemann (mriedem@us.ibm.com) @@ -259,6 +265,7 @@ Matthieu Huin (mhu@enovance.com) Mauro Stettler (mauro.stettler@gmail.com) Mehdi Abaakouk (sileht@redhat.com) +melissaml (ma.lei@99cloud.net) Michael Matur (michael.matur@gmail.com) Michael Shuler (mshuler@gmail.com) Mike Fedosin (mfedosin@mirantis.com) @@ -274,6 +281,8 @@ Nakul Dahiwade (nakul.dahiwade@intel.com) Nam Nguyen Hoai (namnh@vn.fujitsu.com) Nandini Tata (nandini.tata@intel.com) +Naoto Nishizono (nishizono.naoto@po.ntts.co.jp) +Nassim Babaci (nassim.babaci@cloudwatt.com) Nathan Kinder (nkinder@redhat.com) Nelson Almeida (nelsonmarcos@gmail.com) Newptone (xingchao@unitedstack.com) @@ -334,6 +343,7 @@ Shri Javadekar (shrinand@maginatics.com) Sivasathurappan Radhakrishnan (siva.radhakrishnan@intel.com) Soren Hansen (soren@linux2go.dk) +Stefan Majewsky (stefan.majewsky@sap.com) Stephen Milton (milton@isomedia.com) Steve Kowalik (steven@wedontsleep.org) Steve Martinelli (stevemar@ca.ibm.com) @@ -356,6 +366,7 @@ Tomas Matlocha (tomas.matlocha@firma.seznam.cz) tone-zhang (tone.zhang@linaro.org) Tong Li (litong01@us.ibm.com) +Tovin Seven (vinhnt@vn.fujitsu.com) Travis McPeak (tmcpeak@us.ibm.com) Tushar Gohad (tushar.gohad@intel.com) Van Hung Pham (hungpv@vn.fujitsu.com) @@ -365,12 +376,16 @@ Victor Rodionov (victor.rodionov@nexenta.com) Victor Stinner (vstinner@redhat.com) Viktor Varga (vvarga@inf.u-szeged.hu) +Vil Surkin (mail@vills.me) Vincent Untz (vuntz@suse.com) Vladimir Vechkanov (vvechkanov@mirantis.com) Vu Cong Tuan (tuanvc@vn.fujitsu.com) vxlinux (yan.wei7@zte.com.cn) wanghongtaozz (wanghongtaozz@inspur.com) +wanghui (wang_hui@inspur.com) +wangqi (wang.qi@99cloud.net) Wu Wenxiang (wu.wenxiang@99cloud.net) +Wyllys Ingersoll (wyllys.ingersoll@evault.com) xhancar (pavel.hancar@gmail.com) XieYingYun (smokony@sina.com) Yaguang Wang (yaguang.wang@intel.com) @@ -394,6 +409,8 @@ Zheng Yao (zheng.yao1@zte.com.cn) 
zheng yin (yin.zheng@easystack.cn) Zhenguo Niu (zhenguo@unitedstack.com) +zhengwei6082 (zhengwei6082@fiberhome.com) ZhiQiang Fan (aji.zqfan@gmail.com) Zhongyue Luo (zhongyue.nah@intel.com) zhufl (zhu.fanglei@zte.com.cn) +Виль Суркин (vills@vills-pro.local) diff -Nru swift-2.17.0/bin/swift-account-info swift-2.18.0/bin/swift-account-info --- swift-2.17.0/bin/swift-account-info 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/bin/swift-account-info 2018-05-30 10:17:02.000000000 +0000 @@ -16,6 +16,7 @@ from optparse import OptionParser from swift.cli.info import print_info, InfoSystemExit +from swift.common.exceptions import LockTimeout def run_print_info(args, opts): @@ -23,7 +24,7 @@ print_info('account', *args, **opts) except InfoSystemExit: sys.exit(1) - except sqlite3.OperationalError as e: + except (sqlite3.OperationalError, LockTimeout) as e: if not opts.get('stale_reads_ok'): opts['stale_reads_ok'] = True print('Warning: Possibly Stale Data') @@ -38,6 +39,9 @@ parser.add_option( '-d', '--swift-dir', default='/etc/swift', help="Pass location of swift directory") + parser.add_option( + '--drop-prefixes', default=False, action="store_true", + help="When outputting metadata, drop the per-section common prefixes") options, args = parser.parse_args() diff -Nru swift-2.17.0/bin/swift-account-replicator swift-2.18.0/bin/swift-account-replicator --- swift-2.17.0/bin/swift-account-replicator 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/bin/swift-account-replicator 2018-05-30 10:17:02.000000000 +0000 @@ -14,10 +14,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import optparse + from swift.account.replicator import AccountReplicator from swift.common.utils import parse_options from swift.common.daemon import run_daemon if __name__ == '__main__': - conf_file, options = parse_options(once=True) + parser = optparse.OptionParser("%prog CONFIG [options]") + parser.add_option('-d', '--devices', + help=('Replicate only given devices. ' + 'Comma-separated list. ' + 'Only has effect if --once is used.')) + parser.add_option('-p', '--partitions', + help=('Replicate only given partitions. ' + 'Comma-separated list. 
' + 'Only has effect if --once is used.')) + conf_file, options = parse_options(parser=parser, once=True) run_daemon(AccountReplicator, conf_file, **options) diff -Nru swift-2.17.0/bin/swift-container-info swift-2.18.0/bin/swift-container-info --- swift-2.17.0/bin/swift-container-info 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/bin/swift-container-info 2018-05-30 10:17:02.000000000 +0000 @@ -16,6 +16,7 @@ from optparse import OptionParser from swift.cli.info import print_info, InfoSystemExit +from swift.common.exceptions import LockTimeout def run_print_info(args, opts): @@ -23,7 +24,7 @@ print_info('container', *args, **opts) except InfoSystemExit: sys.exit(1) - except sqlite3.OperationalError as e: + except (sqlite3.OperationalError, LockTimeout) as e: if not opts.get('stale_reads_ok'): opts['stale_reads_ok'] = True print('Warning: Possibly Stale Data') @@ -38,6 +39,9 @@ parser.add_option( '-d', '--swift-dir', default='/etc/swift', help="Pass location of swift directory") + parser.add_option( + '--drop-prefixes', default=False, action="store_true", + help="When outputting metadata, drop the per-section common prefixes") options, args = parser.parse_args() diff -Nru swift-2.17.0/bin/swift-container-replicator swift-2.18.0/bin/swift-container-replicator --- swift-2.17.0/bin/swift-container-replicator 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/bin/swift-container-replicator 2018-05-30 10:17:02.000000000 +0000 @@ -14,10 +14,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import optparse + from swift.container.replicator import ContainerReplicator from swift.common.utils import parse_options from swift.common.daemon import run_daemon if __name__ == '__main__': - conf_file, options = parse_options(once=True) + parser = optparse.OptionParser("%prog CONFIG [options]") + parser.add_option('-d', '--devices', + help=('Replicate only given devices. ' + 'Comma-separated list. ' + 'Only has effect if --once is used.')) + parser.add_option('-p', '--partitions', + help=('Replicate only given partitions. ' + 'Comma-separated list. ' + 'Only has effect if --once is used.')) + conf_file, options = parse_options(parser=parser, once=True) run_daemon(ContainerReplicator, conf_file, **options) diff -Nru swift-2.17.0/bin/swift-container-sharder swift-2.18.0/bin/swift-container-sharder --- swift-2.17.0/bin/swift-container-sharder 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/bin/swift-container-sharder 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# Copyright (c) 2010-2015 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from swift.container.sharder import ContainerSharder +from swift.common.utils import parse_options +from swift.common.daemon import run_daemon +from optparse import OptionParser + +if __name__ == '__main__': + parser = OptionParser("%prog CONFIG [options]") + parser.add_option('-d', '--devices', + help='Shard containers only on given devices. 
' + 'Comma-separated list. ' + 'Only has effect if --once is used.') + parser.add_option('-p', '--partitions', + help='Shard containers only in given partitions. ' + 'Comma-separated list. ' + 'Only has effect if --once is used.') + conf_file, options = parse_options(parser=parser, once=True) + run_daemon(ContainerSharder, conf_file, **options) diff -Nru swift-2.17.0/bin/swift-object-info swift-2.18.0/bin/swift-object-info --- swift-2.17.0/bin/swift-object-info 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/bin/swift-object-info 2018-05-30 10:17:02.000000000 +0000 @@ -14,15 +14,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +import codecs import sys from optparse import OptionParser +import six + from swift.common.storage_policy import reload_storage_policies from swift.common.utils import set_swift_dir from swift.cli.info import print_obj, InfoSystemExit if __name__ == '__main__': + if not six.PY2: + # Make stdout able to write escaped bytes + sys.stdout = codecs.getwriter("utf-8")( + sys.stdout.detach(), errors='surrogateescape') + parser = OptionParser('%prog [options] OBJECT_FILE') parser.add_option( '-n', '--no-check-etag', default=True, @@ -32,6 +40,9 @@ '-d', '--swift-dir', default='/etc/swift', dest='swift_dir', help="Pass location of swift directory") parser.add_option( + '--drop-prefixes', default=False, action="store_true", + help="When outputting metadata, drop the per-section common prefixes") + parser.add_option( '-P', '--policy-name', dest='policy_name', help="Specify storage policy name") diff -Nru swift-2.17.0/bin/swift-object-relinker swift-2.18.0/bin/swift-object-relinker --- swift-2.17.0/bin/swift-object-relinker 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/bin/swift-object-relinker 2018-05-30 10:17:02.000000000 +0000 @@ -29,10 +29,12 @@ parser.add_argument('--devices', default='/srv/node', dest='devices', help='Path to swift device directory') parser.add_argument('--skip-mount-check', default=False, + help='Don\'t test if disk is mounted', action="store_true", dest='skip_mount_check') parser.add_argument('--logfile', default=None, - dest='logfile') - parser.add_argument('--debug', default=False, action='store_true') + dest='logfile', help='Set log file name') + parser.add_argument('--debug', default=False, action='store_true', + help='Enable debug mode') args = parser.parse_args() diff -Nru swift-2.17.0/bin/swift-recon-cron swift-2.18.0/bin/swift-recon-cron --- swift-2.17.0/bin/swift-recon-cron 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/bin/swift-recon-cron 2018-05-30 10:17:02.000000000 +0000 @@ -29,6 +29,8 @@ async_count = 0 for i in os.listdir(device_dir): device = os.path.join(device_dir, i) + if not os.path.isdir(device): + continue for asyncdir in os.listdir(device): # skip stuff like "accounts", "containers", etc. 
if not (asyncdir == ASYNCDIR_BASE or diff -Nru swift-2.17.0/bindep.txt swift-2.18.0/bindep.txt --- swift-2.17.0/bindep.txt 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/bindep.txt 2018-05-30 10:17:02.000000000 +0000 @@ -10,6 +10,10 @@ liberasurecode-devel [platform:rpm !platform:centos] libffi-dev [platform:dpkg] libffi-devel [platform:rpm] +libxml2-dev [platform:dpkg] +libxml2-devel [platform:rpm] +libxslt-devel [platform:rpm] +libxslt1-dev [platform:dpkg] memcached python-dev [platform:dpkg] python-devel [platform:rpm] diff -Nru swift-2.17.0/CHANGELOG swift-2.18.0/CHANGELOG --- swift-2.17.0/CHANGELOG 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/CHANGELOG 2018-05-30 10:17:09.000000000 +0000 @@ -1,3 +1,77 @@ +swift (2.18.0) + + * Added container sharding, an operator controlled feature that + may be used to shard very large container databases into a + number of smaller shard containers. This mitigates the issues + with one large DB by distributing the data across multiple + smaller databases throughout the cluster. Please read the full + overview at + https://docs.openstack.org/swift/latest/overview_container_sharding.html + + * Provide an S3 API compatibility layer. The external "swift3" + project has been imported into Swift's codebase as the "s3api" + middleware. + + * Added "emergency mode" hooks in the account and container replicators. + These options may be used to prioritize moving handoff + partitions to primary locations more quickly. This helps when + adding capacity to a ring. + + - Added `-d ` and `-p ` command line options. + + - Added a handoffs-only mode. + + * Add a multiprocess mode to the object replicator. Setting the + "replicator_workers" setting to a positive value N will result + in the replicator using up to N worker processes to perform + replication tasks. At most one worker per disk will be spawned. + + Worker process logs will have a bit of information prepended so + operators can tell which messages came from which worker. The + prefix is "[worker M/N pid=P] ", where M is the worker's index, + N is the total number of workers, and P is the process ID. Every + message from the replicator's logger will have the prefix + + * The object reconstructor will now fork all available worker + processes when operating on a subset of local devices. + + * Add support for PROXY protocol v1 to the proxy server. This + allows the Swift proxy server to log accurate client IP + addresses when there is a proxy or SSL-terminator between the + client and the Swift proxy server. Example servers supporting + this PROXY protocol include stunnel, haproxy, hitch, and + varnish. See the sample proxy server config file for the + appropriate config setting to enable or disable this + functionality. + + * In the ratelimit middleware, account whitelist and blacklist + settings have been deprecated and may be removed in a future + release. When found, a deprecation message will be logged. + Instead of these config file values, set X-Account-Sysmeta- + Global-Write-Ratelimit:WHITELIST and X-Account-Sysmeta-Global- + Write-Ratelimit:BLACKLIST on the particular accounts that need + to be whitelisted or blacklisted. System metadata cannot be added + or modified by standard clients. Use the internal client to set sysmeta. + + * Add a --drop-prefixes flag to swift-account-info, + swift-container-info, and swift-object-info. This makes the + output between the three more consistent. + + * statsd error messages correspond to 5xx responses only. 
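The new replicator options and settings called out above correspond to the bin script changes earlier in this diff. A couple of illustrative invocations follow; the device names, partition numbers, worker count and file paths are assumptions for the sake of example, not values taken from this release:

    # emergency mode: replicate only the named devices/partitions once, then exit
    swift-container-replicator /etc/swift/container-server.conf --once -d sdb1,sdb2 -p 1234,5678

    # multiprocess object replication, in the [object-replicator] section of object-server.conf:
    # up to 4 worker processes, at most one per disk
    replicator_workers = 4

    # consistent metadata output from the info tools (DB path is a placeholder)
    swift-account-info --drop-prefixes /path/to/account.db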
This + makes monitoring more useful because actual errors (5xx) will + not be hidden by common user requests (4xx). Previously, some 4xx + responses would be included in timing information in the statsd + error messages. + + * Truncate error logs to prevent log handler from running out of buffer. + + * Updated requirements.txt to match global exclusions and formatting. + + * tempauth user names now support unicode characters. + + * Various other minor bug fixes and improvements. + + swift (2.17.0) * Added symlink objects support. @@ -1709,7 +1783,7 @@ from the codebase and is now in its own repository at https://github.com/openstack/swift-bench. New swift-bench binaries and packages may be found on PyPI at - https://pypi.python.org/pypi/swift-bench + https://pypi.org/project/swift-bench * Bulk delete now also supports the POST verb, in addition to DELETE diff -Nru swift-2.17.0/debian/changelog swift-2.18.0/debian/changelog --- swift-2.17.0/debian/changelog 2018-02-14 14:24:36.000000000 +0000 +++ swift-2.18.0/debian/changelog 2018-06-13 19:14:00.000000000 +0000 @@ -1,3 +1,18 @@ +swift (2.18.0-0ubuntu1) cosmic; urgency=medium + + * New upstream release. + * d/control: Align (Build-)Depends with upstream. + * d/swift-container.install, d/swift.install, + swift-container.swift-container-sharder.init: Install + usr/bin/swift-container-sharder and usr/bin/swift-manage-shard-ranges and + add init file for swift-container-sharder. + * d/control: Update Standards-Version to 4.1.4. + * d/control, d/tests/*: Drop obsolete autopkgtest header and enable + autopkgtest-pkg-python testsuite. + * d/control, d/watch: Use https URLs. + + -- Corey Bryant Wed, 13 Jun 2018 15:14:00 -0400 + swift (2.17.0-0ubuntu1) bionic; urgency=medium [ James Page ] diff -Nru swift-2.17.0/debian/control swift-2.18.0/debian/control --- swift-2.17.0/debian/control 2018-02-14 14:24:36.000000000 +0000 +++ swift-2.18.0/debian/control 2018-06-13 19:14:00.000000000 +0000 @@ -28,23 +28,29 @@ python-greenlet (>= 0.4.10), python-hacking (>= 0.11.0), python-keystoneclient (>= 1:2.0.0), - python-mock (>= 2.0.0), + python-keystonemiddleware (>= 4.17.0), + python-lxml, + python-mock (>= 2.0), python-netifaces (>= 0.5), python-nose, + python-nosexcover, python-nosehtmloutput (>= 0.0.3), + python-openstackclient, python-openstackdocstheme (>= 1.17.0), python-os-api-ref (>= 1.0.0), python-os-testr (>= 1.0.0), python-pastedeploy (>= 1.3.3), python-pyeclib (>= 1.3.1), + python-requests (>= 2.14.2), + python-requests-mock (>= 1.2.0), python-six (>= 1.9.0), python-swiftclient, python-xattr (>= 0.4), -Standards-Version: 4.1.2 -Homepage: http://launchpad.net/swift +Standards-Version: 4.1.4 +Homepage: https://launchpad.net/swift Vcs-Browser: https://git.launchpad.net/~ubuntu-server-dev/ubuntu/+source/swift -Vcs-Git: git://git.launchpad.net/~ubuntu-server-dev/ubuntu/+source/swift -XS-Testsuite: autopkgtest +Vcs-Git: https://git.launchpad.net/~ubuntu-server-dev/ubuntu/+source/swift +Testsuite: autopkgtest-pkg-python Package: python-swift Architecture: all @@ -56,9 +62,11 @@ python-dnspython (>= 1.14.0), python-eventlet (>= 0.17.4), python-greenlet (>= 0.4.10), + python-lxml, python-netifaces (>= 0.5), python-pastedeploy (>= 1.3.3), python-pyeclib (>= 1.3.1), + python-requests (>= 2.14.2), python-six (>= 1.9.0), python-xattr (>= 0.4), ${misc:Depends}, diff -Nru swift-2.17.0/debian/swift-container.install swift-2.18.0/debian/swift-container.install --- swift-2.17.0/debian/swift-container.install 2018-02-14 14:24:36.000000000 +0000 +++ 
swift-2.18.0/debian/swift-container.install 2018-06-13 19:14:00.000000000 +0000 @@ -3,6 +3,8 @@ usr/bin/swift-container-reconciler usr/bin/swift-container-replicator usr/bin/swift-container-server +usr/bin/swift-container-sharder usr/bin/swift-container-sync usr/bin/swift-container-updater +usr/bin/swift-manage-shard-ranges usr/bin/swift-reconciler-enqueue diff -Nru swift-2.17.0/debian/swift-container.swift-container-sharder.init swift-2.18.0/debian/swift-container.swift-container-sharder.init --- swift-2.17.0/debian/swift-container.swift-container-sharder.init 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/debian/swift-container.swift-container-sharder.init 2018-06-13 19:14:00.000000000 +0000 @@ -0,0 +1,49 @@ +#! /bin/sh +### BEGIN INIT INFO +# Provides: swift-container-sharder +# Required-Start: $remote_fs +# Required-Stop: $remote_fs +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: Swift container sharder +# Description: Container sharder for swift. +### END INIT INFO + +SERVICE_NAME="container-sharder" +PRINT_NAME="Swift container sharder" + +. /lib/lsb/init-functions + +if ! [ -x /usr/bin/swift-init ] ; then + exit 0 +fi + +if ! [ -f "/etc/swift/container-server.conf" ] ; then + exit 6 +fi + +case "$1" in +start) + log_daemon_msg "Starting Swift ${PRINT_NAME}" "swift-init ${SERVICE_NAME}" + /usr/bin/swift-init ${SERVICE_NAME} start + log_end_msg $? +;; +stop) + log_daemon_msg "Stopping Swift ${PRINT_NAME}" "swift-init ${SERVICE_NAME}" + /usr/bin/swift-init ${SERVICE_NAME} stop + log_end_msg $? +;; +restart|force-reload|reload) + log_daemon_msg "Restarting Swift ${PRINT_NAME}" "swift-init ${SERVICE_NAME}" + /usr/bin/swift-init ${SERVICE_NAME} reload +;; +status) + exec /usr/bin/swift-init ${SERVICE_NAME} status +;; +*) + echo "Usage: $0 {start|stop|restart|reload}" + exit 1 +;; +esac + +exit 0 diff -Nru swift-2.17.0/debian/tests/control swift-2.18.0/debian/tests/control --- swift-2.17.0/debian/tests/control 2018-02-14 14:24:36.000000000 +0000 +++ swift-2.18.0/debian/tests/control 2018-06-13 19:14:00.000000000 +0000 @@ -1,3 +1,3 @@ -Tests: python-swift swift-daemons -Depends: python-swift, swift, swift-proxy, swift-object, swift-container, swift-account +Tests: swift-daemons +Depends: swift, swift-proxy, swift-object, swift-container, swift-account Restrictions: needs-root diff -Nru swift-2.17.0/debian/tests/python-swift swift-2.18.0/debian/tests/python-swift --- swift-2.17.0/debian/tests/python-swift 2018-02-14 14:24:36.000000000 +0000 +++ swift-2.18.0/debian/tests/python-swift 1970-01-01 00:00:00.000000000 +0000 @@ -1,14 +0,0 @@ -#!/bin/bash -#------------------------- -# Testing client utilities -#------------------------- -set -e - -result=$(python `dirname $0`/test_import_swift.py 2>&1) -if [ "$result" ]; then - echo "ERROR: PYTHON-SWIFT MODULE CANNOT BE IMPORTED" - exit 1 -else - echo "OK" - exit 0 -fi diff -Nru swift-2.17.0/debian/tests/test_import_swift.py swift-2.18.0/debian/tests/test_import_swift.py --- swift-2.17.0/debian/tests/test_import_swift.py 2018-02-14 14:24:36.000000000 +0000 +++ swift-2.18.0/debian/tests/test_import_swift.py 1970-01-01 00:00:00.000000000 +0000 @@ -1,4 +0,0 @@ -try: - import swift -except ImportError, e: - print "ERROR IMPORTING MODULE" diff -Nru swift-2.17.0/debian/watch swift-2.18.0/debian/watch --- swift-2.17.0/debian/watch 2018-02-14 14:24:36.000000000 +0000 +++ swift-2.18.0/debian/watch 2018-06-13 19:14:00.000000000 +0000 @@ -1,3 +1,3 @@ version=3 opts="uversionmangle=s/rc/~rc/" \ - 
http://tarballs.openstack.org/swift/ swift-(\d.*).tar.gz + https://tarballs.openstack.org/swift/ swift-(\d.*).tar.gz diff -Nru swift-2.17.0/doc/manpages/object-server.conf.5 swift-2.18.0/doc/manpages/object-server.conf.5 --- swift-2.17.0/doc/manpages/object-server.conf.5 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/manpages/object-server.conf.5 2018-05-30 10:17:09.000000000 +0000 @@ -211,7 +211,7 @@ .IP "\fBallowed_headers\fR" Comma separated list of headers that can be set in metadata on an object. This list is in addition to X-Object-Meta-* headers and cannot include Content-Type, etag, Content-Length, or deleted. -The default is 'Content-Disposition, Content-Encoding, X-Delete-At, X-Object-Manifest, X-Static-Large-Object'. +The default is 'Content-Disposition, Content-Encoding, X-Delete-At, X-Object-Manifest, X-Static-Large-Object, Cache-Control, Content-Language, Expires, X-Robots-Tag'. .IP "\fBauto_create_account_prefix\fR" The default is '.'. .IP "\fBreplication_server\fR" diff -Nru swift-2.17.0/doc/manpages/proxy-server.conf.5 swift-2.18.0/doc/manpages/proxy-server.conf.5 --- swift-2.17.0/doc/manpages/proxy-server.conf.5 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/manpages/proxy-server.conf.5 2018-05-30 10:17:02.000000000 +0000 @@ -299,7 +299,7 @@ .PD 0 .RS 10 .IP "paste.filter_factory = keystonemiddleware.auth_token:filter_factory" -.IP "auth_uri = http://keystonehost:5000" +.IP "www_authenticate_uri = http://keystonehost:5000" .IP "auth_url = http://keystonehost:35357" .IP "auth_plugin = password" .IP "project_domain_id = default" diff -Nru swift-2.17.0/doc/manpages/swift-object-relinker.1 swift-2.18.0/doc/manpages/swift-object-relinker.1 --- swift-2.17.0/doc/manpages/swift-object-relinker.1 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/manpages/swift-object-relinker.1 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,75 @@ +.\" +.\" Copyright (c) 2017 OpenStack Foundation. +.\" +.\" Licensed under the Apache License, Version 2.0 (the "License"); +.\" you may not use this file except in compliance with the License. +.\" You may obtain a copy of the License at +.\" +.\" http://www.apache.org/licenses/LICENSE-2.0 +.\" +.\" Unless required by applicable law or agreed to in writing, software +.\" distributed under the License is distributed on an "AS IS" BASIS, +.\" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +.\" implied. +.\" See the License for the specific language governing permissions and +.\" limitations under the License. +.\" +.TH SWIFT-OBJECT-RELINKER "1" "December 2017" "OpenStack Swift" + +.SH NAME +\fBswift\-object\-relinker\fR \- relink and cleanup objects to increase partition power +.SH SYNOPSIS +.B swift\-object\-relinker +[\fIoptions\fR] <\fIcommand\fR> + +.SH DESCRIPTION +.PP +The relinker prepares an object server’s filesystem for a partition power +change by crawling the filesystem and linking existing objects to future +partition directories. + +More information can be found at +.BI https://docs.openstack.org/swift/latest/ring_partpower.html + +.SH COMMANDS +.TP +\fBrelink\fR +Relink files for partition power increase. + +.TP +\fBcleanup\fR +Remove hard links in the old locations. 
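A typical use of the relink and cleanup commands documented above, broadly following the partition power increase procedure the man page references: relink is run on each object server before the ring is switched to the larger partition power, and cleanup afterwards. The log file path below is a placeholder; /srv/node is the tool's documented default device directory:

    swift-object-relinker --devices /srv/node --logfile /var/log/swift/relinker.log relink
    # ...switch the ring to the new partition power, then remove the old hard links...
    swift-object-relinker --devices /srv/node --logfile /var/log/swift/relinker.log cleanup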
+ +.SH OPTIONS +.TP +\fB\-h\fR, \fB\-\-help\fR +Show this help message and exit + +.TP +\fB\-\-swift-dir\fR \fISWIFT_DIR\fR +Path to swift directory + +.TP +\fB\-\-devices\fR \fIDEVICES\fR +Path to swift device directory + +.TP +\fB\-\-skip\-mount\-check\fR +Don't test if disk is mounted + +.TP +\fB\-\-logfile\fR \fILOGFILE\fR +Set log file name + +.TP +\fB\-\-debug\fR +Enable debug mode + +.SH DOCUMENTATION +.LP +More in depth documentation in regards to +.BI swift\-object\-relinker +and also about OpenStack Swift as a whole can be found at +.BI http://docs.openstack.org/developer/swift/index.html +and +.BI http://docs.openstack.org diff -Nru swift-2.17.0/doc/requirements.txt swift-2.18.0/doc/requirements.txt --- swift-2.17.0/doc/requirements.txt 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/requirements.txt 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,8 @@ +# The order of packages is significant, because pip processes them in the order +# of appearance. Changing the order has an impact on the overall integration +# process, which may cause wedges in the gate later. +# this is required for the docs build jobs +sphinx>=1.6.2 # BSD +openstackdocstheme>=1.11.0 # Apache-2.0 +reno>=1.8.0 # Apache-2.0 +os-api-ref>=1.0.0 # Apache-2.0 diff -Nru swift-2.17.0/doc/s3api/conf/ceph-known-failures-keystone.yaml swift-2.18.0/doc/s3api/conf/ceph-known-failures-keystone.yaml --- swift-2.17.0/doc/s3api/conf/ceph-known-failures-keystone.yaml 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/conf/ceph-known-failures-keystone.yaml 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,209 @@ +ceph_s3: + :teardown: {status: KNOWN} + :setup: {status: KNOWN} + s3tests.functional.test_headers.test_bucket_create_bad_authorization_invalid_aws2: {status: KNOWN} + s3tests.functional.test_headers.test_bucket_create_bad_authorization_none: {status: KNOWN} + s3tests.functional.test_headers.test_object_create_bad_authorization_invalid_aws2: {status: KNOWN} + s3tests.functional.test_headers.test_object_create_bad_authorization_none: {status: KNOWN} + s3tests.functional.test_s3.test_100_continue: {status: KNOWN} + s3tests.functional.test_s3.test_atomic_conditional_write_1mb: {status: KNOWN} + s3tests.functional.test_s3.test_atomic_dual_conditional_write_1mb: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_default: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_grant_email: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_grant_email_notexist: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_grant_nonexist_user: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_grant_userid_fullcontrol: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_grant_userid_read: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_grant_userid_readacp: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_grant_userid_write: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_grant_userid_writeacp: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_no_grants: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acls_changes_persistent: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_xml_fullcontrol: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_xml_read: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_xml_readacp: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_xml_write: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_xml_writeacp: {status: KNOWN} + 
s3tests.functional.test_s3.test_bucket_create_exists: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_header_acl_grants: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_list_objects_anonymous: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_list_objects_anonymous_fail: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_recreate_not_overriding: {status: KNOWN} + s3tests.functional.test_s3.test_cors_origin_response: {status: KNOWN} + s3tests.functional.test_s3.test_cors_origin_wildcard: {status: KNOWN} + s3tests.functional.test_s3.test_list_buckets_anonymous: {status: KNOWN} + s3tests.functional.test_s3.test_list_buckets_invalid_auth: {status: KNOWN} + s3tests.functional.test_s3.test_logging_toggle: {status: KNOWN} + s3tests.functional.test_s3.test_multipart_resend_first_finishes_last: {status: KNOWN} + s3tests.functional.test_s3.test_object_acl_full_control_verify_owner: {status: KNOWN} + s3tests.functional.test_s3.test_object_acl_xml: {status: KNOWN} + s3tests.functional.test_s3.test_object_acl_xml_read: {status: KNOWN} + s3tests.functional.test_s3.test_object_acl_xml_readacp: {status: KNOWN} + s3tests.functional.test_s3.test_object_acl_xml_write: {status: KNOWN} + s3tests.functional.test_s3.test_object_acl_xml_writeacp: {status: KNOWN} + s3tests.functional.test_s3.test_object_copy_canned_acl: {status: KNOWN} + s3tests.functional.test_s3.test_object_copy_not_owned_object_bucket: {status: KNOWN} + s3tests.functional.test_s3.test_object_copy_replacing_metadata: {status: KNOWN} + s3tests.functional.test_s3.test_object_giveaway: {status: KNOWN} + s3tests.functional.test_s3.test_object_header_acl_grants: {status: KNOWN} + s3tests.functional.test_s3.test_object_raw_get: {status: KNOWN} + s3tests.functional.test_s3.test_object_raw_get_bucket_acl: {status: KNOWN} + s3tests.functional.test_s3.test_object_raw_get_bucket_gone: {status: KNOWN} + s3tests.functional.test_s3.test_object_raw_get_object_acl: {status: KNOWN} + s3tests.functional.test_s3.test_object_raw_get_object_gone: {status: KNOWN} + s3tests.functional.test_s3.test_object_raw_put: {status: KNOWN} + s3tests.functional.test_s3.test_object_raw_put_write_access: {status: KNOWN} + s3tests.functional.test_s3.test_object_set_valid_acl: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_anonymous_request: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_authenticated_request: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_authenticated_request_bad_access_key: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_case_insensitive_condition_fields: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_condition_is_case_sensitive: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_escaped_field_values: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_expired_policy: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_expires_is_case_sensitive: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_ignored_header: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_invalid_access_key: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_invalid_content_length_argument: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_invalid_date_format: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_invalid_request_field_value: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_invalid_signature: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_missing_conditions_list: 
{status: KNOWN} + s3tests.functional.test_s3.test_post_object_missing_content_length_argument: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_missing_expires_condition: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_missing_policy_condition: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_missing_signature: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_no_key_specified: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_request_missing_policy_specified_field: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_set_invalid_success_code: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_set_key_from_filename: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_set_success_code: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_success_redirect_action: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_upload_larger_than_chunk: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_upload_size_below_minimum: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_upload_size_limit_exceeded: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_user_specified_header: {status: KNOWN} + s3tests.functional.test_s3.test_put_object_ifmatch_failed: {status: KNOWN} + s3tests.functional.test_s3.test_put_object_ifmatch_good: {status: KNOWN} + s3tests.functional.test_s3.test_put_object_ifmatch_nonexisted_failed: {status: KNOWN} + s3tests.functional.test_s3.test_put_object_ifmatch_overwrite_existed_good: {status: KNOWN} + s3tests.functional.test_s3.test_put_object_ifnonmatch_failed: {status: KNOWN} + s3tests.functional.test_s3.test_put_object_ifnonmatch_good: {status: KNOWN} + s3tests.functional.test_s3.test_put_object_ifnonmatch_nonexisted_good: {status: KNOWN} + s3tests.functional.test_s3.test_put_object_ifnonmatch_overwrite_existed_failed: {status: KNOWN} + s3tests.functional.test_s3.test_set_cors: {status: KNOWN} + s3tests.functional.test_s3.test_stress_bucket_acls_changes: {status: KNOWN} + s3tests.functional.test_s3.test_versioned_concurrent_object_create_and_remove: {status: KNOWN} + s3tests.functional.test_s3.test_versioned_concurrent_object_create_concurrent_remove: {status: KNOWN} + s3tests.functional.test_s3.test_versioned_object_acl: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_bucket_create_suspend: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_copy_obj_version: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_multi_object_delete: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_multi_object_delete_with_marker: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_multi_object_delete_with_marker_create: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_create_overwrite_multipart: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_create_read_remove: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_create_read_remove_head: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_create_versions_remove_all: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_create_versions_remove_special_names: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_list_marker: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_plain_null_version_overwrite: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_plain_null_version_overwrite_suspended: {status: KNOWN} + 
s3tests.functional.test_s3.test_versioning_obj_plain_null_version_removal: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_suspend_versions: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_suspend_versions_simple: {status: KNOWN} + s3tests.functional.test_s3_website.check_can_test_website: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_bucket_private_redirectall_base: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_bucket_private_redirectall_path: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_bucket_private_redirectall_path_upgrade: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_nonexistant_bucket_rgw: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_nonexistant_bucket_s3: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_empty: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_empty_blockederrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_empty_gooderrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_empty_missingerrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_private_index: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_private_index_blockederrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_private_index_gooderrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_private_index_missingerrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_public_index: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_empty: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_empty_blockederrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_empty_gooderrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_empty_missingerrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_private_index: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_private_index_blockederrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_private_index_gooderrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_private_index_missingerrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_public_index: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_xredirect_nonwebsite: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_xredirect_private_abs: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_xredirect_private_relative: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_xredirect_public_abs: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_xredirect_public_relative: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_configure_recreate: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_list_return_data_versioning: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_policy: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_policy_acl: {status: KNOWN} + 
s3tests.functional.test_s3.test_bucket_policy_another_bucket: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_policy_different_tenant: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_policy_set_condition_operator_end_with_IfExists: {status: KNOWN} + s3tests.functional.test_s3.test_delete_tags_obj_public: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_invalid_md5: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_method_head: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_multipart_bad_download: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_multipart_invalid_chunks_1: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_multipart_invalid_chunks_2: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_no_key: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_no_md5: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_other_key: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_post_object_authenticated_request: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_present: {status: KNOWN} + s3tests.functional.test_s3.test_get_obj_head_tagging: {status: KNOWN} + s3tests.functional.test_s3.test_get_obj_tagging: {status: KNOWN} + s3tests.functional.test_s3.test_get_tags_acl_public: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_deletemarker_expiration: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_expiration: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_expiration_date: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_get: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_get_no_id: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_id_too_long: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_multipart_expiration: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_noncur_expiration: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_rules_conflicted: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_same_id: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_set: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_set_date: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_set_deletemarker: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_set_empty_filter: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_set_filter: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_set_multipart: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_set_noncurrent: {status: KNOWN} + s3tests.functional.test_s3.test_multipart_copy_invalid_range: {status: KNOWN} + s3tests.functional.test_s3.test_multipart_copy_versioned: {status: KNOWN} + s3tests.functional.test_s3.test_object_copy_versioned_bucket: {status: KNOWN} + s3tests.functional.test_s3.test_object_copy_versioning_multipart_upload: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_empty_conditions: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_tags_anonymous_request: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_tags_authenticated_request: {status: KNOWN} + s3tests.functional.test_s3.test_put_delete_tags: {status: KNOWN} + s3tests.functional.test_s3.test_put_excess_key_tags: {status: KNOWN} + s3tests.functional.test_s3.test_put_excess_tags: {status: KNOWN} + s3tests.functional.test_s3.test_put_excess_val_tags: {status: KNOWN} + 
s3tests.functional.test_s3.test_put_max_kvsize_tags: {status: KNOWN} + s3tests.functional.test_s3.test_put_max_tags: {status: KNOWN} + s3tests.functional.test_s3.test_put_modify_tags: {status: KNOWN} + s3tests.functional.test_s3.test_put_obj_with_tags: {status: KNOWN} + s3tests.functional.test_s3.test_put_tags_acl_public: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_method_head: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_multipart_invalid_chunks_1: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_multipart_invalid_chunks_2: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_multipart_upload: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_post_object_authenticated_request: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_present: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_read_declare: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_transfer_13b: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_transfer_1MB: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_transfer_1b: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_transfer_1kb: {status: KNOWN} + s3tests.functional.test_s3.test_versioned_object_acl_no_version_specified: {status: KNOWN} diff -Nru swift-2.17.0/doc/s3api/conf/ceph-known-failures-tempauth.yaml swift-2.18.0/doc/s3api/conf/ceph-known-failures-tempauth.yaml --- swift-2.17.0/doc/s3api/conf/ceph-known-failures-tempauth.yaml 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/conf/ceph-known-failures-tempauth.yaml 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,187 @@ +ceph_s3: + :teardown: {status: KNOWN} + :setup: {status: KNOWN} + s3tests.functional.test_headers.test_bucket_create_bad_authorization_invalid_aws2: {status: KNOWN} + s3tests.functional.test_headers.test_bucket_create_bad_authorization_none: {status: KNOWN} + s3tests.functional.test_headers.test_object_create_bad_authorization_invalid_aws2: {status: KNOWN} + s3tests.functional.test_headers.test_object_create_bad_authorization_none: {status: KNOWN} + s3tests.functional.test_s3.test_100_continue: {status: KNOWN} + s3tests.functional.test_s3.test_atomic_conditional_write_1mb: {status: KNOWN} + s3tests.functional.test_s3.test_atomic_dual_conditional_write_1mb: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_grant_email: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_grant_email_notexist: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_grant_nonexist_user: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_acl_no_grants: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_create_exists: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_header_acl_grants: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_list_objects_anonymous: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_list_objects_anonymous_fail: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_recreate_not_overriding: {status: KNOWN} + s3tests.functional.test_s3.test_cors_origin_response: {status: KNOWN} + s3tests.functional.test_s3.test_cors_origin_wildcard: {status: KNOWN} + s3tests.functional.test_s3.test_list_buckets_anonymous: {status: KNOWN} + s3tests.functional.test_s3.test_list_buckets_invalid_auth: {status: KNOWN} + s3tests.functional.test_s3.test_logging_toggle: {status: KNOWN} + s3tests.functional.test_s3.test_multipart_resend_first_finishes_last: {status: KNOWN} + s3tests.functional.test_s3.test_object_copy_canned_acl: {status: KNOWN} + 
s3tests.functional.test_s3.test_object_copy_replacing_metadata: {status: KNOWN} + s3tests.functional.test_s3.test_object_header_acl_grants: {status: KNOWN} + s3tests.functional.test_s3.test_object_raw_get: {status: KNOWN} + s3tests.functional.test_s3.test_object_raw_get_bucket_acl: {status: KNOWN} + s3tests.functional.test_s3.test_object_raw_get_bucket_gone: {status: KNOWN} + s3tests.functional.test_s3.test_object_raw_get_object_acl: {status: KNOWN} + s3tests.functional.test_s3.test_object_raw_get_object_gone: {status: KNOWN} + s3tests.functional.test_s3.test_object_raw_put: {status: KNOWN} + s3tests.functional.test_s3.test_object_raw_put_write_access: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_anonymous_request: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_authenticated_request: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_authenticated_request_bad_access_key: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_case_insensitive_condition_fields: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_condition_is_case_sensitive: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_escaped_field_values: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_expired_policy: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_expires_is_case_sensitive: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_ignored_header: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_invalid_access_key: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_invalid_content_length_argument: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_invalid_date_format: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_invalid_request_field_value: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_invalid_signature: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_missing_conditions_list: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_missing_content_length_argument: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_missing_expires_condition: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_missing_policy_condition: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_missing_signature: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_no_key_specified: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_request_missing_policy_specified_field: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_set_invalid_success_code: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_set_key_from_filename: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_set_success_code: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_success_redirect_action: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_upload_larger_than_chunk: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_upload_size_below_minimum: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_upload_size_limit_exceeded: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_user_specified_header: {status: KNOWN} + s3tests.functional.test_s3.test_put_object_ifmatch_failed: {status: KNOWN} + s3tests.functional.test_s3.test_put_object_ifmatch_good: {status: KNOWN} + s3tests.functional.test_s3.test_put_object_ifmatch_nonexisted_failed: {status: KNOWN} + s3tests.functional.test_s3.test_put_object_ifmatch_overwrite_existed_good: {status: KNOWN} + 
s3tests.functional.test_s3.test_put_object_ifnonmatch_failed: {status: KNOWN} + s3tests.functional.test_s3.test_put_object_ifnonmatch_good: {status: KNOWN} + s3tests.functional.test_s3.test_put_object_ifnonmatch_nonexisted_good: {status: KNOWN} + s3tests.functional.test_s3.test_put_object_ifnonmatch_overwrite_existed_failed: {status: KNOWN} + s3tests.functional.test_s3.test_set_cors: {status: KNOWN} + s3tests.functional.test_s3.test_versioned_concurrent_object_create_and_remove: {status: KNOWN} + s3tests.functional.test_s3.test_versioned_concurrent_object_create_concurrent_remove: {status: KNOWN} + s3tests.functional.test_s3.test_versioned_object_acl: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_bucket_create_suspend: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_copy_obj_version: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_multi_object_delete: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_multi_object_delete_with_marker: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_multi_object_delete_with_marker_create: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_create_overwrite_multipart: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_create_read_remove: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_create_read_remove_head: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_create_versions_remove_all: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_create_versions_remove_special_names: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_list_marker: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_plain_null_version_overwrite: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_plain_null_version_overwrite_suspended: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_plain_null_version_removal: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_suspend_versions: {status: KNOWN} + s3tests.functional.test_s3.test_versioning_obj_suspend_versions_simple: {status: KNOWN} + s3tests.functional.test_s3_website.check_can_test_website: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_bucket_private_redirectall_base: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_bucket_private_redirectall_path: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_bucket_private_redirectall_path_upgrade: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_nonexistant_bucket_rgw: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_nonexistant_bucket_s3: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_empty: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_empty_blockederrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_empty_gooderrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_empty_missingerrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_private_index: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_private_index_blockederrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_private_index_gooderrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_private_bucket_list_private_index_missingerrordoc: {status: KNOWN} + 
s3tests.functional.test_s3_website.test_website_private_bucket_list_public_index: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_empty: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_empty_blockederrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_empty_gooderrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_empty_missingerrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_private_index: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_private_index_blockederrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_private_index_gooderrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_private_index_missingerrordoc: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_public_bucket_list_public_index: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_xredirect_nonwebsite: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_xredirect_private_abs: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_xredirect_private_relative: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_xredirect_public_abs: {status: KNOWN} + s3tests.functional.test_s3_website.test_website_xredirect_public_relative: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_configure_recreate: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_list_return_data_versioning: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_policy: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_policy_acl: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_policy_another_bucket: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_policy_different_tenant: {status: KNOWN} + s3tests.functional.test_s3.test_bucket_policy_set_condition_operator_end_with_IfExists: {status: KNOWN} + s3tests.functional.test_s3.test_delete_tags_obj_public: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_invalid_md5: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_method_head: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_multipart_bad_download: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_multipart_invalid_chunks_1: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_multipart_invalid_chunks_2: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_no_key: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_no_md5: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_other_key: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_post_object_authenticated_request: {status: KNOWN} + s3tests.functional.test_s3.test_encryption_sse_c_present: {status: KNOWN} + s3tests.functional.test_s3.test_get_obj_head_tagging: {status: KNOWN} + s3tests.functional.test_s3.test_get_obj_tagging: {status: KNOWN} + s3tests.functional.test_s3.test_get_tags_acl_public: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_deletemarker_expiration: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_expiration: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_expiration_date: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_get: {status: KNOWN} + 
s3tests.functional.test_s3.test_lifecycle_get_no_id: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_id_too_long: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_multipart_expiration: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_noncur_expiration: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_rules_conflicted: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_same_id: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_set: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_set_date: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_set_deletemarker: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_set_empty_filter: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_set_filter: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_set_multipart: {status: KNOWN} + s3tests.functional.test_s3.test_lifecycle_set_noncurrent: {status: KNOWN} + s3tests.functional.test_s3.test_multipart_copy_invalid_range: {status: KNOWN} + s3tests.functional.test_s3.test_multipart_copy_versioned: {status: KNOWN} + s3tests.functional.test_s3.test_object_copy_versioned_bucket: {status: KNOWN} + s3tests.functional.test_s3.test_object_copy_versioning_multipart_upload: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_empty_conditions: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_tags_anonymous_request: {status: KNOWN} + s3tests.functional.test_s3.test_post_object_tags_authenticated_request: {status: KNOWN} + s3tests.functional.test_s3.test_put_delete_tags: {status: KNOWN} + s3tests.functional.test_s3.test_put_excess_key_tags: {status: KNOWN} + s3tests.functional.test_s3.test_put_excess_tags: {status: KNOWN} + s3tests.functional.test_s3.test_put_excess_val_tags: {status: KNOWN} + s3tests.functional.test_s3.test_put_max_kvsize_tags: {status: KNOWN} + s3tests.functional.test_s3.test_put_max_tags: {status: KNOWN} + s3tests.functional.test_s3.test_put_modify_tags: {status: KNOWN} + s3tests.functional.test_s3.test_put_obj_with_tags: {status: KNOWN} + s3tests.functional.test_s3.test_put_tags_acl_public: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_method_head: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_multipart_invalid_chunks_1: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_multipart_invalid_chunks_2: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_multipart_upload: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_post_object_authenticated_request: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_present: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_read_declare: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_transfer_13b: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_transfer_1MB: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_transfer_1b: {status: KNOWN} + s3tests.functional.test_s3.test_sse_kms_transfer_1kb: {status: KNOWN} + s3tests.functional.test_s3.test_versioned_object_acl_no_version_specified: {status: KNOWN} diff -Nru swift-2.17.0/doc/s3api/rnc/access_control_policy.rnc swift-2.18.0/doc/s3api/rnc/access_control_policy.rnc --- swift-2.17.0/doc/s3api/rnc/access_control_policy.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/access_control_policy.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,7 @@ +include "common.rnc" + +start = + element AccessControlPolicy { + element Owner { CanonicalUser } & + element AccessControlList { 
AccessControlList } + } diff -Nru swift-2.17.0/doc/s3api/rnc/bucket_logging_status.rnc swift-2.18.0/doc/s3api/rnc/bucket_logging_status.rnc --- swift-2.17.0/doc/s3api/rnc/bucket_logging_status.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/bucket_logging_status.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,10 @@ +include "common.rnc" + +start = + element BucketLoggingStatus { + element LoggingEnabled { + element TargetBucket { xsd:string } & + element TargetPrefix { xsd:string } & + element TargetGrants { AccessControlList }? + }? + } diff -Nru swift-2.17.0/doc/s3api/rnc/common.rnc swift-2.18.0/doc/s3api/rnc/common.rnc --- swift-2.17.0/doc/s3api/rnc/common.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/common.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,26 @@ +namespace xsi = "http://www.w3.org/2001/XMLSchema-instance" + +CanonicalUser = + element ID { xsd:string } & + element DisplayName { xsd:string }? + +StorageClass = "STANDARD" | "REDUCED_REDUNDANCY" | "GLACIER" | "UNKNOWN" + +AccessControlList = + element Grant { + element Grantee { + ( + attribute xsi:type { "AmazonCustomerByEmail" }, + element EmailAddress { xsd:string } + ) | ( + attribute xsi:type { "CanonicalUser" }, + CanonicalUser + ) | ( + attribute xsi:type { "Group" }, + element URI { xsd:string } + ) + } & + element Permission { + "READ" | "WRITE" | "READ_ACP" | "WRITE_ACP" | "FULL_CONTROL" + } + }* diff -Nru swift-2.17.0/doc/s3api/rnc/complete_multipart_upload_result.rnc swift-2.18.0/doc/s3api/rnc/complete_multipart_upload_result.rnc --- swift-2.17.0/doc/s3api/rnc/complete_multipart_upload_result.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/complete_multipart_upload_result.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,7 @@ +start = + element CompleteMultipartUploadResult { + element Location { xsd:anyURI }, + element Bucket { xsd:string }, + element Key { xsd:string }, + element ETag { xsd:string } + } diff -Nru swift-2.17.0/doc/s3api/rnc/complete_multipart_upload.rnc swift-2.18.0/doc/s3api/rnc/complete_multipart_upload.rnc --- swift-2.17.0/doc/s3api/rnc/complete_multipart_upload.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/complete_multipart_upload.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,7 @@ +start = + element CompleteMultipartUpload { + element Part { + element PartNumber { xsd:int } & + element ETag { xsd:string } + }+ + } diff -Nru swift-2.17.0/doc/s3api/rnc/copy_object_result.rnc swift-2.18.0/doc/s3api/rnc/copy_object_result.rnc --- swift-2.17.0/doc/s3api/rnc/copy_object_result.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/copy_object_result.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,5 @@ +start = + element CopyObjectResult { + element LastModified { xsd:dateTime }, + element ETag { xsd:string } + } diff -Nru swift-2.17.0/doc/s3api/rnc/copy_part_result.rnc swift-2.18.0/doc/s3api/rnc/copy_part_result.rnc --- swift-2.17.0/doc/s3api/rnc/copy_part_result.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/copy_part_result.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,5 @@ +start = + element CopyPartResult { + element LastModified { xsd:dateTime }, + element ETag { xsd:string } + } diff -Nru swift-2.17.0/doc/s3api/rnc/create_bucket_configuration.rnc swift-2.18.0/doc/s3api/rnc/create_bucket_configuration.rnc --- swift-2.17.0/doc/s3api/rnc/create_bucket_configuration.rnc 1970-01-01 00:00:00.000000000 +0000 +++ 
swift-2.18.0/doc/s3api/rnc/create_bucket_configuration.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,4 @@ +start = + element * { + element LocationConstraint { xsd:string } + } diff -Nru swift-2.17.0/doc/s3api/rnc/delete_result.rnc swift-2.18.0/doc/s3api/rnc/delete_result.rnc --- swift-2.17.0/doc/s3api/rnc/delete_result.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/delete_result.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,17 @@ +start = + element DeleteResult { + ( + element Deleted { + element Key { xsd:string }, + element VersionId { xsd:string }?, + element DeleteMarker { xsd:boolean }?, + element DeleteMarkerVersionId { xsd:string }? + } | + element Error { + element Key { xsd:string }, + element VersionId { xsd:string }?, + element Code { xsd:string }, + element Message { xsd:string } + } + )* + } diff -Nru swift-2.17.0/doc/s3api/rnc/delete.rnc swift-2.18.0/doc/s3api/rnc/delete.rnc --- swift-2.17.0/doc/s3api/rnc/delete.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/delete.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,8 @@ +start = + element Delete { + element Quiet { xsd:boolean }? & + element Object { + element Key { xsd:string } & + element VersionId { xsd:string }? + }+ + } diff -Nru swift-2.17.0/doc/s3api/rnc/error.rnc swift-2.18.0/doc/s3api/rnc/error.rnc --- swift-2.17.0/doc/s3api/rnc/error.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/error.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,11 @@ +start = + element Error { + element Code { xsd:string }, + element Message { xsd:string }, + DebugInfo* + } + +DebugInfo = + element * { + (attribute * { text } | text | DebugInfo)* + } diff -Nru swift-2.17.0/doc/s3api/rnc/initiate_multipart_upload_result.rnc swift-2.18.0/doc/s3api/rnc/initiate_multipart_upload_result.rnc --- swift-2.17.0/doc/s3api/rnc/initiate_multipart_upload_result.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/initiate_multipart_upload_result.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,6 @@ +start = + element InitiateMultipartUploadResult { + element Bucket { xsd:string }, + element Key { xsd:string }, + element UploadId { xsd:string } + } diff -Nru swift-2.17.0/doc/s3api/rnc/lifecycle_configuration.rnc swift-2.18.0/doc/s3api/rnc/lifecycle_configuration.rnc --- swift-2.17.0/doc/s3api/rnc/lifecycle_configuration.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/lifecycle_configuration.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,20 @@ +include "common.rnc" + +start = + element LifecycleConfiguration { + element Rule { + element ID { xsd:string }? & + element Prefix { xsd:string } & + element Status { "Enabled" | "Disabled" } & + element Transition { Transition }? & + element Expiration { Expiration }? 
+ }+ + } + +Expiration = + element Days { xsd:int } | + element Date { xsd:dateTime } + +Transition = + Expiration & + element StorageClass { StorageClass } diff -Nru swift-2.17.0/doc/s3api/rnc/list_all_my_buckets_result.rnc swift-2.18.0/doc/s3api/rnc/list_all_my_buckets_result.rnc --- swift-2.17.0/doc/s3api/rnc/list_all_my_buckets_result.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/list_all_my_buckets_result.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,12 @@ +include "common.rnc" + +start = + element ListAllMyBucketsResult { + element Owner { CanonicalUser }, + element Buckets { + element Bucket { + element Name { xsd:string }, + element CreationDate { xsd:dateTime } + }* + } + } diff -Nru swift-2.17.0/doc/s3api/rnc/list_bucket_result.rnc swift-2.18.0/doc/s3api/rnc/list_bucket_result.rnc --- swift-2.17.0/doc/s3api/rnc/list_bucket_result.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/list_bucket_result.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,33 @@ +include "common.rnc" + +start = + element ListBucketResult { + element Name { xsd:string }, + element Prefix { xsd:string }, + ( + ( + element Marker { xsd:string }, + element NextMarker { xsd:string }? + ) | ( + element NextContinuationToken { xsd:string }?, + element ContinuationToken { xsd:string }?, + element StartAfter { xsd:string }?, + element KeyCount { xsd:int } + ) + ), + element MaxKeys { xsd:int }, + element EncodingType { xsd:string }?, + element Delimiter { xsd:string }?, + element IsTruncated { xsd:boolean }, + element Contents { + element Key { xsd:string }, + element LastModified { xsd:dateTime }, + element ETag { xsd:string }, + element Size { xsd:long }, + element Owner { CanonicalUser }?, + element StorageClass { StorageClass } + }*, + element CommonPrefixes { + element Prefix { xsd:string } + }* + } diff -Nru swift-2.17.0/doc/s3api/rnc/list_multipart_uploads_result.rnc swift-2.18.0/doc/s3api/rnc/list_multipart_uploads_result.rnc --- swift-2.17.0/doc/s3api/rnc/list_multipart_uploads_result.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/list_multipart_uploads_result.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,26 @@ +include "common.rnc" + +start = + element ListMultipartUploadsResult { + element Bucket { xsd:string }, + element KeyMarker { xsd:string }, + element UploadIdMarker { xsd:string }, + element NextKeyMarker { xsd:string }, + element NextUploadIdMarker { xsd:string }, + element Delimiter { xsd:string }?, + element Prefix { xsd:string }?, + element MaxUploads { xsd:int }, + element EncodingType { xsd:string }?, + element IsTruncated { xsd:boolean }, + element Upload { + element Key { xsd:string }, + element UploadId { xsd:string }, + element Initiator { CanonicalUser }, + element Owner { CanonicalUser }, + element StorageClass { StorageClass }, + element Initiated { xsd:dateTime } + }*, + element CommonPrefixes { + element Prefix { xsd:string } + }* + } diff -Nru swift-2.17.0/doc/s3api/rnc/list_parts_result.rnc swift-2.18.0/doc/s3api/rnc/list_parts_result.rnc --- swift-2.17.0/doc/s3api/rnc/list_parts_result.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/list_parts_result.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,22 @@ +include "common.rnc" + +start = + element ListPartsResult { + element Bucket { xsd:string }, + element Key { xsd:string }, + element UploadId { xsd:string }, + element Initiator { CanonicalUser }, + element Owner { CanonicalUser }, + element StorageClass { StorageClass }, + 
element PartNumberMarker { xsd:int }, + element NextPartNumberMarker { xsd:int }, + element MaxParts { xsd:int }, + element EncodingType { xsd:string }?, + element IsTruncated { xsd:boolean }, + element Part { + element PartNumber { xsd:int }, + element LastModified { xsd:dateTime }, + element ETag { xsd:string }, + element Size { xsd:long } + }* + } diff -Nru swift-2.17.0/doc/s3api/rnc/list_versions_result.rnc swift-2.18.0/doc/s3api/rnc/list_versions_result.rnc --- swift-2.17.0/doc/s3api/rnc/list_versions_result.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/list_versions_result.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,37 @@ +include "common.rnc" + +start = + element ListVersionsResult { + element Name { xsd:string }, + element Prefix { xsd:string }, + element KeyMarker { xsd:string }, + element VersionIdMarker { xsd:string }, + element NextKeyMarker { xsd:string }?, + element NextVersionIdMarker { xsd:string }?, + element MaxKeys { xsd:int }, + element EncodingType { xsd:string }?, + element Delimiter { xsd:string }?, + element IsTruncated { xsd:boolean }, + ( + element Version { + element Key { xsd:string }, + element VersionId { xsd:string }, + element IsLatest { xsd:boolean }, + element LastModified { xsd:dateTime }, + element ETag { xsd:string }, + element Size { xsd:long }, + element Owner { CanonicalUser }?, + element StorageClass { StorageClass } + } | + element DeleteMarker { + element Key { xsd:string }, + element VersionId { xsd:string }, + element IsLatest { xsd:boolean }, + element LastModified { xsd:dateTime }, + element Owner { CanonicalUser }? + } + )*, + element CommonPrefixes { + element Prefix { xsd:string } + }* + } diff -Nru swift-2.17.0/doc/s3api/rnc/location_constraint.rnc swift-2.18.0/doc/s3api/rnc/location_constraint.rnc --- swift-2.17.0/doc/s3api/rnc/location_constraint.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/location_constraint.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1 @@ +start = element LocationConstraint { xsd:string } diff -Nru swift-2.17.0/doc/s3api/rnc/versioning_configuration.rnc swift-2.18.0/doc/s3api/rnc/versioning_configuration.rnc --- swift-2.17.0/doc/s3api/rnc/versioning_configuration.rnc 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/s3api/rnc/versioning_configuration.rnc 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,5 @@ +start = + element VersioningConfiguration { + element Status { "Enabled" | "Suspended" }? & + element MfaDelete { "Enabled" | "Disabled" }? 
+ } diff -Nru swift-2.17.0/doc/saio/swift/container-server/1.conf swift-2.18.0/doc/saio/swift/container-server/1.conf --- swift-2.17.0/doc/saio/swift/container-server/1.conf 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/saio/swift/container-server/1.conf 2018-05-30 10:17:02.000000000 +0000 @@ -27,3 +27,13 @@ [container-auditor] [container-sync] + +[container-sharder] +auto_shard = true +rsync_module = {replication_ip}::container{replication_port} +# This is intentionally much smaller than the default of 1,000,000 so tests +# can run in a reasonable amount of time +shard_container_threshold = 100 +# The probe tests make explicit assumptions about the batch sizes +shard_scanner_batch_size = 10 +cleave_batch_size = 2 diff -Nru swift-2.17.0/doc/saio/swift/container-server/2.conf swift-2.18.0/doc/saio/swift/container-server/2.conf --- swift-2.17.0/doc/saio/swift/container-server/2.conf 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/saio/swift/container-server/2.conf 2018-05-30 10:17:02.000000000 +0000 @@ -27,3 +27,13 @@ [container-auditor] [container-sync] + +[container-sharder] +auto_shard = true +rsync_module = {replication_ip}::container{replication_port} +# This is intentionally much smaller than the default of 1,000,000 so tests +# can run in a reasonable amount of time +shard_container_threshold = 100 +# The probe tests make explicit assumptions about the batch sizes +shard_scanner_batch_size = 10 +cleave_batch_size = 2 diff -Nru swift-2.17.0/doc/saio/swift/container-server/3.conf swift-2.18.0/doc/saio/swift/container-server/3.conf --- swift-2.17.0/doc/saio/swift/container-server/3.conf 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/saio/swift/container-server/3.conf 2018-05-30 10:17:02.000000000 +0000 @@ -27,3 +27,13 @@ [container-auditor] [container-sync] + +[container-sharder] +auto_shard = true +rsync_module = {replication_ip}::container{replication_port} +# This is intentionally much smaller than the default of 1,000,000 so tests +# can run in a reasonable amount of time +shard_container_threshold = 100 +# The probe tests make explicit assumptions about the batch sizes +shard_scanner_batch_size = 10 +cleave_batch_size = 2 diff -Nru swift-2.17.0/doc/saio/swift/container-server/4.conf swift-2.18.0/doc/saio/swift/container-server/4.conf --- swift-2.17.0/doc/saio/swift/container-server/4.conf 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/saio/swift/container-server/4.conf 2018-05-30 10:17:02.000000000 +0000 @@ -27,3 +27,13 @@ [container-auditor] [container-sync] + +[container-sharder] +auto_shard = true +rsync_module = {replication_ip}::container{replication_port} +# This is intentionally much smaller than the default of 1,000,000 so tests +# can run in a reasonable amount of time +shard_container_threshold = 100 +# The probe tests make explicit assumptions about the batch sizes +shard_scanner_batch_size = 10 +cleave_batch_size = 2 diff -Nru swift-2.17.0/doc/saio/swift/internal-client.conf swift-2.18.0/doc/saio/swift/internal-client.conf --- swift-2.17.0/doc/saio/swift/internal-client.conf 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/saio/swift/internal-client.conf 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,24 @@ +[DEFAULT] + +[pipeline:main] +pipeline = catch_errors proxy-logging cache symlink proxy-server + +[app:proxy-server] +use = egg:swift#proxy +account_autocreate = true +# See proxy-server.conf-sample for options + +[filter:symlink] +use = egg:swift#symlink +# See proxy-server.conf-sample for options + +[filter:cache] +use 
= egg:swift#memcache +# See proxy-server.conf-sample for options + +[filter:proxy-logging] +use = egg:swift#proxy_logging + +[filter:catch_errors] +use = egg:swift#catch_errors +# See proxy-server.conf-sample for options diff -Nru swift-2.17.0/doc/source/associated_projects.rst swift-2.18.0/doc/source/associated_projects.rst --- swift-2.17.0/doc/source/associated_projects.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/associated_projects.rst 2018-05-30 10:17:02.000000000 +0000 @@ -10,50 +10,91 @@ * OpenStack supported binding: - * `Python-SwiftClient `_ + * `Python-SwiftClient `_ * Unofficial libraries and bindings: - * `PHP-opencloud `_ - Official Rackspace PHP bindings that should work for other Swift deployments too. - * `PyRAX `_ - Official Rackspace Python bindings for CloudFiles that should work for other Swift deployments too. - * `openstack.net `_ - Official Rackspace .NET bindings that should work for other Swift deployments too. - * `RSwift `_ - R API bindings. - * `Go language bindings `_ - * `supload `_ - Bash script to upload file to cloud storage based on OpenStack Swift API. - * `libcloud `_ - Apache Libcloud - a unified interface in Python for different clouds with OpenStack Swift support. - * `SwiftBox `_ - C# library using RestSharp - * `jclouds `_ - Java library offering bindings for all OpenStack projects - * `java-openstack-swift `_ - Java bindings for OpenStack Swift - * `swift_client `_ - Small but powerful Ruby client to interact with OpenStack Swift - * `nightcrawler_swift `_ - This Ruby gem teleports your assets to a OpenStack Swift bucket/container - * `swift storage `_ - Simple OpenStack Swift storage client. - * `javaswift `_ - Collection of Java tools for Swift + * PHP + + * `PHP-opencloud `_ - Official Rackspace PHP + bindings that should work for other Swift deployments too. + + * Ruby + + * `swift_client `_ - + Small but powerful Ruby client to interact with OpenStack Swift + * `nightcrawler_swift `_ - + This Ruby gem teleports your assets to a OpenStack Swift bucket/container + * `swift storage `_ - + Simple OpenStack Swift storage client. + + * Java + + * `libcloud `_ - Apache Libcloud - a unified + interface in Python for different clouds with OpenStack Swift support. + * `jclouds `_ - + Java library offering bindings for all OpenStack projects + * `java-openstack-swift `_ - + Java bindings for OpenStack Swift + * `javaswift `_ - Collection of Java tools for Swift + + * Bash + + * `supload `_ - Bash script to + upload file to cloud storage based on OpenStack Swift API. + + * .NET + + * `openstacknetsdk.org `_ - An OpenStack + Cloud SDK for Microsoft .NET. + + * Go + + * `Go language bindings `_ + * `Gophercloud an OpenStack SDK for Go `_ + Authentication -------------- -* `Keystone `_ - Official Identity Service for OpenStack. -* `Swauth `_ - An alternative Swift authentication service that only requires Swift itself. -* `Basicauth `_ - HTTP Basic authentication support (keystone backed). +* `Keystone `_ - Official Identity + Service for OpenStack. +* `Swauth `_ - An alternative Swift + authentication service that only requires Swift itself. +* `Basicauth `_ - HTTP Basic + authentication support (keystone backed). Command Line Access ------------------- -* `Swiftly `_ - Alternate command line access to Swift with direct (no proxy) access capabilities as well. +* `Swiftly `_ - Alternate command line + access to Swift with direct (no proxy) access capabilities as well. 
+ + +External Integration +-------------------- + +* `swift-s3-sync `_ - + Swift-S3 synchronization tool +* `swift-metadata-sync `_ - + Propagate OpenStack Swift object metadata into Elasticsearch Log Processing -------------- -* `Slogging `_ - Basic stats and logging tools. +* `slogging `_ - Basic stats and + logging tools. Monitoring & Statistics ----------------------- -* `Swift Informant `_ - Swift Proxy Middleware to send events to a statsd instance. -* `Swift Inspector `_ - Swift middleware to relay information about a request back to the client. +* `Swift Informant `_ - + Swift proxy Middleware to send events to a statsd instance. +* `Swift Inspector `_ - + Swift middleware to relay information about a request back to the client. Content Distribution Network Integration @@ -65,9 +106,11 @@ Alternative API --------------- -* `Swift3 `_ - Amazon S3 API emulation. -* `CDMI `_ - CDMI support -* `SwiftHLM `_ - a middleware for using OpenStack Swift with tape and other high latency media storage backends +* `ProxyFS `_ - Integrated file and + object access for Swift object storage +* `SwiftHLM `_ - a middleware for + using OpenStack Swift with tape and other high latency media storage + backends. Benchmarking/Load Generators @@ -83,14 +126,16 @@ Custom Logger Hooks ------------------- -* `swift-sentry `_ - Sentry exception reporting for Swift +* `swift-sentry `_ - + Sentry exception reporting for Swift Storage Backends (DiskFile API implementations) ----------------------------------------------- -* `Swift-on-File `_ - Enables objects created using Swift API to be accessed as files on a POSIX filesystem and vice versa. -* `swift-ceph-backend `_ - Ceph RADOS object server implementation for Swift. -* `kinetic-swift `_ - Seagate Kinetic Drive as backend for Swift -* `swift-scality-backend `_ - Scality sproxyd object server implementation for Swift. +* `Swift-on-File `_ - + Enables objects created using Swift API to be accessed as files on a POSIX + filesystem and vice versa. +* `swift-scality-backend `_ - + Scality sproxyd object server implementation for Swift. Developer Tools --------------- @@ -104,18 +149,31 @@ Quickly setup a standard development environment using Vagrant and Ansible in a Fedora virtual machine (with built-in `Swift-on-File `_ support). +* `runway `_ - Runway sets up a + swift-all-in-one (SAIO) dev environment in an lxc container. +* `Multi Swift `_ - + Bash scripts to spin up multiple Swift clusters sharing the same hardware + Other ----- -* `Glance `_ - Provides services for discovering, registering, and retrieving virtual machine images (for OpenStack Compute [Nova], for example). -* `Better Staticweb `_ - Makes swift containers accessible by default. -* `Django Swiftbrowser `_ - Simple Django web app to access OpenStack Swift. -* `Swift-account-stats `_ - Swift-account-stats is a tool to report statistics on Swift usage at tenant and global levels. 
-* `PyECLib `_ - High Level Erasure Code library used by Swift -* `liberasurecode `_ - Low Level Erasure Code library used by PyECLib -* `Swift Browser `_ - JavaScript interface for Swift -* `swift-ui `_ - OpenStack Swift web browser -* `Swift Durability Calculator `_ - Data Durability Calculation Tool for Swift -* `swiftbackmeup `_ - Utility that allows one to create backups and upload them to OpenStack Swift -* `Multi Swift `_ - Bash scripts to spin up multiple Swift clusters sharing the same hardware +* `Glance `_ - Provides services for + discovering, registering, and retrieving virtual machine images + (for OpenStack Compute [Nova], for example). +* `Django Swiftbrowser `_ - + Simple Django web app to access OpenStack Swift. +* `Swift-account-stats `_ - + Swift-account-stats is a tool to report statistics on Swift usage at + tenant and global levels. +* `PyECLib `_ - High-level erasure code + library used by Swift +* `liberasurecode `_ - Low-level + erasure code library used by PyECLib +* `Swift Browser `_ - JavaScript + interface for Swift +* `swift-ui `_ - OpenStack Swift + web browser +* `swiftbackmeup `_ - + Utility that allows one to create backups and upload them to OpenStack Swift +* `s3compat `_ - S3 API compatibility checker diff -Nru swift-2.17.0/doc/source/container.rst swift-2.18.0/doc/source/container.rst --- swift-2.17.0/doc/source/container.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/container.rst 2018-05-30 10:17:02.000000000 +0000 @@ -24,6 +24,16 @@ :undoc-members: :show-inheritance: +.. _container-replicator: + +Container Replicator +==================== + +.. automodule:: swift.container.replicator + :members: + :undoc-members: + :show-inheritance: + .. _container-server: Container Server @@ -44,12 +54,12 @@ :undoc-members: :show-inheritance: -.. _container-replicator: +.. _container-sharder: -Container Replicator -==================== +Container Sharder +================= -.. automodule:: swift.container.replicator +.. automodule:: swift.container.sharder :members: :undoc-members: :show-inheritance: diff -Nru swift-2.17.0/doc/source/deployment_guide.rst swift-2.18.0/doc/source/deployment_guide.rst --- swift-2.17.0/doc/source/deployment_guide.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/deployment_guide.rst 2018-05-30 10:17:09.000000000 +0000 @@ -594,6 +594,10 @@ X-Delete-At, This list is in addition to X-Object-Manifest, X-Object-Meta-* headers and cannot include X-Static-Large-Object Content-Type, etag, Content-Length, or deleted + Cache-Control, + Content-Language, + Expires, + X-Robots-Tag auto_create_account_prefix . Prefix used when automatically creating accounts. replication_server Configure parameter for creating @@ -692,8 +696,14 @@ as a daemon interval 30 Time in seconds to wait between replication passes -concurrency 1 Number of replication workers to - spawn +concurrency 1 Number of replication jobs to + run per worker process +replicator_workers 0 Number of worker processes to use. + No matter how big this number is, + at most one worker per disk will + be used. The default value of 0 + means no forking; all work is done + in the main process. 
sync_method rsync The sync method to use; default is rsync but you can use ssync to try the EXPERIMENTAL
diff -Nru swift-2.17.0/doc/source/development_guidelines.rst swift-2.18.0/doc/source/development_guidelines.rst --- swift-2.17.0/doc/source/development_guidelines.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/development_guidelines.rst 2018-05-30 10:17:02.000000000 +0000 @@ -199,7 +199,7 @@ For example for Vim the `syntastic`_ plugin can do this for you. -.. _`hacking`: https://pypi.python.org/pypi/hacking +.. _`hacking`: https://pypi.org/project/hacking .. _`syntastic`: https://github.com/scrooloose/syntastic ------------------------
diff -Nru swift-2.17.0/doc/source/getting_started.rst swift-2.18.0/doc/source/getting_started.rst --- swift-2.17.0/doc/source/getting_started.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/getting_started.rst 2018-05-30 10:17:02.000000000 +0000 @@ -39,7 +39,7 @@ and SDK is python-swiftclient. * `Source code `_ -* `Python Package Index `_ +* `Python Package Index `_ ---------- Production
diff -Nru swift-2.17.0/doc/source/howto_installmultinode.rst swift-2.18.0/doc/source/howto_installmultinode.rst --- swift-2.17.0/doc/source/howto_installmultinode.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/howto_installmultinode.rst 2018-05-30 10:17:02.000000000 +0000 @@ -3,7 +3,7 @@ ===================================================== Please refer to the latest official -`OpenStack Installation Guides `_ +`OpenStack Installation Guides `_ for the most up-to-date documentation. Current Install Guides
diff -Nru swift-2.17.0/doc/source/images/sharded_GET.svg swift-2.18.0/doc/source/images/sharded_GET.svg [new 2019-line SVG image; text labels: proxy; /acct: cont (fresh db); /.shards_acct: cont-568d8e-<ts>-0, cont-750ed3-<ts>-1, cont-4ec28d-<ts>-2, cont-aef34f-<ts>-3, cont-4837ad-<ts>-4; shard ranges "" - "cat", "cat" - "giraffe", "giraffe" - "igloo", "igloo" - "linux", "linux" - ""; steps 1-5; SVG markup not reproduced]
diff -Nru swift-2.17.0/doc/source/images/sharding_cleave1_load.svg swift-2.18.0/doc/source/images/sharding_cleave1_load.svg [new 1694-line SVG image; text labels: /acct: cont (retiring db), cont (fresh db); /.shards_acct: cont-568d8e-<ts>-0, cont-750ed3-<ts>-1, cont-4ec28d-<ts>-2; cat, giraffe, igloo; shard ranges "" - "cat", "cat" - "giraffe", "giraffe" - "igloo", "igloo" - ""; SVG markup not reproduced]
diff -Nru swift-2.17.0/doc/source/images/sharding_cleave2_load.svg swift-2.18.0/doc/source/images/sharding_cleave2_load.svg [new 1754-line SVG image; text labels: /acct: cont (retiring db), cont (fresh db); /.shards_acct: cont-568d8e-<ts>-0 through cont-aef34f-<ts>-3; cat, giraffe, igloo, linux; shard ranges "" - "cat" through "linux" - ""; SVG markup not reproduced]
diff -Nru swift-2.17.0/doc/source/images/sharding_cleave_basic.svg swift-2.18.0/doc/source/images/sharding_cleave_basic.svg [new 649-line SVG image; text labels: /acct: cont; /.shards_acct: cont-568d8e-<ts>-0, cont-750ed3-<ts>-1; SVG markup not reproduced]
diff -Nru swift-2.17.0/doc/source/images/sharding_db_states.svg swift-2.18.0/doc/source/images/sharding_db_states.svg [new 1502-line SVG image; text labels: Container DB, Retiring DB, Fresh DB; states UNSHARDED, SHARDING, SHARDED; SVG markup not reproduced]
diff -Nru swift-2.17.0/doc/source/images/sharding_GET.svg swift-2.18.0/doc/source/images/sharding_GET.svg [new 2112-line SVG image; text labels: proxy; /acct: cont (retiring db), cont (fresh db); /.shards_acct: cont-568d8e-<ts>-0 through cont-aef34f-<ts>-3; cat, giraffe, igloo, linux; shard ranges "" - "cat" through "linux" - ""; steps 1-5; SVG markup not reproduced]
diff -Nru swift-2.17.0/doc/source/images/sharding_scan_basic.svg swift-2.18.0/doc/source/images/sharding_scan_basic.svg [new 259-line SVG image; text labels: /acct: cont; cat, giraffe; SVG markup not reproduced]
diff -Nru swift-2.17.0/doc/source/images/sharding_scan_load.svg swift-2.18.0/doc/source/images/sharding_scan_load.svg [new 1665-line SVG image; text labels: /acct: cont (retiring db), cont (fresh db); /.shards_acct: cont-568d8e-<ts>-0, cont-750ed3-<ts>-1, cont-4ec28d-<ts>-2; cat, giraffe, igloo; shard ranges "" - "cat", "cat" - "giraffe", "giraffe" - "igloo", "igloo" - ""; SVG markup not reproduced]
diff -Nru swift-2.17.0/doc/source/images/sharding_sharded_load.svg swift-2.18.0/doc/source/images/sharding_sharded_load.svg [new 1650-line SVG image; text labels: /acct: cont (fresh db); /.shards_acct: cont-568d8e-<ts>-0 through cont-4837ad-<ts>-4; shard ranges "" - "cat" through "linux" - ""; SVG markup not reproduced]
diff -Nru swift-2.17.0/doc/source/images/sharding_unsharded_load.svg swift-2.18.0/doc/source/images/sharding_unsharded_load.svg [new 219-line SVG image; text labels: /acct: cont; SVG markup not reproduced]
diff -Nru swift-2.17.0/doc/source/images/sharding_unsharded.svg swift-2.18.0/doc/source/images/sharding_unsharded.svg [new 199-line SVG image; text labels: /acct: cont; SVG markup not reproduced]
diff -Nru swift-2.17.0/doc/source/index.rst swift-2.18.0/doc/source/index.rst --- swift-2.17.0/doc/source/index.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/index.rst 2018-05-30 10:17:02.000000000 +0000 @@ -62,6 +62,7 @@ overview_erasure_code overview_encryption overview_backing_store + overview_container_sharding ring_background ring_partpower associated_projects
diff -Nru swift-2.17.0/doc/source/install/controller-include.txt swift-2.18.0/doc/source/install/controller-include.txt --- swift-2.17.0/doc/source/install/controller-include.txt 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/install/controller-include.txt 2018-05-30 10:17:02.000000000 +0000 @@ -55,7 +55,7 @@ [filter:authtoken] paste.filter_factory = keystonemiddleware.auth_token:filter_factory ... - auth_uri = http://controller:5000 + www_authenticate_uri = http://controller:5000 auth_url = http://controller:35357 memcached_servers = controller:11211 auth_type = password
diff -Nru swift-2.17.0/doc/source/install/controller-install-debian.rst swift-2.18.0/doc/source/install/controller-install-debian.rst --- swift-2.17.0/doc/source/install/controller-install-debian.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/install/controller-install-debian.rst 2018-05-30 10:17:02.000000000 +0000 @@ -47,4 +47,4 @@ .. code-block:: console - # curl -o /etc/swift/proxy-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/proxy-server.conf-sample?h=stable/ocata + # curl -o /etc/swift/proxy-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/proxy-server.conf-sample?h=stable/queens
diff -Nru swift-2.17.0/doc/source/install/controller-install-rdo.rst swift-2.18.0/doc/source/install/controller-install-rdo.rst --- swift-2.17.0/doc/source/install/controller-install-rdo.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/install/controller-install-rdo.rst 2018-05-30 10:17:02.000000000 +0000 @@ -45,6 +45,6 @@ .. code-block:: console - # curl -o /etc/swift/proxy-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/proxy-server.conf-sample?h=stable/ocata + # curl -o /etc/swift/proxy-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/proxy-server.conf-sample?h=stable/queens 3. ..
include:: controller-include.txt diff -Nru swift-2.17.0/doc/source/install/controller-install-ubuntu.rst swift-2.18.0/doc/source/install/controller-install-ubuntu.rst --- swift-2.17.0/doc/source/install/controller-install-ubuntu.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/install/controller-install-ubuntu.rst 2018-05-30 10:17:02.000000000 +0000 @@ -47,6 +47,6 @@ .. code-block:: console - # curl -o /etc/swift/proxy-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/proxy-server.conf-sample?h=stable/ocata + # curl -o /etc/swift/proxy-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/proxy-server.conf-sample?h=stable/queens 4. .. include:: controller-include.txt diff -Nru swift-2.17.0/doc/source/install/finalize-installation-rdo.rst swift-2.18.0/doc/source/install/finalize-installation-rdo.rst --- swift-2.17.0/doc/source/install/finalize-installation-rdo.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/install/finalize-installation-rdo.rst 2018-05-30 10:17:02.000000000 +0000 @@ -19,7 +19,7 @@ .. code-block:: console # curl -o /etc/swift/swift.conf \ - https://git.openstack.org/cgit/openstack/swift/plain/etc/swift.conf-sample?h=stable/ocata + https://git.openstack.org/cgit/openstack/swift/plain/etc/swift.conf-sample?h=stable/queens #. Edit the ``/etc/swift/swift.conf`` file and complete the following actions: diff -Nru swift-2.17.0/doc/source/install/finalize-installation-ubuntu-debian.rst swift-2.18.0/doc/source/install/finalize-installation-ubuntu-debian.rst --- swift-2.17.0/doc/source/install/finalize-installation-ubuntu-debian.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/install/finalize-installation-ubuntu-debian.rst 2018-05-30 10:17:02.000000000 +0000 @@ -19,7 +19,7 @@ .. code-block:: console # curl -o /etc/swift/swift.conf \ - https://git.openstack.org/cgit/openstack/swift/plain/etc/swift.conf-sample?h=stable/ocata + https://git.openstack.org/cgit/openstack/swift/plain/etc/swift.conf-sample?h=stable/queens #. Edit the ``/etc/swift/swift.conf`` file and complete the following actions: diff -Nru swift-2.17.0/doc/source/install/index.rst swift-2.18.0/doc/source/install/index.rst --- swift-2.17.0/doc/source/install/index.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/install/index.rst 2018-05-30 10:17:02.000000000 +0000 @@ -18,7 +18,7 @@ object storage and retrieval through a REST API. This chapter assumes a working setup of OpenStack following the -`OpenStack Installation Tutorial `_. +`OpenStack Installation Tutorial `_. Your environment must at least include the Identity service (keystone) prior to deploying Object Storage. diff -Nru swift-2.17.0/doc/source/install/next-steps.rst swift-2.18.0/doc/source/install/next-steps.rst --- swift-2.17.0/doc/source/install/next-steps.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/install/next-steps.rst 2018-05-30 10:17:02.000000000 +0000 @@ -7,4 +7,4 @@ Your OpenStack environment now includes Object Storage. To add more services, see the -`additional documentation on installing OpenStack `_ . +`additional documentation on installing OpenStack `_ . diff -Nru swift-2.17.0/doc/source/install/storage-install-rdo.rst swift-2.18.0/doc/source/install/storage-install-rdo.rst --- swift-2.17.0/doc/source/install/storage-install-rdo.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/install/storage-install-rdo.rst 2018-05-30 10:17:02.000000000 +0000 @@ -133,9 +133,9 @@ .. 
code-block:: console - # curl -o /etc/swift/account-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/account-server.conf-sample?h=stable/ocata - # curl -o /etc/swift/container-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/container-server.conf-sample?h=stable/ocata - # curl -o /etc/swift/object-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/object-server.conf-sample?h=stable/ocata + # curl -o /etc/swift/account-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/account-server.conf-sample?h=stable/queens + # curl -o /etc/swift/container-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/container-server.conf-sample?h=stable/queens + # curl -o /etc/swift/object-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/object-server.conf-sample?h=stable/queens 3. .. include:: storage-include1.txt 4. .. include:: storage-include2.txt diff -Nru swift-2.17.0/doc/source/install/storage-install-ubuntu-debian.rst swift-2.18.0/doc/source/install/storage-install-ubuntu-debian.rst --- swift-2.17.0/doc/source/install/storage-install-ubuntu-debian.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/install/storage-install-ubuntu-debian.rst 2018-05-30 10:17:02.000000000 +0000 @@ -137,9 +137,9 @@ .. code-block:: console - # curl -o /etc/swift/account-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/account-server.conf-sample?h=stable/ocata - # curl -o /etc/swift/container-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/container-server.conf-sample?h=stable/ocata - # curl -o /etc/swift/object-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/object-server.conf-sample?h=stable/ocata + # curl -o /etc/swift/account-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/account-server.conf-sample?h=stable/queens + # curl -o /etc/swift/container-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/container-server.conf-sample?h=stable/queens + # curl -o /etc/swift/object-server.conf https://git.openstack.org/cgit/openstack/swift/plain/etc/object-server.conf-sample?h=stable/queens 3. .. include:: storage-include1.txt 4. .. include:: storage-include2.txt diff -Nru swift-2.17.0/doc/source/logs.rst swift-2.18.0/doc/source/logs.rst --- swift-2.17.0/doc/source/logs.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/logs.rst 2018-05-30 10:17:02.000000000 +0000 @@ -105,6 +105,7 @@ VW :ref:`versioned_writes` SSC :ref:`copy` SYM :ref:`symlink` +SH :ref:`sharding_doc` ======================= ============================= diff -Nru swift-2.17.0/doc/source/middleware.rst swift-2.18.0/doc/source/middleware.rst --- swift-2.17.0/doc/source/middleware.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/middleware.rst 2018-05-30 10:17:02.000000000 +0000 @@ -11,6 +11,95 @@ :members: :show-inheritance: +.. _s3api: + +AWS S3 Api +========== + +.. automodule:: swift.common.middleware.s3api.s3api + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.s3token + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.s3request + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.s3response + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.exception + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.etree + :members: + :show-inheritance: + +.. 
automodule:: swift.common.middleware.s3api.utils + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.subresource + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.acl_handlers + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.acl_utils + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.controllers.base + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.controllers.service + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.controllers.bucket + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.controllers.obj + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.controllers.acl + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.controllers.s3_acl + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.controllers.multi_upload + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.controllers.multi_delete + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.controllers.versioning + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.controllers.location + :members: + :show-inheritance: + +.. automodule:: swift.common.middleware.s3api.controllers.logging + :members: + :show-inheritance: + .. _bulk: Bulk Operations (Delete and Archive Auto Extraction)
diff -Nru swift-2.17.0/doc/source/overview_acl.rst swift-2.18.0/doc/source/overview_acl.rst --- swift-2.17.0/doc/source/overview_acl.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/overview_acl.rst 2018-05-30 10:17:02.000000000 +0000 @@ -249,6 +249,98 @@ `Referer` header, the referrer ACL has very weak security. + +Example: Sharing a Container with Another User +---------------------------------------------- + +Sharing a container with another user requires knowledge of a few +parameters regarding the users. + +The sharing user must know: + +- the ``OpenStack user id`` of the other user + +The sharing user must communicate to the other user: + +- the name of the shared container +- the ``OS_STORAGE_URL`` + +Usually the ``OS_STORAGE_URL`` is not exposed directly to the user, +because the ``swift client`` by default automatically constructs the +``OS_STORAGE_URL`` from the user's credentials. + +We assume that the current directory contains the two client +environment scripts for the two users, ``sharing.openrc`` and +``other.openrc``. + +The ``sharing.openrc`` should be similar to the following: + +.. code-block:: bash + + export OS_USERNAME=sharing + # WARNING: Save the password in clear text only for testing purposes + export OS_PASSWORD=password + export OS_TENANT_NAME=projectName + export OS_AUTH_URL=https://identityHost:portNumber/v2.0 + # The following lines can be omitted + export OS_TENANT_ID=tenantIDString + export OS_REGION_NAME=regionName + export OS_CACERT=/path/to/cacertFile + +The ``other.openrc`` should be similar to the following: + + ..
code-block:: bash + + export OS_USERNAME=other + # WARNING: Save the password in clear text only for testing purposes + export OS_PASSWORD=otherPassword + export OS_TENANT_NAME=otherProjectName + export OS_AUTH_URL=https://identityHost:portNumber/v2.0 + # The following lines can be omitted + export OS_TENANT_ID=tenantIDString + export OS_REGION_NAME=regionName + export OS_CACERT=/path/to/cacertFile + +For more information see `using the OpenStack RC file +`_ + +First we figure out the other user id:: + + . other.openrc + OUID="$(openstack user show --format json "${OS_USERNAME}" | jq -r .id)" + +or alternatively:: + + . other.openrc + OUID="$(openstack token issue -f json | jq -r .user_id)" + +Then we figure out the storage URL of the sharing user:: + + . sharing.openrc + SURL="$(swift auth | awk -F = '/OS_STORAGE_URL/ {print $2}')" + +Running as the sharing user, create a shared container named ``shared`` +in read-only mode for the other user using the proper ACL:: + + . sharing.openrc + swift post --read-acl "*:${OUID}" shared + +Running as the sharing user, create and upload a test file:: + + touch void + swift upload shared void + +Running as the other user, list the files in the ``shared`` container:: + + . other.openrc + swift --os-storage-url="${SURL}" list shared + +Running as the other user, download the ``shared`` container into the +``/tmp`` directory:: + + cd /tmp + swift --os-storage-url="${SURL}" download shared + + .. _account_acls: ------------
diff -Nru swift-2.17.0/doc/source/overview_architecture.rst swift-2.18.0/doc/source/overview_architecture.rst --- swift-2.17.0/doc/source/overview_architecture.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/overview_architecture.rst 2018-05-30 10:17:02.000000000 +0000 @@ -172,6 +172,8 @@ for complete information on both Erasure Code support as well as the reconstructor. +.. _architecture_updaters: + -------- Updaters --------
diff -Nru swift-2.17.0/doc/source/overview_auth.rst swift-2.18.0/doc/source/overview_auth.rst --- swift-2.17.0/doc/source/overview_auth.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/overview_auth.rst 2018-05-30 10:17:02.000000000 +0000 @@ -138,7 +138,7 @@ [filter:authtoken] paste.filter_factory = keystonemiddleware.auth_token:filter_factory - auth_uri = http://keystonehost:5000/ + www_authenticate_uri = http://keystonehost:5000/ auth_url = http://keystonehost:35357/ auth_plugin = password project_domain_id = default @@ -153,7 +153,7 @@ The actual values for these variables will need to be set depending on your situation, but in short: -* ``auth_uri`` should point to a Keystone service from which users may +* ``www_authenticate_uri`` should point to a Keystone service from which users may retrieve tokens. This value is used in the `WWW-Authenticate` header that auth_token sends with any denial response. * ``auth_url`` points to the Keystone Admin service. This information is
diff -Nru swift-2.17.0/doc/source/overview_backing_store.rst swift-2.18.0/doc/source/overview_backing_store.rst --- swift-2.17.0/doc/source/overview_backing_store.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/overview_backing_store.rst 2018-05-30 10:17:02.000000000 +0000 @@ -172,9 +172,9 @@ Service's token.
If you use python-swiftclient you can achieve this by: - * Putting the URL in the ``preauthurl`` parameter - * Putting the in ``preauthtoken`` parameter - * Adding the X-Service-Token to the ``headers`` parameter + * Putting the URL in the ``preauthurl`` parameter + * Putting the in ``preauthtoken`` parameter + * Adding the X-Service-Token to the ``headers`` parameter Using the HTTP_X_SERVICE_CATALOG to get Swift Account Name
diff -Nru swift-2.17.0/doc/source/overview_container_sharding.rst swift-2.18.0/doc/source/overview_container_sharding.rst --- swift-2.17.0/doc/source/overview_container_sharding.rst 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/doc/source/overview_container_sharding.rst 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,690 @@ +.. _sharding_doc: + +================== +Container Sharding +================== + +Container sharding is an operator-controlled feature that may be used to shard +very large container databases into a number of smaller shard containers. + +.. note:: + + Container sharding is currently an experimental feature. It is strongly + recommended that operators gain experience of sharding containers in a + non-production cluster before using it in production. + + The sharding process involves moving all sharding container database + records via the container replication engine; the time taken to complete + sharding is dependent upon the existing cluster load and the performance of + the container database being sharded. + + There is currently no documented process for reversing the sharding + process once sharding has been enabled. + + +---------- +Background +---------- +The metadata for each container in Swift is stored in an SQLite database. This +metadata includes: information about the container such as its name, +modification time and current object count; user metadata that may have been written +to the container by clients; a record of every object in the container. The +container database object records are used to generate container listings in +response to container GET requests; each object record stores the object's +name, size, hash and content-type as well as associated timestamps. + +As the number of objects in a container increases, so does the number of object +records in the container database. Eventually the container database +performance starts to degrade and the time taken to update an object record +increases. This can result in object updates timing out, with a corresponding +increase in the backlog of pending :ref:`asynchronous updates +` on object servers. Container databases are typically +replicated on several nodes and any database performance degradation can also +result in longer :doc:`container replication ` times. + +The point at which container database performance starts to degrade depends +upon the choice of hardware in the container ring. Anecdotal evidence suggests +that containers with tens of millions of object records have noticeably +degraded performance. + +This performance degradation can be avoided by ensuring that clients use an +object naming scheme that disperses objects across a number of containers, +thereby distributing load across a number of container databases. However, that +is not always desirable nor is it under the control of the cluster operator. + +Swift's container sharding feature provides the operator with a mechanism to +distribute the load on a single client-visible container across multiple, +hidden, shard containers, each of which stores a subset of the container's +object records.
+Clients are unaware of container sharding; clients continue to use the same
+API to access a container that, if sharded, maps to a number of shard
+containers within the Swift cluster.
+
+------------------------
+Deployment and operation
+------------------------
+
+Upgrade Considerations
+----------------------
+
+It is essential that all servers in a Swift cluster have been upgraded to
+support the container sharding feature before attempting to shard a container.
+
+Identifying containers in need of sharding
+------------------------------------------
+
+Container sharding is currently initiated by the ``swift-manage-shard-ranges``
+CLI tool :ref:`described below <swift-manage-shard-ranges>`. Operators must
+first identify containers that are candidates for sharding. To assist with
+this, the :ref:`sharder_daemon` inspects the size of containers that it visits
+and writes a list of sharding candidates to recon cache. For example::
+
+    "sharding_candidates": {
+        "found": 1,
+        "top": [
+            {
+                "account": "AUTH_test",
+                "container": "c1",
+                "file_size": 497763328,
+                "meta_timestamp": "1525346445.31161",
+                "node_index": 2,
+                "object_count": 3349028,
+                "path": <path to container db file>,
+                "root": "AUTH_test/c1"
+            }
+        ]
+    }
+
+A container is considered to be a sharding candidate if its object count is
+greater than or equal to the ``shard_container_threshold`` option.
+The number of candidates reported is limited to a number configured by the
+``recon_candidates_limit`` option such that only the largest candidate
+containers are included in the ``sharding_candidates`` data.
+
+
+.. _swift-manage-shard-ranges:
+
+``swift-manage-shard-ranges`` CLI tool
+--------------------------------------
+
+.. automodule:: swift.cli.manage_shard_ranges
+    :members:
+    :show-inheritance:
+
+
+.. _sharder_daemon:
+
+``container-sharder`` daemon
+----------------------------
+
+Once sharding has been enabled for a container, the act of sharding is
+performed by the :ref:`container-sharder`. The :ref:`container-sharder` daemon
+must be running on all container servers. The ``container-sharder`` daemon
+periodically visits each container database to perform any container sharding
+tasks that are required.
+
+The ``container-sharder`` daemon requires a ``[container-sharder]`` config
+section to exist in the container server configuration file; a sample config
+section is shown in the `container-server.conf-sample` file.
+
+.. note::
+
+    Several of the ``[container-sharder]`` config options are only significant
+    when the ``auto_shard`` option is enabled. This option enables the
+    ``container-sharder`` daemon to automatically identify containers that are
+    candidates for sharding and initiate the sharding process, instead of using
+    the ``swift-manage-shard-ranges`` tool. The ``auto_shard`` option is
+    currently NOT recommended for production systems and should be set to
+    ``false`` (the default value).
+
+The container sharder uses an internal client and therefore requires an
+internal client configuration file to exist. By default the internal-client
+configuration file is expected to be found at
+`/etc/swift/internal-client.conf`. An alternative location for the
+configuration file may be specified using the ``internal_client_conf_path``
+option in the ``[container-sharder]`` config section.
+
+The content of the internal-client configuration file should be the same as the
+`internal-client.conf-sample` file. In particular, the internal-client
+configuration should have::
+
+    account_autocreate = True
+
+in the ``[proxy-server]`` section.
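+
+As an illustration only (the `container-server.conf-sample` file remains the
+authoritative reference), a minimal ``[container-sharder]`` config section
+might look like the following; the option names all appear in the sample file
+and the values shown are simply the documented defaults::
+
+    [container-sharder]
+    # automatic sharding is not recommended in production
+    auto_shard = false
+    shard_container_threshold = 1000000
+    cleave_batch_size = 2
+    internal_client_conf_path = /etc/swift/internal-client.conf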
+
+A container database may require several visits by the ``container-sharder``
+daemon before it is fully sharded. On each visit the ``container-sharder``
+daemon will move a subset of object records to new shard containers by cleaving
+new shard container databases from the original. By default, two shards are
+processed per visit; this number may be configured by the ``cleave_batch_size``
+option.
+
+The ``container-sharder`` daemon periodically writes progress data for
+containers that are being sharded to recon cache. For example::
+
+    "sharding_in_progress": {
+        "all": [
+            {
+                "account": "AUTH_test",
+                "active": 0,
+                "cleaved": 2,
+                "container": "c1",
+                "created": 5,
+                "db_state": "sharding",
+                "error": null,
+                "file_size": 26624,
+                "found": 0,
+                "meta_timestamp": "1525349617.46235",
+                "node_index": 1,
+                "object_count": 3349030,
+                "path": <path to container db file>,
+                "root": "AUTH_test/c1",
+                "state": "sharding"
+            }
+        ]
+    }
+
+This example indicates that from a total of 7 shard ranges, 2 have been cleaved
+whereas 5 remain in created state waiting to be cleaved.
+
+Shard containers are created in an internal account and not visible to clients.
+By default, shard containers for an account ``AUTH_test`` are created in the
+internal account ``.shards_AUTH_test``.
+
+Once a container has started sharding, object updates to that container may be
+redirected to the shard container. The ``container-sharder`` daemon is also
+responsible for sending updates of a shard's object count and bytes_used to the
+original container so that aggregate object count and bytes used values can be
+returned in responses to client requests.
+
+.. note::
+
+    The ``container-sharder`` daemon must continue to run on all container
+    servers in order for shard object stats updates to be generated.
+
+
+--------------
+Under the hood
+--------------
+
+Terminology
+-----------
+
+================== ====================================================
+Name               Description
+================== ====================================================
+Root container     The original container that lives in the
+                   user's account. It holds references to its
+                   shard containers.
+Retiring DB        The original database file that is to be sharded.
+Fresh DB           A database file that will replace the retiring
+                   database.
+Epoch              A timestamp at which the fresh DB is created; the
+                   epoch value is embedded in the fresh DB filename.
+Shard range        A range of the object namespace defined by a lower
+                   bound and an upper bound.
+Shard container    A container that holds object records for a shard
+                   range. Shard containers exist in a hidden account
+                   mirroring the user's account.
+Parent container   The container from which a shard container has been
+                   cleaved. When first sharding a root container each
+                   shard's parent container will be the root container.
+                   When sharding a shard container each shard's parent
+                   container will be the sharding shard container.
+Misplaced objects  Items that don't belong in a container's shard
+                   range. These will be moved to their correct
+                   location by the container-sharder.
+Cleaving           The act of moving object records within a shard
+                   range to a shard container database.
+Shrinking          The act of merging a small shard container into
+                   another shard container in order to delete the
+                   small shard container.
+Donor              The shard range that is shrinking away.
+Acceptor           The shard range into which a donor is merged.
+================== ====================================================
+
+
+Finding shard ranges
+--------------------
+
+The end goal of sharding a container is to replace the original container
+database which has grown very large with a number of shard container databases,
+each of which is responsible for storing a range of the entire object
+namespace. The first step towards achieving this is to identify an appropriate
+set of contiguous object namespaces, known as shard ranges, each of which
+contains a similarly sized portion of the container's current object content.
+
+Shard ranges cannot simply be selected by sharding the namespace uniformly,
+because object names are not guaranteed to be distributed uniformly. If the
+container were naively sharded into two shard ranges, one containing all
+object names up to `m` and the other containing all object names beyond `m`,
+then if all object names actually start with `o` the outcome would be an
+extremely unbalanced pair of shard containers.
+
+It is also too simplistic to assume that every container that requires sharding
+can be sharded into two. This might be the goal in the ideal world, but in
+practice there will be containers that have grown very large and should be
+sharded into many shards. Furthermore, the time required to find the exact
+mid-point of the existing object names in a large SQLite database would
+increase with container size.
+
+For these reasons, shard ranges of size `N` are found by searching for the
+`Nth` object in the database table, sorted by object name, and then searching
+for the `(2 * N)th` object, and so on until all objects have been searched. For
+a container that has exactly `2N` objects, the end result is the same as
+sharding the container at the midpoint of its object names. In practice
+sharding would typically be enabled for containers with greater than `2N`
+objects, in which case more than two shard ranges will be found, the last one
+probably containing fewer than `N` objects. With containers having large
+multiples of `N` objects, shard ranges can be identified in batches, which
+enables a more scalable solution.
+
+To illustrate this process, consider a very large container in a user account
+``acct`` that is a candidate for sharding:
+
+.. image:: images/sharding_unsharded.svg
+
+The :ref:`swift-manage-shard-ranges` tool ``find`` sub-command searches the
+object table for the `Nth` object whose name will become the upper bound of the
+first shard range, and the lower bound of the second shard range. The lower
+bound of the first shard range is the empty string.
+
+For the purposes of this example the first upper bound is `cat`:
+
+.. image:: images/sharding_scan_basic.svg
+
+:ref:`swift-manage-shard-ranges` continues to search the container to find
+further shard ranges, with the final upper bound also being the empty string.
+
+Enabling sharding
+-----------------
+
+Once shard ranges have been found the :ref:`swift-manage-shard-ranges`
+``replace`` sub-command is used to insert them into the `shard_ranges` table
+of the container database. In addition to its lower and upper bounds, each
+shard range is given a unique name.
+
+The ``enable`` sub-command then creates some final state required to initiate
+sharding the container, including a special shard range record referred to as
+the container's `own_shard_range` whose name is equal to the container's path.
+This is used to keep a record of the object namespace that the container
+covers, which for user containers is always the entire namespace.
+Sharding of the container will only begin when its own shard range's state has
+been set to ``SHARDING``.
+
+The :class:`~swift.common.utils.ShardRange` class
+--------------------------------------------------
+
+The :class:`~swift.common.utils.ShardRange` class provides methods for
+interacting with the attributes and state of a shard range. The class
+encapsulates the following properties:
+
+* The name of the shard range which is also the name of the shard container
+  used to hold object records in its namespace.
+* Lower and upper bounds which define the object namespace of the shard range.
+* A deleted flag.
+* A timestamp at which the bounds and deleted flag were last modified.
+* The object stats for the shard range i.e. object count and bytes used.
+* A timestamp at which the object stats were last modified.
+* The state of the shard range, and an epoch, which is the timestamp used in
+  the shard container's database file name.
+* A timestamp at which the state and epoch were last modified.
+
+A shard range progresses through the following states:
+
+* FOUND: the shard range has been identified in the container that is to be
+  sharded but no resources have been created for it.
+* CREATED: a shard container has been created to store the contents of the
+  shard range.
+* CLEAVED: the sharding container's contents for the shard range have been
+  copied to the shard container from *at least one replica* of the sharding
+  container.
+* ACTIVE: a sharding container's constituent shard ranges are moved to this
+  state when all shard ranges in the sharding container have been cleaved.
+* SHRINKING: the shard range has been enabled for shrinking; or
+* SHARDING: the shard range has been enabled for sharding into further
+  sub-shards.
+* SHARDED: the shard range has completed sharding or shrinking; the container
+  will typically now have a number of constituent ACTIVE shard ranges.
+
+.. note::
+
+    Shard range state represents the most advanced state of the shard range on
+    any replica of the container. For example, a shard range in CLEAVED state
+    may not have completed cleaving on all replicas but has cleaved on at least
+    one replica.
+
+Fresh and retiring database files
+---------------------------------
+
+As alluded to earlier, writing to a large container causes increased latency
+for the container servers. Once sharding has been initiated on a container it
+is desirable to stop writing to the large database; ultimately it will be
+unlinked. This is primarily achieved by redirecting object updates to new shard
+containers as they are created (see :ref:`redirecting_updates` below), but some
+object updates may still need to be accepted by the root container and other
+container metadata must still be modifiable.
+
+To render the large `retiring` database effectively read-only, when the
+:ref:`sharder_daemon` finds a container with a set of shard range records,
+including an `own_shard_range`, it first creates a fresh database file which
+will ultimately replace the existing `retiring` database. For a retiring DB
+whose filename is::
+
+    <hash>.db
+
+the fresh database file name is of the form::
+
+    <hash>_<epoch>.db
+
+where `epoch` is a timestamp stored in the container's `own_shard_range`.
+
+The fresh DB has a copy of the shard ranges table from the retiring DB and all
+other container metadata apart from the object records. Once a fresh DB file
+has been created it is used to store any new object updates and no more object
+records are written to the retiring DB file.
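+
+To make the file naming concrete, the following minimal Python sketch (an
+illustration only, using a hypothetical helper name rather than the actual
+Swift implementation) derives a fresh DB file name from a retiring DB file
+name and an epoch::
+
+    import os
+
+    def fresh_db_path(retiring_db_path, epoch):
+        # e.g. '/path/to/<hash>.db' and epoch '1525346445.31161'
+        # yield '/path/to/<hash>_1525346445.31161.db'
+        dirname, filename = os.path.split(retiring_db_path)
+        db_hash = filename[:-len('.db')]
+        return os.path.join(dirname, '%s_%s.db' % (db_hash, epoch))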
+
+Once the sharding process has completed, the retiring DB file will be unlinked
+leaving only the fresh DB file in the container's directory. There are
+therefore three states that the container DB directory may be in during the
+sharding process: UNSHARDED, SHARDING and SHARDED.
+
+.. image:: images/sharding_db_states.svg
+
+If the container ever shrinks to the point that it has no shards then the fresh
+DB starts to store object records, behaving the same as an unsharded container.
+This is known as the COLLAPSED state.
+
+In summary, the DB states that any container replica may be in are:
+
+- UNSHARDED - In this state there is just one standard container database. All
+  containers are originally in this state.
+- SHARDING - There are now two databases, the retiring database and a fresh
+  database. The fresh database stores any metadata, container level stats,
+  an object holding table, and a table that stores shard ranges.
+- SHARDED - There is only one database, the fresh database, which has one or
+  more shard ranges in addition to its own shard range. The retiring database
+  has been unlinked.
+- COLLAPSED - There is only one database, the fresh database, which has only
+  its own shard range and stores object records.
+
+.. note::
+
+    DB state is unique to each replica of a container and is not necessarily
+    synchronised with shard range state.
+
+Creating shard containers
+-------------------------
+
+The :ref:`sharder_daemon` next creates a shard container for each shard range
+using the shard range name as the name of the shard container:
+
+.. image:: /images/sharding_cleave_basic.svg
+
+Each shard container has an `own_shard_range` record which has the
+lower and upper bounds of the object namespace for which it is responsible, and
+a reference to the sharding user container, which is referred to as the
+`root_container`. Unlike the `root_container`, the shard container's
+`own_shard_range` does not cover the entire namespace.
+
+A shard range name takes the form ``<account>/<container>`` where ``<account>``
+is a hidden account and ``<container>`` is a container name that is derived
+from the root container.
+
+The account name ``<account>`` used for shard containers is formed by prefixing
+the user account with the string ``.shards_``. This avoids namespace collisions
+and also keeps all the shard containers out of view from users of the account.
+
+The container name for each shard container has the form::
+
+    <root container name>-<hash of parent container>-<timestamp>-<shard index>
+
+where `root container name` is the name of the user container to which the
+contents of the shard container belong, `parent container` is the name of the
+container from which the shard is being cleaved, `timestamp` is the time at
+which the shard range was created and `shard index` is the position of the
+shard range in the name-ordered list of shard ranges for the `parent
+container`.
+
+When sharding a user container the parent container name will be the same as
+the root container. However, if a *shard container* grows to a size that
+requires sharding, then the parent container name for its shards will be the
+name of the sharding shard container.
+
+For example, consider a user container with path ``AUTH_user/c`` which is
+sharded into two shard containers whose names will be::
+
+    .shards_AUTH_user/c-<hash>-1234512345.12345-0
+    .shards_AUTH_user/c-<hash>-1234512345.12345-1
+
+If the first shard container is subsequently sharded into a further two shard
+containers then they will be named::
+
+    .shards_AUTH_user/c-<hash of (.shards_AUTH_user/c-<hash>-1234567890.12345-0)>-1234567890.12345-0
+    .shards_AUTH_user/c-<hash of (.shards_AUTH_user/c-<hash>-1234567890.12345-0)>-1234567890.12345-1
+
+This naming scheme guarantees that shards, and shards of shards, each have a
+unique name of bounded length.
+
+
+Cleaving shard containers
+-------------------------
+
+Having created empty shard containers the sharder daemon will proceed to cleave
+objects from the retiring database to each shard range. Cleaving occurs in
+batches of two (by default) shard ranges, so if a container has more than two
+shard ranges then the daemon must visit it multiple times to complete cleaving.
+
+To cleave a shard range the daemon creates a shard database for the shard
+container on a local device. This device may be one of the shard container's
+primary nodes but often it will not be. Object records from the corresponding
+shard range namespace are then copied from the retiring DB to this shard DB.
+
+Swift's container replication mechanism is then used to replicate the shard DB
+to its primary nodes. Checks are made to ensure that the new shard container DB
+has been replicated to a sufficient number of its primary nodes before it is
+considered to have been successfully cleaved. By default the daemon requires
+successful replication of a new shard broker to at least a quorum of the
+container ring's replica count, but this requirement can be tuned using the
+``shard_replication_quorum`` option.
+
+Once a shard range has been successfully cleaved from a retiring database the
+daemon transitions its state to ``CLEAVED``. It should be noted that this state
+transition occurs as soon as any one of the retiring DB replicas has cleaved
+the shard range, and therefore does not imply that all retiring DB replicas
+have cleaved that range. The significance of the state transition is that the
+shard container is now considered suitable for contributing to object listings,
+since its contents are present on a quorum of its primary nodes and are the
+same as at least one of the retiring DBs for that namespace.
+
+Once a shard range is in the ``CLEAVED`` state, the requirement for
+'successful' cleaving of other instances of the retiring DB may optionally be
+relaxed since it is not so imperative that their contents are replicated
+*immediately* to their primary nodes. The ``existing_shard_replication_quorum``
+option can be used to reduce the quorum required for a cleaved shard range to
+be considered successfully replicated by the sharder daemon.
+
+.. note::
+
+    Once cleaved, shard container DBs will continue to be replicated by the
+    normal `container-replicator` daemon so that they will eventually be fully
+    replicated to all primary nodes regardless of any replication quorum
+    options used by the sharder daemon.
+
+The cleaving progress of each replica of a retiring DB must be
+tracked independently of the shard range state. This is done using a per-DB
+CleavingContext object that maintains a cleaving cursor for the retiring DB
+that it is associated with. The cleaving cursor is simply the upper bound of
+the last shard range to have been cleaved *from that particular retiring DB*.
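+
+The idea of the cleaving cursor can be illustrated with a minimal Python
+sketch (this is an illustration only, not the actual ``CleavingContext``
+implementation in the sharder)::
+
+    class IllustrativeCleavingContext(object):
+        def __init__(self, ref, cursor=''):
+            self.ref = ref        # id of the retiring DB being tracked
+            self.cursor = cursor  # upper bound of the last cleaved shard range
+
+        def advance(self, cleaved_upper_bound):
+            # record progress after a shard range is cleaved from this DB
+            self.cursor = cleaved_upper_bound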
+ +Each CleavingContext is stored in the sharding container's sysmeta under a key +that is the ``id`` of the retiring DB. Since all container DB files have a +unique ``id``, this guarantees that each retiring DB will have a unique +CleavingContext. Furthermore, if the retiring DB file is changed, for example +by an rsync_then_merge replication operation which might change the contents of +the DB's object table, then it will get a new unique CleavingContext. + +A CleavingContext maintains other state that is used to ensure that a retiring +DB is only considered to be fully cleaved, and ready to be deleted, if *all* of +its object rows have been cleaved to a shard range. + +Once all shard ranges have been cleaved from the retiring DB it is deleted. The +container is now represented by the fresh DB which has a table of shard range +records that point to the shard containers that store the container's object +records. + +.. _redirecting_updates: + +Redirecting object updates +-------------------------- + +Once a shard container exists, object updates arising from new client requests +and async pending files are directed to the shard container instead of the root +container. This takes load off of the root container. + +For a sharded (or partially sharded) container, when the proxy receives a new +object request it issues a GET request to the container for data describing a +shard container to which the object update should be sent. The proxy then +annotates the object request with the shard container location so that the +object server will forward object updates to the shard container. If those +updates fail then the async pending file that is written on the object server +contains the shard container location. + +When the object updater processes async pending files for previously failed +object updates, it may not find a shard container location. In this case the +updater sends the update to the `root container`, which returns a redirection +response with the shard container location. + +.. note:: + + Object updates are directed to shard containers as soon as they exist, even + if the retiring DB object records have not yet been cleaved to the shard + container. This prevents further writes to the retiring DB and also avoids + the fresh DB being polluted by new object updates. The goal is to + ultimately have all object records in the shard containers and none in the + root container. + +Building container listings +--------------------------- + +Listing requests for a sharded container are handled by querying the shard +containers for components of the listing. The proxy forwards the client listing +request to the root container, as it would for an unsharded container, but the +container server responds with a list of shard ranges rather than objects. The +proxy then queries each shard container in namespace order for their listing, +until either the listing length limit is reached or all shard ranges have been +listed. + +While a container is still in the process of sharding, only *cleaved* shard +ranges are used when building a container listing. Shard ranges that have not +yet cleaved will not have any object records from the root container. The root +container continues to provide listings for the uncleaved part of its +namespace. + +.. note:: + + New object updates are redirected to shard containers that have not yet been + cleaved. These updates will not therefore be included in container listings + until their shard range has been cleaved. 
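+
+The listing behaviour described above can be summarised by the following
+Python sketch, which is an illustration only and not the proxy's actual code
+path::
+
+    def build_listing(cleaved_shard_ranges, get_shard_listing, limit):
+        # cleaved_shard_ranges are returned by the root in namespace order
+        listing = []
+        for shard_range in cleaved_shard_ranges:
+            if len(listing) >= limit:
+                break
+            # each shard container contributes its portion of the namespace
+            listing.extend(
+                get_shard_listing(shard_range, limit - len(listing)))
+        return listing[:limit]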
+ +Example request redirection +--------------------------- + +As an example, consider a sharding container in which 3 shard ranges have been +found ending in cat, giraffe and igloo. Their respective shard containers have +been created so update requests for objects up to "igloo" are redirected to the +appropriate shard container. The root DB continues to handle listing requests +and update requests for any object name beyond "igloo". + +.. image:: images/sharding_scan_load.svg + +The sharder daemon cleaves objects from the retiring DB to the shard range DBs; +it also moves any misplaced objects from the root container's fresh DB to the +shard DB. Cleaving progress is represented by the blue line. Once the first +shard range has been cleaved listing requests for that namespace are directed +to the shard container. The root container still provides listings for the +remainder of the namespace. + +.. image:: images/sharding_cleave1_load.svg + +The process continues: the sharder cleaves the next range and a new range is +found with upper bound of "linux". Now the root container only needs to handle +listing requests up to "giraffe" and update requests for objects whose name is +greater than "linux". Load will continue to diminish on the root DB and be +dispersed across the shard DBs. + +.. image:: images/sharding_cleave2_load.svg + + +Container replication +--------------------- + +Shard range records are replicated between container DB replicas in much the +same way as object records are for unsharded containers. However, the usual +replication of object records between replicas of a container is halted as soon +as a container is capable of being sharded. Instead, object records are moved +to their new locations in shard containers. This avoids unnecessary replication +traffic between container replicas. + +To facilitate this, shard ranges are both 'pushed' and 'pulled' during +replication, prior to any attempt to replicate objects. This means that the +node initiating replication learns about shard ranges from the destination node +early during the replication process and is able to skip object replication if +it discovers that it has shard ranges and is able to shard. + +.. note:: + + When the destination DB for container replication is missing then the + 'complete_rsync' replication mechanism is still used and in this case only + both object records and shard range records are copied to the destination + node. + +Container deletion +------------------ + +Sharded containers may be deleted by a ``DELETE`` request just like an +unsharded container. A sharded container must be empty before it can be deleted +which implies that all of its shard containers must have reported that they are +empty. + +Shard containers are *not* immediately deleted when their root container is +deleted; the shard containers remain undeleted so that they are able to +continue to receive object updates that might arrive after the root container +has been deleted. Shard containers continue to update their deleted root +container with their object stats. If a shard container does receive object +updates that cause it to no longer be empty then the root container will no +longer be considered deleted once that shard container sends an object stats +update. + + +Sharding a shard container +-------------------------- + +A shard container may grow to a size that requires it to be sharded. 
+``swift-manage-shard-ranges`` may be used to identify shard ranges within a +shard container and enable sharding in the same way as for a root container. +When a shard is sharding it notifies the root container of its shard ranges so +that the root container can start to redirect object updates to the new +'sub-shards'. When the shard has completed sharding the root is aware of all +the new sub-shards and the sharding shard deletes its shard range record in the +root container shard ranges table. At this point the root container is aware of +all the new sub-shards which collectively cover the namespace of the +now-deleted shard. + +There is no hierarchy of shards beyond the root container and its immediate +shards. When a shard shards, its sub-shards are effectively re-parented with +the root container. + + +Shrinking a shard container +--------------------------- + +A shard container's contents may reduce to a point where the shard container is +no longer required. If this happens then the shard container may be shrunk into +another shard range. Shrinking is achieved in a similar way to sharding: an +'acceptor' shard range is written to the shrinking shard container's shard +ranges table; unlike sharding, where shard ranges each cover a subset of the +sharding container's namespace, the acceptor shard range is a superset of the +shrinking shard range. + +Once given an acceptor shard range the shrinking shard will cleave itself to +its acceptor, and then delete itself from the root container shard ranges +table. diff -Nru swift-2.17.0/doc/source/overview_encryption.rst swift-2.18.0/doc/source/overview_encryption.rst --- swift-2.17.0/doc/source/overview_encryption.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/overview_encryption.rst 2018-05-30 10:17:02.000000000 +0000 @@ -334,7 +334,7 @@ Plaintext data is encrypted to ciphertext using the AES cipher with 256-bit keys implemented by the python `cryptography package -`_. The cipher is used in counter +`_. The cipher is used in counter (CTR) mode so that any byte or range of bytes in the ciphertext may be decrypted independently of any other bytes in the ciphertext. This enables very simple handling of ranged GETs. diff -Nru swift-2.17.0/doc/source/overview_object_versioning.rst swift-2.18.0/doc/source/overview_object_versioning.rst --- swift-2.17.0/doc/source/overview_object_versioning.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/doc/source/overview_object_versioning.rst 2018-05-30 10:17:02.000000000 +0000 @@ -2,5 +2,4 @@ ================= .. automodule:: swift.common.middleware.versioned_writes - :members: :show-inheritance: diff -Nru swift-2.17.0/etc/account-server.conf-sample swift-2.18.0/etc/account-server.conf-sample --- swift-2.17.0/etc/account-server.conf-sample 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/etc/account-server.conf-sample 2018-05-30 10:17:02.000000000 +0000 @@ -163,6 +163,25 @@ # Work only with ionice_class. # ionice_class = # ionice_priority = +# +# The handoffs_only mode option is for special-case emergency +# situations such as full disks in the cluster. This option SHOULD NOT +# BE ENABLED except in emergencies. When handoffs_only mode is enabled +# the replicator will *only* replicate from handoff nodes to primary +# nodes and will not sync primary nodes with other primary nodes. +# +# This has two main effects: first, the replicator becomes much more +# effective at removing misplaced databases, thereby freeing up disk +# space at a much faster pace than normal. 
Second, the replicator does +# not sync data between primary nodes, so out-of-sync account and +# container listings will not resolve while handoffs_only is enabled. +# +# This mode is intended to allow operators to temporarily sacrifice +# consistency in order to gain faster rebalancing, such as during a +# capacity addition with nearly-full disks. It is not intended for +# long-term use. +# +# handoffs_only = no [account-auditor] # You can override the default log routing for this app here (don't use set!): diff -Nru swift-2.17.0/etc/container-server.conf-sample swift-2.18.0/etc/container-server.conf-sample --- swift-2.17.0/etc/container-server.conf-sample 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/etc/container-server.conf-sample 2018-05-30 10:17:02.000000000 +0000 @@ -69,6 +69,10 @@ # Work only with ionice_class. # ionice_class = # ionice_priority = +# +# The prefix used for hidden auto-created accounts, for example accounts in +# which shard containers are created. Defaults to '.'. +# auto_create_account_prefix = . [pipeline:main] pipeline = healthcheck recon container-server @@ -172,6 +176,25 @@ # Work only with ionice_class. # ionice_class = # ionice_priority = +# +# The handoffs_only mode option is for special-case emergency +# situations such as full disks in the cluster. This option SHOULD NOT +# BE ENABLED except in emergencies. When handoffs_only mode is enabled +# the replicator will *only* replicate from handoff nodes to primary +# nodes and will not sync primary nodes with other primary nodes. +# +# This has two main effects: first, the replicator becomes much more +# effective at removing misplaced databases, thereby freeing up disk +# space at a much faster pace than normal. Second, the replicator does +# not sync data between primary nodes, so out-of-sync account and +# container listings will not resolve while handoffs_only is enabled. +# +# This mode is intended to allow operators to temporarily sacrifice +# consistency in order to gain faster rebalancing, such as during a +# capacity addition with nearly-full disks. It is not intended for +# long-term use. +# +# handoffs_only = no [container-updater] # You can override the default log routing for this app here (don't use set!): @@ -304,3 +327,117 @@ # # unwind the iterator of applications # unwind = false + +[container-sharder] +# You can override the default log routing for this app here (don't use set!): +# log_name = container-sharder +# log_facility = LOG_LOCAL0 +# log_level = INFO +# log_address = /dev/log +# +# Container sharder specific settings +# +# If the auto_shard option is true then the sharder will automatically select +# containers to shard, scan for shard ranges, and select shards to shrink. +# The default is false. +# Warning: auto-sharding is still under development and should not be used in +# production; do not set this option to true in a production cluster. +# auto_shard = false +# +# When auto-sharding is enabled shard_container_threshold defines the object +# count at which a container with container-sharding enabled will start to +# shard. shard_container_threshold also indirectly determines the initial +# nominal size of shard containers, which is shard_container_threshold // 2, as +# well as determining the thresholds for shrinking and merging shard +# containers. 
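+# (As an illustrative calculation only: with the default
+# shard_container_threshold of 1000000, a container becomes a sharding
+# candidate at 1000000 objects and each newly found shard range nominally
+# covers about 500000 objects.)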
+# shard_container_threshold = 1000000
+#
+# When auto-sharding is enabled shard_shrink_point defines the object count
+# below which a 'donor' shard container will be considered for shrinking into
+# another 'acceptor' shard container. shard_shrink_point is a percentage of
+# shard_container_threshold, e.g. the default value of 5 means 5% of the
+# shard_container_threshold.
+# shard_shrink_point = 5
+#
+# When auto-sharding is enabled shard_shrink_merge_point defines the maximum
+# allowed size of an acceptor shard container after having a donor merged into
+# it. Shard_shrink_merge_point is a percentage of shard_container_threshold,
+# e.g. the default value of 75 means that the projected sum of a donor object
+# count and acceptor count must be less than 75% of shard_container_threshold
+# for the donor to be allowed to merge into the acceptor.
+#
+# For example, if the shard_container_threshold is 1 million,
+# shard_shrink_point is 5, and shard_shrink_merge_point is 75 then a shard will
+# be considered for shrinking if it has less than or equal to 50 thousand
+# objects but will only merge into an acceptor if the combined object count
+# would be less than or equal to 750 thousand objects.
+# shard_shrink_merge_point = 75
+#
+# When auto-sharding is enabled shard_scanner_batch_size defines the maximum
+# number of shard ranges that will be found each time the sharder daemon visits
+# a sharding container. If necessary the sharder daemon will continue to search
+# for more shard ranges each time it visits the container.
+# shard_scanner_batch_size = 10
+#
+# cleave_batch_size defines the number of shard ranges that will be cleaved
+# each time the sharder daemon visits a sharding container.
+# cleave_batch_size = 2
+#
+# cleave_row_batch_size defines the size of batches of object rows read from a
+# sharding container and merged to a shard container during cleaving.
+# cleave_row_batch_size = 10000
+#
+# Defines the number of successfully replicated shard dbs required when
+# cleaving a previously uncleaved shard range before the sharder will progress
+# to the next shard range. The value should be less than or equal to the
+# container ring replica count. The default of 'auto' causes the container ring
+# quorum value to be used. This option only applies to the container-sharder
+# replication and does not affect the number of shard container replicas that
+# will eventually be replicated by the container-replicator.
+# shard_replication_quorum = auto
+#
+# Defines the number of successfully replicated shard dbs required when
+# cleaving a shard range that has been previously cleaved on another node
+# before the sharder will progress to the next shard range. The value should be
+# less than or equal to the container ring replica count. The default of 'auto'
+# causes the shard_replication_quorum value to be used. This option only
+# applies to the container-sharder replication and does not affect the number
+# of shard container replicas that will eventually be replicated by the
+# container-replicator.
+# existing_shard_replication_quorum = auto
+#
+# The sharder uses an internal client to create and make requests to
+# containers. The absolute path to the client config file can be configured.
+# internal_client_conf_path = /etc/swift/internal-client.conf
+#
+# The number of times the internal client will retry requests.
+# request_tries = 3 +# +# Each time the sharder dumps stats to the recon cache file it includes a list +# of containers that appear to need sharding but are not yet sharding. By +# default this list is limited to the top 5 containers, ordered by object +# count. The limit may be changed by setting recon_candidates_limit to an +# integer value. A negative value implies no limit. +# recon_candidates_limit = 5 +# +# Large databases tend to take a while to work with, but we want to make sure +# we write down our progress. Use a larger-than-normal broker timeout to make +# us less likely to bomb out on a LockTimeout. +# broker_timeout = 60 +# +# Time in seconds to wait between sharder cycles +# interval = 30 +# +# The container-sharder accepts the following configuration options as defined +# in the container-replicator section: +# +# per_diff = 1000 +# max_diffs = 100 +# concurrency = 8 +# node_timeout = 10 +# conn_timeout = 0.5 +# reclaim_age = 604800 +# rsync_compress = no +# rsync_module = {replication_ip}::container +# recon_cache_path = /var/cache/swift +# diff -Nru swift-2.17.0/etc/keymaster.conf-sample swift-2.18.0/etc/keymaster.conf-sample --- swift-2.17.0/etc/keymaster.conf-sample 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/etc/keymaster.conf-sample 2018-05-30 10:17:02.000000000 +0000 @@ -52,7 +52,7 @@ # project_id = changeme # The Keystone URL to authenticate to. The value of auth_endpoint may be -# set according to the value of auth_uri in [filter:authtoken] in +# set according to the value of www_authenticate_uri in [filter:authtoken] in # proxy-server.conf. # auth_endpoint = http://keystonehost/identity diff -Nru swift-2.17.0/etc/object-server.conf-sample swift-2.18.0/etc/object-server.conf-sample --- swift-2.17.0/etc/object-server.conf-sample 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/etc/object-server.conf-sample 2018-05-30 10:17:09.000000000 +0000 @@ -120,7 +120,7 @@ # Comma separated list of headers that can be set in metadata on an object. # This list is in addition to X-Object-Meta-* headers and cannot include # Content-Type, etag, Content-Length, or deleted -# allowed_headers = Content-Disposition, Content-Encoding, X-Delete-At, X-Object-Manifest, X-Static-Large-Object +# allowed_headers = Content-Disposition, Content-Encoding, X-Delete-At, X-Object-Manifest, X-Static-Large-Object, Cache-Control, Content-Language, Expires, X-Robots-Tag # # auto_create_account_prefix = . # @@ -225,7 +225,16 @@ # run_pause is deprecated, use interval instead # run_pause = 30 # +# Number of concurrent replication jobs to run. This is per-process, +# so replicator_workers=W and concurrency=C will result in W*C +# replication jobs running at once. # concurrency = 1 +# +# Number of worker processes to use. No matter how big this number is, +# at most one worker per disk will be used. 0 means no forking; all work +# is done in the main process. 
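+# (Illustrative figures only: with replicator_workers = 2 and concurrency = 4
+# up to 8 replication jobs may run at once, and on a node with 4 object disks
+# replicator_workers = 8 still yields at most 4 workers.)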
+# replicator_workers = 0 +# # stats_interval = 300 # # default is rsync, alternative is ssync diff -Nru swift-2.17.0/etc/proxy-server.conf-sample swift-2.18.0/etc/proxy-server.conf-sample --- swift-2.17.0/etc/proxy-server.conf-sample 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/etc/proxy-server.conf-sample 2018-05-30 10:17:02.000000000 +0000 @@ -109,6 +109,15 @@ # set log_level = INFO # set log_address = /dev/log # +# When deployed behind a proxy, load balancer, or SSL terminator that is +# configured to speak the human-readable (v1) PROXY protocol (see +# http://www.haproxy.org/download/1.7/doc/proxy-protocol.txt), you should set +# this option to true. The proxy-server will populate the client connection +# information using the PROXY protocol and reject any connection missing a +# valid PROXY line with a 400. Only v1 (human-readable) of the PROXY protocol +# is supported. +# require_proxy_protocol = false +# # log_handoffs = true # recheck_account_existence = 60 # recheck_container_existence = 60 @@ -361,7 +370,7 @@ # # [filter:authtoken] # paste.filter_factory = keystonemiddleware.auth_token:filter_factory -# auth_uri = http://keystonehost:5000 +# www_authenticate_uri = http://keystonehost:5000 # auth_url = http://keystonehost:35357 # auth_plugin = password # The following credentials must match the Keystone credentials for the Swift @@ -442,6 +451,145 @@ # in ACLs by setting allow_names_in_acls to false: # allow_names_in_acls = true +[filter:s3api] +use = egg:swift#s3api + +# s3api setup: +# +# With either tempauth or your custom auth: +# - Put s3api just before your auth filter(s) in the pipeline +# With keystone: +# - Put s3api and s3token before keystoneauth in the pipeline +# +# Swift has no concept of the S3's resource owner; the resources +# (i.e. containers and objects) created via the Swift API have no owner +# information. This option specifies how the s3api middleware handles them +# with the S3 API. If this option is 'false', such kinds of resources will be +# invisible and no users can access them with the S3 API. If set to 'true', +# a resource without an owner belongs to everyone and everyone can access it +# with the S3 API. If you care about S3 compatibility, set 'false' here. This +# option makes sense only when the s3_acl option is set to 'true' and your +# Swift cluster has the resources created via the Swift API. +# allow_no_owner = false +# +# Set a region name of your Swift cluster. Note that the s3api doesn't choose +# a region of the newly created bucket. This value is used for the +# GET Bucket location API and v4 signatures calculation. +# location = US +# +# Set whether to enforce DNS-compliant bucket names. Note that S3 enforces +# these conventions in all regions except the US Standard region. +# dns_compliant_bucket_names = True +# +# Set the default maximum number of objects returned in the GET Bucket +# response. +# max_bucket_listing = 1000 +# +# Set the maximum number of parts returned in the List Parts operation. +# (default: 1000 as well as S3 specification) +# If setting it larger than 10000 (swift container_listing_limit default) +# make sure you also increase the container_listing_limit in swift.conf. +# max_parts_listing = 1000 +# +# Set the maximum number of objects we can delete with the Multi-Object Delete +# operation. +# max_multi_delete_objects = 1000 +# +# If set to 'true', s3api uses its own metadata for ACLs +# (e.g. X-Container-Sysmeta-S3Api-Acl) to achieve the best S3 compatibility. 
+# If set to 'false', s3api tries to use Swift ACLs (e.g. X-Container-Read)
+# instead of S3 ACLs as far as possible.
+# There are some caveats that one should know about this setting. Firstly,
+# if set to 'false' after having previously been set to 'true', any new objects
+# or containers stored while the 'true' setting was in effect will be
+# accessible to all users because the S3 ACLs will be ignored under the
+# s3_acl=False setting. Secondly, s3_acl True mode does not keep ACL
+# consistency between the S3 and Swift APIs: with s3_acl enabled, S3 ACLs only
+# affect objects and buckets accessed via the S3 API. That ACL information is
+# not available via the Swift API and so the ACLs will not be applied there.
+# Note that s3_acl currently supports only keystone and tempauth.
+# DON'T USE THIS in production before testing it thoroughly for your use cases.
+# This feature is still under development and it might cause something
+# you don't expect.
+# s3_acl = false
+#
+# Specify a host name of your Swift cluster. This enables virtual-hosted style
+# requests.
+# storage_domain =
+#
+# Enable pipeline order check for SLO, s3token, authtoken, keystoneauth
+# according to standard s3api/Swift construction using either tempauth or
+# keystoneauth. If the order is incorrect, it raises an exception to stop the
+# proxy. Turn auth_pipeline_check off only when you want to bypass these
+# authentication middlewares in order to use other 3rd party (or your
+# proprietary) authentication middleware.
+# auth_pipeline_check = True
+#
+# Enable multi-part uploads. (default: true)
+# This is required to store files larger than Swift's max_file_size (by
+# default, 5GiB). Note that this has performance implications when deleting
+# objects, as we now have to check for whether there are also segments to
+# delete.
+# allow_multipart_uploads = True
+#
+# Set the maximum number of parts for the Upload Part operation. (default: 1000)
+# When setting it larger than the default value in order to match the
+# specification of S3 (which allows 10000 parts), also set a larger
+# max_manifest_segments for the slo middleware.
+# max_upload_part_num = 1000
+#
+# Enable returning only buckets whose owner is the user who requested the
+# GET Service operation. (default: false)
+# If you want to enable the above feature, set this and s3_acl to true.
+# That might cause significant performance degradation, so set this to true
+# only if your service absolutely needs this feature.
+# If you set this to false, s3api returns all buckets.
+# check_bucket_owner = false
+#
+# By default, Swift reports only S3 style access logs
+# (e.g. PUT /bucket/object). If force_swift_request_proxy_log is set to
+# 'true', Swift will also output Swift style logs
+# (e.g. PUT /v1/account/container/object) in addition to the S3 style logs.
+# Note that requests will then be reported twice (i.e. s3api does not
+# de-duplicate them) and the Swift style logs will also include the various
+# subrequests issued to achieve S3 compatibility when
+# force_swift_request_proxy_log is set to 'true'.
+# force_swift_request_proxy_log = false
+#
+# The AWS S3 documentation says that each part must be at least 5 MB in a
+# multipart upload, except the last part.
+# min_segment_size = 5242880
+
+# You can override the default log routing for this filter here:
+# log_name = s3api
+
+[filter:s3token]
+# s3token middleware authenticates with keystone using the s3 credentials
+# provided in the request header. Please put s3token between s3api
+# and keystoneauth if you're using keystoneauth.
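+# For example (illustrative ordering only, to be adapted to your own
+# pipeline), the relevant part of [pipeline:main] could read:
+#   ... s3api s3token authtoken keystoneauth ... proxy-server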
+use = egg:swift#s3token + +# Prefix that will be prepended to the tenant to form the account +reseller_prefix = AUTH_ + +# By default, s3token will reject all invalid S3-style requests. Set this to +# True to delegate that decision to downstream WSGI components. This may be +# useful if there are multiple auth systems in the proxy pipeline. +delay_auth_decision = False + +# Keystone server details +auth_uri = http://keystonehost:35357/v3 + +# Connect/read timeout to use when communicating with Keystone +http_timeout = 10.0 + +# SSL-related options +# insecure = False +# certfile = +# keyfile = + +# You can override the default log routing for this filter here: +# log_name = s3token + [filter:healthcheck] use = egg:swift#healthcheck # An optional filesystem path, which if present, will cause the healthcheck @@ -593,7 +741,7 @@ # # Specify the nameservers to use to do the CNAME resolution. If unset, the # system configuration is used. Multiple nameservers can be specified -# separated by a comma. Default port 53 can be overriden. IPv6 is accepted. +# separated by a comma. Default port 53 can be overridden. IPv6 is accepted. # Example: 127.0.0.1, 127.0.0.2, 127.0.0.3:5353, [::1], [::1]:5353 # nameservers = diff -Nru swift-2.17.0/.functests swift-2.18.0/.functests --- swift-2.17.0/.functests 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/.functests 2018-05-30 10:17:02.000000000 +0000 @@ -5,10 +5,11 @@ SRC_DIR=$(python -c "import os; print os.path.dirname(os.path.realpath('$0'))") -cd ${SRC_DIR} +cd ${SRC_DIR} > /dev/null export TESTS_DIR=${SRC_DIR}/test/functional -ostestr --serial --pretty $@ +ARGS="--serial ${@:-"--pretty"}" +ostestr $ARGS rvalue=$? -cd - +cd - > /dev/null exit $rvalue diff -Nru swift-2.17.0/.mailmap swift-2.18.0/.mailmap --- swift-2.17.0/.mailmap 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/.mailmap 2018-05-30 10:17:09.000000000 +0000 @@ -43,6 +43,7 @@ Yaguang Wang ywang19 Liu Siqi dk647 James E. Blair +James E. Blair Kun Huang Michael Shuler Ilya Kharin @@ -122,3 +123,5 @@ Hisashi Osanai Bryan Keller Doug Hellmann +zhangdebo1987 zhangdebo +Thomas Goirand diff -Nru swift-2.17.0/PKG-INFO swift-2.18.0/PKG-INFO --- swift-2.17.0/PKG-INFO 2018-02-05 14:02:17.000000000 +0000 +++ swift-2.18.0/PKG-INFO 2018-05-30 10:18:54.000000000 +0000 @@ -1,18 +1,17 @@ -Metadata-Version: 1.1 +Metadata-Version: 2.1 Name: swift -Version: 2.17.0 +Version: 2.18.0 Summary: OpenStack Object Storage Home-page: https://docs.openstack.org/swift/latest/ Author: OpenStack Author-email: openstack-dev@lists.openstack.org License: UNKNOWN -Description-Content-Type: UNKNOWN Description: ======================== Team and repository tags ======================== - .. image:: https://governance.openstack.org/badges/swift.svg - :target: https://governance.openstack.org/reference/tags/index.html + .. image:: https://governance.openstack.org/tc/badges/swift.svg + :target: https://governance.openstack.org/tc/reference/tags/index.html .. 
Change things from this point on @@ -179,3 +178,6 @@ Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 +Provides-Extra: kms_keymaster +Provides-Extra: keystone +Provides-Extra: test diff -Nru swift-2.17.0/playbooks/probetests/post.yaml swift-2.18.0/playbooks/probetests/post.yaml --- swift-2.17.0/playbooks/probetests/post.yaml 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/playbooks/probetests/post.yaml 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,16 @@ +- hosts: all + become: true + tasks: + - name: Ensure swift logs are readable before syncing + file: + path: '/var/log/swift' + mode: u=rwX,g=rX,o=rX + state: directory + recurse: yes + - name: Copy swift logs from worker nodes to executor node + synchronize: + src: '/var/log/swift' + dest: '{{ zuul.executor.log_root }}' + mode: pull + copy_links: true + verify_host: true diff -Nru swift-2.17.0/playbooks/probetests/run.yaml swift-2.18.0/playbooks/probetests/run.yaml --- swift-2.17.0/playbooks/probetests/run.yaml 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/playbooks/probetests/run.yaml 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,26 @@ + + +# Copyright (c) 2018 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +- hosts: all + + tasks: + - name: run probe tests + shell: + cmd: | + source ~/.bashrc + nosetests test/probe/ + executable: /bin/bash + chdir: '{{ zuul.project.src_dir }}' diff -Nru swift-2.17.0/playbooks/saio_single_node_setup/install_dependencies.yaml swift-2.18.0/playbooks/saio_single_node_setup/install_dependencies.yaml --- swift-2.17.0/playbooks/saio_single_node_setup/install_dependencies.yaml 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/playbooks/saio_single_node_setup/install_dependencies.yaml 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,50 @@ +# Copyright (c) 2018 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+- hosts: all + become: true + tasks: + - name: Check for /etc/yum/vars/contentdir + stat: + path: /etc/yum/vars/contentdir + register: yum_contentdir + + - when: not yum_contentdir.stat.exists + block: + + - name: Discover package architecture + command: rpm -q --qf "%{arch}" -f /etc/redhat-release + register: rpm_arch + + - debug: + msg: Package architecture is '{{ rpm_arch.stdout }}' + + - name: Set contentdir to altarch + set_fact: + yum_contentdir: altarch + when: rpm_arch.stdout in ['aarch64', 'ppc64le'] + + - name: Populate /etc/yum/vars/contentdir + copy: + dest: /etc/yum/vars/contentdir + content: "{{ yum_contentdir|default('centos') }}" + become: true + + - name: installing dependencies + yum: name={{ item }} state=present + with_items: + - python-eventlet + - python-pyeclib + - python-nose + - python-swiftclient diff -Nru swift-2.17.0/playbooks/saio_single_node_setup/make_rings.yaml swift-2.18.0/playbooks/saio_single_node_setup/make_rings.yaml --- swift-2.17.0/playbooks/saio_single_node_setup/make_rings.yaml 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/playbooks/saio_single_node_setup/make_rings.yaml 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,29 @@ + +# Copyright (c) 2018 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +- hosts: all + tasks: + - name: install swift + become: true + shell: + cmd: python setup.py develop + executable: /bin/bash + chdir: '{{ zuul.project.src_dir }}' + + - name: make rings + shell: + cmd: remakerings + executable: /bin/bash + chdir: '/etc/swift' diff -Nru swift-2.17.0/playbooks/saio_single_node_setup/setup_saio.yaml swift-2.18.0/playbooks/saio_single_node_setup/setup_saio.yaml --- swift-2.17.0/playbooks/saio_single_node_setup/setup_saio.yaml 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/playbooks/saio_single_node_setup/setup_saio.yaml 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,174 @@ +# Copyright (c) 2018 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+- hosts: all + become: true + tasks: + - name: assure /srv directory exists + file: path=/srv state=directory + + - name: create loopback device + command: truncate -s 1GB /srv/swift-disk creates=/srv/swift-disk + + - name: create filesystem /srv/swift-disk + become: true + filesystem: fstype=xfs dev=/srv/swift-disk + + - name: create mount path /mnt/sdb1 + file: path=/mnt/sdb1 state=directory + + - name: mount /mnt/sdb1 + mount: name=/mnt/sdb1 src=/srv/swift-disk fstype=xfs opts="loop,noatime,nodiratime,nobarrier,logbufs=8" dump=0 passno=0 state=mounted + + - name: create sub-partitions + file: > + path=/mnt/sdb1/{{ item }} + state=directory + owner={{ ansible_user_id }} + group={{ ansible_user_gid }} + with_items: + - 1 + - 2 + - 3 + - 4 + + - name: create symlinks + become: true + file: > + src=/mnt/sdb1/{{ item }} + dest=/srv/{{ item }} + owner={{ ansible_user_id }} + group={{ ansible_user_gid }} + state=link + with_items: + - 1 + - 2 + - 3 + - 4 + + - name: create node partition directories + file: > + path=/srv/{{ item[1] }}/node/sdb{{ item[0] + item[1] }} + owner={{ ansible_user_id }} + group={{ ansible_user_gid }} + state=directory + with_nested: + - [0, 4] + - [1, 2, 3, 4] + + - name: create /var/run/swift + file: > + path=/var/run/swift + owner={{ ansible_user_id }} + group={{ ansible_user_gid }} + state=directory + + - name: create /var/cache/swift + file: > + path=/var/cache/swift + owner={{ ansible_user_id }} + group={{ ansible_user_gid }} + state=directory + + - name: create /var/cache/swift[n] + file: > + path=/var/cache/swift{{ item }} + owner={{ ansible_user_id }} + group={{ ansible_user_gid }} + state=directory + with_items: + - 2 + - 3 + - 4 + + - name: create rc.local from template + template: src=rc.local.j2 dest=/etc/rc.d/rc.local owner=root group=root mode=0755 + + - name: create /etc/rsyncd.conf + command: cp {{ zuul.project.src_dir }}/doc/saio/rsyncd.conf /etc/ + + - name: update rsyncd.conf with correct username + replace: dest=/etc/rsyncd.conf regexp= replace={{ ansible_user_id }} + + - name: enable rsync + lineinfile: dest=/etc/xinetd.d/rsync line="disable = no" create=yes + + - name: set selinux to permissive + selinux: policy=targeted state=disabled + + - name: restart rsync + service: name=rsyncd state=restarted enabled=yes + + - name: start memcache + service: name=memcached state=started enabled=yes + + - name: configure rsyslog + command: cp {{ zuul.project.src_dir }}/doc/saio/rsyslog.d/10-swift.conf /etc/rsyslog.d/ + + - name: modify /etc/rsyslog.conf + lineinfile: dest=/etc/rsyslog.conf + line="$PrivDropToGroup adm" + create=yes + insertafter="^#### GLOBAL DIRECTIVES" + + - name: assure /var/log/swift directory exists + file: path=/var/log/swift + state=directory + owner=root + group=adm + mode="g+w" + + - name: restart rsyslog + service: name=rsyslog state=restarted enabled=yes + + - name: clean up /etc/swift directory + file: path=/etc/swift state=absent + + - name: create clean /etc/swift + command: cp -r {{ zuul.project.src_dir }}/doc/saio/swift /etc/swift + + - name: copy the sample configuration files for running tests + command: cp -r {{ zuul.project.src_dir }}/test/sample.conf /etc/swift/test.conf + + - name: set correct ownership of /etc/swift + file: path=/etc/swift owner={{ ansible_user_id }} group={{ ansible_user_gid }} recurse=yes + + - name: find config files to modify user option + find: paths="/etc/swift" patterns="*.conf" recurse=yes + register: find_result + + - name: replace user name + replace: dest={{ item.path }} regexp= replace={{ 
ansible_user_id }} + with_items: "{{ find_result.files }}" + + - name: copy the SAIO scripts for resetting the environment + command: cp -r {{ zuul.project.src_dir }}/doc/saio/bin /home/{{ ansible_ssh_user }}/bin creates=/home/{{ ansible_ssh_user }}/bin + + - name: set the correct file mode for SAIO scripts + file: dest=/home/{{ ansible_ssh_user }}/bin mode=0777 recurse=yes + + - name: add new env. variable for loopback device + lineinfile: dest=/home/{{ ansible_ssh_user }}/.bashrc line="export SAIO_BLOCK_DEVICE=/srv/swift-disk" + + - name: remove line from resetswift + lineinfile: dest=/home/{{ ansible_ssh_user }}/bin/resetswift line="sudo find /var/log/swift -type f -exec rm -f {} \;" state=absent + + - name: add new env. variable for running tests + lineinfile: dest=/home/{{ ansible_ssh_user }}/.bashrc line="export SWIFT_TEST_CONFIG_FILE=/etc/swift/test.conf" + + - name: make sure PATH includes the bin directory + lineinfile: dest=/home/{{ ansible_ssh_user }}/.bashrc line="export PATH=${PATH}:/home/{{ ansible_ssh_user }}/bin" + + - name: increase open files limit to run probe tests + lineinfile: dest=/home/{{ ansible_ssh_user }}/.bashrc line="ulimit -n 4096" diff -Nru swift-2.17.0/playbooks/saio_single_node_setup/templates/rc.local.j2 swift-2.18.0/playbooks/saio_single_node_setup/templates/rc.local.j2 --- swift-2.17.0/playbooks/saio_single_node_setup/templates/rc.local.j2 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/playbooks/saio_single_node_setup/templates/rc.local.j2 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,8 @@ +#!/bin/bash + +mkdir -p /var/cache/swift /var/cache/swift2 /var/cache/swift3 /var/cache/swift4 +chown {{ ansible_user_id }}:{{ ansible_user_gid }} /var/cache/swift* +mkdir -p /var/run/swift +chown {{ ansible_user_id }}:{{ ansible_user_gid }} /var/run/swift + +exit 0 diff -Nru swift-2.17.0/README.rst swift-2.18.0/README.rst --- swift-2.17.0/README.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/README.rst 2018-05-30 10:17:02.000000000 +0000 @@ -2,8 +2,8 @@ Team and repository tags ======================== -.. image:: https://governance.openstack.org/badges/swift.svg - :target: https://governance.openstack.org/reference/tags/index.html +.. image:: https://governance.openstack.org/tc/badges/swift.svg + :target: https://governance.openstack.org/tc/reference/tags/index.html .. Change things from this point on diff -Nru swift-2.17.0/releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml swift-2.18.0/releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml --- swift-2.17.0/releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml 2018-05-30 10:17:09.000000000 +0000 @@ -0,0 +1,85 @@ +--- +features: + - | + Added container sharding, an operator controlled feature that + may be used to shard very large container databases into a + number of smaller shard containers. This mitigates the issues + with one large DB by distributing the data across multiple + smaller databases throughout the cluster. Please read the full + overview at + https://docs.openstack.org/swift/latest/overview_container_sharding.html + + - | + Provide an S3 API compatibility layer. The external "swift3" + project has been imported into Swift's codebase as the "s3api" + middleware. + + - | + Added "emergency mode" hooks in the account and container replicators. + These options may be used to prioritize moving handoff + partitions to primary locations more quickly. 
This helps when + adding capacity to a ring. + + - Added ``-d `` and ``-p `` command line options. + + - Added a handoffs-only mode. + + - | + Add a multiprocess mode to the object replicator. Setting the + ``replicator_workers`` setting to a positive value N will result + in the replicator using up to N worker processes to perform + replication tasks. At most one worker per disk will be spawned. + + Worker process logs will have a bit of information prepended so + operators can tell which messages came from which worker. The + prefix is "[worker M/N pid=P] ", where M is the worker's index, + N is the total number of workers, and P is the process ID. Every + message from the replicator's logger will have the prefix + + - | + The object reconstructor will now fork all available worker + processes when operating on a subset of local devices. + + - | + Add support for PROXY protocol v1 to the proxy server. This + allows the Swift proxy server to log accurate client IP + addresses when there is a proxy or SSL-terminator between the + client and the Swift proxy server. Example servers supporting + this PROXY protocol include stunnel, haproxy, hitch, and + varnish. See the sample proxy server config file for the + appropriate config setting to enable or disable this + functionality. + + - | + In the ratelimit middleware, account whitelist and blacklist + settings have been deprecated and may be removed in a future + release. When found, a deprecation message will be logged. + Instead of these config file values, set X-Account-Sysmeta- + Global-Write-Ratelimit:WHITELIST and X-Account-Sysmeta-Global- + Write-Ratelimit:BLACKLIST on the particular accounts that need + to be whitelisted or blacklisted. System metadata cannot be added + or modified by standard clients. Use the internal client to set sysmeta. + + - | + Add a ``--drop-prefixes`` flag to swift-account-info, + swift-container-info, and swift-object-info. This makes the + output between the three more consistent. + + - | + statsd error messages correspond to 5xx responses only. This + makes monitoring more useful because actual errors (5xx) will + not be hidden by common user requests (4xx). Previously, some 4xx + responses would be included in timing information in the statsd + error messages. + + - | + Truncate error logs to prevent log handler from running out of buffer. + + - | + Updated requirements.txt to match global exclusions and formatting. + + - | + tempauth user names now support unicode characters. + + - | + Various other minor bug fixes and improvements. diff -Nru swift-2.17.0/releasenotes/notes/reno.cache swift-2.18.0/releasenotes/notes/reno.cache --- swift-2.17.0/releasenotes/notes/reno.cache 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/releasenotes/notes/reno.cache 2018-05-30 10:18:53.000000000 +0000 @@ -0,0 +1,121 @@ +--- +file-contents: + releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml: + features: ['Added container sharding, an operator controlled feature that + + may be used to shard very large container databases into a + + number of smaller shard containers. This mitigates the issues + + with one large DB by distributing the data across multiple + + smaller databases throughout the cluster. Please read the full + + overview at + + https://docs.openstack.org/swift/latest/overview_container_sharding.html + + ', 'Provide an S3 API compatibility layer. The external "swift3" + + project has been imported into Swift''s codebase as the "s3api" + + middleware. 
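The "s3api" compatibility layer described above can be exercised with any standard S3 client once the middleware is in the proxy pipeline. A minimal sketch using boto3 follows; the endpoint URL and the tempauth-style credentials are placeholders, not values taken from this diff::

    import boto3

    # Endpoint and credentials are illustrative; they assume the s3api
    # middleware (and a compatible auth middleware) is enabled in the
    # proxy server pipeline.
    s3 = boto3.client(
        's3',
        endpoint_url='http://127.0.0.1:8080',
        aws_access_key_id='test:tester',
        aws_secret_access_key='testing')

    s3.create_bucket(Bucket='bucket-backed-by-a-swift-container')
    print(s3.list_buckets()['Buckets'])
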
+ + ', 'Added "emergency mode" hooks in the account and container replicators. + + These options may be used to prioritize moving handoff + + partitions to primary locations more quickly. This helps when + + adding capacity to a ring. + + + - Added ``-d `` and ``-p `` command line options. + + + - Added a handoffs-only mode. + + ', 'Add a multiprocess mode to the object replicator. Setting the + + ``replicator_workers`` setting to a positive value N will result + + in the replicator using up to N worker processes to perform + + replication tasks. At most one worker per disk will be spawned. + + + Worker process logs will have a bit of information prepended so + + operators can tell which messages came from which worker. The + + prefix is "[worker M/N pid=P] ", where M is the worker''s index, + + N is the total number of workers, and P is the process ID. Every + + message from the replicator''s logger will have the prefix + + ', 'The object reconstructor will now fork all available worker + + processes when operating on a subset of local devices. + + ', 'Add support for PROXY protocol v1 to the proxy server. This + + allows the Swift proxy server to log accurate client IP + + addresses when there is a proxy or SSL-terminator between the + + client and the Swift proxy server. Example servers supporting + + this PROXY protocol include stunnel, haproxy, hitch, and + + varnish. See the sample proxy server config file for the + + appropriate config setting to enable or disable this + + functionality. + + ', 'In the ratelimit middleware, account whitelist and blacklist + + settings have been deprecated and may be removed in a future + + release. When found, a deprecation message will be logged. + + Instead of these config file values, set X-Account-Sysmeta- + + Global-Write-Ratelimit:WHITELIST and X-Account-Sysmeta-Global- + + Write-Ratelimit:BLACKLIST on the particular accounts that need + + to be whitelisted or blacklisted. System metadata cannot be added + + or modified by standard clients. Use the internal client to set sysmeta. + + ', 'Add a ``--drop-prefixes`` flag to swift-account-info, + + swift-container-info, and swift-object-info. This makes the + + output between the three more consistent. + + ', 'statsd error messages correspond to 5xx responses only. This + + makes monitoring more useful because actual errors (5xx) will + + not be hidden by common user requests (4xx). Previously, some 4xx + + responses would be included in timing information in the statsd + + error messages. + + ', 'Truncate error logs to prevent log handler from running out of buffer. + + ', 'Updated requirements.txt to match global exclusions and formatting. + + ', 'tempauth user names now support unicode characters. + + ', 'Various other minor bug fixes and improvements. 
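The ratelimit deprecation above says the whitelist and blacklist now live in account sysmeta set through the internal client. A minimal sketch, assuming an internal client config at /etc/swift/internal-client.conf and an account named AUTH_test (both placeholders)::

    from swift.common.internal_client import InternalClient

    # Config path, user-agent string and account name are assumptions
    # made for illustration only.
    client = InternalClient('/etc/swift/internal-client.conf',
                            'ratelimit-admin', 3)
    client.set_account_metadata(
        'AUTH_test',
        {'X-Account-Sysmeta-Global-Write-Ratelimit': 'WHITELIST'})
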
+ + '] +notes: +- files: + - [releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml, fbb3b1ffdaf7d01744753369d48db6931e8c4dd4] + version: 2.18.0 diff -Nru swift-2.17.0/releasenotes/source/index.rst swift-2.18.0/releasenotes/source/index.rst --- swift-2.17.0/releasenotes/source/index.rst 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/releasenotes/source/index.rst 2018-05-30 10:17:02.000000000 +0000 @@ -7,6 +7,8 @@ current + queens + pike ocata diff -Nru swift-2.17.0/releasenotes/source/locale/en_GB/LC_MESSAGES/releasenotes.po swift-2.18.0/releasenotes/source/locale/en_GB/LC_MESSAGES/releasenotes.po --- swift-2.17.0/releasenotes/source/locale/en_GB/LC_MESSAGES/releasenotes.po 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/releasenotes/source/locale/en_GB/LC_MESSAGES/releasenotes.po 2018-05-30 10:17:02.000000000 +0000 @@ -4,15 +4,15 @@ msgstr "" "Project-Id-Version: Swift Release Notes\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2018-01-31 06:08+0000\n" +"POT-Creation-Date: 2018-02-28 19:39+0000\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"PO-Revision-Date: 2018-01-27 01:24+0000\n" +"PO-Revision-Date: 2018-02-16 07:33+0000\n" "Last-Translator: Andi Chandler \n" "Language-Team: English (United Kingdom)\n" -"Language: en-GB\n" -"X-Generator: Zanata 3.9.6\n" +"Language: en_GB\n" +"X-Generator: Zanata 4.3.3\n" "Plural-Forms: nplurals=2; plural=(n != 1)\n" msgid "2.10.0" @@ -125,6 +125,9 @@ "Added a configurable URL base to staticweb, fixing issues when the " "accessible endpoint isn't known to the Swift cluster (eg http vs https)." +msgid "Added a configurable URL base to staticweb." +msgstr "Added a configurable URL base to staticweb." + msgid "" "Added support for per-policy proxy config options. This allows per-policy " "affinity options to be set for use with duplicated EC policies and composite " @@ -149,6 +152,15 @@ "Added support for retrieving the encryption root secret from an external key " "management system. In practice, this is currently limited to Barbican." +msgid "" +"All 416 responses will now include a Content-Range header with an " +"unsatisfied-range value. This allows the caller to know the valid range " +"request value for an object." +msgstr "" +"All 416 responses will now include a Content-Range header with an " +"unsatisfied-range value. This allows the caller to know the valid range " +"request value for an object." + msgid "Always set Swift processes to use UTC." msgstr "Always set Swift processes to use UTC." @@ -165,6 +177,19 @@ "Closed a bug where ssync may have written bad fragment data in some " "circumstances. A check was added to ensure the correct number of bytes is " "written for a fragment before finalizing the write. Also, erasure coded " +"fragment metadata will now be validated on read requests and, if bad data is " +"found, the fragment will be quarantined." +msgstr "" +"Closed a bug where ssync may have written bad fragment data in some " +"circumstances. A check was added to ensure the correct number of bytes is " +"written for a fragment before finalising the write. Also, erasure coded " +"fragment metadata will now be validated on read requests and, if bad data is " +"found, the fragment will be quarantined." + +msgid "" +"Closed a bug where ssync may have written bad fragment data in some " +"circumstances. A check was added to ensure the correct number of bytes is " +"written for a fragment before finalizing the write. 
Also, erasure coded " "fragment metadata will now be validated when read and, if bad data is found, " "the fragment will be quarantined." msgstr "" @@ -201,6 +226,9 @@ "to be synced before all of the referenced segments. This fixes a bug where " "container sync would not copy SLO manifests." +msgid "Correctly handle deleted files with if-none-match requests." +msgstr "Correctly handle deleted files with if-none-match requests." + msgid "" "Correctly send 412 Precondition Failed if a user sends an invalid copy " "destination. Previously Swift would send a 500 Internal Server Error." @@ -208,6 +236,9 @@ "Correctly send 412 Precondition Failed if a user sends an invalid copy " "destination. Previously Swift would send a 500 Internal Server Error." +msgid "Critical Issues" +msgstr "Critical Issues" + msgid "Current (Unreleased) Release Notes" msgstr "Current (Unreleased) Release Notes" @@ -431,6 +462,17 @@ "2.7.0 and could cause an increase in rsync replication stats during and " "after upgrade, due to inconsistent hashing of partition suffixes." +msgid "" +"Fixed regression in consolidate_hashes that occurred when a new file was " +"stored to new suffix to a non-empty partition. This bug was introduced in " +"2.7.0 and could cause an increase in rsync replication stats during and " +"after upgrade, due to inconsistent hashing of partition suffixes." +msgstr "" +"Fixed regression in consolidate_hashes that occurred when a new file was " +"stored to new suffix to a non-empty partition. This bug was introduced in " +"2.7.0 and could cause an increase in rsync replication stats during and " +"after upgrade, due to inconsistent hashing of partition suffixes." + msgid "Fixed some minor test compatibility issues." msgstr "Fixed some minor test compatibility issues." @@ -487,6 +529,15 @@ msgstr "Improvements in key parts of the consistency engine" msgid "" +"In SLO manifests, the `etag` and `size_bytes` keys are now fully optional " +"and not required. Previously, the keys needed to exist but the values were " +"optional. The only required key is `path`." +msgstr "" +"In SLO manifests, the `etag` and `size_bytes` keys are now fully optional " +"and not required. Previously, the keys needed to exist but the values were " +"optional. The only required key is `path`." + +msgid "" "Include object sysmeta in POST responses. Sysmeta is still stripped from the " "response before being sent to the client, but this allows middleware to make " "use of the information." @@ -499,6 +550,17 @@ msgstr "Include received fragment index in reconstructor log warnings." msgid "" +"Instead of using a separate .durable file to indicate the durable status of " +"an EC fragment archive, we rename the .data to include a durable marker in " +"the filename. This saves one inode for every EC .data file. Existing ." +"durable files will not be removed, and they will continue to work just fine." +msgstr "" +"Instead of using a separate .durable file to indicate the durable status of " +"an EC fragment archive, we rename the .data to include a durable marker in " +"the filename. This saves one inode for every EC .data file. Existing ." +"durable files will not be removed, and they will continue to work just fine." + +msgid "" "Let clients request heartbeats during SLO PUTs by including the query " "parameter ``heartbeat=on``." msgstr "" @@ -538,6 +600,9 @@ "Make mount_check option usable in containerised environments by adding a " "check for an \".ismount\" file at the root directory of a device." 
+msgid "Mirror X-Trans-Id to X-Openstack-Request-Id." +msgstr "Mirror X-Trans-Id to X-Openstack-Request-Id." + msgid "" "Move listing formatting out to a new proxy middleware named " "``listing_formats``. ``listing_formats`` should be just right of the first " @@ -576,6 +641,13 @@ msgstr "Newton Series Release Notes" msgid "" +"Note that after writing EC data with Swift 2.11.0 or later, that data will " +"not be accessible to earlier versions of Swift." +msgstr "" +"Note that after writing EC data with Swift 2.11.0 or later, that data will " +"not be accessible to earlier versions of Swift." + +msgid "" "Note: if you have a custom middleware that makes account or container " "listings, it will only receive listings in JSON format." msgstr "" @@ -675,6 +747,9 @@ msgid "Remove deprecated ``vm_test_mode`` option." msgstr "Remove deprecated ``vm_test_mode`` option." +msgid "Remove empty db hash and suffix directories if a db gets quarantined." +msgstr "Remove empty DB hash and suffix directories if a DB gets quarantined." + msgid "" "Removed \"in-process-\" from func env tox name to work with upstream CI." msgstr "" @@ -715,6 +790,9 @@ msgid "Require that known-bad EC schemes be deprecated" msgstr "Require that known-bad EC schemes be deprecated" +msgid "Respect server type for --md5 check in swift-recon." +msgstr "Respect server type for --md5 check in swift-recon." + msgid "" "Respond 400 Bad Request when Accept headers fail to parse instead of " "returning 406 Not Acceptable." @@ -723,6 +801,27 @@ "returning 406 Not Acceptable." msgid "" +"Ring files now include byteorder information about the endian of the machine " +"used to generate the file, and the values are appropriately byteswapped if " +"deserialized on a machine with a different endianness. Newly created ring " +"files will be byteorder agnostic, but previously generated ring files will " +"still fail on different endian architectures. Regenerating older ring files " +"will cause them to become byteorder agnostic. The regeneration of the ring " +"files will not cause any new data movement. Newer ring files will still be " +"usable by older versions of Swift (on machines with the same endianness--" +"this maintains existing behavior)." +msgstr "" +"Ring files now include byteorder information about the endian of the machine " +"used to generate the file, and the values are appropriately byteswapped if " +"deserialised on a machine with a different endianness. Newly created ring " +"files will be byteorder agnostic, but previously generated ring files will " +"still fail on different endian architectures. Regenerating older ring files " +"will cause them to become byteorder agnostic. The regeneration of the ring " +"files will not cause any new data movement. Newer ring files will still be " +"usable by older versions of Swift (on machines with the same endianness--" +"this maintains existing behaviour)." + +msgid "" "Rings with min_part_hours set to zero will now only move one partition " "replica per rebalance, thus matching behavior when min_part_hours is greater " "than zero." @@ -741,6 +840,17 @@ "segments." msgid "" +"SLO will now concurrently HEAD segments, resulting in much faster manifest " +"validation and object creation. By default, two HEAD requests will be done " +"at a time, but this can be changed by the operator via the new `concurrency` " +"setting in the \"[filter:slo]\" section of the proxy server config." 
+msgstr "" +"SLO will now concurrently HEAD segments, resulting in much faster manifest " +"validation and object creation. By default, two HEAD requests will be done " +"at a time, but this can be changed by the operator via the new `concurrency` " +"setting in the \"[filter:slo]\" section of the proxy server config." + +msgid "" "Significant improvements to the api-ref doc available at http://developer." "openstack.org/api-ref/object-storage/." msgstr "" @@ -754,10 +864,37 @@ "Static Large Object (SLO) manifest may now (again) have zero-byte last " "segments." +msgid "Support multi-range GETs for static large objects." +msgstr "Support multi-range GETs for static large objects." + +msgid "Suppress unexpected-file warnings for rsync temp files." +msgstr "Suppress unexpected-file warnings for rsync temp files." + +msgid "Suppressed the KeyError message when auditor finds an expired object." +msgstr "Suppressed the KeyError message when auditor finds an expired object." + msgid "Swift Release Notes" msgstr "Swift Release Notes" msgid "" +"TempURLs now support a validation against a common prefix. A prefix-based " +"signature grants access to all objects which share the same prefix. This " +"avoids the creation of a large amount of signatures, when a whole container " +"or pseudofolder is shared." +msgstr "" +"TempURLs now support a validation against a common prefix. A prefix-based " +"signature grants access to all objects which share the same prefix. This " +"avoids the creation of a large amount of signatures, when a whole container " +"or pseudofolder is shared." + +msgid "" +"TempURLs using the \"inline\" parameter can now also set the \"filename\" " +"parameter. Both are used in the Content-Disposition response header." +msgstr "" +"TempURLs using the \"inline\" parameter can now also set the \"filename\" " +"parameter. Both are used in the Content-Disposition response header." + +msgid "" "Temporary URLs now support one common form of ISO 8601 timestamps in " "addition to Unix seconds-since-epoch timestamps. The ISO 8601 format " "accepted is '%Y-%m-%dT%H:%M:%SZ'. This makes TempURLs more user-friendly to " @@ -838,6 +975,15 @@ "and allows continued scaling as concurrency is increased." msgid "" +"The improvements to EC reads made in Swift 2.10.0 have also been applied to " +"the reconstructor. This allows fragments to be rebuilt in more " +"circumstances, resulting in faster recovery from failures." +msgstr "" +"The improvements to EC reads made in Swift 2.10.0 have also been applied to " +"the reconstructor. This allows fragments to be rebuilt in more " +"circumstances, resulting in faster recovery from failures." + +msgid "" "The object and container server config option ``slowdown`` has been " "deprecated in favor of the new ``objects_per_second`` and " "``containers_per_second`` options." @@ -875,6 +1021,13 @@ msgid "" "Throttle update_auditor_status calls so it updates no more than once per " +"minute." +msgstr "" +"Throttle update_auditor_status calls so it updates no more than once per " +"minute." + +msgid "" +"Throttle update_auditor_status calls so it updates no more than once per " "minute. This prevents excessive IO on a new cluster." msgstr "" "Throttle update_auditor_status calls so it updates no more than once per " @@ -890,6 +1043,9 @@ msgid "Updated docs to reference appropriate ports." msgstr "Updated docs to reference appropriate ports." +msgid "Updated the PyECLib dependency to 1.3.1." +msgstr "Updated the PyECLib dependency to 1.3.1." 
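The prefix-based TempURL validation mentioned above lets one signature cover every object sharing a common prefix. A rough sketch of computing such a signature, following the "prefix:" convention from the TempURL documentation; the key, account, container and prefix below are assumptions for illustration, not values from this diff::

    import hmac
    from hashlib import sha1
    from time import time

    key = b'account-or-container-temp-url-key'   # placeholder key
    expires = int(time() + 3600)
    prefix = '/v1/AUTH_test/container/photos/'   # covers everything under photos/
    hmac_body = 'GET\n%d\nprefix:%s' % (expires, prefix)
    sig = hmac.new(key, hmac_body.encode('utf8'), sha1).hexdigest()

    # Any object under the prefix can then be fetched with this one signature:
    url = ('http://127.0.0.1:8080%svacation/img_0001.jpg'
           '?temp_url_sig=%s&temp_url_expires=%d&temp_url_prefix=photos/'
           % (prefix, sig, expires))
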
+ msgid "" "Updated the `hashes.pkl` file format to include timestamp information for " "race detection. Also simplified hashing logic to prevent race conditions and " @@ -913,6 +1069,17 @@ msgstr "Various other minor bug fixes and improvements." msgid "" +"WARNING: If you are using the ISA-L library for erasure codes, please " +"upgrade to liberasurecode 1.3.1 (or later) as soon as possible. If you are " +"using isa_l_rs_vand with more than 4 parity, please read https://bugs." +"launchpad.net/swift/+bug/1639691 and take necessary action." +msgstr "" +"WARNING: If you are using the ISA-L library for erasure codes, please " +"upgrade to liberasurecode 1.3.1 (or later) as soon as possible. If you are " +"using isa_l_rs_vand with more than 4 parity, please read https://bugs." +"launchpad.net/swift/+bug/1639691 and take necessary action." + +msgid "" "We do not yet have CLI tools for creating composite rings, but the " "functionality has been enabled in the ring modules to support this advanced " "functionality. CLI tools will be delivered in a subsequent release." diff -Nru swift-2.17.0/releasenotes/source/locale/ja/LC_MESSAGES/releasenotes.po swift-2.18.0/releasenotes/source/locale/ja/LC_MESSAGES/releasenotes.po --- swift-2.17.0/releasenotes/source/locale/ja/LC_MESSAGES/releasenotes.po 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/releasenotes/source/locale/ja/LC_MESSAGES/releasenotes.po 2018-05-30 10:17:02.000000000 +0000 @@ -1,17 +1,18 @@ # Shu Muto , 2017. #zanata +# Shu Muto , 2018. #zanata msgid "" msgstr "" -"Project-Id-Version: Swift Release Notes 2.15.2\n" +"Project-Id-Version: Swift Release Notes\n" "Report-Msgid-Bugs-To: \n" -"POT-Creation-Date: 2017-08-24 21:08+0000\n" +"POT-Creation-Date: 2018-02-28 19:39+0000\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"PO-Revision-Date: 2017-08-25 06:15+0000\n" +"PO-Revision-Date: 2018-02-08 07:28+0000\n" "Last-Translator: Shu Muto \n" "Language-Team: Japanese\n" "Language: ja\n" -"X-Generator: Zanata 3.9.6\n" +"X-Generator: Zanata 4.3.3\n" "Plural-Forms: nplurals=1; plural=0\n" msgid "2.10.0" @@ -44,6 +45,12 @@ msgid "2.15.1" msgstr "2.15.1" +msgid "2.16.0" +msgstr "2.16.0" + +msgid "2.17.0" +msgstr "2.17.0" + msgid "" "A PUT or POST to a container will now update the container's Last-Modified " "time, and that value will be included in a GET/HEAD response." @@ -64,18 +71,35 @@ "ネント間のレプリカの分散を保証できます。" msgid "" +"Accept a trade off of dispersion for balance in the ring builder that will " +"result in getting to balanced rings much more quickly in some cases." +msgstr "" +"リングビルダーのバランスのために、分散のトレードオフを受け入れ、場合によって" +"はバランスされたリングにより早く到達します。" + +msgid "" "Account and container databases will now be quarantined if the database " "schema has been corrupted." msgstr "" "データベーススキーマが壊れていると、アカウントとコンテナーのデータベースが隔" "離されるようになりました。" +msgid "" +"Account and container replication stats logs now include ``remote_merges``, " +"the number of times a whole database was sent to another node." +msgstr "" +"アカウントとコンテナー複製の統計ログに、データベース全体が別のノードに送信さ" +"れた回数、``remote_merges`` が追加されました。" + msgid "Add Composite Ring Functionality" msgstr "複合リング機能を追加しました。" msgid "Add Vary headers for CORS responses." msgstr "CORS 応答用の Vary ヘッダーを追加しました。" +msgid "Add checksum to object extended attributes." +msgstr "オブジェクトの拡張属性にチェックサムを追加します。" + msgid "" "Add support to increase object ring partition power transparently to end " "users and with no cluster downtime. 
Increasing the ring part power allows " @@ -91,6 +115,13 @@ "ください。" msgid "" +"Added ``--swift-versions`` to ``swift-recon`` CLI to compare installed " +"versions in the cluster." +msgstr "" +"クラスターにインストールされているバージョンを比較するために、``swift-" +"recon`` CLI に ``--swift-versions`` を追加しました。" + +msgid "" "Added a \"user\" option to the drive-audit config file. Its value is used to " "set the owner of the drive-audit recon cache." msgstr "" @@ -108,6 +139,14 @@ msgid "Added a configurable URL base to staticweb." msgstr "静的ウェブに対する設定可能な URL ベースを追加しました。" +msgid "Added container/object listing with prefix to InternalClient." +msgstr "" +"InternalClient のコンテナー/オブジェクトの一覧作成で接頭辞を指定できるように" +"なりました。" + +msgid "Added support for inline data segments in SLO manifests." +msgstr "SLO マニフェストにおけるインラインデータセグメントをサポートしました。" + msgid "" "Added support for per-policy proxy config options. This allows per-policy " "affinity options to be set for use with duplicated EC policies and composite " @@ -126,6 +165,16 @@ "``write_affinity_handoff_delete_count`` です。" msgid "" +"Added support for retrieving the encryption root secret from an external key " +"management system. In practice, this is currently limited to Barbican." +msgstr "" +"外部鍵管理システムからの暗号化ルートシークレットの取得をサポートしました。現" +"在 Barbican に限定されています。" + +msgid "Added symlink objects support." +msgstr "シンボリックリンクオブジェクトをサポートしました。" + +msgid "" "All 416 responses will now include a Content-Range header with an " "unsatisfied-range value. This allows the caller to know the valid range " "request value for an object." @@ -134,6 +183,9 @@ "ようになりました。 これにより、呼び出し元はオブジェクトの有効範囲要求値を知る" "ことができます。" +msgid "Allow the expirer to gracefully move past updating stale work items." +msgstr "expirer が安全に古い作業項目を移動できるようになりました。" + msgid "Always set Swift processes to use UTC." msgstr "Swift プロセスがいつも UTC を使うように設定しました。" @@ -144,6 +196,18 @@ msgstr "cname_lookup でネームサーバーからのすべての応答をキャッシュします。" msgid "" +"Changed where liberasurecode-devel for CentOS 7 is referenced and installed " +"as a dependency." +msgstr "" +"CentOS 7 での、liberasurecode-devel が参照、インストールされる場所を変更しま" +"した。" + +msgid "Cleaned up logged tracebacks when talking to memcached servers." +msgstr "" +"memcached サーバーと通信するときのトレースバックログをクリーンアップしまし" +"た。" + +msgid "" "Closed a bug where ssync may have written bad fragment data in some " "circumstances. A check was added to ensure the correct number of bytes is " "written for a fragment before finalizing the write. Also, erasure coded " @@ -228,6 +292,17 @@ "InternalClient を使用するデーモンは、 SIGTERM を使用して適切に停止できます。" msgid "" +"Deleting an expiring object will now cause less work in the system. The " +"number of async pending files written has been reduced for all objects and " +"greatly reduced for erasure-coded objects. This dramatically reduces the " +"burden on container servers." +msgstr "" +"期限切れオブジェクトの削除は、システムでの作業を削減します。非同期で保留され" +"ているファイルの数は、すべてのオブジェクトで削減され、消去コード付きオブジェ" +"クトでは大幅に削減されます。これにより、コンテナーサーバーの負担が劇的に軽減" +"しました。" + +msgid "" "Deprecate swift-temp-url and call python-swiftclient's implementation " "instead. This adds python-swiftclient as an optional dependency of Swift." msgstr "" @@ -238,6 +313,13 @@ msgid "Deprecation Notes" msgstr "廃止予定の機能" +msgid "Disallow X-Delete-At header values equal to the X-Timestamp header." +msgstr "" +"X-Delete-At ヘッダーの値が X-Timestamp ヘッダーと等しいことを禁止します。" + +msgid "Display more info on empty rings." +msgstr "空のリングに詳細情報を表示します。" + msgid "Do not follow CNAME when host is in storage_domain." 
msgstr "ホストが storage_domain にある場合、CNAME に従わないようにしました。" @@ -306,6 +388,15 @@ "非 ASCII 名のオブジェクトが再構築されず、再構築プロセスがハングアップする原因" "となるオブジェクト再構成の UnicodeDecodeError が修正されました。" +msgid "" +"Fixed XML responses (eg on bulk extractions and SLO upload failures) to be " +"more correct. The enclosing \"delete\" tag was removed where it doesn't make " +"sense and replaced with \"extract\" or \"upload\" depending on the context." +msgstr "" +"XML レスポンス(一括抽出や SLO アップロードの失敗など)がより正確になりまし" +"た。意味のない \"delete\" の閉じタグは削除され、コンテキストに応じた " +"\"extract\" あるいは \"upload\" に置き換えられました。" + msgid "Fixed a bug in domain_remap when obj starts/ends with slash." msgstr "" "オブジェクトがスラッシュで開始/終了するときの domain_remap のバグを修正しまし" @@ -350,7 +441,9 @@ "した。" msgid "Fixed a bug where some tombstone files might never be reclaimed." -msgstr "いくつかの墓石ファイルが再利用されないかもしれないバグを修正しました。" +msgstr "" +"いくつかの廃棄済みオブジェクト (tombstone) ファイルが再利用されないかもしれな" +"いバグを修正しました。" msgid "" "Fixed a bug where the ring builder would not allow removal of a device when " @@ -374,11 +467,25 @@ msgstr "" "パーツを置いている間の`swift-ring-builder` のまれな無限ループを修正しました。" +msgid "" +"Fixed a rare issue where multiple backend timeouts could result in bad data " +"being returned to the client." +msgstr "" +"複数のバックエンドのタイムアウトが原因で、クライアントに不正なデータが返され" +"るという稀な問題を修正しました。" + msgid "Fixed a socket leak in copy middleware when a large object was copied." msgstr "" "ラージオブジェクトをコピーしたときの copy ミドルウェアのソケットリークを修正" "しました。" +msgid "" +"Fixed an issue where background consistency daemon child processes would " +"deadlock waiting on the same file descriptor." +msgstr "" +"バックグラウンド一貫性デーモンの子プロセスが同じファイル記述子を待ってデッド" +"ロックする問題を修正しました。" + msgid "Fixed deadlock when logging from a tpool thread." msgstr "tpool スレッドからのロギング時のデッドロックを修正しました。" @@ -408,6 +515,11 @@ "パーティションが予想よりもずっと少なく更新される可能性がある hashes.pkl の固" "定の非確定的なサフィックスの更新を修正しました。" +msgid "Fixed rare socket leak on range requests to erasure-coded objects." +msgstr "" +"消去コード付きオブジェクトへの範囲リクエストでの稀なソケットリークを修正しま" +"した。" + msgid "" "Fixed regression in consolidate_hashes that occured when a new file was " "stored to new suffix to a non-empty partition. This bug was introduced in " @@ -444,17 +556,47 @@ msgstr "消去コード再構成の統計計算を修正しました。" msgid "" +"Fixed using ``swift-ring-builder set_weight`` with more than one device." +msgstr "" +"複数のデバイスでの``swift-ring-builder set_weight`` の使用を修正しました。" + +msgid "" "For further information see the `docs `__" msgstr "" "詳細は `docs `__ を参照してください。" +msgid "Fractional replicas are no longer allowed for erasure code policies." +msgstr "断片的な複製は、消去コードポリシーには使用できなくなりました。" + +msgid "" +"GET and HEAD requests to a symlink will operate on the referenced object and " +"require appropriate permission in the target container. DELETE and PUT " +"requests will operate on the symlink object itself. POST requests are not " +"forwarded to the referenced object. POST requests sent to a symlink will " +"result in a 307 Temporary Redirect response." +msgstr "" +"シンボリックリンクに対する GET と HEAD リクエストは、参照されたオブジェクトに" +"対して操作が行われ、対象となるコンテナーへの適切な権限を必要とします。DELETE " +"と PUT リクエストは、シンボリックリンクオブジェクト自身に操作が行われます。" +"POST リクエストは参照されているオブジェクトに転送されません。シンボリックリン" +"クに対する POST リクエストの送信は、307 Temporary Redirect レスポンスになりま" +"す。" + msgid "I/O priority is now supported on AArch64 architecture." msgstr "" "AArch64 アーキテクチャーで I/O 優先順位がサポートされるようになりました。" msgid "" +"If a proxy server is configured to autocreate accounts and the account " +"create fails, it will now return a server error (500) instead of Not Found " +"(404)." 
+msgstr "" +"プロキシサーバーにアカウント自動作成が設定されていて、アカウント作成に失敗す" +"ると、Not Found (404) ではなく、サーバーエラー (500) が返されます。" + +msgid "" "If using erasure coding with ISA-L in rs_vand mode and 5 or more parity " "fragments, Swift will emit a warning. This is a configuration that is known " "to harm data durability. In a future release, this warning will be upgraded " @@ -483,10 +625,30 @@ "た。" msgid "" +"Improved ``object-updater`` stats logging. It now tells you all of its stats " +"(successes, failures, quarantines due to bad pickles, unlinks, and errors), " +"and it tells you incremental progress every five minutes. The logging at the " +"end of a pass remains and has been expanded to also include all stats." +msgstr "" +"``object-updater`` 統計ログを改善しました。すべての統計(成功、失敗、悪いピク" +"ルスによる検疫、リンク解除、エラー)を出力し、また、5分毎に進捗状況を出力し" +"ます。成功の最後のログは残り、すべての統計情報も含むように拡張されました。" + +msgid "" "Improved performance by eliminating an unneeded directory structure hash." msgstr "" "不要なディレクトリ構造ハッシュを排除してパフォーマンスを向上させました。" +msgid "" +"Improved the granularity of the ring dispersion metric so that small " +"improvements after a rebalance can show changes in the dispersion number. " +"Dispersion in existing and new rings can be recalculated using the new ``--" +"recalculate`` option to ``swift-ring-builder``." +msgstr "" +"再分散後の小さな改善により分散数の変化を示すことができるように、リング分散メ" +"トリックの粒度を改善しました。既存、および新しいリングの分散は、``swift-ring-" +"builder`` の新しい ``--recalculate`` オプションを使うことで再計算されます。" + msgid "Improvements in key parts of the consistency engine" msgstr "整合性エンジンの重要な部分を改善しました。" @@ -523,6 +685,13 @@ "す。 既存の .durable ファイルは削除されず、正常に動作し続けます。" msgid "" +"Let clients request heartbeats during SLO PUTs by including the query " +"parameter ``heartbeat=on``." +msgstr "" +"SLO PUT の間、クエリーパラメーター ``heartbeat=on`` を含めることで、クライア" +"ントがハートビートを要求できるようにしました。" + +msgid "" "Listing containers in accounts with json or xml now includes a " "`last_modified` time. This does not change any on-disk data, but simply " "exposes the value to offer consistency with the object listings on " @@ -536,6 +705,15 @@ msgid "Log correct status code for conditional requests." msgstr "条件付きリクエストの正しいステータスコードを記録します。" +msgid "" +"Log deprecation warning for ``allow_versions`` in the container server " +"config. Configure the ``versioned_writes`` middleware in the proxy server " +"instead. This option will be ignored in a future release." +msgstr "" +"コンテナーサーバーの設定の ``allow_versions`` のために、非推奨警告ログを出力" +"します。代わりに ``versioned_writes`` ミドルウェアをプロキシサーバーに設定し" +"ます。このオプションは将来のリリースでは無視されます。" + msgid "Log the correct request type of a subrequest downstream of copy." msgstr "サブリクエストの正しいリクエストタイプをコピーの後ろに記録します。" @@ -550,6 +728,20 @@ msgstr "X-Trans-Id を X-Openstack-Request-Id に写します。" msgid "" +"Move listing formatting out to a new proxy middleware named " +"``listing_formats``. ``listing_formats`` should be just right of the first " +"proxy-logging middleware, and left of most other middlewares. If it is not " +"already present, it will be automatically inserted for you." +msgstr "" +"リストの成型を ``listing_formats`` という新しいプロキシミドルウェアに移動しま" +"した。``listing_formats`` は、最初の proxy-logging ミドルウェアの直ぐ右にあ" +"り、他のミドルウェアの左になければなりません。まだ存在しない場合は、自動的に" +"挿入されます。" + +msgid "Moved Zuul v3 tox jobs into the Swift code repo." +msgstr "Zuul v3 の tox ジョブを Swift のリポジトリに移動しました。" + +msgid "" "Moved other-requirements.txt to bindep.txt. bindep.txt lists non-python " "dependencies of Swift." 
msgstr "" @@ -583,6 +775,13 @@ "そのデータにアクセスできないことに注意してください。" msgid "" +"Note: if you have a custom middleware that makes account or container " +"listings, it will only receive listings in JSON format." +msgstr "" +"注意: アカウントやコンテナー一覧を作るカスタムミドルウェアがある場合、受け取" +"る一覧は JSON 形式のみです。" + +msgid "" "Now Swift will use ``write_affinity_handoff_delete_count`` to define how " "many local handoff nodes should swift send request to get more candidates " "for the final response. The default value \"auto\" means Swift will " @@ -594,6 +793,12 @@ "きかを定義します。デフォルト値 \"auto\" は、 Swift がレプリカの数と現在のクラ" "スタートポロジーに基づいて自動的に数を計算することを意味します。" +msgid "Now ``swift-recon-cron`` works with conf.d configs." +msgstr "``swift-recon-cron`` は conf.d の設定で動作するようになりました。" + +msgid "Object expiry improvements" +msgstr "オブジェクトの有効期限の改善" + msgid "" "Object versioning now supports a \"history\" mode in addition to the older " "\"stack\" mode. The difference is in how DELETE requests are handled. For " @@ -666,6 +871,13 @@ "クトを削除すると、オブジェクトが適切なノードにレプリケートされる前にオブジェ" "クトを削除すると常に 404 となりました。" +msgid "" +"Remove ``swift-temp-url`` script. The functionality has been in swiftclient " +"for a long time and this script has been deprecated since 2.10.0." +msgstr "" +"``swift-temp-url`` スクリプトを削除しました。この機能は、長い間 swiftclient " +"にありましたが、2.10.0 から非推奨でした。" + msgid "Remove deprecated ``vm_test_mode`` option." msgstr "非推奨の ``vm_test_mode`` オプションを削除しました。" @@ -681,12 +893,37 @@ "た。" msgid "" +"Removed a race condition where a POST to an SLO could modify the X-Static-" +"Large-Object metadata." +msgstr "" +"SLO クラウドへの POST が X-Static-Large-Object メタデータを変更できる、競合状" +"態を削除しました。" + +msgid "" +"Removed all ``post_as_copy`` related code and configs. The option has been " +"deprecated since 2.13.0." +msgstr "" +"``post_as_copy`` に関連するすべてのコードと設定を削除しました。このオプション" +"は、2.13.0 から非推奨でした。" + +msgid "" "Removed per-device reconstruction stats. Now that the reconstructor is " "shuffling parts before going through them, those stats no longer make sense." msgstr "" "デバイスごとの再構成の統計を削除しました。再構成は、それらを通過する前にパー" "ツをシャッフルするので、それらの統計はもはや意味をなしません。" +msgid "" +"Replaced ``replication_one_per_device`` by custom count defined by " +"``replication_concurrency_per_device``. The original config value is " +"deprecated, but continues to function for now. If both values are defined, " +"the old ``replication_one_per_device`` is ignored." +msgstr "" +"``replication_one_per_device`` を ``replication_concurrency_per_device`` に" +"よって定義されるカスタムカウントに置き換えました。元の設定値は非推奨となりま" +"したが、引き続き機能します。両方の値が定義された場合、古い " +"``replication_one_per_device`` は無視されます。" + msgid "Require that known-bad EC schemes be deprecated" msgstr "既知の悪い EC スキームの要件を非推奨にしました。" @@ -694,6 +931,13 @@ msgstr "swift-recon での --md5 チェックのサーバー種別を尊重します。" msgid "" +"Respond 400 Bad Request when Accept headers fail to parse instead of " +"returning 406 Not Acceptable." +msgstr "" +"Accept ヘッダーの解析に失敗した時、406 Not Acceptable の代わりに 400 Bad " +"Request が返されます。" + +msgid "" "Ring files now include byteorder information about the endian of the machine " "used to generate the file, and the values are appropriately byteswapped if " "deserialized on a machine with a different endianness. Newly created ring " @@ -744,12 +988,38 @@ "ションの新しい `concurrency` 設定によってオペレーターが変更できます。" msgid "" +"Save the ring when dispersion improves, even if balance doesn't improve." +msgstr "" +"バランスが改善されない場合でも、分散が改善されたときにリングを保存します。" + +msgid "Send ETag header in 206 Partial Content responses to SLO reads." 
+msgstr "" +"SLO 読み込みへの 206 Partial Content 応答で ETag ヘッダーを送信します。" + +msgid "" "Significant improvements to the api-ref doc available at http://developer." "openstack.org/api-ref/object-storage/." msgstr "" "http://developer.openstack.org/api-ref/object-storage/ の api-ref ドキュメン" "トに対する重要な改善が行われました。" +msgid "" +"Static Large Object (SLO) manifest may now (again) have zero-byte last " +"segments." +msgstr "" +"Static Large Object (SLO) マニフェストは、0 バイトの最終セグメントを再度持つ" +"ようになりました。" + +msgid "" +"Stop logging tracebacks in the ``object-replicator`` when it runs out of " +"handoff locations." +msgstr "" +"``object-replicator`` を実行する場所を使い果たした時のトレースバックのログを" +"停止しました。" + +msgid "Stopped logging tracebacks when receiving an unexpected response." +msgstr "想定外の応答を受信した時のトレースバックのログを停止しました。" + msgid "Support multi-range GETs for static large objects." msgstr "静的ラージオブジェクトの multi-range GET をサポートしました。" @@ -765,6 +1035,19 @@ msgstr "Swift リリースノート" msgid "" +"Symlink objects reference one other object. They are created by creating an " +"empty object with an X-Symlink-Target header. The value of the header is of " +"the format /, and the target does not need to exist at " +"the time of symlink creation. Cross-account symlinks can be created by " +"including the X-Symlink-Target-Account header." +msgstr "" +"Symlink オブジェクトは他のオブジェクトを参照します。これらは、X-Symlink-" +"Target ヘッダーを持つ空のオブジェクトの作成によって作られます。ヘッダーの値" +"は / 形式であり、シンボリックリンク作成時にターゲットが存" +"在する必要はありません。クロスアカウントのシンボリックリンクは、X-Symlink-" +"Target-Account ヘッダーを含むことによって作成できます。" + +msgid "" "TempURLs now support a validation against a common prefix. A prefix-based " "signature grants access to all objects which share the same prefix. This " "avoids the creation of a large amount of signatures, when a whole container " @@ -806,6 +1089,19 @@ "す。" msgid "" +"The ``domain_remap`` middleware now supports the ``mangle_client_paths`` " +"option. Its default \"false\" value changes ``domain_remap`` parsing to stop " +"stripping the ``path_root`` value from URL paths. If users depend on this " +"path mangling, operators should set ``mangle_client_paths`` to \"True\" " +"before upgrading." +msgstr "" +"``domain_remap`` ミドルウェアは、``mangle_client_paths`` オプションをサポート" +"しました。デフォルト値 \"false\" では、``domain_remap`` の解析で URL のパスか" +"ら ``path_root`` 値を取り除かなくなります。このパスの切り取りに依存している場" +"合は、アップグレードする前に、オペレーターは ``mangle_client_paths`` を " +"\"True\" に設定する必要があります。" + +msgid "" "The default for `object_post_as_copy` has been changed to False. The option " "is now deprecated and will be removed in a future release. If your cluster " "is still running with post-as-copy enabled, please update it to use the " @@ -859,6 +1155,16 @@ "回復が可能になります。" msgid "" +"The number of container updates on object PUTs (ie to update listings) has " +"been recomputed to be far more efficient while maintaining durability " +"guarantees. Specifically, object PUTs to erasure-coded policies will now " +"normally result in far fewer container updates." +msgstr "" +"オブジェクトの PUT によるコンテナー更新の数(つまり、一覧の更新)は、耐久性の" +"保証を維持しながら、遥かに効率的に再計算されます。具体的には、消去符号化ポリ" +"シーへのオブジェクトの PUT は、通常、コンテナーの更新が大幅に少なくなります。" + +msgid "" "The object and container server config option ``slowdown`` has been " "deprecated in favor of the new ``objects_per_second`` and " "``containers_per_second`` options." @@ -895,6 +1201,17 @@ "スによって、並べ替えられます。" msgid "" +"The tempurl digest algorithm is now configurable, and Swift added support " +"for both SHA-256 and SHA-512. Supported tempurl digests are exposed to " +"clients in ``/info``. 
Additionally, tempurl signatures can now be base64 " +"encoded." +msgstr "" +"tmpurl のダイジェストアルゴリズムが設定可能になり、Swift は、SHA-256 および " +"SHA-512 の両方のサポートを追加しました。サポートされる tmpurl ダイジェスト" +"は、``/info`` にてクライアントに公開されます。さらに、tempurl の署名を " +"base64 でエンコードできるようになりました。" + +msgid "" "Throttle update_auditor_status calls so it updates no more than once per " "minute." msgstr "" @@ -940,6 +1257,16 @@ msgid "Upgrade Notes" msgstr "アップグレード時の注意" +msgid "" +"Upgrade impact -- during a rolling upgrade, an updated proxy server may " +"write a manifest that an out-of-date proxy server will not be able to read. " +"This will resolve itself once the upgrade completes on all nodes." +msgstr "" +"アップグレードの影響 -- ローリングアップグレード中に、更新されたプロキシサー" +"バーは、期限切れのプロキシサーバーが読み込むことができないマニフェストを書き" +"出す可能性があります。これは、すべてのノードでアップグレードが完了すると自ず" +"と解決します。" + msgid "Various other minor bug fixes and improvements." msgstr "様々な他のマイナーなバグ修正と改善。" @@ -963,10 +1290,48 @@ "ポートするためにリングモジュールで機能が有効になっています。 CLI ツールは、以" "降のリリースで提供されます。" +msgid "" +"When requesting objects, return 404 if a tombstone is found and is newer " +"than any data found. Previous behavior was to return stale data." +msgstr "" +"オブジェクトを要求するとき、廃棄済みオブジェクト (tombstone) があり、他のデー" +"タよりも新しい場合には 404 を返します。以前の動作では、古いデータが返されてい" +"ました。" + +msgid "" +"When the object auditor examines an object, it will now add any missing " +"metadata checksums." +msgstr "" +"オブジェクト監査がオブジェクトを検査するとき、欠落しているメタデータのチェッ" +"クサムを追加します。" + +msgid "" +"With heartbeating turned on, the proxy will start its response immediately " +"with 202 Accepted then send a single whitespace character periodically until " +"the request completes. At that point, a final summary chunk will be sent " +"which includes a \"Response Status\" key indicating success or failure and " +"(if successful) an \"Etag\" key indicating the Etag of the resulting SLO." +msgstr "" +"ハートビートをオンにすると、プロキシは 直ぐに 202 Accepted で応答を開始し、リ" +"クエストが完了するまで一つの空白文字を定期的に送信します。その時点で、成功か" +"失敗かを示す「Response Status 」キーと、成功した場合には SLO の結果として生じ" +"る Etag を示す「Etag」キーを含む最終サマリーチャンクが送信されるようになりま" +"す。" + msgid "Write-affinity aware object deletion" msgstr "書き込みアフィニティは、オブジェクトの削除を認識します。" msgid "" +"X-Delete-At computation now uses X-Timestamp instead of system time. This " +"prevents clock skew causing inconsistent expiry data." +msgstr "" +"X-Delete-At の計算に、システム時間の代わりに X-Timestamp を使うようになりまし" +"た。これは、時刻の誤差によって起こる期限データの矛盾を防止します。" + +msgid "``swift-ring-builder`` improvements" +msgstr "``swift-ring-builder`` の改善" + +msgid "" "cname_lookup middleware now accepts a ``nameservers`` config variable that, " "if defined, will be used for DNS lookups instead of the system default." msgstr "" diff -Nru swift-2.17.0/releasenotes/source/locale/ko_KR/LC_MESSAGES/releasenotes.po swift-2.18.0/releasenotes/source/locale/ko_KR/LC_MESSAGES/releasenotes.po --- swift-2.17.0/releasenotes/source/locale/ko_KR/LC_MESSAGES/releasenotes.po 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/releasenotes/source/locale/ko_KR/LC_MESSAGES/releasenotes.po 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,75 @@ +# Sungjin Kang , 2017. 
#zanata +msgid "" +msgstr "" +"Project-Id-Version: Swift Release Notes\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2018-02-28 19:39+0000\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"PO-Revision-Date: 2017-02-07 03:09+0000\n" +"Last-Translator: Sungjin Kang \n" +"Language-Team: Korean (South Korea)\n" +"Language: ko_KR\n" +"X-Generator: Zanata 4.3.3\n" +"Plural-Forms: nplurals=1; plural=0\n" + +msgid "2.10.0" +msgstr "2.10.0" + +msgid "2.10.1" +msgstr "2.10.1" + +msgid "2.11.0" +msgstr "2.11.0" + +msgid "2.12.0" +msgstr "2.12.0" + +msgid "Bug Fixes" +msgstr "버그 수정" + +msgid "Critical Issues" +msgstr "치명적인 이슈" + +msgid "Current (Unreleased) Release Notes" +msgstr "현재 (릴리드전) 릴리즈 노트" + +msgid "New Features" +msgstr "새로운 기능" + +msgid "Newton Series Release Notes" +msgstr "Newton 시리즈 릴리즈 노트" + +msgid "Other Notes" +msgstr "기타 기능" + +msgid "Swift Release Notes" +msgstr "Swift 릴리즈 노트" + +msgid "" +"Update dnspython dependency to 1.14, removing the need to have separate " +"dnspython dependencies for Py2 and Py3." +msgstr "" +"Dnspython 의존성을 1.14로 업그래이드 하여 Py2 와 Py3 에 대한 별도의 " +"dnspython 의존성을 제거할 필요가 없습니다." + +msgid "Updated the PyECLib dependency to 1.3.1." +msgstr "PyECLib 의존성을 1.3.1 로 업그레이드 하였습니다." + +msgid "Upgrade Notes" +msgstr "업그레이드 노트" + +msgid "Various other minor bug fixes and improvements." +msgstr "다양한 다른 마이너 버그 수정 및 개선." + +msgid "" +"WARNING: If you are using the ISA-L library for erasure codes, please " +"upgrade to liberasurecode 1.3.1 (or later) as soon as possible. If you are " +"using isa_l_rs_vand with more than 4 parity, please read https://bugs." +"launchpad.net/swift/+bug/1639691 and take necessary action." +msgstr "" +"경고: Erasure 코드에서 사용하는 ISA-L 라이브러리를 사용하는 경우, 최대한 빨" +"리 liberasurecode 1.3.1 (또는 그 이상) 으로 업그레이드하십시오. 4 parity 보" +"다 큰 isa_l_rs_vand 를 사용하는 경우, https://bugs.launchpad.net/swift/" +"+bug/1639691 을 읽고 필요한 조치를 취하십시오." diff -Nru swift-2.17.0/releasenotes/source/queens.rst swift-2.18.0/releasenotes/source/queens.rst --- swift-2.17.0/releasenotes/source/queens.rst 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/releasenotes/source/queens.rst 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,6 @@ +=================================== + Queens Series Release Notes +=================================== + +.. release-notes:: + :branch: stable/queens diff -Nru swift-2.17.0/RELEASENOTES.rst swift-2.18.0/RELEASENOTES.rst --- swift-2.17.0/RELEASENOTES.rst 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/RELEASENOTES.rst 2018-05-30 10:18:53.000000000 +0000 @@ -0,0 +1,111 @@ +===== +swift +===== + +.. _swift_2.18.0: + +2.18.0 +====== + +.. _swift_2.18.0_New Features: + +New Features +------------ + +.. releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml @ fbb3b1ffdaf7d01744753369d48db6931e8c4dd4 + +- Added container sharding, an operator controlled feature that + may be used to shard very large container databases into a + number of smaller shard containers. This mitigates the issues + with one large DB by distributing the data across multiple + smaller databases throughout the cluster. Please read the full + overview at + https://docs.openstack.org/swift/latest/overview_container_sharding.html + +.. releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml @ fbb3b1ffdaf7d01744753369d48db6931e8c4dd4 + +- Provide an S3 API compatibility layer. The external "swift3" + project has been imported into Swift's codebase as the "s3api" + middleware. + +.. 
releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml @ fbb3b1ffdaf7d01744753369d48db6931e8c4dd4 + +- Added "emergency mode" hooks in the account and container replicators. + These options may be used to prioritize moving handoff + partitions to primary locations more quickly. This helps when + adding capacity to a ring. + + - Added ``-d `` and ``-p `` command line options. + + - Added a handoffs-only mode. + +.. releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml @ fbb3b1ffdaf7d01744753369d48db6931e8c4dd4 + +- Add a multiprocess mode to the object replicator. Setting the + ``replicator_workers`` setting to a positive value N will result + in the replicator using up to N worker processes to perform + replication tasks. At most one worker per disk will be spawned. + + Worker process logs will have a bit of information prepended so + operators can tell which messages came from which worker. The + prefix is "[worker M/N pid=P] ", where M is the worker's index, + N is the total number of workers, and P is the process ID. Every + message from the replicator's logger will have the prefix + +.. releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml @ fbb3b1ffdaf7d01744753369d48db6931e8c4dd4 + +- The object reconstructor will now fork all available worker + processes when operating on a subset of local devices. + +.. releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml @ fbb3b1ffdaf7d01744753369d48db6931e8c4dd4 + +- Add support for PROXY protocol v1 to the proxy server. This + allows the Swift proxy server to log accurate client IP + addresses when there is a proxy or SSL-terminator between the + client and the Swift proxy server. Example servers supporting + this PROXY protocol include stunnel, haproxy, hitch, and + varnish. See the sample proxy server config file for the + appropriate config setting to enable or disable this + functionality. + +.. releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml @ fbb3b1ffdaf7d01744753369d48db6931e8c4dd4 + +- In the ratelimit middleware, account whitelist and blacklist + settings have been deprecated and may be removed in a future + release. When found, a deprecation message will be logged. + Instead of these config file values, set X-Account-Sysmeta- + Global-Write-Ratelimit:WHITELIST and X-Account-Sysmeta-Global- + Write-Ratelimit:BLACKLIST on the particular accounts that need + to be whitelisted or blacklisted. System metadata cannot be added + or modified by standard clients. Use the internal client to set sysmeta. + +.. releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml @ fbb3b1ffdaf7d01744753369d48db6931e8c4dd4 + +- Add a ``--drop-prefixes`` flag to swift-account-info, + swift-container-info, and swift-object-info. This makes the + output between the three more consistent. + +.. releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml @ fbb3b1ffdaf7d01744753369d48db6931e8c4dd4 + +- statsd error messages correspond to 5xx responses only. This + makes monitoring more useful because actual errors (5xx) will + not be hidden by common user requests (4xx). Previously, some 4xx + responses would be included in timing information in the statsd + error messages. + +.. releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml @ fbb3b1ffdaf7d01744753369d48db6931e8c4dd4 + +- Truncate error logs to prevent log handler from running out of buffer. + +.. releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml @ fbb3b1ffdaf7d01744753369d48db6931e8c4dd4 + +- Updated requirements.txt to match global exclusions and formatting. + +.. 
releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml @ fbb3b1ffdaf7d01744753369d48db6931e8c4dd4 + +- tempauth user names now support unicode characters. + +.. releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml @ fbb3b1ffdaf7d01744753369d48db6931e8c4dd4 + +- Various other minor bug fixes and improvements. + diff -Nru swift-2.17.0/requirements.txt swift-2.18.0/requirements.txt --- swift-2.17.0/requirements.txt 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/requirements.txt 2018-05-30 10:17:02.000000000 +0000 @@ -2,11 +2,13 @@ # of appearance. Changing the order has an impact on the overall integration # process, which may cause wedges in the gate later. -dnspython>=1.14.0 # http://www.dnspython.org/LICENSE +dnspython>=1.14.0;python_version=='2.7' # http://www.dnspython.org/LICENSE eventlet>=0.17.4 # MIT greenlet>=0.3.1 netifaces>=0.5,!=0.10.0,!=0.10.1 -pastedeploy>=1.3.3 +PasteDeploy>=1.3.3 +lxml +requests>=2.14.2 # Apache-2.0 six>=1.9.0 xattr>=0.4 PyECLib>=1.3.1 # BSD diff -Nru swift-2.17.0/setup.cfg swift-2.18.0/setup.cfg --- swift-2.17.0/setup.cfg 2018-02-05 14:02:17.000000000 +0000 +++ swift-2.18.0/setup.cfg 2018-05-30 10:18:54.000000000 +0000 @@ -36,6 +36,7 @@ bin/swift-container-info bin/swift-container-replicator bin/swift-container-server + bin/swift-container-sharder bin/swift-container-sync bin/swift-container-updater bin/swift-container-reconciler @@ -66,8 +67,12 @@ kms_keymaster = oslo.config>=4.0.0,!=4.3.0,!=4.4.0 # Apache-2.0 castellan>=0.13.0 # Apache-2.0 +keystone = + keystonemiddleware>=4.17.0 [entry_points] +console_scripts = + swift-manage-shard-ranges = swift.cli.manage_shard_ranges:main paste.app_factory = proxy = swift.proxy.server:app_factory object = swift.obj.server:app_factory @@ -106,12 +111,8 @@ kms_keymaster = swift.common.middleware.crypto.kms_keymaster:filter_factory listing_formats = swift.common.middleware.listing_formats:filter_factory symlink = swift.common.middleware.symlink:filter_factory - -[build_sphinx] -all_files = 1 -build-dir = doc/build -source-dir = doc/source -warning-is-error = 1 + s3api = swift.common.middleware.s3api.s3api:filter_factory + s3token = swift.common.middleware.s3api.s3token:filter_factory [egg_info] tag_build = diff -Nru swift-2.17.0/swift/account/backend.py swift-2.18.0/swift/account/backend.py --- swift-2.17.0/swift/account/backend.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/account/backend.py 2018-05-30 10:17:02.000000000 +0000 @@ -22,7 +22,7 @@ import sqlite3 from swift.common.utils import Timestamp -from swift.common.db import DatabaseBroker, utf8encode +from swift.common.db import DatabaseBroker, utf8encode, zero_like DATADIR = 'accounts' @@ -233,7 +233,7 @@ with self.get() as conn: row = conn.execute( 'SELECT container_count from account_stat').fetchone() - return (row[0] == 0) + return zero_like(row[0]) def make_tuple_for_pickle(self, record): return (record['name'], record['put_timestamp'], @@ -254,7 +254,7 @@ :param storage_policy_index: the storage policy for this container """ if Timestamp(delete_timestamp) > Timestamp(put_timestamp) and \ - object_count in (None, '', 0, '0'): + zero_like(object_count): deleted = 1 else: deleted = 0 @@ -273,8 +273,7 @@ :returns: True if the DB is considered to be deleted, False otherwise """ - return status == 'DELETED' or ( - container_count in (None, '', 0, '0') and + return status == 'DELETED' or zero_like(container_count) and ( Timestamp(delete_timestamp) > Timestamp(put_timestamp)) def _is_deleted(self, conn): @@ -509,7 +508,7 @@ record[2] = row[2] 
# If deleted, mark as such if Timestamp(record[2]) > Timestamp(record[1]) and \ - record[3] in (None, '', 0, '0'): + zero_like(record[3]): record[5] = 1 else: record[5] = 0 diff -Nru swift-2.17.0/swift/cli/form_signature.py swift-2.18.0/swift/cli/form_signature.py --- swift-2.17.0/swift/cli/form_signature.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/cli/form_signature.py 2018-05-30 10:17:02.000000000 +0000 @@ -17,6 +17,7 @@ """ from __future__ import print_function import hmac +import six from hashlib import sha1 from os.path import basename from time import time @@ -92,8 +93,14 @@ print('For example: /v1/account/container') print(' Or: /v1/account/container/object_prefix') return 1 - sig = hmac.new(key, '%s\n%s\n%s\n%s\n%s' % (path, redirect, max_file_size, - max_file_count, expires), + data = '%s\n%s\n%s\n%s\n%s' % (path, redirect, max_file_size, + max_file_count, expires) + if six.PY3: + data = data if isinstance(data, six.binary_type) else \ + data.encode('utf8') + key = key if isinstance(key, six.binary_type) else \ + key.encode('utf8') + sig = hmac.new(key, data, sha1).hexdigest() print(' Expires:', expires) print('Signature:', sig) diff -Nru swift-2.17.0/swift/cli/info.py swift-2.18.0/swift/cli/info.py --- swift-2.17.0/swift/cli/info.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/cli/info.py 2018-05-30 10:17:02.000000000 +0000 @@ -23,7 +23,7 @@ from swift.common.ring import Ring from swift.common.request_helpers import is_sys_meta, is_user_meta, \ strip_sys_meta_prefix, strip_user_meta_prefix, \ - is_object_transient_sysmeta + is_object_transient_sysmeta, strip_object_transient_sysmeta_prefix from swift.account.backend import AccountBroker, DATADIR as ABDATADIR from swift.container.backend import ContainerBroker, DATADIR as CBDATADIR from swift.obj.diskfile import get_data_dir, read_metadata, DATADIR_BASE, \ @@ -191,13 +191,17 @@ 'real value is set in the config file on each storage node.') -def print_db_info_metadata(db_type, info, metadata): +def print_db_info_metadata(db_type, info, metadata, drop_prefixes=False): """ print out data base info/metadata based on its type :param db_type: database type, account or container :param info: dict of data base info :param metadata: dict of data base metadata + :param drop_prefixes: if True, strip "X-Account-Meta-", + "X-Container-Meta-", "X-Account-Sysmeta-", and + "X-Container-Sysmeta-" when displaying + User Metadata and System Metadata dicts """ if info is None: raise ValueError('DB info is None') @@ -274,9 +278,13 @@ sys_metadata = {} for key, (value, timestamp) in metadata.items(): if is_user_meta(db_type, key): - user_metadata[strip_user_meta_prefix(db_type, key)] = value + if drop_prefixes: + key = strip_user_meta_prefix(db_type, key) + user_metadata[key] = value elif is_sys_meta(db_type, key): - sys_metadata[strip_sys_meta_prefix(db_type, key)] = value + if drop_prefixes: + key = strip_sys_meta_prefix(db_type, key) + sys_metadata[key] = value else: title = key.replace('_', '-').title() print(' %s: %s' % (title, value)) @@ -290,8 +298,29 @@ else: print('No user metadata found in db file') + if db_type == 'container': + print('Sharding Metadata:') + shard_type = 'root' if info['is_root'] else 'shard' + print(' Type: %s' % shard_type) + print(' State: %s' % info['db_state']) + if info.get('shard_ranges'): + print('Shard Ranges (%d):' % len(info['shard_ranges'])) + for srange in info['shard_ranges']: + srange = dict(srange, state_text=srange.state_text) + print(' Name: %(name)s' % srange) + print(' 
lower: %(lower)r, upper: %(upper)r' % srange) + print(' Object Count: %(object_count)d, Bytes Used: ' + '%(bytes_used)d, State: %(state_text)s (%(state)d)' + % srange) + print(' Created at: %s (%s)' + % (Timestamp(srange['timestamp']).isoformat, + srange['timestamp'])) + print(' Meta Timestamp: %s (%s)' + % (Timestamp(srange['meta_timestamp']).isoformat, + srange['meta_timestamp'])) + -def print_obj_metadata(metadata): +def print_obj_metadata(metadata, drop_prefixes=False): """ Print out basic info and metadata from object, as returned from :func:`swift.obj.diskfile.read_metadata`. @@ -302,6 +331,10 @@ Additional metadata is displayed unmodified. :param metadata: dict of object metadata + :param drop_prefixes: if True, strip "X-Object-Meta-", "X-Object-Sysmeta-", + and "X-Object-Transient-Sysmeta-" when displaying + User Metadata, System Metadata, and Transient + System Metadata entries :raises ValueError: """ @@ -341,10 +374,16 @@ for key, value in metadata.items(): if is_user_meta('Object', key): + if drop_prefixes: + key = strip_user_meta_prefix('Object', key) user_metadata[key] = value elif is_sys_meta('Object', key): + if drop_prefixes: + key = strip_sys_meta_prefix('Object', key) sys_metadata[key] = value elif is_object_transient_sysmeta(key): + if drop_prefixes: + key = strip_object_transient_sysmeta_prefix(key) transient_sys_metadata[key] = value else: other_metadata[key] = value @@ -352,8 +391,8 @@ def print_metadata(title, items): print(title) if items: - for meta_key in sorted(items): - print(' %s: %s' % (meta_key, items[meta_key])) + for key, value in sorted(items.items()): + print(' %s: %s' % (key, value)) else: print(' No metadata found') @@ -363,7 +402,8 @@ print_metadata('Other Metadata:', other_metadata) -def print_info(db_type, db_file, swift_dir='/etc/swift', stale_reads_ok=False): +def print_info(db_type, db_file, swift_dir='/etc/swift', stale_reads_ok=False, + drop_prefixes=False): if db_type not in ('account', 'container'): print("Unrecognized DB type: internal error") raise InfoSystemExit() @@ -387,8 +427,14 @@ raise InfoSystemExit() raise account = info['account'] - container = info['container'] if db_type == 'container' else None - print_db_info_metadata(db_type, info, broker.metadata) + container = None + if db_type == 'container': + container = info['container'] + info['is_root'] = broker.is_root_container() + sranges = broker.get_shard_ranges() + if sranges: + info['shard_ranges'] = sranges + print_db_info_metadata(db_type, info, broker.metadata, drop_prefixes) try: ring = Ring(swift_dir, ring_name=db_type) except Exception: @@ -398,7 +444,7 @@ def print_obj(datafile, check_etag=True, swift_dir='/etc/swift', - policy_name=''): + policy_name='', drop_prefixes=False): """ Display information about an object read from the datafile. Optionally verify the datafile content matches the ETag metadata. @@ -409,6 +455,10 @@ metadata. :param swift_dir: the path on disk to rings :param policy_name: optionally the name to use when finding the ring + :param drop_prefixes: if True, strip "X-Object-Meta-", "X-Object-Sysmeta-", + and "X-Object-Transient-Sysmeta-" when displaying + User Metadata, System Metadata, and Transient + System Metadata entries """ if not os.path.exists(datafile): print("Data file doesn't exist") @@ -458,7 +508,7 @@ etag = metadata.pop('ETag', '') length = metadata.pop('Content-Length', '') path = metadata.get('name', '') - print_obj_metadata(metadata) + print_obj_metadata(metadata, drop_prefixes) # Optional integrity check; it's useful, but slow. 
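The new ``drop_prefixes`` option threads through both the DB info and object info printers shown above. A minimal sketch of calling the object-side helper directly, assuming only the signatures in this hunk (the .data path is illustrative, not taken from the patch):

    from swift.obj.diskfile import read_metadata
    from swift.cli.info import print_obj_metadata

    # illustrative path to an object .data file on a storage node
    with open('/srv/node/sdb1/objects/.../1526000000.00000.data', 'rb') as fp:
        metadata = read_metadata(fp)
    # user/sysmeta/transient-sysmeta keys print with their prefixes stripped
    print_obj_metadata(metadata, drop_prefixes=True)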
file_len = None diff -Nru swift-2.17.0/swift/cli/manage_shard_ranges.py swift-2.18.0/swift/cli/manage_shard_ranges.py --- swift-2.17.0/swift/cli/manage_shard_ranges.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/cli/manage_shard_ranges.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,515 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy +# of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +""" +The ``swift-manage-shard-ranges`` tool provides commands for initiating +sharding of a container. ``swift-manage-shard-ranges`` operates directly on a +container database file. + +.. note:: + + ``swift-manage-shard-ranges`` must only be used on one replica of a + container database to avoid inconsistent results. The modifications made by + ``swift-manage-shard-ranges`` will be automatically copied to other + replicas of the container database via normal replication processes. + +There are three steps in the process of initiating sharding, each of which may +be performed in isolation or, as shown below, using a single command. + +#. The ``find`` sub-command scans the container database to identify how many + shard containers will be required and which objects they will manage. Each + shard container manages a range of the object namespace defined by a + ``lower`` and ``upper`` bound. The maximum number of objects to be allocated + to each shard container is specified on the command line. For example:: + + $ swift-manage-shard-ranges find 500000 + Loaded db broker for AUTH_test/c1. + [ + { + "index": 0, + "lower": "", + "object_count": 500000, + "upper": "o_01086834" + }, + { + "index": 1, + "lower": "o_01086834", + "object_count": 500000, + "upper": "o_01586834" + }, + { + "index": 2, + "lower": "o_01586834", + "object_count": 500000, + "upper": "o_02087570" + }, + { + "index": 3, + "lower": "o_02087570", + "object_count": 500000, + "upper": "o_02587572" + }, + { + "index": 4, + "lower": "o_02587572", + "object_count": 500000, + "upper": "o_03087572" + }, + { + "index": 5, + "lower": "o_03087572", + "object_count": 500000, + "upper": "o_03587572" + }, + { + "index": 6, + "lower": "o_03587572", + "object_count": 349194, + "upper": "" + } + ] + Found 7 ranges in 4.37222s (total object count 3349194) + + This command returns a list of shard ranges each of which describes the + namespace to be managed by a shard container. No other action is taken by + this command and the container database is unchanged. The output may be + redirected to a file for subsequent retrieval by the ``replace`` command. + For example:: + + $ swift-manage-shard-ranges find 500000 > my_shard_ranges + Loaded db broker for AUTH_test/c1. + Found 7 ranges in 2.448s (total object count 3349194) + +#. The ``replace`` sub-command deletes any shard ranges that might already be + in the container database and inserts shard ranges from a given file. The + file contents should be in the format generated by the ``find`` sub-command. + For example:: + + $ swift-manage-shard-ranges replace my_shard_ranges + Loaded db broker for AUTH_test/c1. + No shard ranges found to delete. 
+ Injected 7 shard ranges. + Run container-replicator to replicate them to other nodes. + Use the enable sub-command to enable sharding. + + The container database is modified to store the shard ranges, but the + container will not start sharding until sharding is enabled. The ``info`` + sub-command may be used to inspect the state of the container database at + any point, and the ``show`` sub-command may be used to display the inserted + shard ranges. + + Shard ranges stored in the container database may be replaced using the + ``replace`` sub-command. This will first delete all existing shard ranges + before storing new shard ranges. Shard ranges may also be deleted from the + container database using the ``delete`` sub-command. + + Shard ranges should not be replaced or deleted using + ``swift-manage-shard-ranges`` once the next step of enabling sharding has + been taken. + +#. The ``enable`` sub-command enables the container for sharding. The sharder + daemon and/or container replicator daemon will replicate shard ranges to + other replicas of the container DB and the sharder daemon will proceed to + shard the container. This process may take some time depending on the size + of the container, the number of shard ranges and the underlying hardware. + + .. note:: + + Once the ``enable`` sub-command has been used there is no supported + mechanism to revert sharding. Do not use ``swift-manage-shard-ranges`` + to make any further changes to the shard ranges in the container DB. + + For example:: + + $ swift-manage-shard-ranges enable + Loaded db broker for AUTH_test/c1. + Container moved to state 'sharding' with epoch 1525345093.22908. + Run container-sharder on all nodes to shard the container. + + This does not shard the container - sharding is performed by the + :ref:`sharder_daemon` - but sets the necessary state in the database for the + daemon to subsequently start the sharding process. + + The ``epoch`` value displayed in the output is the time at which sharding + was enabled. When the :ref:`sharder_daemon` starts sharding this container + it creates a new container database file using the epoch in the filename to + distinguish it from the retiring DB that is being sharded. + +All three steps may be performed with one sub-command:: + + $ swift-manage-shard-ranges find_and_replace 500000 --enable \ + > --force + Loaded db broker for AUTH_test/c1. + No shard ranges found to delete. + Injected 7 shard ranges. + Run container-replicator to replicate them to other nodes. + Container moved to state 'sharding' with epoch 1525345669.46153. + Run container-sharder on all nodes to shard the container. 
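Because the module's ``main()`` (defined at the end of this file) accepts an argv list, the same workflow can also be driven from Python, which can be handy in tests or one-off scripts. A minimal sketch, with the container DB path purely illustrative:

    from swift.cli.manage_shard_ranges import main

    db = '/srv/node/sdb1/containers/.../container.db'  # one replica, illustrative
    main([db, 'find', '500000'])   # print candidate shard ranges as JSON
    main([db, 'info'])             # report the sharding state of this replica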
+ +""" +from __future__ import print_function +import argparse +import json +import sys +import time + +from six.moves import input + +from swift.common.utils import Timestamp, get_logger, ShardRange +from swift.container.backend import ContainerBroker, UNSHARDED +from swift.container.sharder import make_shard_ranges, sharding_enabled, \ + CleavingContext + + +def _load_and_validate_shard_data(args): + try: + with open(args.input, 'rb') as fd: + try: + data = json.load(fd) + if not isinstance(data, list): + raise ValueError('Shard data must be a list of dicts') + for k in ('lower', 'upper', 'index', 'object_count'): + for shard in data: + shard[k] + return data + except (TypeError, ValueError, KeyError) as err: + print('Failed to load valid shard range data: %r' % err, + file=sys.stderr) + exit(2) + except IOError as err: + print('Failed to open file %s: %s' % (args.input, err), + file=sys.stderr) + exit(2) + + +def _check_shard_ranges(own_shard_range, shard_ranges): + reasons = [] + + def reason(x, y): + if x != y: + reasons.append('%s != %s' % (x, y)) + + if not shard_ranges: + reasons.append('No shard ranges.') + else: + reason(own_shard_range.lower, shard_ranges[0].lower) + reason(own_shard_range.upper, shard_ranges[-1].upper) + for x, y in zip(shard_ranges, shard_ranges[1:]): + reason(x.upper, y.lower) + + if reasons: + print('WARNING: invalid shard ranges: %s.' % reasons) + print('Aborting.') + exit(2) + + +def _check_own_shard_range(broker, args): + # TODO: this check is weak - if the shards prefix changes then we may not + # identify a shard container. The goal is to not inadvertently create an + # entire namespace default shard range for a shard container. + is_shard = broker.account.startswith(args.shards_account_prefix) + own_shard_range = broker.get_own_shard_range(no_default=is_shard) + if not own_shard_range: + print('WARNING: shard container missing own shard range.') + print('Aborting.') + exit(2) + return own_shard_range + + +def _find_ranges(broker, args, status_file=None): + start = last_report = time.time() + limit = 5 if status_file else -1 + shard_data, last_found = broker.find_shard_ranges( + args.rows_per_shard, limit=limit) + if shard_data: + while not last_found: + if last_report + 10 < time.time(): + print('Found %d ranges in %gs; looking for more...' 
% ( + len(shard_data), time.time() - start), file=status_file) + last_report = time.time() + # prefix doesn't matter since we aren't persisting it + found_ranges = make_shard_ranges(broker, shard_data, '.shards_') + more_shard_data, last_found = broker.find_shard_ranges( + args.rows_per_shard, existing_ranges=found_ranges, limit=5) + shard_data.extend(more_shard_data) + return shard_data, time.time() - start + + +def find_ranges(broker, args): + shard_data, delta_t = _find_ranges(broker, args, sys.stderr) + print(json.dumps(shard_data, sort_keys=True, indent=2)) + print('Found %d ranges in %gs (total object count %s)' % + (len(shard_data), delta_t, + sum(r['object_count'] for r in shard_data)), + file=sys.stderr) + return 0 + + +def show_shard_ranges(broker, args): + shard_ranges = broker.get_shard_ranges( + include_deleted=getattr(args, 'include_deleted', False)) + shard_data = [dict(sr, state=sr.state_text) + for sr in shard_ranges] + + if not shard_data: + print("No shard data found.", file=sys.stderr) + elif getattr(args, 'brief', False): + print("Existing shard ranges:", file=sys.stderr) + print(json.dumps([(sd['lower'], sd['upper']) for sd in shard_data], + sort_keys=True, indent=2)) + else: + print("Existing shard ranges:", file=sys.stderr) + print(json.dumps(shard_data, sort_keys=True, indent=2)) + return 0 + + +def db_info(broker, args): + print('Sharding enabled = %s' % sharding_enabled(broker)) + own_sr = broker.get_own_shard_range(no_default=True) + print('Own shard range: %s' % + (json.dumps(dict(own_sr, state=own_sr.state_text), + sort_keys=True, indent=2) + if own_sr else None)) + db_state = broker.get_db_state() + print('db_state = %s' % db_state) + if db_state == 'sharding': + print('Retiring db id: %s' % broker.get_brokers()[0].get_info()['id']) + print('Cleaving context: %s' % + json.dumps(dict(CleavingContext.load(broker)), + sort_keys=True, indent=2)) + print('Metadata:') + for k, (v, t) in broker.metadata.items(): + print(' %s = %s' % (k, v)) + + +def delete_shard_ranges(broker, args): + shard_ranges = broker.get_shard_ranges() + if not shard_ranges: + print("No shard ranges found to delete.") + return 0 + + while not args.force: + print('This will delete existing %d shard ranges.' % len(shard_ranges)) + if broker.get_db_state() != UNSHARDED: + print('WARNING: Be very cautious about deleting existing shard ' + 'ranges. Deleting all ranges in this db does not guarantee ' + 'deletion of all ranges on all replicas of the db.') + print(' - this db is in state %s' % broker.get_db_state()) + print(' - %d existing shard ranges have started sharding' % + [sr.state != ShardRange.FOUND + for sr in shard_ranges].count(True)) + choice = input('Do you want to show the existing ranges [s], ' + 'delete the existing ranges [yes] ' + 'or quit without deleting [q]? ') + if choice == 's': + show_shard_ranges(broker, args) + continue + elif choice == 'q': + return 1 + elif choice == 'yes': + break + else: + print('Please make a valid choice.') + print() + + now = Timestamp.now() + for sr in shard_ranges: + sr.deleted = 1 + sr.timestamp = now + broker.merge_shard_ranges(shard_ranges) + print('Deleted %s existing shard ranges.' 
% len(shard_ranges)) + return 0 + + +def _replace_shard_ranges(broker, args, shard_data, timeout=None): + own_shard_range = _check_own_shard_range(broker, args) + shard_ranges = make_shard_ranges( + broker, shard_data, args.shards_account_prefix) + _check_shard_ranges(own_shard_range, shard_ranges) + + if args.verbose > 0: + print('New shard ranges to be injected:') + print(json.dumps([dict(sr) for sr in shard_ranges], + sort_keys=True, indent=2)) + + # Crank up the timeout in an effort to *make sure* this succeeds + with broker.updated_timeout(max(timeout, args.replace_timeout)): + delete_shard_ranges(broker, args) + broker.merge_shard_ranges(shard_ranges) + + print('Injected %d shard ranges.' % len(shard_ranges)) + print('Run container-replicator to replicate them to other nodes.') + if args.enable: + return enable_sharding(broker, args) + else: + print('Use the enable sub-command to enable sharding.') + return 0 + + +def replace_shard_ranges(broker, args): + shard_data = _load_and_validate_shard_data(args) + return _replace_shard_ranges(broker, args, shard_data) + + +def find_replace_shard_ranges(broker, args): + shard_data, delta_t = _find_ranges(broker, args, sys.stdout) + # Since we're trying to one-shot this, and the previous step probably + # took a while, make the timeout for writing *at least* that long + return _replace_shard_ranges(broker, args, shard_data, timeout=delta_t) + + +def _enable_sharding(broker, own_shard_range, args): + if own_shard_range.update_state(ShardRange.SHARDING): + own_shard_range.epoch = Timestamp.now() + own_shard_range.state_timestamp = own_shard_range.epoch + + with broker.updated_timeout(args.enable_timeout): + broker.merge_shard_ranges([own_shard_range]) + broker.update_metadata({'X-Container-Sysmeta-Sharding': + ('True', Timestamp.now().normal)}) + return own_shard_range + + +def enable_sharding(broker, args): + own_shard_range = _check_own_shard_range(broker, args) + _check_shard_ranges(own_shard_range, broker.get_shard_ranges()) + + if own_shard_range.state == ShardRange.ACTIVE: + own_shard_range = _enable_sharding(broker, own_shard_range, args) + print('Container moved to state %r with epoch %s.' % + (own_shard_range.state_text, own_shard_range.epoch.internal)) + elif own_shard_range.state == ShardRange.SHARDING: + if own_shard_range.epoch: + print('Container already in state %r with epoch %s.' % + (own_shard_range.state_text, own_shard_range.epoch.internal)) + print('No action required.') + else: + print('Container already in state %r but missing epoch.' % + own_shard_range.state_text) + own_shard_range = _enable_sharding(broker, own_shard_range, args) + print('Container in state %r given epoch %s.' % + (own_shard_range.state_text, own_shard_range.epoch.internal)) + else: + print('WARNING: container in state %s (should be active or sharding).' 
+ % own_shard_range.state_text) + print('Aborting.') + return 2 + + print('Run container-sharder on all nodes to shard the container.') + return 0 + + +def _add_find_args(parser): + parser.add_argument('rows_per_shard', nargs='?', type=int, default=500000) + + +def _add_replace_args(parser): + parser.add_argument( + '--shards_account_prefix', metavar='shards_account_prefix', type=str, + required=False, help='Prefix for shards account', default='.shards_') + parser.add_argument( + '--replace-timeout', type=int, default=600, + help='Minimum DB timeout to use when replacing shard ranges.') + parser.add_argument( + '--force', '-f', action='store_true', default=False, + help='Delete existing shard ranges; no questions asked.') + parser.add_argument( + '--enable', action='store_true', default=False, + help='Enable sharding after adding shard ranges.') + + +def _add_enable_args(parser): + parser.add_argument( + '--enable-timeout', type=int, default=300, + help='DB timeout to use when enabling sharding.') + + +def _make_parser(): + parser = argparse.ArgumentParser(description='Manage shard ranges') + parser.add_argument('container_db') + parser.add_argument('--verbose', '-v', action='count', + help='Increase output verbosity') + subparsers = parser.add_subparsers( + help='Sub-command help', title='Sub-commands') + + # find + find_parser = subparsers.add_parser( + 'find', help='Find and display shard ranges') + _add_find_args(find_parser) + find_parser.set_defaults(func=find_ranges) + + # delete + delete_parser = subparsers.add_parser( + 'delete', help='Delete all existing shard ranges from db') + delete_parser.add_argument( + '--force', '-f', action='store_true', default=False, + help='Delete existing shard ranges; no questions asked.') + delete_parser.set_defaults(func=delete_shard_ranges) + + # show + show_parser = subparsers.add_parser( + 'show', help='Print shard range data') + show_parser.add_argument( + '--include_deleted', '-d', action='store_true', default=False, + help='Include deleted shard ranges in output.') + show_parser.add_argument( + '--brief', '-b', action='store_true', default=False, + help='Show only shard range bounds in output.') + show_parser.set_defaults(func=show_shard_ranges) + + # info + info_parser = subparsers.add_parser( + 'info', help='Print container db info') + info_parser.set_defaults(func=db_info) + + # replace + replace_parser = subparsers.add_parser( + 'replace', + help='Replace existing shard ranges. User will be prompted before ' + 'deleting any existing shard ranges.') + replace_parser.add_argument('input', metavar='input_file', + type=str, help='Name of file') + _add_replace_args(replace_parser) + replace_parser.set_defaults(func=replace_shard_ranges) + + # find_and_replace + find_replace_parser = subparsers.add_parser( + 'find_and_replace', + help='Find new shard ranges and replace existing shard ranges. ' + 'User will be prompted before deleting any existing shard ranges.' 
+ ) + _add_find_args(find_replace_parser) + _add_replace_args(find_replace_parser) + _add_enable_args(find_replace_parser) + find_replace_parser.set_defaults(func=find_replace_shard_ranges) + + # enable + enable_parser = subparsers.add_parser( + 'enable', help='Enable sharding and move db to sharding state.') + _add_enable_args(enable_parser) + enable_parser.set_defaults(func=enable_sharding) + _add_replace_args(enable_parser) + return parser + + +def main(args=None): + parser = _make_parser() + args = parser.parse_args(args) + logger = get_logger({}, name='ContainerBroker', log_to_console=True) + broker = ContainerBroker(args.container_db, logger=logger, + skip_commits=True) + broker.get_info() + print('Loaded db broker for %s.' % broker.path, file=sys.stderr) + return args.func(broker, args) + + +if __name__ == '__main__': + exit(main()) diff -Nru swift-2.17.0/swift/cli/recon.py swift-2.18.0/swift/cli/recon.py --- swift-2.17.0/swift/cli/recon.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/cli/recon.py 2018-05-30 10:17:02.000000000 +0000 @@ -87,6 +87,8 @@ url = base_url + recon_type try: body = urllib2.urlopen(url, timeout=self.timeout).read() + if six.PY3 and isinstance(body, six.binary_type): + body = body.decode('utf8') content = json.loads(body) if self.verbose: print("-> %s: %s" % (url, content)) @@ -129,7 +131,7 @@ req = urllib2.Request(url) req.get_method = lambda: 'OPTIONS' conn = urllib2.urlopen(req) - header = conn.info().getheader('Server') + header = conn.info().get('Server') server_header = header.split('/') content = server_header[0] status = 200 diff -Nru swift-2.17.0/swift/cli/ringbuilder.py swift-2.18.0/swift/cli/ringbuilder.py --- swift-2.17.0/swift/cli/ringbuilder.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/cli/ringbuilder.py 2018-05-30 10:17:02.000000000 +0000 @@ -1067,7 +1067,7 @@ print('Worst tier is %.06f (%s)' % (report['max_dispersion'], report['worst_tier'])) if report['graph']: - replica_range = range(int(math.ceil(builder.replicas + 1))) + replica_range = list(range(int(math.ceil(builder.replicas + 1)))) part_count_width = '%%%ds' % max(len(str(builder.parts)), 5) replica_counts_tmpl = ' '.join(part_count_width for i in replica_range) diff -Nru swift-2.17.0/swift/cli/shard-info.py swift-2.18.0/swift/cli/shard-info.py --- swift-2.17.0/swift/cli/shard-info.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/cli/shard-info.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,195 @@ +# Copyright (c) 2017 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from collections import defaultdict + +from swift.common import utils +from swift.common.db_replicator import roundrobin_datadirs +from swift.common.ring import ring +from swift.common.utils import Timestamp +from swift.container.backend import ContainerBroker, DATADIR + +TAB = ' ' + + +def broker_key(broker): + broker.get_info() + return broker.path + + +def container_type(broker): + return 'ROOT' if broker.is_root_container() else 'SHARD' + + +def collect_brokers(conf_path, names2nodes): + conf = utils.readconf(conf_path, 'container-replicator') + root = conf.get('devices', '/srv/node') + swift_dir = conf.get('swift_dir', '/etc/swift') + c_ring = ring.Ring(swift_dir, ring_name='container') + dirs = [] + brokers = defaultdict(dict) + for node in c_ring.devs: + if node is None: + continue + datadir = os.path.join(root, node['device'], DATADIR) + if os.path.isdir(datadir): + dirs.append((datadir, node['id'], lambda *args: True)) + for part, object_file, node_id in roundrobin_datadirs(dirs): + broker = ContainerBroker(object_file) + for node in c_ring.get_part_nodes(int(part)): + if node['id'] == node_id: + node_index = str(node['index']) + break + else: + node_index = 'handoff' + names2nodes[broker_key(broker)][(node_id, node_index)] = broker + return brokers + + +def print_broker_info(node, broker, indent_level=0): + indent = indent_level * TAB + info = broker.get_info() + raw_info = broker._get_info() + deleted_at = float(info['delete_timestamp']) + if deleted_at: + deleted_at = Timestamp(info['delete_timestamp']).isoformat + else: + deleted_at = ' - ' + print('%s(%s) %s, objs: %s, bytes: %s, actual_objs: %s, put: %s, ' + 'deleted: %s' % + (indent, node[1][0], broker.get_db_state(), + info['object_count'], info['bytes_used'], raw_info['object_count'], + Timestamp(info['put_timestamp']).isoformat, deleted_at)) + + +def print_db(node, broker, expect_type='ROOT', indent_level=0): + indent = indent_level * TAB + print('%s(%s) %s node id: %s, node index: %s' % + (indent, node[1][0], broker.db_file, node[0], node[1])) + actual_type = container_type(broker) + if actual_type != expect_type: + print('%s ERROR expected %s but found %s' % + (indent, expect_type, actual_type)) + + +def print_own_shard_range(node, sr, indent_level): + indent = indent_level * TAB + range = '%r - %r' % (sr.lower, sr.upper) + print('%s(%s) %23s, objs: %3s, bytes: %3s, timestamp: %s (%s), ' + 'modified: %s (%s), %7s: %s (%s), deleted: %s epoch: %s' % + (indent, node[1][0], range, sr.object_count, sr.bytes_used, + sr.timestamp.isoformat, sr.timestamp.internal, + sr.meta_timestamp.isoformat, sr.meta_timestamp.internal, + sr.state_text, sr.state_timestamp.isoformat, + sr.state_timestamp.internal, sr.deleted, + sr.epoch.internal if sr.epoch else None)) + + +def print_own_shard_range_info(node, shard_ranges, indent_level=0): + shard_ranges.sort(key=lambda x: x.deleted) + for sr in shard_ranges: + print_own_shard_range(node, sr, indent_level) + + +def print_shard_range(node, sr, indent_level): + indent = indent_level * TAB + range = '%r - %r' % (sr.lower, sr.upper) + print('%s(%s) %23s, objs: %3s, bytes: %3s, timestamp: %s (%s), ' + 'modified: %s (%s), %7s: %s (%s), deleted: %s %s' % + (indent, node[1][0], range, sr.object_count, sr.bytes_used, + sr.timestamp.isoformat, sr.timestamp.internal, + sr.meta_timestamp.isoformat, sr.meta_timestamp.internal, + sr.state_text, sr.state_timestamp.isoformat, + sr.state_timestamp.internal, sr.deleted, sr.name)) + + +def print_shard_range_info(node, shard_ranges, 
indent_level=0): + shard_ranges.sort(key=lambda x: x.deleted) + for sr in shard_ranges: + print_shard_range(node, sr, indent_level) + + +def print_sharding_info(node, broker, indent_level=0): + indent = indent_level * TAB + print('%s(%s) %s' % (indent, node[1][0], broker.get_sharding_sysmeta())) + + +def print_container(name, name2nodes2brokers, expect_type='ROOT', + indent_level=0, used_names=None): + used_names = used_names or set() + indent = indent_level * TAB + node2broker = name2nodes2brokers[name] + ordered_by_index = sorted(node2broker.keys(), key=lambda x: x[1]) + brokers = [(node, node2broker[node]) for node in ordered_by_index] + + print('%sName: %s' % (indent, name)) + if name in used_names: + print('%s (Details already listed)\n' % indent) + return + + used_names.add(name) + print(indent + 'DB files:') + for node, broker in brokers: + print_db(node, broker, expect_type, indent_level=indent_level + 1) + + print(indent + 'Info:') + for node, broker in brokers: + print_broker_info(node, broker, indent_level=indent_level + 1) + + print(indent + 'Sharding info:') + for node, broker in brokers: + print_sharding_info(node, broker, indent_level=indent_level + 1) + print(indent + 'Own shard range:') + for node, broker in brokers: + shard_ranges = broker.get_shard_ranges( + include_deleted=True, include_own=True, exclude_others=True) + print_own_shard_range_info(node, shard_ranges, + indent_level=indent_level + 1) + print(indent + 'Shard ranges:') + shard_names = set() + for node, broker in brokers: + shard_ranges = broker.get_shard_ranges(include_deleted=True) + for sr_name in shard_ranges: + shard_names.add(sr_name.name) + print_shard_range_info(node, shard_ranges, + indent_level=indent_level + 1) + print(indent + 'Shards:') + for sr_name in shard_names: + print_container(sr_name, name2nodes2brokers, expect_type='SHARD', + indent_level=indent_level + 1, used_names=used_names) + print('\n') + + +def run(conf_paths): + # container_name -> (node id, node index) -> broker + name2nodes2brokers = defaultdict(dict) + for conf_path in conf_paths: + collect_brokers(conf_path, name2nodes2brokers) + + print('First column on each line is (node index)\n') + for name, node2broker in name2nodes2brokers.items(): + expect_root = False + for node, broker in node2broker.items(): + expect_root = broker.is_root_container() or expect_root + if expect_root: + print_container(name, name2nodes2brokers) + + +if __name__ == '__main__': + conf_dir = '/etc/swift/container-server' + conf_paths = [os.path.join(conf_dir, p) for p in os.listdir(conf_dir) + if p.endswith(('conf', 'conf.d'))] + run(conf_paths) diff -Nru swift-2.17.0/swift/common/daemon.py swift-2.18.0/swift/common/daemon.py --- swift-2.17.0/swift/common/daemon.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/daemon.py 2018-05-30 10:17:02.000000000 +0000 @@ -64,6 +64,16 @@ else: self.run_forever(**kwargs) + def post_multiprocess_run(self): + """ + Override this to do something after running using multiple worker + processes. This method is called in the parent process. + + This is probably only useful for run-once mode since there is no + "after running" in run-forever mode. 
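The hook described above is easiest to picture with a subclass; a hypothetical run-once daemon (not part of Swift or this patch) that logs a summary once all worker processes have exited:

    import os
    from swift.common.daemon import Daemon

    class SweepOnce(Daemon):  # hypothetical example, not from this patch
        def run_once(self, *args, **kwargs):
            self.logger.info('worker %d finished its pass', os.getpid())

        def post_multiprocess_run(self):
            # runs exactly once, in the parent, after all run-once workers exit
            self.logger.info('all workers done; emitting summary')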
+ """ + pass + def get_worker_args(self, once=False, **kwargs): """ For each worker yield a (possibly empty) dict of kwargs to pass along @@ -173,7 +183,7 @@ yield per_worker_options def spawned_pids(self): - return self.options_by_pid.keys() + return list(self.options_by_pid.keys()) def register_worker_start(self, pid, per_worker_options): self.logger.debug('Spawned worker %s with %r', pid, per_worker_options) @@ -229,6 +239,7 @@ self.logger.notice('Finished %s', os.getpid()) break time.sleep(0.1) + self.daemon.post_multiprocess_run() return 0 def cleanup(self): diff -Nru swift-2.17.0/swift/common/db.py swift-2.18.0/swift/common/db.py --- swift-2.17.0/swift/common/db.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/db.py 2018-05-30 10:17:02.000000000 +0000 @@ -56,12 +56,31 @@ for s in args] -def utf8encodekeys(metadata): - uni_keys = [k for k in metadata if isinstance(k, six.text_type)] - for k in uni_keys: - sv = metadata[k] - del metadata[k] - metadata[k.encode('utf-8')] = sv +def native_str_keys(metadata): + if six.PY2: + uni_keys = [k for k in metadata if isinstance(k, six.text_type)] + for k in uni_keys: + sv = metadata[k] + del metadata[k] + metadata[k.encode('utf-8')] = sv + else: + bin_keys = [k for k in metadata if isinstance(k, six.binary_type)] + for k in bin_keys: + sv = metadata[k] + del metadata[k] + metadata[k.decode('utf-8')] = sv + + +ZERO_LIKE_VALUES = {None, '', 0, '0'} + + +def zero_like(count): + """ + We've cargo culted our consumers to be tolerant of various expressions of + zero in our databases for backwards compatibility with less disciplined + producers. + """ + return count in ZERO_LIKE_VALUES def _db_timeout(timeout, db_file, call): @@ -201,11 +220,27 @@ def __init__(self, db_file, timeout=BROKER_TIMEOUT, logger=None, account=None, container=None, pending_timeout=None, - stale_reads_ok=False): - """Encapsulates working with a database.""" + stale_reads_ok=False, skip_commits=False): + """Encapsulates working with a database. + + :param db_file: path to a database file. + :param timeout: timeout used for database operations. + :param logger: a logger instance. + :param account: name of account. + :param container: name of container. + :param pending_timeout: timeout used when attempting to take a lock to + write to pending file. + :param stale_reads_ok: if True then no error is raised if pending + commits cannot be committed before the database is read, otherwise + an error is raised. + :param skip_commits: if True then this broker instance will never + commit records from the pending file to the database; + :meth:`~swift.common.db.DatabaseBroker.put_record` should not + called on brokers with skip_commits True. 
+ """ self.conn = None - self.db_file = db_file - self.pending_file = self.db_file + '.pending' + self._db_file = db_file + self.pending_file = self._db_file + '.pending' self.pending_timeout = pending_timeout or 10 self.stale_reads_ok = stale_reads_ok self.db_dir = os.path.dirname(db_file) @@ -214,6 +249,7 @@ self.account = account self.container = container self._db_version = -1 + self.skip_commits = skip_commits def __str__(self): """ @@ -233,9 +269,9 @@ :param put_timestamp: internalized timestamp of initial PUT request :param storage_policy_index: only required for containers """ - if self.db_file == ':memory:': + if self._db_file == ':memory:': tmp_db_file = None - conn = get_db_connection(self.db_file, self.timeout) + conn = get_db_connection(self._db_file, self.timeout) else: mkdirs(self.db_dir) fd, tmp_db_file = mkstemp(suffix='.tmp', dir=self.db_dir) @@ -322,29 +358,22 @@ self._delete_db(conn, timestamp) conn.commit() - def possibly_quarantine(self, exc_type, exc_value, exc_traceback): + @property + def db_file(self): + return self._db_file + + def get_device_path(self): + suffix_path = os.path.dirname(self.db_dir) + partition_path = os.path.dirname(suffix_path) + dbs_path = os.path.dirname(partition_path) + return os.path.dirname(dbs_path) + + def quarantine(self, reason): """ - Checks the exception info to see if it indicates a quarantine situation - (malformed or corrupted database). If not, the original exception will - be reraised. If so, the database will be quarantined and a new + The database will be quarantined and a sqlite3.DatabaseError will be raised indicating the action taken. """ - if 'database disk image is malformed' in str(exc_value): - exc_hint = 'malformed' - elif 'malformed database schema' in str(exc_value): - exc_hint = 'malformed' - elif ' is not a database' in str(exc_value): - # older versions said 'file is not a database' - # now 'file is encrypted or is not a database' - exc_hint = 'corrupted' - elif 'disk I/O error' in str(exc_value): - exc_hint = 'disk error while accessing' - else: - six.reraise(exc_type, exc_value, exc_traceback) - prefix_path = os.path.dirname(self.db_dir) - partition_path = os.path.dirname(prefix_path) - dbs_path = os.path.dirname(partition_path) - device_path = os.path.dirname(dbs_path) + device_path = self.get_device_path() quar_path = os.path.join(device_path, 'quarantined', self.db_type + 's', os.path.basename(self.db_dir)) @@ -356,12 +385,56 @@ quar_path = "%s-%s" % (quar_path, uuid4().hex) renamer(self.db_dir, quar_path, fsync=False) detail = _('Quarantined %(db_dir)s to %(quar_path)s due to ' - '%(exc_hint)s database') % {'db_dir': self.db_dir, - 'quar_path': quar_path, - 'exc_hint': exc_hint} + '%(reason)s') % {'db_dir': self.db_dir, + 'quar_path': quar_path, + 'reason': reason} self.logger.error(detail) raise sqlite3.DatabaseError(detail) + def possibly_quarantine(self, exc_type, exc_value, exc_traceback): + """ + Checks the exception info to see if it indicates a quarantine situation + (malformed or corrupted database). If not, the original exception will + be reraised. If so, the database will be quarantined and a new + sqlite3.DatabaseError will be raised indicating the action taken. 
+ """ + if 'database disk image is malformed' in str(exc_value): + exc_hint = 'malformed database' + elif 'malformed database schema' in str(exc_value): + exc_hint = 'malformed database' + elif ' is not a database' in str(exc_value): + # older versions said 'file is not a database' + # now 'file is encrypted or is not a database' + exc_hint = 'corrupted database' + elif 'disk I/O error' in str(exc_value): + exc_hint = 'disk error while accessing database' + else: + six.reraise(exc_type, exc_value, exc_traceback) + + self.quarantine(exc_hint) + + @contextmanager + def updated_timeout(self, new_timeout): + """Use with "with" statement; updates ``timeout`` within the block.""" + old_timeout = self.timeout + try: + self.timeout = new_timeout + if self.conn: + self.conn.timeout = new_timeout + yield old_timeout + finally: + self.timeout = old_timeout + if self.conn: + self.conn.timeout = old_timeout + + @contextmanager + def maybe_get(self, conn): + if conn: + yield conn + else: + with self.get() as conn: + yield conn + @contextmanager def get(self): """Use with the "with" statement; returns a database connection.""" @@ -462,6 +535,23 @@ with self.get() as conn: return self._is_deleted(conn) + def empty(self): + """ + Check if the broker abstraction contains any undeleted records. + """ + raise NotImplementedError() + + def is_reclaimable(self, now, reclaim_age): + """ + Check if the broker abstraction is empty, and has been marked deleted + for at least a reclaim age. + """ + info = self.get_replication_info() + return (zero_like(info['count']) and + (Timestamp(now - reclaim_age) > + Timestamp(info['delete_timestamp']) > + Timestamp(info['put_timestamp']))) + def merge_timestamps(self, created_at, put_timestamp, delete_timestamp): """ Used in replication to handle updating timestamps. @@ -533,13 +623,15 @@ result.append({'remote_id': row[0], 'sync_point': row[1]}) return result - def get_max_row(self): + def get_max_row(self, table=None): + if not table: + table = self.db_contains_type query = ''' SELECT SQLITE_SEQUENCE.seq FROM SQLITE_SEQUENCE WHERE SQLITE_SEQUENCE.name == '%s' LIMIT 1 - ''' % (self.db_contains_type) + ''' % (table, ) with self.get() as conn: row = conn.execute(query).fetchone() return row[0] if row else -1 @@ -567,11 +659,26 @@ return curs.fetchone() def put_record(self, record): - if self.db_file == ':memory:': + """ + Put a record into the DB. If the DB has an associated pending file with + space then the record is appended to that file and a commit to the DB + is deferred. If the DB is in-memory or its pending file is full then + the record will be committed immediately. + + :param record: a record to be added to the DB. + :raises DatabaseConnectionError: if the DB file does not exist or if + ``skip_commits`` is True. + :raises LockTimeout: if a timeout occurs while waiting to take a lock + to write to the pending file. 
+ """ + if self._db_file == ':memory:': self.merge_items([record]) return if not os.path.exists(self.db_file): raise DatabaseConnectionError(self.db_file, "DB doesn't exist") + if self.skip_commits: + raise DatabaseConnectionError(self.db_file, + 'commits not accepted') with lock_parent_directory(self.pending_file, self.pending_timeout): pending_size = 0 try: @@ -591,6 +698,10 @@ protocol=PICKLE_PROTOCOL).encode('base64')) fp.flush() + def _skip_commit_puts(self): + return (self._db_file == ':memory:' or self.skip_commits or not + os.path.exists(self.pending_file)) + def _commit_puts(self, item_list=None): """ Scan for .pending files and commit the found records by feeding them @@ -599,7 +710,13 @@ :param item_list: A list of items to commit in addition to .pending """ - if self.db_file == ':memory:' or not os.path.exists(self.pending_file): + if self._skip_commit_puts(): + if item_list: + # this broker instance should not be used to commit records, + # but if it is then raise an error rather than quietly + # discarding the records in item_list. + raise DatabaseConnectionError(self.db_file, + 'commits not accepted') return if item_list is None: item_list = [] @@ -630,7 +747,7 @@ Catch failures of _commit_puts() if broker is intended for reading of stats, and thus does not care for pending updates. """ - if self.db_file == ':memory:' or not os.path.exists(self.pending_file): + if self._skip_commit_puts(): return try: with lock_parent_directory(self.pending_file, @@ -648,6 +765,12 @@ """ raise NotImplementedError + def merge_items(self, item_list, source=None): + """ + Save :param:item_list to the database. + """ + raise NotImplementedError + def make_tuple_for_pickle(self, record): """ Turn this db record dict into the format this service uses for @@ -686,7 +809,7 @@ within 512k of a boundary, it allocates to the next boundary. Boundaries are 2m, 5m, 10m, 25m, 50m, then every 50m after. """ - if not DB_PREALLOCATION or self.db_file == ':memory:': + if not DB_PREALLOCATION or self._db_file == ':memory:': return MB = (1024 * 1024) @@ -711,8 +834,12 @@ def get_raw_metadata(self): with self.get() as conn: try: - metadata = conn.execute('SELECT metadata FROM %s_stat' % - self.db_type).fetchone()[0] + row = conn.execute('SELECT metadata FROM %s_stat' % + self.db_type).fetchone() + if not row: + self.quarantine("missing row in %s_stat table" % + self.db_type) + metadata = row[0] except sqlite3.OperationalError as err: if 'no such column: metadata' not in str(err): raise @@ -729,7 +856,7 @@ metadata = self.get_raw_metadata() if metadata: metadata = json.loads(metadata) - utf8encodekeys(metadata) + native_str_keys(metadata) else: metadata = {} return metadata @@ -784,10 +911,14 @@ return with self.get() as conn: try: - md = conn.execute('SELECT metadata FROM %s_stat' % - self.db_type).fetchone()[0] + row = conn.execute('SELECT metadata FROM %s_stat' % + self.db_type).fetchone() + if not row: + self.quarantine("missing row in %s_stat table" % + self.db_type) + md = row[0] md = json.loads(md) if md else {} - utf8encodekeys(md) + native_str_keys(md) except sqlite3.OperationalError as err: if 'no such column: metadata' not in str(err): raise @@ -807,40 +938,46 @@ def reclaim(self, age_timestamp, sync_timestamp): """ - Delete rows from the db_contains_type table that are marked deleted - and whose created_at timestamp is < age_timestamp. Also deletes rows - from incoming_sync and outgoing_sync where the updated_at timestamp is - < sync_timestamp. + Delete reclaimable rows and metadata from the db. 
- In addition, this calls the DatabaseBroker's :func:`_reclaim` method. + By default this method will delete rows from the db_contains_type table + that are marked deleted and whose created_at timestamp is < + age_timestamp, and deletes rows from incoming_sync and outgoing_sync + where the updated_at timestamp is < sync_timestamp. In addition, this + calls the :meth:`_reclaim_metadata` method. + + Subclasses may reclaim other items by overriding :meth:`_reclaim`. :param age_timestamp: max created_at timestamp of object rows to delete :param sync_timestamp: max update_at timestamp of sync rows to delete """ - if self.db_file != ':memory:' and os.path.exists(self.pending_file): + if not self._skip_commit_puts(): with lock_parent_directory(self.pending_file, self.pending_timeout): self._commit_puts() with self.get() as conn: - conn.execute(''' - DELETE FROM %s WHERE deleted = 1 AND %s < ? - ''' % (self.db_contains_type, self.db_reclaim_timestamp), - (age_timestamp,)) - try: - conn.execute(''' - DELETE FROM outgoing_sync WHERE updated_at < ? - ''', (sync_timestamp,)) - conn.execute(''' - DELETE FROM incoming_sync WHERE updated_at < ? - ''', (sync_timestamp,)) - except sqlite3.OperationalError as err: - # Old dbs didn't have updated_at in the _sync tables. - if 'no such column: updated_at' not in str(err): - raise - DatabaseBroker._reclaim(self, conn, age_timestamp) + self._reclaim(conn, age_timestamp, sync_timestamp) + self._reclaim_metadata(conn, age_timestamp) conn.commit() - def _reclaim(self, conn, timestamp): + def _reclaim(self, conn, age_timestamp, sync_timestamp): + conn.execute(''' + DELETE FROM %s WHERE deleted = 1 AND %s < ? + ''' % (self.db_contains_type, self.db_reclaim_timestamp), + (age_timestamp,)) + try: + conn.execute(''' + DELETE FROM outgoing_sync WHERE updated_at < ? + ''', (sync_timestamp,)) + conn.execute(''' + DELETE FROM incoming_sync WHERE updated_at < ? + ''', (sync_timestamp,)) + except sqlite3.OperationalError as err: + # Old dbs didn't have updated_at in the _sync tables. + if 'no such column: updated_at' not in str(err): + raise + + def _reclaim_metadata(self, conn, timestamp): """ Removes any empty metadata values older than the timestamp using the given database connection. 
This function will not call commit on the @@ -854,8 +991,12 @@ :returns: True if conn.commit() should be called """ try: - md = conn.execute('SELECT metadata FROM %s_stat' % - self.db_type).fetchone()[0] + row = conn.execute('SELECT metadata FROM %s_stat' % + self.db_type).fetchone() + if not row: + self.quarantine("missing row in %s_stat table" % + self.db_type) + md = row[0] if md: md = json.loads(md) keys_to_delete = [] diff -Nru swift-2.17.0/swift/common/db_replicator.py swift-2.18.0/swift/common/db_replicator.py --- swift-2.17.0/swift/common/db_replicator.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/db_replicator.py 2018-05-30 10:17:02.000000000 +0000 @@ -33,10 +33,12 @@ from swift.common.utils import get_logger, whataremyips, storage_directory, \ renamer, mkdirs, lock_parent_directory, config_true_value, \ unlink_older_than, dump_recon_cache, rsync_module_interpolation, \ - json, Timestamp + json, parse_override_options, round_robin_iter, Everything, get_db_files, \ + parse_db_filename from swift.common import ring from swift.common.ring.utils import is_local_device -from swift.common.http import HTTP_NOT_FOUND, HTTP_INSUFFICIENT_STORAGE +from swift.common.http import HTTP_NOT_FOUND, HTTP_INSUFFICIENT_STORAGE, \ + is_success from swift.common.bufferedhttp import BufferedHTTPConnection from swift.common.exceptions import DriveNotMounted from swift.common.daemon import Daemon @@ -69,6 +71,17 @@ renamer(object_dir, quarantine_dir, fsync=False) +def looks_like_partition(dir_name): + """ + True if the directory name is a valid partition number, False otherwise. + """ + try: + part = int(dir_name) + return part >= 0 + except ValueError: + return False + + def roundrobin_datadirs(datadirs): """ Generator to walk the data dirs in a round robin manner, evenly @@ -76,12 +89,16 @@ found (in their proper places). The partitions within each data dir are walked randomly, however. - :param datadirs: a list of (path, node_id) to walk - :returns: A generator of (partition, path_to_db_file, node_id) + :param datadirs: a list of tuples of (path, context, partition_filter) to + walk. The context may be any object; the context is not + used by this function but is included with each yielded + tuple. 
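The replicator's directory walker now takes a (path, context, partition_filter) triple per datadir, as the parameter description above says. A minimal sketch of the new calling convention, with paths and context purely illustrative:

    from swift.common.db_replicator import roundrobin_datadirs

    # context may be anything (a node id here); the filter prunes partition dirs
    datadirs = [('/srv/node/sdb1/containers', 1, lambda part_dir: True),
                ('/srv/node/sdc1/containers', 2, lambda part_dir: True)]
    for partition, db_file, node_id in roundrobin_datadirs(datadirs):
        print(partition, db_file, node_id)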
+ :returns: A generator of (partition, path_to_db_file, context) """ - def walk_datadir(datadir, node_id): - partitions = os.listdir(datadir) + def walk_datadir(datadir, context, part_filter): + partitions = [pd for pd in os.listdir(datadir) + if looks_like_partition(pd) and part_filter(pd)] random.shuffle(partitions) for partition in partitions: part_dir = os.path.join(datadir, partition) @@ -104,22 +121,27 @@ if not os.path.isdir(hash_dir): continue object_file = os.path.join(hash_dir, hsh + '.db') + # common case if os.path.exists(object_file): - yield (partition, object_file, node_id) - else: - try: - os.rmdir(hash_dir) - except OSError as e: - if e.errno is not errno.ENOTEMPTY: - raise - - its = [walk_datadir(datadir, node_id) for datadir, node_id in datadirs] - while its: - for it in its: - try: - yield next(it) - except StopIteration: - its.remove(it) + yield (partition, object_file, context) + continue + # look for any alternate db filenames + db_files = get_db_files(object_file) + if db_files: + yield (partition, db_files[-1], context) + continue + try: + os.rmdir(hash_dir) + except OSError as e: + if e.errno != errno.ENOTEMPTY: + raise + + its = [walk_datadir(datadir, context, filt) + for datadir, context, filt in datadirs] + + rr_its = round_robin_iter(its) + for datadir in rr_its: + yield datadir class ReplConnection(BufferedHTTPConnection): @@ -194,13 +216,14 @@ self.recon_replicator) self.extract_device_re = re.compile('%s%s([^%s]+)' % ( self.root, os.path.sep, os.path.sep)) + self.handoffs_only = config_true_value(conf.get('handoffs_only', 'no')) def _zero_stats(self): """Zero out the stats.""" self.stats = {'attempted': 0, 'success': 0, 'failure': 0, 'ts_repl': 0, 'no_change': 0, 'hashmatch': 0, 'rsync': 0, 'diff': 0, 'remove': 0, 'empty': 0, 'remote_merge': 0, - 'start': time.time(), 'diff_capped': 0, + 'start': time.time(), 'diff_capped': 0, 'deferred': 0, 'failure_nodes': {}} def _report_stats(self): @@ -297,9 +320,20 @@ different_region=different_region): return False with Timeout(replicate_timeout or self.node_timeout): - response = http.replicate(replicate_method, local_id) + response = http.replicate(replicate_method, local_id, + os.path.basename(broker.db_file)) return response and 200 <= response.status < 300 + def _send_replicate_request(self, http, *repl_args): + with Timeout(self.node_timeout): + response = http.replicate(*repl_args) + if not response or not is_success(response.status): + if response: + self.logger.error('ERROR Bad response %s from %s', + response.status, http.host) + return False + return True + def _usync_db(self, point, broker, http, remote_id, local_id): """ Sync a db by sending all records since the last sync. 
@@ -314,26 +348,29 @@ """ self.stats['diff'] += 1 self.logger.increment('diffs') - self.logger.debug('Syncing chunks with %s, starting at %s', - http.host, point) + self.logger.debug('%s usyncing chunks to %s, starting at row %s', + broker.db_file, + '%(ip)s:%(port)s/%(device)s' % http.node, + point) + start = time.time() sync_table = broker.get_syncs() objects = broker.get_items_since(point, self.per_diff) diffs = 0 while len(objects) and diffs < self.max_diffs: diffs += 1 - with Timeout(self.node_timeout): - response = http.replicate('merge_items', objects, local_id) - if not response or response.status >= 300 or response.status < 200: - if response: - self.logger.error(_('ERROR Bad response %(status)s from ' - '%(host)s'), - {'status': response.status, - 'host': http.host}) + if not self._send_replicate_request( + http, 'merge_items', objects, local_id): return False # replication relies on db order to send the next merge batch in # order with no gaps point = objects[-1]['ROWID'] objects = broker.get_items_since(point, self.per_diff) + + self.logger.debug('%s usyncing chunks to %s, finished at row %s (%gs)', + broker.db_file, + '%(ip)s:%(port)s/%(device)s' % http.node, + point, time.time() - start) + if objects: self.logger.debug( 'Synchronization for %s has fallen more than ' @@ -385,9 +422,8 @@ :returns: ReplConnection object """ - return ReplConnection(node, partition, - os.path.basename(db_file).split('.', 1)[0], - self.logger) + hsh, other, ext = parse_db_filename(db_file) + return ReplConnection(node, partition, hsh, self.logger) def _gather_sync_args(self, info): """ @@ -434,33 +470,82 @@ elif 200 <= response.status < 300: rinfo = json.loads(response.data) local_sync = broker.get_sync(rinfo['id'], incoming=False) + if rinfo.get('metadata', ''): + broker.update_metadata(json.loads(rinfo['metadata'])) if self._in_sync(rinfo, info, broker, local_sync): + self.logger.debug('%s in sync with %s, nothing to do', + broker.db_file, + '%(ip)s:%(port)s/%(device)s' % node) return True - # if the difference in rowids between the two differs by - # more than 50% and the difference is greater than per_diff, - # rsync then do a remote merge. - # NOTE: difference > per_diff stops us from dropping to rsync - # on smaller containers, who have only a few rows to sync. - if rinfo['max_row'] / float(info['max_row']) < 0.5 and \ - info['max_row'] - rinfo['max_row'] > self.per_diff: - self.stats['remote_merge'] += 1 - self.logger.increment('remote_merges') - return self._rsync_db(broker, node, http, info['id'], - replicate_method='rsync_then_merge', - replicate_timeout=(info['count'] / 2000), - different_region=different_region) - # else send diffs over to the remote server - return self._usync_db(max(rinfo['point'], local_sync), - broker, http, rinfo['id'], info['id']) + return self._choose_replication_mode( + node, rinfo, info, local_sync, broker, http, + different_region) + return False + + def _choose_replication_mode(self, node, rinfo, info, local_sync, broker, + http, different_region): + # if the difference in rowids between the two differs by + # more than 50% and the difference is greater than per_diff, + # rsync then do a remote merge. + # NOTE: difference > per_diff stops us from dropping to rsync + # on smaller containers, who have only a few rows to sync. 
+ if (rinfo['max_row'] / float(info['max_row']) < 0.5 and + info['max_row'] - rinfo['max_row'] > self.per_diff): + self.stats['remote_merge'] += 1 + self.logger.increment('remote_merges') + return self._rsync_db(broker, node, http, info['id'], + replicate_method='rsync_then_merge', + replicate_timeout=(info['count'] / 2000), + different_region=different_region) + # else send diffs over to the remote server + return self._usync_db(max(rinfo['point'], local_sync), + broker, http, rinfo['id'], info['id']) def _post_replicate_hook(self, broker, info, responses): """ - :param broker: the container that just replicated + :param broker: broker instance for the database that just replicated :param info: pre-replication full info dict :param responses: a list of bools indicating success from nodes """ pass + def cleanup_post_replicate(self, broker, orig_info, responses): + """ + Cleanup non primary database from disk if needed. + + :param broker: the broker for the database we're replicating + :param orig_info: snapshot of the broker replication info dict taken + before replication + :param responses: a list of boolean success values for each replication + request to other nodes + + :return success: returns False if deletion of the database was + attempted but unsuccessful, otherwise returns True. + """ + log_template = 'Not deleting db %s (%%s)' % broker.db_file + max_row_delta = broker.get_max_row() - orig_info['max_row'] + if max_row_delta < 0: + reason = 'negative max_row_delta: %s' % max_row_delta + self.logger.error(log_template, reason) + return True + if max_row_delta: + reason = '%s new rows' % max_row_delta + self.logger.debug(log_template, reason) + return True + if not (responses and all(responses)): + reason = '%s/%s success' % (responses.count(True), len(responses)) + self.logger.debug(log_template, reason) + return True + # If the db has been successfully synced to all of its peers, it can be + # removed. Callers should have already checked that the db is not on a + # primary node. + if not self.delete_db(broker): + self.logger.debug( + 'Failed to delete db %s', broker.db_file) + return False + self.logger.debug('Successfully deleted db %s', broker.db_file) + return True + def _replicate_object(self, partition, object_file, node_id): """ Replicate the db, choosing method based on whether or not it @@ -469,12 +554,20 @@ :param partition: partition to be replicated to :param object_file: DB file name to be replicated :param node_id: node id of the node to be replicated to + :returns: a tuple (success, responses). ``success`` is a boolean that + is True if the method completed successfully, False otherwise. + ``responses`` is a list of booleans each of which indicates the + success or not of replicating to a peer node if replication has + been attempted. ``success`` is False if any of ``responses`` is + False; when ``responses`` is empty, ``success`` may be either True + or False. """ start_time = now = time.time() self.logger.debug('Replicating db %s', object_file) self.stats['attempted'] += 1 self.logger.increment('attempts') shouldbehere = True + responses = [] try: broker = self.brokerclass(object_file, pending_timeout=30) broker.reclaim(now - self.reclaim_age, @@ -504,18 +597,12 @@ failure_dev['device']) for failure_dev in nodes]) self.logger.increment('failures') - return - # The db is considered deleted if the delete_timestamp value is greater - # than the put_timestamp, and there are no objects. 
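cleanup_post_replicate above keeps a handoff database unless the broker gained no rows during the pass and every peer acknowledged the replication; only then is delete_db attempted. A hedged restatement of that guard order as a pure function (names hypothetical, not the real method):

    # Illustrative restatement of the guards in cleanup_post_replicate.
    def should_delete_handoff_db(max_row_before, max_row_after, responses):
        delta = max_row_after - max_row_before
        if delta != 0:
            # rows arrived during the pass (a negative delta is logged as an
            # error); the db stays on disk for another pass
            return False
        if not responses or not all(responses):
            # at least one peer did not confirm success
            return False
        return True

    assert should_delete_handoff_db(10, 10, [True, True, True]) is True
    assert should_delete_handoff_db(10, 12, [True, True, True]) is False
    assert should_delete_handoff_db(10, 10, [True, False]) is False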
- delete_timestamp = Timestamp(info.get('delete_timestamp') or 0) - put_timestamp = Timestamp(info.get('put_timestamp') or 0) - if (now - self.reclaim_age) > delete_timestamp > put_timestamp and \ - info['count'] in (None, '', 0, '0'): + return False, responses + if broker.is_reclaimable(now, self.reclaim_age): if self.report_up_to_date(info): self.delete_db(broker) self.logger.timing_since('timing', start_time) - return - responses = [] + return True, responses failure_devs_info = set() nodes = self.ring.get_part_nodes(int(partition)) local_dev = None @@ -573,14 +660,11 @@ except (Exception, Timeout): self.logger.exception('UNHANDLED EXCEPTION: in post replicate ' 'hook for %s', broker.db_file) - if not shouldbehere and responses and all(responses): - # If the db shouldn't be on this node and has been successfully - # synced to all of its peers, it can be removed. - if not self.delete_db(broker): + if not shouldbehere: + if not self.cleanup_post_replicate(broker, info, responses): failure_devs_info.update( [(failure_dev['replication_ip'], failure_dev['device']) for failure_dev in repl_nodes]) - target_devs_info = set([(target_dev['replication_ip'], target_dev['device']) for target_dev in repl_nodes]) @@ -588,6 +672,9 @@ self._add_failure_stats(failure_devs_info) self.logger.timing_since('timing', start_time) + if shouldbehere: + responses.append(True) + return all(responses), responses def delete_db(self, broker): object_file = broker.db_file @@ -619,17 +706,46 @@ return match.groups()[0] return "UNKNOWN" + def _partition_dir_filter(self, device_id, partitions_to_replicate): + + def filt(partition_dir): + partition = int(partition_dir) + if self.handoffs_only: + primary_node_ids = [ + d['id'] for d in self.ring.get_part_nodes(partition)] + if device_id in primary_node_ids: + return False + + if partition not in partitions_to_replicate: + return False + + return True + + return filt + def report_up_to_date(self, full_info): return True def run_once(self, *args, **kwargs): """Run a replication pass once.""" + override_options = parse_override_options(once=True, **kwargs) + + devices_to_replicate = override_options.devices or Everything() + partitions_to_replicate = override_options.partitions or Everything() + self._zero_stats() dirs = [] ips = whataremyips(self.bind_ip) if not ips: self.logger.error(_('ERROR Failed to get my own IPs?')) return + + if self.handoffs_only: + self.logger.warning( + 'Starting replication pass with handoffs_only enabled. 
' + 'This mode is not intended for normal ' + 'operation; use handoffs_only with care.') + self._local_device_ids = set() found_local = False for node in self.ring.devs: @@ -646,13 +762,20 @@ self.logger.warning( _('Skipping %(device)s as it is not mounted') % node) continue + if node['device'] not in devices_to_replicate: + self.logger.debug( + 'Skipping device %s due to given arguments', + node['device']) + continue unlink_older_than( os.path.join(self.root, node['device'], 'tmp'), time.time() - self.reclaim_age) datadir = os.path.join(self.root, node['device'], self.datadir) if os.path.isdir(datadir): self._local_device_ids.add(node['id']) - dirs.append((datadir, node['id'])) + part_filt = self._partition_dir_filter( + node['id'], partitions_to_replicate) + dirs.append((datadir, node['id'], part_filt)) if not found_local: self.logger.error("Can't find itself %s with port %s in ring " "file, not replicating", @@ -663,6 +786,10 @@ self._replicate_object, part, object_file, node_id) self.cpool.waitall() self.logger.info(_('Replication run OVER')) + if self.handoffs_only: + self.logger.warning( + 'Finished replication pass with handoffs_only enabled. ' + 'If handoffs_only is no longer required, disable it.') self._report_stats() def run_forever(self, *args, **kwargs): @@ -692,6 +819,9 @@ self.mount_check = mount_check self.logger = logger or get_logger({}, log_route='replicator-rpc') + def _db_file_exists(self, db_path): + return os.path.exists(db_path) + def dispatch(self, replicate_args, args): if not hasattr(args, 'pop'): return HTTPBadRequest(body='Invalid object type') @@ -710,7 +840,7 @@ # someone might be about to rsync a db to us, # make sure there's a tmp dir to receive it. mkdirs(os.path.join(self.root, drive, 'tmp')) - if not os.path.exists(db_file): + if not self._db_file_exists(db_file): return HTTPNotFound() return getattr(self, op)(self.broker_class(db_file), args) @@ -809,6 +939,8 @@ def complete_rsync(self, drive, db_file, args): old_filename = os.path.join(self.root, drive, 'tmp', args[0]) + if args[1:]: + db_file = os.path.join(os.path.dirname(db_file), args[1]) if os.path.exists(db_file): return HTTPNotFound() if not os.path.exists(old_filename): @@ -818,12 +950,21 @@ renamer(old_filename, db_file) return HTTPNoContent() + def _abort_rsync_then_merge(self, db_file, tmp_filename): + return not (self._db_file_exists(db_file) and + os.path.exists(tmp_filename)) + + def _post_rsync_then_merge_hook(self, existing_broker, new_broker): + # subclasses may override to make custom changes to the new broker + pass + def rsync_then_merge(self, drive, db_file, args): - old_filename = os.path.join(self.root, drive, 'tmp', args[0]) - if not os.path.exists(db_file) or not os.path.exists(old_filename): + tmp_filename = os.path.join(self.root, drive, 'tmp', args[0]) + if self._abort_rsync_then_merge(db_file, tmp_filename): return HTTPNotFound() - new_broker = self.broker_class(old_filename) + new_broker = self.broker_class(tmp_filename) existing_broker = self.broker_class(db_file) + db_file = existing_broker.db_file point = -1 objects = existing_broker.get_items_since(point, 1000) while len(objects): @@ -831,9 +972,13 @@ point = objects[-1]['ROWID'] objects = existing_broker.get_items_since(point, 1000) sleep() + new_broker.merge_syncs(existing_broker.get_syncs()) + self._post_rsync_then_merge_hook(existing_broker, new_broker) new_broker.newid(args[0]) new_broker.update_metadata(existing_broker.metadata) - renamer(old_filename, db_file) + if self._abort_rsync_then_merge(db_file, 
tmp_filename): + return HTTPNotFound() + renamer(tmp_filename, db_file) return HTTPNoContent() # Footnote [1]: diff -Nru swift-2.17.0/swift/common/direct_client.py swift-2.18.0/swift/common/direct_client.py --- swift-2.17.0/swift/common/direct_client.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/direct_client.py 2018-05-30 10:17:02.000000000 +0000 @@ -54,22 +54,72 @@ http_reason=resp.reason, http_headers=headers) -def _make_req(node, part, method, path, _headers, stype, - conn_timeout=5, response_timeout=15): +def _make_req(node, part, method, path, headers, stype, + conn_timeout=5, response_timeout=15, send_timeout=15, + contents=None, content_length=None, chunk_size=65535): """ Make request to backend storage node. (i.e. 'Account', 'Container', 'Object') :param node: a node dict from a ring - :param part: an integer, the partion number + :param part: an integer, the partition number :param method: a string, the HTTP method (e.g. 'PUT', 'DELETE', etc) :param path: a string, the request path :param headers: a dict, header name => value :param stype: a string, describing the type of service + :param conn_timeout: timeout while waiting for connection; default is 5 + seconds + :param response_timeout: timeout while waiting for response; default is 15 + seconds + :param send_timeout: timeout for sending request body; default is 15 + seconds + :param contents: an iterable or string to read object data from + :param content_length: value to send as content-length header + :param chunk_size: if defined, chunk size of data to send :returns: an HTTPResponse object - """ + :raises DirectClientException: if the response status is not 2xx + :raises eventlet.Timeout: if either conn_timeout or response_timeout is + exceeded + """ + if contents is not None: + if content_length is not None: + headers['Content-Length'] = str(content_length) + else: + for n, v in headers.items(): + if n.lower() == 'content-length': + content_length = int(v) + if not contents: + headers['Content-Length'] = '0' + if isinstance(contents, six.string_types): + contents = [contents] + if content_length is None: + headers['Transfer-Encoding'] = 'chunked' + with Timeout(conn_timeout): conn = http_connect(node['ip'], node['port'], node['device'], part, - method, path, headers=_headers) + method, path, headers=headers) + + if contents is not None: + contents_f = FileLikeIter(contents) + + with Timeout(send_timeout): + if content_length is None: + chunk = contents_f.read(chunk_size) + while chunk: + conn.send('%x\r\n%s\r\n' % (len(chunk), chunk)) + chunk = contents_f.read(chunk_size) + conn.send('0\r\n\r\n') + else: + left = content_length + while left > 0: + size = chunk_size + if size > left: + size = left + chunk = contents_f.read(size) + if not chunk: + break + conn.send(chunk) + left -= len(chunk) + with Timeout(response_timeout): resp = conn.getresponse() resp.read() @@ -82,7 +132,7 @@ marker=None, limit=None, prefix=None, delimiter=None, conn_timeout=5, response_timeout=15, - end_marker=None, reverse=None): + end_marker=None, reverse=None, headers=None): """Base class for get direct account and container. 
Do not use directly use the get_direct_account or @@ -105,7 +155,7 @@ with Timeout(conn_timeout): conn = http_connect(node['ip'], node['port'], node['device'], part, 'GET', path, query_string=qs, - headers=gen_headers()) + headers=gen_headers(hdrs_in=headers)) with Timeout(response_timeout): resp = conn.getresponse() if not is_success(resp.status): @@ -121,11 +171,12 @@ return resp_headers, json.loads(resp.read()) -def gen_headers(hdrs_in=None, add_ts=False): +def gen_headers(hdrs_in=None, add_ts=False, add_user_agent=True): hdrs_out = HeaderKeyDict(hdrs_in) if hdrs_in else HeaderKeyDict() if add_ts: hdrs_out['X-Timestamp'] = Timestamp.now().internal - hdrs_out['User-Agent'] = 'direct-client %s' % os.getpid() + if add_user_agent: + hdrs_out['User-Agent'] = 'direct-client %s' % os.getpid() return hdrs_out @@ -197,7 +248,7 @@ def direct_get_container(node, part, account, container, marker=None, limit=None, prefix=None, delimiter=None, conn_timeout=5, response_timeout=15, end_marker=None, - reverse=None): + reverse=None, headers=None): """ Get container listings directly from the container server. @@ -213,6 +264,7 @@ :param response_timeout: timeout in seconds for getting the response :param end_marker: end_marker query :param reverse: reverse the returned listing + :param headers: headers to be included in the request :returns: a tuple of (response headers, a list of objects) The response headers will be a HeaderKeyDict. """ @@ -224,7 +276,8 @@ end_marker=end_marker, reverse=reverse, conn_timeout=conn_timeout, - response_timeout=response_timeout) + response_timeout=response_timeout, + headers=headers) def direct_delete_container(node, part, account, container, conn_timeout=5, @@ -250,6 +303,37 @@ 'Container', conn_timeout, response_timeout) +def direct_put_container(node, part, account, container, conn_timeout=5, + response_timeout=15, headers=None, contents=None, + content_length=None, chunk_size=65535): + """ + Make a PUT request to a container server. 
+ + :param node: node dictionary from the ring + :param part: partition the container is on + :param account: account name + :param container: container name + :param conn_timeout: timeout in seconds for establishing the connection + :param response_timeout: timeout in seconds for getting the response + :param headers: additional headers to include in the request + :param contents: an iterable or string to send in request body (optional) + :param content_length: value to send as content-length header (optional) + :param chunk_size: chunk size of data to send (optional) + :raises ClientException: HTTP PUT request failed + """ + if headers is None: + headers = {} + + lower_headers = set(k.lower() for k in headers) + headers_out = gen_headers(headers, + add_ts='x-timestamp' not in lower_headers, + add_user_agent='user-agent' not in lower_headers) + path = '/%s/%s' % (account, container) + _make_req(node, part, 'PUT', path, headers_out, 'Container', conn_timeout, + response_timeout, contents=contents, + content_length=content_length, chunk_size=chunk_size) + + def direct_put_container_object(node, part, account, container, obj, conn_timeout=5, response_timeout=15, headers=None): @@ -385,56 +469,18 @@ headers = {} if etag: headers['ETag'] = etag.strip('"') - if content_length is not None: - headers['Content-Length'] = str(content_length) - else: - for n, v in headers.items(): - if n.lower() == 'content-length': - content_length = int(v) if content_type is not None: headers['Content-Type'] = content_type else: headers['Content-Type'] = 'application/octet-stream' - if not contents: - headers['Content-Length'] = '0' - if isinstance(contents, six.string_types): - contents = [contents] # Incase the caller want to insert an object with specific age add_ts = 'X-Timestamp' not in headers - if content_length is None: - headers['Transfer-Encoding'] = 'chunked' - - with Timeout(conn_timeout): - conn = http_connect(node['ip'], node['port'], node['device'], part, - 'PUT', path, headers=gen_headers(headers, add_ts)) - - contents_f = FileLikeIter(contents) - - if content_length is None: - chunk = contents_f.read(chunk_size) - while chunk: - conn.send('%x\r\n%s\r\n' % (len(chunk), chunk)) - chunk = contents_f.read(chunk_size) - conn.send('0\r\n\r\n') - else: - left = content_length - while left > 0: - size = chunk_size - if size > left: - size = left - chunk = contents_f.read(size) - if not chunk: - break - conn.send(chunk) - left -= len(chunk) + resp = _make_req( + node, part, 'PUT', path, gen_headers(headers, add_ts=add_ts), + 'Object', conn_timeout, response_timeout, contents=contents, + content_length=content_length, chunk_size=chunk_size) - with Timeout(response_timeout): - resp = conn.getresponse() - resp.read() - if not is_success(resp.status): - raise DirectClientException('Object', 'PUT', - node, part, path, resp) return resp.getheader('etag').strip('"') diff -Nru swift-2.17.0/swift/common/linkat.py swift-2.18.0/swift/common/linkat.py --- swift-2.17.0/swift/common/linkat.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/linkat.py 2018-05-30 10:17:02.000000000 +0000 @@ -17,6 +17,8 @@ import ctypes from ctypes.util import find_library +import six + __all__ = ['linkat'] @@ -70,6 +72,11 @@ if not isinstance(olddirfd, int) or not isinstance(newdirfd, int): raise TypeError("fd must be an integer.") + if isinstance(oldpath, six.text_type): + oldpath = oldpath.encode('utf8') + if isinstance(newpath, six.text_type): + newpath = newpath.encode('utf8') + return self._c_linkat(olddirfd, 
oldpath, newdirfd, newpath, flags) linkat = Linkat() diff -Nru swift-2.17.0/swift/common/manager.py swift-2.18.0/swift/common/manager.py --- swift-2.17.0/swift/common/manager.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/manager.py 2018-05-30 10:17:02.000000000 +0000 @@ -34,7 +34,7 @@ ALL_SERVERS = ['account-auditor', 'account-server', 'container-auditor', 'container-replicator', 'container-reconciler', - 'container-server', 'container-sync', + 'container-server', 'container-sharder', 'container-sync', 'container-updater', 'object-auditor', 'object-server', 'object-expirer', 'object-replicator', 'object-reconstructor', 'object-updater', @@ -637,13 +637,16 @@ {'server': self.server, 'pid': pid, 'conf': conf_file}) return 0 - def spawn(self, conf_file, once=False, wait=True, daemon=True, **kwargs): + def spawn(self, conf_file, once=False, wait=True, daemon=True, + additional_args=None, **kwargs): """Launch a subprocess for this server. :param conf_file: path to conf_file to use as first arg :param once: boolean, add once argument to command :param wait: boolean, if true capture stdout with a pipe :param daemon: boolean, if false ask server to log to console + :param additional_args: list of additional arguments to pass + on the command line :returns: the pid of the spawned process """ @@ -653,6 +656,10 @@ if not daemon: # ask the server to log to console args.append('verbose') + if additional_args: + if isinstance(additional_args, str): + additional_args = [additional_args] + args.extend(additional_args) # figure out what we're going to do with stdio if not daemon: @@ -678,8 +685,13 @@ """ status = 0 for proc in self.procs: - # wait for process to close its stdout - output = proc.stdout.read() + # wait for process to close its stdout (if we haven't done that) + if proc.stdout.closed: + output = '' + else: + output = proc.stdout.read() + proc.stdout.close() + if kwargs.get('once', False): # if you don't want once to wait you can send it to the # background on the command line, I generally just run with @@ -703,7 +715,7 @@ status = 0 for proc in self.procs: # wait for process to terminate - proc.communicate() + proc.communicate() # should handle closing pipes if proc.returncode: status += 1 return status diff -Nru swift-2.17.0/swift/common/memcached.py swift-2.18.0/swift/common/memcached.py --- swift-2.17.0/swift/common/memcached.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/memcached.py 2018-05-30 10:17:02.000000000 +0000 @@ -76,7 +76,9 @@ def md5hash(key): - return md5(key).hexdigest() + if not isinstance(key, bytes): + key = key.encode('utf-8') + return md5(key).hexdigest().encode('ascii') def sanitize_timeout(timeout): @@ -88,7 +90,21 @@ """ if timeout > (30 * 24 * 60 * 60): timeout += time.time() - return timeout + return int(timeout) + + +def set_msg(key, flags, timeout, value): + if not isinstance(key, bytes): + raise TypeError('key must be bytes') + if not isinstance(value, bytes): + raise TypeError('value must be bytes') + return b' '.join([ + b'set', + key, + str(flags).encode('ascii'), + str(timeout).encode('ascii'), + str(len(value)).encode('ascii'), + ]) + (b'\r\n' + value + b'\r\n') class MemcacheConnectionError(Exception): @@ -253,13 +269,15 @@ value = pickle.dumps(value, PICKLE_PROTOCOL) flags |= PICKLE_FLAG elif serialize: - value = json.dumps(value) + value = json.dumps(value).encode('ascii') flags |= JSON_FLAG + elif not isinstance(value, bytes): + value = str(value).encode('utf-8') + for (server, fp, sock) in 
self._get_conns(key): try: with Timeout(self._io_timeout): - sock.sendall('set %s %d %d %s\r\n%s\r\n' % - (key, flags, timeout, len(value), value)) + sock.sendall(set_msg(key, flags, timeout, value)) # Wait for the set to complete fp.readline() self._return_conn(server, fp, sock) @@ -281,14 +299,14 @@ for (server, fp, sock) in self._get_conns(key): try: with Timeout(self._io_timeout): - sock.sendall('get %s\r\n' % key) + sock.sendall(b'get ' + key + b'\r\n') line = fp.readline().strip().split() while True: if not line: raise MemcacheConnectionError('incomplete read') - if line[0].upper() == 'END': + if line[0].upper() == b'END': break - if line[0].upper() == 'VALUE' and line[1] == key: + if line[0].upper() == b'VALUE' and line[1] == key: size = int(line[3]) value = fp.read(size) if int(line[2]) & PICKLE_FLAG: @@ -297,7 +315,7 @@ else: value = None elif int(line[2]) & JSON_FLAG: - value = json.loads(value) + value = json.loads(value.decode('ascii')) fp.readline() line = fp.readline().strip().split() self._return_conn(server, fp, sock) @@ -323,28 +341,31 @@ :raises MemcacheConnectionError: """ key = md5hash(key) - command = 'incr' + command = b'incr' if delta < 0: - command = 'decr' - delta = str(abs(int(delta))) + command = b'decr' + delta = str(abs(int(delta))).encode('ascii') timeout = sanitize_timeout(time) for (server, fp, sock) in self._get_conns(key): try: with Timeout(self._io_timeout): - sock.sendall('%s %s %s\r\n' % (command, key, delta)) + sock.sendall(b' '.join([ + command, key, delta]) + b'\r\n') line = fp.readline().strip().split() if not line: raise MemcacheConnectionError('incomplete read') - if line[0].upper() == 'NOT_FOUND': + if line[0].upper() == b'NOT_FOUND': add_val = delta - if command == 'decr': - add_val = '0' - sock.sendall('add %s %d %d %s\r\n%s\r\n' % - (key, 0, timeout, len(add_val), add_val)) + if command == b'decr': + add_val = b'0' + sock.sendall(b' '.join([ + b'add', key, b'0', str(timeout).encode('ascii'), + str(len(add_val)).encode('ascii') + ]) + b'\r\n' + add_val + b'\r\n') line = fp.readline().strip().split() - if line[0].upper() == 'NOT_STORED': - sock.sendall('%s %s %s\r\n' % (command, key, - delta)) + if line[0].upper() == b'NOT_STORED': + sock.sendall(b' '.join([ + command, key, delta]) + b'\r\n') line = fp.readline().strip().split() ret = int(line[0].strip()) else: @@ -382,7 +403,7 @@ for (server, fp, sock) in self._get_conns(key): try: with Timeout(self._io_timeout): - sock.sendall('delete %s\r\n' % key) + sock.sendall(b'delete ' + key + b'\r\n') # Wait for the delete to complete fp.readline() self._return_conn(server, fp, sock) @@ -409,7 +430,7 @@ """ server_key = md5hash(server_key) timeout = sanitize_timeout(time) - msg = '' + msg = [] for key, value in mapping.items(): key = md5hash(key) flags = 0 @@ -417,14 +438,13 @@ value = pickle.dumps(value, PICKLE_PROTOCOL) flags |= PICKLE_FLAG elif serialize: - value = json.dumps(value) + value = json.dumps(value).encode('ascii') flags |= JSON_FLAG - msg += ('set %s %d %d %s\r\n%s\r\n' % - (key, flags, timeout, len(value), value)) + msg.append(set_msg(key, flags, timeout, value)) for (server, fp, sock) in self._get_conns(server_key): try: with Timeout(self._io_timeout): - sock.sendall(msg) + sock.sendall(b''.join(msg)) # Wait for the set to complete for line in range(len(mapping)): fp.readline() @@ -447,15 +467,15 @@ for (server, fp, sock) in self._get_conns(server_key): try: with Timeout(self._io_timeout): - sock.sendall('get %s\r\n' % ' '.join(keys)) + sock.sendall(b'get ' + b' '.join(keys) + b'\r\n') 
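With the cache client now speaking bytes end to end, the framing produced by the new set_msg helper can be checked in isolation. A usage sketch (assuming swift 2.18 is on the import path; the key and value are arbitrary):

    from swift.common.memcached import md5hash, set_msg

    key = md5hash('some/cache/key')   # 32 hex digits, returned as bytes
    msg = set_msg(key, 0, 300, b'cached-value')
    # wire format: b'set <hashed key> 0 300 12\r\ncached-value\r\n'
    assert msg.split()[0] == b'set'
    assert msg.endswith(b'\r\ncached-value\r\n')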
line = fp.readline().strip().split() responses = {} while True: if not line: raise MemcacheConnectionError('incomplete read') - if line[0].upper() == 'END': + if line[0].upper() == b'END': break - if line[0].upper() == 'VALUE': + if line[0].upper() == b'VALUE': size = int(line[3]) value = fp.read(size) if int(line[2]) & PICKLE_FLAG: @@ -464,7 +484,7 @@ else: value = None elif int(line[2]) & JSON_FLAG: - value = json.loads(value) + value = json.loads(value.decode('ascii')) responses[line[1]] = value fp.readline() line = fp.readline().strip().split() diff -Nru swift-2.17.0/swift/common/middleware/bulk.py swift-2.18.0/swift/common/middleware/bulk.py --- swift-2.17.0/swift/common/middleware/bulk.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/bulk.py 2018-05-30 10:17:02.000000000 +0000 @@ -314,7 +314,7 @@ resp = head_cont_req.get_response(self.app) if resp.is_success: return False - if resp.status_int == 404: + if resp.status_int == HTTP_NOT_FOUND: new_env = req.environ.copy() new_env['PATH_INFO'] = container_path new_env['swift.source'] = 'EA' diff -Nru swift-2.17.0/swift/common/middleware/formpost.py swift-2.18.0/swift/common/middleware/formpost.py --- swift-2.17.0/swift/common/middleware/formpost.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/formpost.py 2018-05-30 10:17:02.000000000 +0000 @@ -121,7 +121,7 @@ from swift.common.middleware.tempurl import get_tempurl_keys_from_metadata from swift.common.utils import streq_const_time, register_swift_info, \ parse_content_disposition, parse_mime_headers, \ - iter_multipart_mime_documents + iter_multipart_mime_documents, reiterate, close_if_possible from swift.common.wsgi import make_pre_authed_env from swift.common.swob import HTTPUnauthorized from swift.proxy.controllers.base import get_account_info, get_container_info @@ -270,7 +270,7 @@ if 'content-type' not in attributes and 'content-type' in hdrs: attributes['content-type'] = \ hdrs['Content-Type'] or 'application/octet-stream' - status, subheaders, message = \ + status, subheaders = \ self._perform_subrequest(env, attributes, fp, keys) if not status.startswith('2'): break @@ -323,7 +323,7 @@ :param attributes: dict of the attributes of the form so far. :param fp: The file-like object containing the request body. :param keys: The account keys to validate the signature with. 
- :returns: (status_line, headers_list, message) + :returns: (status_line, headers_list) """ if not keys: raise FormUnauthorized('invalid signature') @@ -357,8 +357,6 @@ if 'content-type' in attributes: subenv['CONTENT_TYPE'] = \ attributes['content-type'] or 'application/octet-stream' - elif 'CONTENT_TYPE' in subenv: - del subenv['CONTENT_TYPE'] try: if int(attributes.get('expires') or 0) < time(): raise FormUnauthorized('form expired') @@ -392,12 +390,10 @@ substatus[0] = status subheaders[0] = headers - i = iter(self.app(subenv, _start_response)) - try: - next(i) - except StopIteration: - pass - return substatus[0], subheaders[0], '' + # reiterate to ensure the response started, + # but drop any data on the floor + close_if_possible(reiterate(self.app(subenv, _start_response))) + return substatus[0], subheaders[0] def _get_keys(self, env): """ diff -Nru swift-2.17.0/swift/common/middleware/gatekeeper.py swift-2.18.0/swift/common/middleware/gatekeeper.py --- swift-2.17.0/swift/common/middleware/gatekeeper.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/gatekeeper.py 2018-05-30 10:17:02.000000000 +0000 @@ -92,7 +92,7 @@ def gatekeeper_response(status, response_headers, exc_info=None): def fixed_response_headers(): def relative_path(value): - parsed = urlsplit(v) + parsed = urlsplit(value) new_path = parsed.path if parsed.query: new_path += ('?%s' % parsed.query) diff -Nru swift-2.17.0/swift/common/middleware/ratelimit.py swift-2.18.0/swift/common/middleware/ratelimit.py --- swift-2.17.0/swift/common/middleware/ratelimit.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/ratelimit.py 2018-05-30 10:17:02.000000000 +0000 @@ -109,9 +109,19 @@ self.ratelimit_whitelist = \ [acc.strip() for acc in conf.get('account_whitelist', '').split(',') if acc.strip()] + if self.ratelimit_whitelist: + self.logger.warning('Option account_whitelist is deprecated. Use ' + 'an internal client to POST a `X-Account-' + 'Sysmeta-Global-Write-Ratelimit: WHITELIST` ' + 'header to the specific accounts instead.') self.ratelimit_blacklist = \ [acc.strip() for acc in conf.get('account_blacklist', '').split(',') if acc.strip()] + if self.ratelimit_blacklist: + self.logger.warning('Option account_blacklist is deprecated. Use ' + 'an internal client to POST a `X-Account-' + 'Sysmeta-Global-Write-Ratelimit: BLACKLIST` ' + 'header to the specific accounts instead.') self.container_ratelimits = interpret_conf_limits( conf, 'container_ratelimit_') self.container_listing_ratelimits = interpret_conf_limits( diff -Nru swift-2.17.0/swift/common/middleware/s3api/acl_handlers.py swift-2.18.0/swift/common/middleware/s3api/acl_handlers.py --- swift-2.17.0/swift/common/middleware/s3api/acl_handlers.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/acl_handlers.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,463 @@ +# Copyright (c) 2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
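The deprecation warnings point at account sysmeta as the replacement for the whitelist/blacklist options. A sketch of the suggested POST through the internal client (the config path and account name are placeholders, and the constructor arguments are the commonly used ones rather than anything mandated by this patch):

    from swift.common.internal_client import InternalClient

    swift = InternalClient('/etc/swift/internal-client.conf',
                           'ratelimit-sysmeta-tool', 3)
    # equivalent of an old account_whitelist entry, for one account
    swift.make_request(
        'POST', '/v1/AUTH_special_account',
        {'X-Account-Sysmeta-Global-Write-Ratelimit': 'WHITELIST'},
        acceptable_statuses=(2,))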
+""" +------------ +Acl Handlers +------------ + +Why do we need this +^^^^^^^^^^^^^^^^^^^ + +To make controller classes clean, we need these handlers. +It is really useful for customizing acl checking algorithms for +each controller. + +Basic Information +^^^^^^^^^^^^^^^^^ + +BaseAclHandler wraps basic Acl handling. +(i.e. it will check acl from ACL_MAP by using HEAD) + +How to extend +^^^^^^^^^^^^^ + +Make a handler with the name of the controller. +(e.g. BucketAclHandler is for BucketController) +It consists of method(s) for actual S3 method on controllers as follows. + +Example:: + + class BucketAclHandler(BaseAclHandler): + def PUT: + << put acl handling algorithms here for PUT bucket >> + +.. note:: + If the method DON'T need to recall _get_response in outside of + acl checking, the method have to return the response it needs at + the end of method. + +""" +import sys + +from swift.common.middleware.s3api.subresource import ACL, Owner, encode_acl +from swift.common.middleware.s3api.s3response import MissingSecurityHeader, \ + MalformedACLError, UnexpectedContent +from swift.common.middleware.s3api.etree import fromstring, XMLSyntaxError, \ + DocumentInvalid +from swift.common.middleware.s3api.utils import MULTIUPLOAD_SUFFIX, \ + sysmeta_header + + +def get_acl_handler(controller_name): + for base_klass in [BaseAclHandler, MultiUploadAclHandler]: + # pylint: disable-msg=E1101 + for handler in base_klass.__subclasses__(): + handler_suffix_len = len('AclHandler') \ + if not handler.__name__ == 'S3AclHandler' else len('Handler') + if handler.__name__[:-handler_suffix_len] == controller_name: + return handler + return BaseAclHandler + + +class BaseAclHandler(object): + """ + BaseAclHandler: Handling ACL for basic requests mapped on ACL_MAP + """ + def __init__(self, req, logger, container=None, obj=None, headers=None): + self.req = req + self.container = req.container_name if container is None else container + self.obj = req.object_name if obj is None else obj + self.method = req.environ['REQUEST_METHOD'] + self.user_id = self.req.user_id + self.headers = req.headers if headers is None else headers + self.logger = logger + + def request_with(self, container, obj, headers): + return type(self)(self.req, self.logger, + container=container, obj=obj, headers=headers) + + def handle_acl(self, app, method, container=None, obj=None, headers=None): + method = method or self.method + + ah = self.request_with(container, obj, headers) + if hasattr(ah, method): + return getattr(ah, method)(app) + else: + return ah._handle_acl(app, method) + + def _handle_acl(self, app, sw_method, container=None, obj=None, + permission=None, headers=None): + """ + General acl handling method. + This method expects to call Request._get_response() in outside of + this method so that this method returns response only when sw_method + is HEAD. 
+ """ + + container = self.container if container is None else container + obj = self.obj if obj is None else obj + sw_method = sw_method or self.req.environ['REQUEST_METHOD'] + resource = 'object' if obj else 'container' + headers = self.headers if headers is None else headers + + self.logger.debug( + 'checking permission: %s %s %s %s' % + (container, obj, sw_method, dict(headers))) + + if not container: + return + + if not permission and (self.method, sw_method, resource) in ACL_MAP: + acl_check = ACL_MAP[(self.method, sw_method, resource)] + resource = acl_check.get('Resource') or resource + permission = acl_check['Permission'] + + if not permission: + self.logger.debug( + '%s %s %s %s' % (container, obj, sw_method, headers)) + raise Exception('No permission to be checked exists') + + if resource == 'object': + resp = self.req.get_acl_response(app, 'HEAD', + container, obj, + headers) + acl = resp.object_acl + elif resource == 'container': + resp = self.req.get_acl_response(app, 'HEAD', + container, '') + acl = resp.bucket_acl + + try: + acl.check_permission(self.user_id, permission) + except Exception as e: + self.logger.debug(acl) + self.logger.debug('permission denined: %s %s %s' % + (e, self.user_id, permission)) + raise + + if sw_method == 'HEAD': + return resp + + def get_acl(self, headers, body, bucket_owner, object_owner=None): + """ + Get ACL instance from S3 (e.g. x-amz-grant) headers or S3 acl xml body. + """ + acl = ACL.from_headers(headers, bucket_owner, object_owner, + as_private=False) + + if acl is None: + # Get acl from request body if possible. + if not body: + raise MissingSecurityHeader(missing_header_name='x-amz-acl') + try: + elem = fromstring(body, ACL.root_tag) + acl = ACL.from_elem( + elem, True, self.req.allow_no_owner) + except(XMLSyntaxError, DocumentInvalid): + raise MalformedACLError() + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + self.logger.error(e) + raise exc_type, exc_value, exc_traceback + else: + if body: + # Specifying grant with both header and xml is not allowed. + raise UnexpectedContent() + + return acl + + +class BucketAclHandler(BaseAclHandler): + """ + BucketAclHandler: Handler for BucketController + """ + def DELETE(self, app): + if self.container.endswith(MULTIUPLOAD_SUFFIX): + # anyways, delete multiupload container doesn't need acls + # because it depends on GET segment container result for + # cleanup + pass + else: + return self._handle_acl(app, 'DELETE') + + def HEAD(self, app): + if self.method == 'DELETE': + return self._handle_acl(app, 'DELETE') + else: + return self._handle_acl(app, 'HEAD') + + def GET(self, app): + if self.method == 'DELETE' and \ + self.container.endswith(MULTIUPLOAD_SUFFIX): + pass + else: + return self._handle_acl(app, 'GET') + + def PUT(self, app): + req_acl = ACL.from_headers(self.req.headers, + Owner(self.user_id, self.user_id)) + + # To avoid overwriting the existing bucket's ACL, we send PUT + # request first before setting the ACL to make sure that the target + # container does not exist. + self.req.get_acl_response(app, 'PUT') + + # update metadata + self.req.bucket_acl = req_acl + + # FIXME If this request is failed, there is a possibility that the + # bucket which has no ACL is left. 
+ return self.req.get_acl_response(app, 'POST') + + +class ObjectAclHandler(BaseAclHandler): + """ + ObjectAclHandler: Handler for ObjectController + """ + def HEAD(self, app): + # No check object permission needed at DELETE Object + if self.method != 'DELETE': + return self._handle_acl(app, 'HEAD') + + def PUT(self, app): + b_resp = self._handle_acl(app, 'HEAD', obj='') + req_acl = ACL.from_headers(self.req.headers, + b_resp.bucket_acl.owner, + Owner(self.user_id, self.user_id)) + self.req.object_acl = req_acl + + +class S3AclHandler(BaseAclHandler): + """ + S3AclHandler: Handler for S3AclController + """ + def GET(self, app): + self._handle_acl(app, 'HEAD', permission='READ_ACP') + + def PUT(self, app): + if self.req.is_object_request: + b_resp = self.req.get_acl_response(app, 'HEAD', obj='') + o_resp = self._handle_acl(app, 'HEAD', permission='WRITE_ACP') + req_acl = self.get_acl(self.req.headers, + self.req.xml(ACL.max_xml_length), + b_resp.bucket_acl.owner, + o_resp.object_acl.owner) + + # Don't change the owner of the resource by PUT acl request. + o_resp.object_acl.check_owner(req_acl.owner.id) + + for g in req_acl.grants: + self.logger.debug( + 'Grant %s %s permission on the object /%s/%s' % + (g.grantee, g.permission, self.req.container_name, + self.req.object_name)) + self.req.object_acl = req_acl + else: + self._handle_acl(app, self.method) + + def POST(self, app): + if self.req.is_bucket_request: + resp = self._handle_acl(app, 'HEAD', permission='WRITE_ACP') + + req_acl = self.get_acl(self.req.headers, + self.req.xml(ACL.max_xml_length), + resp.bucket_acl.owner) + + # Don't change the owner of the resource by PUT acl request. + resp.bucket_acl.check_owner(req_acl.owner.id) + + for g in req_acl.grants: + self.logger.debug( + 'Grant %s %s permission on the bucket /%s' % + (g.grantee, g.permission, self.req.container_name)) + self.req.bucket_acl = req_acl + else: + self._handle_acl(app, self.method) + + +class MultiObjectDeleteAclHandler(BaseAclHandler): + """ + MultiObjectDeleteAclHandler: Handler for MultiObjectDeleteController + """ + def HEAD(self, app): + # Only bucket write acl is required + if not self.obj: + return self._handle_acl(app, 'HEAD') + + def DELETE(self, app): + # Only bucket write acl is required + pass + + +class MultiUploadAclHandler(BaseAclHandler): + """ + MultiUpload stuff requires acl checking just once for BASE container + so that MultiUploadAclHandler extends BaseAclHandler to check acl only + when the verb defined. We should define the verb as the first step to + request to backend Swift at incoming request. + + Basic Rules: + - BASE container name is always w/o 'MULTIUPLOAD_SUFFIX' + - Any check timing is ok but we should check it as soon as possible. + + ========== ====== ============= ========== + Controller Verb CheckResource Permission + ========== ====== ============= ========== + Part PUT Container WRITE + Uploads GET Container READ + Uploads POST Container WRITE + Upload GET Container READ + Upload DELETE Container WRITE + Upload POST Container WRITE + ========== ====== ============= ========== + + """ + def __init__(self, req, logger, **kwargs): + super(MultiUploadAclHandler, self).__init__(req, logger, **kwargs) + self.acl_checked = False + + def handle_acl(self, app, method, container=None, obj=None, headers=None): + method = method or self.method + ah = self.request_with(container, obj, headers) + # MultiUpload stuffs don't need acl check basically. 
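Handler dispatch in get_acl_handler is purely name based, which the docstrings describe but never show. A short usage sketch against the classes defined in this module:

    from swift.common.middleware.s3api.acl_handlers import (
        get_acl_handler, BaseAclHandler, BucketAclHandler, S3AclHandler)

    # the 'AclHandler' suffix is stripped before matching
    assert get_acl_handler('Bucket') is BucketAclHandler
    # S3AclHandler is special-cased: only 'Handler' is stripped
    assert get_acl_handler('S3Acl') is S3AclHandler
    # controllers without a dedicated handler fall back to the base class
    assert get_acl_handler('Location') is BaseAclHandler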
+ if hasattr(ah, method): + return getattr(ah, method)(app) + + def HEAD(self, app): + # For _check_upload_info + self._handle_acl(app, 'HEAD', self.container, '') + + +class PartAclHandler(MultiUploadAclHandler): + """ + PartAclHandler: Handler for PartController + """ + def __init__(self, req, logger, **kwargs): + # pylint: disable-msg=E1003 + super(MultiUploadAclHandler, self).__init__(req, logger, **kwargs) + + def HEAD(self, app): + if self.container.endswith(MULTIUPLOAD_SUFFIX): + # For _check_upload_info + container = self.container[:-len(MULTIUPLOAD_SUFFIX)] + self._handle_acl(app, 'HEAD', container, '') + else: + # For check_copy_source + return self._handle_acl(app, 'HEAD', self.container, self.obj) + + +class UploadsAclHandler(MultiUploadAclHandler): + """ + UploadsAclHandler: Handler for UploadsController + """ + def handle_acl(self, app, method, *args, **kwargs): + method = method or self.method + if hasattr(self, method): + return getattr(self, method)(app) + else: + pass + + def GET(self, app): + # List Multipart Upload + self._handle_acl(app, 'GET', self.container, '') + + def PUT(self, app): + if not self.acl_checked: + resp = self._handle_acl(app, 'HEAD', obj='') + req_acl = ACL.from_headers(self.req.headers, + resp.bucket_acl.owner, + Owner(self.user_id, self.user_id)) + acl_headers = encode_acl('object', req_acl) + self.req.headers[sysmeta_header('object', 'tmpacl')] = \ + acl_headers[sysmeta_header('object', 'acl')] + self.acl_checked = True + + +class UploadAclHandler(MultiUploadAclHandler): + """ + UploadAclHandler: Handler for UploadController + """ + def handle_acl(self, app, method, *args, **kwargs): + method = method or self.method + if hasattr(self, method): + return getattr(self, method)(app) + else: + pass + + def HEAD(self, app): + # FIXME: GET HEAD case conflicts with GET service + method = 'GET' if self.method == 'GET' else 'HEAD' + self._handle_acl(app, method, self.container, '') + + def PUT(self, app): + container = self.req.container_name + MULTIUPLOAD_SUFFIX + obj = '%s/%s' % (self.obj, self.req.params['uploadId']) + resp = self.req._get_response(app, 'HEAD', container, obj) + self.req.headers[sysmeta_header('object', 'acl')] = \ + resp.sysmeta_headers.get(sysmeta_header('object', 'tmpacl')) + + +""" +ACL_MAP = + { + ('', '', ''): + {'Resource': '', + 'Permission': ''}, + ... 
+ } + +s3_method: Method of S3 Request from user to s3api +swift_method: Method of Swift Request from s3api to swift +swift_resource: Resource of Swift Request from s3api to swift +check_resource: +check_permission: +""" +ACL_MAP = { + # HEAD Bucket + ('HEAD', 'HEAD', 'container'): + {'Permission': 'READ'}, + # GET Service + ('GET', 'HEAD', 'container'): + {'Permission': 'OWNER'}, + # GET Bucket, List Parts, List Multipart Upload + ('GET', 'GET', 'container'): + {'Permission': 'READ'}, + # PUT Object, PUT Object Copy + ('PUT', 'HEAD', 'container'): + {'Permission': 'WRITE'}, + # DELETE Bucket + ('DELETE', 'DELETE', 'container'): + {'Permission': 'OWNER'}, + # HEAD Object + ('HEAD', 'HEAD', 'object'): + {'Permission': 'READ'}, + # GET Object + ('GET', 'GET', 'object'): + {'Permission': 'READ'}, + # PUT Object Copy, Upload Part Copy + ('PUT', 'HEAD', 'object'): + {'Permission': 'READ'}, + # Abort Multipart Upload + ('DELETE', 'HEAD', 'container'): + {'Permission': 'WRITE'}, + # Delete Object + ('DELETE', 'DELETE', 'object'): + {'Resource': 'container', + 'Permission': 'WRITE'}, + # Complete Multipart Upload, DELETE Multiple Objects, + # Initiate Multipart Upload + ('POST', 'HEAD', 'container'): + {'Permission': 'WRITE'}, +} diff -Nru swift-2.17.0/swift/common/middleware/s3api/acl_utils.py swift-2.18.0/swift/common/middleware/s3api/acl_utils.py --- swift-2.17.0/swift/common/middleware/s3api/acl_utils.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/acl_utils.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,95 @@ +# Copyright (c) 2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from swift.common.middleware.s3api.exception import ACLError +from swift.common.middleware.s3api.etree import fromstring, XMLSyntaxError, \ + DocumentInvalid, XMLNS_XSI +from swift.common.middleware.s3api.s3response import S3NotImplemented, \ + MalformedACLError, InvalidArgument + + +def swift_acl_translate(acl, group='', user='', xml=False): + """ + Takes an S3 style ACL and returns a list of header/value pairs that + implement that ACL in Swift, or "NotImplemented" if there isn't a way to do + that yet. 
+ """ + swift_acl = {} + swift_acl['public-read'] = [['X-Container-Read', '.r:*,.rlistings']] + # Swift does not support public write: + # https://answers.launchpad.net/swift/+question/169541 + swift_acl['public-read-write'] = [['X-Container-Write', '.r:*'], + ['X-Container-Read', + '.r:*,.rlistings']] + + # TODO: if there's a way to get group and user, this should work for + # private: + # swift_acl['private'] = \ + # [['HTTP_X_CONTAINER_WRITE', group + ':' + user], \ + # ['HTTP_X_CONTAINER_READ', group + ':' + user]] + swift_acl['private'] = [['X-Container-Write', '.'], + ['X-Container-Read', '.']] + if xml: + # We are working with XML and need to parse it + try: + elem = fromstring(acl, 'AccessControlPolicy') + except (XMLSyntaxError, DocumentInvalid): + raise MalformedACLError() + acl = 'unknown' + for grant in elem.findall('./AccessControlList/Grant'): + permission = grant.find('./Permission').text + grantee = grant.find('./Grantee').get('{%s}type' % XMLNS_XSI) + if permission == "FULL_CONTROL" and grantee == 'CanonicalUser' and\ + acl != 'public-read' and acl != 'public-read-write': + acl = 'private' + elif permission == "READ" and grantee == 'Group' and\ + acl != 'public-read-write': + acl = 'public-read' + elif permission == "WRITE" and grantee == 'Group': + acl = 'public-read-write' + else: + acl = 'unsupported' + + if acl == 'authenticated-read': + raise S3NotImplemented() + elif acl not in swift_acl: + raise ACLError() + + return swift_acl[acl] + + +def handle_acl_header(req): + """ + Handle the x-amz-acl header. + Note that this header currently used for only normal-acl + (not implemented) on s3acl. + TODO: add translation to swift acl like as x-container-read to s3acl + """ + + amz_acl = req.environ['HTTP_X_AMZ_ACL'] + # Translate the Amazon ACL to something that can be + # implemented in Swift, 501 otherwise. Swift uses POST + # for ACLs, whereas S3 uses PUT. + del req.environ['HTTP_X_AMZ_ACL'] + if req.query_string: + req.query_string = '' + + try: + translated_acl = swift_acl_translate(amz_acl) + except ACLError: + raise InvalidArgument('x-amz-acl', amz_acl) + + for header, acl in translated_acl: + req.headers[header] = acl diff -Nru swift-2.17.0/swift/common/middleware/s3api/controllers/acl.py swift-2.18.0/swift/common/middleware/s3api/controllers/acl.py --- swift-2.17.0/swift/common/middleware/s3api/controllers/acl.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/controllers/acl.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,130 @@ +# Copyright (c) 2010-2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from swift.common.http import HTTP_OK +from swift.common.middleware.acl import parse_acl, referrer_allowed +from swift.common.utils import public + +from swift.common.middleware.s3api.exception import ACLError +from swift.common.middleware.s3api.controllers.base import Controller +from swift.common.middleware.s3api.s3response import HTTPOk, S3NotImplemented, \ + MalformedACLError, UnexpectedContent, MissingSecurityHeader +from swift.common.middleware.s3api.etree import Element, SubElement, tostring +from swift.common.middleware.s3api.acl_utils import swift_acl_translate, \ + XMLNS_XSI + + +MAX_ACL_BODY_SIZE = 200 * 1024 + + +def get_acl(account_name, headers): + """ + Attempts to construct an S3 ACL based on what is found in the swift headers + """ + + elem = Element('AccessControlPolicy') + owner = SubElement(elem, 'Owner') + SubElement(owner, 'ID').text = account_name + SubElement(owner, 'DisplayName').text = account_name + access_control_list = SubElement(elem, 'AccessControlList') + + # grant FULL_CONTROL to myself by default + grant = SubElement(access_control_list, 'Grant') + grantee = SubElement(grant, 'Grantee', nsmap={'xsi': XMLNS_XSI}) + grantee.set('{%s}type' % XMLNS_XSI, 'CanonicalUser') + SubElement(grantee, 'ID').text = account_name + SubElement(grantee, 'DisplayName').text = account_name + SubElement(grant, 'Permission').text = 'FULL_CONTROL' + + referrers, _ = parse_acl(headers.get('x-container-read')) + if referrer_allowed('unknown', referrers): + # grant public-read access + grant = SubElement(access_control_list, 'Grant') + grantee = SubElement(grant, 'Grantee', nsmap={'xsi': XMLNS_XSI}) + grantee.set('{%s}type' % XMLNS_XSI, 'Group') + SubElement(grantee, 'URI').text = \ + 'http://acs.amazonaws.com/groups/global/AllUsers' + SubElement(grant, 'Permission').text = 'READ' + + referrers, _ = parse_acl(headers.get('x-container-write')) + if referrer_allowed('unknown', referrers): + # grant public-write access + grant = SubElement(access_control_list, 'Grant') + grantee = SubElement(grant, 'Grantee', nsmap={'xsi': XMLNS_XSI}) + grantee.set('{%s}type' % XMLNS_XSI, 'Group') + SubElement(grantee, 'URI').text = \ + 'http://acs.amazonaws.com/groups/global/AllUsers' + SubElement(grant, 'Permission').text = 'WRITE' + + body = tostring(elem) + + return HTTPOk(body=body, content_type="text/plain") + + +class AclController(Controller): + """ + Handles the following APIs: + + * GET Bucket acl + * PUT Bucket acl + * GET Object acl + * PUT Object acl + + Those APIs are logged as ACL operations in the S3 server log. + """ + @public + def GET(self, req): + """ + Handles GET Bucket acl and GET Object acl. + """ + resp = req.get_response(self.app, method='HEAD') + + return get_acl(req.user_id, resp.headers) + + @public + def PUT(self, req): + """ + Handles PUT Bucket acl and PUT Object acl. + """ + if req.is_object_request: + # Handle Object ACL + raise S3NotImplemented() + else: + # Handle Bucket ACL + xml = req.xml(MAX_ACL_BODY_SIZE) + if all(['HTTP_X_AMZ_ACL' in req.environ, xml]): + # S3 doesn't allow to give ACL with both ACL header and body. + raise UnexpectedContent() + elif not any(['HTTP_X_AMZ_ACL' in req.environ, xml]): + # Both canned ACL header and xml body are missing + raise MissingSecurityHeader(missing_header_name='x-amz-acl') + else: + # correct ACL exists in the request + if xml: + # We very likely have an XML-based ACL request. 
+ # let's try to translate to the request header + try: + translated_acl = swift_acl_translate(xml, xml=True) + except ACLError: + raise MalformedACLError() + + for header, acl in translated_acl: + req.headers[header] = acl + + resp = req.get_response(self.app, 'POST') + resp.status = HTTP_OK + resp.headers.update({'Location': req.container_name}) + + return resp diff -Nru swift-2.17.0/swift/common/middleware/s3api/controllers/base.py swift-2.18.0/swift/common/middleware/s3api/controllers/base.py --- swift-2.17.0/swift/common/middleware/s3api/controllers/base.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/controllers/base.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,100 @@ +# Copyright (c) 2010-2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools + +from swift.common.middleware.s3api.s3response import S3NotImplemented, \ + InvalidRequest +from swift.common.middleware.s3api.utils import camel_to_snake + + +def bucket_operation(func=None, err_resp=None, err_msg=None): + """ + A decorator to ensure that the request is a bucket operation. If the + target resource is an object, this decorator updates the request by default + so that the controller handles it as a bucket operation. If 'err_resp' is + specified, this raises it on error instead. + """ + def _bucket_operation(func): + @functools.wraps(func) + def wrapped(self, req): + if not req.is_bucket_request: + if err_resp: + raise err_resp(msg=err_msg) + + self.logger.debug('A key is specified for bucket API.') + req.object_name = None + + return func(self, req) + + return wrapped + + if func: + return _bucket_operation(func) + else: + return _bucket_operation + + +def object_operation(func): + """ + A decorator to ensure that the request is an object operation. If the + target resource is not an object, this raises an error response. + """ + @functools.wraps(func) + def wrapped(self, req): + if not req.is_object_request: + raise InvalidRequest('A key must be specified') + + return func(self, req) + + return wrapped + + +def check_container_existence(func): + """ + A decorator to ensure the container existence. + """ + @functools.wraps(func) + def check_container(self, req): + req.get_container_info(self.app) + return func(self, req) + + return check_container + + +class Controller(object): + """ + Base WSGI controller class for the middleware + """ + def __init__(self, app, conf, logger, **kwargs): + self.app = app + self.conf = conf + self.logger = logger + + @classmethod + def resource_type(cls): + """ + Returns the target resource type of this controller. + """ + name = cls.__name__[:-len('Controller')] + return camel_to_snake(name).upper() + + +class UnsupportedController(Controller): + """ + Handles unsupported requests. 
+ """ + def __init__(self, app, conf, logger, **kwargs): + raise S3NotImplemented('The requested resource is not implemented') diff -Nru swift-2.17.0/swift/common/middleware/s3api/controllers/bucket.py swift-2.18.0/swift/common/middleware/s3api/controllers/bucket.py --- swift-2.17.0/swift/common/middleware/s3api/controllers/bucket.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/controllers/bucket.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,251 @@ +# Copyright (c) 2010-2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +from base64 import standard_b64encode as b64encode +from base64 import standard_b64decode as b64decode + +from swift.common.http import HTTP_OK +from swift.common.utils import json, public, config_true_value + +from swift.common.middleware.s3api.controllers.base import Controller +from swift.common.middleware.s3api.etree import Element, SubElement, tostring, \ + fromstring, XMLSyntaxError, DocumentInvalid +from swift.common.middleware.s3api.s3response import HTTPOk, S3NotImplemented, \ + InvalidArgument, \ + MalformedXML, InvalidLocationConstraint, NoSuchBucket, \ + BucketNotEmpty, InternalError, ServiceUnavailable, NoSuchKey +from swift.common.middleware.s3api.utils import MULTIUPLOAD_SUFFIX + +MAX_PUT_BUCKET_BODY_SIZE = 10240 + + +class BucketController(Controller): + """ + Handles bucket request. + """ + def _delete_segments_bucket(self, req): + """ + Before delete bucket, delete segments bucket if existing. + """ + container = req.container_name + MULTIUPLOAD_SUFFIX + marker = '' + seg = '' + + try: + resp = req.get_response(self.app, 'HEAD') + if int(resp.sw_headers['X-Container-Object-Count']) > 0: + raise BucketNotEmpty() + # FIXME: This extra HEAD saves unexpected segment deletion + # but if a complete multipart upload happen while cleanup + # segment container below, completed object may be missing its + # segments unfortunately. To be safer, it might be good + # to handle if the segments can be deleted for each object. 
+ except NoSuchBucket: + pass + + try: + while True: + # delete all segments + resp = req.get_response(self.app, 'GET', container, + query={'format': 'json', + 'marker': marker}) + segments = json.loads(resp.body) + for seg in segments: + try: + req.get_response(self.app, 'DELETE', container, + seg['name']) + except NoSuchKey: + pass + except InternalError: + raise ServiceUnavailable() + if segments: + marker = seg['name'] + else: + break + req.get_response(self.app, 'DELETE', container) + except NoSuchBucket: + return + except (BucketNotEmpty, InternalError): + raise ServiceUnavailable() + + @public + def HEAD(self, req): + """ + Handle HEAD Bucket (Get Metadata) request + """ + resp = req.get_response(self.app) + + return HTTPOk(headers=resp.headers) + + @public + def GET(self, req): + """ + Handle GET Bucket (List Objects) request + """ + + max_keys = req.get_validated_param( + 'max-keys', self.conf.max_bucket_listing) + # TODO: Separate max_bucket_listing and default_bucket_listing + tag_max_keys = max_keys + max_keys = min(max_keys, self.conf.max_bucket_listing) + + encoding_type = req.params.get('encoding-type') + if encoding_type is not None and encoding_type != 'url': + err_msg = 'Invalid Encoding Method specified in Request' + raise InvalidArgument('encoding-type', encoding_type, err_msg) + + query = { + 'format': 'json', + 'limit': max_keys + 1, + } + if 'marker' in req.params: + query.update({'marker': req.params['marker']}) + if 'prefix' in req.params: + query.update({'prefix': req.params['prefix']}) + if 'delimiter' in req.params: + query.update({'delimiter': req.params['delimiter']}) + + # GET Bucket (List Objects) Version 2 parameters + is_v2 = int(req.params.get('list-type', '1')) == 2 + fetch_owner = False + if is_v2: + if 'start-after' in req.params: + query.update({'marker': req.params['start-after']}) + # continuation-token overrides start-after + if 'continuation-token' in req.params: + decoded = b64decode(req.params['continuation-token']) + query.update({'marker': decoded}) + if 'fetch-owner' in req.params: + fetch_owner = config_true_value(req.params['fetch-owner']) + + resp = req.get_response(self.app, query=query) + + objects = json.loads(resp.body) + + elem = Element('ListBucketResult') + SubElement(elem, 'Name').text = req.container_name + SubElement(elem, 'Prefix').text = req.params.get('prefix') + + # in order to judge that truncated is valid, check whether + # max_keys + 1 th element exists in swift. 
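The comment above describes the usual limit-plus-one trick: the container listing is asked for max_keys + 1 entries, so the presence of an extra element signals that the result is truncated. A minimal illustration over plain lists (the data is made up)::

    def truncate_listing(objects, max_keys):
        # 'objects' holds up to max_keys + 1 entries from the container listing
        is_truncated = max_keys > 0 and len(objects) > max_keys
        return objects[:max_keys], is_truncated

    print(truncate_listing(['a', 'b', 'c'], 2))  # (['a', 'b'], True)
    print(truncate_listing(['a', 'b'], 2))       # (['a', 'b'], False)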
+ is_truncated = max_keys > 0 and len(objects) > max_keys + objects = objects[:max_keys] + + if not is_v2: + SubElement(elem, 'Marker').text = req.params.get('marker') + if is_truncated and 'delimiter' in req.params: + if 'name' in objects[-1]: + SubElement(elem, 'NextMarker').text = \ + objects[-1]['name'] + if 'subdir' in objects[-1]: + SubElement(elem, 'NextMarker').text = \ + objects[-1]['subdir'] + else: + if is_truncated: + if 'name' in objects[-1]: + SubElement(elem, 'NextContinuationToken').text = \ + b64encode(objects[-1]['name']) + if 'subdir' in objects[-1]: + SubElement(elem, 'NextContinuationToken').text = \ + b64encode(objects[-1]['subdir']) + if 'continuation-token' in req.params: + SubElement(elem, 'ContinuationToken').text = \ + req.params['continuation-token'] + if 'start-after' in req.params: + SubElement(elem, 'StartAfter').text = \ + req.params['start-after'] + SubElement(elem, 'KeyCount').text = str(len(objects)) + + SubElement(elem, 'MaxKeys').text = str(tag_max_keys) + + if 'delimiter' in req.params: + SubElement(elem, 'Delimiter').text = req.params['delimiter'] + + if encoding_type is not None: + SubElement(elem, 'EncodingType').text = encoding_type + + SubElement(elem, 'IsTruncated').text = \ + 'true' if is_truncated else 'false' + + for o in objects: + if 'subdir' not in o: + contents = SubElement(elem, 'Contents') + SubElement(contents, 'Key').text = o['name'] + SubElement(contents, 'LastModified').text = \ + o['last_modified'][:-3] + 'Z' + SubElement(contents, 'ETag').text = '"%s"' % o['hash'] + SubElement(contents, 'Size').text = str(o['bytes']) + if fetch_owner or not is_v2: + owner = SubElement(contents, 'Owner') + SubElement(owner, 'ID').text = req.user_id + SubElement(owner, 'DisplayName').text = req.user_id + SubElement(contents, 'StorageClass').text = 'STANDARD' + + for o in objects: + if 'subdir' in o: + common_prefixes = SubElement(elem, 'CommonPrefixes') + SubElement(common_prefixes, 'Prefix').text = o['subdir'] + + body = tostring(elem, encoding_type=encoding_type) + + return HTTPOk(body=body, content_type='application/xml') + + @public + def PUT(self, req): + """ + Handle PUT Bucket request + """ + xml = req.xml(MAX_PUT_BUCKET_BODY_SIZE) + if xml: + # check location + try: + elem = fromstring( + xml, 'CreateBucketConfiguration', self.logger) + location = elem.find('./LocationConstraint').text + except (XMLSyntaxError, DocumentInvalid): + raise MalformedXML() + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + self.logger.error(e) + raise exc_type, exc_value, exc_traceback + + if location != self.conf.location: + # s3api cannot support multiple regions currently. 
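For reference, the CreateBucketConfiguration body parsed in PUT above looks like the following. This is a rough sketch of pulling out LocationConstraint with the standard-library ElementTree; the middleware itself uses its own etree wrapper plus RelaxNG validation, and 'US' here simply matches the default conf.location::

    import xml.etree.ElementTree as ET

    BODY = ('<CreateBucketConfiguration '
            'xmlns="http://s3.amazonaws.com/doc/2006-03-01/">'
            '<LocationConstraint>US</LocationConstraint>'
            '</CreateBucketConfiguration>')

    S3_NS = '{http://s3.amazonaws.com/doc/2006-03-01/}'
    elem = ET.fromstring(BODY)
    location = elem.find(S3_NS + 'LocationConstraint').text
    print(location)  # US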
+ raise InvalidLocationConstraint() + + resp = req.get_response(self.app) + + resp.status = HTTP_OK + resp.location = '/' + req.container_name + + return resp + + @public + def DELETE(self, req): + """ + Handle DELETE Bucket request + """ + if self.conf.allow_multipart_uploads: + self._delete_segments_bucket(req) + resp = req.get_response(self.app) + return resp + + @public + def POST(self, req): + """ + Handle POST Bucket request + """ + raise S3NotImplemented() diff -Nru swift-2.17.0/swift/common/middleware/s3api/controllers/__init__.py swift-2.18.0/swift/common/middleware/s3api/controllers/__init__.py --- swift-2.17.0/swift/common/middleware/s3api/controllers/__init__.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/controllers/__init__.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,52 @@ +# Copyright (c) 2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from swift.common.middleware.s3api.controllers.base import Controller, \ + UnsupportedController +from swift.common.middleware.s3api.controllers.service import ServiceController +from swift.common.middleware.s3api.controllers.bucket import BucketController +from swift.common.middleware.s3api.controllers.obj import ObjectController + +from swift.common.middleware.s3api.controllers.acl import AclController +from swift.common.middleware.s3api.controllers.s3_acl import S3AclController +from swift.common.middleware.s3api.controllers.multi_delete import \ + MultiObjectDeleteController +from swift.common.middleware.s3api.controllers.multi_upload import \ + UploadController, PartController, UploadsController +from swift.common.middleware.s3api.controllers.location import \ + LocationController +from swift.common.middleware.s3api.controllers.logging import \ + LoggingStatusController +from swift.common.middleware.s3api.controllers.versioning import \ + VersioningController + +__all__ = [ + 'Controller', + 'ServiceController', + 'BucketController', + 'ObjectController', + + 'AclController', + 'S3AclController', + 'MultiObjectDeleteController', + 'PartController', + 'UploadsController', + 'UploadController', + 'LocationController', + 'LoggingStatusController', + 'VersioningController', + + 'UnsupportedController', +] diff -Nru swift-2.17.0/swift/common/middleware/s3api/controllers/location.py swift-2.18.0/swift/common/middleware/s3api/controllers/location.py --- swift-2.17.0/swift/common/middleware/s3api/controllers/location.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/controllers/location.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,42 @@ +# Copyright (c) 2010-2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from swift.common.utils import public + +from swift.common.middleware.s3api.controllers.base import Controller, \ + bucket_operation +from swift.common.middleware.s3api.etree import Element, tostring +from swift.common.middleware.s3api.s3response import HTTPOk + + +class LocationController(Controller): + """ + Handles GET Bucket location, which is logged as a LOCATION operation in the + S3 server log. + """ + @public + @bucket_operation + def GET(self, req): + """ + Handles GET Bucket location. + """ + req.get_response(self.app, method='HEAD') + + elem = Element('LocationConstraint') + if self.conf.location != 'US': + elem.text = self.conf.location + body = tostring(elem) + + return HTTPOk(body=body, content_type='application/xml') diff -Nru swift-2.17.0/swift/common/middleware/s3api/controllers/logging.py swift-2.18.0/swift/common/middleware/s3api/controllers/logging.py --- swift-2.17.0/swift/common/middleware/s3api/controllers/logging.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/controllers/logging.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,54 @@ +# Copyright (c) 2010-2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from swift.common.utils import public + +from swift.common.middleware.s3api.controllers.base import Controller, \ + bucket_operation +from swift.common.middleware.s3api.etree import Element, tostring +from swift.common.middleware.s3api.s3response import HTTPOk, S3NotImplemented, \ + NoLoggingStatusForKey + + +class LoggingStatusController(Controller): + """ + Handles the following APIs: + + * GET Bucket logging + * PUT Bucket logging + + Those APIs are logged as LOGGING_STATUS operations in the S3 server log. + """ + @public + @bucket_operation(err_resp=NoLoggingStatusForKey) + def GET(self, req): + """ + Handles GET Bucket logging. + """ + req.get_response(self.app, method='HEAD') + + # logging disabled + elem = Element('BucketLoggingStatus') + body = tostring(elem) + + return HTTPOk(body=body, content_type='application/xml') + + @public + @bucket_operation(err_resp=NoLoggingStatusForKey) + def PUT(self, req): + """ + Handles PUT Bucket logging. 
+ """ + raise S3NotImplemented() diff -Nru swift-2.17.0/swift/common/middleware/s3api/controllers/multi_delete.py swift-2.18.0/swift/common/middleware/s3api/controllers/multi_delete.py --- swift-2.17.0/swift/common/middleware/s3api/controllers/multi_delete.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/controllers/multi_delete.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,126 @@ +# Copyright (c) 2010-2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +from swift.common.utils import public + +from swift.common.middleware.s3api.controllers.base import Controller, \ + bucket_operation +from swift.common.middleware.s3api.etree import Element, SubElement, \ + fromstring, tostring, XMLSyntaxError, DocumentInvalid +from swift.common.middleware.s3api.s3response import HTTPOk, S3NotImplemented, \ + NoSuchKey, ErrorResponse, MalformedXML, UserKeyMustBeSpecified, \ + AccessDenied, MissingRequestBodyError + +MAX_MULTI_DELETE_BODY_SIZE = 61365 + + +class MultiObjectDeleteController(Controller): + """ + Handles Delete Multiple Objects, which is logged as a MULTI_OBJECT_DELETE + operation in the S3 server log. + """ + def _gen_error_body(self, error, elem, delete_list): + for key, version in delete_list: + if version is not None: + # TODO: delete the specific version of the object + raise S3NotImplemented() + + error_elem = SubElement(elem, 'Error') + SubElement(error_elem, 'Key').text = key + SubElement(error_elem, 'Code').text = error.__class__.__name__ + SubElement(error_elem, 'Message').text = error._msg + + return tostring(elem) + + @public + @bucket_operation + def POST(self, req): + """ + Handles Delete Multiple Objects. 
+ """ + def object_key_iter(elem): + for obj in elem.iterchildren('Object'): + key = obj.find('./Key').text + if not key: + raise UserKeyMustBeSpecified() + version = obj.find('./VersionId') + if version is not None: + version = version.text + + yield key, version + + try: + xml = req.xml(MAX_MULTI_DELETE_BODY_SIZE) + if not xml: + raise MissingRequestBodyError() + + req.check_md5(xml) + elem = fromstring(xml, 'Delete', self.logger) + + quiet = elem.find('./Quiet') + if quiet is not None and quiet.text.lower() == 'true': + self.quiet = True + else: + self.quiet = False + + delete_list = list(object_key_iter(elem)) + if len(delete_list) > self.conf.max_multi_delete_objects: + raise MalformedXML() + except (XMLSyntaxError, DocumentInvalid): + raise MalformedXML() + except ErrorResponse: + raise + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + self.logger.error(e) + raise exc_type, exc_value, exc_traceback + + elem = Element('DeleteResult') + + # check bucket existence + try: + req.get_response(self.app, 'HEAD') + except AccessDenied as error: + body = self._gen_error_body(error, elem, delete_list) + return HTTPOk(body=body) + + for key, version in delete_list: + if version is not None: + # TODO: delete the specific version of the object + raise S3NotImplemented() + + req.object_name = key + + try: + query = req.gen_multipart_manifest_delete_query(self.app) + req.get_response(self.app, method='DELETE', query=query) + except NoSuchKey: + pass + except ErrorResponse as e: + error = SubElement(elem, 'Error') + SubElement(error, 'Key').text = key + SubElement(error, 'Code').text = e.__class__.__name__ + SubElement(error, 'Message').text = e._msg + continue + + if not self.quiet: + deleted = SubElement(elem, 'Deleted') + SubElement(deleted, 'Key').text = key + + body = tostring(elem) + + return HTTPOk(body=body) diff -Nru swift-2.17.0/swift/common/middleware/s3api/controllers/multi_upload.py swift-2.18.0/swift/common/middleware/s3api/controllers/multi_upload.py --- swift-2.17.0/swift/common/middleware/s3api/controllers/multi_upload.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/controllers/multi_upload.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,671 @@ +# Copyright (c) 2010-2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Implementation of S3 Multipart Upload. + +This module implements S3 Multipart Upload APIs with the Swift SLO feature. +The following explains how S3api uses swift container and objects to store S3 +upload information: + +----------------- +[bucket]+segments +----------------- + +A container to store upload information. [bucket] is the original bucket +where multipart upload is initiated. + +----------------------------- +[bucket]+segments/[upload_id] +----------------------------- + +A object of the ongoing upload id. The object is empty and used for +checking the target upload status. 
If the object exists, it means that the +upload is initiated but not either completed or aborted. + +------------------------------------------- +[bucket]+segments/[upload_id]/[part_number] +------------------------------------------- + +The last suffix is the part number under the upload id. When the client uploads +the parts, they will be stored in the namespace with +[bucket]+segments/[upload_id]/[part_number]. + +Example listing result in the [bucket]+segments container:: + + [bucket]+segments/[upload_id1] # upload id object for upload_id1 + [bucket]+segments/[upload_id1]/1 # part object for upload_id1 + [bucket]+segments/[upload_id1]/2 # part object for upload_id1 + [bucket]+segments/[upload_id1]/3 # part object for upload_id1 + [bucket]+segments/[upload_id2] # upload id object for upload_id2 + [bucket]+segments/[upload_id2]/1 # part object for upload_id2 + [bucket]+segments/[upload_id2]/2 # part object for upload_id2 + . + . + +Those part objects are directly used as segments of a Swift +Static Large Object when the multipart upload is completed. + +""" + +import os +import re +import sys + +from swift.common.swob import Range +from swift.common.utils import json, public +from swift.common.db import utf8encode + +from six.moves.urllib.parse import urlparse # pylint: disable=F0401 + +from swift.common.middleware.s3api.controllers.base import Controller, \ + bucket_operation, object_operation, check_container_existence +from swift.common.middleware.s3api.s3response import InvalidArgument, \ + ErrorResponse, MalformedXML, \ + InvalidPart, BucketAlreadyExists, EntityTooSmall, InvalidPartOrder, \ + InvalidRequest, HTTPOk, HTTPNoContent, NoSuchKey, NoSuchUpload, \ + NoSuchBucket +from swift.common.middleware.s3api.exception import BadSwiftRequest +from swift.common.middleware.s3api.utils import unique_id, \ + MULTIUPLOAD_SUFFIX, S3Timestamp, sysmeta_header +from swift.common.middleware.s3api.etree import Element, SubElement, \ + fromstring, tostring, XMLSyntaxError, DocumentInvalid + +DEFAULT_MAX_PARTS_LISTING = 1000 +DEFAULT_MAX_UPLOADS = 1000 + +MAX_COMPLETE_UPLOAD_BODY_SIZE = 2048 * 1024 + + +def _get_upload_info(req, app, upload_id): + + container = req.container_name + MULTIUPLOAD_SUFFIX + obj = '%s/%s' % (req.object_name, upload_id) + + try: + return req.get_response(app, 'HEAD', container=container, obj=obj) + except NoSuchKey: + raise NoSuchUpload(upload_id=upload_id) + + +def _check_upload_info(req, app, upload_id): + + _get_upload_info(req, app, upload_id) + + +class PartController(Controller): + """ + Handles the following APIs: + + * Upload Part + * Upload Part - Copy + + Those APIs are logged as PART operations in the S3 server log. + """ + @public + @object_operation + @check_container_existence + def PUT(self, req): + """ + Handles Upload Part and Upload Part Copy. 
+ """ + + if 'uploadId' not in req.params: + raise InvalidArgument('ResourceType', 'partNumber', + 'Unexpected query string parameter') + + try: + part_number = int(req.params['partNumber']) + if part_number < 1 or self.conf.max_upload_part_num < part_number: + raise Exception() + except Exception: + err_msg = 'Part number must be an integer between 1 and %d,' \ + ' inclusive' % self.conf.max_upload_part_num + raise InvalidArgument('partNumber', req.params['partNumber'], + err_msg) + + upload_id = req.params['uploadId'] + _check_upload_info(req, self.app, upload_id) + + req.container_name += MULTIUPLOAD_SUFFIX + req.object_name = '%s/%s/%d' % (req.object_name, upload_id, + part_number) + + req_timestamp = S3Timestamp.now() + req.headers['X-Timestamp'] = req_timestamp.internal + source_resp = req.check_copy_source(self.app) + if 'X-Amz-Copy-Source' in req.headers and \ + 'X-Amz-Copy-Source-Range' in req.headers: + rng = req.headers['X-Amz-Copy-Source-Range'] + + header_valid = True + try: + rng_obj = Range(rng) + if len(rng_obj.ranges) != 1: + header_valid = False + except ValueError: + header_valid = False + if not header_valid: + err_msg = ('The x-amz-copy-source-range value must be of the ' + 'form bytes=first-last where first and last are ' + 'the zero-based offsets of the first and last ' + 'bytes to copy') + raise InvalidArgument('x-amz-source-range', rng, err_msg) + + source_size = int(source_resp.headers['Content-Length']) + if not rng_obj.ranges_for_length(source_size): + err_msg = ('Range specified is not valid for source object ' + 'of size: %s' % source_size) + raise InvalidArgument('x-amz-source-range', rng, err_msg) + + req.headers['Range'] = rng + del req.headers['X-Amz-Copy-Source-Range'] + resp = req.get_response(self.app) + + if 'X-Amz-Copy-Source' in req.headers: + resp.append_copy_resp_body(req.controller_name, + req_timestamp.s3xmlformat) + + resp.status = 200 + return resp + + +class UploadsController(Controller): + """ + Handles the following APIs: + + * List Multipart Uploads + * Initiate Multipart Upload + + Those APIs are logged as UPLOADS operations in the S3 server log. + """ + @public + @bucket_operation(err_resp=InvalidRequest, + err_msg="Key is not expected for the GET method " + "?uploads subresource") + @check_container_existence + def GET(self, req): + """ + Handles List Multipart Uploads + """ + + def separate_uploads(uploads, prefix, delimiter): + """ + separate_uploads will separate uploads into non_delimited_uploads + (a subset of uploads) and common_prefixes according to the + specified delimiter. non_delimited_uploads is a list of uploads + which exclude the delimiter. common_prefixes is a set of prefixes + prior to the specified delimiter. Note that the prefix in the + common_prefixes includes the delimiter itself. + + i.e. if '/' delimiter specified and then the uploads is consists of + ['foo', 'foo/bar'], this function will return (['foo'], ['foo/']). + + :param uploads: A list of uploads dictionary + :param prefix: A string of prefix reserved on the upload path. + (i.e. 
the delimiter must be searched behind the + prefix) + :param delimiter: A string of delimiter to split the path in each + upload + + :return (non_delimited_uploads, common_prefixes) + """ + (prefix, delimiter) = \ + utf8encode(prefix, delimiter) + non_delimited_uploads = [] + common_prefixes = set() + for upload in uploads: + key = upload['key'] + end = key.find(delimiter, len(prefix)) + if end >= 0: + common_prefix = key[:end + len(delimiter)] + common_prefixes.add(common_prefix) + else: + non_delimited_uploads.append(upload) + return non_delimited_uploads, sorted(common_prefixes) + + encoding_type = req.params.get('encoding-type') + if encoding_type is not None and encoding_type != 'url': + err_msg = 'Invalid Encoding Method specified in Request' + raise InvalidArgument('encoding-type', encoding_type, err_msg) + + keymarker = req.params.get('key-marker', '') + uploadid = req.params.get('upload-id-marker', '') + maxuploads = req.get_validated_param( + 'max-uploads', DEFAULT_MAX_UPLOADS, DEFAULT_MAX_UPLOADS) + + query = { + 'format': 'json', + 'limit': maxuploads + 1, + } + + if uploadid and keymarker: + query.update({'marker': '%s/%s' % (keymarker, uploadid)}) + elif keymarker: + query.update({'marker': '%s/~' % (keymarker)}) + if 'prefix' in req.params: + query.update({'prefix': req.params['prefix']}) + + container = req.container_name + MULTIUPLOAD_SUFFIX + try: + resp = req.get_response(self.app, container=container, query=query) + objects = json.loads(resp.body) + except NoSuchBucket: + # Assume NoSuchBucket as no uploads + objects = [] + + def object_to_upload(object_info): + obj, upid = object_info['name'].rsplit('/', 1) + obj_dict = {'key': obj, + 'upload_id': upid, + 'last_modified': object_info['last_modified']} + return obj_dict + + # uploads is a list consists of dict, {key, upload_id, last_modified} + # Note that pattern matcher will drop whole segments objects like as + # object_name/upload_id/1. + pattern = re.compile('/[0-9]+$') + uploads = [object_to_upload(obj) for obj in objects if + pattern.search(obj.get('name', '')) is None] + + prefixes = [] + if 'delimiter' in req.params: + prefix = req.params.get('prefix', '') + delimiter = req.params['delimiter'] + uploads, prefixes = \ + separate_uploads(uploads, prefix, delimiter) + + if len(uploads) > maxuploads: + uploads = uploads[:maxuploads] + truncated = True + else: + truncated = False + + nextkeymarker = '' + nextuploadmarker = '' + if len(uploads) > 1: + nextuploadmarker = uploads[-1]['upload_id'] + nextkeymarker = uploads[-1]['key'] + + result_elem = Element('ListMultipartUploadsResult') + SubElement(result_elem, 'Bucket').text = req.container_name + SubElement(result_elem, 'KeyMarker').text = keymarker + SubElement(result_elem, 'UploadIdMarker').text = uploadid + SubElement(result_elem, 'NextKeyMarker').text = nextkeymarker + SubElement(result_elem, 'NextUploadIdMarker').text = nextuploadmarker + if 'delimiter' in req.params: + SubElement(result_elem, 'Delimiter').text = \ + req.params['delimiter'] + if 'prefix' in req.params: + SubElement(result_elem, 'Prefix').text = req.params['prefix'] + SubElement(result_elem, 'MaxUploads').text = str(maxuploads) + if encoding_type is not None: + SubElement(result_elem, 'EncodingType').text = encoding_type + SubElement(result_elem, 'IsTruncated').text = \ + 'true' if truncated else 'false' + + # TODO: don't show uploads which are initiated before this bucket is + # created. 
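A standalone sketch of the delimiter handling performed by separate_uploads above, reusing the 'foo' / 'foo/bar' example from its docstring (the utf8encode step is omitted)::

    def separate_uploads(uploads, prefix, delimiter):
        # simplified copy of the nested helper above
        non_delimited, common_prefixes = [], set()
        for upload in uploads:
            key = upload['key']
            end = key.find(delimiter, len(prefix))
            if end >= 0:
                common_prefixes.add(key[:end + len(delimiter)])
            else:
                non_delimited.append(upload)
        return non_delimited, sorted(common_prefixes)

    uploads = [{'key': 'foo'}, {'key': 'foo/bar'}]
    print(separate_uploads(uploads, '', '/'))
    # ([{'key': 'foo'}], ['foo/'])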
+ for u in uploads: + upload_elem = SubElement(result_elem, 'Upload') + SubElement(upload_elem, 'Key').text = u['key'] + SubElement(upload_elem, 'UploadId').text = u['upload_id'] + initiator_elem = SubElement(upload_elem, 'Initiator') + SubElement(initiator_elem, 'ID').text = req.user_id + SubElement(initiator_elem, 'DisplayName').text = req.user_id + owner_elem = SubElement(upload_elem, 'Owner') + SubElement(owner_elem, 'ID').text = req.user_id + SubElement(owner_elem, 'DisplayName').text = req.user_id + SubElement(upload_elem, 'StorageClass').text = 'STANDARD' + SubElement(upload_elem, 'Initiated').text = \ + u['last_modified'][:-3] + 'Z' + + for p in prefixes: + elem = SubElement(result_elem, 'CommonPrefixes') + SubElement(elem, 'Prefix').text = p + + body = tostring(result_elem, encoding_type=encoding_type) + + return HTTPOk(body=body, content_type='application/xml') + + @public + @object_operation + @check_container_existence + def POST(self, req): + """ + Handles Initiate Multipart Upload. + """ + + # Create a unique S3 upload id from UUID to avoid duplicates. + upload_id = unique_id() + + container = req.container_name + MULTIUPLOAD_SUFFIX + content_type = req.headers.get('Content-Type') + if content_type: + req.headers[sysmeta_header('object', 'has-content-type')] = 'yes' + req.headers[ + sysmeta_header('object', 'content-type')] = content_type + else: + req.headers[sysmeta_header('object', 'has-content-type')] = 'no' + req.headers['Content-Type'] = 'application/directory' + + try: + req.get_response(self.app, 'PUT', container, '') + except BucketAlreadyExists: + pass + + obj = '%s/%s' % (req.object_name, upload_id) + + req.headers.pop('Etag', None) + req.headers.pop('Content-Md5', None) + + req.get_response(self.app, 'PUT', container, obj, body='') + + result_elem = Element('InitiateMultipartUploadResult') + SubElement(result_elem, 'Bucket').text = req.container_name + SubElement(result_elem, 'Key').text = req.object_name + SubElement(result_elem, 'UploadId').text = upload_id + + body = tostring(result_elem) + + return HTTPOk(body=body, content_type='application/xml') + + +class UploadController(Controller): + """ + Handles the following APIs: + + * List Parts + * Abort Multipart Upload + * Complete Multipart Upload + + Those APIs are logged as UPLOAD operations in the S3 server log. + """ + @public + @object_operation + @check_container_existence + def GET(self, req): + """ + Handles List Parts. + """ + def filter_part_num_marker(o): + try: + num = int(os.path.basename(o['name'])) + return num > part_num_marker + except ValueError: + return False + + encoding_type = req.params.get('encoding-type') + if encoding_type is not None and encoding_type != 'url': + err_msg = 'Invalid Encoding Method specified in Request' + raise InvalidArgument('encoding-type', encoding_type, err_msg) + + upload_id = req.params['uploadId'] + _check_upload_info(req, self.app, upload_id) + + maxparts = req.get_validated_param( + 'max-parts', DEFAULT_MAX_PARTS_LISTING, + self.conf.max_parts_listing) + part_num_marker = req.get_validated_param( + 'part-number-marker', 0) + + query = { + 'format': 'json', + 'limit': maxparts + 1, + 'prefix': '%s/%s/' % (req.object_name, upload_id), + 'delimiter': '/' + } + + container = req.container_name + MULTIUPLOAD_SUFFIX + resp = req.get_response(self.app, container=container, obj='', + query=query) + objects = json.loads(resp.body) + + last_part = 0 + + # If the caller requested a list starting at a specific part number, + # construct a sub-set of the object list. 
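The filter-and-sort step that follows can be pictured with plain data. A rough sketch, with made-up segment names of the form object/upload_id/part_number::

    import os

    objects = [{'name': 'obj/uploadid/3'},
               {'name': 'obj/uploadid/1'},
               {'name': 'obj/uploadid/10'},
               {'name': 'obj/uploadid/junk'}]  # skipped: not an integer part
    part_num_marker = 1

    def after_marker(o):
        try:
            return int(os.path.basename(o['name'])) > part_num_marker
        except ValueError:
            return False

    parts = sorted((o for o in objects if after_marker(o)),
                   key=lambda o: int(o['name'].split('/')[-1]))
    print([o['name'] for o in parts])  # ['obj/uploadid/3', 'obj/uploadid/10']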
+ objList = filter(filter_part_num_marker, objects) + + # pylint: disable-msg=E1103 + objList.sort(key=lambda o: int(o['name'].split('/')[-1])) + + if len(objList) > maxparts: + objList = objList[:maxparts] + truncated = True + else: + truncated = False + # TODO: We have to retrieve object list again when truncated is True + # and some objects filtered by invalid name because there could be no + # enough objects for limit defined by maxparts. + + if objList: + o = objList[-1] + last_part = os.path.basename(o['name']) + + result_elem = Element('ListPartsResult') + SubElement(result_elem, 'Bucket').text = req.container_name + SubElement(result_elem, 'Key').text = req.object_name + SubElement(result_elem, 'UploadId').text = upload_id + + initiator_elem = SubElement(result_elem, 'Initiator') + SubElement(initiator_elem, 'ID').text = req.user_id + SubElement(initiator_elem, 'DisplayName').text = req.user_id + owner_elem = SubElement(result_elem, 'Owner') + SubElement(owner_elem, 'ID').text = req.user_id + SubElement(owner_elem, 'DisplayName').text = req.user_id + + SubElement(result_elem, 'StorageClass').text = 'STANDARD' + SubElement(result_elem, 'PartNumberMarker').text = str(part_num_marker) + SubElement(result_elem, 'NextPartNumberMarker').text = str(last_part) + SubElement(result_elem, 'MaxParts').text = str(maxparts) + if 'encoding-type' in req.params: + SubElement(result_elem, 'EncodingType').text = \ + req.params['encoding-type'] + SubElement(result_elem, 'IsTruncated').text = \ + 'true' if truncated else 'false' + + for i in objList: + part_elem = SubElement(result_elem, 'Part') + SubElement(part_elem, 'PartNumber').text = i['name'].split('/')[-1] + SubElement(part_elem, 'LastModified').text = \ + i['last_modified'][:-3] + 'Z' + SubElement(part_elem, 'ETag').text = '"%s"' % i['hash'] + SubElement(part_elem, 'Size').text = str(i['bytes']) + + body = tostring(result_elem, encoding_type=encoding_type) + + return HTTPOk(body=body, content_type='application/xml') + + @public + @object_operation + @check_container_existence + def DELETE(self, req): + """ + Handles Abort Multipart Upload. + """ + upload_id = req.params['uploadId'] + _check_upload_info(req, self.app, upload_id) + + # First check to see if this multi-part upload was already + # completed. Look in the primary container, if the object exists, + # then it was completed and we return an error here. + container = req.container_name + MULTIUPLOAD_SUFFIX + obj = '%s/%s' % (req.object_name, upload_id) + req.get_response(self.app, container=container, obj=obj) + + # The completed object was not found so this + # must be a multipart upload abort. + # We must delete any uploaded segments for this UploadID and then + # delete the object in the main container as well + query = { + 'format': 'json', + 'prefix': '%s/%s/' % (req.object_name, upload_id), + 'delimiter': '/', + } + + resp = req.get_response(self.app, 'GET', container, '', query=query) + + # Iterate over the segment objects and delete them individually + objects = json.loads(resp.body) + for o in objects: + container = req.container_name + MULTIUPLOAD_SUFFIX + req.get_response(self.app, container=container, obj=o['name']) + + return HTTPNoContent() + + @public + @object_operation + @check_container_existence + def POST(self, req): + """ + Handles Complete Multipart Upload. 
+ """ + upload_id = req.params['uploadId'] + resp = _get_upload_info(req, self.app, upload_id) + headers = {} + for key, val in resp.headers.iteritems(): + _key = key.lower() + if _key.startswith('x-amz-meta-'): + headers['x-object-meta-' + _key[11:]] = val + + hct_header = sysmeta_header('object', 'has-content-type') + if resp.sysmeta_headers.get(hct_header) == 'yes': + content_type = resp.sysmeta_headers.get( + sysmeta_header('object', 'content-type')) + elif hct_header in resp.sysmeta_headers: + # has-content-type is present but false, so no content type was + # set on initial upload. In that case, we won't set one on our + # PUT request. Swift will end up guessing one based on the + # object name. + content_type = None + else: + content_type = resp.headers.get('Content-Type') + + if content_type: + headers['Content-Type'] = content_type + + # Query for the objects in the segments area to make sure it completed + query = { + 'format': 'json', + 'prefix': '%s/%s/' % (req.object_name, upload_id), + 'delimiter': '/' + } + + container = req.container_name + MULTIUPLOAD_SUFFIX + resp = req.get_response(self.app, 'GET', container, '', query=query) + objinfo = json.loads(resp.body) + objtable = dict((o['name'], + {'path': '/'.join(['', container, o['name']]), + 'etag': o['hash'], + 'size_bytes': o['bytes']}) for o in objinfo) + + manifest = [] + previous_number = 0 + try: + xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE) + if not xml: + raise InvalidRequest(msg='You must specify at least one part') + + complete_elem = fromstring( + xml, 'CompleteMultipartUpload', self.logger) + for part_elem in complete_elem.iterchildren('Part'): + part_number = int(part_elem.find('./PartNumber').text) + + if part_number <= previous_number: + raise InvalidPartOrder(upload_id=upload_id) + previous_number = part_number + + etag = part_elem.find('./ETag').text + if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"': + # strip double quotes + etag = etag[1:-1] + + info = objtable.get("%s/%s/%s" % (req.object_name, upload_id, + part_number)) + if info is None or info['etag'] != etag: + raise InvalidPart(upload_id=upload_id, + part_number=part_number) + + info['size_bytes'] = int(info['size_bytes']) + manifest.append(info) + except (XMLSyntaxError, DocumentInvalid): + raise MalformedXML() + except ErrorResponse: + raise + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + self.logger.error(e) + raise exc_type, exc_value, exc_traceback + + # Check the size of each segment except the last and make sure they are + # all more than the minimum upload chunk size + for info in manifest[:-1]: + if info['size_bytes'] < self.conf.min_segment_size: + raise EntityTooSmall() + + try: + # TODO: add support for versioning + if manifest: + resp = req.get_response(self.app, 'PUT', + body=json.dumps(manifest), + query={'multipart-manifest': 'put'}, + headers=headers) + else: + # the upload must have consisted of a single zero-length part + # just write it directly + resp = req.get_response(self.app, 'PUT', body='', + headers=headers) + except BadSwiftRequest as e: + msg = str(e) + expected_msg = 'too small; each segment must be at least 1 byte' + if expected_msg in msg: + # FIXME: AWS S3 allows a smaller object than 5 MB if there is + # only one part. Use a COPY request to copy the part object + # from the segments container instead. 
+ raise EntityTooSmall(msg) + else: + raise + + # clean up the multipart-upload record + obj = '%s/%s' % (req.object_name, upload_id) + try: + req.get_response(self.app, 'DELETE', container, obj) + except NoSuchKey: + pass # We know that this existed long enough for us to HEAD + + result_elem = Element('CompleteMultipartUploadResult') + + # NOTE: boto with sig v4 appends port to HTTP_HOST value at the + # request header when the port is non default value and it makes + # req.host_url like as http://localhost:8080:8080/path + # that obviously invalid. Probably it should be resolved at + # swift.common.swob though, tentatively we are parsing and + # reconstructing the correct host_url info here. + # in detail, https://github.com/boto/boto/pull/3513 + parsed_url = urlparse(req.host_url) + host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname) + if parsed_url.port: + host_url += ':%s' % parsed_url.port + + SubElement(result_elem, 'Location').text = host_url + req.path + SubElement(result_elem, 'Bucket').text = req.container_name + SubElement(result_elem, 'Key').text = req.object_name + SubElement(result_elem, 'ETag').text = resp.etag + + resp.body = tostring(result_elem) + resp.status = 200 + resp.content_type = "application/xml" + + return resp diff -Nru swift-2.17.0/swift/common/middleware/s3api/controllers/obj.py swift-2.18.0/swift/common/middleware/s3api/controllers/obj.py --- swift-2.17.0/swift/common/middleware/s3api/controllers/obj.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/controllers/obj.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,150 @@ +# Copyright (c) 2010-2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +from swift.common.http import HTTP_OK, HTTP_PARTIAL_CONTENT, HTTP_NO_CONTENT +from swift.common.swob import Range, content_range_header_value +from swift.common.utils import public + +from swift.common.middleware.s3api.utils import S3Timestamp +from swift.common.middleware.s3api.controllers.base import Controller +from swift.common.middleware.s3api.s3response import S3NotImplemented, \ + InvalidRange, NoSuchKey, InvalidArgument + + +class ObjectController(Controller): + """ + Handles requests on objects + """ + def _gen_head_range_resp(self, req_range, resp): + """ + Swift doesn't handle Range header for HEAD requests. + So, this method generates HEAD range response from HEAD response. + S3 return HEAD range response, if the value of range satisfies the + conditions which are described in the following document. 
+ - http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35 + """ + length = long(resp.headers.get('Content-Length')) + + try: + content_range = Range(req_range) + except ValueError: + return resp + + ranges = content_range.ranges_for_length(length) + if ranges == []: + raise InvalidRange() + elif ranges: + if len(ranges) == 1: + start, end = ranges[0] + resp.headers['Content-Range'] = \ + content_range_header_value(start, end, length) + resp.headers['Content-Length'] = (end - start) + resp.status = HTTP_PARTIAL_CONTENT + return resp + else: + # TODO: It is necessary to confirm whether need to respond to + # multi-part response.(e.g. bytes=0-10,20-30) + pass + + return resp + + def GETorHEAD(self, req): + resp = req.get_response(self.app) + + if req.method == 'HEAD': + resp.app_iter = None + + for key in ('content-type', 'content-language', 'expires', + 'cache-control', 'content-disposition', + 'content-encoding'): + if 'response-' + key in req.params: + resp.headers[key] = req.params['response-' + key] + + return resp + + @public + def HEAD(self, req): + """ + Handle HEAD Object request + """ + resp = self.GETorHEAD(req) + + if 'range' in req.headers: + req_range = req.headers['range'] + resp = self._gen_head_range_resp(req_range, resp) + + return resp + + @public + def GET(self, req): + """ + Handle GET Object request + """ + return self.GETorHEAD(req) + + @public + def PUT(self, req): + """ + Handle PUT Object and PUT Object (Copy) request + """ + # set X-Timestamp by s3api to use at copy resp body + req_timestamp = S3Timestamp.now() + req.headers['X-Timestamp'] = req_timestamp.internal + if all(h in req.headers + for h in ('X-Amz-Copy-Source', 'X-Amz-Copy-Source-Range')): + raise InvalidArgument('x-amz-copy-source-range', + req.headers['X-Amz-Copy-Source-Range'], + 'Illegal copy header') + req.check_copy_source(self.app) + resp = req.get_response(self.app) + + if 'X-Amz-Copy-Source' in req.headers: + resp.append_copy_resp_body(req.controller_name, + req_timestamp.s3xmlformat) + + # delete object metadata from response + for key in list(resp.headers.keys()): + if key.startswith('x-amz-meta-'): + del resp.headers[key] + + resp.status = HTTP_OK + return resp + + @public + def POST(self, req): + raise S3NotImplemented() + + @public + def DELETE(self, req): + """ + Handle DELETE Object request + """ + try: + query = req.gen_multipart_manifest_delete_query(self.app) + req.headers['Content-Type'] = None # Ignore client content-type + resp = req.get_response(self.app, query=query) + if query and resp.status_int == HTTP_OK: + for chunk in resp.app_iter: + pass # drain the bulk-deleter response + resp.status = HTTP_NO_CONTENT + resp.body = '' + except NoSuchKey: + # expect to raise NoSuchBucket when the bucket doesn't exist + exc_type, exc_value, exc_traceback = sys.exc_info() + req.get_container_info(self.app) + raise exc_type, exc_value, exc_traceback + return resp diff -Nru swift-2.17.0/swift/common/middleware/s3api/controllers/s3_acl.py swift-2.18.0/swift/common/middleware/s3api/controllers/s3_acl.py --- swift-2.17.0/swift/common/middleware/s3api/controllers/s3_acl.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/controllers/s3_acl.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,67 @@ +# Copyright (c) 2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from urllib import quote +from swift.common.utils import public + +from swift.common.middleware.s3api.controllers.base import Controller +from swift.common.middleware.s3api.s3response import HTTPOk +from swift.common.middleware.s3api.etree import tostring + + +class S3AclController(Controller): + """ + Handles the following APIs: + + * GET Bucket acl + * PUT Bucket acl + * GET Object acl + * PUT Object acl + + Those APIs are logged as ACL operations in the S3 server log. + """ + @public + def GET(self, req): + """ + Handles GET Bucket acl and GET Object acl. + """ + resp = req.get_response(self.app) + + acl = resp.object_acl if req.is_object_request else resp.bucket_acl + + resp = HTTPOk() + resp.body = tostring(acl.elem()) + + return resp + + @public + def PUT(self, req): + """ + Handles PUT Bucket acl and PUT Object acl. + """ + if req.is_object_request: + headers = {} + src_path = '/%s/%s' % (req.container_name, req.object_name) + + # object-sysmeta' can be updated by 'Copy' method, + # but can not be by 'POST' method. + # So headers['X-Copy-From'] for copy request is added here. + headers['X-Copy-From'] = quote(src_path) + headers['Content-Length'] = 0 + req.get_response(self.app, 'PUT', headers=headers) + else: + req.get_response(self.app, 'POST') + + return HTTPOk() diff -Nru swift-2.17.0/swift/common/middleware/s3api/controllers/service.py swift-2.18.0/swift/common/middleware/s3api/controllers/service.py --- swift-2.17.0/swift/common/middleware/s3api/controllers/service.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/controllers/service.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,68 @@ +# Copyright (c) 2010-2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from swift.common.utils import json, public + +from swift.common.middleware.s3api.controllers.base import Controller +from swift.common.middleware.s3api.etree import Element, SubElement, tostring +from swift.common.middleware.s3api.s3response import HTTPOk, AccessDenied, \ + NoSuchBucket +from swift.common.middleware.s3api.utils import validate_bucket_name + + +class ServiceController(Controller): + """ + Handles account level requests. 
+ """ + @public + def GET(self, req): + """ + Handle GET Service request + """ + resp = req.get_response(self.app, query={'format': 'json'}) + + containers = json.loads(resp.body) + + containers = filter( + lambda item: validate_bucket_name( + item['name'], self.conf.dns_compliant_bucket_names), + containers) + + # we don't keep the creation time of a bucket (s3cmd doesn't + # work without that) so we use something bogus. + elem = Element('ListAllMyBucketsResult') + + owner = SubElement(elem, 'Owner') + SubElement(owner, 'ID').text = req.user_id + SubElement(owner, 'DisplayName').text = req.user_id + + buckets = SubElement(elem, 'Buckets') + for c in containers: + if self.conf.s3_acl and self.conf.check_bucket_owner: + try: + req.get_response(self.app, 'HEAD', c['name']) + except AccessDenied: + continue + except NoSuchBucket: + continue + + bucket = SubElement(buckets, 'Bucket') + SubElement(bucket, 'Name').text = c['name'] + SubElement(bucket, 'CreationDate').text = \ + '2009-02-03T16:45:09.000Z' + + body = tostring(elem) + + return HTTPOk(content_type='application/xml', body=body) diff -Nru swift-2.17.0/swift/common/middleware/s3api/controllers/versioning.py swift-2.18.0/swift/common/middleware/s3api/controllers/versioning.py --- swift-2.17.0/swift/common/middleware/s3api/controllers/versioning.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/controllers/versioning.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,53 @@ +# Copyright (c) 2010-2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from swift.common.utils import public + +from swift.common.middleware.s3api.controllers.base import Controller, \ + bucket_operation +from swift.common.middleware.s3api.etree import Element, tostring +from swift.common.middleware.s3api.s3response import HTTPOk, S3NotImplemented + + +class VersioningController(Controller): + """ + Handles the following APIs: + + * GET Bucket versioning + * PUT Bucket versioning + + Those APIs are logged as VERSIONING operations in the S3 server log. + """ + @public + @bucket_operation + def GET(self, req): + """ + Handles GET Bucket versioning. + """ + req.get_response(self.app, method='HEAD') + + # Just report there is no versioning configured here. + elem = Element('VersioningConfiguration') + body = tostring(elem) + + return HTTPOk(body=body, content_type="text/plain") + + @public + @bucket_operation + def PUT(self, req): + """ + Handles PUT Bucket versioning. + """ + raise S3NotImplemented() diff -Nru swift-2.17.0/swift/common/middleware/s3api/etree.py swift-2.18.0/swift/common/middleware/s3api/etree.py --- swift-2.17.0/swift/common/middleware/s3api/etree.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/etree.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,146 @@ +# Copyright (c) 2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import lxml.etree +from urllib import quote +from copy import deepcopy +from pkg_resources import resource_stream # pylint: disable-msg=E0611 +import sys + +from swift.common.utils import get_logger +from swift.common.middleware.s3api.exception import S3Exception +from swift.common.middleware.s3api.utils import camel_to_snake, \ + utf8encode, utf8decode + +XMLNS_S3 = 'http://s3.amazonaws.com/doc/2006-03-01/' +XMLNS_XSI = 'http://www.w3.org/2001/XMLSchema-instance' + + +class XMLSyntaxError(S3Exception): + pass + + +class DocumentInvalid(S3Exception): + pass + + +def cleanup_namespaces(elem): + def remove_ns(tag, ns): + if tag.startswith('{%s}' % ns): + tag = tag[len('{%s}' % ns):] + return tag + + if not isinstance(elem.tag, basestring): + # elem is a comment element. + return + + # remove s3 namespace + elem.tag = remove_ns(elem.tag, XMLNS_S3) + + # remove default namespace + if elem.nsmap and None in elem.nsmap: + elem.tag = remove_ns(elem.tag, elem.nsmap[None]) + + for e in elem.iterchildren(): + cleanup_namespaces(e) + + +def fromstring(text, root_tag=None, logger=None): + try: + elem = lxml.etree.fromstring(text, parser) + except lxml.etree.XMLSyntaxError as e: + if logger: + logger.debug(e) + raise XMLSyntaxError(e) + + cleanup_namespaces(elem) + + if root_tag is not None: + # validate XML + try: + path = 'schema/%s.rng' % camel_to_snake(root_tag) + with resource_stream(__name__, path) as rng: + lxml.etree.RelaxNG(file=rng).assertValid(elem) + except IOError as e: + # Probably, the schema file doesn't exist. + exc_type, exc_value, exc_traceback = sys.exc_info() + logger = logger or get_logger({}, log_route='s3api') + logger.error(e) + raise exc_type, exc_value, exc_traceback + except lxml.etree.DocumentInvalid as e: + if logger: + logger.debug(e) + raise DocumentInvalid(e) + + return elem + + +def tostring(tree, encoding_type=None, use_s3ns=True): + if use_s3ns: + nsmap = tree.nsmap.copy() + nsmap[None] = XMLNS_S3 + + root = Element(tree.tag, attrib=tree.attrib, nsmap=nsmap) + root.text = tree.text + root.extend(deepcopy(tree.getchildren())) + tree = root + + if encoding_type == 'url': + tree = deepcopy(tree) + for e in tree.iter(): + # Some elements are not url-encoded even when we specify + # encoding_type=url. + blacklist = ['LastModified', 'ID', 'DisplayName', 'Initiated'] + if e.tag not in blacklist: + if isinstance(e.text, basestring): + e.text = quote(e.text) + + return lxml.etree.tostring(tree, xml_declaration=True, encoding='UTF-8') + + +class _Element(lxml.etree.ElementBase): + """ + Wrapper Element class of lxml.etree.Element to support + a utf-8 encoded non-ascii string as a text. + + Why we need this?: + Original lxml.etree.Element supports only unicode for the text. + It declines maintainability because we have to call a lot of encode/decode + methods to apply account/container/object name (i.e. PATH_INFO) to each + Element instance. When using this class, we can remove such a redundant + codes from swift.common.middleware.s3api middleware. 
+ """ + def __init__(self, *args, **kwargs): + # pylint: disable-msg=E1002 + super(_Element, self).__init__(*args, **kwargs) + + @property + def text(self): + """ + utf-8 wrapper property of lxml.etree.Element.text + """ + return utf8encode(lxml.etree.ElementBase.text.__get__(self)) + + @text.setter + def text(self, value): + lxml.etree.ElementBase.text.__set__(self, utf8decode(value)) + + +parser_lookup = lxml.etree.ElementDefaultClassLookup(element=_Element) +parser = lxml.etree.XMLParser() +parser.set_element_class_lookup(parser_lookup) + +Element = parser.makeelement +SubElement = lxml.etree.SubElement diff -Nru swift-2.17.0/swift/common/middleware/s3api/exception.py swift-2.18.0/swift/common/middleware/s3api/exception.py --- swift-2.17.0/swift/common/middleware/s3api/exception.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/exception.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,36 @@ +# Copyright (c) 2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class S3Exception(Exception): + pass + + +class NotS3Request(S3Exception): + pass + + +class BadSwiftRequest(S3Exception): + pass + + +class ACLError(S3Exception): + pass + + +class InvalidSubresource(S3Exception): + def __init__(self, resource, cause): + self.resource = resource + self.cause = cause diff -Nru swift-2.17.0/swift/common/middleware/s3api/s3api.py swift-2.18.0/swift/common/middleware/s3api/s3api.py --- swift-2.17.0/swift/common/middleware/s3api/s3api.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/s3api.py 2018-05-30 10:17:09.000000000 +0000 @@ -0,0 +1,273 @@ +# Copyright (c) 2010-2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +The s3api middleware will emulate the S3 REST api on top of swift. + +To enable this middleware to your configuration, add the s3api middleware +in front of the auth middleware. See ``proxy-server.conf-sample`` for more +detail and configurable options. + +To set up your client, the access key will be the concatenation of the +account and user strings that should look like test:tester, and the +secret access key is the account password. The host should also point +to the swift storage hostname. 
+ +An example client using the python boto library is as follows:: + + from boto.s3.connection import S3Connection + connection = S3Connection( + aws_access_key_id='test:tester', + aws_secret_access_key='testing', + port=8080, + host='127.0.0.1', + is_secure=False, + calling_format=boto.s3.connection.OrdinaryCallingFormat()) + +---------- +Deployment +---------- + +Proxy-Server Setting +^^^^^^^^^^^^^^^^^^^^ + +Set s3api before your auth in your pipeline in ``proxy-server.conf`` file. +To enable all compatiblity currently supported, you should make sure that +bulk, slo, and your auth middleware are also included in your proxy +pipeline setting. + +Minimum example config is:: + + [pipeline:main] + pipeline = proxy-logging cache s3api tempauth bulk slo proxy-logging + proxy-server + +When using keystone, the config will be:: + + [pipeline:main] + pipeline = proxy-logging cache s3api s3token keystoneauth bulk slo + proxy-logging proxy-server + +.. note:: + ``keystonemiddleware.authtoken`` can be located before/after s3api but + we recommend to put it before s3api because when authtoken is after s3api, + both authtoken and s3token will issue the acceptable token to keystone + (i.e. authenticate twice). + +----------- +Constraints +----------- +Currently, the s3api is being ported from https://github.com/openstack/swift3 +so any existing issues in swift3 are still remaining. Please make sure +descriptions in the example ``proxy-server.conf`` and what happens with the +config, before enabling the options. + +------------- +Supported API +------------- +The compatibility will continue to be improved upstream, you can keep and +eye on compatibility via a check tool build by SwiftStack. See +https://github.com/swiftstack/s3compat in detail. + +""" + +from paste.deploy import loadwsgi + +from swift.common.wsgi import PipelineWrapper, loadcontext + +from swift.common.middleware.s3api.exception import NotS3Request, \ + InvalidSubresource +from swift.common.middleware.s3api.s3request import get_request_class +from swift.common.middleware.s3api.s3response import ErrorResponse, \ + InternalError, MethodNotAllowed, S3ResponseBase, S3NotImplemented +from swift.common.utils import get_logger, register_swift_info, \ + config_true_value, config_positive_int_value +from swift.common.middleware.s3api.utils import Config +from swift.common.middleware.s3api.acl_handlers import get_acl_handler + + +class S3ApiMiddleware(object): + """S3Api: S3 compatibility middleware""" + def __init__(self, app, conf, *args, **kwargs): + self.app = app + self.conf = Config() + + # Set default values if they are not configured + self.conf.allow_no_owner = config_true_value( + conf.get('allow_no_owner', False)) + self.conf.location = conf.get('location', 'US') + self.conf.dns_compliant_bucket_names = config_true_value( + conf.get('dns_compliant_bucket_names', True)) + self.conf.max_bucket_listing = config_positive_int_value( + conf.get('max_bucket_listing', 1000)) + self.conf.max_parts_listing = config_positive_int_value( + conf.get('max_parts_listing', 1000)) + self.conf.max_multi_delete_objects = config_positive_int_value( + conf.get('max_multi_delete_objects', 1000)) + self.conf.s3_acl = config_true_value( + conf.get('s3_acl', False)) + self.conf.storage_domain = conf.get('storage_domain', '') + self.conf.auth_pipeline_check = config_true_value( + conf.get('auth_pipeline_check', True)) + self.conf.max_upload_part_num = config_positive_int_value( + conf.get('max_upload_part_num', 1000)) + self.conf.check_bucket_owner = 
config_true_value( + conf.get('check_bucket_owner', False)) + self.conf.force_swift_request_proxy_log = config_true_value( + conf.get('force_swift_request_proxy_log', False)) + self.conf.allow_multipart_uploads = config_true_value( + conf.get('allow_multipart_uploads', True)) + self.conf.min_segment_size = config_positive_int_value( + conf.get('min_segment_size', 5242880)) + + self.logger = get_logger( + conf, log_route=conf.get('log_name', 's3api')) + self.slo_enabled = self.conf.allow_multipart_uploads + self.check_pipeline(self.conf) + + def __call__(self, env, start_response): + try: + req_class = get_request_class(env, self.conf.s3_acl) + req = req_class( + env, self.app, self.slo_enabled, self.conf.storage_domain, + self.conf.location, self.conf.force_swift_request_proxy_log, + self.conf.dns_compliant_bucket_names, + self.conf.allow_multipart_uploads, self.conf.allow_no_owner) + resp = self.handle_request(req) + except NotS3Request: + resp = self.app + except InvalidSubresource as e: + self.logger.debug(e.cause) + except ErrorResponse as err_resp: + if isinstance(err_resp, InternalError): + self.logger.exception(err_resp) + resp = err_resp + except Exception as e: + self.logger.exception(e) + resp = InternalError(reason=e) + + if isinstance(resp, S3ResponseBase) and 'swift.trans_id' in env: + resp.headers['x-amz-id-2'] = env['swift.trans_id'] + resp.headers['x-amz-request-id'] = env['swift.trans_id'] + + return resp(env, start_response) + + def handle_request(self, req): + self.logger.debug('Calling S3Api Middleware') + self.logger.debug(req.__dict__) + try: + controller = req.controller(self.app, self.conf, self.logger) + except S3NotImplemented: + # TODO: Probably we should distinct the error to log this warning + self.logger.warning('multipart: No SLO middleware in pipeline') + raise + + acl_handler = get_acl_handler(req.controller_name)(req, self.logger) + req.set_acl_handler(acl_handler) + + if hasattr(controller, req.method): + handler = getattr(controller, req.method) + if not getattr(handler, 'publicly_accessible', False): + raise MethodNotAllowed(req.method, + req.controller.resource_type()) + res = handler(req) + else: + raise MethodNotAllowed(req.method, + req.controller.resource_type()) + + return res + + def check_pipeline(self, conf): + """ + Check that proxy-server.conf has an appropriate pipeline for s3api. + """ + if conf.get('__file__', None) is None: + return + + ctx = loadcontext(loadwsgi.APP, conf.__file__) + pipeline = str(PipelineWrapper(ctx)).split(' ') + + # Add compatible with 3rd party middleware. 
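+        # check_filter_order() below verifies that 's3api' appears somewhere
+        # before 'proxy-server'; the middlewares between the two are then
+        # treated as the candidate auth section ('auth_pipeline') that the
+        # rest of this method inspects.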
+ self.check_filter_order(pipeline, ['s3api', 'proxy-server']) + + auth_pipeline = pipeline[pipeline.index('s3api') + 1: + pipeline.index('proxy-server')] + + # Check SLO middleware + if self.slo_enabled and 'slo' not in auth_pipeline: + self.slo_enabled = False + self.logger.warning('s3api middleware requires SLO middleware ' + 'to support multi-part upload, please add it ' + 'in pipeline') + + if not conf.auth_pipeline_check: + self.logger.debug('Skip pipeline auth check.') + return + + if 'tempauth' in auth_pipeline: + self.logger.debug('Use tempauth middleware.') + elif 'keystoneauth' in auth_pipeline: + self.check_filter_order( + auth_pipeline, + ['s3token', 'keystoneauth']) + self.logger.debug('Use keystone middleware.') + elif len(auth_pipeline): + self.logger.debug('Use third party(unknown) auth middleware.') + else: + raise ValueError('Invalid pipeline %r: expected auth between ' + 's3api and proxy-server ' % pipeline) + + def check_filter_order(self, pipeline, required_filters): + """ + Check that required filters are present in order in the pipeline. + """ + indexes = [] + missing_filters = [] + for required_filter in required_filters: + try: + indexes.append(pipeline.index(required_filter)) + except ValueError as e: + self.logger.debug(e) + missing_filters.append(required_filter) + + if missing_filters: + raise ValueError('Invalid pipeline %r: missing filters %r' % ( + pipeline, missing_filters)) + + if indexes != sorted(indexes): + raise ValueError('Invalid pipeline %r: expected filter %s' % ( + pipeline, ' before '.join(required_filters))) + + +def filter_factory(global_conf, **local_conf): + """Standard filter factory to use the middleware with paste.deploy""" + conf = global_conf.copy() + conf.update(local_conf) + + register_swift_info( + 's3api', + # TODO: make default values as variables + max_bucket_listing=conf.get('max_bucket_listing', 1000), + max_parts_listing=conf.get('max_parts_listing', 1000), + max_upload_part_num=conf.get('max_upload_part_num', 1000), + max_multi_delete_objects=conf.get('max_multi_delete_objects', 1000), + allow_multipart_uploads=conf.get('allow_multipart_uploads', True), + min_segment_size=conf.get('min_segment_size', 5242880), + s3_acl=conf.get('s3_acl', False) + ) + + def s3api_filter(app): + return S3ApiMiddleware(app, conf) + + return s3api_filter diff -Nru swift-2.17.0/swift/common/middleware/s3api/s3request.py swift-2.18.0/swift/common/middleware/s3api/s3request.py --- swift-2.17.0/swift/common/middleware/s3api/s3request.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/s3request.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,1402 @@ +# Copyright (c) 2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import base64 +from collections import defaultdict +from email.header import Header +from hashlib import sha1, sha256, md5 +import hmac +import re +import six +# pylint: disable-msg=import-error +from six.moves.urllib.parse import quote, unquote, parse_qsl +import string + +from swift.common.utils import split_path +from swift.common import swob +from swift.common.http import HTTP_OK, HTTP_CREATED, HTTP_ACCEPTED, \ + HTTP_NO_CONTENT, HTTP_UNAUTHORIZED, HTTP_FORBIDDEN, HTTP_NOT_FOUND, \ + HTTP_CONFLICT, HTTP_UNPROCESSABLE_ENTITY, HTTP_REQUEST_ENTITY_TOO_LARGE, \ + HTTP_PARTIAL_CONTENT, HTTP_NOT_MODIFIED, HTTP_PRECONDITION_FAILED, \ + HTTP_REQUESTED_RANGE_NOT_SATISFIABLE, HTTP_LENGTH_REQUIRED, \ + HTTP_BAD_REQUEST, HTTP_REQUEST_TIMEOUT, is_success + +from swift.common.constraints import check_utf8 +from swift.proxy.controllers.base import get_container_info, \ + headers_to_container_info +from swift.common.request_helpers import check_path_header + +from swift.common.middleware.s3api.controllers import ServiceController, \ + ObjectController, AclController, MultiObjectDeleteController, \ + LocationController, LoggingStatusController, PartController, \ + UploadController, UploadsController, VersioningController, \ + UnsupportedController, S3AclController, BucketController +from swift.common.middleware.s3api.s3response import AccessDenied, \ + InvalidArgument, InvalidDigest, \ + RequestTimeTooSkewed, S3Response, SignatureDoesNotMatch, \ + BucketAlreadyExists, BucketNotEmpty, EntityTooLarge, \ + InternalError, NoSuchBucket, NoSuchKey, PreconditionFailed, InvalidRange, \ + MissingContentLength, InvalidStorageClass, S3NotImplemented, InvalidURI, \ + MalformedXML, InvalidRequest, RequestTimeout, InvalidBucketName, \ + BadDigest, AuthorizationHeaderMalformed, AuthorizationQueryParametersError +from swift.common.middleware.s3api.exception import NotS3Request, \ + BadSwiftRequest +from swift.common.middleware.s3api.utils import utf8encode, \ + S3Timestamp, mktime, MULTIUPLOAD_SUFFIX +from swift.common.middleware.s3api.subresource import decode_acl, encode_acl +from swift.common.middleware.s3api.utils import sysmeta_header, \ + validate_bucket_name +from swift.common.middleware.s3api.acl_utils import handle_acl_header + + +# List of sub-resources that must be maintained as part of the HMAC +# signature string. 
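+# (This is essentially the set of sub-resources that AWS folds into the
+# CanonicalizedResource element of the Signature V2 string-to-sign.)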
+ALLOWED_SUB_RESOURCES = sorted([ + 'acl', 'delete', 'lifecycle', 'location', 'logging', 'notification', + 'partNumber', 'policy', 'requestPayment', 'torrent', 'uploads', 'uploadId', + 'versionId', 'versioning', 'versions', 'website', + 'response-cache-control', 'response-content-disposition', + 'response-content-encoding', 'response-content-language', + 'response-content-type', 'response-expires', 'cors', 'tagging', 'restore' +]) + + +MAX_32BIT_INT = 2147483647 +SIGV2_TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%S' +SIGV4_X_AMZ_DATE_FORMAT = '%Y%m%dT%H%M%SZ' +SERVICE = 's3' # useful for mocking out in tests + + +def _header_strip(value): + # S3 seems to strip *all* control characters + if value is None: + return None + stripped = _header_strip.re.sub('', value) + if value and not stripped: + # If there's nothing left after stripping, + # behave as though it wasn't provided + return None + return stripped +_header_strip.re = re.compile('^[\x00-\x20]*|[\x00-\x20]*$') + + +def _header_acl_property(resource): + """ + Set and retrieve the acl in self.headers + """ + def getter(self): + return getattr(self, '_%s' % resource) + + def setter(self, value): + self.headers.update(encode_acl(resource, value)) + setattr(self, '_%s' % resource, value) + + def deleter(self): + self.headers[sysmeta_header(resource, 'acl')] = '' + + return property(getter, setter, deleter, + doc='Get and set the %s acl property' % resource) + + +class SigV4Mixin(object): + """ + A request class mixin to provide S3 signature v4 functionality + """ + + def check_signature(self, secret): + user_signature = self.signature + derived_secret = 'AWS4' + secret + for scope_piece in self.scope: + derived_secret = hmac.new( + derived_secret, scope_piece, sha256).digest() + valid_signature = hmac.new( + derived_secret, self.string_to_sign, sha256).hexdigest() + return user_signature == valid_signature + + @property + def _is_query_auth(self): + return 'X-Amz-Credential' in self.params + + @property + def timestamp(self): + """ + Return timestamp string according to the auth type + The difference from v2 is v4 have to see 'X-Amz-Date' even though + it's query auth type. 
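+        (i.e. for V4 query auth the timestamp is read from the X-Amz-Date
+        query parameter, while for header auth it is taken from the
+        X-Amz-Date header, falling back to the Date header)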
+ """ + if not self._timestamp: + try: + if self._is_query_auth and 'X-Amz-Date' in self.params: + # NOTE(andrey-mp): Date in Signature V4 has different + # format + timestamp = mktime( + self.params['X-Amz-Date'], SIGV4_X_AMZ_DATE_FORMAT) + else: + if self.headers.get('X-Amz-Date'): + timestamp = mktime( + self.headers.get('X-Amz-Date'), + SIGV4_X_AMZ_DATE_FORMAT) + else: + timestamp = mktime(self.headers.get('Date')) + except (ValueError, TypeError): + raise AccessDenied('AWS authentication requires a valid Date ' + 'or x-amz-date header') + + if timestamp < 0: + raise AccessDenied('AWS authentication requires a valid Date ' + 'or x-amz-date header') + + try: + self._timestamp = S3Timestamp(timestamp) + except ValueError: + # Must be far-future; blame clock skew + raise RequestTimeTooSkewed() + + return self._timestamp + + def _validate_expire_param(self): + """ + Validate X-Amz-Expires in query parameter + :raises: AccessDenied + :raises: AuthorizationQueryParametersError + :raises: AccessDenined + """ + err = None + try: + expires = int(self.params['X-Amz-Expires']) + except ValueError: + err = 'X-Amz-Expires should be a number' + else: + if expires < 0: + err = 'X-Amz-Expires must be non-negative' + elif expires >= 2 ** 63: + err = 'X-Amz-Expires should be a number' + elif expires > 604800: + err = ('X-Amz-Expires must be less than a week (in seconds); ' + 'that is, the given X-Amz-Expires must be less than ' + '604800 seconds') + if err: + raise AuthorizationQueryParametersError(err) + + if int(self.timestamp) + expires < S3Timestamp.now(): + raise AccessDenied('Request has expired') + + def _parse_query_authentication(self): + """ + Parse v4 query authentication + - version 4: + 'X-Amz-Credential' and 'X-Amz-Signature' should be in param + :raises: AccessDenied + :raises: AuthorizationHeaderMalformed + """ + if self.params.get('X-Amz-Algorithm') != 'AWS4-HMAC-SHA256': + raise InvalidArgument('X-Amz-Algorithm', + self.params.get('X-Amz-Algorithm')) + try: + cred_param = self.params['X-Amz-Credential'].split("/") + access = cred_param[0] + sig = self.params['X-Amz-Signature'] + expires = self.params['X-Amz-Expires'] + except KeyError: + raise AccessDenied() + + try: + signed_headers = self.params['X-Amz-SignedHeaders'] + except KeyError: + # TODO: make sure if is it malformed request? + raise AuthorizationHeaderMalformed() + + self._signed_headers = set(signed_headers.split(';')) + + # credential must be in following format: + # ////aws4_request + if not all([access, sig, len(cred_param) == 5, expires]): + raise AccessDenied() + + return access, sig + + def _parse_header_authentication(self): + """ + Parse v4 header authentication + - version 4: + 'X-Amz-Credential' and 'X-Amz-Signature' should be in param + :raises: AccessDenied + :raises: AuthorizationHeaderMalformed + """ + + auth_str = self.headers['Authorization'] + cred_param = auth_str.partition( + "Credential=")[2].split(',')[0].split("/") + access = cred_param[0] + sig = auth_str.partition("Signature=")[2].split(',')[0] + signed_headers = auth_str.partition( + "SignedHeaders=")[2].split(',', 1)[0] + # credential must be in following format: + # ////aws4_request + if not all([access, sig, len(cred_param) == 5]): + raise AccessDenied() + if not signed_headers: + # TODO: make sure if is it Malformed? 
+ raise AuthorizationHeaderMalformed() + + self._signed_headers = set(signed_headers.split(';')) + + return access, sig + + def _canonical_query_string(self): + return '&'.join( + '%s=%s' % (quote(key, safe='-_.~'), + quote(value, safe='-_.~')) + for key, value in sorted(self.params.items()) + if key not in ('Signature', 'X-Amz-Signature')) + + def _headers_to_sign(self): + """ + Select the headers from the request that need to be included + in the StringToSign. + + :return : dict of headers to sign, the keys are all lower case + """ + if 'headers_raw' in self.environ: # eventlet >= 0.19.0 + # See https://github.com/eventlet/eventlet/commit/67ec999 + headers_lower_dict = defaultdict(list) + for key, value in self.environ['headers_raw']: + headers_lower_dict[key.lower().strip()].append( + ' '.join(_header_strip(value or '').split())) + headers_lower_dict = {k: ','.join(v) + for k, v in headers_lower_dict.items()} + else: # mostly-functional fallback + headers_lower_dict = dict( + (k.lower().strip(), ' '.join(_header_strip(v or '').split())) + for (k, v) in six.iteritems(self.headers)) + + if 'host' in headers_lower_dict and re.match( + 'Boto/2.[0-9].[0-2]', + headers_lower_dict.get('user-agent', '')): + # Boto versions < 2.9.3 strip the port component of the host:port + # header, so detect the user-agent via the header and strip the + # port if we detect an old boto version. + headers_lower_dict['host'] = \ + headers_lower_dict['host'].split(':')[0] + + headers_to_sign = [ + (key, value) for key, value in sorted(headers_lower_dict.items()) + if key in self._signed_headers] + + if len(headers_to_sign) != len(self._signed_headers): + # NOTE: if we are missing the header suggested via + # signed_header in actual header, it results in + # SignatureDoesNotMatch in actual S3 so we can raise + # the error immediately here to save redundant check + # process. + raise SignatureDoesNotMatch() + + return headers_to_sign + + def _canonical_uri(self): + """ + It won't require bucket name in canonical_uri for v4. + """ + return self.environ.get('RAW_PATH_INFO', self.path) + + def _canonical_request(self): + # prepare 'canonical_request' + # Example requests are like following: + # + # GET + # / + # Action=ListUsers&Version=2010-05-08 + # content-type:application/x-www-form-urlencoded; charset=utf-8 + # host:iam.amazonaws.com + # x-amz-date:20150830T123600Z + # + # content-type;host;x-amz-date + # e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + # + + # 1. Add verb like: GET + cr = [self.method.upper()] + + # 2. Add path like: / + path = self._canonical_uri() + cr.append(path) + + # 3. Add query like: Action=ListUsers&Version=2010-05-08 + cr.append(self._canonical_query_string()) + + # 4. Add headers like: + # content-type:application/x-www-form-urlencoded; charset=utf-8 + # host:iam.amazonaws.com + # x-amz-date:20150830T123600Z + headers_to_sign = self._headers_to_sign() + cr.append(''.join('%s:%s\n' % (key, value) + for key, value in headers_to_sign)) + + # 5. Add signed headers into canonical request like + # content-type;host;x-amz-date + cr.append(';'.join(k for k, v in headers_to_sign)) + + # 6. 
Add payload string at the tail + if 'X-Amz-Credential' in self.params: + # V4 with query parameters only + hashed_payload = 'UNSIGNED-PAYLOAD' + elif 'X-Amz-Content-SHA256' not in self.headers: + msg = 'Missing required header for this request: ' \ + 'x-amz-content-sha256' + raise InvalidRequest(msg) + else: + hashed_payload = self.headers['X-Amz-Content-SHA256'] + cr.append(hashed_payload) + return '\n'.join(cr).encode('utf-8') + + @property + def scope(self): + return [self.timestamp.amz_date_format.split('T')[0], + self.location, SERVICE, 'aws4_request'] + + def _string_to_sign(self): + """ + Create 'StringToSign' value in Amazon terminology for v4. + """ + return '\n'.join(['AWS4-HMAC-SHA256', + self.timestamp.amz_date_format, + '/'.join(self.scope), + sha256(self._canonical_request()).hexdigest()]) + + +def get_request_class(env, s3_acl): + """ + Helper function to find a request class to use from Map + """ + if s3_acl: + request_classes = (S3AclRequest, SigV4S3AclRequest) + else: + request_classes = (S3Request, SigV4Request) + + req = swob.Request(env) + if 'X-Amz-Credential' in req.params or \ + req.headers.get('Authorization', '').startswith( + 'AWS4-HMAC-SHA256 '): + # This is an Amazon SigV4 request + return request_classes[1] + else: + # The others using Amazon SigV2 class + return request_classes[0] + + +class S3Request(swob.Request): + """ + S3 request object. + """ + + bucket_acl = _header_acl_property('container') + object_acl = _header_acl_property('object') + + def __init__(self, env, app=None, slo_enabled=True, + storage_domain='', location='US', force_request_log=False, + dns_compliant_bucket_names=True, allow_multipart_uploads=True, + allow_no_owner=False): + # NOTE: app and allow_no_owner are not used by this class, need for + # compatibility of S3acl + swob.Request.__init__(self, env) + self.storage_domain = storage_domain + self.location = location + self.force_request_log = force_request_log + self.dns_compliant_bucket_names = dns_compliant_bucket_names + self.allow_multipart_uploads = allow_multipart_uploads + self._timestamp = None + self.access_key, self.signature = self._parse_auth_info() + self.bucket_in_host = self._parse_host() + self.container_name, self.object_name = self._parse_uri() + self._validate_headers() + # Lock in string-to-sign now, before we start messing with query params + self.string_to_sign = self._string_to_sign() + self.environ['s3api.auth_details'] = { + 'access_key': self.access_key, + 'signature': self.signature, + 'string_to_sign': self.string_to_sign, + 'check_signature': self.check_signature, + } + self.token = None + self.account = None + self.user_id = None + self.slo_enabled = slo_enabled + + # NOTE(andrey-mp): substitute authorization header for next modules + # in pipeline (s3token). it uses this and X-Auth-Token in specific + # format. + # (kota_): yeah, the reason we need this is s3token only supports + # v2 like header consists of AWS access:signature. Since the commit + # b626a3ca86e467fc7564eac236b9ee2efd49bdcc, the s3token is in swift3 + # repo so probably we need to change s3token to support v4 format. + self.headers['Authorization'] = 'AWS %s:%s' % ( + self.access_key, self.signature) + # Avoids that swift.swob.Response replaces Location header value + # by full URL when absolute path given. See swift.swob for more detail. 
+ self.environ['swift.leave_relative_location'] = True + + def check_signature(self, secret): + user_signature = self.signature + valid_signature = base64.b64encode(hmac.new( + secret, self.string_to_sign, sha1).digest()).strip() + return user_signature == valid_signature + + @property + def timestamp(self): + """ + S3Timestamp from Date header. If X-Amz-Date header specified, it + will be prior to Date header. + + :return : S3Timestamp instance + """ + if not self._timestamp: + try: + if self._is_query_auth and 'Timestamp' in self.params: + # If Timestamp specified in query, it should be prior + # to any Date header (is this right?) + timestamp = mktime( + self.params['Timestamp'], SIGV2_TIMESTAMP_FORMAT) + else: + timestamp = mktime( + self.headers.get('X-Amz-Date', + self.headers.get('Date'))) + except ValueError: + raise AccessDenied('AWS authentication requires a valid Date ' + 'or x-amz-date header') + + if timestamp < 0: + raise AccessDenied('AWS authentication requires a valid Date ' + 'or x-amz-date header') + try: + self._timestamp = S3Timestamp(timestamp) + except ValueError: + # Must be far-future; blame clock skew + raise RequestTimeTooSkewed() + + return self._timestamp + + @property + def _is_header_auth(self): + return 'Authorization' in self.headers + + @property + def _is_query_auth(self): + return 'AWSAccessKeyId' in self.params + + def _parse_host(self): + storage_domain = self.storage_domain + if not storage_domain: + return None + + if not storage_domain.startswith('.'): + storage_domain = '.' + storage_domain + + if 'HTTP_HOST' in self.environ: + given_domain = self.environ['HTTP_HOST'] + elif 'SERVER_NAME' in self.environ: + given_domain = self.environ['SERVER_NAME'] + else: + return None + + port = '' + if ':' in given_domain: + given_domain, port = given_domain.rsplit(':', 1) + if given_domain.endswith(storage_domain): + return given_domain[:-len(storage_domain)] + + return None + + def _parse_uri(self): + if not check_utf8(self.environ['PATH_INFO']): + raise InvalidURI(self.path) + + if self.bucket_in_host: + obj = self.environ['PATH_INFO'][1:] or None + return self.bucket_in_host, obj + + bucket, obj = self.split_path(0, 2, True) + + if bucket and not validate_bucket_name( + bucket, self.dns_compliant_bucket_names): + # Ignore GET service case + raise InvalidBucketName(bucket) + return (bucket, obj) + + def _parse_query_authentication(self): + """ + Parse v2 authentication query args + TODO: make sure if 0, 1, 3 is supported? + - version 0, 1, 2, 3: + 'AWSAccessKeyId' and 'Signature' should be in param + + :return: a tuple of access_key and signature + :raises: AccessDenied + """ + try: + access = self.params['AWSAccessKeyId'] + expires = self.params['Expires'] + sig = self.params['Signature'] + except KeyError: + raise AccessDenied() + + if not all([access, sig, expires]): + raise AccessDenied() + + return access, sig + + def _parse_header_authentication(self): + """ + Parse v2 header authentication info + + :returns: a tuple of access_key and signature + :raises: AccessDenied + """ + auth_str = self.headers['Authorization'] + if not auth_str.startswith('AWS ') or ':' not in auth_str: + raise AccessDenied() + # This means signature format V2 + access, sig = auth_str.split(' ', 1)[1].rsplit(':', 1) + return access, sig + + def _parse_auth_info(self): + """Extract the access key identifier and signature. 
+ + :returns: a tuple of access_key and signature + :raises: NotS3Request + """ + if self._is_query_auth: + return self._parse_query_authentication() + elif self._is_header_auth: + return self._parse_header_authentication() + else: + # if this request is neither query auth nor header auth + # s3api regard this as not s3 request + raise NotS3Request() + + def _validate_expire_param(self): + """ + Validate Expires in query parameters + :raises: AccessDenied + """ + # Expires header is a float since epoch + try: + ex = S3Timestamp(float(self.params['Expires'])) + except ValueError: + raise AccessDenied() + + if S3Timestamp.now() > ex: + raise AccessDenied('Request has expired') + + if ex >= 2 ** 31: + raise AccessDenied( + 'Invalid date (should be seconds since epoch): %s' % + self.params['Expires']) + + def _validate_dates(self): + """ + Validate Date/X-Amz-Date headers for signature v2 + :raises: AccessDenied + :raises: RequestTimeTooSkewed + """ + if self._is_query_auth: + self._validate_expire_param() + # TODO: make sure the case if timestamp param in query + return + + date_header = self.headers.get('Date') + amz_date_header = self.headers.get('X-Amz-Date') + if not date_header and not amz_date_header: + raise AccessDenied('AWS authentication requires a valid Date ' + 'or x-amz-date header') + + # Anyways, request timestamp should be validated + epoch = S3Timestamp(0) + if self.timestamp < epoch: + raise AccessDenied() + + # If the standard date is too far ahead or behind, it is an + # error + delta = 60 * 5 + if abs(int(self.timestamp) - int(S3Timestamp.now())) > delta: + raise RequestTimeTooSkewed() + + def _validate_headers(self): + if 'CONTENT_LENGTH' in self.environ: + try: + if self.content_length < 0: + raise InvalidArgument('Content-Length', + self.content_length) + except (ValueError, TypeError): + raise InvalidArgument('Content-Length', + self.environ['CONTENT_LENGTH']) + + self._validate_dates() + + value = _header_strip(self.headers.get('Content-MD5')) + if value is not None: + if not re.match('^[A-Za-z0-9+/]+={0,2}$', value): + # Non-base64-alphabet characters in value. + raise InvalidDigest(content_md5=value) + try: + self.headers['ETag'] = value.decode('base64').encode('hex') + except Exception: + raise InvalidDigest(content_md5=value) + + if len(self.headers['ETag']) != 32: + raise InvalidDigest(content_md5=value) + + if self.method == 'PUT' and any(h in self.headers for h in ( + 'If-Match', 'If-None-Match', + 'If-Modified-Since', 'If-Unmodified-Since')): + raise S3NotImplemented( + 'Conditional object PUTs are not supported.') + + if 'X-Amz-Copy-Source' in self.headers: + try: + check_path_header(self, 'X-Amz-Copy-Source', 2, '') + except swob.HTTPException: + msg = 'Copy Source must mention the source bucket and key: ' \ + 'sourcebucket/sourcekey' + raise InvalidArgument('x-amz-copy-source', + self.headers['X-Amz-Copy-Source'], + msg) + + if 'x-amz-metadata-directive' in self.headers: + value = self.headers['x-amz-metadata-directive'] + if value not in ('COPY', 'REPLACE'): + err_msg = 'Unknown metadata directive.' + raise InvalidArgument('x-amz-metadata-directive', value, + err_msg) + + if 'x-amz-storage-class' in self.headers: + # Only STANDARD is supported now. 
+ if self.headers['x-amz-storage-class'] != 'STANDARD': + raise InvalidStorageClass() + + if 'x-amz-mfa' in self.headers: + raise S3NotImplemented('MFA Delete is not supported.') + + if 'x-amz-server-side-encryption' in self.headers: + raise S3NotImplemented('Server-side encryption is not supported.') + + if 'x-amz-website-redirect-location' in self.headers: + raise S3NotImplemented('Website redirection is not supported.') + + @property + def body(self): + """ + swob.Request.body is not secure against malicious input. It consumes + too much memory without any check when the request body is excessively + large. Use xml() instead. + """ + raise AttributeError("No attribute 'body'") + + def xml(self, max_length): + """ + Similar to swob.Request.body, but it checks the content length before + creating a body string. + """ + te = self.headers.get('transfer-encoding', '') + te = [x.strip() for x in te.split(',') if x.strip()] + if te and (len(te) > 1 or te[-1] != 'chunked'): + raise S3NotImplemented('A header you provided implies ' + 'functionality that is not implemented', + header='Transfer-Encoding') + + if self.message_length() > max_length: + raise MalformedXML() + + if te or self.message_length(): + # Limit the read similar to how SLO handles manifests + body = self.body_file.read(max_length) + else: + # No (or zero) Content-Length provided, and not chunked transfer; + # no body. Assume zero-length, and enforce a required body below. + return None + + return body + + def check_md5(self, body): + if 'HTTP_CONTENT_MD5' not in self.environ: + raise InvalidRequest('Missing required header for this request: ' + 'Content-MD5') + + digest = md5(body).digest().encode('base64').strip() + if self.environ['HTTP_CONTENT_MD5'] != digest: + raise BadDigest(content_md5=self.environ['HTTP_CONTENT_MD5']) + + def _copy_source_headers(self): + env = {} + for key, value in self.environ.items(): + if key.startswith('HTTP_X_AMZ_COPY_SOURCE_'): + env[key.replace('X_AMZ_COPY_SOURCE_', '')] = value + + return swob.HeaderEnvironProxy(env) + + def check_copy_source(self, app): + """ + check_copy_source checks the copy source existence and if copying an + object to itself, for illegal request parameters + + :returns: the source HEAD response + """ + try: + src_path = self.headers['X-Amz-Copy-Source'] + except KeyError: + return None + + if '?' 
in src_path: + src_path, qs = src_path.split('?', 1) + query = parse_qsl(qs, True) + if not query: + pass # ignore it + elif len(query) > 1 or query[0][0] != 'versionId': + raise InvalidArgument('X-Amz-Copy-Source', + self.headers['X-Amz-Copy-Source'], + 'Unsupported copy source parameter.') + elif query[0][1] != 'null': + # TODO: once we support versioning, we'll need to translate + # src_path to the proper location in the versions container + raise S3NotImplemented('Versioning is not yet supported') + self.headers['X-Amz-Copy-Source'] = src_path + + src_path = unquote(src_path) + src_path = src_path if src_path.startswith('/') else ('/' + src_path) + src_bucket, src_obj = split_path(src_path, 0, 2, True) + + headers = swob.HeaderKeyDict() + headers.update(self._copy_source_headers()) + + src_resp = self.get_response(app, 'HEAD', src_bucket, src_obj, + headers=headers) + if src_resp.status_int == 304: # pylint: disable-msg=E1101 + raise PreconditionFailed() + + self.headers['X-Amz-Copy-Source'] = \ + '/' + self.headers['X-Amz-Copy-Source'].lstrip('/') + source_container, source_obj = \ + split_path(self.headers['X-Amz-Copy-Source'], 1, 2, True) + + if (self.container_name == source_container and + self.object_name == source_obj and + self.headers.get('x-amz-metadata-directive', + 'COPY') == 'COPY'): + raise InvalidRequest("This copy request is illegal " + "because it is trying to copy an " + "object to itself without " + "changing the object's metadata, " + "storage class, website redirect " + "location or encryption " + "attributes.") + return src_resp + + def _canonical_uri(self): + """ + Require bucket name in canonical_uri for v2 in virtual hosted-style. + """ + raw_path_info = self.environ.get('RAW_PATH_INFO', self.path) + if self.bucket_in_host: + raw_path_info = '/' + self.bucket_in_host + raw_path_info + return raw_path_info + + def _string_to_sign(self): + """ + Create 'StringToSign' value in Amazon terminology for v2. + """ + amz_headers = {} + + buf = [self.method, + _header_strip(self.headers.get('Content-MD5')) or '', + _header_strip(self.headers.get('Content-Type')) or ''] + + if 'headers_raw' in self.environ: # eventlet >= 0.19.0 + # See https://github.com/eventlet/eventlet/commit/67ec999 + amz_headers = defaultdict(list) + for key, value in self.environ['headers_raw']: + key = key.lower() + if not key.startswith('x-amz-'): + continue + amz_headers[key.strip()].append(value.strip()) + amz_headers = dict((key, ','.join(value)) + for key, value in amz_headers.items()) + else: # mostly-functional fallback + amz_headers = dict((key.lower(), value) + for key, value in self.headers.items() + if key.lower().startswith('x-amz-')) + + if self._is_header_auth: + if 'x-amz-date' in amz_headers: + buf.append('') + elif 'Date' in self.headers: + buf.append(self.headers['Date']) + elif self._is_query_auth: + buf.append(self.params['Expires']) + else: + # Should have already raised NotS3Request in _parse_auth_info, + # but as a sanity check... + raise AccessDenied() + + for key, value in sorted(amz_headers.items()): + buf.append("%s:%s" % (key, value)) + + path = self._canonical_uri() + if self.query_string: + path += '?' + self.query_string + params = [] + if '?' 
in path: + path, args = path.split('?', 1) + for key, value in sorted(self.params.items()): + if key in ALLOWED_SUB_RESOURCES: + params.append('%s=%s' % (key, value) if value else key) + if params: + buf.append('%s?%s' % (path, '&'.join(params))) + else: + buf.append(path) + return '\n'.join(buf) + + @property + def controller_name(self): + return self.controller.__name__[:-len('Controller')] + + @property + def controller(self): + if self.is_service_request: + return ServiceController + + if not self.slo_enabled: + multi_part = ['partNumber', 'uploadId', 'uploads'] + if len([p for p in multi_part if p in self.params]): + raise S3NotImplemented("Multi-part feature isn't support") + + if 'acl' in self.params: + return AclController + if 'delete' in self.params: + return MultiObjectDeleteController + if 'location' in self.params: + return LocationController + if 'logging' in self.params: + return LoggingStatusController + if 'partNumber' in self.params: + return PartController + if 'uploadId' in self.params: + return UploadController + if 'uploads' in self.params: + return UploadsController + if 'versioning' in self.params: + return VersioningController + + unsupported = ('notification', 'policy', 'requestPayment', 'torrent', + 'website', 'cors', 'tagging', 'restore') + if set(unsupported) & set(self.params): + return UnsupportedController + + if self.is_object_request: + return ObjectController + return BucketController + + @property + def is_service_request(self): + return not self.container_name + + @property + def is_bucket_request(self): + return self.container_name and not self.object_name + + @property + def is_object_request(self): + return self.container_name and self.object_name + + @property + def is_authenticated(self): + return self.account is not None + + def to_swift_req(self, method, container, obj, query=None, + body=None, headers=None): + """ + Create a Swift request based on this request's environment. + """ + if self.account is None: + account = self.access_key + else: + account = self.account + + env = self.environ.copy() + + def sanitize(value): + if set(value).issubset(string.printable): + return value + + value = Header(value, 'UTF-8').encode() + if value.startswith('=?utf-8?q?'): + return '=?UTF-8?Q?' + value[10:] + elif value.startswith('=?utf-8?b?'): + return '=?UTF-8?B?' + value[10:] + else: + return value + + if 'headers_raw' in env: # eventlet >= 0.19.0 + # See https://github.com/eventlet/eventlet/commit/67ec999 + for key, value in env['headers_raw']: + if not key.lower().startswith('x-amz-meta-'): + continue + # AWS ignores user-defined headers with these characters + if any(c in key for c in ' "),/;<=>?@[\\]{}'): + # NB: apparently, '(' *is* allowed + continue + # Note that this may have already been deleted, e.g. if the + # client sent multiple headers with the same name, or both + # x-amz-meta-foo-bar and x-amz-meta-foo_bar + env.pop('HTTP_' + key.replace('-', '_').upper(), None) + # Need to preserve underscores. Since we know '=' can't be + # present, quoted-printable seems appropriate. 
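+                # e.g. the client header 'x-amz-meta-foo_bar' becomes the
+                # environ key 'HTTP_X_OBJECT_META_FOO=5FBAR'.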
+ key = key.replace('_', '=5F').replace('-', '_').upper() + key = 'HTTP_X_OBJECT_META_' + key[11:] + if key in env: + env[key] += ',' + sanitize(value) + else: + env[key] = sanitize(value) + else: # mostly-functional fallback + for key in self.environ: + if not key.startswith('HTTP_X_AMZ_META_'): + continue + # AWS ignores user-defined headers with these characters + if any(c in key for c in ' "),/;<=>?@[\\]{}'): + # NB: apparently, '(' *is* allowed + continue + env['HTTP_X_OBJECT_META_' + key[16:]] = sanitize(env[key]) + del env[key] + + if 'HTTP_X_AMZ_COPY_SOURCE' in env: + env['HTTP_X_COPY_FROM'] = env['HTTP_X_AMZ_COPY_SOURCE'] + del env['HTTP_X_AMZ_COPY_SOURCE'] + env['CONTENT_LENGTH'] = '0' + + if self.force_request_log: + env['swift.proxy_access_log_made'] = False + env['swift.source'] = 'S3' + if method is not None: + env['REQUEST_METHOD'] = method + + env['HTTP_X_AUTH_TOKEN'] = self.token + + if obj: + path = '/v1/%s/%s/%s' % (account, container, obj) + elif container: + path = '/v1/%s/%s' % (account, container) + else: + path = '/v1/%s' % (account) + env['PATH_INFO'] = path + + query_string = '' + if query is not None: + params = [] + for key, value in sorted(query.items()): + if value is not None: + params.append('%s=%s' % (key, quote(str(value)))) + else: + params.append(key) + query_string = '&'.join(params) + env['QUERY_STRING'] = query_string + + return swob.Request.blank(quote(path), environ=env, body=body, + headers=headers) + + def _swift_success_codes(self, method, container, obj): + """ + Returns a list of expected success codes from Swift. + """ + if not container: + # Swift account access. + code_map = { + 'GET': [ + HTTP_OK, + ], + } + elif not obj: + # Swift container access. + code_map = { + 'HEAD': [ + HTTP_NO_CONTENT, + ], + 'GET': [ + HTTP_OK, + HTTP_NO_CONTENT, + ], + 'PUT': [ + HTTP_CREATED, + ], + 'POST': [ + HTTP_NO_CONTENT, + ], + 'DELETE': [ + HTTP_NO_CONTENT, + ], + } + else: + # Swift object access. + code_map = { + 'HEAD': [ + HTTP_OK, + HTTP_PARTIAL_CONTENT, + HTTP_NOT_MODIFIED, + ], + 'GET': [ + HTTP_OK, + HTTP_PARTIAL_CONTENT, + HTTP_NOT_MODIFIED, + ], + 'PUT': [ + HTTP_CREATED, + ], + 'POST': [ + HTTP_ACCEPTED, + ], + 'DELETE': [ + HTTP_OK, + HTTP_NO_CONTENT, + ], + } + + return code_map[method] + + def _swift_error_codes(self, method, container, obj, env, app): + """ + Returns a dict from expected Swift error codes to the corresponding S3 + error responses. + """ + if not container: + # Swift account access. + code_map = { + 'GET': { + }, + } + elif not obj: + # Swift container access. + code_map = { + 'HEAD': { + HTTP_NOT_FOUND: (NoSuchBucket, container), + }, + 'GET': { + HTTP_NOT_FOUND: (NoSuchBucket, container), + }, + 'PUT': { + HTTP_ACCEPTED: (BucketAlreadyExists, container), + }, + 'POST': { + HTTP_NOT_FOUND: (NoSuchBucket, container), + }, + 'DELETE': { + HTTP_NOT_FOUND: (NoSuchBucket, container), + HTTP_CONFLICT: BucketNotEmpty, + }, + } + else: + # Swift object access. 
+ + # 404s differ depending upon whether the bucket exists + # Note that base-container-existence checks happen elsewhere for + # multi-part uploads, and get_container_info should be pulling + # from the env cache + def not_found_handler(): + if container.endswith(MULTIUPLOAD_SUFFIX) or \ + is_success(get_container_info( + env, app, swift_source='S3').get('status')): + return NoSuchKey(obj) + return NoSuchBucket(container) + + code_map = { + 'HEAD': { + HTTP_NOT_FOUND: not_found_handler, + HTTP_PRECONDITION_FAILED: PreconditionFailed, + }, + 'GET': { + HTTP_NOT_FOUND: not_found_handler, + HTTP_PRECONDITION_FAILED: PreconditionFailed, + HTTP_REQUESTED_RANGE_NOT_SATISFIABLE: InvalidRange, + }, + 'PUT': { + HTTP_NOT_FOUND: (NoSuchBucket, container), + HTTP_UNPROCESSABLE_ENTITY: BadDigest, + HTTP_REQUEST_ENTITY_TOO_LARGE: EntityTooLarge, + HTTP_LENGTH_REQUIRED: MissingContentLength, + HTTP_REQUEST_TIMEOUT: RequestTimeout, + }, + 'POST': { + HTTP_NOT_FOUND: not_found_handler, + HTTP_PRECONDITION_FAILED: PreconditionFailed, + }, + 'DELETE': { + HTTP_NOT_FOUND: (NoSuchKey, obj), + }, + } + + return code_map[method] + + def _get_response(self, app, method, container, obj, + headers=None, body=None, query=None): + """ + Calls the application with this request's environment. Returns a + S3Response object that wraps up the application's result. + """ + + method = method or self.environ['REQUEST_METHOD'] + + if container is None: + container = self.container_name + if obj is None: + obj = self.object_name + + sw_req = self.to_swift_req(method, container, obj, headers=headers, + body=body, query=query) + + sw_resp = sw_req.get_response(app) + + # reuse account and tokens + _, self.account, _ = split_path(sw_resp.environ['PATH_INFO'], + 2, 3, True) + self.account = utf8encode(self.account) + + resp = S3Response.from_swift_resp(sw_resp) + status = resp.status_int # pylint: disable-msg=E1101 + + if not self.user_id: + if 'HTTP_X_USER_NAME' in sw_resp.environ: + # keystone + self.user_id = \ + utf8encode("%s:%s" % + (sw_resp.environ['HTTP_X_TENANT_NAME'], + sw_resp.environ['HTTP_X_USER_NAME'])) + else: + # tempauth + self.user_id = self.access_key + + success_codes = self._swift_success_codes(method, container, obj) + error_codes = self._swift_error_codes(method, container, obj, + sw_req.environ, app) + + if status in success_codes: + return resp + + err_msg = resp.body + + if status in error_codes: + err_resp = \ + error_codes[sw_resp.status_int] # pylint: disable-msg=E1101 + if isinstance(err_resp, tuple): + raise err_resp[0](*err_resp[1:]) + else: + raise err_resp() + + if status == HTTP_BAD_REQUEST: + raise BadSwiftRequest(err_msg) + if status == HTTP_UNAUTHORIZED: + raise SignatureDoesNotMatch() + if status == HTTP_FORBIDDEN: + raise AccessDenied() + + raise InternalError('unexpected status code %d' % status) + + def get_response(self, app, method=None, container=None, obj=None, + headers=None, body=None, query=None): + """ + get_response is an entry point to be extended for child classes. + If additional tasks needed at that time of getting swift response, + we can override this method. + swift.common.middleware.s3api.s3request.S3Request need to just call + _get_response to get pure swift response. 
+ """ + + if 'HTTP_X_AMZ_ACL' in self.environ: + handle_acl_header(self) + + return self._get_response(app, method, container, obj, + headers, body, query) + + def get_validated_param(self, param, default, limit=MAX_32BIT_INT): + value = default + if param in self.params: + try: + value = int(self.params[param]) + if value < 0: + err_msg = 'Argument %s must be an integer between 0 and' \ + ' %d' % (param, MAX_32BIT_INT) + raise InvalidArgument(param, self.params[param], err_msg) + + if value > MAX_32BIT_INT: + # check the value because int() could build either a long + # instance or a 64bit integer. + raise ValueError() + + if limit < value: + value = limit + + except ValueError: + err_msg = 'Provided %s not an integer or within ' \ + 'integer range' % param + raise InvalidArgument(param, self.params[param], err_msg) + + return value + + def get_container_info(self, app): + """ + get_container_info will return a result dict of get_container_info + from the backend Swift. + + :returns: a dictionary of container info from + swift.controllers.base.get_container_info + :raises: NoSuchBucket when the container doesn't exist + :raises: InternalError when the request failed without 404 + """ + if self.is_authenticated: + # if we have already authenticated, yes we can use the account + # name like as AUTH_xxx for performance efficiency + sw_req = self.to_swift_req(app, self.container_name, None) + info = get_container_info(sw_req.environ, app) + if is_success(info['status']): + return info + elif info['status'] == 404: + raise NoSuchBucket(self.container_name) + else: + raise InternalError( + 'unexpected status code %d' % info['status']) + else: + # otherwise we do naive HEAD request with the authentication + resp = self.get_response(app, 'HEAD', self.container_name, '') + return headers_to_container_info( + resp.sw_headers, resp.status_int) # pylint: disable-msg=E1101 + + def gen_multipart_manifest_delete_query(self, app, obj=None): + if not self.allow_multipart_uploads: + return None + query = {'multipart-manifest': 'delete'} + if not obj: + obj = self.object_name + resp = self.get_response(app, 'HEAD', obj=obj) + return query if resp.is_slo else None + + def set_acl_handler(self, handler): + pass + + +class S3AclRequest(S3Request): + """ + S3Acl request object. + """ + def __init__(self, env, app, slo_enabled=True, + storage_domain='', location='US', force_request_log=False, + dns_compliant_bucket_names=True, allow_multipart_uploads=True, + allow_no_owner=False): + super(S3AclRequest, self).__init__( + env, app, slo_enabled, storage_domain, location, force_request_log, + dns_compliant_bucket_names, allow_multipart_uploads) + self.allow_no_owner = allow_no_owner + self.authenticate(app) + self.acl_handler = None + + @property + def controller(self): + if 'acl' in self.params and not self.is_service_request: + return S3AclController + return super(S3AclRequest, self).controller + + def authenticate(self, app): + """ + authenticate method will run pre-authenticate request and retrieve + account information. + Note that it currently supports only keystone and tempauth. 
+ (no support for the third party authentication middleware) + """ + sw_req = self.to_swift_req('TEST', None, None, body='') + # don't show log message of this request + sw_req.environ['swift.proxy_access_log_made'] = True + + sw_resp = sw_req.get_response(app) + + if not sw_req.remote_user: + raise SignatureDoesNotMatch() + + _, self.account, _ = split_path(sw_resp.environ['PATH_INFO'], + 2, 3, True) + self.account = utf8encode(self.account) + + if 'HTTP_X_USER_NAME' in sw_resp.environ: + # keystone + self.user_id = "%s:%s" % (sw_resp.environ['HTTP_X_TENANT_NAME'], + sw_resp.environ['HTTP_X_USER_NAME']) + self.user_id = utf8encode(self.user_id) + self.token = sw_resp.environ.get('HTTP_X_AUTH_TOKEN') + else: + # tempauth + self.user_id = self.access_key + + # Need to skip S3 authorization on subsequent requests to prevent + # overwriting the account in PATH_INFO + del self.headers['Authorization'] + del self.environ['s3api.auth_details'] + + def to_swift_req(self, method, container, obj, query=None, + body=None, headers=None): + sw_req = super(S3AclRequest, self).to_swift_req( + method, container, obj, query, body, headers) + if self.account: + sw_req.environ['swift_owner'] = True # needed to set ACL + sw_req.environ['swift.authorize_override'] = True + sw_req.environ['swift.authorize'] = lambda req: None + return sw_req + + def get_acl_response(self, app, method=None, container=None, obj=None, + headers=None, body=None, query=None): + """ + Wrapper method of _get_response to add s3 acl information + from response sysmeta headers. + """ + + resp = self._get_response( + app, method, container, obj, headers, body, query) + resp.bucket_acl = decode_acl( + 'container', resp.sysmeta_headers, self.allow_no_owner) + resp.object_acl = decode_acl( + 'object', resp.sysmeta_headers, self.allow_no_owner) + + return resp + + def get_response(self, app, method=None, container=None, obj=None, + headers=None, body=None, query=None): + """ + Wrap up get_response call to hook with acl handling method. + """ + if not self.acl_handler: + # we should set acl_handler all time before calling get_response + raise Exception('get_response called before set_acl_handler') + resp = self.acl_handler.handle_acl( + app, method, container, obj, headers) + + # possible to skip recalling get_response_acl if resp is not + # None (e.g. HEAD) + if resp: + return resp + return self.get_acl_response(app, method, container, obj, + headers, body, query) + + def set_acl_handler(self, acl_handler): + self.acl_handler = acl_handler + + +class SigV4Request(SigV4Mixin, S3Request): + pass + + +class SigV4S3AclRequest(SigV4Mixin, S3AclRequest): + pass diff -Nru swift-2.17.0/swift/common/middleware/s3api/s3response.py swift-2.18.0/swift/common/middleware/s3api/s3response.py --- swift-2.17.0/swift/common/middleware/s3api/s3response.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/s3response.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,684 @@ +# Copyright (c) 2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import re +from UserDict import DictMixin +from functools import partial + +from swift.common import swob +from swift.common.utils import config_true_value +from swift.common.request_helpers import is_sys_meta + +from swift.common.middleware.s3api.utils import snake_to_camel, sysmeta_prefix +from swift.common.middleware.s3api.etree import Element, SubElement, tostring + + +class HeaderKey(str): + """ + A string object that normalizes string as S3 clients expect with title(). + """ + def title(self): + if self.lower() == 'etag': + # AWS Java SDK expects only 'ETag'. + return 'ETag' + if self.lower().startswith('x-amz-'): + # AWS headers returned by S3 are lowercase. + return self.lower() + return str.title(self) + + +class HeaderKeyDict(swob.HeaderKeyDict): + """ + Similar to the HeaderKeyDict class in Swift, but its key name is normalized + as S3 clients expect. + """ + def __getitem__(self, key): + return swob.HeaderKeyDict.__getitem__(self, HeaderKey(key)) + + def __setitem__(self, key, value): + return swob.HeaderKeyDict.__setitem__(self, HeaderKey(key), value) + + def __contains__(self, key): + return swob.HeaderKeyDict.__contains__(self, HeaderKey(key)) + + def __delitem__(self, key): + return swob.HeaderKeyDict.__delitem__(self, HeaderKey(key)) + + def get(self, key, default=None): + return swob.HeaderKeyDict.get(self, HeaderKey(key), default) + + def pop(self, key, default=None): + return swob.HeaderKeyDict.pop(self, HeaderKey(key), default) + + +class S3ResponseBase(object): + """ + Base class for swift3 responses. + """ + pass + + +class S3Response(S3ResponseBase, swob.Response): + """ + Similar to the Response class in Swift, but uses our HeaderKeyDict for + headers instead of Swift's HeaderKeyDict. This also translates Swift + specific headers to S3 headers. + """ + def __init__(self, *args, **kwargs): + swob.Response.__init__(self, *args, **kwargs) + + if self.etag: + # add double quotes to the etag header + self.etag = self.etag + + sw_sysmeta_headers = swob.HeaderKeyDict() + sw_headers = swob.HeaderKeyDict() + headers = HeaderKeyDict() + self.is_slo = False + + def is_swift3_sysmeta(sysmeta_key, server_type): + swift3_sysmeta_prefix = ( + 'x-%s-sysmeta-swift3' % server_type).lower() + return sysmeta_key.lower().startswith(swift3_sysmeta_prefix) + + def is_s3api_sysmeta(sysmeta_key, server_type): + s3api_sysmeta_prefix = sysmeta_prefix(_server_type).lower() + return sysmeta_key.lower().startswith(s3api_sysmeta_prefix) + + for key, val in self.headers.iteritems(): + if is_sys_meta('object', key) or is_sys_meta('container', key): + _server_type = key.split('-')[1] + if is_swift3_sysmeta(key, _server_type): + # To be compatible with older swift3, translate swift3 + # sysmeta to s3api sysmeta here + key = sysmeta_prefix(_server_type) + \ + key[len('x-%s-sysmeta-swift3-' % _server_type):] + + if key not in sw_sysmeta_headers: + # To avoid overwrite s3api sysmeta by older swift3 + # sysmeta set the key only when the key does not exist + sw_sysmeta_headers[key] = val + elif is_s3api_sysmeta(key, _server_type): + sw_sysmeta_headers[key] = val + else: + sw_headers[key] = val + + # Handle swift headers + for key, val in sw_headers.iteritems(): + _key = key.lower() + + if _key.startswith('x-object-meta-'): + # Note that AWS ignores user-defined headers with '=' in the + # header name. We translated underscores to '=5F' on the way + # in, though. 
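+                # e.g. 'x-object-meta-foo=5fbar' from Swift is returned to
+                # the S3 client as 'x-amz-meta-foo_bar'.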
+ headers['x-amz-meta-' + _key[14:].replace('=5f', '_')] = val + elif _key in ('content-length', 'content-type', + 'content-range', 'content-encoding', + 'content-disposition', 'content-language', + 'etag', 'last-modified', 'x-robots-tag', + 'cache-control', 'expires'): + headers[key] = val + elif _key == 'x-static-large-object': + # for delete slo + self.is_slo = config_true_value(val) + + self.headers = headers + # Used for pure swift header handling at the request layer + self.sw_headers = sw_headers + self.sysmeta_headers = sw_sysmeta_headers + + @classmethod + def from_swift_resp(cls, sw_resp): + """ + Create a new S3 response object based on the given Swift response. + """ + if sw_resp.app_iter: + body = None + app_iter = sw_resp.app_iter + else: + body = sw_resp.body + app_iter = None + + resp = cls(status=sw_resp.status, headers=sw_resp.headers, + request=sw_resp.request, body=body, app_iter=app_iter, + conditional_response=sw_resp.conditional_response) + resp.environ.update(sw_resp.environ) + + return resp + + def append_copy_resp_body(self, controller_name, last_modified): + elem = Element('Copy%sResult' % controller_name) + SubElement(elem, 'LastModified').text = last_modified + SubElement(elem, 'ETag').text = '"%s"' % self.etag + self.headers['Content-Type'] = 'application/xml' + self.body = tostring(elem) + self.etag = None + + +HTTPOk = partial(S3Response, status=200) +HTTPCreated = partial(S3Response, status=201) +HTTPAccepted = partial(S3Response, status=202) +HTTPNoContent = partial(S3Response, status=204) +HTTPPartialContent = partial(S3Response, status=206) + + +class ErrorResponse(S3ResponseBase, swob.HTTPException): + """ + S3 error object. + + Reference information about S3 errors is available at: + http://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html + """ + _status = '' + _msg = '' + _code = '' + + def __init__(self, msg=None, *args, **kwargs): + if msg: + self._msg = msg + if not self._code: + self._code = self.__class__.__name__ + + self.info = kwargs.copy() + for reserved_key in ('headers', 'body'): + if self.info.get(reserved_key): + del(self.info[reserved_key]) + + swob.HTTPException.__init__(self, status=self._status, + app_iter=self._body_iter(), + content_type='application/xml', *args, + **kwargs) + self.headers = HeaderKeyDict(self.headers) + + def _body_iter(self): + error_elem = Element('Error') + SubElement(error_elem, 'Code').text = self._code + SubElement(error_elem, 'Message').text = self._msg + if 'swift.trans_id' in self.environ: + request_id = self.environ['swift.trans_id'] + SubElement(error_elem, 'RequestId').text = request_id + + self._dict_to_etree(error_elem, self.info) + + yield tostring(error_elem, use_s3ns=False) + + def _dict_to_etree(self, parent, d): + for key, value in d.items(): + tag = re.sub('\W', '', snake_to_camel(key)) + elem = SubElement(parent, tag) + + if isinstance(value, (dict, DictMixin)): + self._dict_to_etree(elem, value) + else: + try: + elem.text = str(value) + except ValueError: + # We set an invalid string for XML. + elem.text = '(invalid string)' + + +class AccessDenied(ErrorResponse): + _status = '403 Forbidden' + _msg = 'Access Denied.' + + +class AccountProblem(ErrorResponse): + _status = '403 Forbidden' + _msg = 'There is a problem with your AWS account that prevents the ' \ + 'operation from completing successfully.' + + +class AmbiguousGrantByEmailAddress(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The e-mail address you provided is associated with more than ' \ + 'one account.' 
+ + +class AuthorizationHeaderMalformed(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The authorization header is malformed; the authorization ' \ + 'header requires three components: Credential, SignedHeaders, ' \ + 'and Signature.' + + +class AuthorizationQueryParametersError(ErrorResponse): + _status = '400 Bad Request' + + +class BadDigest(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The Content-MD5 you specified did not match what we received.' + + +class BucketAlreadyExists(ErrorResponse): + _status = '409 Conflict' + _msg = 'The requested bucket name is not available. The bucket ' \ + 'namespace is shared by all users of the system. Please select a ' \ + 'different name and try again.' + + def __init__(self, bucket, msg=None, *args, **kwargs): + ErrorResponse.__init__(self, msg, bucket_name=bucket, *args, **kwargs) + + +class BucketAlreadyOwnedByYou(ErrorResponse): + _status = '409 Conflict' + _msg = 'Your previous request to create the named bucket succeeded and ' \ + 'you already own it.' + + def __init__(self, bucket, msg=None, *args, **kwargs): + ErrorResponse.__init__(self, msg, bucket_name=bucket, *args, **kwargs) + + +class BucketNotEmpty(ErrorResponse): + _status = '409 Conflict' + _msg = 'The bucket you tried to delete is not empty' + + +class CredentialsNotSupported(ErrorResponse): + _status = '400 Bad Request' + _msg = 'This request does not support credentials.' + + +class CrossLocationLoggingProhibited(ErrorResponse): + _status = '403 Forbidden' + _msg = 'Cross location logging not allowed. Buckets in one geographic ' \ + 'location cannot log information to a bucket in another location.' + + +class EntityTooSmall(ErrorResponse): + _status = '400 Bad Request' + _msg = 'Your proposed upload is smaller than the minimum allowed object ' \ + 'size.' + + +class EntityTooLarge(ErrorResponse): + _status = '400 Bad Request' + _msg = 'Your proposed upload exceeds the maximum allowed object size.' + + +class ExpiredToken(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The provided token has expired.' + + +class IllegalVersioningConfigurationException(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The Versioning configuration specified in the request is invalid.' + + +class IncompleteBody(ErrorResponse): + _status = '400 Bad Request' + _msg = 'You did not provide the number of bytes specified by the ' \ + 'Content-Length HTTP header.' + + +class IncorrectNumberOfFilesInPostRequest(ErrorResponse): + _status = '400 Bad Request' + _msg = 'POST requires exactly one file upload per request.' + + +class InlineDataTooLarge(ErrorResponse): + _status = '400 Bad Request' + _msg = 'Inline data exceeds the maximum allowed size.' + + +class InternalError(ErrorResponse): + _status = '500 Internal Server Error' + _msg = 'We encountered an internal error. Please try again.' + + +class InvalidAccessKeyId(ErrorResponse): + _status = '403 Forbidden' + _msg = 'The AWS Access Key Id you provided does not exist in our records.' + + +class InvalidArgument(ErrorResponse): + _status = '400 Bad Request' + _msg = 'Invalid Argument.' + + def __init__(self, name, value, msg=None, *args, **kwargs): + ErrorResponse.__init__(self, msg, argument_name=name, + argument_value=value, *args, **kwargs) + + +class InvalidBucketName(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The specified bucket is not valid.' 
+ + def __init__(self, bucket, msg=None, *args, **kwargs): + ErrorResponse.__init__(self, msg, bucket_name=bucket, *args, **kwargs) + + +class InvalidBucketState(ErrorResponse): + _status = '409 Conflict' + _msg = 'The request is not valid with the current state of the bucket.' + + +class InvalidDigest(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The Content-MD5 you specified was an invalid.' + + +class InvalidLocationConstraint(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The specified location constraint is not valid.' + + +class InvalidObjectState(ErrorResponse): + _status = '403 Forbidden' + _msg = 'The operation is not valid for the current state of the object.' + + +class InvalidPart(ErrorResponse): + _status = '400 Bad Request' + _msg = 'One or more of the specified parts could not be found. The part ' \ + 'might not have been uploaded, or the specified entity tag might ' \ + 'not have matched the part\'s entity tag.' + + +class InvalidPartOrder(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The list of parts was not in ascending order.Parts list must ' \ + 'specified in order by part number.' + + +class InvalidPayer(ErrorResponse): + _status = '403 Forbidden' + _msg = 'All access to this object has been disabled.' + + +class InvalidPolicyDocument(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The content of the form does not meet the conditions specified ' \ + 'in the policy document.' + + +class InvalidRange(ErrorResponse): + _status = '416 Requested Range Not Satisfiable' + _msg = 'The requested range cannot be satisfied.' + + +class InvalidRequest(ErrorResponse): + _status = '400 Bad Request' + _msg = 'Invalid Request.' + + +class InvalidSecurity(ErrorResponse): + _status = '403 Forbidden' + _msg = 'The provided security credentials are not valid.' + + +class InvalidSOAPRequest(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The SOAP request body is invalid.' + + +class InvalidStorageClass(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The storage class you specified is not valid.' + + +class InvalidTargetBucketForLogging(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The target bucket for logging does not exist, is not owned by ' \ + 'you, or does not have the appropriate grants for the ' \ + 'log-delivery group.' + + def __init__(self, bucket, msg=None, *args, **kwargs): + ErrorResponse.__init__(self, msg, target_bucket=bucket, *args, + **kwargs) + + +class InvalidToken(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The provided token is malformed or otherwise invalid.' + + +class InvalidURI(ErrorResponse): + _status = '400 Bad Request' + _msg = 'Couldn\'t parse the specified URI.' + + def __init__(self, uri, msg=None, *args, **kwargs): + ErrorResponse.__init__(self, msg, uri=uri, *args, **kwargs) + + +class KeyTooLong(ErrorResponse): + _status = '400 Bad Request' + _msg = 'Your key is too long.' + + +class MalformedACLError(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The XML you provided was not well-formed or did not validate ' \ + 'against our published schema.' + + +class MalformedPOSTRequest(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The body of your POST request is not well-formed ' \ + 'multipart/form-data.' + + +class MalformedXML(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The XML you provided was not well-formed or did not validate ' \ + 'against our published schema.' 
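Error classes that take positional arguments, such as InvalidArgument and InvalidBucketName above, forward them to ErrorResponse as keyword arguments, so they surface as extra elements alongside `<Code>` and `<Message>`. Because ErrorResponse also subclasses swob.HTTPException, handler code can simply raise one of these and let it propagate as the WSGI response. A hedged sketch of that usage, in which the handler function itself is hypothetical and not taken from the diff:

    from swift.common.middleware.s3api.s3response import (
        InvalidArgument, InvalidBucketName)
    from swift.common.middleware.s3api.utils import validate_bucket_name

    # Hypothetical request-handling fragment, for illustration only.
    def parse_create_bucket_args(bucket, max_keys):
        if not validate_bucket_name(bucket, True):
            # 400 response carrying <BucketName>...</BucketName>
            raise InvalidBucketName(bucket)
        try:
            return int(max_keys)
        except ValueError:
            # 400 response carrying <ArgumentName>max-keys</ArgumentName>
            # and <ArgumentValue>...</ArgumentValue>
            raise InvalidArgument('max-keys', max_keys)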
+ + +class MaxMessageLengthExceeded(ErrorResponse): + _status = '400 Bad Request' + _msg = 'Your request was too big.' + + +class MaxPostPreDataLengthExceededError(ErrorResponse): + _status = '400 Bad Request' + _msg = 'Your POST request fields preceding the upload file were too large.' + + +class MetadataTooLarge(ErrorResponse): + _status = '400 Bad Request' + _msg = 'Your metadata headers exceed the maximum allowed metadata size.' + + +class MethodNotAllowed(ErrorResponse): + _status = '405 Method Not Allowed' + _msg = 'The specified method is not allowed against this resource.' + + def __init__(self, method, resource_type, msg=None, *args, **kwargs): + ErrorResponse.__init__(self, msg, method=method, + resource_type=resource_type, *args, **kwargs) + + +class MissingContentLength(ErrorResponse): + _status = '411 Length Required' + _msg = 'You must provide the Content-Length HTTP header.' + + +class MissingRequestBodyError(ErrorResponse): + _status = '400 Bad Request' + _msg = 'Request body is empty.' + + +class MissingSecurityElement(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The SOAP 1.1 request is missing a security element.' + + +class MissingSecurityHeader(ErrorResponse): + _status = '400 Bad Request' + _msg = 'Your request was missing a required header.' + + +class NoLoggingStatusForKey(ErrorResponse): + _status = '400 Bad Request' + _msg = 'There is no such thing as a logging status sub-resource for a key.' + + +class NoSuchBucket(ErrorResponse): + _status = '404 Not Found' + _msg = 'The specified bucket does not exist.' + + def __init__(self, bucket, msg=None, *args, **kwargs): + if not bucket: + raise InternalError() + ErrorResponse.__init__(self, msg, bucket_name=bucket, *args, **kwargs) + + +class NoSuchKey(ErrorResponse): + _status = '404 Not Found' + _msg = 'The specified key does not exist.' + + def __init__(self, key, msg=None, *args, **kwargs): + if not key: + raise InternalError() + ErrorResponse.__init__(self, msg, key=key, *args, **kwargs) + + +class NoSuchLifecycleConfiguration(ErrorResponse): + _status = '404 Not Found' + _msg = 'The lifecycle configuration does not exist. .' + + +class NoSuchUpload(ErrorResponse): + _status = '404 Not Found' + _msg = 'The specified multipart upload does not exist. The upload ID ' \ + 'might be invalid, or the multipart upload might have been ' \ + 'aborted or completed.' + + +class NoSuchVersion(ErrorResponse): + _status = '404 Not Found' + _msg = 'The specified version does not exist.' + + def __init__(self, key, version_id, msg=None, *args, **kwargs): + if not key: + raise InternalError() + ErrorResponse.__init__(self, msg, key=key, version_id=version_id, + *args, **kwargs) + + +# NotImplemented is a python built-in constant. Use S3NotImplemented instead. +class S3NotImplemented(ErrorResponse): + _status = '501 Not Implemented' + _msg = 'Not implemented.' + _code = 'NotImplemented' + + +class NotSignedUp(ErrorResponse): + _status = '403 Forbidden' + _msg = 'Your account is not signed up for the Amazon S3 service.' + + +class NotSuchBucketPolicy(ErrorResponse): + _status = '404 Not Found' + _msg = 'The specified bucket does not have a bucket policy.' + + +class OperationAborted(ErrorResponse): + _status = '409 Conflict' + _msg = 'A conflicting conditional operation is currently in progress ' \ + 'against this resource. Please try again.' + + +class PermanentRedirect(ErrorResponse): + _status = '301 Moved Permanently' + _msg = 'The bucket you are attempting to access must be addressed using ' \ + 'the specified endpoint. 
Please send all future requests to this ' \ + 'endpoint.' + + +class PreconditionFailed(ErrorResponse): + _status = '412 Precondition Failed' + _msg = 'At least one of the preconditions you specified did not hold.' + + +class Redirect(ErrorResponse): + _status = '307 Moved Temporarily' + _msg = 'Temporary redirect.' + + +class RestoreAlreadyInProgress(ErrorResponse): + _status = '409 Conflict' + _msg = 'Object restore is already in progress.' + + +class RequestIsNotMultiPartContent(ErrorResponse): + _status = '400 Bad Request' + _msg = 'Bucket POST must be of the enclosure-type multipart/form-data.' + + +class RequestTimeout(ErrorResponse): + _status = '400 Bad Request' + _msg = 'Your socket connection to the server was not read from or ' \ + 'written to within the timeout period.' + + +class RequestTimeTooSkewed(ErrorResponse): + _status = '403 Forbidden' + _msg = 'The difference between the request time and the current time ' \ + 'is too large.' + + +class RequestTorrentOfBucketError(ErrorResponse): + _status = '400 Bad Request' + _msg = 'Requesting the torrent file of a bucket is not permitted.' + + +class SignatureDoesNotMatch(ErrorResponse): + _status = '403 Forbidden' + _msg = 'The request signature we calculated does not match the ' \ + 'signature you provided. Check your key and signing method.' + + +class ServiceUnavailable(ErrorResponse): + _status = '503 Service Unavailable' + _msg = 'Please reduce your request rate.' + + +class SlowDown(ErrorResponse): + _status = '503 Slow Down' + _msg = 'Please reduce your request rate.' + + +class TemporaryRedirect(ErrorResponse): + _status = '307 Moved Temporarily' + _msg = 'You are being redirected to the bucket while DNS updates.' + + +class TokenRefreshRequired(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The provided token must be refreshed.' + + +class TooManyBuckets(ErrorResponse): + _status = '400 Bad Request' + _msg = 'You have attempted to create more buckets than allowed.' + + +class UnexpectedContent(ErrorResponse): + _status = '400 Bad Request' + _msg = 'This request does not support content.' + + +class UnresolvableGrantByEmailAddress(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The e-mail address you provided does not match any account on ' \ + 'record.' + + +class UserKeyMustBeSpecified(ErrorResponse): + _status = '400 Bad Request' + _msg = 'The bucket POST must contain the specified field name. If it is ' \ + 'specified, please check the order of the fields.' diff -Nru swift-2.17.0/swift/common/middleware/s3api/s3token.py swift-2.18.0/swift/common/middleware/s3api/s3token.py --- swift-2.17.0/swift/common/middleware/s3api/s3token.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/s3token.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,324 @@ +# Copyright 2012 OpenStack Foundation +# Copyright 2010 United States Government as represented by the +# Administrator of the National Aeronautics and Space Administration. +# Copyright 2011,2012 Akira YOSHIYAMA +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +# License for the specific language governing permissions and limitations +# under the License. + +# This source code is based ./auth_token.py and ./ec2_token.py. +# See them for their copyright. + +""" +------------------- +S3 Token Middleware +------------------- +s3token middleware is for authentication with s3api + keystone. +This middleware: + +* Gets a request from the s3api middleware with an S3 Authorization + access key. +* Validates s3 token with Keystone. +* Transforms the account name to AUTH_%(tenant_name). + +""" + +import base64 +import json + +import requests +import six +from six.moves import urllib + +from swift.common.swob import Request, HTTPBadRequest, HTTPUnauthorized, \ + HTTPException +from swift.common.utils import config_true_value, split_path, get_logger +from swift.common.wsgi import ConfigFileError + + +PROTOCOL_NAME = 'S3 Token Authentication' + +# Headers to purge if they came from (or may have come from) the client +KEYSTONE_AUTH_HEADERS = ( + 'X-Identity-Status', 'X-Service-Identity-Status', + 'X-Domain-Id', 'X-Service-Domain-Id', + 'X-Domain-Name', 'X-Service-Domain-Name', + 'X-Project-Id', 'X-Service-Project-Id', + 'X-Project-Name', 'X-Service-Project-Name', + 'X-Project-Domain-Id', 'X-Service-Project-Domain-Id', + 'X-Project-Domain-Name', 'X-Service-Project-Domain-Name', + 'X-User-Id', 'X-Service-User-Id', + 'X-User-Name', 'X-Service-User-Name', + 'X-User-Domain-Id', 'X-Service-User-Domain-Id', + 'X-User-Domain-Name', 'X-Service-User-Domain-Name', + 'X-Roles', 'X-Service-Roles', + 'X-Is-Admin-Project', + 'X-Service-Catalog', + # Deprecated headers, too... + 'X-Tenant-Id', + 'X-Tenant-Name', + 'X-Tenant', + 'X-User', + 'X-Role', +) + + +def parse_v2_response(token): + access_info = token['access'] + headers = { + 'X-Identity-Status': 'Confirmed', + 'X-Roles': ','.join(r['name'] + for r in access_info['user']['roles']), + 'X-User-Id': access_info['user']['id'], + 'X-User-Name': access_info['user']['name'], + 'X-Tenant-Id': access_info['token']['tenant']['id'], + 'X-Tenant-Name': access_info['token']['tenant']['name'], + 'X-Project-Id': access_info['token']['tenant']['id'], + 'X-Project-Name': access_info['token']['tenant']['name'], + } + return ( + headers, + access_info['token'].get('id'), + access_info['token']['tenant']) + + +def parse_v3_response(token): + token = token['token'] + headers = { + 'X-Identity-Status': 'Confirmed', + 'X-Roles': ','.join(r['name'] + for r in token['roles']), + 'X-User-Id': token['user']['id'], + 'X-User-Name': token['user']['name'], + 'X-User-Domain-Id': token['user']['domain']['id'], + 'X-User-Domain-Name': token['user']['domain']['name'], + 'X-Tenant-Id': token['project']['id'], + 'X-Tenant-Name': token['project']['name'], + 'X-Project-Id': token['project']['id'], + 'X-Project-Name': token['project']['name'], + 'X-Project-Domain-Id': token['project']['domain']['id'], + 'X-Project-Domain-Name': token['project']['domain']['name'], + } + return headers, None, token['project'] + + +class S3Token(object): + """Middleware that handles S3 authentication.""" + + def __init__(self, app, conf): + """Common initialization code.""" + self._app = app + self._logger = get_logger( + conf, log_route=conf.get('log_name', 's3token')) + self._logger.debug('Starting the %s component', PROTOCOL_NAME) + self._timeout = float(conf.get('http_timeout', '10.0')) + if not (0 < self._timeout <= 60): + raise ValueError('http_timeout must be between 0 and 60 seconds') + self._reseller_prefix = conf.get('reseller_prefix', 'AUTH_') + 
+ self._delay_auth_decision = config_true_value( + conf.get('delay_auth_decision')) + + # where to find the auth service (we use this to validate tokens) + self._request_uri = conf.get('auth_uri', '').rstrip('/') + '/s3tokens' + parsed = urllib.parse.urlsplit(self._request_uri) + if not parsed.scheme or not parsed.hostname: + raise ConfigFileError( + 'Invalid auth_uri; must include scheme and host') + if parsed.scheme not in ('http', 'https'): + raise ConfigFileError( + 'Invalid auth_uri; scheme must be http or https') + if parsed.query or parsed.fragment or '@' in parsed.netloc: + raise ConfigFileError('Invalid auth_uri; must not include ' + 'username, query, or fragment') + + # SSL + insecure = config_true_value(conf.get('insecure')) + cert_file = conf.get('certfile') + key_file = conf.get('keyfile') + + if insecure: + self._verify = False + elif cert_file and key_file: + self._verify = (cert_file, key_file) + elif cert_file: + self._verify = cert_file + else: + self._verify = None + + def _deny_request(self, code): + error_cls, message = { + 'AccessDenied': (HTTPUnauthorized, 'Access denied'), + 'InvalidURI': (HTTPBadRequest, + 'Could not parse the specified URI'), + }[code] + resp = error_cls(content_type='text/xml') + error_msg = ('<?xml version="1.0" encoding="UTF-8"?>\r\n' + '<Error>\r\n  <Code>%s</Code>\r\n  ' + '<Message>%s</Message>\r\n</Error>\r\n' % + (code, message)) + if six.PY3: + error_msg = error_msg.encode() + resp.body = error_msg + return resp + + def _json_request(self, creds_json): + headers = {'Content-Type': 'application/json'} + try: + response = requests.post(self._request_uri, + headers=headers, data=creds_json, + verify=self._verify, + timeout=self._timeout) + except requests.exceptions.RequestException as e: + self._logger.info('HTTP connection exception: %s', e) + raise self._deny_request('InvalidURI') + + if response.status_code < 200 or response.status_code >= 300: + self._logger.debug('Keystone reply error: status=%s reason=%s', + response.status_code, response.reason) + raise self._deny_request('AccessDenied') + + return response + + def __call__(self, environ, start_response): + """Handle incoming request: authenticate and send downstream.""" + req = Request(environ) + self._logger.debug('Calling S3Token middleware.') + + # Always drop auth headers if we're first in the pipeline + if 'keystone.token_info' not in req.environ: + req.headers.update({h: None for h in KEYSTONE_AUTH_HEADERS}) + + try: + parts = split_path(req.path, 1, 4, True) + version, account, container, obj = parts + except ValueError: + msg = 'Not a path query: %s, skipping.' % req.path + self._logger.debug(msg) + return self._app(environ, start_response) + + # Read request signature and access id. + s3_auth_details = req.environ.get('s3api.auth_details') + if not s3_auth_details: + msg = 'No authorization details from s3api. skipping.' + self._logger.debug(msg) + return self._app(environ, start_response) + + access = s3_auth_details['access_key'] + if isinstance(access, six.binary_type): + access = access.decode('utf-8') + + signature = s3_auth_details['signature'] + if isinstance(signature, six.binary_type): + signature = signature.decode('utf-8') + + string_to_sign = s3_auth_details['string_to_sign'] + if isinstance(string_to_sign, six.text_type): + string_to_sign = string_to_sign.encode('utf-8') + token = base64.urlsafe_b64encode(string_to_sign).encode('ascii') + + # NOTE(chmou): This is to handle the special case with nova + # when we have the option s3_affix_tenant. We will force it to + # connect to another account than the one + # authenticated.
Before people start getting worried about + # security, I should point that we are connecting with + # username/token specified by the user but instead of + # connecting to its own account we will force it to go to an + # another account. In a normal scenario if that user don't + # have the reseller right it will just fail but since the + # reseller account can connect to every account it is allowed + # by the swift_auth middleware. + force_tenant = None + if ':' in access: + access, force_tenant = access.split(':') + + # Authenticate request. + creds = {'credentials': {'access': access, + 'token': token, + 'signature': signature}} + creds_json = json.dumps(creds) + self._logger.debug('Connecting to Keystone sending this JSON: %s', + creds_json) + # NOTE(vish): We could save a call to keystone by having + # keystone return token, tenant, user, and roles + # from this call. + # + # NOTE(chmou): We still have the same problem we would need to + # change token_auth to detect if we already + # identified and not doing a second query and just + # pass it through to swiftauth in this case. + try: + # NB: requests.Response, not swob.Response + resp = self._json_request(creds_json) + except HTTPException as e_resp: + if self._delay_auth_decision: + msg = 'Received error, deferring rejection based on error: %s' + self._logger.debug(msg, e_resp.status) + return self._app(environ, start_response) + else: + msg = 'Received error, rejecting request with error: %s' + self._logger.debug(msg, e_resp.status) + # NB: swob.Response, not requests.Response + return e_resp(environ, start_response) + + self._logger.debug('Keystone Reply: Status: %d, Output: %s', + resp.status_code, resp.content) + + try: + token = resp.json() + if 'access' in token: + headers, token_id, tenant = parse_v2_response(token) + elif 'token' in token: + headers, token_id, tenant = parse_v3_response(token) + else: + raise ValueError + + # Populate the environment similar to auth_token, + # so we don't have to contact Keystone again. 
+ # + # Note that although the strings are unicode following json + # deserialization, Swift's HeaderEnvironProxy handles ensuring + # they're stored as native strings + req.headers.update(headers) + req.environ['keystone.token_info'] = token + except (ValueError, KeyError, TypeError): + if self._delay_auth_decision: + error = ('Error on keystone reply: %d %s - ' + 'deferring rejection downstream') + self._logger.debug(error, resp.status_code, resp.content) + return self._app(environ, start_response) + else: + error = ('Error on keystone reply: %d %s - ' + 'rejecting request') + self._logger.debug(error, resp.status_code, resp.content) + return self._deny_request('InvalidURI')( + environ, start_response) + + req.headers['X-Auth-Token'] = token_id + tenant_to_connect = force_tenant or tenant['id'] + if six.PY2 and isinstance(tenant_to_connect, six.text_type): + tenant_to_connect = tenant_to_connect.encode('utf-8') + self._logger.debug('Connecting with tenant: %s', tenant_to_connect) + new_tenant_name = '%s%s' % (self._reseller_prefix, tenant_to_connect) + environ['PATH_INFO'] = environ['PATH_INFO'].replace(account, + new_tenant_name) + return self._app(environ, start_response) + + +def filter_factory(global_conf, **local_conf): + """Returns a WSGI filter app for use with paste.deploy.""" + conf = global_conf.copy() + conf.update(local_conf) + + def auth_filter(app): + return S3Token(app, conf) + return auth_filter diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/access_control_policy.rng swift-2.18.0/swift/common/middleware/s3api/schema/access_control_policy.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/access_control_policy.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/access_control_policy.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/bucket_logging_status.rng swift-2.18.0/swift/common/middleware/s3api/schema/bucket_logging_status.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/bucket_logging_status.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/bucket_logging_status.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/common.rng swift-2.18.0/swift/common/middleware/s3api/schema/common.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/common.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/common.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + STANDARD + REDUCED_REDUNDANCY + GLACIER + UNKNOWN + + + + + + + + + + + AmazonCustomerByEmail + + + + + + + + CanonicalUser + + + + + + Group + + + + + + + + + + READ + WRITE + READ_ACP + WRITE_ACP + FULL_CONTROL + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/complete_multipart_upload_result.rng swift-2.18.0/swift/common/middleware/s3api/schema/complete_multipart_upload_result.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/complete_multipart_upload_result.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/complete_multipart_upload_result.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/complete_multipart_upload.rng 
swift-2.18.0/swift/common/middleware/s3api/schema/complete_multipart_upload.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/complete_multipart_upload.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/complete_multipart_upload.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/copy_object_result.rng swift-2.18.0/swift/common/middleware/s3api/schema/copy_object_result.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/copy_object_result.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/copy_object_result.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/copy_part_result.rng swift-2.18.0/swift/common/middleware/s3api/schema/copy_part_result.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/copy_part_result.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/copy_part_result.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/create_bucket_configuration.rng swift-2.18.0/swift/common/middleware/s3api/schema/create_bucket_configuration.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/create_bucket_configuration.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/create_bucket_configuration.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,11 @@ + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/delete_result.rng swift-2.18.0/swift/common/middleware/s3api/schema/delete_result.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/delete_result.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/delete_result.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,47 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/delete.rng swift-2.18.0/swift/common/middleware/s3api/schema/delete.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/delete.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/delete.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/error.rng swift-2.18.0/swift/common/middleware/s3api/schema/error.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/error.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/error.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/initiate_multipart_upload_result.rng swift-2.18.0/swift/common/middleware/s3api/schema/initiate_multipart_upload_result.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/initiate_multipart_upload_result.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/initiate_multipart_upload_result.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/lifecycle_configuration.rng 
swift-2.18.0/swift/common/middleware/s3api/schema/lifecycle_configuration.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/lifecycle_configuration.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/lifecycle_configuration.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,56 @@ + + + + + + + + + + + + + + + + + + + Enabled + Disabled + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/list_all_my_buckets_result.rng swift-2.18.0/swift/common/middleware/s3api/schema/list_all_my_buckets_result.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/list_all_my_buckets_result.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/list_all_my_buckets_result.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/list_bucket_result.rng swift-2.18.0/swift/common/middleware/s3api/schema/list_bucket_result.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/list_bucket_result.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/list_bucket_result.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,93 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/list_multipart_uploads_result.rng swift-2.18.0/swift/common/middleware/s3api/schema/list_multipart_uploads_result.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/list_multipart_uploads_result.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/list_multipart_uploads_result.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,73 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/list_parts_result.rng swift-2.18.0/swift/common/middleware/s3api/schema/list_parts_result.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/list_parts_result.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/list_parts_result.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/list_versions_result.rng swift-2.18.0/swift/common/middleware/s3api/schema/list_versions_result.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/list_versions_result.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/list_versions_result.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,104 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/location_constraint.rng swift-2.18.0/swift/common/middleware/s3api/schema/location_constraint.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/location_constraint.rng 1970-01-01 00:00:00.000000000 +0000 +++ 
swift-2.18.0/swift/common/middleware/s3api/schema/location_constraint.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,8 @@ + + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/schema/versioning_configuration.rng swift-2.18.0/swift/common/middleware/s3api/schema/versioning_configuration.rng --- swift-2.17.0/swift/common/middleware/s3api/schema/versioning_configuration.rng 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/schema/versioning_configuration.rng 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,25 @@ + + + + + + + + + Enabled + Suspended + + + + + + + Enabled + Disabled + + + + + + + diff -Nru swift-2.17.0/swift/common/middleware/s3api/subresource.py swift-2.18.0/swift/common/middleware/s3api/subresource.py --- swift-2.17.0/swift/common/middleware/s3api/subresource.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/subresource.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,563 @@ +# Copyright (c) 2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +--------------------------- +s3api's ACLs implementation +--------------------------- +s3api uses a different implementation approach to achieve S3 ACLs. + +First, we should understand what we have to design to achieve real S3 ACLs. +Current s3api(real S3)'s ACLs Model is as follows:: + + AccessControlPolicy: + Owner: + AccessControlList: + Grant[n]: + (Grantee, Permission) + +Each bucket or object has its own acl consisting of Owner and +AcessControlList. AccessControlList can contain some Grants. +By default, AccessControlList has only one Grant to allow FULL +CONTROLL to owner. Each Grant includes single pair with Grantee, +Permission. Grantee is the user (or user group) allowed the given permission. + +This module defines the groups and the relation tree. + +If you wanna get more information about S3's ACLs model in detail, +please see official documentation here, + +http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html + +""" +from functools import partial + +from swift.common.utils import json + +from swift.common.middleware.s3api.s3response import InvalidArgument, \ + MalformedACLError, S3NotImplemented, InvalidRequest, AccessDenied +from swift.common.middleware.s3api.etree import Element, SubElement, tostring +from swift.common.middleware.s3api.utils import sysmeta_header +from swift.common.middleware.s3api.exception import InvalidSubresource + +XMLNS_XSI = 'http://www.w3.org/2001/XMLSchema-instance' +PERMISSIONS = ['FULL_CONTROL', 'READ', 'WRITE', 'READ_ACP', 'WRITE_ACP'] +LOG_DELIVERY_USER = '.log_delivery' + + +def encode_acl(resource, acl): + """ + Encode an ACL instance to Swift metadata. + + Given a resource type and an ACL instance, this method returns HTTP + headers, which can be used for Swift metadata. 
+ """ + header_value = {"Owner": acl.owner.id} + grants = [] + for grant in acl.grants: + grant = {"Permission": grant.permission, + "Grantee": str(grant.grantee)} + grants.append(grant) + header_value.update({"Grant": grants}) + headers = {} + key = sysmeta_header(resource, 'acl') + headers[key] = json.dumps(header_value, separators=(',', ':')) + + return headers + + +def decode_acl(resource, headers, allow_no_owner): + """ + Decode Swift metadata to an ACL instance. + + Given a resource type and HTTP headers, this method returns an ACL + instance. + """ + value = '' + + key = sysmeta_header(resource, 'acl') + if key in headers: + value = headers[key] + + if value == '': + # Fix me: In the case of value is empty or not dict instance, + # I want an instance of Owner as None. + # However, in the above process would occur error in reference + # to an instance variable of Owner. + return ACL(Owner(None, None), [], True, allow_no_owner) + + try: + encode_value = json.loads(value) + if not isinstance(encode_value, dict): + return ACL(Owner(None, None), [], True, allow_no_owner) + + id = None + name = None + grants = [] + if 'Owner' in encode_value: + id = encode_value['Owner'] + name = encode_value['Owner'] + if 'Grant' in encode_value: + for grant in encode_value['Grant']: + grantee = None + # pylint: disable-msg=E1101 + for group in Group.__subclasses__(): + if group.__name__ == grant['Grantee']: + grantee = group() + if not grantee: + grantee = User(grant['Grantee']) + permission = grant['Permission'] + grants.append(Grant(grantee, permission)) + return ACL(Owner(id, name), grants, True, allow_no_owner) + except Exception as e: + raise InvalidSubresource((resource, 'acl', value), e) + + +class Grantee(object): + """ + Base class for grantee. + + Methods: + + * init: create a Grantee instance + * elem: create an ElementTree from itself + + Static Methods: + + * from_header: convert a grantee string in the HTTP header + to an Grantee instance. + * from_elem: convert a ElementTree to an Grantee instance. + + """ + # Needs confirmation whether we really need these methods or not. + # * encode (method): create a JSON which includes whole own elements + # * encode_from_elem (static method): convert from an ElementTree to a JSON + # * elem_from_json (static method): convert from a JSON to an ElementTree + # * from_json (static method): convert a Json string to an Grantee + # instance. + + def __contains__(self, key): + """ + The key argument is a S3 user id. This method checks that the user id + belongs to this class. + """ + raise S3NotImplemented() + + def elem(self): + """ + Get an etree element of this instance. + """ + raise S3NotImplemented() + + @staticmethod + def from_elem(elem): + type = elem.get('{%s}type' % XMLNS_XSI) + if type == 'CanonicalUser': + value = elem.find('./ID').text + return User(value) + elif type == 'Group': + value = elem.find('./URI').text + subclass = get_group_subclass_from_uri(value) + return subclass() + elif type == 'AmazonCustomerByEmail': + raise S3NotImplemented() + else: + raise MalformedACLError() + + @staticmethod + def from_header(grantee): + """ + Convert a grantee string in the HTTP header to an Grantee instance. 
+ """ + type, value = grantee.split('=', 1) + value = value.strip('"\'') + if type == 'id': + return User(value) + elif type == 'emailAddress': + raise S3NotImplemented() + elif type == 'uri': + # return a subclass instance of Group class + subclass = get_group_subclass_from_uri(value) + return subclass() + else: + raise InvalidArgument(type, value, + 'Argument format not recognized') + + +class User(Grantee): + """ + Canonical user class for S3 accounts. + """ + type = 'CanonicalUser' + + def __init__(self, name): + self.id = name + self.display_name = name + + def __contains__(self, key): + return key == self.id + + def elem(self): + elem = Element('Grantee', nsmap={'xsi': XMLNS_XSI}) + elem.set('{%s}type' % XMLNS_XSI, self.type) + SubElement(elem, 'ID').text = self.id + SubElement(elem, 'DisplayName').text = self.display_name + return elem + + def __str__(self): + return self.display_name + + +class Owner(object): + """ + Owner class for S3 accounts + """ + def __init__(self, id, name): + self.id = id + self.name = name + + +def get_group_subclass_from_uri(uri): + """ + Convert a URI to one of the predefined groups. + """ + for group in Group.__subclasses__(): # pylint: disable-msg=E1101 + if group.uri == uri: + return group + raise InvalidArgument('uri', uri, 'Invalid group uri') + + +class Group(Grantee): + """ + Base class for Amazon S3 Predefined Groups + """ + type = 'Group' + uri = '' + + def __init__(self): + # Initialize method to clarify this has nothing to do + pass + + def elem(self): + elem = Element('Grantee', nsmap={'xsi': XMLNS_XSI}) + elem.set('{%s}type' % XMLNS_XSI, self.type) + SubElement(elem, 'URI').text = self.uri + + return elem + + def __str__(self): + return self.__class__.__name__ + + +def canned_acl_grantees(bucket_owner, object_owner=None): + """ + A set of predefined grants supported by AWS S3. + """ + owner = object_owner or bucket_owner + + return { + 'private': [ + ('FULL_CONTROL', User(owner.name)), + ], + 'public-read': [ + ('READ', AllUsers()), + ('FULL_CONTROL', User(owner.name)), + ], + 'public-read-write': [ + ('READ', AllUsers()), + ('WRITE', AllUsers()), + ('FULL_CONTROL', User(owner.name)), + ], + 'authenticated-read': [ + ('READ', AuthenticatedUsers()), + ('FULL_CONTROL', User(owner.name)), + ], + 'bucket-owner-read': [ + ('READ', User(bucket_owner.name)), + ('FULL_CONTROL', User(owner.name)), + ], + 'bucket-owner-full-control': [ + ('FULL_CONTROL', User(owner.name)), + ('FULL_CONTROL', User(bucket_owner.name)), + ], + 'log-delivery-write': [ + ('WRITE', LogDelivery()), + ('READ_ACP', LogDelivery()), + ('FULL_CONTROL', User(owner.name)), + ], + } + + +class AuthenticatedUsers(Group): + """ + This group represents all AWS accounts. Access permission to this group + allows any AWS account to access the resource. However, all requests must + be signed (authenticated). + """ + uri = 'http://acs.amazonaws.com/groups/global/AuthenticatedUsers' + + def __contains__(self, key): + # s3api handles only signed requests. + return True + + +class AllUsers(Group): + """ + Access permission to this group allows anyone to access the resource. The + requests can be signed (authenticated) or unsigned (anonymous). Unsigned + requests omit the Authentication header in the request. + + Note: s3api regards unsigned requests as Swift API accesses, and bypasses + them to Swift. As a result, AllUsers behaves completely same as + AuthenticatedUsers. 
+ """ + uri = 'http://acs.amazonaws.com/groups/global/AllUsers' + + def __contains__(self, key): + return True + + +class LogDelivery(Group): + """ + WRITE and READ_ACP permissions on a bucket enables this group to write + server access logs to the bucket. + """ + uri = 'http://acs.amazonaws.com/groups/s3/LogDelivery' + + def __contains__(self, key): + if ':' in key: + tenant, user = key.split(':', 1) + else: + user = key + return user == LOG_DELIVERY_USER + + +class Grant(object): + """ + Grant Class which includes both Grantee and Permission + """ + + def __init__(self, grantee, permission): + """ + :param grantee: a grantee class or its subclass + :param permission: string + """ + if permission.upper() not in PERMISSIONS: + raise S3NotImplemented() + if not isinstance(grantee, Grantee): + raise ValueError() + self.grantee = grantee + self.permission = permission + + @classmethod + def from_elem(cls, elem): + """ + Convert an ElementTree to an ACL instance + """ + grantee = Grantee.from_elem(elem.find('./Grantee')) + permission = elem.find('./Permission').text + return cls(grantee, permission) + + def elem(self): + """ + Create an etree element. + """ + elem = Element('Grant') + elem.append(self.grantee.elem()) + SubElement(elem, 'Permission').text = self.permission + + return elem + + def allow(self, grantee, permission): + return permission == self.permission and grantee in self.grantee + + +class ACL(object): + """ + S3 ACL class. + + Refs (S3 API - acl-overview: + http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html): + + The sample ACL includes an Owner element identifying the owner via the + AWS account's canonical user ID. The Grant element identifies the grantee + (either an AWS account or a predefined group), and the permission granted. + This default ACL has one Grant element for the owner. You grant permissions + by adding Grant elements, each grant identifying the grantee and the + permission. + """ + metadata_name = 'acl' + root_tag = 'AccessControlPolicy' + max_xml_length = 200 * 1024 + + def __init__(self, owner, grants=None, s3_acl=False, allow_no_owner=False): + """ + :param owner: Owner instance for ACL instance + :param grants: a list of Grant instances + :param s3_acl: boolean indicates whether this class is used under + s3_acl is True or False (from s3api middleware configuration) + :param allow_no_owner: boolean indicates this ACL instance can be + handled when no owner information found + """ + self.owner = owner + self.grants = grants or [] + self.s3_acl = s3_acl + self.allow_no_owner = allow_no_owner + + def __repr__(self): + return tostring(self.elem()) + + @classmethod + def from_elem(cls, elem, s3_acl=False, allow_no_owner=False): + """ + Convert an ElementTree to an ACL instance + """ + id = elem.find('./Owner/ID').text + try: + name = elem.find('./Owner/DisplayName').text + except AttributeError: + name = id + + grants = [Grant.from_elem(e) + for e in elem.findall('./AccessControlList/Grant')] + return cls(Owner(id, name), grants, s3_acl, allow_no_owner) + + def elem(self): + """ + Decode the value to an ACL instance. + """ + elem = Element(self.root_tag) + + owner = SubElement(elem, 'Owner') + SubElement(owner, 'ID').text = self.owner.id + SubElement(owner, 'DisplayName').text = self.owner.name + + SubElement(elem, 'AccessControlList').extend( + g.elem() for g in self.grants + ) + + return elem + + def check_owner(self, user_id): + """ + Check that the user is an owner. + """ + if not self.s3_acl: + # Ignore S3api ACL. 
+ return + + if not self.owner.id: + if self.allow_no_owner: + # No owner means public. + return + raise AccessDenied() + + if user_id != self.owner.id: + raise AccessDenied() + + def check_permission(self, user_id, permission): + """ + Check that the user has a permission. + """ + if not self.s3_acl: + # Ignore S3api ACL. + return + + try: + # owners have full control permission + self.check_owner(user_id) + return + except AccessDenied: + pass + + if permission in PERMISSIONS: + for g in self.grants: + if g.allow(user_id, 'FULL_CONTROL') or \ + g.allow(user_id, permission): + return + + raise AccessDenied() + + @classmethod + def from_headers(cls, headers, bucket_owner, object_owner=None, + as_private=True): + """ + Convert HTTP headers to an ACL instance. + """ + grants = [] + try: + for key, value in headers.items(): + if key.lower().startswith('x-amz-grant-'): + permission = key[len('x-amz-grant-'):] + permission = permission.upper().replace('-', '_') + if permission not in PERMISSIONS: + continue + for grantee in value.split(','): + grants.append( + Grant(Grantee.from_header(grantee), permission)) + + if 'x-amz-acl' in headers: + try: + acl = headers['x-amz-acl'] + if len(grants) > 0: + err_msg = 'Specifying both Canned ACLs and Header ' \ + 'Grants is not allowed' + raise InvalidRequest(err_msg) + grantees = canned_acl_grantees( + bucket_owner, object_owner)[acl] + for permission, grantee in grantees: + grants.append(Grant(grantee, permission)) + except KeyError: + # expects canned_acl_grantees()[] raises KeyError + raise InvalidArgument('x-amz-acl', headers['x-amz-acl']) + except (KeyError, ValueError): + # TODO: think about we really catch this except sequence + raise InvalidRequest() + + if len(grants) == 0: + # No ACL headers + if as_private: + return ACLPrivate(bucket_owner, object_owner) + else: + return None + + return cls(object_owner or bucket_owner, grants) + + +class CannedACL(object): + """ + A dict-like object that returns canned ACL. + """ + def __getitem__(self, key): + def acl(key, bucket_owner, object_owner=None, + s3_acl=False, allow_no_owner=False): + grants = [] + grantees = canned_acl_grantees(bucket_owner, object_owner)[key] + for permission, grantee in grantees: + grants.append(Grant(grantee, permission)) + return ACL(object_owner or bucket_owner, + grants, s3_acl, allow_no_owner) + + return partial(acl, key) + + +canned_acl = CannedACL() + +ACLPrivate = canned_acl['private'] +ACLPublicRead = canned_acl['public-read'] +ACLPublicReadWrite = canned_acl['public-read-write'] +ACLAuthenticatedRead = canned_acl['authenticated-read'] +ACLBucketOwnerRead = canned_acl['bucket-owner-read'] +ACLBucketOwnerFullControl = canned_acl['bucket-owner-full-control'] +ACLLogDeliveryWrite = canned_acl['log-delivery-write'] diff -Nru swift-2.17.0/swift/common/middleware/s3api/utils.py swift-2.18.0/swift/common/middleware/s3api/utils.py --- swift-2.17.0/swift/common/middleware/s3api/utils.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/s3api/utils.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,189 @@ +# Copyright (c) 2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import calendar +import email.utils +import re +import time +import uuid + +from swift.common import utils + +MULTIUPLOAD_SUFFIX = '+segments' + + +def sysmeta_prefix(resource): + """ + Returns the system metadata prefix for given resource type. + """ + if resource.lower() == 'object': + return 'x-object-sysmeta-s3api-' + else: + return 'x-container-sysmeta-s3api-' + + +def sysmeta_header(resource, name): + """ + Returns the system metadata header for given resource type and name. + """ + return sysmeta_prefix(resource) + name + + +def camel_to_snake(camel): + return re.sub('(.)([A-Z])', r'\1_\2', camel).lower() + + +def snake_to_camel(snake): + return snake.title().replace('_', '') + + +def unique_id(): + return base64.urlsafe_b64encode(str(uuid.uuid4())) + + +def utf8encode(s): + if isinstance(s, unicode): + s = s.encode('utf8') + return s + + +def utf8decode(s): + if isinstance(s, str): + s = s.decode('utf8') + return s + + +def validate_bucket_name(name, dns_compliant_bucket_names): + """ + Validates the name of the bucket against S3 criteria, + http://docs.amazonwebservices.com/AmazonS3/latest/BucketRestrictions.html + True is valid, False is invalid. + """ + valid_chars = '-.a-z0-9' + if not dns_compliant_bucket_names: + valid_chars += 'A-Z_' + max_len = 63 if dns_compliant_bucket_names else 255 + + if len(name) < 3 or len(name) > max_len or not name[0].isalnum(): + # Bucket names should be between 3 and 63 (or 255) characters long + # Bucket names must start with a letter or a number + return False + elif dns_compliant_bucket_names and ( + '.-' in name or '-.' in name or '..' in name or + not name[-1].isalnum()): + # Bucket names cannot contain dashes next to periods + # Bucket names cannot contain two adjacent periods + # Bucket names must end with a letter or a number + return False + elif name.endswith('.'): + # Bucket names must not end with dot + return False + elif re.match("^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.)" + "{3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$", + name): + # Bucket names cannot be formatted as an IP Address + return False + elif not re.match("^[%s]*$" % valid_chars, name): + # Bucket names can contain lowercase letters, numbers, and hyphens. + return False + else: + return True + + +class S3Timestamp(utils.Timestamp): + @property + def s3xmlformat(self): + return self.isoformat[:-7] + '.000Z' + + @property + def amz_date_format(self): + """ + this format should be like 'YYYYMMDDThhmmssZ' + """ + return self.isoformat.replace( + '-', '').replace(':', '')[:-7] + 'Z' + + @classmethod + def now(cls): + return cls(time.time()) + + +def mktime(timestamp_str, time_format='%Y-%m-%dT%H:%M:%S'): + """ + mktime creates a float instance in epoch time really like as time.mktime + + the difference from time.mktime is allowing to 2 formats string for the + argument for the S3 testing usage. + TODO: support + + :param timestamp_str: a string of timestamp formatted as + (a) RFC2822 (e.g. date header) + (b) %Y-%m-%dT%H:%M:%S (e.g. 
copy result) + :param time_format: a string of format to parse in (b) process + :returns: a float instance in epoch time + """ + # time_tuple is the *remote* local time + time_tuple = email.utils.parsedate_tz(timestamp_str) + if time_tuple is None: + time_tuple = time.strptime(timestamp_str, time_format) + # add timezone info as utc (no time difference) + time_tuple += (0, ) + + # We prefer calendar.gmtime and a manual adjustment over + # email.utils.mktime_tz because older versions of Python (<2.7.4) may + # double-adjust for timezone in some situations (such when swift changes + # os.environ['TZ'] without calling time.tzset()). + epoch_time = calendar.timegm(time_tuple) - time_tuple[9] + + return epoch_time + + +class Config(dict): + def __init__(self, base=None): + if base is not None: + self.update(base) + + def __getattr__(self, name): + if name not in self: + raise AttributeError("No attribute '%s'" % name) + + return self[name] + + def __setattr__(self, name, value): + self[name] = value + + def __delattr__(self, name): + del self[name] + + def update(self, other): + if hasattr(other, 'keys'): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + + def __setitem__(self, key, value): + if isinstance(self.get(key), bool): + dict.__setitem__(self, key, utils.config_true_value(value)) + elif isinstance(self.get(key), int): + try: + dict.__setitem__(self, key, int(value)) + except ValueError: + if value: # No need to raise the error if value is '' + raise + else: + dict.__setitem__(self, key, value) diff -Nru swift-2.17.0/swift/common/middleware/slo.py swift-2.18.0/swift/common/middleware/slo.py --- swift-2.17.0/swift/common/middleware/slo.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/slo.py 2018-05-30 10:17:02.000000000 +0000 @@ -332,7 +332,7 @@ register_swift_info, RateLimitedIterator, quote, close_if_possible, \ closing_if_possible, LRUCache, StreamingPile, strict_b64decode from swift.common.request_helpers import SegmentedIterable, \ - get_sys_meta_prefix, update_etag_is_at_header + get_sys_meta_prefix, update_etag_is_at_header, resolve_etag_is_at_header from swift.common.constraints import check_utf8, MAX_BUFFERED_SLO_SEGMENTS from swift.common.http import HTTP_NOT_FOUND, HTTP_UNAUTHORIZED, is_success from swift.common.wsgi import WSGIContext, make_subrequest @@ -792,16 +792,19 @@ if slo_etag and slo_size and ( req.method == 'HEAD' or is_conditional): # Since we have length and etag, we can respond immediately - for i, (header, _value) in enumerate(self._response_headers): - lheader = header.lower() - if lheader == 'etag': - self._response_headers[i] = (header, '"%s"' % slo_etag) - elif lheader == 'content-length' and not is_conditional: - self._response_headers[i] = (header, slo_size) - start_response(self._response_status, - self._response_headers, - self._response_exc_info) - return resp_iter + resp = Response( + status=self._response_status, + headers=self._response_headers, + app_iter=resp_iter, + request=req, + conditional_etag=resolve_etag_is_at_header( + req, self._response_headers), + conditional_response=True) + resp.headers.update({ + 'Etag': '"%s"' % slo_etag, + 'Content-Length': slo_size, + }) + return resp(req.environ, start_response) if self._need_to_refetch_manifest(req): req.environ['swift.non_client_disconnect'] = True @@ -874,14 +877,15 @@ response_headers = [] for header, value in resp_headers: lheader = header.lower() + if lheader not in ('etag', 'content-length'): + 
response_headers.append((header, value)) + if lheader == SYSMETA_SLO_ETAG: slo_etag = value elif lheader == SYSMETA_SLO_SIZE: # it's from sysmeta, so we don't worry about non-integer # values here content_length = int(value) - elif lheader not in ('etag', 'content-length'): - response_headers.append((header, value)) # Prep to calculate content_length & etag if necessary if slo_etag is None: @@ -926,7 +930,9 @@ req, content_length, response_headers, segments) def _manifest_head_response(self, req, response_headers): + conditional_etag = resolve_etag_is_at_header(req, response_headers) return HTTPOk(request=req, headers=response_headers, body='', + conditional_etag=conditional_etag, conditional_response=True) def _manifest_get_response(self, req, content_length, response_headers, @@ -984,9 +990,11 @@ # the proxy logs and the user will receive incomplete results. return HTTPConflict(request=req) + conditional_etag = resolve_etag_is_at_header(req, response_headers) response = Response(request=req, content_length=content_length, headers=response_headers, conditional_response=True, + conditional_etag=conditional_etag, app_iter=segmented_iter) return response @@ -1359,8 +1367,8 @@ '%s MultipartDELETE' % new_env.get('HTTP_USER_AGENT') new_env['swift.source'] = 'SLO' new_env['PATH_INFO'] = ( - '/%s/%s/%s' % (vrs, account, obj_name.lstrip('/')) - ).encode('utf-8') + '/%s/%s/%s' % (vrs, account, obj_name.lstrip('/').encode('utf-8')) + ) resp = Request.blank('', new_env).get_response(self.app) if resp.is_success: diff -Nru swift-2.17.0/swift/common/middleware/symlink.py swift-2.18.0/swift/common/middleware/symlink.py --- swift-2.17.0/swift/common/middleware/symlink.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/symlink.py 2018-05-30 10:17:02.000000000 +0000 @@ -434,12 +434,12 @@ # We have a design decision to use etag space to store symlink info for # object listing because it's immutable unless the object is # overwritten. This may impact the downgrade scenario that the symlink - # info can be appreared as the suffix in the hash value of object + # info can appear as the suffix in the hash value of object # listing result for clients. - # To create override etag easily, we have a contraint that the symlink + # To create override etag easily, we have a constraint that the symlink # must be 0 byte so we can add etag of the empty string + symlink info # here, simply. Note that this override etag may be encrypted in the - # container db by encrypion middleware. + # container db by encryption middleware. 
etag_override = [ MD5_OF_EMPTY_STRING, 'symlink_target=%s' % req.headers[TGT_OBJ_SYSMETA_SYMLINK_HDR] diff -Nru swift-2.17.0/swift/common/middleware/tempauth.py swift-2.18.0/swift/common/middleware/tempauth.py --- swift-2.17.0/swift/common/middleware/tempauth.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/tempauth.py 2018-05-30 10:17:02.000000000 +0000 @@ -273,7 +273,7 @@ return self.app(env, start_response) if env.get('PATH_INFO', '').startswith(self.auth_prefix): return self.handle(env, start_response) - s3 = env.get('swift3.auth_details') + s3 = env.get('s3api.auth_details') token = env.get('HTTP_X_AUTH_TOKEN', env.get('HTTP_X_STORAGE_TOKEN')) service_token = env.get('HTTP_X_SERVICE_TOKEN') if s3 or (token and token.startswith(self.reseller_prefix)): @@ -432,8 +432,10 @@ expires, groups = cached_auth_data if expires < time(): groups = None + else: + groups = groups.encode('utf8') - s3_auth_details = env.get('swift3.auth_details') + s3_auth_details = env.get('s3api.auth_details') if s3_auth_details: if 'check_signature' not in s3_auth_details: self.logger.warning( @@ -788,7 +790,8 @@ cached_auth_data = memcache_client.get(memcache_token_key) if cached_auth_data: expires, old_groups = cached_auth_data - old_groups = old_groups.split(',') + old_groups = [group.encode('utf8') + for group in old_groups.split(',')] new_groups = self._get_user_groups(account, account_user, account_id) diff -Nru swift-2.17.0/swift/common/middleware/versioned_writes.py swift-2.18.0/swift/common/middleware/versioned_writes.py --- swift-2.17.0/swift/common/middleware/versioned_writes.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/middleware/versioned_writes.py 2018-05-30 10:17:02.000000000 +0000 @@ -16,9 +16,9 @@ """ Object versioning in swift is implemented by setting a flag on the container to tell swift to version all objects in the container. The value of the flag is -the container where the versions are stored (commonly referred to as the -"archive container"). The flag itself is one of two headers, which determines -how object ``DELETE`` requests are handled: +the URL-encoded container name where the versions are stored (commonly referred +to as the "archive container"). The flag itself is one of two headers, which +determines how object ``DELETE`` requests are handled: * ``X-History-Location`` @@ -327,7 +327,7 @@ while True: lreq = make_pre_authed_request( env, method='GET', swift_source='VW', - path='/v1/%s/%s' % (account_name, lcontainer)) + path=quote('/v1/%s/%s' % (account_name, lcontainer))) lreq.environ['QUERY_STRING'] = \ 'prefix=%s&marker=%s' % (quote(lprefix), quote(marker)) if end_marker: @@ -372,7 +372,7 @@ # to container, but not READ. This was allowed in previous version # (i.e., before middleware) so keeping the same behavior here get_req = make_pre_authed_request( - req.environ, path=path_info, + req.environ, path=quote(path_info), headers={'X-Newest': 'True'}, method='GET', swift_source='VW') source_resp = get_req.get_response(self.app) @@ -387,7 +387,7 @@ # Create a new Request object to PUT to the versions container, copying # all headers from the source object apart from x-timestamp. 
put_req = make_pre_authed_request( - req.environ, path=put_path_info, method='PUT', + req.environ, path=quote(put_path_info), method='PUT', swift_source='VW') copy_header_subset(source_resp, put_req, lambda k: k.lower() != 'x-timestamp') @@ -506,7 +506,7 @@ 'content-length': '0', 'x-auth-token': req.headers.get('x-auth-token')} marker_req = make_pre_authed_request( - req.environ, path=marker_path, + req.environ, path=quote(marker_path), headers=marker_headers, method='PUT', swift_source='VW') marker_req.environ['swift.content_type_overridden'] = True marker_resp = marker_req.get_response(self.app) @@ -579,7 +579,7 @@ obj_head_headers = {'X-Newest': 'True'} obj_head_headers.update(auth_token_header) head_req = make_pre_authed_request( - req.environ, path=req.path_info, method='HEAD', + req.environ, path=quote(req.path_info), method='HEAD', headers=obj_head_headers, swift_source='VW') hresp = head_req.get_response(self.app) close_if_possible(hresp.app_iter) @@ -604,8 +604,9 @@ continue old_del_req = make_pre_authed_request( - req.environ, path=restored_path, method='DELETE', - headers=auth_token_header, swift_source='VW') + req.environ, path=quote(restored_path), + method='DELETE', headers=auth_token_header, + swift_source='VW') del_resp = old_del_req.get_response(self.app) close_if_possible(del_resp.app_iter) if del_resp.status_int != HTTP_NOT_FOUND: @@ -618,7 +619,7 @@ previous_version['name'].encode('utf-8')) # done restoring, redirect the delete to the marker req = make_pre_authed_request( - req.environ, path=marker_path, method='DELETE', + req.environ, path=quote(marker_path), method='DELETE', headers=auth_token_header, swift_source='VW') else: # there are older versions so copy the previous version to the @@ -634,7 +635,7 @@ # version object - we already auth'd original req so make a # pre-authed request req = make_pre_authed_request( - req.environ, path=restored_path, method='DELETE', + req.environ, path=quote(restored_path), method='DELETE', headers=auth_token_header, swift_source='VW') # remove 'X-If-Delete-At', since it is not for the older copy @@ -749,9 +750,18 @@ def object_request(self, req, api_version, account, container, obj, allow_versioned_writes): - account_name = unquote(account) - container_name = unquote(container) - object_name = unquote(obj) + """ + Handle request for object resource. + + Note that account, container, obj should be unquoted by caller + if the url path is under url encoding (e.g. 
%FF) + + :param req: swift.common.swob.Request instance + :param api_version: should be v1 unless swift bumps api version + :param account: account name string + :param container: container name string + :param object: object name string + """ resp = None is_enabled = config_true_value(allow_versioned_writes) container_info = get_container_info( @@ -779,17 +789,17 @@ vw_ctx = VersionedWritesContext(self.app, self.logger) if req.method == 'PUT': resp = vw_ctx.handle_obj_versions_put( - req, versions_cont, api_version, account_name, - object_name) + req, versions_cont, api_version, account, + obj) # handle DELETE elif versioning_mode == 'history': resp = vw_ctx.handle_obj_versions_delete_push( - req, versions_cont, api_version, account_name, - container_name, object_name) + req, versions_cont, api_version, account, + container, obj) else: resp = vw_ctx.handle_obj_versions_delete_pop( - req, versions_cont, api_version, account_name, - container_name, object_name) + req, versions_cont, api_version, account, + container, obj) if resp: return resp diff -Nru swift-2.17.0/swift/common/request_helpers.py swift-2.18.0/swift/common/request_helpers.py --- swift-2.17.0/swift/common/request_helpers.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/request_helpers.py 2018-05-30 10:17:02.000000000 +0000 @@ -353,7 +353,6 @@ self.current_resp = None def _coalesce_requests(self): - start_time = time.time() pending_req = pending_etag = pending_size = None try: for seg_dict in self.listing_iter: @@ -376,11 +375,6 @@ first_byte = first_byte or 0 go_to_end = last_byte is None or ( seg_size is not None and last_byte == seg_size - 1) - if time.time() - start_time > self.max_get_time: - raise SegmentError( - 'While processing manifest %s, ' - 'max LO GET time of %ds exceeded' % - (self.name, self.max_get_time)) # The "multipart-manifest=get" query param ensures that the # segment is a plain old object, not some flavor of large # object; therefore, its etag is its MD5sum and hence we can @@ -433,108 +427,119 @@ except ListingIterError: e_type, e_value, e_traceback = sys.exc_info() - if time.time() - start_time > self.max_get_time: - raise SegmentError( - 'While processing manifest %s, ' - 'max LO GET time of %ds exceeded' % - (self.name, self.max_get_time)) if pending_req: yield pending_req, pending_etag, pending_size six.reraise(e_type, e_value, e_traceback) - if time.time() - start_time > self.max_get_time: - raise SegmentError( - 'While processing manifest %s, ' - 'max LO GET time of %ds exceeded' % - (self.name, self.max_get_time)) if pending_req: yield pending_req, pending_etag, pending_size - def _internal_iter(self): - bytes_left = self.response_body_length + def _requests_to_bytes_iter(self): + # Take the requests out of self._coalesce_requests, actually make + # the requests, and generate the bytes from the responses. + # + # Yields 2-tuples (segment-name, byte-chunk). The segment name is + # used for logging. 
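The refactor in this request_helpers.py hunk splits the old monolithic iterator into a pipeline of generators, each adding one concern on top of the (segment-name, bytes) tuples described above. A generic standalone sketch of that layering (names and data invented, not the real SegmentedIterable code):

def fetched_chunks():
    # Stand-in for _requests_to_bytes_iter: yields (segment-name, bytes).
    for piece in (b'hello ', b'world'):
        yield ('made-up-segment', piece)

def byte_counting(source, expected_length):
    # Stand-in for _byte_counting_iter: passes bytes through while
    # tracking how many the client is still owed, and complains when
    # the count comes out wrong.
    remaining = expected_length
    for name, chunk in source:
        if remaining >= len(chunk):
            yield chunk
            remaining -= len(chunk)
        else:
            yield chunk[:remaining]
            raise ValueError('too many bytes for %s' % name)
    if remaining:
        raise ValueError('expected another %d bytes' % remaining)

print(b''.join(byte_counting(fetched_chunks(), 11)))   # b'hello world'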
+ for data_or_req, seg_etag, seg_size in self._coalesce_requests(): + if isinstance(data_or_req, bytes): # ugly, awful overloading + yield ('data segment', data_or_req) + continue + seg_req = data_or_req + seg_resp = seg_req.get_response(self.app) + if not is_success(seg_resp.status_int): + close_if_possible(seg_resp.app_iter) + raise SegmentError( + 'While processing manifest %s, ' + 'got %d while retrieving %s' % + (self.name, seg_resp.status_int, seg_req.path)) - try: - for data_or_req, seg_etag, seg_size in self._coalesce_requests(): - if isinstance(data_or_req, bytes): - chunk = data_or_req # ugly, awful overloading - if bytes_left is None: - yield chunk - elif bytes_left >= len(chunk): - yield chunk - bytes_left -= len(chunk) - else: - yield chunk[:bytes_left] - continue - seg_req = data_or_req - seg_resp = seg_req.get_response(self.app) - if not is_success(seg_resp.status_int): - close_if_possible(seg_resp.app_iter) - raise SegmentError( - 'While processing manifest %s, ' - 'got %d while retrieving %s' % - (self.name, seg_resp.status_int, seg_req.path)) - - elif ((seg_etag and (seg_resp.etag != seg_etag)) or - (seg_size and (seg_resp.content_length != seg_size) and - not seg_req.range)): - # The content-length check is for security reasons. Seems - # possible that an attacker could upload a >1mb object and - # then replace it with a much smaller object with same - # etag. Then create a big nested SLO that calls that - # object many times which would hammer our obj servers. If - # this is a range request, don't check content-length - # because it won't match. - close_if_possible(seg_resp.app_iter) - raise SegmentError( - 'Object segment no longer valid: ' - '%(path)s etag: %(r_etag)s != %(s_etag)s or ' - '%(r_size)s != %(s_size)s.' % - {'path': seg_req.path, 'r_etag': seg_resp.etag, - 'r_size': seg_resp.content_length, - 's_etag': seg_etag, - 's_size': seg_size}) - else: - self.current_resp = seg_resp - - seg_hash = None - if seg_resp.etag and not seg_req.headers.get('Range'): - # Only calculate the MD5 if it we can use it to validate - seg_hash = hashlib.md5() - - document_iters = maybe_multipart_byteranges_to_document_iters( - seg_resp.app_iter, - seg_resp.headers['Content-Type']) - - for chunk in itertools.chain.from_iterable(document_iters): - if seg_hash: - seg_hash.update(chunk) - - if bytes_left is None: - yield chunk - elif bytes_left >= len(chunk): - yield chunk - bytes_left -= len(chunk) - else: - yield chunk[:bytes_left] - bytes_left -= len(chunk) - close_if_possible(seg_resp.app_iter) - raise SegmentError( - 'Too many bytes for %(name)s; truncating in ' - '%(seg)s with %(left)d bytes left' % - {'name': self.name, 'seg': seg_req.path, - 'left': bytes_left}) + elif ((seg_etag and (seg_resp.etag != seg_etag)) or + (seg_size and (seg_resp.content_length != seg_size) and + not seg_req.range)): + # The content-length check is for security reasons. Seems + # possible that an attacker could upload a >1mb object and + # then replace it with a much smaller object with same + # etag. Then create a big nested SLO that calls that + # object many times which would hammer our obj servers. If + # this is a range request, don't check content-length + # because it won't match. close_if_possible(seg_resp.app_iter) + raise SegmentError( + 'Object segment no longer valid: ' + '%(path)s etag: %(r_etag)s != %(s_etag)s or ' + '%(r_size)s != %(s_size)s.' 
% + {'path': seg_req.path, 'r_etag': seg_resp.etag, + 'r_size': seg_resp.content_length, + 's_etag': seg_etag, + 's_size': seg_size}) + else: + self.current_resp = seg_resp + + seg_hash = None + if seg_resp.etag and not seg_req.headers.get('Range'): + # Only calculate the MD5 if it we can use it to validate + seg_hash = hashlib.md5() + + document_iters = maybe_multipart_byteranges_to_document_iters( + seg_resp.app_iter, + seg_resp.headers['Content-Type']) + + for chunk in itertools.chain.from_iterable(document_iters): + if seg_hash: + seg_hash.update(chunk) + yield (seg_req.path, chunk) + close_if_possible(seg_resp.app_iter) - if seg_hash and seg_hash.hexdigest() != seg_resp.etag: - raise SegmentError( - "Bad MD5 checksum in %(name)s for %(seg)s: headers had" - " %(etag)s, but object MD5 was actually %(actual)s" % - {'seg': seg_req.path, 'etag': seg_resp.etag, - 'name': self.name, 'actual': seg_hash.hexdigest()}) + if seg_hash and seg_hash.hexdigest() != seg_resp.etag: + raise SegmentError( + "Bad MD5 checksum in %(name)s for %(seg)s: headers had" + " %(etag)s, but object MD5 was actually %(actual)s" % + {'seg': seg_req.path, 'etag': seg_resp.etag, + 'name': self.name, 'actual': seg_hash.hexdigest()}) + + def _byte_counting_iter(self): + # Checks that we give the client the right number of bytes. Raises + # SegmentError if the number of bytes is wrong. + bytes_left = self.response_body_length - if bytes_left: + for seg_name, chunk in self._requests_to_bytes_iter(): + if bytes_left is None: + yield chunk + elif bytes_left >= len(chunk): + yield chunk + bytes_left -= len(chunk) + else: + yield chunk[:bytes_left] + bytes_left -= len(chunk) + raise SegmentError( + 'Too many bytes for %(name)s; truncating in ' + '%(seg)s with %(left)d bytes left' % + {'name': self.name, 'seg': seg_name, + 'left': -bytes_left}) + + if bytes_left: + raise SegmentError('Expected another %d bytes for %s; ' + 'closing connection' % (bytes_left, self.name)) + + def _time_limited_iter(self): + # Makes sure a GET response doesn't take more than self.max_get_time + # seconds to process. Raises an exception if things take too long. + start_time = time.time() + for chunk in self._byte_counting_iter(): + now = time.time() + yield chunk + if now - start_time > self.max_get_time: raise SegmentError( - 'Not enough bytes for %s; closing connection' % self.name) + 'While processing manifest %s, ' + 'max LO GET time of %ds exceeded' % + (self.name, self.max_get_time)) + + def _internal_iter(self): + # Top level of our iterator stack: pass bytes through; catch and + # handle exceptions. 
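The max_get_time check that _coalesce_requests used to perform inline now lives in its own layer; a generic, standalone sketch of that wrapping pattern (the function name is invented and a plain RuntimeError stands in for the SegmentError the real layer raises):

import time

def time_limited(source, max_seconds):
    # Pass chunks straight through, but give up once the wall-clock
    # budget for the whole download has been spent.
    start = time.time()
    for chunk in source:
        yield chunk
        if time.time() - start > max_seconds:
            raise RuntimeError('max GET time of %ds exceeded' % max_seconds)

print(list(time_limited(iter([b'a', b'b']), max_seconds=30)))   # [b'a', b'b']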
+ try: + for chunk in self._time_limited_iter(): + yield chunk except (ListingIterError, SegmentError) as err: self.logger.error(err) if not self.validated_first_segment: diff -Nru swift-2.17.0/swift/common/ring/builder.py swift-2.18.0/swift/common/ring/builder.py --- swift-2.17.0/swift/common/ring/builder.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/ring/builder.py 2018-05-30 10:17:02.000000000 +0000 @@ -49,16 +49,6 @@ pass -try: - # python 2.7+ - from logging import NullHandler -except ImportError: - # python 2.6 - class NullHandler(logging.Handler): - def emit(self, *a, **kw): - pass - - @contextlib.contextmanager def _set_random_seed(seed): # If random seed is set when entering this context then reset original @@ -148,7 +138,7 @@ if not self.logger.handlers: self.logger.disabled = True # silence "no handler for X" error messages - self.logger.addHandler(NullHandler()) + self.logger.addHandler(logging.NullHandler()) @property def id(self): @@ -696,7 +686,7 @@ (dev['id'], dev['port'])) int_replicas = int(math.ceil(self.replicas)) - rep2part_len = map(len, self._replica2part2dev) + rep2part_len = list(map(len, self._replica2part2dev)) # check the assignments of each part's replicas for part in range(self.parts): devs_for_part = [] @@ -872,7 +862,7 @@ device is "overweight" and wishes to give partitions away if possible. :param replica_plan: a dict of dicts, as returned from - _build_replica_plan, that that maps + _build_replica_plan, that maps each tier to it's target replicanths. """ tier2children = self._build_tier2children() @@ -932,7 +922,7 @@ more recently than min_part_hours. """ self._part_moved_bitmap = bytearray(max(2 ** (self.part_power - 3), 1)) - elapsed_hours = int(time() - self._last_part_moves_epoch) / 3600 + elapsed_hours = int(time() - self._last_part_moves_epoch) // 3600 if elapsed_hours <= 0: return for part in range(self.parts): @@ -1182,7 +1172,7 @@ """ # pick a random starting point on the other side of the ring quarter_turn = (self.parts // 4) - random_half = random.randint(0, self.parts / 2) + random_half = random.randint(0, self.parts // 2) start = (self._last_part_gather_start + quarter_turn + random_half) % self.parts self.logger.debug('Gather start is %(start)s ' diff -Nru swift-2.17.0/swift/common/ring/composite_builder.py swift-2.18.0/swift/common/ring/composite_builder.py --- swift-2.17.0/swift/common/ring/composite_builder.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/ring/composite_builder.py 2018-05-30 10:17:02.000000000 +0000 @@ -455,7 +455,7 @@ metadata """ try: - with open(path_to_file, 'rb') as fp: + with open(path_to_file, 'rt') as fp: metadata = json.load(fp) builder_files = [metadata['component_builder_files'][comp['id']] for comp in metadata['components']] @@ -494,7 +494,7 @@ if not self.components or not self._builder_files: raise ValueError("No composed ring to save.") # persist relative paths to builder files - with open(path_to_file, 'wb') as fp: + with open(path_to_file, 'wt') as fp: metadata = self.to_dict() # future-proofing: # - saving abs path to component builder files and this file should @@ -640,7 +640,7 @@ """ self._load_components() self.update_last_part_moves() - component_builders = zip(self._builder_files, self._builders) + component_builders = list(zip(self._builder_files, self._builders)) # don't let the same builder go first each time shuffle(component_builders) results = {} diff -Nru swift-2.17.0/swift/common/ring/ring.py swift-2.18.0/swift/common/ring/ring.py --- 
swift-2.17.0/swift/common/ring/ring.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/ring/ring.py 2018-05-30 10:17:02.000000000 +0000 @@ -78,7 +78,7 @@ """ json_len, = struct.unpack('!I', gz_file.read(4)) - ring_dict = json.loads(gz_file.read(json_len)) + ring_dict = json.loads(gz_file.read(json_len).decode('ascii')) ring_dict['replica2part2dev_id'] = [] if metadata_only: @@ -111,7 +111,7 @@ # See if the file is in the new format magic = gz_file.read(4) - if magic == 'R1NG': + if magic == b'R1NG': format_version, = struct.unpack('!H', gz_file.read(2)) if format_version == 1: ring_data = cls.deserialize_v1( @@ -132,7 +132,7 @@ def serialize_v1(self, file_obj): # Write out new-style serialization magic and version: - file_obj.write(struct.pack('!4sH', 'R1NG', 1)) + file_obj.write(struct.pack('!4sH', b'R1NG', 1)) ring = self.to_dict() # Only include next_part_power if it is set in the @@ -145,8 +145,8 @@ if next_part_power is not None: _text['next_part_power'] = next_part_power - json_encoder = json.JSONEncoder(sort_keys=True) - json_text = json_encoder.encode(_text) + json_text = json.dumps(_text, sort_keys=True, + ensure_ascii=True).encode('ascii') json_len = len(json_text) file_obj.write(struct.pack('!I', json_len)) file_obj.write(json_text) @@ -418,8 +418,8 @@ (d['region'], d['zone'], d['ip']) for d in primary_nodes) parts = len(self._replica2part2dev_id[0]) - start = struct.unpack_from( - '>I', md5(str(part)).digest())[0] >> self._part_shift + part_hash = md5(str(part).encode('ascii')).digest() + start = struct.unpack_from('>I', part_hash)[0] >> self._part_shift inc = int(parts / 65536) or 1 # Multiple loops for execution speed; the checks and bookkeeping get # simpler as you go along diff -Nru swift-2.17.0/swift/common/splice.py swift-2.18.0/swift/common/splice.py --- swift-2.17.0/swift/common/splice.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/splice.py 2018-05-30 10:17:02.000000000 +0000 @@ -28,9 +28,6 @@ c_loff_t = ctypes.c_long -# python 2.6 doesn't have c_ssize_t -c_ssize_t = getattr(ctypes, 'c_ssize_t', ctypes.c_long) - class Tee(object): '''Binding to `tee`''' @@ -53,7 +50,7 @@ ctypes.c_uint ] - c_tee.restype = c_ssize_t + c_tee.restype = ctypes.c_ssize_t def errcheck(result, func, arguments): if result == -1: @@ -134,7 +131,7 @@ ctypes.c_uint ] - c_splice.restype = c_ssize_t + c_splice.restype = ctypes.c_ssize_t def errcheck(result, func, arguments): if result == -1: diff -Nru swift-2.17.0/swift/common/storage_policy.py swift-2.18.0/swift/common/storage_policy.py --- swift-2.17.0/swift/common/storage_policy.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/storage_policy.py 2018-05-30 10:17:02.000000000 +0000 @@ -203,8 +203,17 @@ def __int__(self): return self.idx - def __cmp__(self, other): - return cmp(self.idx, int(other)) + def __eq__(self, other): + return self.idx == int(other) + + def __ne__(self, other): + return self.idx != int(other) + + def __lt__(self, other): + return self.idx < int(other) + + def __gt__(self, other): + return self.idx > int(other) def __repr__(self): return ("%s(%d, %r, is_default=%s, " @@ -923,7 +932,12 @@ Reload POLICIES from ``swift.conf``. 
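Earlier in this storage_policy.py hunk, __cmp__ (which Python 3 ignores) is replaced by explicit rich comparisons against int(other). A standalone sketch of the same pattern with an invented class; functools.total_ordering would be another way to fill in the remaining operators, but it is not what the patch uses:

class IndexLike(object):
    # Minimal stand-in for an object that compares by its integer index,
    # mirroring the __eq__/__lt__/__gt__ approach in the diff above.
    def __init__(self, idx):
        self.idx = idx

    def __int__(self):
        return self.idx

    def __eq__(self, other):
        return self.idx == int(other)

    def __lt__(self, other):
        return self.idx < int(other)

    def __gt__(self, other):
        return self.idx > int(other)

assert IndexLike(0) == 0 and IndexLike(1) > IndexLike(0)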
""" global _POLICIES - policy_conf = ConfigParser() + if six.PY2: + policy_conf = ConfigParser() + else: + # Python 3.2 disallows section or option duplicates by default + # strict=False allows us to preserve the older behavior + policy_conf = ConfigParser(strict=False) policy_conf.read(utils.SWIFT_CONF_FILE) try: _POLICIES = parse_storage_policies(policy_conf) diff -Nru swift-2.17.0/swift/common/swob.py swift-2.18.0/swift/common/swob.py --- swift-2.17.0/swift/common/swob.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/swob.py 2018-05-30 10:17:02.000000000 +0000 @@ -288,8 +288,6 @@ self.status_int = value self.explanation = self.title = RESPONSE_REASONS[value][0] else: - if isinstance(value, six.text_type): - value = value.encode('utf-8') self.status_int = int(value.split(' ', 1)[0]) self.explanation = self.title = value.split(' ', 1)[1] @@ -309,7 +307,7 @@ if not self._app_iter: return '' with closing_if_possible(self._app_iter): - self._body = ''.join(self._app_iter) + self._body = b''.join(self._app_iter) self._app_iter = None return self._body @@ -622,7 +620,10 @@ """ def __init__(self, headerval): self.tags = set() - for tag in headerval.split(', '): + for tag in headerval.split(','): + tag = tag.strip() + if not tag: + continue if tag.startswith('"') and tag.endswith('"'): self.tags.add(tag[1:-1]) else: @@ -631,6 +632,10 @@ def __contains__(self, val): return '*' in self.tags or val in self.tags + def __repr__(self): + return '%s(%r)' % ( + self.__class__.__name__, ', '.join(sorted(self.tags))) + class Accept(object): """ @@ -843,11 +848,13 @@ 'https': 443}.get(parsed_path.scheme, 80) if parsed_path.scheme and parsed_path.scheme not in ['http', 'https']: raise TypeError('Invalid scheme: %s' % parsed_path.scheme) + path_info = urllib.parse.unquote( + parsed_path.path.decode('utf8') if six.PY3 else parsed_path.path) env = { 'REQUEST_METHOD': 'GET', 'SCRIPT_NAME': '', 'QUERY_STRING': parsed_path.query, - 'PATH_INFO': urllib.parse.unquote(parsed_path.path), + 'PATH_INFO': path_info, 'SERVER_NAME': server_name, 'SERVER_PORT': str(server_port), 'HTTP_HOST': '%s:%d' % (server_name, server_port), @@ -1256,6 +1263,20 @@ # body text from RESPONSE_REASONS. body = None app_iter = None + elif self.content_length == 0: + # If ranges_for_length found ranges but our content length + # is 0, then that means we got a suffix-byte-range request + # (e.g. "bytes=-512"). This is asking for *up to* the last N + # bytes of the file. If we had any bytes to send at all, + # we'd return a 206 with an appropriate Content-Range header, + # but we can't construct a Content-Range header because we + # have no byte indices because we have no bytes. + # + # The only reasonable thing to do is to return a 200 with + # the whole object (all zero bytes of it). This is also what + # Apache and Nginx do, so if we're wrong, at least we're in + # good company. 
+ pass elif ranges: range_size = len(ranges) if range_size > 0: @@ -1377,7 +1398,7 @@ if 'location' in self.headers and \ not env.get('swift.leave_relative_location'): self.location = self.absolute_location() - start_response(self.status, self.headers.items()) + start_response(self.status, list(self.headers.items())) return self.response_iter diff -Nru swift-2.17.0/swift/common/utils.py swift-2.18.0/swift/common/utils.py --- swift-2.17.0/swift/common/utils.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/utils.py 2018-05-30 10:17:02.000000000 +0000 @@ -19,9 +19,12 @@ import base64 import binascii +import bisect +import collections import errno import fcntl import grp +import hashlib import hmac import json import math @@ -45,7 +48,7 @@ import ctypes.util from optparse import OptionParser -from tempfile import mkstemp, NamedTemporaryFile +from tempfile import gettempdir, mkstemp, NamedTemporaryFile import glob import itertools import stat @@ -65,18 +68,21 @@ utf8_decoder = codecs.getdecoder('utf-8') utf8_encoder = codecs.getencoder('utf-8') import six +if not six.PY2: + utf16_decoder = codecs.getdecoder('utf-16') + utf16_encoder = codecs.getencoder('utf-16') from six.moves import cPickle as pickle from six.moves.configparser import (ConfigParser, NoSectionError, NoOptionError, RawConfigParser) -from six.moves import range +from six.moves import range, http_client from six.moves.urllib.parse import ParseResult from six.moves.urllib.parse import quote as _quote from six.moves.urllib.parse import urlparse as stdlib_urlparse +from six import string_types from swift import gettext_ as _ import swift.common.exceptions -from swift.common.http import is_success, is_redirection, HTTP_NOT_FOUND, \ - HTTP_PRECONDITION_FAILED, HTTP_REQUESTED_RANGE_NOT_SATISFIABLE +from swift.common.http import is_server_error from swift.common.header_key_dict import HeaderKeyDict from swift.common.linkat import linkat @@ -161,8 +167,8 @@ # Used by hash_path to offer a bit more security when generating hashes for # paths. It simply appends this value to all paths; guessing the hash a path # will end up with would also require knowing this suffix. 
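Turning HASH_PATH_PREFIX/HASH_PATH_SUFFIX into bytes goes hand in hand with hash_path (further down in this file) doing all of its concatenation in bytes. A rough standalone sketch of that hashing; the prefix/suffix values are made up and come from swift.conf in a real deployment:

from hashlib import md5

HASH_PATH_PREFIX = b'changeme'   # made-up value
HASH_PATH_SUFFIX = b'changeme'   # made-up value

def sketch_hash_path(account, container=None, obj=None):
    # Same shape as swift.common.utils.hash_path: every path component is
    # encoded to UTF-8 bytes before md5(prefix + path + suffix) is taken.
    paths = [p.encode('utf-8') for p in (account, container, obj)
             if p is not None]
    return md5(HASH_PATH_PREFIX + b'/' + b'/'.join(paths)
               + HASH_PATH_SUFFIX).hexdigest()

print(sketch_hash_path(u'AUTH_test', u'c\u00f6ntainer', u'obj'))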
-HASH_PATH_SUFFIX = '' -HASH_PATH_PREFIX = '' +HASH_PATH_SUFFIX = b'' +HASH_PATH_PREFIX = b'' SWIFT_CONF_FILE = '/etc/swift/swift.conf' @@ -203,8 +209,8 @@ swift_dir != os.path.dirname(SWIFT_CONF_FILE)): SWIFT_CONF_FILE = os.path.join( swift_dir, os.path.basename(SWIFT_CONF_FILE)) - HASH_PATH_PREFIX = '' - HASH_PATH_SUFFIX = '' + HASH_PATH_PREFIX = b'' + HASH_PATH_SUFFIX = b'' validate_configuration() return True return False @@ -215,17 +221,29 @@ global HASH_PATH_PREFIX if not HASH_PATH_SUFFIX and not HASH_PATH_PREFIX: hash_conf = ConfigParser() - if hash_conf.read(SWIFT_CONF_FILE): + + if six.PY3: + # Use Latin1 to accept arbitrary bytes in the hash prefix/suffix + confs_read = hash_conf.read(SWIFT_CONF_FILE, encoding='latin1') + else: + confs_read = hash_conf.read(SWIFT_CONF_FILE) + + if confs_read: try: HASH_PATH_SUFFIX = hash_conf.get('swift-hash', 'swift_hash_path_suffix') + if six.PY3: + HASH_PATH_SUFFIX = HASH_PATH_SUFFIX.encode('latin1') except (NoSectionError, NoOptionError): pass try: HASH_PATH_PREFIX = hash_conf.get('swift-hash', 'swift_hash_path_prefix') + if six.PY3: + HASH_PATH_PREFIX = HASH_PATH_PREFIX.encode('latin1') except (NoSectionError, NoOptionError): pass + if not HASH_PATH_SUFFIX and not HASH_PATH_PREFIX: raise InvalidHashPathConfigError() @@ -253,9 +271,13 @@ :returns: hexdigest str of the HMAC for the request using the specified digest algorithm. """ + parts = (request_method, str(expires), path) + if not isinstance(key, six.binary_type): + key = key.encode('utf8') return hmac.new( - key, - '%s\n%s\n%s' % (request_method, expires, path), + key, b'\n'.join( + x if isinstance(x, six.binary_type) else x.encode('utf8') + for x in parts), digest).hexdigest() @@ -381,13 +403,28 @@ integer > 0. (not including zero) Raises ValueError otherwise. """ try: - value = int(value) - if value < 1: + result = int(value) + if result < 1: raise ValueError() except (TypeError, ValueError): raise ValueError( 'Config option must be an positive int number, not "%s".' % value) - return value + return result + + +def config_float_value(value, minimum=None, maximum=None): + try: + val = float(value) + if minimum is not None and val < minimum: + raise ValueError() + if maximum is not None and val > maximum: + raise ValueError() + return val + except (TypeError, ValueError): + min_ = ', greater than %s' % minimum if minimum is not None else '' + max_ = ', less than %s' % maximum if maximum is not None else '' + raise ValueError('Config option must be a number%s%s, not "%s".' 
% + (min_, max_, value)) def config_auto_int_value(value, default): @@ -530,7 +567,7 @@ def generate_trans_id(trans_id_suffix): return 'tx%s-%010x%s' % ( - uuid.uuid4().hex[:21], time.time(), quote(trans_id_suffix)) + uuid.uuid4().hex[:21], int(time.time()), quote(trans_id_suffix)) def get_policy_index(req_headers, res_headers): @@ -545,6 +582,8 @@ """ header = 'X-Backend-Storage-Policy-Index' policy_index = res_headers.get(header, req_headers.get(header)) + if isinstance(policy_index, six.binary_type) and not six.PY2: + policy_index = policy_index.decode('ascii') return str(policy_index) if policy_index is not None else None @@ -752,7 +791,7 @@ if float(free) <= float(FALLOCATE_RESERVE): raise OSError( errno.ENOSPC, - 'FALLOCATE_RESERVE fail %s <= %s' % + 'FALLOCATE_RESERVE fail %g <= %g' % (free, FALLOCATE_RESERVE)) args = { 'fallocate': (fd, mode, offset, length), @@ -919,11 +958,16 @@ :param delta: deca-microsecond difference from the base timestamp param, an int """ + if isinstance(timestamp, bytes): + timestamp = timestamp.decode('ascii') if isinstance(timestamp, six.string_types): - parts = timestamp.split('_', 1) - self.timestamp = float(parts.pop(0)) - if parts: - self.offset = int(parts[0], 16) + base, base_offset = timestamp.partition('_')[::2] + self.timestamp = float(base) + if '_' in base_offset: + raise ValueError('invalid literal for int() with base 16: ' + '%r' % base_offset) + if base_offset: + self.offset = int(base_offset, 16) else: self.offset = 0 else: @@ -1640,24 +1684,6 @@ sample_rate) -def server_handled_successfully(status_int): - """ - True for successful responses *or* error codes that are not Swift's fault, - False otherwise. For example, 500 is definitely the server's fault, but - 412 is an error code (4xx are all errors) that is due to a header the - client sent. - - If one is tracking error rates to monitor server health, one would be - advised to use a function like this one, lest a client cause a flurry of - 404s or 416s and make a spurious spike in your errors graph. - """ - return (is_success(status_int) or - is_redirection(status_int) or - status_int == HTTP_NOT_FOUND or - status_int == HTTP_PRECONDITION_FAILED or - status_int == HTTP_REQUESTED_RANGE_NOT_SATISFIABLE) - - def timing_stats(**dec_kwargs): """ Returns a decorator that logs timing events or errors for public methods in @@ -1670,7 +1696,15 @@ def _timing_stats(ctrl, *args, **kwargs): start_time = time.time() resp = func(ctrl, *args, **kwargs) - if server_handled_successfully(resp.status_int): + # .timing is for successful responses *or* error codes that are + # not Swift's fault. For example, 500 is definitely the server's + # fault, but 412 is an error code (4xx are all errors) that is + # due to a header the client sent. + # + # .errors.timing is for failures that *are* Swift's fault. + # Examples include 507 for an unmounted drive or 500 for an + # unhandled exception. + if not is_server_error(resp.status_int): ctrl.logger.timing_since(method + '.timing', start_time, **dec_kwargs) else: @@ -1682,6 +1716,51 @@ return decorating_func +class SwiftLoggerAdapter(logging.LoggerAdapter): + """ + A logging.LoggerAdapter subclass that also passes through StatsD method + calls. + + Like logging.LoggerAdapter, you have to subclass this and override the + process() method to accomplish anything useful. 
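As that docstring says, a LoggerAdapter only becomes useful once process() is overridden. A minimal stdlib-only sketch of the pattern; the adapter name and tag are invented, and the prefixing simply mirrors what the PrefixLoggerAdapter defined just below does:

import logging

class TaggedLoggerAdapter(logging.LoggerAdapter):
    # Made-up adapter: prepend whatever 'tag' was supplied in extra.
    def process(self, msg, kwargs):
        return '%s: %s' % (self.extra['tag'], msg), kwargs

logging.basicConfig(level=logging.INFO)
adapter = TaggedLoggerAdapter(logging.getLogger('demo'), {'tag': 'obj-server'})
adapter.info('starting up')   # message becomes "obj-server: starting up"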
+ """ + def update_stats(self, *a, **kw): + return self.logger.update_stats(*a, **kw) + + def increment(self, *a, **kw): + return self.logger.increment(*a, **kw) + + def decrement(self, *a, **kw): + return self.logger.decrement(*a, **kw) + + def timing(self, *a, **kw): + return self.logger.timing(*a, **kw) + + def timing_since(self, *a, **kw): + return self.logger.timing_since(*a, **kw) + + def transfer_rate(self, *a, **kw): + return self.logger.transfer_rate(*a, **kw) + + +class PrefixLoggerAdapter(SwiftLoggerAdapter): + """ + Adds an optional prefix to all its log messages. When the prefix has not + been set, messages are unchanged. + """ + def set_prefix(self, prefix): + self.extra['prefix'] = prefix + + def exception(self, *a, **kw): + self.logger.exception(*a, **kw) + + def process(self, msg, kwargs): + msg, kwargs = super(PrefixLoggerAdapter, self).process(msg, kwargs) + if 'prefix' in self.extra: + msg = self.extra['prefix'] + msg + return (msg, kwargs) + + # double inheritance to support property with setter class LogAdapter(logging.LoggerAdapter, object): """ @@ -1755,12 +1834,19 @@ emsg = str(exc) elif exc.errno == errno.ECONNREFUSED: emsg = _('Connection refused') + elif exc.errno == errno.ECONNRESET: + emsg = _('Connection reset') elif exc.errno == errno.EHOSTUNREACH: emsg = _('Host unreachable') + elif exc.errno == errno.ENETUNREACH: + emsg = _('Network unreachable') elif exc.errno == errno.ETIMEDOUT: emsg = _('Connection timeout') else: call = self._exception + elif isinstance(exc, http_client.BadStatusLine): + # Use error(); not really exceptional + emsg = '%s: %s' % (exc.__class__.__name__, exc.line) elif isinstance(exc, eventlet.Timeout): emsg = exc.__class__.__name__ if hasattr(exc, 'seconds'): @@ -2274,16 +2360,19 @@ """ if object and not container: raise ValueError('container is required if object is provided') - paths = [account] + paths = [account if isinstance(account, six.binary_type) + else account.encode('utf8')] if container: - paths.append(container) + paths.append(container if isinstance(container, six.binary_type) + else container.encode('utf8')) if object: - paths.append(object) + paths.append(object if isinstance(object, six.binary_type) + else object.encode('utf8')) if raw_digest: - return md5(HASH_PATH_PREFIX + '/' + '/'.join(paths) + return md5(HASH_PATH_PREFIX + b'/' + b'/'.join(paths) + HASH_PATH_SUFFIX).digest() else: - return md5(HASH_PATH_PREFIX + '/' + '/'.join(paths) + return md5(HASH_PATH_PREFIX + b'/' + b'/'.join(paths) + HASH_PATH_SUFFIX).hexdigest() @@ -2391,9 +2480,9 @@ flags = os.O_CREAT | os.O_RDWR if append: flags |= os.O_APPEND - mode = 'a+' + mode = 'a+b' else: - mode = 'r+' + mode = 'r+b' while True: fd = os.open(filename, flags) file_obj = os.fdopen(fd, mode) @@ -2979,7 +3068,7 @@ :param value: The X-Container-Sync-To header value to validate. :param allowed_sync_hosts: A list of allowed hosts in endpoints, if realms_conf does not apply. - :param realms_conf: A instance of + :param realms_conf: An instance of swift.common.container_sync_realms.ContainerSyncRealms to validate against. 
:returns: A tuple of (error_string, validated_endpoint, realm, @@ -3214,7 +3303,7 @@ try: existing_entry = cf.readline() if existing_entry: - cache_entry = json.loads(existing_entry) + cache_entry = json.loads(existing_entry.decode('utf8')) except ValueError: # file doesn't have a valid entry, we'll recreate it pass @@ -3224,7 +3313,9 @@ try: with NamedTemporaryFile(dir=os.path.dirname(cache_file), delete=False) as tf: - tf.write(json.dumps(cache_entry, sort_keys=True) + '\n') + cache_data = json.dumps(cache_entry, ensure_ascii=True, + sort_keys=True) + tf.write(cache_data.encode('ascii') + b'\n') if set_owner: os.chown(tf.name, pwd.getpwnam(set_owner).pw_uid, -1) renamer(tf.name, cache_file, fsync=False) @@ -3235,8 +3326,24 @@ except OSError as err: if err.errno != errno.ENOENT: raise - except (Exception, Timeout): - logger.exception(_('Exception dumping recon cache')) + except (Exception, Timeout) as err: + logger.exception('Exception dumping recon cache: %s' % err) + + +def load_recon_cache(cache_file): + """ + Load a recon cache file. Treats missing file as empty. + """ + try: + with open(cache_file) as fh: + return json.load(fh) + except IOError as e: + if e.errno == errno.ENOENT: + return {} + else: + raise + except ValueError: # invalid JSON + return {} def listdir(path): @@ -3372,10 +3479,31 @@ :param str_or_unicode: a string or an unicode which can be invalid utf-8 """ - if isinstance(str_or_unicode, six.text_type): - (str_or_unicode, _len) = utf8_encoder(str_or_unicode, 'replace') - (valid_utf8_str, _len) = utf8_decoder(str_or_unicode, 'replace') - return valid_utf8_str.encode('utf-8') + if six.PY2: + if isinstance(str_or_unicode, six.text_type): + (str_or_unicode, _len) = utf8_encoder(str_or_unicode, 'replace') + (valid_unicode_str, _len) = utf8_decoder(str_or_unicode, 'replace') + else: + # Apparently under py3 we need to go to utf-16 to collapse surrogates? + if isinstance(str_or_unicode, six.binary_type): + try: + (str_or_unicode, _len) = utf8_decoder(str_or_unicode, + 'surrogatepass') + except UnicodeDecodeError: + (str_or_unicode, _len) = utf8_decoder(str_or_unicode, + 'replace') + (str_or_unicode, _len) = utf16_encoder(str_or_unicode, 'surrogatepass') + (valid_unicode_str, _len) = utf16_decoder(str_or_unicode, 'replace') + return valid_unicode_str.encode('utf-8') + + +class Everything(object): + """ + A container that contains everything. If "e" is an instance of + Everything, then "x in e" is true for all x. + """ + def __contains__(self, element): + return True def list_from_csv(comma_separated_str): @@ -3832,7 +3960,10 @@ """ Patched version of urllib.quote that encodes utf-8 strings before quoting """ - return _quote(get_valid_utf8_str(value), safe) + quoted = _quote(get_valid_utf8_str(value), safe) + if isinstance(value, six.binary_type): + quoted = quoted.encode('utf-8') + return quoted def get_expirer_container(x_delete_at, expirer_divisor, acc, cont, obj): @@ -3937,11 +4068,11 @@ :returns: A generator of file-like objects for each part. 
:raises MimeInvalid: if the document is malformed """ - boundary = '--' + boundary + boundary = b'--' + boundary blen = len(boundary) + 2 # \r\n try: got = wsgi_input.readline(blen) - while got == '\r\n': + while got == b'\r\n': got = wsgi_input.readline(blen) except (IOError, ValueError) as e: raise swift.common.exceptions.ChunkReadError(str(e)) @@ -3949,8 +4080,8 @@ if got.strip() != boundary: raise swift.common.exceptions.MimeInvalid( 'invalid starting boundary: wanted %r, got %r', (boundary, got)) - boundary = '\r\n' + boundary - input_buffer = '' + boundary = b'\r\n' + boundary + input_buffer = b'' done = False while not done: it = _MultipartMimeFileLikeObject(wsgi_input, boundary, input_buffer, @@ -4257,6 +4388,553 @@ return md5_sockfd +class ShardRange(object): + """ + A ShardRange encapsulates sharding state related to a container including + lower and upper bounds that define the object namespace for which the + container is responsible. + + Shard ranges may be persisted in a container database. Timestamps + associated with subsets of the shard range attributes are used to resolve + conflicts when a shard range needs to be merged with an existing shard + range record and the most recent version of an attribute should be + persisted. + + :param name: the name of the shard range; this should take the form of a + path to a container i.e. /. + :param timestamp: a timestamp that represents the time at which the + shard range's ``lower``, ``upper`` or ``deleted`` attributes were + last modified. + :param lower: the lower bound of object names contained in the shard range; + the lower bound *is not* included in the shard range namespace. + :param upper: the upper bound of object names contained in the shard range; + the upper bound *is* included in the shard range namespace. + :param object_count: the number of objects in the shard range; defaults to + zero. + :param bytes_used: the number of bytes in the shard range; defaults to + zero. + :param meta_timestamp: a timestamp that represents the time at which the + shard range's ``object_count`` and ``bytes_used`` were last updated; + defaults to the value of ``timestamp``. + :param deleted: a boolean; if True the shard range is considered to be + deleted. + :param state: the state; must be one of ShardRange.STATES; defaults to + CREATED. + :param state_timestamp: a timestamp that represents the time at which + ``state`` was forced to its current value; defaults to the value of + ``timestamp``. This timestamp is typically not updated with every + change of ``state`` because in general conflicts in ``state`` + attributes are resolved by choosing the larger ``state`` value. + However, when this rule does not apply, for example when changing state + from ``SHARDED`` to ``ACTIVE``, the ``state_timestamp`` may be advanced + so that the new ``state`` value is preferred over any older ``state`` + value. + :param epoch: optional epoch timestamp which represents the time at which + sharding was enabled for a container. 
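A quick usage sketch of the constructor documented above; the account/container name, bounds and stats are invented, and ShardRange is new in swift.common.utils as of this release:

from swift.common.utils import ShardRange, Timestamp

# A shard responsible for object names in ('cat', 'giraffe'].
sr = ShardRange('.shards_AUTH_test/c-blah-1', Timestamp.now(),
                lower='cat', upper='giraffe', object_count=100,
                bytes_used=1024, state=ShardRange.ACTIVE)

print(sr.lower_str, sr.upper_str)   # cat giraffe
print('dog' in sr, 'zebra' in sr)   # True False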
+ """ + FOUND = 10 + CREATED = 20 + CLEAVED = 30 + ACTIVE = 40 + SHRINKING = 50 + SHARDING = 60 + SHARDED = 70 + STATES = {FOUND: 'found', + CREATED: 'created', + CLEAVED: 'cleaved', + ACTIVE: 'active', + SHRINKING: 'shrinking', + SHARDING: 'sharding', + SHARDED: 'sharded'} + STATES_BY_NAME = dict((v, k) for k, v in STATES.items()) + + class OuterBound(object): + def __eq__(self, other): + return isinstance(other, type(self)) + + def __ne__(self, other): + return not self.__eq__(other) + + def __str__(self): + return '' + + def __repr__(self): + return type(self).__name__ + + def __bool__(self): + return False + + __nonzero__ = __bool__ + + @functools.total_ordering + class MaxBound(OuterBound): + def __ge__(self, other): + return True + + @functools.total_ordering + class MinBound(OuterBound): + def __le__(self, other): + return True + + MIN = MinBound() + MAX = MaxBound() + + def __init__(self, name, timestamp, lower=MIN, upper=MAX, + object_count=0, bytes_used=0, meta_timestamp=None, + deleted=False, state=None, state_timestamp=None, epoch=None): + self.account = self.container = self._timestamp = \ + self._meta_timestamp = self._state_timestamp = self._epoch = None + self._lower = ShardRange.MIN + self._upper = ShardRange.MAX + self._deleted = False + self._state = None + + self.name = name + self.timestamp = timestamp + self.lower = lower + self.upper = upper + self.deleted = deleted + self.object_count = object_count + self.bytes_used = bytes_used + self.meta_timestamp = meta_timestamp + self.state = self.FOUND if state is None else state + self.state_timestamp = state_timestamp + self.epoch = epoch + + @classmethod + def _encode(cls, value): + if six.PY2 and isinstance(value, six.text_type): + return value.encode('utf-8') + return value + + def _encode_bound(self, bound): + if isinstance(bound, ShardRange.OuterBound): + return bound + if not isinstance(bound, string_types): + raise TypeError('must be a string type') + return self._encode(bound) + + @classmethod + def _make_container_name(cls, root_container, parent_container, timestamp, + index): + if not isinstance(parent_container, bytes): + parent_container = parent_container.encode('utf-8') + return "%s-%s-%s-%s" % (root_container, + hashlib.md5(parent_container).hexdigest(), + cls._to_timestamp(timestamp).internal, + index) + + @classmethod + def make_path(cls, shards_account, root_container, parent_container, + timestamp, index): + """ + Returns a path for a shard container that is valid to use as a name + when constructing a :class:`~swift.common.utils.ShardRange`. + + :param shards_account: the hidden internal account to which the shard + container belongs. + :param root_container: the name of the root container for the shard. + :param parent_container: the name of the parent container for the + shard; for initial first generation shards this should be the same + as ``root_container``; for shards of shards this should be the name + of the sharding shard container. + :param timestamp: an instance of :class:`~swift.common.utils.Timestamp` + :param index: a unique index that will distinguish the path from any + other path generated using the same combination of + ``shards_account``, ``root_container``, ``parent_container`` and + ``timestamp``. 
+ :return: a string of the form / + """ + shard_container = cls._make_container_name( + root_container, parent_container, timestamp, index) + return '%s/%s' % (shards_account, shard_container) + + @classmethod + def _to_timestamp(cls, timestamp): + if timestamp is None or isinstance(timestamp, Timestamp): + return timestamp + return Timestamp(timestamp) + + @property + def name(self): + return '%s/%s' % (self.account, self.container) + + @name.setter + def name(self, path): + path = self._encode(path) + if not path or len(path.split('/')) != 2 or not all(path.split('/')): + raise ValueError( + "Name must be of the form '/', got %r" % + path) + self.account, self.container = path.split('/') + + @property + def timestamp(self): + return self._timestamp + + @timestamp.setter + def timestamp(self, ts): + if ts is None: + raise TypeError('timestamp cannot be None') + self._timestamp = self._to_timestamp(ts) + + @property + def meta_timestamp(self): + if self._meta_timestamp is None: + return self.timestamp + return self._meta_timestamp + + @meta_timestamp.setter + def meta_timestamp(self, ts): + self._meta_timestamp = self._to_timestamp(ts) + + @property + def lower(self): + return self._lower + + @property + def lower_str(self): + return str(self.lower) + + @lower.setter + def lower(self, value): + if value in (None, ''): + value = ShardRange.MIN + try: + value = self._encode_bound(value) + except TypeError as err: + raise TypeError('lower %s' % err) + if value > self._upper: + raise ValueError( + 'lower (%r) must be less than or equal to upper (%r)' % + (value, self.upper)) + self._lower = value + + @property + def end_marker(self): + return self.upper_str + '\x00' if self.upper else '' + + @property + def upper(self): + return self._upper + + @property + def upper_str(self): + return str(self.upper) + + @upper.setter + def upper(self, value): + if value in (None, ''): + value = ShardRange.MAX + try: + value = self._encode_bound(value) + except TypeError as err: + raise TypeError('upper %s' % err) + if value < self._lower: + raise ValueError( + 'upper (%r) must be greater than or equal to lower (%r)' % + (value, self.lower)) + self._upper = value + + @property + def object_count(self): + return self._count + + @object_count.setter + def object_count(self, count): + count = int(count) + if count < 0: + raise ValueError('object_count cannot be < 0') + self._count = count + + @property + def bytes_used(self): + return self._bytes + + @bytes_used.setter + def bytes_used(self, bytes_used): + bytes_used = int(bytes_used) + if bytes_used < 0: + raise ValueError('bytes_used cannot be < 0') + self._bytes = bytes_used + + def update_meta(self, object_count, bytes_used, meta_timestamp=None): + """ + Set the object stats metadata to the given values and update the + meta_timestamp to the current time. + + :param object_count: should be an integer + :param bytes_used: should be an integer + :param meta_timestamp: timestamp for metadata; if not given the + current time will be set. + :raises ValueError: if ``object_count`` or ``bytes_used`` cannot be + cast to an int, or if meta_timestamp is neither None nor can be + cast to a :class:`~swift.common.utils.Timestamp`. 
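A small usage sketch of these stats setters (same kind of invented shard range as above): update_meta replaces the stats, increment_meta adds to them, and both refresh meta_timestamp.

from swift.common.utils import ShardRange, Timestamp

sr = ShardRange('.shards_AUTH_test/c-blah-1', Timestamp.now(),
                lower='a', upper='m')
sr.update_meta(200, 2048)       # replace the stats, bump meta_timestamp
sr.increment_meta(5, 512)       # add to the stats, bump meta_timestamp
print(sr.object_count, sr.bytes_used)   # 205 2560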
+ """ + self.object_count = int(object_count) + self.bytes_used = int(bytes_used) + if meta_timestamp is None: + self.meta_timestamp = Timestamp.now() + else: + self.meta_timestamp = meta_timestamp + + def increment_meta(self, object_count, bytes_used): + """ + Increment the object stats metadata by the given values and update the + meta_timestamp to the current time. + + :param object_count: should be an integer + :param bytes_used: should be an integer + :raises ValueError: if ``object_count`` or ``bytes_used`` cannot be + cast to an int. + """ + self.update_meta(self.object_count + int(object_count), + self.bytes_used + int(bytes_used)) + + @classmethod + def resolve_state(cls, state): + """ + Given a value that may be either the name or the number of a state + return a tuple of (state number, state name). + + :param state: Either a string state name or an integer state number. + :return: A tuple (state number, state name) + :raises ValueError: if ``state`` is neither a valid state name nor a + valid state number. + """ + try: + state = state.lower() + state_num = cls.STATES_BY_NAME[state] + except (KeyError, AttributeError): + try: + state_name = cls.STATES[state] + except KeyError: + raise ValueError('Invalid state %r' % state) + else: + state_num = state + else: + state_name = state + return state_num, state_name + + @property + def state(self): + return self._state + + @state.setter + def state(self, state): + try: + float_state = float(state) + int_state = int(float_state) + except (ValueError, TypeError): + raise ValueError('Invalid state %r' % state) + if int_state != float_state or int_state not in self.STATES: + raise ValueError('Invalid state %r' % state) + self._state = int_state + + @property + def state_text(self): + return self.STATES[self.state] + + @property + def state_timestamp(self): + if self._state_timestamp is None: + return self.timestamp + return self._state_timestamp + + @state_timestamp.setter + def state_timestamp(self, ts): + self._state_timestamp = self._to_timestamp(ts) + + @property + def epoch(self): + return self._epoch + + @epoch.setter + def epoch(self, epoch): + self._epoch = self._to_timestamp(epoch) + + def update_state(self, state, state_timestamp=None): + """ + Set state to the given value and optionally update the state_timestamp + to the given time. + + :param state: new state, should be an integer + :param state_timestamp: timestamp for state; if not given the + state_timestamp will not be changed. + :return: True if the state or state_timestamp was changed, False + otherwise + """ + if state_timestamp is None and self.state == state: + return False + self.state = state + if state_timestamp is not None: + self.state_timestamp = state_timestamp + return True + + @property + def deleted(self): + return self._deleted + + @deleted.setter + def deleted(self, value): + self._deleted = bool(value) + + def set_deleted(self, timestamp=None): + """ + Mark the shard range deleted and set timestamp to the current time. + + :param timestamp: optional timestamp to set; if not given the + current time will be set. 
+ :return: True if the deleted attribute or timestamp was changed, False + otherwise + """ + if timestamp is None and self.deleted: + return False + self.deleted = True + self.timestamp = timestamp or Timestamp.now() + return True + + def __contains__(self, item): + # test if the given item is within the namespace + if item == '': + return False + item = self._encode_bound(item) + return self.lower < item <= self.upper + + def __lt__(self, other): + # a ShardRange is less than other if its entire namespace is less than + # other; if other is another ShardRange that implies that this + # ShardRange's upper must be less than or equal to the other + # ShardRange's lower + if self.upper == ShardRange.MAX: + return False + if isinstance(other, ShardRange): + return self.upper <= other.lower + elif other is None: + return True + else: + return self.upper < other + + def __gt__(self, other): + # a ShardRange is greater than other if its entire namespace is greater + # than other; if other is another ShardRange that implies that this + # ShardRange's lower must be less greater than or equal to the other + # ShardRange's upper + if self.lower == ShardRange.MIN: + return False + if isinstance(other, ShardRange): + return self.lower >= other.upper + elif other is None: + return False + else: + return self.lower >= other + + def __eq__(self, other): + # test for equality of range bounds only + if not isinstance(other, ShardRange): + return False + return self.lower == other.lower and self.upper == other.upper + + def __ne__(self, other): + return not (self == other) + + def __repr__(self): + return '%s<%r to %r as of %s, (%d, %d) as of %s, %s as of %s>' % ( + self.__class__.__name__, self.lower, self.upper, + self.timestamp.internal, self.object_count, self.bytes_used, + self.meta_timestamp.internal, self.state_text, + self.state_timestamp.internal) + + def entire_namespace(self): + """ + Returns True if the ShardRange includes the entire namespace, False + otherwise. + """ + return (self.lower == ShardRange.MIN and + self.upper == ShardRange.MAX) + + def overlaps(self, other): + """ + Returns True if the ShardRange namespace overlaps with the other + ShardRange's namespace. + + :param other: an instance of :class:`~swift.common.utils.ShardRange` + """ + if not isinstance(other, ShardRange): + return False + return max(self.lower, other.lower) < min(self.upper, other.upper) + + def includes(self, other): + """ + Returns True if this namespace includes the whole of the other + namespace, False otherwise. + + :param other: an instance of :class:`~swift.common.utils.ShardRange` + """ + return (self.lower <= other.lower) and (other.upper <= self.upper) + + def __iter__(self): + yield 'name', self.name + yield 'timestamp', self.timestamp.internal + yield 'lower', str(self.lower) + yield 'upper', str(self.upper) + yield 'object_count', self.object_count + yield 'bytes_used', self.bytes_used + yield 'meta_timestamp', self.meta_timestamp.internal + yield 'deleted', 1 if self.deleted else 0 + yield 'state', self.state + yield 'state_timestamp', self.state_timestamp.internal + yield 'epoch', self.epoch.internal if self.epoch is not None else None + + def copy(self, timestamp=None, **kwargs): + """ + Creates a copy of the ShardRange. + + :param timestamp: (optional) If given, the returned ShardRange will + have all of its timestamps set to this value. Otherwise the + returned ShardRange will have the original timestamps. 
+ :return: an instance of :class:`~swift.common.utils.ShardRange` + """ + new = ShardRange.from_dict(dict(self, **kwargs)) + if timestamp: + new.timestamp = timestamp + new.meta_timestamp = new.state_timestamp = None + return new + + @classmethod + def from_dict(cls, params): + """ + Return an instance constructed using the given dict of params. This + method is deliberately less flexible than the class `__init__()` method + and requires all of the `__init__()` args to be given in the dict of + params. + + :param params: a dict of parameters + :return: an instance of this class + """ + return cls( + params['name'], params['timestamp'], params['lower'], + params['upper'], params['object_count'], params['bytes_used'], + params['meta_timestamp'], params['deleted'], params['state'], + params['state_timestamp'], params['epoch']) + + +def find_shard_range(item, ranges): + """ + Find a ShardRange in given list of ``shard_ranges`` whose namespace + contains ``item``. + + :param item: The item for a which a ShardRange is to be found. + :param ranges: a sorted list of ShardRanges. + :return: the ShardRange whose namespace contains ``item``, or None if + no suitable range is found. + """ + index = bisect.bisect_left(ranges, item) + if index != len(ranges) and item in ranges[index]: + return ranges[index] + return None + + def modify_priority(conf, logger): """ Modify priority by nice and ionice. @@ -4323,6 +5001,27 @@ _ioprio_set(io_class, io_priority) +def o_tmpfile_in_path_supported(dirpath): + fd = None + try: + fd = os.open(dirpath, os.O_WRONLY | O_TMPFILE) + return True + except OSError as e: + if e.errno in (errno.EINVAL, errno.EISDIR, errno.EOPNOTSUPP): + return False + else: + raise Exception("Error on '%(path)s' while checking " + "O_TMPFILE: '%(ex)s'", + {'path': dirpath, 'ex': e}) + finally: + if fd is not None: + os.close(fd) + + +def o_tmpfile_in_tmpdir_supported(): + return o_tmpfile_in_path_supported(gettempdir()) + + def o_tmpfile_supported(): """ Returns True if O_TMPFILE flag is supported. @@ -4361,7 +5060,12 @@ :raises ValueError: if ``value`` is not a string, contains invalid characters, or has insufficient padding ''' - if not isinstance(value, six.string_types): + if isinstance(value, bytes): + try: + value = value.decode('ascii') + except UnicodeDecodeError: + raise ValueError + if not isinstance(value, six.text_type): raise ValueError # b64decode will silently discard bad characters, but we want to # treat them as an error @@ -4390,7 +5094,7 @@ """ with open(fname, 'rb') as f: md5sum = md5() - for block in iter(lambda: f.read(MD5_BLOCK_READ_BYTES), ''): + for block in iter(lambda: f.read(MD5_BLOCK_READ_BYTES), b''): md5sum.update(block) return md5sum.hexdigest() @@ -4535,3 +5239,188 @@ class ThreadSafeSysLogHandler(SysLogHandler): def createLock(self): self.lock = PipeMutex() + + +def round_robin_iter(its): + """ + Takes a list of iterators, yield an element from each in a round-robin + fashion until all of them are exhausted. + :param its: list of iterators + """ + while its: + for it in its: + try: + yield next(it) + except StopIteration: + its.remove(it) + + +OverrideOptions = collections.namedtuple( + 'OverrideOptions', ['devices', 'partitions', 'policies']) + + +def parse_override_options(**kwargs): + """ + Figure out which policies, devices, and partitions we should operate on, + based on kwargs. + + If 'override_policies' is already present in kwargs, then return that + value. 
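A usage sketch of the option parsing described here; the device, partition and policy values are invented, and the CSV keyword values only take effect in run-once mode:

from swift.common.utils import parse_override_options

# Run-once with CSV command-line values: they are parsed into lists.
opts = parse_override_options(once=True, devices='sdb,sdc',
                              partitions='100,300', policies='0')
print(opts)
# OverrideOptions(devices=['sdb', 'sdc'], partitions=[100, 300], policies=[0])

# A parent process can instead hand explicit overrides to its workers.
opts = parse_override_options(once=True, override_devices=['sdb'])
print(opts.devices)   # ['sdb']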
This happens when using multiple worker processes; the parent + process supplies override_policies=X to each child process. + + Otherwise, in run-once mode, look at the 'policies' keyword argument. + This is the value of the "--policies" command-line option. In + run-forever mode or if no --policies option was provided, an empty list + will be returned. + + The procedures for devices and partitions are similar. + + :returns: a named tuple with fields "devices", "partitions", and + "policies". + """ + run_once = kwargs.get('once', False) + + if 'override_policies' in kwargs: + policies = kwargs['override_policies'] + elif run_once: + policies = [ + int(p) for p in list_from_csv(kwargs.get('policies'))] + else: + policies = [] + + if 'override_devices' in kwargs: + devices = kwargs['override_devices'] + elif run_once: + devices = list_from_csv(kwargs.get('devices')) + else: + devices = [] + + if 'override_partitions' in kwargs: + partitions = kwargs['override_partitions'] + elif run_once: + partitions = [ + int(p) for p in list_from_csv(kwargs.get('partitions'))] + else: + partitions = [] + + return OverrideOptions(devices=devices, partitions=partitions, + policies=policies) + + +def distribute_evenly(items, num_buckets): + """ + Distribute items as evenly as possible into N buckets. + """ + out = [[] for _ in range(num_buckets)] + for index, item in enumerate(items): + out[index % num_buckets].append(item) + return out + + +def get_redirect_data(response): + """ + Extract a redirect location from a response's headers. + + :param response: a response + :return: a tuple of (path, Timestamp) if a Location header is found, + otherwise None + :raises ValueError: if the Location header is found but a + X-Backend-Redirect-Timestamp is not found, or if there is a problem + with the format of etiher header + """ + headers = HeaderKeyDict(response.getheaders()) + if 'Location' not in headers: + return None + location = urlparse(headers['Location']).path + account, container, _junk = split_path(location, 2, 3, True) + timestamp_val = headers.get('X-Backend-Redirect-Timestamp') + try: + timestamp = Timestamp(timestamp_val) + except (TypeError, ValueError): + raise ValueError('Invalid timestamp value: %s' % timestamp_val) + return '%s/%s' % (account, container), timestamp + + +def parse_db_filename(filename): + """ + Splits a db filename into three parts: the hash, the epoch, and the + extension. + + >>> parse_db_filename("ab2134.db") + ('ab2134', None, '.db') + >>> parse_db_filename("ab2134_1234567890.12345.db") + ('ab2134', '1234567890.12345', '.db') + + :param filename: A db file basename or path to a db file. + :return: A tuple of (hash , epoch, extension). ``epoch`` may be None. + :raises ValueError: if ``filename`` is not a path to a file. + """ + filename = os.path.basename(filename) + if not filename: + raise ValueError('Path to a file required.') + name, ext = os.path.splitext(filename) + parts = name.split('_') + hash_ = parts.pop(0) + epoch = parts[0] if parts else None + return hash_, epoch, ext + + +def make_db_file_path(db_path, epoch): + """ + Given a path to a db file, return a modified path whose filename part has + the given epoch. + + A db filename takes the form ``[_].db``; this method replaces + the ```` part of the given ``db_path`` with the given ``epoch`` + value, or drops the epoch part if the given ``epoch`` is ``None``. + + :param db_path: Path to a db file that does not necessarily exist. 
+ :param epoch: A string (or ``None``) that will be used as the epoch + in the new path's filename; non-``None`` values will be + normalized to the normal string representation of a + :class:`~swift.common.utils.Timestamp`. + :return: A modified path to a db file. + :raises ValueError: if the ``epoch`` is not valid for constructing a + :class:`~swift.common.utils.Timestamp`. + """ + hash_, _, ext = parse_db_filename(db_path) + db_dir = os.path.dirname(db_path) + if epoch is None: + return os.path.join(db_dir, hash_ + ext) + epoch = Timestamp(epoch).normal + return os.path.join(db_dir, '%s_%s%s' % (hash_, epoch, ext)) + + +def get_db_files(db_path): + """ + Given the path to a db file, return a sorted list of all valid db files + that actually exist in that path's dir. A valid db filename has the form: + + [_].db + + where matches the part of the given db_path as would be + parsed by :meth:`~swift.utils.common.parse_db_filename`. + + :param db_path: Path to a db file that does not necessarily exist. + :return: List of valid db files that do exist in the dir of the + ``db_path``. This list may be empty. + """ + db_dir, db_file = os.path.split(db_path) + try: + files = os.listdir(db_dir) + except OSError as err: + if err.errno == errno.ENOENT: + return [] + raise + if not files: + return [] + match_hash, epoch, ext = parse_db_filename(db_file) + results = [] + for f in files: + hash_, epoch, ext = parse_db_filename(f) + if ext != '.db': + continue + if hash_ != match_hash: + continue + results.append(os.path.join(db_dir, f)) + return sorted(results) diff -Nru swift-2.17.0/swift/common/wsgi.py swift-2.18.0/swift/common/wsgi.py --- swift-2.17.0/swift/common/wsgi.py 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/swift/common/wsgi.py 2018-05-30 10:17:02.000000000 +0000 @@ -18,7 +18,6 @@ from __future__ import print_function import errno -import inspect import os import signal import time @@ -45,6 +44,9 @@ validate_configuration, get_hub, config_auto_int_value, \ reiterate +SIGNUM_TO_NAME = {getattr(signal, n): n for n in dir(signal) + if n.startswith('SIG') and '_' not in n} + # Set maximum line size of message headers to be accepted. wsgi.MAX_HEADER_LINE = constraints.MAX_HEADER_SIZE @@ -119,6 +121,8 @@ } self.parser = loadwsgi.NicerConfigParser("string", defaults=defaults) self.parser.optionxform = str # Don't lower-case keys + # Defaults don't need interpolation (crazy PasteDeploy...) + self.parser.defaults = lambda: dict(self.parser._defaults, **defaults) self.parser.readfp(self.contents) @@ -415,22 +419,102 @@ return app_conf +class SwiftHttpProtocol(wsgi.HttpProtocol): + default_request_version = "HTTP/1.0" + + def log_request(self, *a): + """ + Turn off logging requests by the underlying WSGI software. + """ + pass + + def log_message(self, f, *a): + """ + Redirect logging other messages by the underlying WSGI software. + """ + logger = getattr(self.server.app, 'logger', None) or self.server.log + logger.error('ERROR WSGI: ' + f, *a) + + +class SwiftHttpProxiedProtocol(SwiftHttpProtocol): + """ + Protocol object that speaks HTTP, including multiple requests, but with + a single PROXY line as the very first thing coming in over the socket. + This is so we can learn what the client's IP address is when Swift is + behind a TLS terminator, like hitch, that does not understand HTTP and + so cannot add X-Forwarded-For or other similar headers. + + See http://www.haproxy.org/download/1.7/doc/proxy-protocol.txt for + protocol details. 
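As a rough sketch of what ``handle`` below checks for, a version-1 PROXY header is a single space-separated text line that precedes the first HTTP request; the hypothetical parser here mirrors those checks and, like the protocol class, keeps only the client (source) address and port:

    def parse_proxy_v1(line):
        # e.g. line = b'PROXY TCP4 203.0.113.7 192.0.2.10 56324 8080\r\n'
        parts = line.split(b' ')
        if len(parts) < 2 or parts[0] != b'PROXY':
            raise ValueError('not a PROXY protocol line: %r' % line)
        if parts[1].startswith(b'UNKNOWN'):
            # the spec says to ignore everything up to the CRLF
            return None
        if parts[1] in (b'TCP4', b'TCP6') and len(parts) == 6:
            return (parts[2].decode('latin-1'), parts[4].decode('latin-1'))
        raise ValueError('invalid PROXY protocol line: %r' % line)

    parse_proxy_v1(b'PROXY TCP4 203.0.113.7 192.0.2.10 56324 8080\r\n')
    # -> ('203.0.113.7', '56324')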
+ """ + def handle_error(self, connection_line): + if not six.PY2: + connection_line = connection_line.decode('latin-1') + + # No further processing will proceed on this connection under any + # circumstances. We always send the request into the superclass to + # handle any cleanup - this ensures that the request will not be + # processed. + self.rfile.close() + # We don't really have any confidence that an HTTP Error will be + # processable by the client as our transmission broken down between + # ourselves and our gateway proxy before processing the client + # protocol request. Hopefully the operator will know what to do! + msg = 'Invalid PROXY line %r' % connection_line + self.log_message(msg) + # Even assuming HTTP we don't even known what version of HTTP the + # client is sending? This entire endeavor seems questionable. + self.request_version = self.default_request_version + # appease http.server + self.command = 'PROXY' + self.send_error(400, msg) + + def handle(self): + """Handle multiple requests if necessary.""" + # ensure the opening line for the connection is a valid PROXY protcol + # line; this is the only IO we do on this connection before any + # additional wrapping further pollutes the raw socket. + connection_line = self.rfile.readline(self.server.url_length_limit) + + if connection_line.startswith(b'PROXY'): + proxy_parts = connection_line.split(b' ') + if len(proxy_parts) >= 2 and proxy_parts[0] == b'PROXY': + if proxy_parts[1] in (b'TCP4', b'TCP6') and \ + len(proxy_parts) == 6: + if six.PY2: + self.client_address = (proxy_parts[2], proxy_parts[4]) + else: + self.client_address = ( + proxy_parts[2].decode('latin-1'), + proxy_parts[4].decode('latin-1')) + elif proxy_parts[1].startswith(b'UNKNOWN'): + # "UNKNOWN", in PROXY protocol version 1, means "not + # TCP4 or TCP6". This includes completely legitimate + # things like QUIC or Unix domain sockets. The PROXY + # protocol (section 2.1) states that the receiver + # (that's us) MUST ignore anything after "UNKNOWN" and + # before the CRLF, essentially discarding the first + # line. + pass + else: + self.handle_error(connection_line) + else: + self.handle_error(connection_line) + else: + self.handle_error(connection_line) + + return SwiftHttpProtocol.handle(self) + + def run_server(conf, logger, sock, global_conf=None): # Ensure TZ environment variable exists to avoid stat('/etc/localtime') on # some platforms. This locks in reported times to UTC. os.environ['TZ'] = 'UTC+0' time.tzset() - wsgi.HttpProtocol.default_request_version = "HTTP/1.0" - # Turn off logging requests by the underlying WSGI software. - wsgi.HttpProtocol.log_request = lambda *a: None - # Redirect logging other messages by the underlying WSGI software. 
- wsgi.HttpProtocol.log_message = \ - lambda s, f, *a: logger.error('ERROR WSGI: ' + f % a) wsgi.WRITE_TIMEOUT = int(conf.get('client_timeout') or 60) eventlet.hubs.use_hub(get_hub()) - utils.eventlet_monkey_patch() eventlet_debug = config_true_value(conf.get('eventlet_debug', 'no')) eventlet.debug.hub_exceptions(eventlet_debug) wsgi_logger = NullLogger() @@ -447,15 +531,24 @@ app = loadapp(conf['__file__'], global_conf=global_conf) max_clients = int(conf.get('max_clients', '1024')) pool = RestrictedGreenPool(size=max_clients) + + # Select which protocol class to use (normal or one expecting PROXY + # protocol) + if config_true_value(conf.get('require_proxy_protocol', 'no')): + protocol_class = SwiftHttpProxiedProtocol + else: + protocol_class = SwiftHttpProtocol + + server_kwargs = { + 'custom_pool': pool, + 'protocol': protocol_class, + # Disable capitalizing headers in Eventlet. This is necessary for + # the AWS SDK to work with s3api middleware (it needs an "ETag" + # header; "Etag" just won't do). + 'capitalize_response_headers': False, + } try: - # Disable capitalizing headers in Eventlet if possible. This is - # necessary for the AWS SDK to work with swift3 middleware. - argspec = inspect.getargspec(wsgi.server) - if 'capitalize_response_headers' in argspec.args: - wsgi.server(sock, app, wsgi_logger, custom_pool=pool, - capitalize_response_headers=False) - else: - wsgi.server(sock, app, wsgi_logger, custom_pool=pool) + wsgi.server(sock, app, wsgi_logger, **server_kwargs) except socket.error as err: if err[0] != errno.EINVAL: raise @@ -558,7 +651,8 @@ :param int pid: The new worker process' PID """ - self.logger.notice('Started child %s' % pid) + self.logger.notice('Started child %s from parent %s', + pid, os.getpid()) self.children.append(pid) def register_worker_exit(self, pid): @@ -568,7 +662,8 @@ :param int pid: The PID of the worker that exited. """ - self.logger.error('Removing dead child %s' % pid) + self.logger.error('Removing dead child %s from parent %s', + pid, os.getpid()) self.children.remove(pid) def shutdown_sockets(self): @@ -905,6 +1000,9 @@ else: strategy = WorkersStrategy(conf, logger) + # patch event before loadapp + utils.eventlet_monkey_patch() + # Ensure the configuration and application can be loaded before proceeding. 
global_conf = {'log_name': log_name} if 'global_conf_callback' in kwargs: @@ -931,24 +1029,17 @@ run_server(conf, logger, no_fork_sock, global_conf=global_conf) return 0 - def kill_children(*args): - """Kills the entire process group.""" - logger.error('SIGTERM received') - signal.signal(signal.SIGTERM, signal.SIG_IGN) - running[0] = False - os.killpg(0, signal.SIGTERM) - - def hup(*args): - """Shuts down the server, but allows running requests to complete""" - logger.error('SIGHUP received') - signal.signal(signal.SIGHUP, signal.SIG_IGN) - running[0] = False - - running = [True] - signal.signal(signal.SIGTERM, kill_children) - signal.signal(signal.SIGHUP, hup) + def stop_with_signal(signum, *args): + """Set running flag to False and capture the signum""" + running_context[0] = False + running_context[1] = signum + + # context to hold boolean running state and stop signum + running_context = [True, None] + signal.signal(signal.SIGTERM, stop_with_signal) + signal.signal(signal.SIGHUP, stop_with_signal) - while running[0]: + while running_context[0]: for sock, sock_info in strategy.new_worker_socks(): pid = os.fork() if pid == 0: @@ -988,11 +1079,23 @@ sleep(0.01) except KeyboardInterrupt: logger.notice('User quit') - running[0] = False + running_context[0] = False break + if running_context[1] is not None: + try: + signame = SIGNUM_TO_NAME[running_context[1]] + except KeyError: + logger.error('Stopping with unexpected signal %r' % + running_context[1]) + else: + logger.error('%s received', signame) + if running_context[1] == signal.SIGTERM: + os.killpg(0, signal.SIGTERM) + strategy.shutdown_sockets() - logger.notice('Exited') + signal.signal(signal.SIGTERM, signal.SIG_IGN) + logger.notice('Exited (%s)', os.getpid()) return 0 diff -Nru swift-2.17.0/swift/container/backend.py swift-2.18.0/swift/container/backend.py --- swift-2.17.0/swift/container/backend.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/container/backend.py 2018-05-30 10:17:02.000000000 +0000 @@ -15,6 +15,7 @@ """ Pluggable Back-ends for Container Server """ +import errno import os from uuid import uuid4 @@ -23,16 +24,45 @@ import six.moves.cPickle as pickle from six.moves import range import sqlite3 +from eventlet import tpool +from swift.common.constraints import CONTAINER_LISTING_LIMIT +from swift.common.exceptions import LockTimeout from swift.common.utils import Timestamp, encode_timestamps, \ - decode_timestamps, extract_swift_bytes -from swift.common.db import DatabaseBroker, utf8encode - + decode_timestamps, extract_swift_bytes, storage_directory, hash_path, \ + ShardRange, renamer, find_shard_range, MD5_OF_EMPTY_STRING, mkdirs, \ + get_db_files, parse_db_filename, make_db_file_path, split_path +from swift.common.db import DatabaseBroker, utf8encode, BROKER_TIMEOUT, \ + zero_like, DatabaseAlreadyExists SQLITE_ARG_LIMIT = 999 DATADIR = 'containers' +RECORD_TYPE_OBJECT = 'object' +RECORD_TYPE_SHARD = 'shard' +SHARD_RANGE_TABLE = 'shard_range' + +NOTFOUND = 'not_found' +UNSHARDED = 'unsharded' +SHARDING = 'sharding' +SHARDED = 'sharded' +COLLAPSED = 'collapsed' + + +SHARD_STATS_STATES = [ShardRange.ACTIVE, ShardRange.SHARDING, + ShardRange.SHRINKING] +SHARD_LISTING_STATES = SHARD_STATS_STATES + [ShardRange.CLEAVED] +SHARD_UPDATE_STATES = [ShardRange.CREATED, ShardRange.CLEAVED, + ShardRange.ACTIVE, ShardRange.SHARDING] + + +# attribute names in order used when transforming shard ranges from dicts to +# tuples and vice-versa +SHARD_RANGE_KEYS = ('name', 'timestamp', 'lower', 'upper', 'object_count', + 
'bytes_used', 'meta_timestamp', 'deleted', 'state', + 'state_timestamp', 'epoch') + POLICY_STAT_TABLE_CREATE = ''' CREATE TABLE policy_stat ( storage_policy_index INTEGER PRIMARY KEY, @@ -221,12 +251,217 @@ return any(newer_than_existing) +def merge_shards(shard_data, existing): + """ + Compares ``shard_data`` with ``existing`` and updates ``shard_data`` with + any items of ``existing`` that take precedence over the corresponding item + in ``shard_data``. + + :param shard_data: a dict representation of shard range that may be + modified by this method. + :param existing: a dict representation of shard range. + :returns: True if ``shard data`` has any item(s) that are considered to + take precedence over the corresponding item in ``existing`` + """ + if not existing: + return True + if existing['timestamp'] < shard_data['timestamp']: + # note that currently we do not roll forward any meta or state from + # an item that was created at older time, newer created time trumps + return True + elif existing['timestamp'] > shard_data['timestamp']: + return False + + new_content = False + # timestamp must be the same, so preserve existing range bounds and deleted + for k in ('lower', 'upper', 'deleted'): + shard_data[k] = existing[k] + + # now we need to look for meta data updates + if existing['meta_timestamp'] >= shard_data['meta_timestamp']: + for k in ('object_count', 'bytes_used', 'meta_timestamp'): + shard_data[k] = existing[k] + else: + new_content = True + + if (existing['state_timestamp'] == shard_data['state_timestamp'] + and shard_data['state'] > existing['state']): + new_content = True + elif existing['state_timestamp'] >= shard_data['state_timestamp']: + for k in ('state', 'state_timestamp', 'epoch'): + shard_data[k] = existing[k] + else: + new_content = True + return new_content + + class ContainerBroker(DatabaseBroker): - """Encapsulates working with a container database.""" + """ + Encapsulates working with a container database. + + Note that this may involve multiple on-disk DB files if the container + becomes sharded: + + * :attr:`_db_file` is the path to the legacy container DB name, i.e. + ``.db``. This file should exist for an initialised broker that + has never been sharded, but will not exist once a container has been + sharded. + * :attr:`db_files` is a list of existing db files for the broker. This + list should have at least one entry for an initialised broker, and + should have two entries while a broker is in SHARDING state. + * :attr:`db_file` is the path to whichever db is currently authoritative + for the container. Depending on the container's state, this may not be + the same as the ``db_file`` argument given to :meth:`~__init__`, unless + ``force_db_file`` is True in which case :attr:`db_file` is always equal + to the ``db_file`` argument given to :meth:`~__init__`. + * :attr:`pending_file` is always equal to :attr:`_db_file` extended with + ``.pending``, i.e. ``.db.pending``. 
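To make the precedence rules of ``merge_shards`` above concrete, a small worked example (timestamps are simplified internal-format strings; the other values are made up):

    existing = {'name': 'a/c', 'timestamp': '0000000001.00000',
                'lower': '', 'upper': 'm', 'deleted': 0,
                'object_count': 100, 'bytes_used': 1000,
                'meta_timestamp': '0000000003.00000',
                'state': 10, 'state_timestamp': '0000000001.00000',
                'epoch': None}

    # same created timestamp, but stale meta: the existing stats win and are
    # copied back into the update, so there is nothing new to merge
    update = dict(existing, meta_timestamp='0000000002.00000',
                  object_count=50, bytes_used=500)
    merge_shards(update, existing)   # -> False
    update['object_count']           # -> 100

    # a newer created timestamp trumps everything in the existing row
    newer = dict(existing, timestamp='0000000002.00000', upper='t')
    merge_shards(newer, existing)    # -> True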
+ """ db_type = 'container' db_contains_type = 'object' db_reclaim_timestamp = 'created_at' + def __init__(self, db_file, timeout=BROKER_TIMEOUT, logger=None, + account=None, container=None, pending_timeout=None, + stale_reads_ok=False, skip_commits=False, + force_db_file=False): + self._init_db_file = db_file + if db_file == ':memory:': + base_db_file = db_file + else: + base_db_file = make_db_file_path(db_file, None) + super(ContainerBroker, self).__init__( + base_db_file, timeout, logger, account, container, pending_timeout, + stale_reads_ok, skip_commits=skip_commits) + # the root account and container are populated on demand + self._root_account = self._root_container = None + self._force_db_file = force_db_file + self._db_files = None + + @classmethod + def create_broker(self, device_path, part, account, container, logger=None, + epoch=None, put_timestamp=None, + storage_policy_index=None): + """ + Create a ContainerBroker instance. If the db doesn't exist, initialize + the db file. + + :param device_path: device path + :param part: partition number + :param account: account name string + :param container: container name string + :param logger: a logger instance + :param epoch: a timestamp to include in the db filename + :param put_timestamp: initial timestamp if broker needs to be + initialized + :param storage_policy_index: the storage policy index + :return: a :class:`swift.container.backend.ContainerBroker` instance + """ + hsh = hash_path(account, container) + db_dir = storage_directory(DATADIR, part, hsh) + db_path = make_db_file_path( + os.path.join(device_path, db_dir, hsh + '.db'), epoch) + broker = ContainerBroker(db_path, account=account, container=container, + logger=logger) + if not os.path.exists(broker.db_file): + try: + broker.initialize(put_timestamp, storage_policy_index) + except DatabaseAlreadyExists: + pass + return broker + + def get_db_state(self): + """ + Returns the current state of on disk db files. + """ + if self._db_file == ':memory:': + return UNSHARDED + if not self.db_files: + return NOTFOUND + if len(self.db_files) > 1: + return SHARDING + if self.db_epoch is None: + # never been sharded + return UNSHARDED + if self.db_epoch != self._own_shard_range().epoch: + return UNSHARDED + if not self.get_shard_ranges(): + return COLLAPSED + return SHARDED + + def sharding_initiated(self): + """ + Returns True if a broker has shard range state that would be necessary + for sharding to have been initiated, False otherwise. + """ + own_shard_range = self.get_own_shard_range() + if own_shard_range.state in (ShardRange.SHARDING, + ShardRange.SHRINKING, + ShardRange.SHARDED): + return bool(self.get_shard_ranges()) + return False + + def sharding_required(self): + """ + Returns True if a broker has shard range state that would be necessary + for sharding to have been initiated but has not yet completed sharding, + False otherwise. + """ + db_state = self.get_db_state() + return (db_state == SHARDING or + (db_state == UNSHARDED and self.sharding_initiated())) + + def is_sharded(self): + return self.get_db_state() == SHARDED + + def reload_db_files(self): + """ + Reloads the cached list of valid on disk db files for this broker. + """ + if self._db_file == ':memory:': + return + # reset connection so the next access will use the correct DB file + self.conn = None + self._db_files = get_db_files(self._init_db_file) + + @property + def db_files(self): + """ + Gets the cached list of valid db files that exist on disk for this + broker. 
+ + The cached list may be refreshed by calling + :meth:`~swift.container.backend.ContainerBroker.reload_db_files`. + + :return: A list of paths to db files ordered by ascending epoch; + the list may be empty. + """ + if not self._db_files: + self.reload_db_files() + return self._db_files + + @property + def db_file(self): + """ + Get the path to the primary db file for this broker. This is typically + the db file for the most recent sharding epoch. However, if no db files + exist on disk, or if ``force_db_file`` was True when the broker was + constructed, then the primary db file is the file passed to the broker + constructor. + + :return: A path to a db file; the file does not necessarily exist. + """ + if self._force_db_file: + return self._init_db_file + if self.db_files: + return self.db_files[-1] + return self._init_db_file + + @property + def db_epoch(self): + hash_, epoch, ext = parse_db_filename(self.db_file) + return epoch + @property def storage_policy_index(self): if not hasattr(self, '_storage_policy_index'): @@ -234,6 +469,11 @@ self.get_info()['storage_policy_index'] return self._storage_policy_index + @property + def path(self): + self._populate_instance_cache() + return '%s/%s' % (self.account, self.container) + def _initialize(self, conn, put_timestamp, storage_policy_index): """ Create a brand new container database (tables, indices, triggers, etc.) @@ -250,6 +490,8 @@ self.create_policy_stat_table(conn, storage_policy_index) self.create_container_info_table(conn, put_timestamp, storage_policy_index) + self.create_shard_range_table(conn) + self._db_files = None def create_object_table(self, conn): """ @@ -331,6 +573,40 @@ VALUES (?) """, (storage_policy_index,)) + def create_shard_range_table(self, conn): + """ + Create the shard_range table which is specific to the container DB. + + :param conn: DB connection object + """ + # Use execute (not executescript) so we get the benefits of our + # GreenDBConnection. Creating a table requires a whole-DB lock; + # *any* in-progress cursor will otherwise trip a "database is locked" + # error. + conn.execute(""" + CREATE TABLE %s ( + ROWID INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT, + timestamp TEXT, + lower TEXT, + upper TEXT, + object_count INTEGER DEFAULT 0, + bytes_used INTEGER DEFAULT 0, + meta_timestamp TEXT, + deleted INTEGER DEFAULT 0, + state INTEGER, + state_timestamp TEXT, + epoch TEXT + ); + """ % SHARD_RANGE_TABLE) + + conn.execute(""" + CREATE TRIGGER shard_range_update BEFORE UPDATE ON %s + BEGIN + SELECT RAISE(FAIL, 'UPDATE not allowed; DELETE and INSERT'); + END; + """ % SHARD_RANGE_TABLE) + def get_db_version(self, conn): if self._db_version == -1: self._db_version = 0 @@ -340,6 +616,11 @@ self._db_version = 1 return self._db_version + def _get_deleted_key(self, connection): + if self.get_db_version(connection) < 1: + return '+deleted' + return 'deleted' + def _newid(self, conn): conn.execute(''' UPDATE container_stat @@ -383,12 +664,7 @@ 'ctype_timestamp': content_type_timestamp, 'meta_timestamp': meta_timestamp}) - def empty(self): - """ - Check if container DB is empty. - - :returns: True if the database has no active objects, False otherwise - """ + def _empty(self): self._commit_puts_stale_ok() with self.get() as conn: try: @@ -401,7 +677,27 @@ raise row = conn.execute( 'SELECT object_count from container_stat').fetchone() - return (row[0] == 0) + return zero_like(row[0]) + + def empty(self): + """ + Check if container DB is empty. 
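A hypothetical end-to-end snippet tying these pieces together (device path, partition and names are invented):

    broker = ContainerBroker.create_broker(
        '/srv/node/sda1', part=1022, account='AUTH_test', container='c',
        put_timestamp=Timestamp.now().internal, storage_policy_index=0)
    broker.get_db_state()   # -> 'unsharded' for a freshly initialised db
    broker.empty()          # -> True until an object row is merged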
+ + This method uses more stringent checks on object count than + :meth:`is_deleted`: this method checks that there are no objects in any + policy; if the container is in the process of sharding then both fresh + and retiring databases are checked to be empty; if a root container has + shard ranges then they are checked to be empty. + + :returns: True if the database has no active objects, False otherwise + """ + if not all(broker._empty() for broker in self.get_brokers()): + return False + if self.is_root_container() and self.sharding_initiated(): + # sharded shards don't get updates from their shards so their shard + # usage should not be relied upon + return self.get_shard_usage()['object_count'] <= 0 + return True def delete_object(self, name, timestamp, storage_policy_index=0): """ @@ -447,6 +743,43 @@ 'meta_timestamp': meta_timestamp} self.put_record(record) + def remove_objects(self, lower, upper, max_row=None): + """ + Removes object records in the given namespace range from the object + table. + + Note that objects are removed regardless of their storage_policy_index. + + :param lower: defines the lower bound of object names that will be + removed; names greater than this value will be removed; names less + than or equal to this value will not be removed. + :param upper: defines the upper bound of object names that will be + removed; names less than or equal to this value will be removed; + names greater than this value will not be removed. The empty string + is interpreted as there being no upper bound. + :param max_row: if specified only rows less than or equal to max_row + will be removed + """ + query_conditions = [] + query_args = [] + if max_row is not None: + query_conditions.append('ROWID <= ?') + query_args.append(str(max_row)) + if lower: + query_conditions.append('name > ?') + query_args.append(lower) + if upper: + query_conditions.append('name <= ?') + query_args.append(upper) + + query = 'DELETE FROM object WHERE deleted in (0, 1)' + if query_conditions: + query += ' AND ' + ' AND '.join(query_conditions) + + with self.get() as conn: + conn.execute(query, query_args) + conn.commit() + def _is_deleted_info(self, object_count, put_timestamp, delete_timestamp, **kwargs): """ @@ -457,12 +790,17 @@ # The container is considered deleted if the delete_timestamp # value is greater than the put_timestamp, and there are no # objects in the container. - return (object_count in (None, '', 0, '0')) and ( + return zero_like(object_count) and ( Timestamp(delete_timestamp) > Timestamp(put_timestamp)) def _is_deleted(self, conn): """ - Check container_stat view and evaluate info. + Check if the DB is considered to be deleted. + + This object count used in this check is the same as the container + object count that would be returned in the result of :meth:`get_info` + and exposed to a client i.e. it is based on the container_stat view for + the current storage policy index or relevant shard range usage. 
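As a usage note for ``remove_objects`` above: the bounds follow the shard range convention of an exclusive lower bound and an inclusive upper bound. For example (``sync_point`` stands for a previously recorded max row):

    # removes rows with 'c' < name <= 'p', in every storage policy, but only
    # rows that already existed when sync_point was recorded
    broker.remove_objects('c', 'p', max_row=sync_point)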
:param conn: database conn @@ -471,8 +809,21 @@ info = conn.execute(''' SELECT put_timestamp, delete_timestamp, object_count FROM container_stat''').fetchone() + info = dict(info) + info.update(self._get_alternate_object_stats()[1]) return self._is_deleted_info(**info) + def is_reclaimable(self, now, reclaim_age): + with self.get() as conn: + info = conn.execute(''' + SELECT put_timestamp, delete_timestamp + FROM container_stat''').fetchone() + if (Timestamp(now - reclaim_age) > + Timestamp(info['delete_timestamp']) > + Timestamp(info['put_timestamp'])): + return self.empty() + return False + def get_info_is_deleted(self): """ Get the is_deleted status and info for the container. @@ -485,6 +836,70 @@ info = self.get_info() return info, self._is_deleted_info(**info) + def get_replication_info(self): + info = super(ContainerBroker, self).get_replication_info() + info['shard_max_row'] = self.get_max_row('shard_ranges') + return info + + def _do_get_info_query(self, conn): + data = None + trailing_sync = 'x_container_sync_point1, x_container_sync_point2' + trailing_pol = 'storage_policy_index' + errors = set() + while not data: + try: + data = conn.execute((''' + SELECT account, container, created_at, put_timestamp, + delete_timestamp, status_changed_at, + object_count, bytes_used, + reported_put_timestamp, reported_delete_timestamp, + reported_object_count, reported_bytes_used, hash, + id, %s, %s + FROM container_stat + ''') % (trailing_sync, trailing_pol)).fetchone() + except sqlite3.OperationalError as err: + err_msg = str(err) + if err_msg in errors: + # only attempt migration once + raise + errors.add(err_msg) + if 'no such column: storage_policy_index' in err_msg: + trailing_pol = '0 AS storage_policy_index' + elif 'no such column: x_container_sync_point' in err_msg: + trailing_sync = '-1 AS x_container_sync_point1, ' \ + '-1 AS x_container_sync_point2' + else: + raise + data = dict(data) + # populate instance cache + self._storage_policy_index = data['storage_policy_index'] + self.account = data['account'] + self.container = data['container'] + return data + + def _get_info(self): + self._commit_puts_stale_ok() + with self.get() as conn: + return self._do_get_info_query(conn) + + def _populate_instance_cache(self, conn=None): + # load cached instance attributes from the database if necessary + if self.container is None: + with self.maybe_get(conn) as conn: + self._do_get_info_query(conn) + + def _get_alternate_object_stats(self): + state = self.get_db_state() + if state == SHARDING: + other_info = self.get_brokers()[0]._get_info() + stats = {'object_count': other_info['object_count'], + 'bytes_used': other_info['bytes_used']} + elif state == SHARDED and self.is_root_container(): + stats = self.get_shard_usage() + else: + stats = {} + return state, stats + def get_info(self): """ Get global data for the container. @@ -494,44 +909,14 @@ object_count, bytes_used, reported_put_timestamp, reported_delete_timestamp, reported_object_count, reported_bytes_used, hash, id, x_container_sync_point1, - x_container_sync_point2, and storage_policy_index. + x_container_sync_point2, and storage_policy_index, + db_state. 
""" - self._commit_puts_stale_ok() - with self.get() as conn: - data = None - trailing_sync = 'x_container_sync_point1, x_container_sync_point2' - trailing_pol = 'storage_policy_index' - errors = set() - while not data: - try: - data = conn.execute((''' - SELECT account, container, created_at, put_timestamp, - delete_timestamp, status_changed_at, - object_count, bytes_used, - reported_put_timestamp, reported_delete_timestamp, - reported_object_count, reported_bytes_used, hash, - id, %s, %s - FROM container_stat - ''') % (trailing_sync, trailing_pol)).fetchone() - except sqlite3.OperationalError as err: - err_msg = str(err) - if err_msg in errors: - # only attempt migration once - raise - errors.add(err_msg) - if 'no such column: storage_policy_index' in err_msg: - trailing_pol = '0 AS storage_policy_index' - elif 'no such column: x_container_sync_point' in err_msg: - trailing_sync = '-1 AS x_container_sync_point1, ' \ - '-1 AS x_container_sync_point2' - else: - raise - data = dict(data) - # populate instance cache - self._storage_policy_index = data['storage_policy_index'] - self.account = data['account'] - self.container = data['container'] - return data + data = self._get_info() + state, stats = self._get_alternate_object_stats() + data.update(stats) + data['db_state'] = state + return data def set_x_container_sync_points(self, sync_point1, sync_point2): with self.get() as conn: @@ -657,7 +1042,9 @@ conn.commit() def list_objects_iter(self, limit, marker, end_marker, prefix, delimiter, - path=None, storage_policy_index=0, reverse=False): + path=None, storage_policy_index=0, reverse=False, + include_deleted=False, since_row=None, + transform_func=None, all_policies=False): """ Get a list of objects sorted by name starting at marker onward, up to limit entries. Entries will begin with the prefix and will not @@ -672,10 +1059,29 @@ the path :param storage_policy_index: storage policy index for query :param reverse: reverse the result order. - + :param include_deleted: if True, include only deleted objects; if + False (default), include only undeleted objects; otherwise, include + both deleted and undeleted objects. + :param since_row: include only items whose ROWID is greater than + the given row id; by default all rows are included. + :param transform_func: an optional function that if given will be + called for each object to get a transformed version of the object + to include in the listing; should have same signature as + :meth:`~_transform_record`; defaults to :meth:`~_transform_record`. 
+ :param all_policies: if True, include objects for all storage policies + ignoring any value given for ``storage_policy_index`` :returns: list of tuples of (name, created_at, size, content_type, - etag) + etag, deleted) """ + if include_deleted is True: + deleted_arg = ' = 1' + elif include_deleted is False: + deleted_arg = ' = 0' + else: + deleted_arg = ' in (0, 1)' + + if transform_func is None: + transform_func = self._transform_record delim_force_gte = False (marker, end_marker, prefix, delimiter, path) = utf8encode( marker, end_marker, prefix, delimiter, path) @@ -695,60 +1101,71 @@ orig_marker = marker with self.get() as conn: results = [] + deleted_key = self._get_deleted_key(conn) + query_keys = ['name', 'created_at', 'size', 'content_type', + 'etag', deleted_key] while len(results) < limit: - query = '''SELECT name, created_at, size, content_type, etag - FROM object WHERE''' query_args = [] + query_conditions = [] if end_marker and (not prefix or end_marker < end_prefix): - query += ' name < ? AND' + query_conditions.append('name < ?') query_args.append(end_marker) elif prefix: - query += ' name < ? AND' + query_conditions.append('name < ?') query_args.append(end_prefix) if delim_force_gte: - query += ' name >= ? AND' + query_conditions.append('name >= ?') query_args.append(marker) # Always set back to False delim_force_gte = False elif marker and marker >= prefix: - query += ' name > ? AND' + query_conditions.append('name > ?') query_args.append(marker) elif prefix: - query += ' name >= ? AND' + query_conditions.append('name >= ?') query_args.append(prefix) - if self.get_db_version(conn) < 1: - query += ' +deleted = 0' - else: - query += ' deleted = 0' - orig_tail_query = ''' - ORDER BY name %s LIMIT ? - ''' % ('DESC' if reverse else '') - orig_tail_args = [limit - len(results)] + query_conditions.append(deleted_key + deleted_arg) + if since_row: + query_conditions.append('ROWID > ?') + query_args.append(since_row) + + def build_query(keys, conditions, args): + query = 'SELECT ' + ', '.join(keys) + ' FROM object ' + if conditions: + query += 'WHERE ' + ' AND '.join(conditions) + tail_query = ''' + ORDER BY name %s LIMIT ? + ''' % ('DESC' if reverse else '') + return query + tail_query, args + [limit - len(results)] + # storage policy filter - policy_tail_query = ''' - AND storage_policy_index = ? - ''' + orig_tail_query - policy_tail_args = [storage_policy_index] + orig_tail_args - tail_query, tail_args = \ - policy_tail_query, policy_tail_args + if all_policies: + query, args = build_query( + query_keys + ['storage_policy_index'], + query_conditions, + query_args) + else: + query, args = build_query( + query_keys + ['storage_policy_index'], + query_conditions + ['storage_policy_index = ?'], + query_args + [storage_policy_index]) try: - curs = conn.execute(query + tail_query, - tuple(query_args + tail_args)) + curs = conn.execute(query, tuple(args)) except sqlite3.OperationalError as err: if 'no such column: storage_policy_index' not in str(err): raise - tail_query, tail_args = \ - orig_tail_query, orig_tail_args - curs = conn.execute(query + tail_query, - tuple(query_args + tail_args)) + query, args = build_query( + query_keys + ['0 as storage_policy_index'], + query_conditions, query_args) + curs = conn.execute(query, tuple(args)) curs.row_factory = None # Delimiters without a prefix is ignored, further if there # is no delimiter then we can simply return the result as # prefixes are now handled in the SQL statement. 
if prefix is None or not delimiter: - return [self._transform_record(r) for r in curs] + return [transform_func(r) for r in curs] # We have a delimiter and a prefix (possibly empty string) to # handle @@ -787,19 +1204,51 @@ results.append([dir_name, '0', 0, None, '']) curs.close() break - results.append(self._transform_record(row)) + results.append(transform_func(row)) if not rowcount: break return results + def get_objects(self, limit=None, marker='', end_marker='', + include_deleted=None, since_row=None): + """ + Returns a list of objects, including deleted objects, in all policies. + Each object in the list is described by a dict with keys {'name', + 'created_at', 'size', 'content_type', 'etag', 'deleted', + 'storage_policy_index'}. + + :param limit: maximum number of entries to get + :param marker: if set, objects with names less than or equal to this + value will not be included in the list. + :param end_marker: if set, objects with names greater than or equal to + this value will not be included in the list. + :param include_deleted: if True, include only deleted objects; if + False, include only undeleted objects; otherwise (default), include + both deleted and undeleted objects. + :param since_row: include only items whose ROWID is greater than + the given row id; by default all rows are included. + :return: a list of dicts, each describing an object. + """ + + limit = CONTAINER_LISTING_LIMIT if limit is None else limit + return self.list_objects_iter( + limit, marker, end_marker, prefix=None, delimiter=None, path=None, + reverse=False, include_deleted=include_deleted, + transform_func=self._record_to_dict, since_row=since_row, + all_policies=True + ) + def _transform_record(self, record): """ - Decode the created_at timestamp into separate data, content-type and - meta timestamps and replace the created_at timestamp with the - metadata timestamp i.e. the last-modified time. + Returns a tuple of (name, last-modified time, size, content_type and + etag) for the given record. + + The given record's created_at timestamp is decoded into separate data, + content-type and meta timestamps and the metadata timestamp is used as + the last-modified time value. """ t_data, t_ctype, t_meta = decode_timestamps(record[1]) - return (record[0], t_meta.internal) + record[2:] + return (record[0], t_meta.internal) + record[2:5] def _record_to_dict(self, rec): if rec: @@ -822,7 +1271,7 @@ if isinstance(item['name'], six.text_type): item['name'] = item['name'].encode('utf-8') - def _really_merge_items(conn): + def _really_really_merge_items(conn): curs = conn.cursor() if self.get_db_version(conn) >= 1: query_mod = ' deleted IN (0, 1) AND ' @@ -843,7 +1292,7 @@ ','.join('?' * len(chunk)), chunk)) # Sort item_list into things that need adding and deleting, based # on results of created_at query. - to_delete = {} + to_delete = set() to_add = {} for item in item_list: item.setdefault('storage_policy_index', 0) # legacy @@ -851,7 +1300,7 @@ existing = self._record_to_dict(records.get(item_ident)) if update_new_item_from_existing(item, existing): if item_ident in records: # exists with older timestamp - to_delete[item_ident] = item + to_delete.add(item_ident) if item_ident in to_add: # duplicate entries in item_list update_new_item_from_existing(item, to_add[item_ident]) to_add[item_ident] = item @@ -859,8 +1308,7 @@ curs.executemany( 'DELETE FROM object WHERE ' + query_mod + 'name=? 
AND storage_policy_index=?', - ((rec['name'], rec['storage_policy_index']) - for rec in to_delete.values())) + (item_ident for item_ident in to_delete)) if to_add: curs.executemany( 'INSERT INTO object (name, created_at, size, content_type,' @@ -885,6 +1333,9 @@ ''', (sync_point, source)) conn.commit() + def _really_merge_items(conn): + return tpool.execute(_really_really_merge_items, conn) + with self.get() as conn: try: return _really_merge_items(conn) @@ -894,6 +1345,86 @@ self._migrate_add_storage_policy(conn) return _really_merge_items(conn) + def merge_shard_ranges(self, shard_ranges): + """ + Merge shard ranges into the shard range table. + + :param shard_ranges: a shard range or a list of shard ranges; each + shard range should be an instance of + :class:`~swift.common.utils.ShardRange` or a dict representation of + a shard range having ``SHARD_RANGE_KEYS``. + """ + if not shard_ranges: + return + if not isinstance(shard_ranges, list): + shard_ranges = [shard_ranges] + + item_list = [] + for item in shard_ranges: + if isinstance(item, ShardRange): + item = dict(item) + for col in ('name', 'lower', 'upper'): + if isinstance(item[col], six.text_type): + item[col] = item[col].encode('utf-8') + item_list.append(item) + + def _really_merge_items(conn): + curs = conn.cursor() + curs.execute('BEGIN IMMEDIATE') + + # Get rows for items that already exist. + # We must chunk it up to avoid sqlite's limit of 999 args. + records = {} + for offset in range(0, len(item_list), SQLITE_ARG_LIMIT): + chunk = [record['name'] for record + in item_list[offset:offset + SQLITE_ARG_LIMIT]] + records.update( + (rec[0], rec) for rec in curs.execute( + 'SELECT %s FROM %s ' + 'WHERE deleted IN (0, 1) AND name IN (%s)' % + (', '.join(SHARD_RANGE_KEYS), SHARD_RANGE_TABLE, + ','.join('?' * len(chunk))), chunk)) + + # Sort item_list into things that need adding and deleting + to_delete = set() + to_add = {} + for item in item_list: + item_ident = item['name'] + existing = records.get(item_ident) + if existing: + existing = dict(zip(SHARD_RANGE_KEYS, existing)) + if merge_shards(item, existing): + # exists with older timestamp + if item_ident in records: + to_delete.add(item_ident) + # duplicate entries in item_list + if (item_ident not in to_add or + merge_shards(item, to_add[item_ident])): + to_add[item_ident] = item + + if to_delete: + curs.executemany( + 'DELETE FROM %s WHERE deleted in (0, 1) ' + 'AND name = ?' % SHARD_RANGE_TABLE, + ((item_ident,) for item_ident in to_delete)) + if to_add: + vals = ','.join('?' 
* len(SHARD_RANGE_KEYS)) + curs.executemany( + 'INSERT INTO %s (%s) VALUES (%s)' % + (SHARD_RANGE_TABLE, ','.join(SHARD_RANGE_KEYS), vals), + tuple([item[k] for k in SHARD_RANGE_KEYS] + for item in to_add.values())) + conn.commit() + + with self.get() as conn: + try: + return _really_merge_items(conn) + except sqlite3.OperationalError as err: + if ('no such table: %s' % SHARD_RANGE_TABLE) not in str(err): + raise + self.create_shard_range_table(conn) + return _really_merge_items(conn) + def get_reconciler_sync(self): with self.get() as conn: try: @@ -1039,3 +1570,626 @@ ''' % (column_names, column_names) + CONTAINER_STAT_VIEW_SCRIPT + 'COMMIT;') + + def _reclaim(self, conn, age_timestamp, sync_timestamp): + super(ContainerBroker, self)._reclaim(conn, age_timestamp, + sync_timestamp) + # populate instance cache, but use existing conn to avoid deadlock + # when it has a pending update + self._populate_instance_cache(conn=conn) + try: + conn.execute(''' + DELETE FROM %s WHERE deleted = 1 AND timestamp < ? + AND name != ? + ''' % SHARD_RANGE_TABLE, (sync_timestamp, self.path)) + except sqlite3.OperationalError as err: + if ('no such table: %s' % SHARD_RANGE_TABLE) not in str(err): + raise + + def _get_shard_range_rows(self, connection=None, include_deleted=False, + states=None, include_own=False, + exclude_others=False): + """ + Returns a list of shard range rows. + + To get all shard ranges use ``include_own=True``. To get only the + broker's own shard range use ``include_own=True`` and + ``exclude_others=True``. + + :param connection: db connection + :param include_deleted: include rows marked as deleted + :param states: include only rows matching the given state(s); can be an + int or a list of ints. + :param include_own: boolean that governs whether the row whose name + matches the broker's path is included in the returned list. If + True, that row is included, otherwise it is not included. Default + is False. + :param exclude_others: boolean that governs whether the rows whose + names do not match the broker's path are included in the returned + list. If True, those rows are not included, otherwise they are + included. Default is False. + :return: a list of tuples. + """ + + if exclude_others and not include_own: + return [] + + included_states = set() + if isinstance(states, (list, tuple, set)): + included_states.update(states) + elif states is not None: + included_states.add(states) + + def do_query(conn): + condition = '' + conditions = [] + params = [] + if not include_deleted: + conditions.append('deleted=0') + if included_states: + conditions.append('state in (%s)' % ','.join( + '?' 
* len(included_states))) + params.extend(included_states) + if not include_own: + conditions.append('name != ?') + params.append(self.path) + if exclude_others: + conditions.append('name = ?') + params.append(self.path) + if conditions: + condition = ' WHERE ' + ' AND '.join(conditions) + sql = ''' + SELECT %s + FROM %s%s; + ''' % (', '.join(SHARD_RANGE_KEYS), SHARD_RANGE_TABLE, condition) + data = conn.execute(sql, params) + data.row_factory = None + return [row for row in data] + + try: + with self.maybe_get(connection) as conn: + return do_query(conn) + except sqlite3.OperationalError as err: + if ('no such table: %s' % SHARD_RANGE_TABLE) not in str(err): + raise + return [] + + @classmethod + def resolve_shard_range_states(cls, states): + """ + Given a list of values each of which may be the name of a state, the + number of a state, or an alias, return the set of state numbers + described by the list. + + The following alias values are supported: 'listing' maps to all states + that are considered valid when listing objects; 'updating' maps to all + states that are considered valid for redirecting an object update. + + :param states: a list of values each of which may be the name of a + state, the number of a state, or an alias + :return: a set of integer state numbers, or None if no states are given + :raises ValueError: if any value in the given list is neither a valid + state nor a valid alias + """ + if states: + resolved_states = set() + for state in states: + if state == 'listing': + resolved_states.update(SHARD_LISTING_STATES) + elif state == 'updating': + resolved_states.update(SHARD_UPDATE_STATES) + else: + resolved_states.add(ShardRange.resolve_state(state)[0]) + return resolved_states + return None + + def get_shard_ranges(self, marker=None, end_marker=None, includes=None, + reverse=False, include_deleted=False, states=None, + include_own=False, + exclude_others=False, fill_gaps=False): + """ + Returns a list of persisted shard ranges. + + :param marker: restricts the returned list to shard ranges whose + namespace includes or is greater than the marker value. + :param end_marker: restricts the returned list to shard ranges whose + namespace includes or is less than the end_marker value. + :param includes: restricts the returned list to the shard range that + includes the given value; if ``includes`` is specified then + ``marker`` and ``end_marker`` are ignored. + :param reverse: reverse the result order. + :param include_deleted: include items that have the delete marker set + :param states: if specified, restricts the returned list to shard + ranges that have the given state(s); can be a list of ints or a + single int. + :param include_own: boolean that governs whether the row whose name + matches the broker's path is included in the returned list. If + True, that row is included, otherwise it is not included. Default + is False. + :param exclude_others: boolean that governs whether the rows whose + names do not match the broker's path are included in the returned + list. If True, those rows are not included, otherwise they are + included. Default is False. + :param fill_gaps: if True, insert own shard range to fill any gaps in + at the tail of other shard ranges. 
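For example, a hypothetical caller resolving the 'listing' alias and asking for the shard ranges covering part of the namespace:

    states = ContainerBroker.resolve_shard_range_states(['listing'])
    # -> the set of ints in SHARD_LISTING_STATES
    shard_ranges = broker.get_shard_ranges(
        marker='pears', end_marker='plums', states=states, fill_gaps=True)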
+ :return: a list of instances of :class:`swift.common.utils.ShardRange` + """ + def shard_range_filter(sr): + end = start = True + if end_marker: + end = end_marker > sr.lower + if marker: + start = marker < sr.upper + return start and end + + if reverse: + marker, end_marker = end_marker, marker + if marker and end_marker and marker >= end_marker: + return [] + + shard_ranges = [ + ShardRange(*row) + for row in self._get_shard_range_rows( + include_deleted=include_deleted, states=states, + include_own=include_own, + exclude_others=exclude_others)] + # note if this ever changes to *not* sort by upper first then it breaks + # a key assumption for bisect, which is used by utils.find_shard_ranges + shard_ranges.sort(key=lambda sr: (sr.upper, sr.state, sr.lower)) + if includes: + shard_range = find_shard_range(includes, shard_ranges) + return [shard_range] if shard_range else [] + + if reverse: + shard_ranges.reverse() + if marker or end_marker: + shard_ranges = list(filter(shard_range_filter, shard_ranges)) + + if fill_gaps: + if reverse: + if shard_ranges: + last_upper = shard_ranges[0].upper + else: + last_upper = marker or ShardRange.MIN + required_upper = end_marker or ShardRange.MAX + filler_index = 0 + else: + if shard_ranges: + last_upper = shard_ranges[-1].upper + else: + last_upper = marker or ShardRange.MIN + required_upper = end_marker or ShardRange.MAX + filler_index = len(shard_ranges) + if required_upper > last_upper: + filler_sr = self.get_own_shard_range() + filler_sr.lower = last_upper + filler_sr.upper = required_upper + shard_ranges.insert(filler_index, filler_sr) + + return shard_ranges + + def _own_shard_range(self, no_default=False): + shard_ranges = self.get_shard_ranges(include_own=True, + include_deleted=True, + exclude_others=True) + if shard_ranges: + own_shard_range = shard_ranges[0] + elif no_default: + return None + else: + own_shard_range = ShardRange( + self.path, Timestamp.now(), ShardRange.MIN, ShardRange.MAX, + state=ShardRange.ACTIVE) + return own_shard_range + + def get_own_shard_range(self, no_default=False): + """ + Returns a shard range representing this broker's own shard range. If no + such range has been persisted in the broker's shard ranges table then a + default shard range representing the entire namespace will be returned. + + The returned shard range will be updated with the current object stats + for this broker and a meta timestamp set to the current time. For these + values to be persisted the caller must merge the shard range. + + :param no_default: if True and the broker's own shard range is not + found in the shard ranges table then None is returned, otherwise a + default shard range is returned. + :return: an instance of :class:`~swift.common.utils.ShardRange` + """ + own_shard_range = self._own_shard_range(no_default=no_default) + if own_shard_range: + info = self.get_info() + own_shard_range.update_meta( + info['object_count'], info['bytes_used']) + return own_shard_range + + def is_own_shard_range(self, shard_range): + return shard_range.name == self.path + + def enable_sharding(self, epoch): + """ + Updates this broker's own shard range with the given epoch, sets its + state to SHARDING and persists it in the DB. + + :param epoch: a :class:`~swift.utils.common.Timestamp` + :return: the broker's updated own shard range. 
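An illustrative outline of how these calls fit together when a container starts to shard (error handling omitted; the cleaving of object rows into shard dbs happens elsewhere, in the sharder daemon):

    epoch = Timestamp.now()
    broker.enable_sharding(epoch)   # own shard range -> SHARDING, epoch set
    broker.set_sharding_state()     # creates the fresh <hash>_<epoch>.db
    broker.get_db_state()           # -> 'sharding' (retiring db + fresh db)
    # ... cleaving ...
    broker.set_sharded_state()      # unlinks the retiring db
    broker.get_db_state()           # -> 'sharded', or 'collapsed' if no shard
                                    #    ranges were ever merged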
+ """ + own_shard_range = self._own_shard_range() + own_shard_range.update_state(ShardRange.SHARDING, epoch) + own_shard_range.epoch = epoch + self.merge_shard_ranges(own_shard_range) + return own_shard_range + + def get_shard_usage(self): + """ + Get the aggregate object stats for all shard ranges in states ACTIVE, + SHARDING or SHRINKING. + + :return: a dict with keys {bytes_used, object_count} + """ + shard_ranges = self.get_shard_ranges(states=SHARD_STATS_STATES) + return {'bytes_used': sum(sr.bytes_used for sr in shard_ranges), + 'object_count': sum(sr.object_count for sr in shard_ranges)} + + def get_all_shard_range_data(self): + """ + Returns a list of all shard range data, including own shard range and + deleted shard ranges. + + :return: A list of dict representations of a ShardRange. + """ + shard_ranges = self.get_shard_ranges(include_deleted=True, + include_own=True) + return [dict(sr) for sr in shard_ranges] + + def set_sharding_state(self): + """ + Creates and initializes a fresh DB file in preparation for sharding a + retiring DB. The broker's own shard range must have an epoch timestamp + for this method to succeed. + + :return: True if the fresh DB was successfully created, False + otherwise. + """ + epoch = self.get_own_shard_range().epoch + if not epoch: + self.logger.warning("Container '%s' cannot be set to sharding " + "state: missing epoch", self.path) + return False + state = self.get_db_state() + if not state == UNSHARDED: + self.logger.warning("Container '%s' cannot be set to sharding " + "state while in %s state", self.path, state) + return False + + info = self.get_info() + # The tmp_dir is cleaned up by the replicators after reclaim_age, so if + # we initially create the fresh DB there, we will already have cleanup + # covered if there is an error. + tmp_dir = os.path.join(self.get_device_path(), 'tmp') + if not os.path.exists(tmp_dir): + mkdirs(tmp_dir) + tmp_db_file = os.path.join(tmp_dir, "fresh%s.db" % str(uuid4())) + fresh_broker = ContainerBroker(tmp_db_file, self.timeout, self.logger, + self.account, self.container) + fresh_broker.initialize(info['put_timestamp'], + info['storage_policy_index']) + # copy relevant data from the retiring db to the fresh db + fresh_broker.update_metadata(self.metadata) + fresh_broker.merge_shard_ranges(self.get_all_shard_range_data()) + # copy sync points so that any peer in sync with retiring db will + # appear to be in sync with the fresh db, although the peer shouldn't + # attempt to replicate objects to a db with shard ranges. 
+ for incoming in (True, False): + syncs = self.get_syncs(incoming) + fresh_broker.merge_syncs(syncs, incoming) + + max_row = self.get_max_row() + with fresh_broker.get() as fresh_broker_conn: + # Initialise the rowid to continue from where the retiring db ended + try: + sql = "INSERT into object " \ + "(ROWID, name, created_at, size, content_type, etag) " \ + "values (?, 'tmp_sharding', ?, 0, '', ?)" + fresh_broker_conn.execute( + sql, (max_row, Timestamp.now().internal, + MD5_OF_EMPTY_STRING)) + fresh_broker_conn.execute( + 'DELETE FROM object WHERE ROWID = ?', (max_row,)) + fresh_broker_conn.commit() + except sqlite3.OperationalError as err: + self.logger.error( + 'Failed to set the ROWID of the fresh database for %s: %s', + self.path, err) + return False + + # Set the created_at and hash in the container_info table the same + # in both brokers + try: + fresh_broker_conn.execute( + 'UPDATE container_stat SET created_at=?', + (info['created_at'],)) + fresh_broker_conn.commit() + except sqlite3.OperationalError as err: + self.logger.error('Failed to set matching created_at time in ' + 'the fresh database for %s: %s', + self.path, err) + return False + + # Rename to the new database + fresh_db_filename = make_db_file_path(self._db_file, epoch) + renamer(tmp_db_file, fresh_db_filename) + self.reload_db_files() + return True + + def set_sharded_state(self): + """ + Unlink's the broker's retiring DB file. + + :return: True if the retiring DB was successfully unlinked, False + otherwise. + """ + state = self.get_db_state() + if not state == SHARDING: + self.logger.warning("Container %r cannot be set to sharded " + "state while in %s state", + self.path, state) + return False + + self.reload_db_files() + if len(self.db_files) < 2: + self.logger.warning( + 'Refusing to delete db file for %r: no fresher db file found ' + 'in %r.', self.path, self.db_files) + return False + + retiring_file = self.db_files[-2] + try: + os.unlink(retiring_file) + self.logger.debug('Unlinked retiring db %r', retiring_file) + except OSError as err: + if err.errno != errno.ENOENT: + self.logger.exception('Failed to unlink %r' % self._db_file) + return False + + self.reload_db_files() + if len(self.db_files) >= 2: + self.logger.warning( + 'Still have multiple db files after unlinking %r: %r', + retiring_file, self.db_files) + return False + + return True + + def get_brokers(self): + """ + Return a list of brokers for component dbs. The list has two entries + while the db state is sharding: the first entry is a broker for the + retiring db with ``skip_commits`` set to ``True``; the second entry is + a broker for the fresh db with ``skip_commits`` set to ``False``. For + any other db state the list has one entry. + + :return: a list of :class:`~swift.container.backend.ContainerBroker` + """ + if len(self.db_files) > 2: + self.logger.warning('Unexpected db files will be ignored: %s' % + self.db_files[:-2]) + brokers = [] + db_files = self.db_files[-2:] + while db_files: + db_file = db_files.pop(0) + sub_broker = ContainerBroker( + db_file, self.timeout, self.logger, self.account, + self.container, self.pending_timeout, self.stale_reads_ok, + force_db_file=True, skip_commits=bool(db_files)) + brokers.append(sub_broker) + return brokers + + def set_sharding_sysmeta(self, key, value): + """ + Updates the broker's metadata metadata stored under the given key + prefixed with a sharding specific namespace. + + :param key: metadata key in the sharding metadata namespace. 
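For example, for a hypothetical shard container whose root is ``AUTH_test/parent`` (and whose own account/container differ from the root's):

    broker.set_sharding_sysmeta('Root', 'AUTH_test/parent')
    # persisted as X-Container-Sysmeta-Shard-Root
    broker.get_sharding_sysmeta('Root')   # -> 'AUTH_test/parent'
    broker.root_path                      # -> 'AUTH_test/parent'
    broker.is_root_container()            # -> False for this shard broker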
+ :param value: metadata value + """ + self.update_metadata({'X-Container-Sysmeta-Shard-' + key: + (value, Timestamp.now().internal)}) + + def get_sharding_sysmeta(self, key=None): + """ + Returns sharding specific info from the broker's metadata. + + :param key: if given the value stored under ``key`` in the sharding + info will be returned. + :return: either a dict of sharding info or the value stored under + ``key`` in that dict. + """ + prefix = 'X-Container-Sysmeta-Shard-' + metadata = self.metadata + info = dict((k[len(prefix):], v[0]) for + k, v in metadata.items() if k.startswith(prefix)) + if key: + return info.get(key) + return info + + def _load_root_info(self): + """ + Load the root container name and account for the container represented + by this broker. + + The root container path, if set, is stored in sysmeta under the key + ``X-Container-Sysmeta-Shard-Root``. If this sysmeta is not set then the + container is considered to be a root container and ``_root_account`` + and ``_root_container`` are set equal to the broker ``account`` and + ``container`` attributes respectively. + + """ + path = self.get_sharding_sysmeta('Root') + if not path: + # Ensure account/container get populated + self._populate_instance_cache() + self._root_account = self.account + self._root_container = self.container + return + + try: + self._root_account, self._root_container = split_path( + '/' + path, 2, 2) + except ValueError: + raise ValueError("Expected X-Container-Sysmeta-Shard-Root to be " + "of the form 'account/container', got %r" % path) + + @property + def root_account(self): + if not self._root_account: + self._load_root_info() + return self._root_account + + @property + def root_container(self): + if not self._root_container: + self._load_root_info() + return self._root_container + + @property + def root_path(self): + return '%s/%s' % (self.root_account, self.root_container) + + def is_root_container(self): + """ + Returns True if this container is a root container, False otherwise. + + A root container is a container that is not a shard of another + container. + """ + self._populate_instance_cache() + return (self.root_account == self.account and + self.root_container == self.container) + + def _get_next_shard_range_upper(self, shard_size, last_upper=None): + """ + Returns the name of the object that is ``shard_size`` rows beyond + ``last_upper`` in the object table ordered by name. If ``last_upper`` + is not given then it defaults to the start of object table ordered by + name. + + :param last_upper: the upper bound of the last found shard range. + :return: an object name, or None if the number of rows beyond + ``last_upper`` is less than ``shard_size``. + """ + self._commit_puts_stale_ok() + with self.get() as connection: + sql = ('SELECT name FROM object WHERE %s=0 ' % + self._get_deleted_key(connection)) + args = [] + if last_upper: + sql += "AND name > ? " + args.append(str(last_upper)) + sql += "ORDER BY name LIMIT 1 OFFSET %d" % (shard_size - 1) + row = connection.execute(sql, args).fetchone() + return row['name'] if row else None + + def find_shard_ranges(self, shard_size, limit=-1, existing_ranges=None): + """ + Scans the container db for shard ranges. Scanning will start at the + upper bound of the any ``existing_ranges`` that are given, otherwise + at ``ShardRange.MIN``. Scanning will stop when ``limit`` shard ranges + have been found or when no more shard ranges can be found. 
In the + latter case, the upper bound of the final shard range will be equal to + the upper bound of the container namespace. + + This method does not modify the state of the db; callers are + responsible for persisting any shard range data in the db. + + :param shard_size: the size of each shard range + :param limit: the maximum number of shard points to be found; a + negative value (default) implies no limit. + :param existing_ranges: an optional list of existing ShardRanges; if + given, this list should be sorted in order of upper bounds; the + scan for new shard ranges will start at the upper bound of the last + existing ShardRange. + :return: a tuple; the first value in the tuple is a list of + dicts each having keys {'index', 'lower', 'upper', 'object_count'} + in order of ascending 'upper'; the second value in the tuple is a + boolean which is True if the last shard range has been found, False + otherwise. + """ + existing_ranges = existing_ranges or [] + object_count = self.get_info().get('object_count', 0) + if shard_size >= object_count: + # container not big enough to shard + return [], False + + own_shard_range = self.get_own_shard_range() + progress = 0 + progress_reliable = True + # update initial state to account for any existing shard ranges + if existing_ranges: + if all([sr.state == ShardRange.FOUND + for sr in existing_ranges]): + progress = sum([sr.object_count for sr in existing_ranges]) + else: + # else: object count in existing shard ranges may have changed + # since they were found so progress cannot be reliably + # calculated; use default progress of zero - that's ok, + # progress is used for optimisation not correctness + progress_reliable = False + last_shard_upper = existing_ranges[-1].upper + if last_shard_upper >= own_shard_range.upper: + # == implies all ranges were previously found + # > implies an acceptor range has been set into which this + # shard should cleave itself + return [], True + else: + last_shard_upper = own_shard_range.lower + + found_ranges = [] + sub_broker = self.get_brokers()[0] + index = len(existing_ranges) + while limit < 0 or len(found_ranges) < limit: + if progress + shard_size >= object_count: + # next shard point is at or beyond final object name so don't + # bother with db query + next_shard_upper = None + else: + try: + next_shard_upper = sub_broker._get_next_shard_range_upper( + shard_size, last_shard_upper) + except (sqlite3.OperationalError, LockTimeout): + self.logger.exception( + "Problem finding shard upper in %r: " % self.db_file) + break + + if (next_shard_upper is None or + next_shard_upper > own_shard_range.upper): + # We reached the end of the container namespace, or possibly + # beyond if the container has misplaced objects. In either case + # limit the final shard range to own_shard_range.upper. 
+ next_shard_upper = own_shard_range.upper + if progress_reliable: + # object count may include misplaced objects so the final + # shard size may not be accurate until cleaved, but at + # least the sum of shard sizes will equal the unsharded + # object_count + shard_size = object_count - progress + + # NB shard ranges are created with a non-zero object count so that + # the apparent container object count remains constant, and the + # container is non-deletable while shards have been found but not + # yet cleaved + found_ranges.append( + {'index': index, + 'lower': str(last_shard_upper), + 'upper': str(next_shard_upper), + 'object_count': shard_size}) + + if next_shard_upper == own_shard_range.upper: + return found_ranges, True + + progress += shard_size + last_shard_upper = next_shard_upper + index += 1 + + return found_ranges, False diff -Nru swift-2.17.0/swift/container/replicator.py swift-2.18.0/swift/container/replicator.py --- swift-2.17.0/swift/container/replicator.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/container/replicator.py 2018-05-30 10:17:02.000000000 +0000 @@ -26,11 +26,10 @@ get_reconciler_container_name, get_row_to_q_entry_translator) from swift.common import db_replicator from swift.common.storage_policy import POLICIES +from swift.common.swob import HTTPOk, HTTPAccepted from swift.common.exceptions import DeviceUnavailable from swift.common.http import is_success -from swift.common.db import DatabaseAlreadyExists -from swift.common.utils import (Timestamp, hash_path, - storage_directory, majority_size) +from swift.common.utils import Timestamp, majority_size, get_db_files class ContainerReplicator(db_replicator.Replicator): @@ -39,6 +38,10 @@ datadir = DATADIR default_port = 6201 + def __init__(self, conf, logger=None): + super(ContainerReplicator, self).__init__(conf, logger=logger) + self.reconciler_cleanups = self.sync_store = None + def report_up_to_date(self, full_info): reported_key_map = { 'reported_put_timestamp': 'put_timestamp', @@ -61,8 +64,7 @@ return sync_args def _handle_sync_response(self, node, response, info, broker, http, - different_region): - parent = super(ContainerReplicator, self) + different_region=False): if is_success(response.status): remote_info = json.loads(response.data) if incorrect_policy_index(info, remote_info): @@ -75,9 +77,50 @@ if any(info[key] != remote_info[key] for key in sync_timestamps): broker.merge_timestamps(*(remote_info[key] for key in sync_timestamps)) - rv = parent._handle_sync_response( + + # Grab remote's shard ranges, too + self._fetch_and_merge_shard_ranges(http, broker) + + return super(ContainerReplicator, self)._handle_sync_response( node, response, info, broker, http, different_region) - return rv + + def _sync_shard_ranges(self, broker, http, local_id): + # TODO: currently the number of shard ranges is expected to be _much_ + # less than normal objects so all are sync'd on each cycle. However, in + # future there should be sync points maintained much like for object + # syncing so that only new shard range rows are sync'd. 
+ shard_range_data = broker.get_all_shard_range_data() + if shard_range_data: + if not self._send_replicate_request( + http, 'merge_shard_ranges', shard_range_data, local_id): + return False + self.logger.debug('%s synced %s shard ranges to %s', + broker.db_file, len(shard_range_data), + '%(ip)s:%(port)s/%(device)s' % http.node) + return True + + def _choose_replication_mode(self, node, rinfo, info, local_sync, broker, + http, different_region): + # Always replicate shard ranges + shard_range_success = self._sync_shard_ranges(broker, http, info['id']) + if broker.sharding_initiated(): + self.logger.warning( + '%s is able to shard -- refusing to replicate objects to peer ' + '%s; have shard ranges and will wait for cleaving', + broker.db_file, + '%(ip)s:%(port)s/%(device)s' % node) + self.stats['deferred'] += 1 + return shard_range_success + + success = super(ContainerReplicator, self)._choose_replication_mode( + node, rinfo, info, local_sync, broker, http, + different_region) + return shard_range_success and success + + def _fetch_and_merge_shard_ranges(self, http, broker): + response = http.replicate('get_shard_ranges') + if is_success(response.status): + broker.merge_shard_ranges(json.loads(response.data)) def find_local_handoff_for_part(self, part): """ @@ -114,15 +157,10 @@ raise DeviceUnavailable( 'No mounted devices found suitable to Handoff reconciler ' 'container %s in partition %s' % (container, part)) - hsh = hash_path(account, container) - db_dir = storage_directory(DATADIR, part, hsh) - db_path = os.path.join(self.root, node['device'], db_dir, hsh + '.db') - broker = ContainerBroker(db_path, account=account, container=container) - if not os.path.exists(broker.db_file): - try: - broker.initialize(timestamp, 0) - except DatabaseAlreadyExists: - pass + broker = ContainerBroker.create_broker( + os.path.join(self.root, node['device']), part, account, container, + logger=self.logger, put_timestamp=timestamp, + storage_policy_index=0) if self.reconciler_containers is not None: self.reconciler_containers[container] = part, broker, node['id'] return broker @@ -207,6 +245,18 @@ # replication broker.update_reconciler_sync(max_sync) + def cleanup_post_replicate(self, broker, orig_info, responses): + if broker.sharding_required(): + # despite being a handoff, since we're sharding we're not going to + # do any cleanup so we can continue cleaving - this is still + # considered "success" + self.logger.debug( + 'Not deleting db %s (requires sharding, state %s)', + broker.db_file, broker.get_db_state()) + return True + return super(ContainerReplicator, self).cleanup_post_replicate( + broker, orig_info, responses) + def delete_db(self, broker): """ Ensure that reconciler databases are only cleaned up at the end of the @@ -217,12 +267,13 @@ # this container shouldn't be here, make sure it's cleaned up self.reconciler_cleanups[broker.container] = broker return - try: - # DB is going to get deleted. Be preemptive about it - self.sync_store.remove_synced_container(broker) - except Exception: - self.logger.exception('Failed to remove sync_store entry %s' % - broker.db_file) + if self.sync_store: + try: + # DB is going to get deleted. 
Be preemptive about it
+                self.sync_store.remove_synced_container(broker)
+            except Exception:
+                self.logger.exception('Failed to remove sync_store entry %s' %
+                                      broker.db_file)
         return super(ContainerReplicator, self).delete_db(broker)
@@ -259,9 +310,20 @@
             self.replicate_reconcilers()
         return rv
 
+    def _in_sync(self, rinfo, info, broker, local_sync):
+        # TODO: don't always sync shard ranges!
+        if broker.get_shard_ranges(include_own=True, include_deleted=True):
+            return False
+
+        return super(ContainerReplicator, self)._in_sync(
+            rinfo, info, broker, local_sync)
+
 class ContainerReplicatorRpc(db_replicator.ReplicatorRpc):
 
+    def _db_file_exists(self, db_path):
+        return bool(get_db_files(db_path))
+
     def _parse_sync_args(self, args):
         parent = super(ContainerReplicatorRpc, self)
         remote_info = parent._parse_sync_args(args)
@@ -289,3 +351,27 @@
             timestamp=status_changed_at)
         info = broker.get_replication_info()
         return info
+
+    def _abort_rsync_then_merge(self, db_file, old_filename):
+        if super(ContainerReplicatorRpc, self)._abort_rsync_then_merge(
+                db_file, old_filename):
+            return True
+        # if the local db has started sharding since the original 'sync'
+        # request then abort object replication now; instantiate a fresh broker
+        # each time this check is performed so as to get the latest state
+        broker = ContainerBroker(db_file)
+        return broker.sharding_initiated()
+
+    def _post_rsync_then_merge_hook(self, existing_broker, new_broker):
+        # Note the following hook will need to change to using a pointer and
+        # limit in the future.
+        new_broker.merge_shard_ranges(
+            existing_broker.get_all_shard_range_data())
+
+    def merge_shard_ranges(self, broker, args):
+        broker.merge_shard_ranges(args[0])
+        return HTTPAccepted()
+
+    def get_shard_ranges(self, broker, args):
+        return HTTPOk(headers={'Content-Type': 'application/json'},
+                      body=json.dumps(broker.get_all_shard_range_data()))
diff -Nru swift-2.17.0/swift/container/server.py swift-2.18.0/swift/container/server.py
--- swift-2.17.0/swift/container/server.py 2018-02-05 14:00:48.000000000 +0000
+++ swift-2.18.0/swift/container/server.py 2018-05-30 10:17:02.000000000 +0000
@@ -24,7 +24,8 @@
 import swift.common.db
 from swift.container.sync_store import ContainerSyncStore
-from swift.container.backend import ContainerBroker, DATADIR
+from swift.container.backend import ContainerBroker, DATADIR, \
+    RECORD_TYPE_SHARD, UNSHARDED, SHARDING, SHARDED, SHARD_UPDATE_STATES
 from swift.container.replicator import ContainerReplicatorRpc
 from swift.common.db import DatabaseAlreadyExists
 from swift.common.container_sync_realms import ContainerSyncRealms
@@ -33,12 +34,13 @@
 from swift.common.utils import get_logger, hash_path, public, \
     Timestamp, storage_directory, validate_sync_to, \
     config_true_value, timing_stats, replication, \
-    override_bytes_from_content_type, get_log_line
+    override_bytes_from_content_type, get_log_line, ShardRange, list_from_csv
+
 from swift.common.constraints import valid_timestamp, check_utf8, check_drive
 from swift.common import constraints
 from swift.common.bufferedhttp import http_connect
 from swift.common.exceptions import ConnectionTimeout
-from swift.common.http import HTTP_NOT_FOUND, is_success
+from swift.common.http import HTTP_NO_CONTENT, HTTP_NOT_FOUND, is_success
 from swift.common.middleware import listing_formats
 from swift.common.storage_policy import POLICIES
 from swift.common.base_storage_server import BaseStorageServer
@@ -46,7 +48,7 @@
 from swift.common.swob import HTTPAccepted, HTTPBadRequest, HTTPConflict, \
     HTTPCreated,
HTTPInternalServerError, HTTPNoContent, HTTPNotFound, \ HTTPPreconditionFailed, HTTPMethodNotAllowed, Request, Response, \ - HTTPInsufficientStorage, HTTPException + HTTPInsufficientStorage, HTTPException, HTTPMovedPermanently def gen_resp_headers(info, is_deleted=False): @@ -72,6 +74,7 @@ 'X-Timestamp': Timestamp(info.get('created_at', 0)).normal, 'X-PUT-Timestamp': Timestamp( info.get('put_timestamp', 0)).normal, + 'X-Backend-Sharding-State': info.get('db_state', UNSHARDED), }) return headers @@ -261,6 +264,40 @@ self.logger.exception('Failed to update sync_store %s during %s' % (broker.db_file, method)) + def _redirect_to_shard(self, req, broker, obj_name): + """ + If the request indicates that it can accept a redirection, look for a + shard range that contains ``obj_name`` and if one exists return a + HTTPMovedPermanently response. + + :param req: an instance of :class:`~swift.common.swob.Request` + :param broker: a container broker + :param obj_name: an object name + :return: an instance of :class:`swift.common.swob.HTTPMovedPermanently` + if a shard range exists for the given ``obj_name``, otherwise None. + """ + if not config_true_value( + req.headers.get('x-backend-accept-redirect', False)): + return None + + shard_ranges = broker.get_shard_ranges( + includes=obj_name, states=SHARD_UPDATE_STATES) + if not shard_ranges: + return None + + # note: obj_name may be included in both a created sub-shard and its + # sharding parent. get_shard_ranges will return the created sub-shard + # in preference to the parent, which is the desired result. + containing_range = shard_ranges[0] + location = "/%s/%s" % (containing_range.name, obj_name) + headers = {'Location': location, + 'X-Backend-Redirect-Timestamp': + containing_range.timestamp.internal} + + # we do not want the host added to the location + req.environ['swift.leave_relative_location'] = True + return HTTPMovedPermanently(headers=headers, request=req) + @public @timing_stats() def DELETE(self, req): @@ -283,6 +320,11 @@ if not os.path.exists(broker.db_file): return HTTPNotFound() if obj: # delete object + # redirect if a shard range exists for the object name + redirect = self._redirect_to_shard(req, broker, obj) + if redirect: + return redirect + broker.delete_object(obj, req.headers.get('x-timestamp'), obj_policy_index) return HTTPNoContent(request=req) @@ -343,6 +385,40 @@ broker.update_status_changed_at(timestamp) return recreated + def _maybe_autocreate(self, broker, req_timestamp, account, + policy_index): + created = False + if account.startswith(self.auto_create_account_prefix) and \ + not os.path.exists(broker.db_file): + if policy_index is None: + raise HTTPBadRequest( + 'X-Backend-Storage-Policy-Index header is required') + try: + broker.initialize(req_timestamp.internal, policy_index) + except DatabaseAlreadyExists: + pass + else: + created = True + if not os.path.exists(broker.db_file): + raise HTTPNotFound() + return created + + def _update_metadata(self, req, broker, req_timestamp, method): + metadata = {} + metadata.update( + (key, (value, req_timestamp.internal)) + for key, value in req.headers.items() + if key.lower() in self.save_headers or + is_sys_or_user_meta('container', key)) + if metadata: + if 'X-Container-Sync-To' in metadata: + if 'X-Container-Sync-To' not in broker.metadata or \ + metadata['X-Container-Sync-To'][0] != \ + broker.metadata['X-Container-Sync-To'][0]: + broker.set_x_container_sync_points(-1, -1) + broker.update_metadata(metadata, validate_metadata=True) + self._update_sync_store(broker, method) 
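
The _redirect_to_shard method added above communicates the shard location purely through response headers, so it may help to see how a backend client (for example an object updater) could consume such a 301. This is a minimal illustrative sketch only, not part of the 2.18.0 patch; the parse_redirect helper and the sample header values are assumptions made for this example.

# Sketch: interpreting the headers emitted by _redirect_to_shard.
# parse_redirect and the sample values below are hypothetical and are not
# part of the swift-2.18.0 code added in this patch.

def parse_redirect(headers):
    # Location has the form /<shard_account>/<shard_container>/<obj_name>
    # because swift.leave_relative_location prevents a host being prepended.
    _, account, container, _obj = headers['Location'].split('/', 3)
    return account, container, headers.get('X-Backend-Redirect-Timestamp')


sample_headers = {
    'Location': '/.shards_a/some_shard_container/obj_name',
    'X-Backend-Redirect-Timestamp': '1525354526.00000',
}
print(parse_redirect(sample_headers))
# ('.shards_a', 'some_shard_container', '1525354526.00000')

Under this reading, subsequent updates for that object name would be directed to the shard container named in Location, with the redirect timestamp available to decide whether a newer shard range has since superseded it.
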
+ @public @timing_stats() def PUT(self, req): @@ -364,14 +440,13 @@ # obj put expects the policy_index header, default is for # legacy support during upgrade. obj_policy_index = requested_policy_index or 0 - if account.startswith(self.auto_create_account_prefix) and \ - not os.path.exists(broker.db_file): - try: - broker.initialize(req_timestamp.internal, obj_policy_index) - except DatabaseAlreadyExists: - pass - if not os.path.exists(broker.db_file): - return HTTPNotFound() + self._maybe_autocreate(broker, req_timestamp, account, + obj_policy_index) + # redirect if a shard exists for this object name + response = self._redirect_to_shard(req, broker, obj) + if response: + return response + broker.put_object(obj, req_timestamp.internal, int(req.headers['x-size']), req.headers['x-content-type'], @@ -380,6 +455,22 @@ req.headers.get('x-content-type-timestamp'), req.headers.get('x-meta-timestamp')) return HTTPCreated(request=req) + + record_type = req.headers.get('x-backend-record-type', '').lower() + if record_type == RECORD_TYPE_SHARD: + try: + # validate incoming data... + shard_ranges = [ShardRange.from_dict(sr) + for sr in json.loads(req.body)] + except (ValueError, KeyError, TypeError) as err: + return HTTPBadRequest('Invalid body: %r' % err) + created = self._maybe_autocreate(broker, req_timestamp, account, + requested_policy_index) + self._update_metadata(req, broker, req_timestamp, 'PUT') + if shard_ranges: + # TODO: consider writing the shard ranges into the pending + # file, but if so ensure an all-or-none semantic for the write + broker.merge_shard_ranges(shard_ranges) else: # put container if requested_policy_index is None: # use the default index sent by the proxy if available @@ -391,31 +482,18 @@ req_timestamp.internal, new_container_policy, requested_policy_index) - metadata = {} - metadata.update( - (key, (value, req_timestamp.internal)) - for key, value in req.headers.items() - if key.lower() in self.save_headers or - is_sys_or_user_meta('container', key)) - if 'X-Container-Sync-To' in metadata: - if 'X-Container-Sync-To' not in broker.metadata or \ - metadata['X-Container-Sync-To'][0] != \ - broker.metadata['X-Container-Sync-To'][0]: - broker.set_x_container_sync_points(-1, -1) - broker.update_metadata(metadata, validate_metadata=True) - if metadata: - self._update_sync_store(broker, 'PUT') + self._update_metadata(req, broker, req_timestamp, 'PUT') resp = self.account_update(req, account, container, broker) if resp: return resp - if created: - return HTTPCreated(request=req, - headers={'x-backend-storage-policy-index': - broker.storage_policy_index}) - else: - return HTTPAccepted(request=req, - headers={'x-backend-storage-policy-index': - broker.storage_policy_index}) + if created: + return HTTPCreated(request=req, + headers={'x-backend-storage-policy-index': + broker.storage_policy_index}) + else: + return HTTPAccepted(request=req, + headers={'x-backend-storage-policy-index': + broker.storage_policy_index}) @public @timing_stats(sample_rate=0.1) @@ -454,13 +532,18 @@ :params record: object entry record :returns: modified record """ - (name, created, size, content_type, etag) = record[:5] - if content_type is None: - return {'subdir': name.decode('utf8')} - response = {'bytes': size, 'hash': etag, 'name': name.decode('utf8'), - 'content_type': content_type} + if isinstance(record, ShardRange): + created = record.timestamp + response = dict(record) + else: + (name, created, size, content_type, etag) = record[:5] + if content_type is None: + return {'subdir': 
name.decode('utf8')} + response = { + 'bytes': size, 'hash': etag, 'name': name.decode('utf8'), + 'content_type': content_type} + override_bytes_from_content_type(response, logger=self.logger) response['last_modified'] = Timestamp(created).isoformat - override_bytes_from_content_type(response, logger=self.logger) return response @public @@ -494,12 +577,45 @@ pending_timeout=0.1, stale_reads_ok=True) info, is_deleted = broker.get_info_is_deleted() - resp_headers = gen_resp_headers(info, is_deleted=is_deleted) - if is_deleted: - return HTTPNotFound(request=req, headers=resp_headers) - container_list = broker.list_objects_iter( - limit, marker, end_marker, prefix, delimiter, path, - storage_policy_index=info['storage_policy_index'], reverse=reverse) + record_type = req.headers.get('x-backend-record-type', '').lower() + if record_type == 'auto' and info.get('db_state') in (SHARDING, + SHARDED): + record_type = 'shard' + if record_type == 'shard': + override_deleted = info and config_true_value( + req.headers.get('x-backend-override-deleted', False)) + resp_headers = gen_resp_headers( + info, is_deleted=is_deleted and not override_deleted) + if is_deleted and not override_deleted: + return HTTPNotFound(request=req, headers=resp_headers) + resp_headers['X-Backend-Record-Type'] = 'shard' + includes = get_param(req, 'includes') + states = get_param(req, 'states') + fill_gaps = False + if states: + states = list_from_csv(states) + fill_gaps = any(('listing' in states, 'updating' in states)) + try: + states = broker.resolve_shard_range_states(states) + except ValueError: + return HTTPBadRequest(request=req, body='Bad state') + include_deleted = config_true_value( + req.headers.get('x-backend-include-deleted', False)) + container_list = broker.get_shard_ranges( + marker, end_marker, includes, reverse, states=states, + include_deleted=include_deleted, fill_gaps=fill_gaps) + else: + resp_headers = gen_resp_headers(info, is_deleted=is_deleted) + if is_deleted: + return HTTPNotFound(request=req, headers=resp_headers) + resp_headers['X-Backend-Record-Type'] = 'object' + # Use the retired db while container is in process of sharding, + # otherwise use current db + src_broker = broker.get_brokers()[0] + container_list = src_broker.list_objects_iter( + limit, marker, end_marker, prefix, delimiter, path, + storage_policy_index=info['storage_policy_index'], + reverse=reverse) return self.create_listing(req, out_content_type, info, resp_headers, broker.metadata, container_list, container) @@ -522,7 +638,7 @@ content_type=out_content_type, charset='utf-8') ret.last_modified = math.ceil(float(resp_headers['X-PUT-Timestamp'])) if not ret.body: - ret.status_int = 204 + ret.status_int = HTTP_NO_CONTENT return ret @public @@ -562,20 +678,7 @@ if broker.is_deleted(): return HTTPNotFound(request=req) broker.update_put_timestamp(req_timestamp.internal) - metadata = {} - metadata.update( - (key, (value, req_timestamp.internal)) - for key, value in req.headers.items() - if key.lower() in self.save_headers or - is_sys_or_user_meta('container', key)) - if metadata: - if 'X-Container-Sync-To' in metadata: - if 'X-Container-Sync-To' not in broker.metadata or \ - metadata['X-Container-Sync-To'][0] != \ - broker.metadata['X-Container-Sync-To'][0]: - broker.set_x_container_sync_points(-1, -1) - broker.update_metadata(metadata, validate_metadata=True) - self._update_sync_store(broker, 'POST') + self._update_metadata(req, broker, req_timestamp, 'POST') return HTTPNoContent(request=req) def __call__(self, env, start_response): diff 
-Nru swift-2.17.0/swift/container/sharder.py swift-2.18.0/swift/container/sharder.py --- swift-2.17.0/swift/container/sharder.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/swift/container/sharder.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,1568 @@ +# Copyright (c) 2015 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import errno +import json +import time +from collections import defaultdict +from random import random + +import os +import six +from eventlet import Timeout + +from swift.common import internal_client, db_replicator +from swift.common.constraints import check_drive +from swift.common.direct_client import (direct_put_container, + DirectClientException) +from swift.common.exceptions import DeviceUnavailable +from swift.common.ring.utils import is_local_device +from swift.common.utils import get_logger, config_true_value, \ + dump_recon_cache, whataremyips, Timestamp, ShardRange, GreenAsyncPile, \ + config_float_value, config_positive_int_value, \ + quorum_size, parse_override_options, Everything, config_auto_int_value +from swift.container.backend import ContainerBroker, \ + RECORD_TYPE_SHARD, UNSHARDED, SHARDING, SHARDED, COLLAPSED, \ + SHARD_UPDATE_STATES +from swift.container.replicator import ContainerReplicator + + +def sharding_enabled(broker): + # NB all shards will by default have been created with + # X-Container-Sysmeta-Sharding set and will therefore be candidates for + # sharding, along with explicitly configured root containers. + sharding = broker.metadata.get('X-Container-Sysmeta-Sharding') + if sharding and config_true_value(sharding[0]): + return True + # if broker has been marked deleted it will have lost sysmeta, but we still + # need to process the broker (for example, to shrink any shard ranges) so + # fallback to checking if it has any shard ranges + if broker.get_shard_ranges(): + return True + return False + + +def make_shard_ranges(broker, shard_data, shards_account_prefix): + timestamp = Timestamp.now() + shard_ranges = [] + for data in shard_data: + # Make a copy so we don't mutate the original + kwargs = data.copy() + path = ShardRange.make_path( + shards_account_prefix + broker.root_account, + broker.root_container, broker.container, + timestamp, kwargs.pop('index')) + + shard_ranges.append(ShardRange(path, timestamp, **kwargs)) + return shard_ranges + + +def find_missing_ranges(shard_ranges): + """ + Find any ranges in the entire object namespace that are not covered by any + shard range in the given list. 
+ + :param shard_ranges: A list of :class:`~swift.utils.ShardRange` + :return: a list of missing ranges + """ + gaps = [] + if not shard_ranges: + return ((ShardRange.MIN, ShardRange.MAX),) + if shard_ranges[0].lower > ShardRange.MIN: + gaps.append((ShardRange.MIN, shard_ranges[0].lower)) + for first, second in zip(shard_ranges, shard_ranges[1:]): + if first.upper < second.lower: + gaps.append((first.upper, second.lower)) + if shard_ranges[-1].upper < ShardRange.MAX: + gaps.append((shard_ranges[-1].upper, ShardRange.MAX)) + return gaps + + +def find_overlapping_ranges(shard_ranges): + """ + Find all pairs of overlapping ranges in the given list. + + :param shard_ranges: A list of :class:`~swift.utils.ShardRange` + :return: a set of tuples, each tuple containing ranges that overlap with + each other. + """ + result = set() + for shard_range in shard_ranges: + overlapping = [sr for sr in shard_ranges + if shard_range != sr and shard_range.overlaps(sr)] + if overlapping: + overlapping.append(shard_range) + overlapping.sort() + result.add(tuple(overlapping)) + + return result + + +def is_sharding_candidate(shard_range, threshold): + return (shard_range.state == ShardRange.ACTIVE and + shard_range.object_count >= threshold) + + +def find_sharding_candidates(broker, threshold, shard_ranges=None): + # this should only execute on root containers; the goal is to find + # large shard containers that should be sharded. + # First cut is simple: assume root container shard usage stats are good + # enough to make decision. + # TODO: object counts may well not be the appropriate metric for + # deciding to shrink because a shard with low object_count may have a + # large number of deleted object rows that will need to be merged with + # a neighbour. We may need to expose row count as well as object count. + if shard_ranges is None: + shard_ranges = broker.get_shard_ranges(states=[ShardRange.ACTIVE]) + candidates = [] + for shard_range in shard_ranges: + if not is_sharding_candidate(shard_range, threshold): + continue + shard_range.update_state(ShardRange.SHARDING, + state_timestamp=Timestamp.now()) + shard_range.epoch = shard_range.state_timestamp + candidates.append(shard_range) + return candidates + + +def find_shrinking_candidates(broker, shrink_threshold, merge_size): + # this should only execute on root containers that have sharded; the + # goal is to find small shard containers that could be retired by + # merging with a neighbour. + # First cut is simple: assume root container shard usage stats are good + # enough to make decision; only merge with upper neighbour so that + # upper bounds never change (shard names include upper bound). + # TODO: object counts may well not be the appropriate metric for + # deciding to shrink because a shard with low object_count may have a + # large number of deleted object rows that will need to be merged with + # a neighbour. We may need to expose row count as well as object count. + shard_ranges = broker.get_shard_ranges() + own_shard_range = broker.get_own_shard_range() + if len(shard_ranges) == 1: + # special case to enable final shard to shrink into root + shard_ranges.append(own_shard_range) + + merge_pairs = {} + for donor, acceptor in zip(shard_ranges, shard_ranges[1:]): + if donor in merge_pairs: + # this range may already have been made an acceptor; if so then + # move on. In principle it might be that even after expansion + # this range and its donor(s) could all be merged with the next + # range. 
In practice it is much easier to reason about a single + # donor merging into a single acceptor. Don't fret - eventually + # all the small ranges will be retired. + continue + if (acceptor.name != own_shard_range.name and + acceptor.state != ShardRange.ACTIVE): + # don't shrink into a range that is not yet ACTIVE + continue + if donor.state not in (ShardRange.ACTIVE, ShardRange.SHRINKING): + # found? created? sharded? don't touch it + continue + + proposed_object_count = donor.object_count + acceptor.object_count + if (donor.state == ShardRange.SHRINKING or + (donor.object_count < shrink_threshold and + proposed_object_count < merge_size)): + # include previously identified merge pairs on presumption that + # following shrink procedure is idempotent + merge_pairs[acceptor] = donor + if donor.update_state(ShardRange.SHRINKING): + # Set donor state to shrinking so that next cycle won't use + # it as an acceptor; state_timestamp defines new epoch for + # donor and new timestamp for the expanded acceptor below. + donor.epoch = donor.state_timestamp = Timestamp.now() + if acceptor.lower != donor.lower: + # Update the acceptor container with its expanding state to + # prevent it treating objects cleaved from the donor + # as misplaced. + acceptor.lower = donor.lower + acceptor.timestamp = donor.state_timestamp + return merge_pairs + + +class CleavingContext(object): + def __init__(self, ref, cursor='', max_row=None, cleave_to_row=None, + last_cleave_to_row=None, cleaving_done=False, + misplaced_done=False, ranges_done=0, ranges_todo=0): + self.ref = ref + self._cursor = None + self.cursor = cursor + self.max_row = max_row + self.cleave_to_row = cleave_to_row + self.last_cleave_to_row = last_cleave_to_row + self.cleaving_done = cleaving_done + self.misplaced_done = misplaced_done + self.ranges_done = ranges_done + self.ranges_todo = ranges_todo + + def __iter__(self): + yield 'ref', self.ref + yield 'cursor', self.cursor + yield 'max_row', self.max_row + yield 'cleave_to_row', self.cleave_to_row + yield 'last_cleave_to_row', self.last_cleave_to_row + yield 'cleaving_done', self.cleaving_done + yield 'misplaced_done', self.misplaced_done + yield 'ranges_done', self.ranges_done + yield 'ranges_todo', self.ranges_todo + + def _encode(cls, value): + if value is not None and six.PY2 and isinstance(value, six.text_type): + return value.encode('utf-8') + return value + + @property + def cursor(self): + return self._cursor + + @cursor.setter + def cursor(self, value): + self._cursor = self._encode(value) + + @property + def marker(self): + return self.cursor + '\x00' + + @classmethod + def _make_ref(cls, broker): + return broker.get_info()['id'] + + @classmethod + def load(cls, broker): + """ + Returns a context dict for tracking the progress of cleaving this + broker's retiring DB. The context is persisted in sysmeta using a key + that is based off the retiring db id and max row. This form of + key ensures that a cleaving context is only loaded for a db that + matches the id and max row when the context was created; if a db is + modified such that its max row changes then a different context, or no + context, will be loaded. + + :return: A dict to which cleave progress metadata may be added. The + dict initially has a key ``ref`` which should not be modified by + any caller. 
+ """ + brokers = broker.get_brokers() + ref = cls._make_ref(brokers[0]) + data = brokers[-1].get_sharding_sysmeta('Context-' + ref) + data = json.loads(data) if data else {} + data['ref'] = ref + data['max_row'] = brokers[0].get_max_row() + return cls(**data) + + def store(self, broker): + broker.set_sharding_sysmeta('Context-' + self.ref, + json.dumps(dict(self))) + + def reset(self): + self.cursor = '' + self.ranges_done = 0 + self.ranges_todo = 0 + self.cleaving_done = False + self.misplaced_done = False + self.last_cleave_to_row = self.cleave_to_row + + def start(self): + self.cursor = '' + self.ranges_done = 0 + self.ranges_todo = 0 + self.cleaving_done = False + self.cleave_to_row = self.max_row + + def done(self): + return all((self.misplaced_done, self.cleaving_done, + self.max_row == self.cleave_to_row)) + + +DEFAULT_SHARD_CONTAINER_THRESHOLD = 1000000 +DEFAULT_SHARD_SHRINK_POINT = 25 +DEFAULT_SHARD_MERGE_POINT = 75 + + +class ContainerSharder(ContainerReplicator): + """Shards containers.""" + + def __init__(self, conf, logger=None): + logger = logger or get_logger(conf, log_route='container-sharder') + super(ContainerSharder, self).__init__(conf, logger=logger) + self.shards_account_prefix = ( + (conf.get('auto_create_account_prefix') or '.') + 'shards_') + + def percent_value(key, default): + try: + value = conf.get(key, default) + return config_float_value(value, 0, 100) / 100.0 + except ValueError as err: + raise ValueError("%s: %s" % (str(err), key)) + + self.shard_shrink_point = percent_value('shard_shrink_point', + DEFAULT_SHARD_SHRINK_POINT) + self.shrink_merge_point = percent_value('shard_shrink_merge_point', + DEFAULT_SHARD_MERGE_POINT) + self.shard_container_threshold = config_positive_int_value( + conf.get('shard_container_threshold', + DEFAULT_SHARD_CONTAINER_THRESHOLD)) + self.shrink_size = (self.shard_container_threshold * + self.shard_shrink_point) + self.merge_size = (self.shard_container_threshold * + self.shrink_merge_point) + self.split_size = self.shard_container_threshold // 2 + self.scanner_batch_size = config_positive_int_value( + conf.get('shard_scanner_batch_size', 10)) + self.cleave_batch_size = config_positive_int_value( + conf.get('cleave_batch_size', 2)) + self.cleave_row_batch_size = config_positive_int_value( + conf.get('cleave_row_batch_size', 10000)) + self.auto_shard = config_true_value(conf.get('auto_shard', False)) + self.sharding_candidates = [] + self.recon_candidates_limit = int( + conf.get('recon_candidates_limit', 5)) + self.broker_timeout = config_positive_int_value( + conf.get('broker_timeout', 60)) + replica_count = self.ring.replica_count + quorum = quorum_size(replica_count) + self.shard_replication_quorum = config_auto_int_value( + conf.get('shard_replication_quorum'), quorum) + if self.shard_replication_quorum > replica_count: + self.logger.warning( + 'shard_replication_quorum of %s exceeds replica count %s' + ', reducing to %s', self.shard_replication_quorum, + replica_count, replica_count) + self.shard_replication_quorum = replica_count + self.existing_shard_replication_quorum = config_auto_int_value( + conf.get('existing_shard_replication_quorum'), + self.shard_replication_quorum) + if self.existing_shard_replication_quorum > replica_count: + self.logger.warning( + 'existing_shard_replication_quorum of %s exceeds replica count' + ' %s, reducing to %s', self.existing_shard_replication_quorum, + replica_count, replica_count) + self.existing_shard_replication_quorum = replica_count + + # internal client + self.conn_timeout = 
float(conf.get('conn_timeout', 5)) + request_tries = config_positive_int_value( + conf.get('request_tries', 3)) + internal_client_conf_path = conf.get('internal_client_conf_path', + '/etc/swift/internal-client.conf') + try: + self.int_client = internal_client.InternalClient( + internal_client_conf_path, + 'Swift Container Sharder', + request_tries, + allow_modify_pipeline=False) + except IOError as err: + if err.errno != errno.ENOENT: + raise + raise SystemExit( + 'Unable to load internal client from config: %r (%s)' % + (internal_client_conf_path, err)) + self.reported = 0 + + def _zero_stats(self): + """Zero out the stats.""" + super(ContainerSharder, self)._zero_stats() + # all sharding stats that are additional to the inherited replicator + # stats are maintained under the 'sharding' key in self.stats + self.stats['sharding'] = defaultdict(lambda: defaultdict(int)) + self.sharding_candidates = [] + + def _append_stat(self, category, key, value): + if not self.stats['sharding'][category][key]: + self.stats['sharding'][category][key] = list() + self.stats['sharding'][category][key].append(value) + + def _min_stat(self, category, key, value): + current = self.stats['sharding'][category][key] + if not current: + self.stats['sharding'][category][key] = value + else: + self.stats['sharding'][category][key] = min(current, value) + + def _max_stat(self, category, key, value): + current = self.stats['sharding'][category][key] + if not current: + self.stats['sharding'][category][key] = value + else: + self.stats['sharding'][category][key] = max(current, value) + + def _increment_stat(self, category, key, step=1, statsd=False): + self.stats['sharding'][category][key] += step + if statsd: + statsd_key = '%s_%s' % (category, key) + self.logger.increment(statsd_key) + + def _make_stats_info(self, broker, node, own_shard_range): + try: + file_size = os.stat(broker.db_file).st_size + except OSError: + file_size = None + + return {'path': broker.db_file, + 'node_index': node.get('index'), + 'account': broker.account, + 'container': broker.container, + 'root': broker.root_path, + 'object_count': own_shard_range.object_count, + 'meta_timestamp': own_shard_range.meta_timestamp.internal, + 'file_size': file_size} + + def _identify_sharding_candidate(self, broker, node): + own_shard_range = broker.get_own_shard_range() + if is_sharding_candidate( + own_shard_range, self.shard_container_threshold): + self.sharding_candidates.append( + self._make_stats_info(broker, node, own_shard_range)) + + def _transform_sharding_candidate_stats(self): + category = self.stats['sharding']['sharding_candidates'] + candidates = self.sharding_candidates + category['found'] = len(candidates) + candidates.sort(key=lambda c: c['object_count'], reverse=True) + if self.recon_candidates_limit >= 0: + category['top'] = candidates[:self.recon_candidates_limit] + else: + category['top'] = candidates + + def _record_sharding_progress(self, broker, node, error): + own_shard_range = broker.get_own_shard_range() + if (broker.get_db_state() in (UNSHARDED, SHARDING) and + own_shard_range.state in (ShardRange.SHARDING, + ShardRange.SHARDED)): + info = self._make_stats_info(broker, node, own_shard_range) + info['state'] = own_shard_range.state_text + info['db_state'] = broker.get_db_state() + states = [ShardRange.FOUND, ShardRange.CREATED, + ShardRange.CLEAVED, ShardRange.ACTIVE] + shard_ranges = broker.get_shard_ranges(states=states) + state_count = {} + for state in states: + state_count[ShardRange.STATES[state]] = 0 + for shard_range in 
shard_ranges: + state_count[shard_range.state_text] += 1 + info.update(state_count) + info['error'] = error and str(error) + self._append_stat('sharding_in_progress', 'all', info) + + def _report_stats(self): + # report accumulated stats since start of one sharder cycle + default_stats = ('attempted', 'success', 'failure') + category_keys = ( + ('visited', default_stats + ('skipped', 'completed')), + ('scanned', default_stats + ('found', 'min_time', 'max_time')), + ('created', default_stats), + ('cleaved', default_stats + ('min_time', 'max_time',)), + ('misplaced', default_stats + ('found', 'placed', 'unplaced')), + ('audit_root', default_stats), + ('audit_shard', default_stats), + ) + + now = time.time() + last_report = time.ctime(self.stats['start']) + elapsed = now - self.stats['start'] + sharding_stats = self.stats['sharding'] + for category, keys in category_keys: + stats = sharding_stats[category] + msg = ' '.join(['%s:%s' % (k, str(stats[k])) for k in keys]) + self.logger.info('Since %s %s - %s', last_report, category, msg) + + self._transform_sharding_candidate_stats() + + dump_recon_cache( + {'sharding_stats': self.stats, + 'sharding_time': elapsed, + 'sharding_last': now}, + self.rcache, self.logger) + self.reported = now + + def _periodic_report_stats(self): + if (time.time() - self.reported) >= 3600: # once an hour + self._report_stats() + + def _check_node(self, node): + if not node: + return False + if not is_local_device(self.ips, self.port, + node['replication_ip'], + node['replication_port']): + return False + if not check_drive(self.root, node['device'], + self.mount_check): + self.logger.warning( + 'Skipping %(device)s as it is not mounted' % node) + return False + return True + + def _fetch_shard_ranges(self, broker, newest=False, params=None, + include_deleted=False): + path = self.int_client.make_path(broker.root_account, + broker.root_container) + params = params or {} + params.setdefault('format', 'json') + headers = {'X-Backend-Record-Type': 'shard', + 'X-Backend-Override-Deleted': 'true', + 'X-Backend-Include-Deleted': str(include_deleted)} + if newest: + headers['X-Newest'] = 'true' + try: + try: + resp = self.int_client.make_request( + 'GET', path, headers, acceptable_statuses=(2,), + params=params) + except internal_client.UnexpectedResponse as err: + self.logger.warning("Failed to get shard ranges from %s: %s", + broker.root_path, err) + return None + record_type = resp.headers.get('x-backend-record-type') + if record_type != 'shard': + err = 'unexpected record type %r' % record_type + self.logger.error("Failed to get shard ranges from %s: %s", + broker.root_path, err) + return None + + try: + data = json.loads(resp.body) + if not isinstance(data, list): + raise ValueError('not a list') + return [ShardRange.from_dict(shard_range) + for shard_range in data] + except (ValueError, TypeError, KeyError) as err: + self.logger.error( + "Failed to get shard ranges from %s: invalid data: %r", + broker.root_path, err) + return None + finally: + self.logger.txn_id = None + + def _put_container(self, node, part, account, container, headers, body): + try: + direct_put_container(node, part, account, container, + conn_timeout=self.conn_timeout, + response_timeout=self.node_timeout, + headers=headers, contents=body) + except DirectClientException as err: + self.logger.warning( + 'Failed to put shard ranges to %s:%s/%s: %s', + node['ip'], node['port'], node['device'], err.http_status) + except (Exception, Timeout) as err: + self.logger.exception( + 'Failed to put shard ranges 
to %s:%s/%s: %s', + node['ip'], node['port'], node['device'], err) + else: + return True + return False + + def _send_shard_ranges(self, account, container, shard_ranges, + headers=None): + body = json.dumps([dict(sr) for sr in shard_ranges]) + part, nodes = self.ring.get_nodes(account, container) + headers = headers or {} + headers.update({'X-Backend-Record-Type': RECORD_TYPE_SHARD, + 'User-Agent': 'container-sharder %s' % os.getpid(), + 'X-Timestamp': Timestamp.now().normal, + 'Content-Length': len(body), + 'Content-Type': 'application/json'}) + + pool = GreenAsyncPile(len(nodes)) + for node in nodes: + pool.spawn(self._put_container, node, part, account, + container, headers, body) + + results = pool.waitall(None) + return results.count(True) >= quorum_size(self.ring.replica_count) + + def _get_shard_broker(self, shard_range, root_path, policy_index): + """ + Get a broker for a container db for the given shard range. If one of + the shard container's primary nodes is a local device then that will be + chosen for the db, otherwise the first of the shard container's handoff + nodes that is local will be chosen. + + :param shard_range: a :class:`~swift.common.utils.ShardRange` + :param root_path: the path of the shard's root container + :param policy_index: the storage policy index + :returns: a tuple of ``(part, broker, node_id)`` where ``part`` is the + shard container's partition, ``broker`` is an instance of + :class:`~swift.container.backend.ContainerBroker`, + ``node_id`` is the id of the selected node. + """ + part = self.ring.get_part(shard_range.account, shard_range.container) + node = self.find_local_handoff_for_part(part) + if not node: + raise DeviceUnavailable( + 'No mounted devices found suitable for creating shard broker' + 'for %s in partition %s' % (shard_range.name, part)) + + shard_broker = ContainerBroker.create_broker( + os.path.join(self.root, node['device']), part, shard_range.account, + shard_range.container, epoch=shard_range.epoch, + storage_policy_index=policy_index) + + # Get the valid info into the broker.container, etc + shard_broker.get_info() + shard_broker.merge_shard_ranges(shard_range) + shard_broker.set_sharding_sysmeta('Root', root_path) + shard_broker.update_metadata({ + 'X-Container-Sysmeta-Sharding': + ('True', Timestamp.now().internal)}) + + return part, shard_broker, node['id'] + + def _audit_root_container(self, broker): + # This is the root container, and therefore the tome of knowledge, + # all we can do is check there is nothing screwy with the ranges + self._increment_stat('audit_root', 'attempted') + warnings = [] + own_shard_range = broker.get_own_shard_range() + + if own_shard_range.state in (ShardRange.SHARDING, ShardRange.SHARDED): + shard_ranges = broker.get_shard_ranges() + missing_ranges = find_missing_ranges(shard_ranges) + if missing_ranges: + warnings.append( + 'missing range(s): %s' % + ' '.join(['%s-%s' % (lower, upper) + for lower, upper in missing_ranges])) + + for state in ShardRange.STATES: + shard_ranges = broker.get_shard_ranges(states=state) + overlaps = find_overlapping_ranges(shard_ranges) + for overlapping_ranges in overlaps: + warnings.append( + 'overlapping ranges in state %s: %s' % + (ShardRange.STATES[state], + ' '.join(['%s-%s' % (sr.lower, sr.upper) + for sr in overlapping_ranges]))) + + if warnings: + self.logger.warning( + 'Audit failed for root %s (%s): %s' % + (broker.db_file, broker.path, ', '.join(warnings))) + self._increment_stat('audit_root', 'failure', statsd=True) + return False + + 
self._increment_stat('audit_root', 'success', statsd=True) + return True + + def _audit_shard_container(self, broker): + # Get the root view of the world. + self._increment_stat('audit_shard', 'attempted') + warnings = [] + errors = [] + if not broker.account.startswith(self.shards_account_prefix): + warnings.append('account not in shards namespace %r' % + self.shards_account_prefix) + + own_shard_range = broker.get_own_shard_range(no_default=True) + + shard_range = None + if own_shard_range: + shard_ranges = self._fetch_shard_ranges( + broker, newest=True, + params={'marker': own_shard_range.lower, + 'end_marker': own_shard_range.upper}, + include_deleted=True) + if shard_ranges: + for shard_range in shard_ranges: + if (shard_range.lower == own_shard_range.lower and + shard_range.upper == own_shard_range.upper and + shard_range.name == own_shard_range.name): + break + else: + # this is not necessarily an error - some replicas of the + # root may not yet know about this shard container + warnings.append('root has no matching shard range') + shard_range = None + else: + warnings.append('unable to get shard ranges from root') + else: + errors.append('missing own shard range') + + if warnings: + self.logger.warning( + 'Audit warnings for shard %s (%s): %s' % + (broker.db_file, broker.path, ', '.join(warnings))) + + if errors: + self.logger.warning( + 'Audit failed for shard %s (%s) - skipping: %s' % + (broker.db_file, broker.path, ', '.join(errors))) + self._increment_stat('audit_shard', 'failure', statsd=True) + return False + + if shard_range: + self.logger.debug('Updating shard from root %s', dict(shard_range)) + broker.merge_shard_ranges(shard_range) + own_shard_range = broker.get_own_shard_range() + delete_age = time.time() - self.reclaim_age + if (own_shard_range.state == ShardRange.SHARDED and + own_shard_range.deleted and + own_shard_range.timestamp < delete_age and + broker.empty()): + broker.delete_db(Timestamp.now().internal) + self.logger.debug('Deleted shard container %s (%s)', + broker.db_file, broker.path) + self._increment_stat('audit_shard', 'success', statsd=True) + return True + + def _audit_container(self, broker): + if broker.is_deleted(): + # if the container has been marked as deleted, all metadata will + # have been erased so no point auditing. But we want it to pass, in + # case any objects exist inside it. + return True + if broker.is_root_container(): + return self._audit_root_container(broker) + return self._audit_shard_container(broker) + + def yield_objects(self, broker, src_shard_range, since_row=None): + """ + Iterates through all objects in ``src_shard_range`` in name order + yielding them in lists of up to CONTAINER_LISTING_LIMIT length. + + :param broker: A :class:`~swift.container.backend.ContainerBroker`. + :param src_shard_range: A :class:`~swift.common.utils.ShardRange` + describing the source range. + :param since_row: include only items whose ROWID is greater than + the given row id; by default all rows are included. 
+ :return: a generator of tuples of (list of objects, broker info dict) + """ + for include_deleted in (False, True): + marker = src_shard_range.lower_str + while True: + info = broker.get_info() + info['max_row'] = broker.get_max_row() + start = time.time() + objects = broker.get_objects( + self.cleave_row_batch_size, + marker=marker, + end_marker=src_shard_range.end_marker, + include_deleted=include_deleted, + since_row=since_row) + if objects: + self.logger.debug('got %s objects from %s in %ss', + len(objects), broker.db_file, + time.time() - start) + yield objects, info + + if len(objects) < self.cleave_row_batch_size: + break + marker = objects[-1]['name'] + + def yield_objects_to_shard_range(self, broker, src_shard_range, + dest_shard_ranges): + """ + Iterates through all objects in ``src_shard_range`` to place them in + destination shard ranges provided by the ``next_shard_range`` function. + Yields tuples of (object list, destination shard range in which those + objects belong). Note that the same destination shard range may be + referenced in more than one yielded tuple. + + :param broker: A :class:`~swift.container.backend.ContainerBroker`. + :param src_shard_range: A :class:`~swift.common.utils.ShardRange` + describing the source range. + :param dest_shard_ranges: A function which should return a list of + destination shard ranges in name order. + :return: a generator of tuples of + (object list, shard range, broker info dict) + """ + dest_shard_range_iter = dest_shard_range = None + for objs, info in self.yield_objects(broker, src_shard_range): + if not objs: + return + + def next_or_none(it): + try: + return next(it) + except StopIteration: + return None + + if dest_shard_range_iter is None: + dest_shard_range_iter = iter(dest_shard_ranges()) + dest_shard_range = next_or_none(dest_shard_range_iter) + + unplaced = False + last_index = next_index = 0 + for obj in objs: + if dest_shard_range is None: + # no more destinations: yield remainder of batch and return + # NB there may be more batches of objects but none of them + # will be placed so no point fetching them + yield objs[last_index:], None, info + return + if obj['name'] <= dest_shard_range.lower: + unplaced = True + elif unplaced: + # end of run of unplaced objects, yield them + yield objs[last_index:next_index], None, info + last_index = next_index + unplaced = False + while (dest_shard_range and + obj['name'] > dest_shard_range.upper): + if next_index != last_index: + # yield the objects in current dest_shard_range + yield (objs[last_index:next_index], + dest_shard_range, + info) + last_index = next_index + dest_shard_range = next_or_none(dest_shard_range_iter) + next_index += 1 + + if next_index != last_index: + # yield tail of current batch of objects + # NB there may be more objects for the current + # dest_shard_range in the next batch from yield_objects + yield (objs[last_index:next_index], + None if unplaced else dest_shard_range, + info) + + def _post_replicate_hook(self, broker, info, responses): + # override superclass behaviour + pass + + def _replicate_and_delete(self, broker, dest_shard_range, part, + dest_broker, node_id, info): + success, responses = self._replicate_object( + part, dest_broker.db_file, node_id) + quorum = quorum_size(self.ring.replica_count) + if not success and responses.count(True) < quorum: + self.logger.warning( + 'Failed to sufficiently replicate misplaced objects: %s in %s ' + '(not removing)', dest_shard_range, broker.path) + return False + + if broker.get_info()['id'] != 
info['id']: + # the db changed - don't remove any objects + success = False + else: + # remove objects up to the max row of the db sampled prior to + # the first object yielded for this destination; objects added + # after that point may not have been yielded and replicated so + # it is not safe to remove them yet + broker.remove_objects( + dest_shard_range.lower_str, + dest_shard_range.upper_str, + max_row=info['max_row']) + success = True + + if not success: + self.logger.warning( + 'Refused to remove misplaced objects: %s in %s', + dest_shard_range, broker.path) + return success + + def _move_objects(self, src_broker, src_shard_range, policy_index, + shard_range_fetcher): + # move objects from src_shard_range in src_broker to destination shard + # ranges provided by shard_range_fetcher + dest_brokers = {} # map shard range -> broker + placed = unplaced = 0 + success = True + for objs, dest_shard_range, info in self.yield_objects_to_shard_range( + src_broker, src_shard_range, shard_range_fetcher): + if not dest_shard_range: + unplaced += len(objs) + success = False + continue + + if dest_shard_range.name == src_broker.path: + self.logger.debug( + 'Skipping source as misplaced objects destination') + # in shrinking context, the misplaced objects might actually be + # correctly placed if the root has expanded this shard but this + # broker has not yet been updated + continue + + if dest_shard_range not in dest_brokers: + part, dest_broker, node_id = self._get_shard_broker( + dest_shard_range, src_broker.root_path, policy_index) + # save the broker info that was sampled prior to the *first* + # yielded objects for this destination + destination = {'part': part, + 'dest_broker': dest_broker, + 'node_id': node_id, + 'info': info} + dest_brokers[dest_shard_range] = destination + else: + destination = dest_brokers[dest_shard_range] + destination['dest_broker'].merge_items(objs) + placed += len(objs) + + if unplaced: + self.logger.warning( + 'Failed to find destination for at least %s misplaced objects ' + 'in %s' % (unplaced, src_broker.path)) + + # TODO: consider executing the replication jobs concurrently + for dest_shard_range, dest_args in dest_brokers.items(): + self.logger.debug('moving misplaced objects found in range %s' % + dest_shard_range) + success &= self._replicate_and_delete( + src_broker, dest_shard_range, **dest_args) + + self._increment_stat('misplaced', 'placed', step=placed) + self._increment_stat('misplaced', 'unplaced', step=unplaced) + return success, placed + unplaced + + def _make_shard_range_fetcher(self, broker, src_shard_range): + # returns a function that will lazy load shard ranges on demand; + # this means only one lookup is made for all misplaced ranges. + outer = {} + + def shard_range_fetcher(): + if not outer: + if broker.is_root_container(): + ranges = broker.get_shard_ranges( + marker=src_shard_range.lower_str, + end_marker=src_shard_range.end_marker, + states=SHARD_UPDATE_STATES) + else: + # TODO: the root may not yet know about shard ranges to + # which a shard is sharding, but those could come from + # the broker + ranges = self._fetch_shard_ranges( + broker, newest=True, + params={'states': 'updating', + 'marker': src_shard_range.lower_str, + 'end_marker': src_shard_range.end_marker}) + outer['ranges'] = iter(ranges) + return outer['ranges'] + return shard_range_fetcher + + def _make_default_misplaced_object_bounds(self, broker): + # Objects outside of this container's own range are misplaced. 
+ own_shard_range = broker.get_own_shard_range() + bounds = [] + if own_shard_range.lower: + bounds.append(('', own_shard_range.lower)) + if own_shard_range.upper: + bounds.append((own_shard_range.upper, '')) + return bounds + + def _make_misplaced_object_bounds(self, broker): + bounds = [] + state = broker.get_db_state() + if state == SHARDED: + # Anything in the object table is treated as a misplaced object. + bounds.append(('', '')) + + if not bounds and state == SHARDING: + # Objects outside of this container's own range are misplaced. + # Objects in already cleaved shard ranges are also misplaced. + cleave_context = CleavingContext.load(broker) + if cleave_context.cursor: + bounds.append(('', cleave_context.cursor)) + own_shard_range = broker.get_own_shard_range() + if own_shard_range.upper: + bounds.append((own_shard_range.upper, '')) + + return bounds or self._make_default_misplaced_object_bounds(broker) + + def _move_misplaced_objects(self, broker, src_broker=None, + src_bounds=None): + """ + Search for objects in the given broker that do not belong in that + broker's namespace and move those objects to their correct shard + container. + + :param broker: An instance of :class:`swift.container.ContainerBroker`. + :param src_broker: optional alternative broker to use as the source + of misplaced objects; if not specified then ``broker`` is used as + the source. + :param src_bounds: optional list of (lower, upper) namespace bounds to + use when searching for misplaced objects + :return: True if all misplaced objects were sufficiently replicated to + their correct shard containers, False otherwise + """ + self.logger.debug('Looking for misplaced objects in %s (%s)', + broker.path.decode('utf-8'), broker.db_file) + self._increment_stat('misplaced', 'attempted') + src_broker = src_broker or broker + if src_bounds is None: + src_bounds = self._make_misplaced_object_bounds(broker) + # (ab)use ShardRange instances to encapsulate source namespaces + src_ranges = [ShardRange('dont/care', Timestamp.now(), lower, upper) + for lower, upper in src_bounds] + self.logger.debug('misplaced object source bounds %s' % src_bounds) + policy_index = broker.storage_policy_index + success = True + num_found = 0 + for src_shard_range in src_ranges: + part_success, part_num_found = self._move_objects( + src_broker, src_shard_range, policy_index, + self._make_shard_range_fetcher(broker, src_shard_range)) + success &= part_success + num_found += part_num_found + + if num_found: + self._increment_stat('misplaced', 'found', statsd=True) + self.logger.debug('Moved %s misplaced objects' % num_found) + self._increment_stat('misplaced', 'success' if success else 'failure') + self.logger.debug('Finished handling misplaced objects') + return success + + def _find_shard_ranges(self, broker): + """ + Scans the container to find shard ranges and adds them to the shard + ranges table. If there are existing shard ranges then scanning starts + from the upper bound of the uppermost existing shard range. + + :param broker: An instance of :class:`swift.container.ContainerBroker` + :return: a tuple of (success, num of shard ranges found) where success + is True if the last shard range has been found, False otherwise. 
+ """ + own_shard_range = broker.get_own_shard_range() + shard_ranges = broker.get_shard_ranges() + if shard_ranges and shard_ranges[-1].upper >= own_shard_range.upper: + self.logger.debug('Scan already completed for %s', broker.path) + return 0 + + self.logger.info('Starting scan for shard ranges on %s', broker.path) + self._increment_stat('scanned', 'attempted') + + start = time.time() + shard_data, last_found = broker.find_shard_ranges( + self.split_size, limit=self.scanner_batch_size, + existing_ranges=shard_ranges) + elapsed = time.time() - start + + if not shard_data: + if last_found: + self.logger.info("Already found all shard ranges") + self._increment_stat('scanned', 'success', statsd=True) + else: + # we didn't find anything + self.logger.warning("No shard ranges found") + self._increment_stat('scanned', 'failure', statsd=True) + return 0 + + shard_ranges = make_shard_ranges( + broker, shard_data, self.shards_account_prefix) + broker.merge_shard_ranges(shard_ranges) + num_found = len(shard_ranges) + self.logger.info( + "Completed scan for shard ranges: %d found", num_found) + self._increment_stat('scanned', 'found', step=num_found) + self._min_stat('scanned', 'min_time', round(elapsed / num_found, 3)) + self._max_stat('scanned', 'max_time', round(elapsed / num_found, 3)) + + if last_found: + self.logger.info("Final shard range reached.") + self._increment_stat('scanned', 'success', statsd=True) + return num_found + + def _create_shard_containers(self, broker): + # Create shard containers that are ready to receive redirected object + # updates. Do this now, so that redirection can begin immediately + # without waiting for cleaving to complete. + found_ranges = broker.get_shard_ranges(states=ShardRange.FOUND) + created_ranges = [] + for shard_range in found_ranges: + self._increment_stat('created', 'attempted') + shard_range.update_state(ShardRange.CREATED) + headers = { + 'X-Backend-Storage-Policy-Index': broker.storage_policy_index, + 'X-Container-Sysmeta-Shard-Root': broker.root_path, + 'X-Container-Sysmeta-Sharding': True} + success = self._send_shard_ranges( + shard_range.account, shard_range.container, + [shard_range], headers=headers) + if success: + self.logger.debug('PUT new shard range container for %s', + shard_range) + self._increment_stat('created', 'success', statsd=True) + else: + self.logger.error( + 'PUT of new shard container %r failed for %s.', + shard_range, broker.path) + self._increment_stat('created', 'failure', statsd=True) + # break, not continue, because elsewhere it is assumed that + # finding and cleaving shard ranges progresses linearly, so we + # do not want any subsequent shard ranges to be in created + # state while this one is still in found state + break + created_ranges.append(shard_range) + + if created_ranges: + broker.merge_shard_ranges(created_ranges) + if not broker.is_root_container(): + self._send_shard_ranges( + broker.root_account, broker.root_container, created_ranges) + self.logger.info( + "Completed creating shard range containers: %d created.", + len(created_ranges)) + return len(created_ranges) + + def _cleave_shard_range(self, broker, cleaving_context, shard_range): + self.logger.info("Cleaving '%s' from row %s into %s for %r", + broker.path, cleaving_context.last_cleave_to_row, + shard_range.name, shard_range) + self._increment_stat('cleaved', 'attempted') + start = time.time() + policy_index = broker.storage_policy_index + try: + shard_part, shard_broker, node_id = self._get_shard_broker( + shard_range, broker.root_path, 
policy_index) + except DeviceUnavailable as duex: + self.logger.warning(str(duex)) + self._increment_stat('cleaved', 'failure', statsd=True) + return False + + # only cleave from the retiring db - misplaced objects handler will + # deal with any objects in the fresh db + source_broker = broker.get_brokers()[0] + # if this range has been cleaved before but replication + # failed then the shard db may still exist and it may not be + # necessary to merge all the rows again + source_db_id = source_broker.get_info()['id'] + source_max_row = source_broker.get_max_row() + sync_point = shard_broker.get_sync(source_db_id) + if sync_point < source_max_row: + sync_from_row = max(cleaving_context.last_cleave_to_row, + sync_point) + for objects, info in self.yield_objects( + source_broker, shard_range, + since_row=sync_from_row): + shard_broker.merge_items(objects) + # Note: the max row stored as a sync point is sampled *before* + # objects are yielded to ensure that is less than or equal to + # the last yielded row. Other sync points are also copied from the + # source broker to the shards; if another replica of the source + # happens to subsequently cleave into a primary replica of the + # shard then it will only need to cleave rows after its last sync + # point with this replica of the source broker. + shard_broker.merge_syncs( + [{'sync_point': source_max_row, 'remote_id': source_db_id}] + + source_broker.get_syncs()) + else: + self.logger.debug("Cleaving '%s': %r - shard db already in sync", + broker.path, shard_range) + + own_shard_range = broker.get_own_shard_range() + + replication_quorum = self.existing_shard_replication_quorum + if shard_range.includes(own_shard_range): + # When shrinking, include deleted own (donor) shard range in + # the replicated db so that when acceptor next updates root it + # will atomically update its namespace *and* delete the donor. + # Don't do this when sharding a shard because the donor + # namespace should not be deleted until all shards are cleaved. + if own_shard_range.update_state(ShardRange.SHARDED): + own_shard_range.set_deleted() + broker.merge_shard_ranges(own_shard_range) + shard_broker.merge_shard_ranges(own_shard_range) + elif shard_range.state == ShardRange.CREATED: + # The shard range object stats may have changed since the shard + # range was found, so update with stats of objects actually + # copied to the shard broker. Only do this the first time each + # shard range is cleaved. 
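
Earlier in this method the sync point stored in the shard db decides whether any rows need to be copied at all; a minimal sketch of that decision (the function name and row numbers here are illustrative, not part of the diff):

    def cleave_sync_from_row(source_max_row, sync_point, last_cleave_to_row):
        # returns the row to sync from, or None when the shard db already
        # holds every row of the source db ("shard db already in sync")
        if sync_point >= source_max_row:
            return None
        # never re-copy rows below the locally recorded cleave position
        return max(last_cleave_to_row, sync_point)

    assert cleave_sync_from_row(500, 480, 350) == 480   # resume a partial cleave
    assert cleave_sync_from_row(500, 500, 350) is None  # nothing left to copy
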
+ info = shard_broker.get_info() + shard_range.update_meta( + info['object_count'], info['bytes_used']) + shard_range.update_state(ShardRange.CLEAVED) + shard_broker.merge_shard_ranges(shard_range) + replication_quorum = self.shard_replication_quorum + + self.logger.info( + 'Replicating new shard container %s for %s', + shard_broker.path, shard_broker.get_own_shard_range()) + + success, responses = self._replicate_object( + shard_part, shard_broker.db_file, node_id) + + replication_successes = responses.count(True) + if (not success and (not responses or + replication_successes < replication_quorum)): + # insufficient replication or replication not even attempted; + # break because we don't want to progress the cleave cursor + # until each shard range has been successfully cleaved + self.logger.warning( + 'Failed to sufficiently replicate cleaved shard %s for %s: ' + '%s successes, %s required.', shard_range, broker.path, + replication_successes, replication_quorum) + self._increment_stat('cleaved', 'failure', statsd=True) + return False + + elapsed = round(time.time() - start, 3) + self._min_stat('cleaved', 'min_time', elapsed) + self._max_stat('cleaved', 'max_time', elapsed) + broker.merge_shard_ranges(shard_range) + cleaving_context.cursor = shard_range.upper_str + cleaving_context.ranges_done += 1 + cleaving_context.ranges_todo -= 1 + if shard_range.upper >= own_shard_range.upper: + # cleaving complete + cleaving_context.cleaving_done = True + cleaving_context.store(broker) + self.logger.info( + 'Cleaved %s for shard range %s in %gs.', + broker.path, shard_range, elapsed) + self._increment_stat('cleaved', 'success', statsd=True) + return True + + def _cleave(self, broker): + # Returns True if misplaced objects have been moved and the entire + # container namespace has been successfully cleaved, False otherwise + if broker.is_sharded(): + self.logger.debug('Passing over already sharded container %s/%s', + broker.account, broker.container) + return True + + cleaving_context = CleavingContext.load(broker) + if not cleaving_context.misplaced_done: + # ensure any misplaced objects in the source broker are moved; note + # that this invocation of _move_misplaced_objects is targetted at + # the *retiring* db. 
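
The misplaced-object pass invoked below ultimately buckets each batch of objects into destination shard ranges via yield_objects_to_shard_range (shown earlier); a stripped-down sketch of that bucketing, with sorted (lower, upper] tuples standing in for ShardRange objects and invented names:

    def bucket_objects(names, dest_ranges):
        # names must be sorted; dest_ranges are sorted (lower, upper] tuples.
        # Objects at or below a range's lower bound, or beyond the last
        # upper bound, get destination None (they stay "unplaced").
        ranges = iter(dest_ranges)
        current = next(ranges, None)
        placed = []
        for name in names:
            while current and name > current[1]:
                current = next(ranges, None)
            if current and current[0] < name <= current[1]:
                placed.append((name, current))
            else:
                placed.append((name, None))
        return placed

    dests = [('a', 'm'), ('m', 'z')]
    assert bucket_objects(['apple', 'mango', 'zebra~'], dests) == [
        ('apple', ('a', 'm')), ('mango', ('m', 'z')), ('zebra~', None)]
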
+ self.logger.debug( + 'Moving any misplaced objects from sharding container: %s', + broker.path) + bounds = self._make_default_misplaced_object_bounds(broker) + cleaving_context.misplaced_done = self._move_misplaced_objects( + broker, src_broker=broker.get_brokers()[0], + src_bounds=bounds) + cleaving_context.store(broker) + + if cleaving_context.cleaving_done: + self.logger.debug('Cleaving already complete for container %s', + broker.path) + return cleaving_context.misplaced_done + + ranges_todo = broker.get_shard_ranges(marker=cleaving_context.marker) + if cleaving_context.cursor: + # always update ranges_todo in case more ranges have been found + # since last visit + cleaving_context.ranges_todo = len(ranges_todo) + self.logger.debug('Continuing to cleave (%s done, %s todo): %s', + cleaving_context.ranges_done, + cleaving_context.ranges_todo, + broker.path) + else: + cleaving_context.start() + cleaving_context.ranges_todo = len(ranges_todo) + self.logger.debug('Starting to cleave (%s todo): %s', + cleaving_context.ranges_todo, broker.path) + + ranges_done = [] + for shard_range in ranges_todo[:self.cleave_batch_size]: + if shard_range.state == ShardRange.FOUND: + break + elif shard_range.state in (ShardRange.CREATED, + ShardRange.CLEAVED, + ShardRange.ACTIVE): + if self._cleave_shard_range( + broker, cleaving_context, shard_range): + ranges_done.append(shard_range) + else: + break + else: + self.logger.warning('Unexpected shard range state for cleave', + shard_range.state) + break + + if not ranges_done: + cleaving_context.store(broker) + self.logger.debug( + 'Cleaved %s shard ranges for %s', len(ranges_done), broker.path) + return (cleaving_context.misplaced_done and + cleaving_context.cleaving_done) + + def _complete_sharding(self, broker): + cleaving_context = CleavingContext.load(broker) + if cleaving_context.done(): + # Move all CLEAVED shards to ACTIVE state and if a shard then + # delete own shard range; these changes will be simultaneously + # reported in the next update to the root container. 
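
For orientation, the shard-range lifecycle these methods drive can be summarised as follows; this is a condensed reading of the code above, not an exhaustive state table:

    # Shard-range state transitions driven by the sharder (state names are
    # the ShardRange constants used in the code above):
    #
    #   FOUND    -> CREATED   _create_shard_containers PUTs the shard container
    #   CREATED  -> CLEAVED   _cleave_shard_range copies rows and replicates
    #   CLEAVED  -> ACTIVE    _complete_sharding, once every range is cleaved
    #
    # and, for the sharding container's *own* range:
    #
    #   SHARDING -> SHARDED   set in _complete_sharding; the own range is
    #                         also marked deleted when the container is
    #                         itself a shard rather than the root
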
+ modified_shard_ranges = broker.get_shard_ranges( + states=ShardRange.CLEAVED) + for sr in modified_shard_ranges: + sr.update_state(ShardRange.ACTIVE) + own_shard_range = broker.get_own_shard_range() + own_shard_range.update_state(ShardRange.SHARDED) + own_shard_range.update_meta(0, 0) + if (not broker.is_root_container() and not + own_shard_range.deleted): + own_shard_range = own_shard_range.copy( + timestamp=Timestamp.now(), deleted=1) + modified_shard_ranges.append(own_shard_range) + broker.merge_shard_ranges(modified_shard_ranges) + if broker.set_sharded_state(): + return True + else: + self.logger.warning( + 'Failed to remove retiring db file for %s', + broker.path) + else: + self.logger.warning( + 'Repeat cleaving required for %r with context: %s' + % (broker.db_files[0], dict(cleaving_context))) + cleaving_context.reset() + cleaving_context.store(broker) + + return False + + def _find_and_enable_sharding_candidates(self, broker, shard_ranges=None): + candidates = find_sharding_candidates( + broker, self.shard_container_threshold, shard_ranges) + if candidates: + self.logger.debug('Identified %s sharding candidates' + % len(candidates)) + broker.merge_shard_ranges(candidates) + + def _find_and_enable_shrinking_candidates(self, broker): + if not broker.is_sharded(): + self.logger.warning('Cannot shrink a not yet sharded container %s', + broker.path) + return + + merge_pairs = find_shrinking_candidates( + broker, self.shrink_size, self.merge_size) + self.logger.debug('Found %s shrinking candidates' % len(merge_pairs)) + own_shard_range = broker.get_own_shard_range() + for acceptor, donor in merge_pairs.items(): + self.logger.debug('shrinking shard range %s into %s in %s' % + (donor, acceptor, broker.db_file)) + broker.merge_shard_ranges([acceptor, donor]) + if acceptor.name != own_shard_range.name: + self._send_shard_ranges( + acceptor.account, acceptor.container, [acceptor]) + acceptor.increment_meta(donor.object_count, donor.bytes_used) + else: + # no need to change namespace or stats + acceptor.update_state(ShardRange.ACTIVE, + state_timestamp=Timestamp.now()) + # Now send a copy of the expanded acceptor, with an updated + # timestamp, to the donor container. This forces the donor to + # asynchronously cleave its entire contents to the acceptor and + # delete itself. The donor will pass its own deleted shard range to + # the acceptor when cleaving. Subsequent updates from the donor or + # the acceptor will then update the root to have the deleted donor + # shard range. + self._send_shard_ranges( + donor.account, donor.container, [donor, acceptor]) + + def _update_root_container(self, broker): + own_shard_range = broker.get_own_shard_range(no_default=True) + if not own_shard_range: + return + + # persist the reported shard metadata + broker.merge_shard_ranges(own_shard_range) + # now get a consistent list of own and other shard ranges + shard_ranges = broker.get_shard_ranges( + include_own=True, + include_deleted=True) + # send everything + self._send_shard_ranges( + broker.root_account, broker.root_container, + shard_ranges) + + def _process_broker(self, broker, node, part): + broker.get_info() # make sure account/container are populated + state = broker.get_db_state() + self.logger.debug('Starting processing %s state %s', + broker.path, state) + + if not self._audit_container(broker): + return + + # now look and deal with misplaced objects. + self._move_misplaced_objects(broker) + + if broker.is_deleted(): + # This container is deleted so we can skip it. 
We still want + # deleted containers to go via misplaced items because they may + # have new objects sitting in them that may need to move. + return + + is_leader = node['index'] == 0 and self.auto_shard + if state in (UNSHARDED, COLLAPSED): + if is_leader and broker.is_root_container(): + # bootstrap sharding of root container + self._find_and_enable_sharding_candidates( + broker, shard_ranges=[broker.get_own_shard_range()]) + + own_shard_range = broker.get_own_shard_range() + if own_shard_range.state in (ShardRange.SHARDING, + ShardRange.SHRINKING, + ShardRange.SHARDED): + if broker.get_shard_ranges(): + # container has been given shard ranges rather than + # found them e.g. via replication or a shrink event + if broker.set_sharding_state(): + state = SHARDING + elif is_leader: + if broker.set_sharding_state(): + state = SHARDING + else: + self.logger.debug( + 'Own shard range in state %r but no shard ranges ' + 'and not leader; remaining unsharded: %s' + % (own_shard_range.state_text, broker.path)) + + if state == SHARDING: + if is_leader: + num_found = self._find_shard_ranges(broker) + else: + num_found = 0 + + # create shard containers for newly found ranges + num_created = self._create_shard_containers(broker) + + if num_found or num_created: + # share updated shard range state with other nodes + self._replicate_object(part, broker.db_file, node['id']) + + # always try to cleave any pending shard ranges + cleave_complete = self._cleave(broker) + + if cleave_complete: + self.logger.info('Completed cleaving of %s', broker.path) + if self._complete_sharding(broker): + state = SHARDED + self._increment_stat('visited', 'completed', statsd=True) + else: + self.logger.debug('Remaining in sharding state %s', + broker.path) + + if state == SHARDED and broker.is_root_container(): + if is_leader: + self._find_and_enable_shrinking_candidates(broker) + self._find_and_enable_sharding_candidates(broker) + for shard_range in broker.get_shard_ranges( + states=[ShardRange.SHARDING]): + self._send_shard_ranges( + shard_range.account, shard_range.container, + [shard_range]) + + if not broker.is_root_container(): + # Update the root container with this container's shard range + # info; do this even when sharded in case previous attempts + # failed; don't do this if there is no own shard range. When + # sharding a shard, this is when the root will see the new + # shards move to ACTIVE state and the sharded shard + # simultaneously become deleted. + self._update_root_container(broker) + + self.logger.debug('Finished processing %s/%s state %s', + broker.account, broker.container, + broker.get_db_state()) + + def _one_shard_cycle(self, devices_to_shard, partitions_to_shard): + """ + The main function, everything the sharder does forks from this method. 
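
A rough sketch of the decision made per broker by _process_broker above; several preconditions and side effects are elided, and the helper below is illustrative rather than the real control flow:

    def next_step_sketch(db_state, is_leader, own_state_is_sharding,
                         has_shard_ranges):
        # roughly what one _process_broker pass decides; auditing, misplaced
        # object handling and root updates are elided
        if db_state == 'UNSHARDED':
            # move to SHARDING only once the own shard range is in a
            # sharding/shrinking/sharded state, and either shard ranges were
            # already received (e.g. via replication) or this node leads
            if own_state_is_sharding and (has_shard_ranges or is_leader):
                return 'set_sharding_state() -> SHARDING'
            return 'remain UNSHARDED'
        if db_state == 'SHARDING':
            return ('find ranges (leader only), create shard containers, '
                    'cleave; SHARDED once _complete_sharding() succeeds')
        # SHARDED: a root looks for shrinking/sharding candidates; a shard
        # keeps reporting its shard ranges to the root
        return 'no state change'
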
+ + The sharder loops through each container with sharding enabled and each + sharded container on the server, on each container it: + - audits the container + - checks and deals with misplaced items + - cleaves any shard ranges as required + - if not a root container, reports shard range stats to the root + container + """ + self.logger.info('Container sharder cycle starting, auto-sharding %s', + self.auto_shard) + if isinstance(devices_to_shard, (list, tuple)): + self.logger.info('(Override devices: %s)', + ', '.join(str(d) for d in devices_to_shard)) + if isinstance(partitions_to_shard, (list, tuple)): + self.logger.info('(Override partitions: %s)', + ', '.join(str(p) for p in partitions_to_shard)) + self._zero_stats() + self._local_device_ids = set() + dirs = [] + self.ips = whataremyips(bind_ip=self.bind_ip) + for node in self.ring.devs: + if not self._check_node(node): + continue + datadir = os.path.join(self.root, node['device'], self.datadir) + if os.path.isdir(datadir): + # Populate self._local_device_ids so we can find devices for + # shard containers later + self._local_device_ids.add(node['id']) + if node['device'] not in devices_to_shard: + continue + part_filt = self._partition_dir_filter( + node['id'], + partitions_to_shard) + dirs.append((datadir, node, part_filt)) + if not dirs: + self.logger.warning('Found no data dirs!') + for part, path, node in db_replicator.roundrobin_datadirs(dirs): + # NB: get_part_nodes always provides an 'index' key; + # this will be used in leader selection + for primary in self.ring.get_part_nodes(int(part)): + if node['id'] == primary['id']: + node = primary + break + else: + # Set index such that we'll *never* be selected as a leader + node['index'] = 'handoff' + + broker = ContainerBroker(path, logger=self.logger, + timeout=self.broker_timeout) + error = None + try: + self._identify_sharding_candidate(broker, node) + if sharding_enabled(broker): + self._increment_stat('visited', 'attempted') + self._process_broker(broker, node, part) + self._increment_stat('visited', 'success', statsd=True) + else: + self._increment_stat('visited', 'skipped') + except (Exception, Timeout) as error: + self._increment_stat('visited', 'failure', statsd=True) + self.logger.exception( + 'Unhandled exception while processing %s: %s', path, error) + try: + self._record_sharding_progress(broker, node, error) + except (Exception, Timeout) as error: + self.logger.exception( + 'Unhandled exception while dumping progress for %s: %s', + path, error) + self._periodic_report_stats() + + self._report_stats() + + def run_forever(self, *args, **kwargs): + """Run the container sharder until stopped.""" + self.reported = time.time() + time.sleep(random() * self.interval) + while True: + begin = time.time() + try: + self._one_shard_cycle(devices_to_shard=Everything(), + partitions_to_shard=Everything()) + except (Exception, Timeout): + self.logger.increment('errors') + self.logger.exception('Exception in sharder') + elapsed = time.time() - begin + self.logger.info( + 'Container sharder cycle completed: %.02fs', elapsed) + if elapsed < self.interval: + time.sleep(self.interval - elapsed) + + def run_once(self, *args, **kwargs): + """Run the container sharder once.""" + self.logger.info('Begin container sharder "once" mode') + override_options = parse_override_options(once=True, **kwargs) + devices_to_shard = override_options.devices or Everything() + partitions_to_shard = override_options.partitions or Everything() + begin = self.reported = time.time() + 
self._one_shard_cycle(devices_to_shard=devices_to_shard, + partitions_to_shard=partitions_to_shard) + elapsed = time.time() - begin + self.logger.info( + 'Container sharder "once" mode completed: %.02fs', elapsed) diff -Nru swift-2.17.0/swift/container/updater.py swift-2.18.0/swift/container/updater.py --- swift-2.17.0/swift/container/updater.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/container/updater.py 2018-05-30 10:17:09.000000000 +0000 @@ -29,7 +29,7 @@ from swift.common.constraints import check_drive from swift.container.backend import ContainerBroker, DATADIR from swift.common.bufferedhttp import http_connect -from swift.common.exceptions import ConnectionTimeout +from swift.common.exceptions import ConnectionTimeout, LockTimeout from swift.common.ring import Ring from swift.common.utils import get_logger, config_true_value, \ dump_recon_cache, majority_size, Timestamp, ratelimit_sleep, \ @@ -232,7 +232,12 @@ """ start_time = time.time() broker = ContainerBroker(dbfile, logger=self.logger) - info = broker.get_info() + try: + info = broker.get_info() + except LockTimeout: + self.logger.exception("Failed to get container info for %s", + dbfile) + return # Don't send updates if the container was auto-created since it # definitely doesn't have up to date statistics. if Timestamp(info['put_timestamp']) <= 0: diff -Nru swift-2.17.0/swift/locale/en_GB/LC_MESSAGES/swift.po swift-2.18.0/swift/locale/en_GB/LC_MESSAGES/swift.po --- swift-2.17.0/swift/locale/en_GB/LC_MESSAGES/swift.po 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/locale/en_GB/LC_MESSAGES/swift.po 2018-05-30 10:17:02.000000000 +0000 @@ -10,16 +10,16 @@ msgstr "" "Project-Id-Version: swift VERSION\n" "Report-Msgid-Bugs-To: https://bugs.launchpad.net/openstack-i18n/\n" -"POT-Creation-Date: 2018-01-31 06:08+0000\n" +"POT-Creation-Date: 2018-02-28 19:39+0000\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"PO-Revision-Date: 2018-01-27 09:17+0000\n" +"PO-Revision-Date: 2018-02-16 07:31+0000\n" "Last-Translator: Andi Chandler \n" -"Language: en-GB\n" +"Language: en_GB\n" "Plural-Forms: nplurals=2; plural=(n != 1);\n" "Generated-By: Babel 2.0\n" -"X-Generator: Zanata 3.9.6\n" +"X-Generator: Zanata 4.3.3\n" "Language-Team: English (United Kingdom)\n" msgid "" @@ -673,10 +673,6 @@ msgstr "Exception in top-level reconstruction loop" #, python-format -msgid "Exception while deleting container %(container)s %(err)s" -msgstr "Exception while deleting container %(container)s %(err)s" - -#, python-format msgid "Exception with %(ip)s:%(port)s/%(device)s" msgstr "Exception with %(ip)s:%(port)s/%(device)s" @@ -926,14 +922,6 @@ msgstr "Partition times: max %(max).4fs, min %(min).4fs, med %(med).4fs" #, python-format -msgid "" -"Pass beginning; %(containers)s possible containers; %(objects)s possible " -"objects" -msgstr "" -"Pass beginning; %(containers)s possible containers; %(objects)s possible " -"objects" - -#, python-format msgid "Pass completed in %(time)ds; %(objects)d objects expired" msgstr "Pass completed in %(time)ds; %(objects)d objects expired" @@ -977,8 +965,8 @@ msgstr "Profiling Error: %s" #, python-format -msgid "Quarantined %(db_dir)s to %(quar_path)s due to %(exc_hint)s database" -msgstr "Quarantined %(db_dir)s to %(quar_path)s due to %(exc_hint)s database" +msgid "Quarantined %(db_dir)s to %(quar_path)s due to %(reason)s" +msgstr "Quarantined %(db_dir)s to %(quar_path)s due to %(reason)s" #, python-format msgid "Quarantined %(hsh_path)s to 
%(quar_path)s because it is not a directory" diff -Nru swift-2.17.0/swift/locale/es/LC_MESSAGES/swift.po swift-2.18.0/swift/locale/es/LC_MESSAGES/swift.po --- swift-2.17.0/swift/locale/es/LC_MESSAGES/swift.po 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/locale/es/LC_MESSAGES/swift.po 2018-05-30 10:17:02.000000000 +0000 @@ -9,7 +9,7 @@ msgstr "" "Project-Id-Version: swift VERSION\n" "Report-Msgid-Bugs-To: https://bugs.launchpad.net/openstack-i18n/\n" -"POT-Creation-Date: 2018-01-31 06:08+0000\n" +"POT-Creation-Date: 2018-02-28 19:39+0000\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" @@ -18,7 +18,7 @@ "Language: es\n" "Plural-Forms: nplurals=2; plural=(n != 1);\n" "Generated-By: Babel 2.0\n" -"X-Generator: Zanata 3.9.6\n" +"X-Generator: Zanata 4.3.3\n" "Language-Team: Spanish\n" msgid "" @@ -621,10 +621,6 @@ msgstr "Excepción en el bucle de reconstrucción de nivel superior" #, python-format -msgid "Exception while deleting container %(container)s %(err)s" -msgstr "Excepción al suprimir el contenedor %(container)s %(err)s" - -#, python-format msgid "Exception with %(ip)s:%(port)s/%(device)s" msgstr "Excepción con %(ip)s:%(port)s/%(device)s" @@ -861,14 +857,6 @@ "Tiempos de partición: máximo %(max).4fs, mínimo %(min).4fs, medio %(med).4fs" #, python-format -msgid "" -"Pass beginning; %(containers)s possible containers; %(objects)s possible " -"objects" -msgstr "" -"Inicio del paso; %(containers)s posibles contenedores; %(objects)s posibles " -"objetos" - -#, python-format msgid "Pass completed in %(time)ds; %(objects)d objects expired" msgstr "Paso completado en %(time)ds; %(objects)d objetos caducados" @@ -892,11 +880,6 @@ msgstr "Error de perfil: %s" #, python-format -msgid "Quarantined %(db_dir)s to %(quar_path)s due to %(exc_hint)s database" -msgstr "" -"En cuarentena%(db_dir)s hasta %(quar_path)s debido a %(exc_hint)s database" - -#, python-format msgid "Quarantined %(hsh_path)s to %(quar_path)s because it is not a directory" msgstr "" "Se ha puesto en cuarentena %(hsh_path)s en %(quar_path)s debido a que no es " diff -Nru swift-2.17.0/swift/locale/ja/LC_MESSAGES/swift.po swift-2.18.0/swift/locale/ja/LC_MESSAGES/swift.po --- swift-2.17.0/swift/locale/ja/LC_MESSAGES/swift.po 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/locale/ja/LC_MESSAGES/swift.po 2018-05-30 10:17:02.000000000 +0000 @@ -5,16 +5,17 @@ # Translators: # Sasuke(Kyohei MORIYAMA) <>, 2015 # Andreas Jaeger , 2016. #zanata +# Shu Muto , 2018. 
#zanata msgid "" msgstr "" "Project-Id-Version: swift VERSION\n" "Report-Msgid-Bugs-To: https://bugs.launchpad.net/openstack-i18n/\n" -"POT-Creation-Date: 2018-01-31 06:08+0000\n" +"POT-Creation-Date: 2018-02-09 02:13+0000\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"PO-Revision-Date: 2016-04-12 06:43+0000\n" -"Last-Translator: Copied by Zanata \n" +"PO-Revision-Date: 2018-02-09 05:39+0000\n" +"Last-Translator: Shu Muto \n" "Language: ja\n" "Plural-Forms: nplurals=1; plural=0;\n" "Generated-By: Babel 2.0\n" @@ -56,6 +57,22 @@ "複製されました (%(rate).2f/秒、残り %(remaining)s)" #, python-format +msgid "%(server)s #%(number)d not running (%(conf)s)" +msgstr "%(server)s #%(number)d は稼働していません (%(conf)s)" + +#, python-format +msgid "%(server)s (%(pid)s) appears to have stopped" +msgstr "%(server)s (%(pid)s) が停止された可能性があります" + +#, python-format +msgid "%(server)s running (%(pid)s - %(conf)s)" +msgstr "%(server)s 稼働中 (%(pid)s - %(conf)s)" + +#, python-format +msgid "%(server)s running (%(pid)s - %(pid_file)s)" +msgstr "%(server)s 稼働中 (%(pid)s - %(pid_file)s)" + +#, python-format msgid "%(success)s successes, %(failure)s failures" msgstr "成功 %(success)s、失敗 %(failure)s" @@ -64,6 +81,10 @@ msgstr "%(type)s が %(statuses)s について 503 を返しています" #, python-format +msgid "%(type)s: %(value)s" +msgstr "%(type)s: %(value)s" + +#, python-format msgid "%s already started..." msgstr "%s は既に開始されています..." @@ -118,6 +139,10 @@ msgstr "アカウント" #, python-format +msgid "Account %(account)s has not been reaped since %(time)s" +msgstr "アカウント %(account)s は %(time)s 以降リープされていません" + +#, python-format msgid "Account audit \"once\" mode completed: %.02fs" msgstr "アカウント監査 \"once\" モードが完了しました: %.02fs" @@ -131,6 +156,18 @@ msgstr "%(time).5f 秒で %(count)d 個の DB の複製を試行しました (%(rate).5f/s)" #, python-format +msgid "Audit Failed for %(path)s: %(err)s" +msgstr "%(path)s の監査が失敗しました: %(err)s" + +#, python-format +msgid "Audit passed for %s" +msgstr "%s の監査が合格しました。" + +#, python-format +msgid "Bad key for %(name)r: %(err)s" +msgstr "%(name)r のキーが不正です: %(err)s" + +#, python-format msgid "Bad rsync return code: %(ret)d <- %(args)s" msgstr "正しくない再同期戻りコード: %(ret)d <- %(args)s" @@ -249,6 +286,10 @@ "%(success)s、失敗 %(fail)s、未変更 %(no_change)s" #, python-format +msgid "Could not load %(conf)r: %(error)s" +msgstr "%(conf)r をロードできませんでした: %(error)s" + +#, python-format msgid "Data download error: %s" msgstr "データダウンロードエラー: %s" @@ -256,6 +297,9 @@ msgid "Devices pass completed: %.02fs" msgstr "デバイスの処理が完了しました: %.02fs" +msgid "Did not get a keys dict" +msgstr "キーの辞書を取得できませんでした。" + #, python-format msgid "ERROR %(db_file)s: %(validate_sync_to_err)s" msgstr "エラー %(db_file)s: %(validate_sync_to_err)s" @@ -343,6 +387,14 @@ msgstr "%s の同期エラー" #, python-format +msgid "" +"ERROR There are not enough handoff nodes to reach replica count for " +"partition %s" +msgstr "" +"エラー パーティション %s のレプリカ数に達するための handoff ノードが不足して" +"います。" + +#, python-format msgid "ERROR Trying to audit %s" msgstr "%s の監査を試行中にエラーが発生しました" @@ -388,6 +440,13 @@ "エラー: コンテナー更新が %(ip)s:%(port)s/%(dev)s で失敗しました (後の非同期更" "新のために保存中)" +msgid "ERROR get_keys() missing callback" +msgstr "エラー get_keys() コールバックがありません" + +#, python-format +msgid "ERROR get_keys(): from callback: %s" +msgstr "エラー get_keys(): コールバック: %s" + #, python-format msgid "ERROR reading HTTP response from %s" msgstr "%s からの HTTP 応答の読み取りエラー" @@ -439,6 +498,10 @@ msgid "Error hashing suffix" msgstr "サフィックスのハッシュエラー" +#, python-format +msgid "Error in %(conf)r with mtime_check_interval: 
%(error)s" +msgstr "mtime_check_interval で %(conf)r にエラーがあります: %(error)s" + msgid "Error listing devices" msgstr "デバイスのリストエラー" @@ -461,6 +524,10 @@ msgid "Error retrieving recon data" msgstr "再構成データの取得エラー" +#, python-format +msgid "Error sending UDP message to %(target)r: %(err)s" +msgstr "%(target)r への UDP メッセージ送信エラー: %(err)s" + msgid "Error syncing handoff partition" msgstr "ハンドオフパーティションの同期エラー" @@ -571,6 +638,9 @@ "無効なスキーム %r が X-Container-Sync-To にあります。「//」、「http」、" "「https」のいずれかでなければなりません。" +msgid "Invalid swift_bytes" +msgstr "無効な swift_bytes" + #, python-format msgid "Killing long-running rsync: %s" msgstr "長期実行の再同期を強制終了中: %s" @@ -583,10 +653,18 @@ msgstr "%(given_domain)s が %(found_domain)s にマップされました" #, python-format +msgid "Missing key for %r" +msgstr "%r にキーがありません。" + +#, python-format msgid "No %s running" msgstr "%s が実行されていません" #, python-format +msgid "No cluster endpoint for %(realm)r %(cluster)r" +msgstr "%(realm)r %(cluster)r のエンドポイントクラスターがありません" + +#, python-format msgid "No permission to signal PID %d" msgstr "PID %d にシグナル通知する許可がありません" @@ -785,6 +863,10 @@ msgstr "スクリプトモードでオブジェクトレプリケーターを実行中です。" #, python-format +msgid "Signal %(server)s pid: %(pid)s signal: %(signal)s" +msgstr "%(server)s pid: %(pid)s へのシグナル: %(signal)s" + +#, python-format msgid "" "Since %(time)s: %(sync)s synced [%(delete)s deletes, %(put)s puts], %(skip)s " "skipped, %(fail)s failed" @@ -808,6 +890,10 @@ "%(time)s 以降: コンテナー監査: 合格した監査 %(pass)s、不合格の監査%(fail)s" #, python-format +msgid "Skipping %(datadir)s because %(err)s" +msgstr "%(err)s のため %(datadir)s をスキップします" + +#, python-format msgid "Skipping %(device)s as it is not mounted" msgstr "%(device)s はマウントされていないため、スキップされます" @@ -879,6 +965,10 @@ msgstr "キャッチされていない例外" #, python-format +msgid "Unable to find %(section)s config section in %(conf)s" +msgstr "%(section)s 構成セクションが %(conf)s に見つかりません" + +#, python-format msgid "Unable to locate %s in libc. Leaving as a no-op." msgstr "%s が libc に見つかりません。no-op として終了します。" @@ -886,12 +976,20 @@ msgid "Unable to locate config for %s" msgstr "%s の設定が見つかりません" +#, python-format +msgid "Unable to locate config number %(number)s for %(server)s" +msgstr "%(server)s の設定番号 %(number)s が見つかりません" + msgid "" "Unable to locate fallocate, posix_fallocate in libc. Leaving as a no-op." msgstr "" "fallocate、posix_fallocate が libc に見つかりません。no-op として終了します。" #, python-format +msgid "Unable to perform fsync() on directory %(dir)s: %(err)s" +msgstr "ディレクトリー %(dir)s で fsync() を実行できません: %(err)s" + +#, python-format msgid "Unable to read config from %s" msgstr "構成を %s から読み取ることができません" @@ -931,6 +1029,14 @@ msgid "WARNING: Unable to modify memory limit. Running as non-root?" msgstr "警告: メモリー制限を変更できません。非ルートとして実行しますか?" 
+#, python-format +msgid "Waited %(kill_wait)s seconds for %(server)s to die; giving up" +msgstr "%(kill_wait)s 秒間、%(server)s の停止を待機しました。中止します" + +#, python-format +msgid "Waited %(kill_wait)s seconds for %(server)s to die; killing" +msgstr "%(kill_wait)s 秒間、%(server)s の停止を待機しました。強制終了します" + msgid "Warning: Cannot ratelimit without a memcached client" msgstr "警告: memcached クライアントなしで ratelimit を行うことはできません" diff -Nru swift-2.17.0/swift/locale/ko_KR/LC_MESSAGES/swift.po swift-2.18.0/swift/locale/ko_KR/LC_MESSAGES/swift.po --- swift-2.17.0/swift/locale/ko_KR/LC_MESSAGES/swift.po 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/locale/ko_KR/LC_MESSAGES/swift.po 2018-05-30 10:17:02.000000000 +0000 @@ -10,16 +10,16 @@ msgstr "" "Project-Id-Version: swift VERSION\n" "Report-Msgid-Bugs-To: https://bugs.launchpad.net/openstack-i18n/\n" -"POT-Creation-Date: 2018-01-31 06:08+0000\n" +"POT-Creation-Date: 2018-02-28 19:39+0000\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "PO-Revision-Date: 2016-04-12 06:43+0000\n" "Last-Translator: Copied by Zanata \n" -"Language: ko-KR\n" +"Language: ko_KR\n" "Plural-Forms: nplurals=1; plural=0;\n" "Generated-By: Babel 2.0\n" -"X-Generator: Zanata 3.9.6\n" +"X-Generator: Zanata 4.3.3\n" "Language-Team: Korean (South Korea)\n" msgid "" diff -Nru swift-2.17.0/swift/locale/pt_BR/LC_MESSAGES/swift.po swift-2.18.0/swift/locale/pt_BR/LC_MESSAGES/swift.po --- swift-2.17.0/swift/locale/pt_BR/LC_MESSAGES/swift.po 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/locale/pt_BR/LC_MESSAGES/swift.po 2018-05-30 10:17:02.000000000 +0000 @@ -12,16 +12,16 @@ msgstr "" "Project-Id-Version: swift VERSION\n" "Report-Msgid-Bugs-To: https://bugs.launchpad.net/openstack-i18n/\n" -"POT-Creation-Date: 2018-01-31 06:08+0000\n" +"POT-Creation-Date: 2018-02-28 19:39+0000\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "PO-Revision-Date: 2016-04-12 06:43+0000\n" "Last-Translator: Copied by Zanata \n" -"Language: pt-BR\n" +"Language: pt_BR\n" "Plural-Forms: nplurals=2; plural=(n > 1);\n" "Generated-By: Babel 2.0\n" -"X-Generator: Zanata 3.9.6\n" +"X-Generator: Zanata 4.3.3\n" "Language-Team: Portuguese (Brazil)\n" msgid "" diff -Nru swift-2.17.0/swift/locale/tr_TR/LC_MESSAGES/swift.po swift-2.18.0/swift/locale/tr_TR/LC_MESSAGES/swift.po --- swift-2.17.0/swift/locale/tr_TR/LC_MESSAGES/swift.po 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/locale/tr_TR/LC_MESSAGES/swift.po 2018-05-30 10:17:02.000000000 +0000 @@ -9,16 +9,16 @@ msgstr "" "Project-Id-Version: swift VERSION\n" "Report-Msgid-Bugs-To: https://bugs.launchpad.net/openstack-i18n/\n" -"POT-Creation-Date: 2018-01-31 06:08+0000\n" +"POT-Creation-Date: 2018-02-28 19:39+0000\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "PO-Revision-Date: 2016-04-12 06:43+0000\n" "Last-Translator: Copied by Zanata \n" -"Language: tr-TR\n" +"Language: tr_TR\n" "Plural-Forms: nplurals=1; plural=0;\n" "Generated-By: Babel 2.0\n" -"X-Generator: Zanata 3.9.6\n" +"X-Generator: Zanata 4.3.3\n" "Language-Team: Turkish (Turkey)\n" msgid "" diff -Nru swift-2.17.0/swift/locale/zh_CN/LC_MESSAGES/swift.po swift-2.18.0/swift/locale/zh_CN/LC_MESSAGES/swift.po --- swift-2.17.0/swift/locale/zh_CN/LC_MESSAGES/swift.po 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/locale/zh_CN/LC_MESSAGES/swift.po 2018-05-30 10:17:02.000000000 +0000 @@ -9,16 +9,16 @@ msgstr "" 
"Project-Id-Version: swift VERSION\n" "Report-Msgid-Bugs-To: https://bugs.launchpad.net/openstack-i18n/\n" -"POT-Creation-Date: 2018-01-31 06:08+0000\n" +"POT-Creation-Date: 2018-02-28 19:39+0000\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "PO-Revision-Date: 2016-04-12 06:43+0000\n" "Last-Translator: Copied by Zanata \n" -"Language: zh-CN\n" +"Language: zh_CN\n" "Plural-Forms: nplurals=1; plural=0;\n" "Generated-By: Babel 2.0\n" -"X-Generator: Zanata 3.9.6\n" +"X-Generator: Zanata 4.3.3\n" "Language-Team: Chinese (China)\n" msgid "" diff -Nru swift-2.17.0/swift/locale/zh_TW/LC_MESSAGES/swift.po swift-2.18.0/swift/locale/zh_TW/LC_MESSAGES/swift.po --- swift-2.17.0/swift/locale/zh_TW/LC_MESSAGES/swift.po 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/locale/zh_TW/LC_MESSAGES/swift.po 2018-05-30 10:17:02.000000000 +0000 @@ -8,16 +8,16 @@ msgstr "" "Project-Id-Version: swift VERSION\n" "Report-Msgid-Bugs-To: https://bugs.launchpad.net/openstack-i18n/\n" -"POT-Creation-Date: 2018-01-31 06:08+0000\n" +"POT-Creation-Date: 2018-02-28 19:39+0000\n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" "PO-Revision-Date: 2016-04-12 06:43+0000\n" "Last-Translator: Copied by Zanata \n" -"Language: zh-TW\n" +"Language: zh_TW\n" "Plural-Forms: nplurals=1; plural=0;\n" "Generated-By: Babel 2.0\n" -"X-Generator: Zanata 3.9.6\n" +"X-Generator: Zanata 4.3.3\n" "Language-Team: Chinese (Taiwan)\n" msgid "" diff -Nru swift-2.17.0/swift/obj/auditor.py swift-2.18.0/swift/obj/auditor.py --- swift-2.17.0/swift/obj/auditor.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/obj/auditor.py 2018-05-30 10:17:02.000000000 +0000 @@ -27,7 +27,7 @@ from swift.obj import diskfile, replicator from swift.common.utils import ( get_logger, ratelimit_sleep, dump_recon_cache, list_from_csv, listdir, - unlink_paths_older_than, readconf, config_auto_int_value) + unlink_paths_older_than, readconf, config_auto_int_value, round_robin_iter) from swift.common.exceptions import DiskFileQuarantined, DiskFileNotExist,\ DiskFileDeleted, DiskFileExpired from swift.common.daemon import Daemon @@ -120,18 +120,17 @@ total_quarantines = 0 total_errors = 0 time_auditing = 0 - # TODO: we should move audit-location generation to the storage policy, - # as we may (conceivably) have a different filesystem layout for each. - # We'd still need to generate the policies to audit from the actual - # directories found on-disk, and have appropriate error reporting if we - # find a directory that doesn't correspond to any known policy. This - # will require a sizable refactor, but currently all diskfile managers - # can find all diskfile locations regardless of policy -- so for now - # just use Policy-0's manager. 
- all_locs = (self.diskfile_router[POLICIES[0]] + + # get AuditLocations for each policy + loc_generators = [] + for policy in POLICIES: + loc_generators.append( + self.diskfile_router[policy] .object_audit_location_generator( - device_dirs=device_dirs, + policy, device_dirs=device_dirs, auditor_type=self.auditor_type)) + + all_locs = round_robin_iter(loc_generators) for location in all_locs: loop_time = time.time() self.failsafe_object_audit(location) @@ -192,8 +191,11 @@ self.logger.info( _('Object audit stats: %s') % json.dumps(self.stats_buckets)) - # Unset remaining partitions to not skip them in the next run - diskfile.clear_auditor_status(self.devices, self.auditor_type) + for policy in POLICIES: + # Unset remaining partitions to not skip them in the next run + self.diskfile_router[policy].clear_auditor_status( + policy, + self.auditor_type) def record_stats(self, obj_size): """ @@ -319,7 +321,8 @@ zero_byte_only_at_fps=zero_byte_only_at_fps) worker.audit_all_objects(mode=mode, device_dirs=device_dirs) - def fork_child(self, zero_byte_fps=False, **kwargs): + def fork_child(self, zero_byte_fps=False, sleep_between_zbf_scanner=False, + **kwargs): """Child execution""" pid = os.fork() if pid: @@ -328,6 +331,8 @@ signal.signal(signal.SIGTERM, signal.SIG_DFL) if zero_byte_fps: kwargs['zero_byte_fps'] = self.conf_zero_byte_fps + if sleep_between_zbf_scanner: + self._sleep() try: self.run_audit(**kwargs) except Exception as e: @@ -391,8 +396,9 @@ len(pids) > 1 and not once: kwargs['device_dirs'] = override_devices # sleep between ZBF scanner forks - self._sleep() - zbf_pid = self.fork_child(zero_byte_fps=True, **kwargs) + zbf_pid = self.fork_child(zero_byte_fps=True, + sleep_between_zbf_scanner=True, + **kwargs) pids.add(zbf_pid) pids.discard(pid) diff -Nru swift-2.17.0/swift/obj/diskfile.py swift-2.18.0/swift/obj/diskfile.py --- swift-2.17.0/swift/obj/diskfile.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/obj/diskfile.py 2018-05-30 10:17:02.000000000 +0000 @@ -83,8 +83,8 @@ DEFAULT_RECLAIM_AGE = timedelta(weeks=1).total_seconds() HASH_FILE = 'hashes.pkl' HASH_INVALIDATIONS_FILE = 'hashes.invalid' -METADATA_KEY = 'user.swift.metadata' -METADATA_CHECKSUM_KEY = 'user.swift.metadata_checksum' +METADATA_KEY = b'user.swift.metadata' +METADATA_CHECKSUM_KEY = b'user.swift.metadata_checksum' DROP_CACHE_WINDOW = 1024 * 1024 # These are system-set metadata keys that cannot be changed with a POST. # They should be lowercase. @@ -131,6 +131,26 @@ return dict(((encode_str(k), encode_str(v)) for k, v in metadata.items())) +def _decode_metadata(metadata): + """ + Given a metadata dict from disk, convert keys and values to native strings. + + :param metadata: a dict + """ + if six.PY2: + def to_str(item): + if isinstance(item, six.text_type): + return item.encode('utf8') + return item + else: + def to_str(item): + if isinstance(item, six.binary_type): + return item.decode('utf8', 'surrogateescape') + return item + + return dict(((to_str(k), to_str(v)) for k, v in metadata.items())) + + def read_metadata(fd, add_missing_checksum=False): """ Helper function to read the pickled metadata from an object file. 
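
The hunks below switch METADATA_KEY to bytes and build each chunk's xattr name by appending an ASCII-encoded index; a small sketch of the resulting key sequence (the helper name is invented, xattr I/O elided):

    METADATA_KEY = b'user.swift.metadata'   # as redefined in this diff

    def xattr_chunk_keys(n):
        # the first chunk has no numeric suffix, later chunks append 1, 2, ...
        return [METADATA_KEY + str(i or '').encode('ascii') for i in range(n)]

    assert xattr_chunk_keys(3) == [
        b'user.swift.metadata',
        b'user.swift.metadata1',
        b'user.swift.metadata2',
    ]
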
@@ -144,8 +164,8 @@ key = 0 try: while True: - metadata += xattr.getxattr(fd, '%s%s' % (METADATA_KEY, - (key or ''))) + metadata += xattr.getxattr( + fd, METADATA_KEY + str(key or '').encode('ascii')) key += 1 except (IOError, OSError) as e: if errno.errorcode.get(e.errno) in ('ENOTSUP', 'EOPNOTSUPP'): @@ -173,7 +193,7 @@ logging.error("Error adding metadata: %s" % e) if metadata_checksum: - computed_checksum = hashlib.md5(metadata).hexdigest() + computed_checksum = hashlib.md5(metadata).hexdigest().encode('ascii') if metadata_checksum != computed_checksum: raise DiskFileBadMetadataChecksum( "Metadata checksum mismatch for %s: " @@ -183,7 +203,11 @@ # strings are utf-8 encoded when written, but have not always been # (see https://bugs.launchpad.net/swift/+bug/1678018) so encode them again # when read - return _encode_metadata(pickle.loads(metadata)) + if six.PY2: + metadata = pickle.loads(metadata) + else: + metadata = pickle.loads(metadata, encoding='bytes') + return _decode_metadata(metadata) def write_metadata(fd, metadata, xattr_size=65536): @@ -194,11 +218,11 @@ :param metadata: metadata to write """ metastr = pickle.dumps(_encode_metadata(metadata), PICKLE_PROTOCOL) - metastr_md5 = hashlib.md5(metastr).hexdigest() + metastr_md5 = hashlib.md5(metastr).hexdigest().encode('ascii') key = 0 try: while metastr: - xattr.setxattr(fd, '%s%s' % (METADATA_KEY, key or ''), + xattr.setxattr(fd, METADATA_KEY + str(key or '').encode('ascii'), metastr[:xattr_size]) metastr = metastr[xattr_size:] key += 1 @@ -368,9 +392,10 @@ suffix = basename(suffix_dir) partition_dir = dirname(suffix_dir) invalidations_file = join(partition_dir, HASH_INVALIDATIONS_FILE) - with lock_path(partition_dir): - with open(invalidations_file, 'ab') as inv_fh: - inv_fh.write(suffix + "\n") + if not isinstance(suffix, bytes): + suffix = suffix.encode('utf-8') + with lock_path(partition_dir), open(invalidations_file, 'ab') as inv_fh: + inv_fh.write(suffix + b"\n") def relink_paths(target_path, new_target_path, check_existing=False): @@ -428,18 +453,20 @@ return str(self.path) -def object_audit_location_generator(devices, mount_check=True, logger=None, - device_dirs=None, auditor_type="ALL"): +def object_audit_location_generator(devices, datadir, mount_check=True, + logger=None, device_dirs=None, + auditor_type="ALL"): """ Given a devices path (e.g. "/srv/node"), yield an AuditLocation for all - objects stored under that directory if device_dirs isn't set. If - device_dirs is set, only yield AuditLocation for the objects under the - entries in device_dirs. The AuditLocation only knows the path to the hash - directory, not to the .data file therein (if any). This is to avoid a - double listdir(hash_dir); the DiskFile object will always do one, so - we don't. + objects stored under that directory for the given datadir (policy), + if device_dirs isn't set. If device_dirs is set, only yield AuditLocation + for the objects under the entries in device_dirs. The AuditLocation only + knows the path to the hash directory, not to the .data file therein + (if any). This is to avoid a double listdir(hash_dir); the DiskFile object + will always do one, so we don't. 
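
For orientation, an illustrative sketch of the per-policy layout this generator now walks; the device, partition and datadir names are examples only:

    # Layout walked for datadir='objects-1' (storage policy 1):
    #
    #   /srv/node/<device>/objects-1/<partition>/<suffix>/<hash>/
    #
    # The generator yields one AuditLocation per <hash> directory; with the
    # policy-specific datadir passed in, a separate generator is created for
    # each storage policy instead of one walk that inspects every objects*
    # directory it happens to find.
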
:param devices: parent directory of the devices to be audited + :param datadir: objects directory :param mount_check: flag to check if a mount check should be performed on devices :param logger: a logger object @@ -455,6 +482,7 @@ # randomize devices in case of process restart before sweep completed shuffle(device_dirs) + base, policy = split_policy_string(datadir) for device in device_dirs: if not check_drive(devices, device, mount_check): if logger: @@ -462,55 +490,37 @@ 'Skipping %s as it is not %s', device, 'mounted' if mount_check else 'a dir') continue - # loop through object dirs for all policies - device_dir = os.path.join(devices, device) - try: - dirs = os.listdir(device_dir) - except OSError as e: - if logger: - logger.debug( - _('Skipping %(dir)s: %(err)s') % {'dir': device_dir, - 'err': e.strerror}) + + datadir_path = os.path.join(devices, device, datadir) + if not os.path.exists(datadir_path): continue - for dir_ in dirs: - if not dir_.startswith(DATADIR_BASE): - continue - try: - base, policy = split_policy_string(dir_) - except PolicyError as e: - if logger: - logger.warning(_('Directory %(directory)r does not map ' - 'to a valid policy (%(error)s)') % { - 'directory': dir_, 'error': e}) - continue - datadir_path = os.path.join(devices, device, dir_) - partitions = get_auditor_status(datadir_path, logger, auditor_type) + partitions = get_auditor_status(datadir_path, logger, auditor_type) - for pos, partition in enumerate(partitions): - update_auditor_status(datadir_path, logger, - partitions[pos:], auditor_type) - part_path = os.path.join(datadir_path, partition) + for pos, partition in enumerate(partitions): + update_auditor_status(datadir_path, logger, + partitions[pos:], auditor_type) + part_path = os.path.join(datadir_path, partition) + try: + suffixes = listdir(part_path) + except OSError as e: + if e.errno != errno.ENOTDIR: + raise + continue + for asuffix in suffixes: + suff_path = os.path.join(part_path, asuffix) try: - suffixes = listdir(part_path) + hashes = listdir(suff_path) except OSError as e: if e.errno != errno.ENOTDIR: raise continue - for asuffix in suffixes: - suff_path = os.path.join(part_path, asuffix) - try: - hashes = listdir(suff_path) - except OSError as e: - if e.errno != errno.ENOTDIR: - raise - continue - for hsh in hashes: - hsh_path = os.path.join(suff_path, hsh) - yield AuditLocation(hsh_path, device, partition, - policy) + for hsh in hashes: + hsh_path = os.path.join(suff_path, hsh) + yield AuditLocation(hsh_path, device, partition, + policy) - update_auditor_status(datadir_path, logger, [], auditor_type) + update_auditor_status(datadir_path, logger, [], auditor_type) def get_auditor_status(datadir_path, logger, auditor_type): @@ -564,15 +574,13 @@ {'auditor_status': auditor_status, 'err': e}) -def clear_auditor_status(devices, auditor_type="ALL"): - for device in os.listdir(devices): - for dir_ in os.listdir(os.path.join(devices, device)): - if not dir_.startswith("objects"): - continue - datadir_path = os.path.join(devices, device, dir_) - auditor_status = os.path.join( - datadir_path, "auditor_status_%s.json" % auditor_type) - remove_file(auditor_status) +def clear_auditor_status(devices, datadir, auditor_type="ALL"): + device_dirs = listdir(devices) + for device in device_dirs: + datadir_path = os.path.join(devices, device, datadir) + auditor_status = os.path.join( + datadir_path, "auditor_status_%s.json" % auditor_type) + remove_file(auditor_status) def strip_self(f): @@ -608,10 +616,10 @@ self.policy_to_manager = {} for policy in 
POLICIES: manager_cls = self.policy_type_to_manager_cls[policy.policy_type] - self.policy_to_manager[policy] = manager_cls(*args, **kwargs) + self.policy_to_manager[int(policy)] = manager_cls(*args, **kwargs) def __getitem__(self, policy): - return self.policy_to_manager[policy] + return self.policy_to_manager[int(policy)] class BaseDiskFileManager(object): @@ -1315,15 +1323,22 @@ pipe_size=self.pipe_size, use_linkat=self.use_linkat, **kwargs) - def object_audit_location_generator(self, device_dirs=None, + def clear_auditor_status(self, policy, auditor_type="ALL"): + datadir = get_data_dir(policy) + clear_auditor_status(self.devices, datadir, auditor_type) + + def object_audit_location_generator(self, policy, device_dirs=None, auditor_type="ALL"): """ Yield an AuditLocation for all objects stored under device_dirs. + :param policy: the StoragePolicy instance :param device_dirs: directory of target device :param auditor_type: either ALL or ZBF """ - return object_audit_location_generator(self.devices, self.mount_check, + datadir = get_data_dir(policy) + return object_audit_location_generator(self.devices, datadir, + self.mount_check, self.logger, device_dirs, auditor_type) diff -Nru swift-2.17.0/swift/obj/expirer.py swift-2.18.0/swift/obj/expirer.py --- swift-2.17.0/swift/obj/expirer.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/obj/expirer.py 2018-05-30 10:17:02.000000000 +0000 @@ -19,6 +19,7 @@ from time import time from os.path import join from swift import gettext_ as _ +from collections import defaultdict, deque import hashlib from eventlet import sleep, Timeout @@ -26,7 +27,8 @@ from swift.common.daemon import Daemon from swift.common.internal_client import InternalClient, UnexpectedResponse -from swift.common.utils import get_logger, dump_recon_cache, split_path +from swift.common.utils import get_logger, dump_recon_cache, split_path, \ + Timestamp from swift.common.http import HTTP_NOT_FOUND, HTTP_CONFLICT, \ HTTP_PRECONDITION_FAILED @@ -37,8 +39,8 @@ class ObjectExpirer(Daemon): """ - Daemon that queries the internal hidden expiring_objects_account to - discover objects that need to be deleted. + Daemon that queries the internal hidden task accounts to discover objects + that need to be deleted. :param conf: The daemon configuration. """ @@ -47,13 +49,9 @@ self.conf = conf self.logger = logger or get_logger(conf, log_route='object-expirer') self.interval = int(conf.get('interval') or 300) - self.expiring_objects_account = \ - (conf.get('auto_create_account_prefix') or '.') + \ - (conf.get('expiring_objects_account_name') or 'expiring_objects') - conf_path = conf.get('__file__') or '/etc/swift/object-expirer.conf' - request_tries = int(conf.get('request_tries') or 3) - self.swift = swift or InternalClient( - conf_path, 'Swift Object Expirer', request_tries) + + self.read_conf_for_queue_access(swift) + self.report_interval = int(conf.get('report_interval') or 300) self.report_first_time = self.report_last_time = time() self.report_objects = 0 @@ -63,13 +61,29 @@ self.concurrency = int(conf.get('concurrency', 1)) if self.concurrency < 1: raise ValueError("concurrency must be set to at least 1") - self.processes = int(self.conf.get('processes', 0)) - self.process = int(self.conf.get('process', 0)) # This option defines how long an un-processable expired object # marker will be retried before it is abandoned. It is not coupled # with the tombstone reclaim age in the consistency engine. 
self.reclaim_age = int(conf.get('reclaim_age', 604800)) + def read_conf_for_queue_access(self, swift): + self.expiring_objects_account = \ + (self.conf.get('auto_create_account_prefix') or '.') + \ + (self.conf.get('expiring_objects_account_name') or + 'expiring_objects') + + # This is for common parameter with general task queue in future + self.task_container_prefix = '' + + self.ic_conf_path = \ + self.conf.get('__file__') or '/etc/swift/object-expirer.conf' + request_tries = int(self.conf.get('request_tries') or 3) + self.swift = swift or InternalClient( + self.ic_conf_path, 'Swift Object Expirer', request_tries) + + self.processes = int(self.conf.get('processes', 0)) + self.process = int(self.conf.get('process', 0)) + def report(self, final=False): """ Emits a log line report of the progress so far, or the final progress @@ -93,65 +107,138 @@ 'time': elapsed, 'objects': self.report_objects}) self.report_last_time = time() - def iter_cont_objs_to_expire(self): + def parse_task_obj(self, task_obj): + """ + :param task_obj: a task object name in format of + "-/" + + "/" + :return: 4-tuples of (delete_at_time, target_account, target_container, + target_obj) """ - Yields (container, obj) tuples to be deleted + timestamp, target_path = task_obj.split('-', 1) + timestamp = Timestamp(timestamp) + target_account, target_container, target_obj = \ + split_path('/' + target_path, 3, 3, True) + return timestamp, target_account, target_container, target_obj + + def round_robin_order(self, task_iter): """ - obj_cache = {} + Change order of expiration tasks to avoid deleting objects in a + certain container continuously. + + :param task_iter: An iterator of delete-task dicts, which should each + have a ``target_path`` key. + """ + obj_cache = defaultdict(deque) cnt = 0 - all_containers = set() + def dump_obj_cache_in_round_robin(): + while obj_cache: + for key in sorted(obj_cache): + if obj_cache[key]: + yield obj_cache[key].popleft() + else: + del obj_cache[key] - for c in self.swift.iter_containers(self.expiring_objects_account): - container = str(c['name']) - timestamp = int(container) - if timestamp > int(time()): + for delete_task in task_iter: + try: + target_account, target_container, _junk = \ + split_path('/' + delete_task['target_path'], 3, 3, True) + cache_key = '%s/%s' % (target_account, target_container) + # sanity + except ValueError: + self.logger.error('Unexcepted error handling task %r' % + delete_task) + continue + + obj_cache[cache_key].append(delete_task) + cnt += 1 + + if cnt > MAX_OBJECTS_TO_CACHE: + for task in dump_obj_cache_in_round_robin(): + yield task + cnt = 0 + + for task in dump_obj_cache_in_round_robin(): + yield task + + def hash_mod(self, name, divisor): + """ + :param name: a task object name + :param divisor: a divisor number + :return: an integer to decide which expirer is assigned to the task + """ + # md5 is only used for shuffling mod + return int(hashlib.md5(name).hexdigest(), 16) % divisor + + def iter_task_accounts_to_expire(self): + """ + Yields (task_account, my_index, divisor). + my_index and divisor is used to assign task obj to only one + expirer. In expirer method, expirer calculates assigned index for each + expiration task. The assigned index is in [0, 1, ..., divisor - 1]. + Expirers have their own "my_index" for each task_account. Expirer whose + "my_index" is equal to the assigned index executes the task. Because + each expirer have different "my_index", task objects are executed by + only one expirer. 
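
A small sketch of the assignment rule described above, mirroring hash_mod() and the comparison in iter_task_to_expire; the container and object names are invented, and .encode() is added only so the sketch also runs on Python 3:

    import hashlib

    def assigned_to_me(task_container, task_object, my_index, divisor):
        # True if this expirer process should handle the given task object
        name = ('%s/%s' % (task_container, task_object)).encode('utf8')
        return int(hashlib.md5(name).hexdigest(), 16) % divisor == my_index

    # with processes=3, exactly one of the three expirers takes each task:
    assert sum(assigned_to_me('1527673600', '1527673500-AUTH_test/c/o', i, 3)
               for i in range(3)) == 1
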
+ """ + if self.processes > 0: + yield self.expiring_objects_account, self.process, self.processes + else: + yield self.expiring_objects_account, 0, 1 + + def delete_at_time_of_task_container(self, task_container): + """ + get delete_at timestamp from task_container name + """ + # task_container name is timestamp + return Timestamp(task_container) + + def iter_task_containers_to_expire(self, task_account): + """ + Yields task_container names under the task_account if the delete at + timestamp of task_container is past. + """ + for c in self.swift.iter_containers(task_account, + prefix=self.task_container_prefix): + task_container = str(c['name']) + timestamp = self.delete_at_time_of_task_container(task_container) + if timestamp > Timestamp.now(): break - all_containers.add(container) - for o in self.swift.iter_objects(self.expiring_objects_account, - container): - obj = o['name'].encode('utf8') - timestamp, actual_obj = obj.split('-', 1) - timestamp = int(timestamp) - if timestamp > int(time()): - break + yield task_container + + def iter_task_to_expire(self, task_account_container_list, + my_index, divisor): + """ + Yields task expire info dict which consists of task_account, + task_container, task_object, timestamp_to_delete, and target_path + """ + for task_account, task_container in task_account_container_list: + for o in self.swift.iter_objects(task_account, task_container): + task_object = o['name'].encode('utf8') try: - cust_account, cust_cont, cust_obj = \ - split_path('/' + actual_obj, 3, 3, True) - cache_key = '%s/%s' % (cust_account, cust_cont) + delete_timestamp, target_account, target_container, \ + target_object = self.parse_task_obj(task_object) except ValueError: - cache_key = None + self.logger.exception('Unexcepted error handling task %r' % + task_object) + continue + if delete_timestamp > Timestamp.now(): + # we shouldn't yield the object that doesn't reach + # the expiration date yet. + break - if self.processes > 0: - obj_process = int( - hashlib.md5('%s/%s' % (container, obj)). 
- hexdigest(), 16) - if obj_process % self.processes != self.process: - continue - - if cache_key not in obj_cache: - obj_cache[cache_key] = [] - obj_cache[cache_key].append((container, obj)) - cnt += 1 - - if cnt > MAX_OBJECTS_TO_CACHE: - while obj_cache: - for key in obj_cache.keys(): - if obj_cache[key]: - yield obj_cache[key].pop() - cnt -= 1 - else: - del obj_cache[key] - - while obj_cache: - for key in obj_cache.keys(): - if obj_cache[key]: - yield obj_cache[key].pop() - else: - del obj_cache[key] + # Only one expirer daemon assigned for one task + if self.hash_mod('%s/%s' % (task_container, task_object), + divisor) != my_index: + continue - for container in all_containers: - yield (container, None) + yield {'task_account': task_account, + 'task_container': task_container, + 'task_object': task_object, + 'target_path': '/'.join([ + target_account, target_container, target_object]), + 'delete_timestamp': delete_timestamp} def run_once(self, *args, **kwargs): """ @@ -166,44 +253,59 @@ """ self.get_process_values(kwargs) pool = GreenPool(self.concurrency) - containers_to_delete = set([]) self.report_first_time = self.report_last_time = time() self.report_objects = 0 try: self.logger.debug('Run begin') - containers, objects = \ - self.swift.get_account_info(self.expiring_objects_account) - self.logger.info(_('Pass beginning; ' - '%(containers)s possible containers; ' - '%(objects)s possible objects') % { - 'containers': containers, 'objects': objects}) + task_account_container_list_to_delete = list() + for task_account, my_index, divisor in \ + self.iter_task_accounts_to_expire(): + container_count, obj_count = \ + self.swift.get_account_info(task_account) - for container, obj in self.iter_cont_objs_to_expire(): - containers_to_delete.add(container) - - if not obj: + # the task account is skipped if there are no task container + if not container_count: continue - timestamp, actual_obj = obj.split('-', 1) - timestamp = int(timestamp) - if timestamp > int(time()): - break - pool.spawn_n( - self.delete_object, actual_obj, timestamp, - container, obj) + self.logger.info(_( + 'Pass beginning for task account %(account)s; ' + '%(container_count)s possible containers; ' + '%(obj_count)s possible objects') % { + 'account': task_account, + 'container_count': container_count, + 'obj_count': obj_count}) + + task_account_container_list = \ + [(task_account, task_container) for task_container in + self.iter_task_containers_to_expire(task_account)] + + task_account_container_list_to_delete.extend( + task_account_container_list) + + # delete_task_iter is a generator to yield a dict of + # task_account, task_container, task_object, delete_timestamp, + # target_path to handle delete actual object and pop the task + # from the queue. 
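# A self-contained sketch of the per-container round-robin performed by
# round_robin_order() (used just below to build delete_task_iter): tasks are
# bucketed per target container, then the buckets are drained one task at a
# time in turn, so a single container never receives a long run of
# consecutive deletes. The sample task dicts are invented for illustration.
from collections import defaultdict, deque

def round_robin(tasks, key=lambda task: task['target_container']):
    buckets = defaultdict(deque)
    for task in tasks:
        buckets[key(task)].append(task)
    while buckets:
        for name in sorted(buckets):
            if buckets[name]:
                yield buckets[name].popleft()
            else:
                del buckets[name]

tasks = ([{'target_container': 'a', 'obj': i} for i in range(3)] +
         [{'target_container': 'b', 'obj': i} for i in range(2)])
print([(t['target_container'], t['obj']) for t in round_robin(tasks)])
# -> [('a', 0), ('b', 0), ('a', 1), ('b', 1), ('a', 2)]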
+ delete_task_iter = \ + self.round_robin_order(self.iter_task_to_expire( + task_account_container_list, my_index, divisor)) + + for delete_task in delete_task_iter: + pool.spawn_n(self.delete_object, **delete_task) pool.waitall() - for container in containers_to_delete: + for task_account, task_container in \ + task_account_container_list_to_delete: try: self.swift.delete_container( - self.expiring_objects_account, - container, + task_account, task_container, acceptable_statuses=(2, HTTP_NOT_FOUND, HTTP_CONFLICT)) except (Exception, Timeout) as err: self.logger.exception( - _('Exception while deleting container %(container)s ' - '%(err)s') % {'container': container, - 'err': str(err)}) + _('Exception while deleting container %(account)s ' + '%(container)s %(err)s') % { + 'account': task_account, + 'container': task_container, 'err': str(err)}) self.logger.debug('Run end') self.report(final=True) except (Exception, Timeout): @@ -257,44 +359,47 @@ raise ValueError( 'process must be less than processes') - def delete_object(self, actual_obj, timestamp, container, obj): + def delete_object(self, target_path, delete_timestamp, + task_account, task_container, task_object): start_time = time() try: try: - self.delete_actual_object(actual_obj, timestamp) + self.delete_actual_object(target_path, delete_timestamp) except UnexpectedResponse as err: if err.resp.status_int not in {HTTP_NOT_FOUND, HTTP_PRECONDITION_FAILED}: raise - if float(timestamp) > time() - self.reclaim_age: + if float(delete_timestamp) > time() - self.reclaim_age: # we'll have to retry the DELETE later raise - self.pop_queue(container, obj) + self.pop_queue(task_account, task_container, task_object) self.report_objects += 1 self.logger.increment('objects') except UnexpectedResponse as err: self.logger.increment('errors') self.logger.error( - 'Unexpected response while deleting object %(container)s ' - '%(obj)s: %(err)s' % {'container': container, 'obj': obj, - 'err': str(err.resp.status_int)}) + 'Unexpected response while deleting object ' + '%(account)s %(container)s %(obj)s: %(err)s' % { + 'account': task_account, 'container': task_container, + 'obj': task_object, 'err': str(err.resp.status_int)}) + self.logger.debug(err.resp.body) except (Exception, Timeout) as err: self.logger.increment('errors') self.logger.exception( - 'Exception while deleting object %(container)s %(obj)s' - ' %(err)s' % {'container': container, - 'obj': obj, 'err': str(err)}) + 'Exception while deleting object %(account)s %(container)s ' + '%(obj)s %(err)s' % { + 'account': task_account, 'container': task_container, + 'obj': task_object, 'err': str(err)}) self.logger.timing_since('timing', start_time) self.report() - def pop_queue(self, container, obj): + def pop_queue(self, task_account, task_container, task_object): """ - Issue a delete object request to the container for the expiring object - queue entry. + Issue a delete object request to the task_container for the expiring + object queue entry. """ - direct_delete_container_entry(self.swift.container_ring, - self.expiring_objects_account, - container, obj) + direct_delete_container_entry(self.swift.container_ring, task_account, + task_container, task_object) def delete_actual_object(self, actual_obj, timestamp): """ @@ -304,15 +409,16 @@ :param actual_obj: The name of the end-user object to delete: '//' - :param timestamp: The timestamp the X-Delete-At value must match to - perform the actual delete. 
+ :param timestamp: The swift.common.utils.Timestamp instance the + X-Delete-At value must match to perform the actual + delete. :raises UnexpectedResponse: if the delete was unsuccessful and should be retried later """ path = '/v1/' + urllib.parse.quote(actual_obj.lstrip('/')) self.swift.make_request( 'DELETE', path, - {'X-If-Delete-At': str(timestamp), - 'X-Timestamp': str(timestamp), + {'X-If-Delete-At': timestamp.normal, + 'X-Timestamp': timestamp.normal, 'X-Backend-Clean-Expiring-Object-Queue': 'no'}, (2, HTTP_CONFLICT)) diff -Nru swift-2.17.0/swift/obj/reconstructor.py swift-2.18.0/swift/obj/reconstructor.py --- swift-2.17.0/swift/obj/reconstructor.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/obj/reconstructor.py 2018-05-30 10:17:02.000000000 +0000 @@ -15,7 +15,6 @@ import json import errno -import math import os from os.path import join import random @@ -32,8 +31,9 @@ from swift import gettext_ as _ from swift.common.utils import ( whataremyips, unlink_older_than, compute_eta, get_logger, - dump_recon_cache, mkdirs, config_true_value, list_from_csv, - tpool_reraise, GreenAsyncPile, Timestamp, remove_file) + dump_recon_cache, mkdirs, config_true_value, + tpool_reraise, GreenAsyncPile, Timestamp, remove_file, + load_recon_cache, parse_override_options, distribute_evenly) from swift.common.header_key_dict import HeaderKeyDict from swift.common.bufferedhttp import http_connect from swift.common.daemon import Daemon @@ -90,30 +90,6 @@ } -def parse_override_options(**kwargs): - """ - Return a dict with keys `override_devices` and `override_partitions` whose - values have been parsed from `kwargs`. If either key is found in `kwargs` - then copy its value from kwargs. Otherwise, if `once` is set in `kwargs` - then parse `devices` and `partitions` keys for the value of - `override_devices` and `override_partitions` respectively. - - :return: a dict with keys `override_devices` and `override_partitions` - """ - if kwargs.get('once', False): - devices = list_from_csv(kwargs.get('devices')) - partitions = [ - int(p) for p in list_from_csv(kwargs.get('partitions'))] - else: - devices = [] - partitions = [] - - return { - 'override_devices': kwargs.get('override_devices', devices), - 'override_partitions': kwargs.get('override_partitions', partitions), - } - - class RebuildingECDiskFileStream(object): """ This class wraps the reconstructed fragment archive data and @@ -236,29 +212,29 @@ """ if self.reconstructor_workers < 1: return - override_options = parse_override_options(once=once, **kwargs) + override_opts = parse_override_options(once=once, **kwargs) # Note that this get re-used when dumping stats and in is_healthy self.all_local_devices = self.get_local_devices() - if override_options['override_devices']: - devices = [d for d in override_options['override_devices'] + if override_opts.devices: + devices = [d for d in override_opts.devices if d in self.all_local_devices] else: devices = list(self.all_local_devices) if not devices: # we only need a single worker to do nothing until a ring change - yield dict(override_options) + yield dict(override_devices=override_opts.devices, + override_partitions=override_opts.partitions) return - # for somewhat uniform load per worker use same max_devices_per_worker - # when handling all devices or just override devices... 
- max_devices_per_worker = int(math.ceil( - 1.0 * len(self.all_local_devices) / self.reconstructor_workers)) - # ...but only use enough workers for the actual devices being handled - n = int(math.ceil(1.0 * len(devices) / max_devices_per_worker)) - override_devices_per_worker = [devices[i::n] for i in range(n)] - for override_devices in override_devices_per_worker: - yield dict(override_options, override_devices=override_devices) + # for somewhat uniform load per worker use same + # max_devices_per_worker when handling all devices or just override + # devices, but only use enough workers for the actual devices being + # handled + n_workers = min(self.reconstructor_workers, len(devices)) + for ods in distribute_evenly(devices, n_workers): + yield dict(override_partitions=override_opts.partitions, + override_devices=ods) def is_healthy(self): """ @@ -276,14 +252,7 @@ """ Aggregate per-disk rcache updates from child workers. """ - try: - with open(self.rcache) as f: - existing_data = json.load(f) - except IOError as e: - if e.errno != errno.ENOENT: - raise - # dump_recon_cache will create new file and dirs - existing_data = {} + existing_data = load_recon_cache(self.rcache) first_start = time.time() last_finish = 0 all_devices_reporting = True @@ -1247,18 +1216,20 @@ def run_once(self, *args, **kwargs): start = time.time() self.logger.info(_("Running object reconstructor in script mode.")) - override_options = parse_override_options(once=True, **kwargs) - self.reconstruct(**override_options) + override_opts = parse_override_options(once=True, **kwargs) + self.reconstruct(override_devices=override_opts.devices, + override_partitions=override_opts.partitions) total = (time.time() - start) / 60 self.logger.info( _("Object reconstruction complete (once). (%.02f minutes)"), total) # Only dump stats if they would actually be meaningful -- i.e. we're # collecting per-disk stats and covering all partitions, or we're # covering all partitions, all disks. - if not override_options['override_partitions'] and ( - self.reconstructor_workers > 0 or - not override_options['override_devices']): - self.final_recon_dump(total, **override_options) + if not override_opts.partitions and ( + self.reconstructor_workers > 0 or not override_opts.devices): + self.final_recon_dump( + total, override_devices=override_opts.devices, + override_partitions=override_opts.partitions) def run_forever(self, *args, **kwargs): self.logger.info(_("Starting object reconstructor in daemon mode.")) @@ -1266,13 +1237,16 @@ while True: start = time.time() self.logger.info(_("Starting object reconstruction pass.")) - override_options = parse_override_options(**kwargs) + override_opts = parse_override_options(**kwargs) # Run the reconstructor - self.reconstruct(**override_options) + self.reconstruct(override_devices=override_opts.devices, + override_partitions=override_opts.partitions) total = (time.time() - start) / 60 self.logger.info( _("Object reconstruction complete. 
(%.02f minutes)"), total) - self.final_recon_dump(total, **override_options) + self.final_recon_dump( + total, override_devices=override_opts.devices, + override_partitions=override_opts.partitions) self.logger.debug('reconstruction sleeping for %s seconds.', self.interval) sleep(self.interval) diff -Nru swift-2.17.0/swift/obj/replicator.py swift-2.18.0/swift/obj/replicator.py --- swift-2.17.0/swift/obj/replicator.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/obj/replicator.py 2018-05-30 10:17:02.000000000 +0000 @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from collections import defaultdict import os import errno from os.path import isdir, isfile, join, dirname @@ -25,16 +26,17 @@ from swift import gettext_ as _ import eventlet -from eventlet import GreenPool, tpool, Timeout, sleep +from eventlet import GreenPool, queue, tpool, Timeout, sleep from eventlet.green import subprocess -from eventlet.support.greenlets import GreenletExit from swift.common.constraints import check_drive from swift.common.ring.utils import is_local_device from swift.common.utils import whataremyips, unlink_older_than, \ compute_eta, get_logger, dump_recon_cache, \ - rsync_module_interpolation, mkdirs, config_true_value, list_from_csv, \ - tpool_reraise, config_auto_int_value, storage_directory + rsync_module_interpolation, mkdirs, config_true_value, \ + tpool_reraise, config_auto_int_value, storage_directory, \ + load_recon_cache, PrefixLoggerAdapter, parse_override_options, \ + distribute_evenly from swift.common.bufferedhttp import http_connect from swift.common.daemon import Daemon from swift.common.http import HTTP_OK, HTTP_INSUFFICIENT_STORAGE @@ -49,6 +51,68 @@ return (((partition + replication_cycle) % 10) == 0) +class Stats(object): + fields = ['attempted', 'failure', 'hashmatch', 'remove', 'rsync', + 'success', 'suffix_count', 'suffix_hash', 'suffix_sync', + 'failure_nodes'] + + @classmethod + def from_recon(cls, dct): + return cls(**{k: v for k, v in dct.items() if k in cls.fields}) + + def to_recon(self): + return {k: getattr(self, k) for k in self.fields} + + def __init__(self, attempted=0, failure=0, hashmatch=0, remove=0, rsync=0, + success=0, suffix_count=0, suffix_hash=0, + suffix_sync=0, failure_nodes=None): + self.attempted = attempted + self.failure = failure + self.hashmatch = hashmatch + self.remove = remove + self.rsync = rsync + self.success = success + self.suffix_count = suffix_count + self.suffix_hash = suffix_hash + self.suffix_sync = suffix_sync + self.failure_nodes = defaultdict(lambda: defaultdict(int), + (failure_nodes or {})) + + def __add__(self, other): + total = type(self)() + total.attempted = self.attempted + other.attempted + total.failure = self.failure + other.failure + total.hashmatch = self.hashmatch + other.hashmatch + total.remove = self.remove + other.remove + total.rsync = self.rsync + other.rsync + total.success = self.success + other.success + total.suffix_count = self.suffix_count + other.suffix_count + total.suffix_hash = self.suffix_hash + other.suffix_hash + total.suffix_sync = self.suffix_sync + other.suffix_sync + + all_failed_ips = (set(self.failure_nodes.keys() + + other.failure_nodes.keys())) + for ip in all_failed_ips: + self_devs = self.failure_nodes.get(ip, {}) + other_devs = other.failure_nodes.get(ip, {}) + this_ip_failures = {} + for dev in set(self_devs.keys() + other_devs.keys()): + this_ip_failures[dev] = ( + self_devs.get(dev, 0) + other_devs.get(dev, 0)) 
+ total.failure_nodes[ip] = this_ip_failures + return total + + def add_failure_stats(self, failures): + """ + Note the failure of one or more devices. + + :param failures: a list of (ip, device-name) pairs that failed + """ + self.failure += len(failures) + for ip, device in failures: + self.failure_nodes[ip][device] += 1 + + class ObjectReplicator(Daemon): """ Replicate objects. @@ -64,7 +128,8 @@ :param logger: logging object """ self.conf = conf - self.logger = logger or get_logger(conf, log_route='object-replicator') + self.logger = PrefixLoggerAdapter( + logger or get_logger(conf, log_route='object-replicator'), {}) self.devices_dir = conf.get('devices', '/srv/node') self.mount_check = config_true_value(conf.get('mount_check', 'true')) self.swift_dir = conf.get('swift_dir', '/etc/swift') @@ -73,6 +138,7 @@ self.port = None if self.servers_per_port else \ int(conf.get('bind_port', 6200)) self.concurrency = int(conf.get('concurrency', 1)) + self.replicator_workers = int(conf.get('replicator_workers', 0)) self.stats_interval = int(conf.get('stats_interval', '300')) self.ring_check_interval = int(conf.get('ring_check_interval', 15)) self.next_check = time.time() + self.ring_check_interval @@ -90,10 +156,10 @@ if not self.rsync_module: self.rsync_module = '{replication_ip}::object' self.http_timeout = int(conf.get('http_timeout', 60)) - self.lockup_timeout = int(conf.get('lockup_timeout', 1800)) self.recon_cache_path = conf.get('recon_cache_path', '/var/cache/swift') self.rcache = os.path.join(self.recon_cache_path, "object.recon") + self._next_rcache_update = time.time() + self.stats_interval self.conn_timeout = float(conf.get('conn_timeout', 0.5)) self.node_timeout = float(conf.get('node_timeout', 10)) self.sync_method = getattr(self, conf.get('sync_method') or 'rsync') @@ -112,20 +178,22 @@ 'operation, please disable handoffs_first and ' 'handoff_delete before the next ' 'normal rebalance') + self.is_multiprocess_worker = None self._df_router = DiskFileRouter(conf, self.logger) + self._child_process_reaper_queue = queue.LightQueue() def _zero_stats(self): - """Zero out the stats.""" - self.stats = {'attempted': 0, 'success': 0, 'failure': 0, - 'hashmatch': 0, 'rsync': 0, 'remove': 0, - 'start': time.time(), 'failure_nodes': {}} - - def _add_failure_stats(self, failure_devs_info): - for node, dev in failure_devs_info: - self.stats['failure'] += 1 - failure_devs = self.stats['failure_nodes'].setdefault(node, {}) - failure_devs.setdefault(dev, 0) - failure_devs[dev] += 1 + self.stats_for_dev = defaultdict(Stats) + + @property + def total_stats(self): + return sum(self.stats_for_dev.values(), Stats()) + + def _emplace_log_prefix(self, worker_index): + self.logger.set_prefix("[worker %d/%d pid=%d] " % ( + worker_index + 1, # use 1-based indexing for more readable logs + self.replicator_workers, + os.getpid())) def _get_my_replication_ips(self): my_replication_ips = set() @@ -138,6 +206,112 @@ my_replication_ips.add(local_dev['replication_ip']) return list(my_replication_ips) + def _child_process_reaper(self): + """ + Consume processes from self._child_process_reaper_queue and wait() for + them + """ + procs = set() + done = False + while not done: + timeout = 60 if procs else None + try: + new_proc = self._child_process_reaper_queue.get( + timeout=timeout) + if new_proc is not None: + procs.add(new_proc) + else: + done = True + except queue.Empty: + pass + + reaped_procs = set() + for proc in procs: + try: + # this will reap the process if it has exited, but + # otherwise will not wait + 
proc.wait(timeout=0) + reaped_procs.add(proc) + except subprocess.TimeoutExpired: + pass + procs -= reaped_procs + + def get_worker_args(self, once=False, **kwargs): + if self.replicator_workers < 1: + return [] + + override_opts = parse_override_options(once=once, **kwargs) + have_overrides = bool(override_opts.devices or override_opts.partitions + or override_opts.policies) + + # save this off for ring-change detection later in is_healthy() + self.all_local_devices = self.get_local_devices() + + if override_opts.devices: + devices_to_replicate = [ + d for d in override_opts.devices + if d in self.all_local_devices] + else: + # The sort isn't strictly necessary since we're just trying to + # spread devices around evenly, but it makes testing easier. + devices_to_replicate = sorted(self.all_local_devices) + + # Distribute devices among workers as evenly as possible + self.replicator_workers = min(self.replicator_workers, + len(devices_to_replicate)) + return [{'override_devices': devs, + 'override_partitions': override_opts.partitions, + 'override_policies': override_opts.policies, + 'have_overrides': have_overrides, + 'multiprocess_worker_index': index} + for index, devs in enumerate( + distribute_evenly(devices_to_replicate, + self.replicator_workers))] + + def is_healthy(self): + """ + Check whether our set of local devices remains the same. + + If devices have been added or removed, then we return False here so + that we can kill off any worker processes and then distribute the + new set of local devices across a new set of workers so that all + devices are, once again, being worked on. + + This function may also cause recon stats to be updated. + + :returns: False if any local devices have been added or removed, + True otherwise + """ + # We update recon here because this is the only function we have in + # a multiprocess replicator that gets called periodically in the + # parent process. + if time.time() >= self._next_rcache_update: + update = self.aggregate_recon_update() + dump_recon_cache(update, self.rcache, self.logger) + return self.get_local_devices() == self.all_local_devices + + def get_local_devices(self): + """ + Returns a set of all local devices in all replication-type storage + policies. + + This is the device names, e.g. "sdq" or "d1234" or something, not + the full ring entries. + """ + ips = whataremyips(self.bind_ip) + local_devices = set() + for policy in POLICIES: + if policy.policy_type != REPL_POLICY: + continue + self.load_object_ring(policy) + for device in policy.object_ring.devs: + if device and is_local_device( + ips, self.port, + device['replication_ip'], + device['replication_port']): + local_devices.add(device['device']) + return local_devices + # Just exists for doc anchor point def sync(self, node, job, suffixes, *args, **kwargs): """ @@ -162,6 +336,20 @@ policy.load_ring(self.swift_dir) return policy.object_ring + def _limit_rsync_log(self, line): + """ + If rsync_error_log_line_length is defined then + limit the error to that length + + :param line: rsync log line + :return: If enabled the line limited to rsync_error_log_line_length + otherwise the initial line. + """ + if self.rsync_error_log_line_length: + return line[:self.rsync_error_log_line_length] + + return line + def _rsync(self, args): """ Execute the rsync binary to replicate a partition. @@ -169,7 +357,8 @@ :returns: return code of rsync process. 
0 is successful """ start_time = time.time() - ret_val = None + proc = None + try: with Timeout(self.rsync_timeout): proc = subprocess.Popen(args, @@ -178,9 +367,29 @@ results = proc.stdout.read() ret_val = proc.wait() except Timeout: - self.logger.error(_("Killing long-running rsync: %s"), str(args)) - proc.kill() + self.logger.error( + self._limit_rsync_log( + _("Killing long-running rsync: %s") % str(args))) + if proc: + proc.kill() + try: + # Note: Python 2.7's subprocess.Popen class doesn't take + # any arguments for wait(), but Python 3's does. + # However, Eventlet's replacement Popen takes a timeout + # argument regardless of Python version, so we don't + # need any conditional code here. + proc.wait(timeout=1.0) + except subprocess.TimeoutExpired: + # Sometimes a process won't die immediately even after a + # SIGKILL. This can be due to failing disks, high load, + # or other reasons. We can't wait for it forever since + # we're taking up a slot in the (green)thread pool, so + # we send it over to another greenthread, not part of + # our pool, whose sole duty is to wait for child + # processes to exit. + self._child_process_reaper_queue.put(proc) return 1 # failure response code + total_time = time.time() - start_time for result in results.split('\n'): if result == '': @@ -192,11 +401,10 @@ else: self.logger.error(result) if ret_val: - error_line = _('Bad rsync return code: %(ret)d <- %(args)s') % \ - {'args': str(args), 'ret': ret_val} - if self.rsync_error_log_line_length: - error_line = error_line[:self.rsync_error_log_line_length] - self.logger.error(error_line) + self.logger.error( + self._limit_rsync_log( + _('Bad rsync return code: %(ret)d <- %(args)s') % + {'args': str(args), 'ret': ret_val})) else: log_method = self.logger.info if results else self.logger.debug log_method( @@ -271,7 +479,9 @@ def tpool_get_suffixes(path): return [suff for suff in os.listdir(path) if len(suff) == 3 and isdir(join(path, suff))] - self.replication_count += 1 + + stats = self.stats_for_dev[job['device']] + stats.attempted += 1 self.logger.increment('partition.delete.count.%s' % (job['device'],)) headers = dict(self.default_headers) headers['X-Backend-Storage-Policy-Index'] = int(job['policy']) @@ -285,7 +495,7 @@ delete_objs = None if suffixes: for node in job['nodes']: - self.stats['rsync'] += 1 + stats.rsync += 1 kwargs = {} if node['region'] in synced_remote_regions and \ self.conf.get('sync_method', 'rsync') == 'ssync': @@ -325,7 +535,7 @@ delete_handoff = len(responses) == len(job['nodes']) and \ all(responses) if delete_handoff: - self.stats['remove'] += 1 + stats.remove += 1 if (self.conf.get('sync_method', 'rsync') == 'ssync' and delete_objs is not None): self.logger.info(_("Removing %s objects"), @@ -350,12 +560,12 @@ handoff_partition_deleted = True except (Exception, Timeout): self.logger.exception(_("Error syncing handoff partition")) - self._add_failure_stats(failure_devs_info) + stats.add_failure_stats(failure_devs_info) finally: target_devs_info = set([(target_dev['replication_ip'], target_dev['device']) for target_dev in job['nodes']]) - self.stats['success'] += len(target_devs_info - failure_devs_info) + stats.success += len(target_devs_info - failure_devs_info) if not handoff_partition_deleted: self.handoffs_remaining += 1 self.partition_times.append(time.time() - begin) @@ -390,7 +600,8 @@ :param job: a dict containing info about the partition to be replicated """ - self.replication_count += 1 + stats = self.stats_for_dev[job['device']] + stats.attempted += 1 
self.logger.increment('partition.update.count.%s' % (job['device'],)) headers = dict(self.default_headers) headers['X-Backend-Storage-Policy-Index'] = int(job['policy']) @@ -405,7 +616,7 @@ do_listdir=_do_listdir( int(job['partition']), self.replication_cycle)) - self.suffix_hash += hashed + stats.suffix_hash += hashed self.logger.update_stats('suffix.hashes', hashed) attempts_left = len(job['nodes']) synced_remote_regions = set() @@ -451,7 +662,7 @@ local_hash[suffix] != remote_hash.get(suffix, -1)] if not suffixes: - self.stats['hashmatch'] += 1 + stats.hashmatch += 1 continue hashed, recalc_hash = tpool_reraise( df_mgr._get_hashes, @@ -462,7 +673,7 @@ suffixes = [suffix for suffix in local_hash if local_hash[suffix] != remote_hash.get(suffix, -1)] - self.stats['rsync'] += 1 + stats.rsync += 1 success, _junk = self.sync(node, job, suffixes) with Timeout(self.http_timeout): conn = http_connect( @@ -477,14 +688,14 @@ # add only remote region when replicate succeeded if success and node['region'] != job['region']: synced_remote_regions.add(node['region']) - self.suffix_sync += len(suffixes) + stats.suffix_sync += len(suffixes) self.logger.update_stats('suffix.syncs', len(suffixes)) except (Exception, Timeout): failure_devs_info.add((node['replication_ip'], node['device'])) self.logger.exception(_("Error syncing with node: %s") % node) - self.suffix_count += len(local_hash) + stats.suffix_count += len(local_hash) except StopIteration: self.logger.error('Ran out of handoffs while replicating ' 'partition %s of policy %d', @@ -493,8 +704,8 @@ failure_devs_info.update(target_devs_info) self.logger.exception(_("Error syncing partition")) finally: - self._add_failure_stats(failure_devs_info) - self.stats['success'] += len(target_devs_info - failure_devs_info) + stats.add_failure_stats(failure_devs_info) + stats.success += len(target_devs_info - failure_devs_info) self.partition_times.append(time.time() - begin) self.logger.timing_since('partition.update.timing', begin) @@ -502,29 +713,35 @@ """ Logs various stats for the currently running replication pass. 
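# A tiny sketch of the aggregation behind total_stats (used by stats_line just
# below): sum() folds the per-device Stats objects together through their
# __add__ method, starting from an empty Stats(). MiniStats here is a cut-down
# stand-in, not the real class.
class MiniStats(object):
    def __init__(self, attempted=0, success=0, failure=0):
        self.attempted = attempted
        self.success = success
        self.failure = failure

    def __add__(self, other):
        return MiniStats(self.attempted + other.attempted,
                         self.success + other.success,
                         self.failure + other.failure)

per_device = {'sda': MiniStats(10, 9, 1), 'sdb': MiniStats(5, 5, 0)}
total = sum(per_device.values(), MiniStats())
print('%d attempted, %d successes, %d failures'
      % (total.attempted, total.success, total.failure))
# -> 15 attempted, 14 successes, 1 failures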
""" - if self.replication_count: + stats = self.total_stats + replication_count = stats.attempted + if replication_count > self.last_replication_count: + self.last_replication_count = replication_count elapsed = (time.time() - self.start) or 0.000001 - rate = self.replication_count / elapsed + rate = replication_count / elapsed self.logger.info( _("%(replicated)d/%(total)d (%(percentage).2f%%)" " partitions replicated in %(time).2fs (%(rate).2f/sec, " "%(remaining)s remaining)"), - {'replicated': self.replication_count, 'total': self.job_count, - 'percentage': self.replication_count * 100.0 / self.job_count, + {'replicated': replication_count, 'total': self.job_count, + 'percentage': replication_count * 100.0 / self.job_count, 'time': time.time() - self.start, 'rate': rate, 'remaining': '%d%s' % compute_eta(self.start, - self.replication_count, + replication_count, self.job_count)}) self.logger.info(_('%(success)s successes, %(failure)s failures') - % self.stats) + % dict(success=stats.success, + failure=stats.failure)) - if self.suffix_count: + if stats.suffix_count: self.logger.info( _("%(checked)d suffixes checked - " "%(hashed).2f%% hashed, %(synced).2f%% synced"), - {'checked': self.suffix_count, - 'hashed': (self.suffix_hash * 100.0) / self.suffix_count, - 'synced': (self.suffix_sync * 100.0) / self.suffix_count}) + {'checked': stats.suffix_count, + 'hashed': + (stats.suffix_hash * 100.0) / stats.suffix_count, + 'synced': + (stats.suffix_sync * 100.0) / stats.suffix_count}) self.partition_times.sort() self.logger.info( _("Partition times: max %(max).4fs, " @@ -538,14 +755,6 @@ _("Nothing replicated for %s seconds."), (time.time() - self.start)) - def kill_coros(self): - """Utility function that kills all coroutines currently running.""" - for coro in list(self.run_pool.coroutines_running): - try: - coro.kill(GreenletExit) - except GreenletExit: - pass - def heartbeat(self): """ Loop that runs in the background during replication. It periodically @@ -555,19 +764,6 @@ eventlet.sleep(self.stats_interval) self.stats_line() - def detect_lockups(self): - """ - In testing, the pool.waitall() call very occasionally failed to return. - This is an attempt to make sure the replicator finishes its replication - pass in some eventuality. - """ - while True: - eventlet.sleep(self.lockup_timeout) - if self.replication_count == self.last_replication_count: - self.logger.error(_("Lockup detected.. 
killing live coros.")) - self.kill_coros() - self.last_replication_count = self.replication_count - def build_replication_jobs(self, policy, ips, override_devices=None, override_partitions=None): """ @@ -592,8 +788,9 @@ found_local = True dev_path = check_drive(self.devices_dir, local_dev['device'], self.mount_check) + local_dev_stats = self.stats_for_dev[local_dev['device']] if not dev_path: - self._add_failure_stats( + local_dev_stats.add_failure_stats( [(failure_dev['replication_ip'], failure_dev['device']) for failure_dev in policy.object_ring.devs @@ -639,12 +836,12 @@ region=local_dev['region'])) except ValueError: if part_nodes: - self._add_failure_stats( + local_dev_stats.add_failure_stats( [(failure_dev['replication_ip'], failure_dev['device']) for failure_dev in nodes]) else: - self._add_failure_stats( + local_dev_stats.add_failure_stats( [(failure_dev['replication_ip'], failure_dev['device']) for failure_dev in policy.object_ring.devs @@ -688,7 +885,7 @@ if policy.policy_type == REPL_POLICY: if (override_policies is not None and - str(policy.idx) not in override_policies): + policy.idx not in override_policies): continue # ensure rings are loaded for policy self.load_object_ring(policy) @@ -703,14 +900,12 @@ return jobs def replicate(self, override_devices=None, override_partitions=None, - override_policies=None): + override_policies=None, start_time=None): """Run a replication pass""" - self.start = time.time() - self.suffix_count = 0 - self.suffix_sync = 0 - self.suffix_hash = 0 - self.replication_count = 0 - self.last_replication_count = -1 + if start_time is None: + start_time = time.time() + self.start = start_time + self.last_replication_count = 0 self.replication_cycle = (self.replication_cycle + 1) % 10 self.partition_times = [] self.my_replication_ips = self._get_my_replication_ips() @@ -718,28 +913,26 @@ self.handoffs_remaining = 0 stats = eventlet.spawn(self.heartbeat) - lockup_detector = eventlet.spawn(self.detect_lockups) eventlet.sleep() # Give spawns a cycle current_nodes = None + dev_stats = None + num_jobs = 0 try: self.run_pool = GreenPool(size=self.concurrency) jobs = self.collect_jobs(override_devices=override_devices, override_partitions=override_partitions, override_policies=override_policies) for job in jobs: + dev_stats = self.stats_for_dev[job['device']] + num_jobs += 1 current_nodes = job['nodes'] - if override_devices and job['device'] not in override_devices: - continue - if override_partitions and \ - job['partition'] not in override_partitions: - continue dev_path = check_drive(self.devices_dir, job['device'], self.mount_check) if not dev_path: - self._add_failure_stats([(failure_dev['replication_ip'], - failure_dev['device']) - for failure_dev in job['nodes']]) + dev_stats.add_failure_stats([ + (failure_dev['replication_ip'], failure_dev['device']) + for failure_dev in job['nodes']]) self.logger.warning(_('%s is not mounted'), job['device']) continue if self.handoffs_first and not job['delete']: @@ -772,71 +965,146 @@ else: self.run_pool.spawn(self.update, job) current_nodes = None - with Timeout(self.lockup_timeout): - self.run_pool.waitall() - except (Exception, Timeout): - if current_nodes: - self._add_failure_stats([(failure_dev['replication_ip'], - failure_dev['device']) - for failure_dev in current_nodes]) - else: - self._add_failure_stats(self.all_devs_info) - self.logger.exception(_("Exception in top-level replication loop")) - self.kill_coros() + self.run_pool.waitall() + except (Exception, Timeout) as err: + if dev_stats: + if 
current_nodes: + dev_stats.add_failure_stats( + [(failure_dev['replication_ip'], + failure_dev['device']) + for failure_dev in current_nodes]) + else: + dev_stats.add_failure_stats(self.all_devs_info) + self.logger.exception( + _("Exception in top-level replication loop: %s"), err) finally: stats.kill() - lockup_detector.kill() self.stats_line() - self.stats['attempted'] = self.replication_count - def run_once(self, *args, **kwargs): + def update_recon(self, total, end_time, override_devices): + # Called at the end of a replication pass to update recon stats. + if self.is_multiprocess_worker: + # If it weren't for the failure_nodes field, we could do this as + # a bunch of shared memory using multiprocessing.Value, which + # would be nice because it'd avoid dealing with existing data + # during an upgrade. + update = { + 'object_replication_per_disk': { + od: {'replication_stats': + self.stats_for_dev[od].to_recon(), + 'replication_time': total, + 'replication_last': end_time, + 'object_replication_time': total, + 'object_replication_last': end_time} + for od in override_devices}} + else: + update = {'replication_stats': self.total_stats.to_recon(), + 'replication_time': total, + 'replication_last': end_time, + 'object_replication_time': total, + 'object_replication_last': end_time} + dump_recon_cache(update, self.rcache, self.logger) + + def aggregate_recon_update(self): + per_disk_stats = load_recon_cache(self.rcache).get( + 'object_replication_per_disk', {}) + recon_update = {} + min_repl_last = float('inf') + min_repl_time = float('inf') + + # If every child has reported some stats, then aggregate things. + if all(ld in per_disk_stats for ld in self.all_local_devices): + aggregated = Stats() + for device_name, data in per_disk_stats.items(): + aggregated += Stats.from_recon(data['replication_stats']) + min_repl_time = min( + min_repl_time, data['object_replication_time']) + min_repl_last = min( + min_repl_last, data['object_replication_last']) + recon_update['replication_stats'] = aggregated.to_recon() + recon_update['replication_last'] = min_repl_last + recon_update['replication_time'] = min_repl_time + recon_update['object_replication_last'] = min_repl_last + recon_update['object_replication_time'] = min_repl_time + + # Clear out entries for old local devices that we no longer have + devices_to_remove = set(per_disk_stats) - set(self.all_local_devices) + if devices_to_remove: + recon_update['object_replication_per_disk'] = { + dtr: {} for dtr in devices_to_remove} + + return recon_update + + def run_once(self, multiprocess_worker_index=None, + have_overrides=False, *args, **kwargs): + if multiprocess_worker_index is not None: + self.is_multiprocess_worker = True + self._emplace_log_prefix(multiprocess_worker_index) + + rsync_reaper = eventlet.spawn(self._child_process_reaper) self._zero_stats() self.logger.info(_("Running object replicator in script mode.")) - override_devices = list_from_csv(kwargs.get('devices')) - override_partitions = list_from_csv(kwargs.get('partitions')) - override_policies = list_from_csv(kwargs.get('policies')) - if not override_devices: - override_devices = None - if not override_partitions: - override_partitions = None - if not override_policies: - override_policies = None + override_opts = parse_override_options(once=True, **kwargs) + devices = override_opts.devices or None + partitions = override_opts.partitions or None + policies = override_opts.policies or None + start_time = time.time() self.replicate( - override_devices=override_devices, - 
override_partitions=override_partitions, - override_policies=override_policies) - total = (time.time() - self.stats['start']) / 60 + override_devices=devices, + override_partitions=partitions, + override_policies=policies, + start_time=start_time) + end_time = time.time() + total = (end_time - start_time) / 60 self.logger.info( _("Object replication complete (once). (%.02f minutes)"), total) - if not (override_partitions or override_devices): - replication_last = time.time() - dump_recon_cache({'replication_stats': self.stats, - 'replication_time': total, - 'replication_last': replication_last, - 'object_replication_time': total, - 'object_replication_last': replication_last}, - self.rcache, self.logger) - def run_forever(self, *args, **kwargs): + # If we've been manually run on a subset of + # policies/devices/partitions, then our recon stats are not + # representative of how replication is doing, so we don't publish + # them. + if self.is_multiprocess_worker: + # The main process checked for overrides and determined that + # there were none + should_update_recon = not have_overrides + else: + # We are single-process, so update recon only if we worked on + # everything + should_update_recon = not (partitions or devices or policies) + if should_update_recon: + self.update_recon(total, end_time, devices) + + # Give rsync processes one last chance to exit, then bail out and + # let them be init's problem + self._child_process_reaper_queue.put(None) + rsync_reaper.wait() + + def run_forever(self, multiprocess_worker_index=None, + override_devices=None, *args, **kwargs): + if multiprocess_worker_index is not None: + self.is_multiprocess_worker = True + self._emplace_log_prefix(multiprocess_worker_index) self.logger.info(_("Starting object replicator in daemon mode.")) + eventlet.spawn_n(self._child_process_reaper) # Run the replicator continually while True: self._zero_stats() self.logger.info(_("Starting object replication pass.")) # Run the replicator - self.replicate() - total = (time.time() - self.stats['start']) / 60 + start = time.time() + self.replicate(override_devices=override_devices) + end = time.time() + total = (end - start) / 60 self.logger.info( _("Object replication complete. (%.02f minutes)"), total) - replication_last = time.time() - dump_recon_cache({'replication_stats': self.stats, - 'replication_time': total, - 'replication_last': replication_last, - 'object_replication_time': total, - 'object_replication_last': replication_last}, - self.rcache, self.logger) + self.update_recon(total, end, override_devices) self.logger.debug('Replication sleeping for %s seconds.', self.interval) sleep(self.interval) + + def post_multiprocess_run(self): + # This method is called after run_once using multiple workers. 
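# A rough sketch of what aggregate_recon_update() (called just below) does with
# the per-disk recon entries that each worker dumps: the replication counters
# are summed across disks and the oldest completion time is kept, so the
# aggregate never looks fresher than the slowest disk. The sample data is
# invented.
per_disk = {
    'sda': {'replication_stats': {'attempted': 10, 'failure': 1},
            'object_replication_last': 1527600000.0},
    'sdb': {'replication_stats': {'attempted': 7, 'failure': 0},
            'object_replication_last': 1527600300.0},
}

totals = {}
for entry in per_disk.values():
    for key, value in entry['replication_stats'].items():
        totals[key] = totals.get(key, 0) + value
oldest = min(e['object_replication_last'] for e in per_disk.values())
print(totals, oldest)  # {'attempted': 17, 'failure': 1} 1527600000.0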
+ update = self.aggregate_recon_update() + dump_recon_cache(update, self.rcache, self.logger) diff -Nru swift-2.17.0/swift/obj/server.py swift-2.18.0/swift/obj/server.py --- swift-2.17.0/swift/obj/server.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/obj/server.py 2018-05-30 10:17:09.000000000 +0000 @@ -15,6 +15,7 @@ """ Object Server for Swift """ +import six import six.moves.cPickle as pickle import json import os @@ -34,7 +35,7 @@ normalize_delete_at_timestamp, get_log_line, Timestamp, \ get_expirer_container, parse_mime_headers, \ iter_multipart_mime_documents, extract_swift_bytes, safe_json_loads, \ - config_auto_int_value + config_auto_int_value, split_path, get_redirect_data from swift.common.bufferedhttp import http_connect from swift.common.constraints import check_object_creation, \ valid_timestamp, check_utf8 @@ -43,7 +44,7 @@ DiskFileDeviceUnavailable, DiskFileExpired, ChunkReadTimeout, \ ChunkReadError, DiskFileXattrNotSupported from swift.obj import ssync_receiver -from swift.common.http import is_success +from swift.common.http import is_success, HTTP_MOVED_PERMANENTLY from swift.common.base_storage_server import BaseStorageServer from swift.common.header_key_dict import HeaderKeyDict from swift.common.request_helpers import get_name_and_placement, \ @@ -140,6 +141,10 @@ x-delete-at, x-object-manifest, x-static-large-object, + cache-control, + content-language, + expires, + x-robots-tag ''' extra_allowed_headers = [ header.strip().lower() for header in conf.get( @@ -170,7 +175,9 @@ # disk_chunk_size parameter. However, it affects all created sockets # using this class so we have chosen to tie it to the # network_chunk_size parameter value instead. - socket._fileobject.default_bufsize = self.network_chunk_size + if six.PY2: + socket._fileobject.default_bufsize = self.network_chunk_size + # TODO: find a way to enable similar functionality in py3 # Provide further setup specific to an object server implementation. self.setup(conf) @@ -242,7 +249,7 @@ def async_update(self, op, account, container, obj, host, partition, contdevice, headers_out, objdevice, policy, - logger_thread_locals=None): + logger_thread_locals=None, container_path=None): """ Sends or saves an async update. @@ -260,11 +267,21 @@ :param logger_thread_locals: The thread local values to be set on the self.logger to retain transaction logging information. + :param container_path: optional path in the form `` + to which the update should be sent. If given this path will be used + instead of constructing a path from the ``account`` and + ``container`` params. 
""" if logger_thread_locals: self.logger.thread_locals = logger_thread_locals headers_out['user-agent'] = 'object-server %s' % os.getpid() - full_path = '/%s/%s/%s' % (account, container, obj) + if container_path: + # use explicitly specified container path + full_path = '/%s/%s' % (container_path, obj) + else: + full_path = '/%s/%s/%s' % (account, container, obj) + + redirect_data = None if all([host, partition, contdevice]): try: with ConnectionTimeout(self.conn_timeout): @@ -274,15 +291,23 @@ with Timeout(self.node_timeout): response = conn.getresponse() response.read() - if is_success(response.status): - return - else: - self.logger.error(_( - 'ERROR Container update failed ' - '(saving for async update later): %(status)d ' - 'response from %(ip)s:%(port)s/%(dev)s'), - {'status': response.status, 'ip': ip, 'port': port, - 'dev': contdevice}) + if is_success(response.status): + return + + if response.status == HTTP_MOVED_PERMANENTLY: + try: + redirect_data = get_redirect_data(response) + except ValueError as err: + self.logger.error( + 'Container update failed for %r; problem with ' + 'redirect location: %s' % (obj, err)) + else: + self.logger.error(_( + 'ERROR Container update failed ' + '(saving for async update later): %(status)d ' + 'response from %(ip)s:%(port)s/%(dev)s'), + {'status': response.status, 'ip': ip, 'port': port, + 'dev': contdevice}) except (Exception, Timeout): self.logger.exception(_( 'ERROR container update failed with ' @@ -290,6 +315,13 @@ {'ip': ip, 'port': port, 'dev': contdevice}) data = {'op': op, 'account': account, 'container': container, 'obj': obj, 'headers': headers_out} + if redirect_data: + self.logger.debug( + 'Update to %(path)s redirected to %(redirect)s', + {'path': full_path, 'redirect': redirect_data[0]}) + container_path = redirect_data[0] + if container_path: + data['container_path'] = container_path timestamp = headers_out.get('x-meta-timestamp', headers_out.get('x-timestamp')) self._diskfile_router[policy].pickle_async_update( @@ -316,6 +348,7 @@ contdevices = [d.strip() for d in headers_in.get('X-Container-Device', '').split(',')] contpartition = headers_in.get('X-Container-Partition', '') + contpath = headers_in.get('X-Backend-Container-Path') if len(conthosts) != len(contdevices): # This shouldn't happen unless there's a bug in the proxy, @@ -328,6 +361,21 @@ 'devices': headers_in.get('X-Container-Device', '')}) return + if contpath: + try: + # TODO: this is very late in request handling to be validating + # a header - if we did *not* check and the header was bad + # presumably the update would fail and we would fall back to an + # async update to the root container, which might be best + # course of action rather than aborting update altogether? + split_path('/' + contpath, minsegs=2, maxsegs=2) + except ValueError: + self.logger.error( + "Invalid X-Backend-Container-Path, should be of the form " + "'account/container' but got %r." % contpath) + # fall back to updating root container + contpath = None + if contpartition: updates = zip(conthosts, contdevices) else: @@ -341,7 +389,8 @@ gt = spawn(self.async_update, op, account, container, obj, conthost, contpartition, contdevice, headers_out, objdevice, policy, - logger_thread_locals=self.logger.thread_locals) + logger_thread_locals=self.logger.thread_locals, + container_path=contpath) update_greenthreads.append(gt) # Wait a little bit to see if the container updates are successful. 
# If we immediately return after firing off the greenthread above, then @@ -1060,10 +1109,10 @@ else: response_class = HTTPConflict response_timestamp = max(orig_timestamp, req_timestamp) - orig_delete_at = int(orig_metadata.get('X-Delete-At') or 0) + orig_delete_at = Timestamp(orig_metadata.get('X-Delete-At') or 0) try: req_if_delete_at_val = request.headers['x-if-delete-at'] - req_if_delete_at = int(req_if_delete_at_val) + req_if_delete_at = Timestamp(req_if_delete_at_val) except KeyError: pass except ValueError: diff -Nru swift-2.17.0/swift/obj/updater.py swift-2.18.0/swift/obj/updater.py --- swift-2.17.0/swift/obj/updater.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/obj/updater.py 2018-05-30 10:17:02.000000000 +0000 @@ -28,12 +28,14 @@ from swift.common.exceptions import ConnectionTimeout from swift.common.ring import Ring from swift.common.utils import get_logger, renamer, write_pickle, \ - dump_recon_cache, config_true_value, ratelimit_sleep, eventlet_monkey_patch + dump_recon_cache, config_true_value, ratelimit_sleep, split_path, \ + eventlet_monkey_patch, get_redirect_data from swift.common.daemon import Daemon from swift.common.header_key_dict import HeaderKeyDict from swift.common.storage_policy import split_policy_string, PolicyError from swift.obj.diskfile import get_tmp_dir, ASYNCDIR_BASE -from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR +from swift.common.http import is_success, HTTP_INTERNAL_SERVER_ERROR, \ + HTTP_MOVED_PERMANENTLY class SweepStats(object): @@ -41,12 +43,13 @@ Stats bucket for an update sweep """ def __init__(self, errors=0, failures=0, quarantines=0, successes=0, - unlinks=0): + unlinks=0, redirects=0): self.errors = errors self.failures = failures self.quarantines = quarantines self.successes = successes self.unlinks = unlinks + self.redirects = redirects def copy(self): return type(self)(self.errors, self.failures, self.quarantines, @@ -57,7 +60,8 @@ self.failures - other.failures, self.quarantines - other.quarantines, self.successes - other.successes, - self.unlinks - other.unlinks) + self.unlinks - other.unlinks, + self.redirects - other.redirects) def reset(self): self.errors = 0 @@ -65,6 +69,7 @@ self.quarantines = 0 self.successes = 0 self.unlinks = 0 + self.redirects = 0 def __str__(self): keys = ( @@ -73,6 +78,7 @@ (self.quarantines, 'quarantines'), (self.unlinks, 'unlinks'), (self.errors, 'errors'), + (self.redirects, 'redirects'), ) return ', '.join('%d %s' % pair for pair in keys) @@ -279,7 +285,8 @@ 'in %(elapsed).02fs seconds:, ' '%(successes)d successes, %(failures)d failures, ' '%(quarantines)d quarantines, ' - '%(unlinks)d unlinks, %(errors)d errors ' + '%(unlinks)d unlinks, %(errors)d errors, ' + '%(redirects)d redirects ' '(pid: %(pid)d)'), {'device': device, 'elapsed': time.time() - start_time, @@ -288,7 +295,8 @@ 'failures': sweep_totals.failures, 'quarantines': sweep_totals.quarantines, 'unlinks': sweep_totals.unlinks, - 'errors': sweep_totals.errors}) + 'errors': sweep_totals.errors, + 'redirects': sweep_totals.redirects}) def process_object_update(self, update_path, device, policy): """ @@ -309,44 +317,83 @@ os.path.basename(update_path)) renamer(update_path, target_path, fsync=False) return - successes = update.get('successes', []) - part, nodes = self.get_container_ring().get_nodes( - update['account'], update['container']) - obj = '/%s/%s/%s' % \ - (update['account'], update['container'], update['obj']) - headers_out = HeaderKeyDict(update['headers']) - headers_out['user-agent'] = 
'object-updater %s' % os.getpid() - headers_out.setdefault('X-Backend-Storage-Policy-Index', - str(int(policy))) - events = [spawn(self.object_update, - node, part, update['op'], obj, headers_out) - for node in nodes if node['id'] not in successes] - success = True - new_successes = False - for event in events: - event_success, node_id = event.wait() - if event_success is True: - successes.append(node_id) - new_successes = True + + def do_update(): + successes = update.get('successes', []) + headers_out = HeaderKeyDict(update['headers'].copy()) + headers_out['user-agent'] = 'object-updater %s' % os.getpid() + headers_out.setdefault('X-Backend-Storage-Policy-Index', + str(int(policy))) + headers_out.setdefault('X-Backend-Accept-Redirect', 'true') + container_path = update.get('container_path') + if container_path: + acct, cont = split_path('/' + container_path, minsegs=2) else: - success = False - if success: - self.stats.successes += 1 - self.logger.increment('successes') - self.logger.debug('Update sent for %(obj)s %(path)s', - {'obj': obj, 'path': update_path}) - self.stats.unlinks += 1 - self.logger.increment('unlinks') - os.unlink(update_path) - else: - self.stats.failures += 1 - self.logger.increment('failures') - self.logger.debug('Update failed for %(obj)s %(path)s', - {'obj': obj, 'path': update_path}) - if new_successes: - update['successes'] = successes - write_pickle(update, update_path, os.path.join( - device, get_tmp_dir(policy))) + acct, cont = update['account'], update['container'] + part, nodes = self.get_container_ring().get_nodes(acct, cont) + obj = '/%s/%s/%s' % (acct, cont, update['obj']) + events = [spawn(self.object_update, + node, part, update['op'], obj, headers_out) + for node in nodes if node['id'] not in successes] + success = True + new_successes = rewrite_pickle = False + redirect = None + redirects = set() + for event in events: + event_success, node_id, redirect = event.wait() + if event_success is True: + successes.append(node_id) + new_successes = True + else: + success = False + if redirect: + redirects.add(redirect) + + if success: + self.stats.successes += 1 + self.logger.increment('successes') + self.logger.debug('Update sent for %(obj)s %(path)s', + {'obj': obj, 'path': update_path}) + self.stats.unlinks += 1 + self.logger.increment('unlinks') + os.unlink(update_path) + elif redirects: + # erase any previous successes + update.pop('successes', None) + redirect = max(redirects, key=lambda x: x[-1])[0] + redirect_history = update.setdefault('redirect_history', []) + if redirect in redirect_history: + # force next update to be sent to root, reset history + update['container_path'] = None + update['redirect_history'] = [] + else: + update['container_path'] = redirect + redirect_history.append(redirect) + self.stats.redirects += 1 + self.logger.increment("redirects") + self.logger.debug( + 'Update redirected for %(obj)s %(path)s to %(shard)s', + {'obj': obj, 'path': update_path, + 'shard': update['container_path']}) + rewrite_pickle = True + else: + self.stats.failures += 1 + self.logger.increment('failures') + self.logger.debug('Update failed for %(obj)s %(path)s', + {'obj': obj, 'path': update_path}) + if new_successes: + update['successes'] = successes + rewrite_pickle = True + + return rewrite_pickle, redirect + + rewrite_pickle, redirect = do_update() + if redirect: + # make one immediate retry to the redirect location + rewrite_pickle, redirect = do_update() + if rewrite_pickle: + write_pickle(update, update_path, os.path.join( + device, 
get_tmp_dir(policy))) def object_update(self, node, part, op, obj, headers_out): """ @@ -357,7 +404,12 @@ :param op: operation performed (ex: 'PUT' or 'DELETE') :param obj: object name being updated :param headers_out: headers to send with the update + :return: a tuple of (``success``, ``node_id``, ``redirect``) + where ``success`` is True if the update succeeded, ``node_id`` is + the_id of the node updated and ``redirect`` is either None or a + tuple of (a path, a timestamp string). """ + redirect = None try: with ConnectionTimeout(self.conn_timeout): conn = http_connect(node['ip'], node['port'], node['device'], @@ -365,15 +417,24 @@ with Timeout(self.node_timeout): resp = conn.getresponse() resp.read() - success = is_success(resp.status) - if not success: - self.logger.debug( - _('Error code %(status)d is returned from remote ' - 'server %(ip)s: %(port)s / %(device)s'), - {'status': resp.status, 'ip': node['ip'], - 'port': node['port'], 'device': node['device']}) - return (success, node['id']) + + if resp.status == HTTP_MOVED_PERMANENTLY: + try: + redirect = get_redirect_data(resp) + except ValueError as err: + self.logger.error( + 'Container update failed for %r; problem with ' + 'redirect location: %s' % (obj, err)) + + success = is_success(resp.status) + if not success: + self.logger.debug( + _('Error code %(status)d is returned from remote ' + 'server %(ip)s: %(port)s / %(device)s'), + {'status': resp.status, 'ip': node['ip'], + 'port': node['port'], 'device': node['device']}) + return success, node['id'], redirect except (Exception, Timeout): self.logger.exception(_('ERROR with remote server ' '%(ip)s:%(port)s/%(device)s'), node) - return HTTP_INTERNAL_SERVER_ERROR, node['id'] + return HTTP_INTERNAL_SERVER_ERROR, node['id'], redirect diff -Nru swift-2.17.0/swift/proxy/controllers/base.py swift-2.18.0/swift/proxy/controllers/base.py --- swift-2.17.0/swift/proxy/controllers/base.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/proxy/controllers/base.py 2018-05-30 10:17:02.000000000 +0000 @@ -28,6 +28,7 @@ import os import time +import json import functools import inspect import itertools @@ -40,11 +41,11 @@ from eventlet.timeout import Timeout import six -from swift.common.wsgi import make_pre_authed_env +from swift.common.wsgi import make_pre_authed_env, make_pre_authed_request from swift.common.utils import Timestamp, config_true_value, \ public, split_path, list_from_csv, GreenthreadSafeIterator, \ GreenAsyncPile, quorum_size, parse_content_type, \ - document_iters_to_http_response_body + document_iters_to_http_response_body, ShardRange from swift.common.bufferedhttp import http_connect from swift.common import constraints from swift.common.exceptions import ChunkReadTimeout, ChunkWriteTimeout, \ @@ -188,6 +189,7 @@ }, 'meta': meta, 'sysmeta': sysmeta, + 'sharding_state': headers.get('x-backend-sharding-state', 'unsharded'), } @@ -375,6 +377,9 @@ else: info[field] = int(info[field]) + if info.get('sharding_state') is None: + info['sharding_state'] = 'unsharded' + return info @@ -1994,3 +1999,91 @@ else: raise ValueError( "server_type can only be 'account' or 'container'") + + def _get_container_listing(self, req, account, container, headers=None, + params=None): + """ + Fetch container listing from given `account/container`. + + :param req: original Request instance. + :param account: account in which `container` is stored. + :param container: container from listing should be fetched. 
+ :param headers: headers to be included with the request + :param params: query string parameters to be used. + :return: a tuple of (deserialized json data structure, swob Response) + """ + params = params or {} + version, _a, _c, _other = req.split_path(3, 4, True) + path = '/'.join(['', version, account, container]) + + subreq = make_pre_authed_request( + req.environ, method='GET', path=quote(path), headers=req.headers, + swift_source='SH') + if headers: + subreq.headers.update(headers) + subreq.params = params + self.app.logger.debug( + 'Get listing from %s %s' % (subreq.path_qs, headers)) + response = self.app.handle_request(subreq) + + if not is_success(response.status_int): + self.app.logger.warning( + 'Failed to get container listing from %s: %s', + subreq.path_qs, response.status_int) + return None, response + + try: + data = json.loads(response.body) + if not isinstance(data, list): + raise ValueError('not a list') + return data, response + except ValueError as err: + self.app.logger.error( + 'Problem with listing response from %s: %r', + subreq.path_qs, err) + return None, response + + def _get_shard_ranges(self, req, account, container, includes=None, + states=None): + """ + Fetch shard ranges from given `account/container`. If `includes` is + given then the shard range for that object name is requested, otherwise + all shard ranges are requested. + + :param req: original Request instance. + :param account: account from which shard ranges should be fetched. + :param container: container from which shard ranges should be fetched. + :param includes: (optional) restricts the list of fetched shard ranges + to those which include the given name. + :param states: (optional) the states of shard ranges to be fetched. + :return: a list of instances of :class:`swift.common.utils.ShardRange`, + or None if there was a problem fetching the shard ranges + """ + params = req.params.copy() + params.pop('limit', None) + params['format'] = 'json' + if includes: + params['includes'] = includes + if states: + params['states'] = states + headers = {'X-Backend-Record-Type': 'shard'} + listing, response = self._get_container_listing( + req, account, container, headers=headers, params=params) + if listing is None: + return None + + record_type = response.headers.get('x-backend-record-type') + if record_type != 'shard': + err = 'unexpected record type %r' % record_type + self.app.logger.error("Failed to get shard ranges from %s: %s", + req.path_qs, err) + return None + + try: + return [ShardRange.from_dict(shard_range) + for shard_range in listing] + except (ValueError, TypeError, KeyError) as err: + self.app.logger.error( + "Failed to get shard ranges from %s: invalid data: %r", + req.path_qs, err) + return None diff -Nru swift-2.17.0/swift/proxy/controllers/container.py swift-2.18.0/swift/proxy/controllers/container.py --- swift-2.17.0/swift/proxy/controllers/container.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/proxy/controllers/container.py 2018-05-30 10:17:02.000000000 +0000 @@ -14,11 +14,14 @@ # limitations under the License. 
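# A minimal usage sketch (not part of this patch) of the two helpers added to
# swift/proxy/controllers/base.py above; it mirrors the _get_update_target
# change made to the object controller later in this diff. `controller` and
# `req` are assumed to be a proxy Controller instance and the incoming Request.
def find_update_target(controller, req):
    # Ask the root container for the shard range whose namespace includes
    # this object name and that is accepting updates.
    shard_ranges = controller._get_shard_ranges(
        req, controller.account_name, controller.container_name,
        includes=controller.object_name, states='updating')
    if not shard_ranges:
        # No shard ranges (or an error fetching them): use the root container.
        return None
    # Address the shard container instead of the root container.
    return controller.app.container_ring.get_nodes(
        shard_ranges[0].account, shard_ranges[0].container)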
from swift import gettext_ as _ +import json from six.moves.urllib.parse import unquote -from swift.common.utils import public, csv_append, Timestamp -from swift.common.constraints import check_metadata +from swift.common.utils import public, csv_append, Timestamp, \ + config_true_value, ShardRange +from swift.common.constraints import check_metadata, CONTAINER_LISTING_LIMIT from swift.common.http import HTTP_ACCEPTED, is_success +from swift.common.request_helpers import get_sys_meta_prefix from swift.proxy.controllers.base import Controller, delay_denial, \ cors_validation, set_info_cache, clear_info_cache from swift.common.storage_policy import POLICIES @@ -84,7 +87,9 @@ def GETorHEAD(self, req): """Handler for HTTP GET/HEAD requests.""" ai = self.account_info(self.account_name, req) - if not ai[1]: + auto_account = self.account_name.startswith( + self.app.auto_create_account_prefix) + if not (auto_account or ai[1]): if 'swift.authorize' in req.environ: aresp = req.environ['swift.authorize'](req) if aresp: @@ -101,10 +106,20 @@ node_iter = self.app.iter_nodes(self.app.container_ring, part) params = req.params params['format'] = 'json' + record_type = req.headers.get('X-Backend-Record-Type', '').lower() + if not record_type: + record_type = 'auto' + req.headers['X-Backend-Record-Type'] = 'auto' + params['states'] = 'listing' req.params = params resp = self.GETorHEAD_base( req, _('Container'), node_iter, part, req.swift_entity_path, concurrency) + resp_record_type = resp.headers.get('X-Backend-Record-Type', '') + if all((req.method == "GET", record_type == 'auto', + resp_record_type.lower() == 'shard')): + resp = self._get_from_shards(req, resp) + # Cache this. We just made a request to a storage node and got # up-to-date information for the container. resp.headers['X-Backend-Recheck-Container-Existence'] = str( @@ -122,6 +137,102 @@ for key in self.app.swift_owner_headers: if key in resp.headers: del resp.headers[key] + # Expose sharding state in reseller requests + if req.environ.get('reseller_request', False): + resp.headers['X-Container-Sharding'] = config_true_value( + resp.headers.get(get_sys_meta_prefix('container') + 'Sharding', + 'False')) + return resp + + def _get_from_shards(self, req, resp): + # construct listing using shards described by the response body + shard_ranges = [ShardRange.from_dict(data) + for data in json.loads(resp.body)] + self.app.logger.debug('GET listing from %s shards for: %s', + len(shard_ranges), req.path_qs) + if not shard_ranges: + # can't find ranges or there was a problem getting the ranges. So + # return what we have. + return resp + + objects = [] + req_limit = int(req.params.get('limit', CONTAINER_LISTING_LIMIT)) + params = req.params.copy() + params.pop('states', None) + req.headers.pop('X-Backend-Record-Type', None) + reverse = config_true_value(params.get('reverse')) + marker = params.get('marker') + end_marker = params.get('end_marker') + + limit = req_limit + for shard_range in shard_ranges: + params['limit'] = limit + # Always set marker to ensure that object names less than or equal + # to those already in the listing are not fetched; if the listing + # is empty then the original request marker, if any, is used. This + # allows misplaced objects below the expected shard range to be + # included in the listing. 
+ if objects: + last_name = objects[-1].get('name', + objects[-1].get('subdir', u'')) + params['marker'] = last_name.encode('utf-8') + elif marker: + params['marker'] = marker + else: + params['marker'] = '' + # Always set end_marker to ensure that misplaced objects beyond the + # expected shard range are not fetched. This prevents a misplaced + # object obscuring correctly placed objects in the next shard + # range. + if end_marker and end_marker in shard_range: + params['end_marker'] = end_marker + elif reverse: + params['end_marker'] = shard_range.lower_str + else: + params['end_marker'] = shard_range.end_marker + + if (shard_range.account == self.account_name and + shard_range.container == self.container_name): + # directed back to same container - force GET of objects + headers = {'X-Backend-Record-Type': 'object'} + else: + headers = None + self.app.logger.debug('Getting from %s %s with %s', + shard_range, shard_range.name, headers) + objs, shard_resp = self._get_container_listing( + req, shard_range.account, shard_range.container, + headers=headers, params=params) + + if not objs: + # tolerate errors or empty shard containers + continue + + objects.extend(objs) + limit -= len(objs) + + if limit <= 0: + break + elif (end_marker and reverse and + end_marker >= objects[-1]['name'].encode('utf-8')): + break + elif (end_marker and not reverse and + end_marker <= objects[-1]['name'].encode('utf-8')): + break + + resp.body = json.dumps(objects) + constrained = any(req.params.get(constraint) for constraint in ( + 'marker', 'end_marker', 'path', 'prefix', 'delimiter')) + if not constrained and len(objects) < req_limit: + self.app.logger.debug('Setting object count to %s' % len(objects)) + # prefer the actual listing stats over the potentially outdated + # root stats. This condition is only likely when a sharded + # container is shrinking or in tests; typically a sharded container + # will have more than CONTAINER_LISTING_LIMIT objects so any + # unconstrained listing will be capped by the limit and total + # object stats cannot therefore be inferred from the listing. 
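# Condensed sketch (assumed helper names, not in the patch) of the paging
# scheme the comments above describe: each shard is listed with a marker taken
# from the objects already collected and an end_marker capped at that shard's
# upper bound, while the remaining limit shrinks as results accumulate.
# Reverse listings are ignored here for brevity.
def merge_shard_listings(shard_ranges, get_listing, marker='', end_marker='',
                         limit=10000):
    objects = []
    for shard_range in shard_ranges:
        last = objects[-1]['name'] if objects else marker
        if end_marker and end_marker in shard_range:
            upper = end_marker
        else:
            upper = shard_range.end_marker
        objs = get_listing(shard_range, marker=last, end_marker=upper,
                           limit=limit - len(objects))
        objects.extend(objs or [])
        if len(objects) >= limit:
            break
    return objects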
+ resp.headers['X-Container-Object-Count'] = len(objects) + resp.headers['X-Container-Bytes-Used'] = sum( + [o['bytes'] for o in objects]) return resp @public @@ -150,6 +261,10 @@ if not req.environ.get('swift_owner'): for key in self.app.swift_owner_headers: req.headers.pop(key, None) + if req.environ.get('reseller_request', False) and \ + 'X-Container-Sharding' in req.headers: + req.headers[get_sys_meta_prefix('container') + 'Sharding'] = \ + str(config_true_value(req.headers['X-Container-Sharding'])) length_limit = self.get_name_length_limit() if len(self.container_name) > length_limit: resp = HTTPBadRequest(request=req) @@ -198,6 +313,10 @@ if not req.environ.get('swift_owner'): for key in self.app.swift_owner_headers: req.headers.pop(key, None) + if req.environ.get('reseller_request', False) and \ + 'X-Container-Sharding' in req.headers: + req.headers[get_sys_meta_prefix('container') + 'Sharding'] = \ + str(config_true_value(req.headers['X-Container-Sharding'])) account_partition, accounts, container_count = \ self.account_info(self.account_name, req) if not accounts: diff -Nru swift-2.17.0/swift/proxy/controllers/obj.py swift-2.18.0/swift/proxy/controllers/obj.py --- swift-2.17.0/swift/proxy/controllers/obj.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/proxy/controllers/obj.py 2018-05-30 10:17:02.000000000 +0000 @@ -150,11 +150,11 @@ def __init__(self): self.policy_to_controller_cls = {} for policy in POLICIES: - self.policy_to_controller_cls[policy] = \ + self.policy_to_controller_cls[int(policy)] = \ self.policy_type_to_controller_map[policy.policy_type] def __getitem__(self, policy): - return self.policy_to_controller_cls[policy] + return self.policy_to_controller_cls[int(policy)] class BaseObjectController(Controller): @@ -266,6 +266,20 @@ """Handler for HTTP HEAD requests.""" return self.GETorHEAD(req) + def _get_update_target(self, req, container_info): + # find the sharded container to which we'll send the update + db_state = container_info.get('sharding_state', 'unsharded') + if db_state in ('sharded', 'sharding'): + shard_ranges = self._get_shard_ranges( + req, self.account_name, self.container_name, + includes=self.object_name, states='updating') + if shard_ranges: + partition, nodes = self.app.container_ring.get_nodes( + shard_ranges[0].account, shard_ranges[0].container) + return partition, nodes, shard_ranges[0].name + + return container_info['partition'], container_info['nodes'], None + @public @cors_validation @delay_denial @@ -273,8 +287,8 @@ """HTTP POST request handler.""" container_info = self.container_info( self.account_name, self.container_name, req) - container_partition = container_info['partition'] - container_nodes = container_info['nodes'] + container_partition, container_nodes, container_path = \ + self._get_update_target(req, container_info) req.acl = container_info['write_acl'] if 'swift.authorize' in req.environ: aresp = req.environ['swift.authorize'](req) @@ -304,13 +318,14 @@ headers = self._backend_requests( req, len(nodes), container_partition, container_nodes, - delete_at_container, delete_at_part, delete_at_nodes) + delete_at_container, delete_at_part, delete_at_nodes, + container_path=container_path) return self._post_object(req, obj_ring, partition, headers) def _backend_requests(self, req, n_outgoing, container_partition, containers, delete_at_container=None, delete_at_partition=None, - delete_at_nodes=None): + delete_at_nodes=None, container_path=None): policy_index = req.headers['X-Backend-Storage-Policy-Index'] policy = 
POLICIES.get_by_index(policy_index) headers = [self.generate_request_headers(req, additional=req.headers) @@ -324,6 +339,8 @@ headers[index]['X-Container-Device'] = csv_append( headers[index].get('X-Container-Device'), container['device']) + if container_path: + headers[index]['X-Backend-Container-Path'] = container_path def set_delete_at_headers(index, delete_at_node): headers[index]['X-Delete-At-Container'] = delete_at_container @@ -752,8 +769,8 @@ policy_index = req.headers.get('X-Backend-Storage-Policy-Index', container_info['storage_policy']) obj_ring = self.app.get_object_ring(policy_index) - container_nodes = container_info['nodes'] - container_partition = container_info['partition'] + container_partition, container_nodes, container_path = \ + self._get_update_target(req, container_info) partition, nodes = obj_ring.get_nodes( self.account_name, self.container_name, self.object_name) @@ -800,7 +817,8 @@ # add special headers to be handled by storage nodes outgoing_headers = self._backend_requests( req, len(nodes), container_partition, container_nodes, - delete_at_container, delete_at_part, delete_at_nodes) + delete_at_container, delete_at_part, delete_at_nodes, + container_path=container_path) # send object to storage nodes resp = self._store_object( @@ -823,8 +841,8 @@ next_part_power = getattr(obj_ring, 'next_part_power', None) if next_part_power: req.headers['X-Backend-Next-Part-Power'] = next_part_power - container_partition = container_info['partition'] - container_nodes = container_info['nodes'] + container_partition, container_nodes, container_path = \ + self._get_update_target(req, container_info) req.acl = container_info['write_acl'] req.environ['swift_sync_key'] = container_info['sync_key'] if 'swift.authorize' in req.environ: @@ -851,7 +869,8 @@ node_count += local_handoffs headers = self._backend_requests( - req, node_count, container_partition, container_nodes) + req, node_count, container_partition, container_nodes, + container_path=container_path) return self._delete_object(req, obj_ring, partition, headers) @@ -2263,7 +2282,7 @@ for client_start, client_end in req.range.ranges: # TODO: coalesce ranges that overlap segments. For # example, "bytes=0-10,20-30,40-50" with a 64 KiB - # segment size will result in a a Range header in the + # segment size will result in a Range header in the # object request of "bytes=0-65535,0-65535,0-65535", # which is wasteful. We should be smarter and only # request that first segment once. diff -Nru swift-2.17.0/swift/proxy/server.py swift-2.18.0/swift/proxy/server.py --- swift-2.17.0/swift/proxy/server.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/swift/proxy/server.py 2018-05-30 10:17:02.000000000 +0000 @@ -276,7 +276,10 @@ # # ** Because it affects the client as well, currently, we use the # client chunk size as the govenor and not the object chunk size. 
- socket._fileobject.default_bufsize = self.client_chunk_size + if sys.version_info < (3,): + socket._fileobject.default_bufsize = self.client_chunk_size + # TODO: find a way to enable similar functionality in py3 + self.expose_info = config_true_value( conf.get('expose_info', 'yes')) self.disallowed_sections = list_from_csv( diff -Nru swift-2.17.0/swift.egg-info/entry_points.txt swift-2.18.0/swift.egg-info/entry_points.txt --- swift-2.17.0/swift.egg-info/entry_points.txt 2018-02-05 14:02:15.000000000 +0000 +++ swift-2.18.0/swift.egg-info/entry_points.txt 2018-05-30 10:18:47.000000000 +0000 @@ -1,3 +1,6 @@ +[console_scripts] +swift-manage-shard-ranges = swift.cli.manage_shard_ranges:main + [paste.app_factory] account = swift.account.server:app_factory container = swift.container.server:app_factory @@ -30,6 +33,8 @@ proxy_logging = swift.common.middleware.proxy_logging:filter_factory ratelimit = swift.common.middleware.ratelimit:filter_factory recon = swift.common.middleware.recon:filter_factory +s3api = swift.common.middleware.s3api.s3api:filter_factory +s3token = swift.common.middleware.s3api.s3token:filter_factory slo = swift.common.middleware.slo:filter_factory staticweb = swift.common.middleware.staticweb:filter_factory symlink = swift.common.middleware.symlink:filter_factory diff -Nru swift-2.17.0/swift.egg-info/pbr.json swift-2.18.0/swift.egg-info/pbr.json --- swift-2.17.0/swift.egg-info/pbr.json 2018-02-05 14:02:15.000000000 +0000 +++ swift-2.18.0/swift.egg-info/pbr.json 2018-05-30 10:18:47.000000000 +0000 @@ -1 +1 @@ -{"git_version": "32d1b32", "is_release": true} \ No newline at end of file +{"git_version": "f270466", "is_release": true} \ No newline at end of file diff -Nru swift-2.17.0/swift.egg-info/PKG-INFO swift-2.18.0/swift.egg-info/PKG-INFO --- swift-2.17.0/swift.egg-info/PKG-INFO 2018-02-05 14:02:15.000000000 +0000 +++ swift-2.18.0/swift.egg-info/PKG-INFO 2018-05-30 10:18:47.000000000 +0000 @@ -1,18 +1,17 @@ -Metadata-Version: 1.1 +Metadata-Version: 2.1 Name: swift -Version: 2.17.0 +Version: 2.18.0 Summary: OpenStack Object Storage Home-page: https://docs.openstack.org/swift/latest/ Author: OpenStack Author-email: openstack-dev@lists.openstack.org License: UNKNOWN -Description-Content-Type: UNKNOWN Description: ======================== Team and repository tags ======================== - .. image:: https://governance.openstack.org/badges/swift.svg - :target: https://governance.openstack.org/reference/tags/index.html + .. image:: https://governance.openstack.org/tc/badges/swift.svg + :target: https://governance.openstack.org/tc/reference/tags/index.html .. 
Change things from this point on @@ -179,3 +178,6 @@ Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 +Provides-Extra: kms_keymaster +Provides-Extra: keystone +Provides-Extra: test diff -Nru swift-2.17.0/swift.egg-info/requires.txt swift-2.18.0/swift.egg-info/requires.txt --- swift-2.17.0/swift.egg-info/requires.txt 2018-02-05 14:02:15.000000000 +0000 +++ swift-2.18.0/swift.egg-info/requires.txt 2018-05-30 10:18:47.000000000 +0000 @@ -2,12 +2,35 @@ eventlet>=0.17.4 greenlet>=0.3.1 netifaces!=0.10.0,!=0.10.1,>=0.5 -pastedeploy>=1.3.3 +PasteDeploy>=1.3.3 +lxml +requests>=2.14.2 six>=1.9.0 xattr>=0.4 PyECLib>=1.3.1 cryptography!=2.0,>=1.6 +[keystone] +keystonemiddleware>=4.17.0 + [kms_keymaster] oslo.config!=4.3.0,!=4.4.0,>=4.0.0 castellan>=0.13.0 + +[test] +hacking<0.12,>=0.11.0 +coverage>=3.6 +nose +nosexcover +nosehtmloutput>=0.0.3 +os-testr>=0.8.0 +mock>=2.0 +python-swiftclient +python-keystoneclient!=2.1.0,>=2.0.0 +reno>=1.8.0 +python-openstackclient +boto +requests-mock>=1.2.0 +fixtures>=3.0.0 +keystonemiddleware>=4.17.0 +bandit>=1.1.0 diff -Nru swift-2.17.0/swift.egg-info/SOURCES.txt swift-2.18.0/swift.egg-info/SOURCES.txt --- swift-2.17.0/swift.egg-info/SOURCES.txt 2018-02-05 14:02:17.000000000 +0000 +++ swift-2.18.0/swift.egg-info/SOURCES.txt 2018-05-30 10:18:48.000000000 +0000 @@ -61,6 +61,7 @@ bin/swift-container-reconciler bin/swift-container-replicator bin/swift-container-server +bin/swift-container-sharder bin/swift-container-sync bin/swift-container-updater bin/swift-dispersion-populate @@ -85,6 +86,7 @@ bin/swift-reconciler-enqueue bin/swift-ring-builder bin/swift-ring-builder-analyzer +doc/requirements.txt doc/manpages/account-server.conf.5 doc/manpages/container-reconciler.conf.5 doc/manpages/container-server.conf.5 @@ -117,6 +119,7 @@ doc/manpages/swift-object-expirer.1 doc/manpages/swift-object-info.1 doc/manpages/swift-object-reconstructor.1 +doc/manpages/swift-object-relinker.1 doc/manpages/swift-object-replicator.1 doc/manpages/swift-object-server.1 doc/manpages/swift-object-updater.1 @@ -129,6 +132,28 @@ doc/manpages/swift-ring-builder-analyzer.1 doc/manpages/swift-ring-builder.1 doc/manpages/swift.conf.5 +doc/s3api/conf/ceph-known-failures-keystone.yaml +doc/s3api/conf/ceph-known-failures-tempauth.yaml +doc/s3api/rnc/access_control_policy.rnc +doc/s3api/rnc/bucket_logging_status.rnc +doc/s3api/rnc/common.rnc +doc/s3api/rnc/complete_multipart_upload.rnc +doc/s3api/rnc/complete_multipart_upload_result.rnc +doc/s3api/rnc/copy_object_result.rnc +doc/s3api/rnc/copy_part_result.rnc +doc/s3api/rnc/create_bucket_configuration.rnc +doc/s3api/rnc/delete.rnc +doc/s3api/rnc/delete_result.rnc +doc/s3api/rnc/error.rnc +doc/s3api/rnc/initiate_multipart_upload_result.rnc +doc/s3api/rnc/lifecycle_configuration.rnc +doc/s3api/rnc/list_all_my_buckets_result.rnc +doc/s3api/rnc/list_bucket_result.rnc +doc/s3api/rnc/list_multipart_uploads_result.rnc +doc/s3api/rnc/list_parts_result.rnc +doc/s3api/rnc/list_versions_result.rnc +doc/s3api/rnc/location_constraint.rnc +doc/s3api/rnc/versioning_configuration.rnc doc/saio/rsyncd.conf doc/saio/bin/remakerings doc/saio/bin/resetswift @@ -137,6 +162,7 @@ doc/saio/rsyslog.d/10-swift.conf doc/saio/swift/container-reconciler.conf doc/saio/swift/container-sync-realms.conf +doc/saio/swift/internal-client.conf doc/saio/swift/object-expirer.conf doc/saio/swift/proxy-server.conf doc/saio/swift/swift.conf @@ -179,6 +205,7 @@ doc/source/overview_architecture.rst 
doc/source/overview_auth.rst doc/source/overview_backing_store.rst +doc/source/overview_container_sharding.rst doc/source/overview_container_sync.rst doc/source/overview_encryption.rst doc/source/overview_erasure_code.rst @@ -236,6 +263,17 @@ doc/source/api/use_content-encoding_metadata.rst doc/source/api/use_the_content-disposition_metadata.rst doc/source/images/ec_overview.png +doc/source/images/sharded_GET.svg +doc/source/images/sharding_GET.svg +doc/source/images/sharding_cleave1_load.svg +doc/source/images/sharding_cleave2_load.svg +doc/source/images/sharding_cleave_basic.svg +doc/source/images/sharding_db_states.svg +doc/source/images/sharding_scan_basic.svg +doc/source/images/sharding_scan_load.svg +doc/source/images/sharding_sharded_load.svg +doc/source/images/sharding_unsharded.svg +doc/source/images/sharding_unsharded_load.svg doc/source/install/controller-common_prerequisites.txt doc/source/install/controller-include.txt doc/source/install/controller-install-debian.rst @@ -290,6 +328,12 @@ examples/wsgi/container-server.wsgi.template examples/wsgi/object-server.wsgi.template examples/wsgi/proxy-server.wsgi.template +playbooks/probetests/post.yaml +playbooks/probetests/run.yaml +playbooks/saio_single_node_setup/install_dependencies.yaml +playbooks/saio_single_node_setup/make_rings.yaml +playbooks/saio_single_node_setup/setup_saio.yaml +playbooks/saio_single_node_setup/templates/rc.local.j2 releasenotes/notes/2_10_0_release-666a76f4975657a5.yaml releasenotes/notes/2_11_0_release-ac1d256e455d347e.yaml releasenotes/notes/2_12_0_release-06af226abc7b91ef.yaml @@ -299,14 +343,17 @@ releasenotes/notes/2_15_1_release-be25e67bfc5e886a.yaml releasenotes/notes/2_16_0_release-d48cb9b2629df8ab.yaml releasenotes/notes/2_17_0_release-bd35f18c41c5ef18.yaml +releasenotes/notes/2_18_0_release-3acf63cfe2475c65.yaml releasenotes/source/conf.py releasenotes/source/current.rst releasenotes/source/index.rst releasenotes/source/newton.rst releasenotes/source/ocata.rst releasenotes/source/pike.rst +releasenotes/source/queens.rst releasenotes/source/locale/en_GB/LC_MESSAGES/releasenotes.po releasenotes/source/locale/ja/LC_MESSAGES/releasenotes.po +releasenotes/source/locale/ko_KR/LC_MESSAGES/releasenotes.po swift/__init__.py swift.egg-info/PKG-INFO swift.egg-info/SOURCES.txt @@ -327,10 +374,12 @@ swift/cli/dispersion_report.py swift/cli/form_signature.py swift/cli/info.py +swift/cli/manage_shard_ranges.py swift/cli/recon.py swift/cli/relinker.py swift/cli/ring_builder_analyzer.py swift/cli/ringbuilder.py +swift/cli/shard-info.py swift/common/__init__.py swift/common/base_storage_server.py swift/common/bufferedhttp.py @@ -389,6 +438,49 @@ swift/common/middleware/crypto/encrypter.py swift/common/middleware/crypto/keymaster.py swift/common/middleware/crypto/kms_keymaster.py +swift/common/middleware/s3api/__init__.py +swift/common/middleware/s3api/acl_handlers.py +swift/common/middleware/s3api/acl_utils.py +swift/common/middleware/s3api/etree.py +swift/common/middleware/s3api/exception.py +swift/common/middleware/s3api/s3api.py +swift/common/middleware/s3api/s3request.py +swift/common/middleware/s3api/s3response.py +swift/common/middleware/s3api/s3token.py +swift/common/middleware/s3api/subresource.py +swift/common/middleware/s3api/utils.py +swift/common/middleware/s3api/controllers/__init__.py +swift/common/middleware/s3api/controllers/acl.py +swift/common/middleware/s3api/controllers/base.py +swift/common/middleware/s3api/controllers/bucket.py +swift/common/middleware/s3api/controllers/location.py 
+swift/common/middleware/s3api/controllers/logging.py +swift/common/middleware/s3api/controllers/multi_delete.py +swift/common/middleware/s3api/controllers/multi_upload.py +swift/common/middleware/s3api/controllers/obj.py +swift/common/middleware/s3api/controllers/s3_acl.py +swift/common/middleware/s3api/controllers/service.py +swift/common/middleware/s3api/controllers/versioning.py +swift/common/middleware/s3api/schema/access_control_policy.rng +swift/common/middleware/s3api/schema/bucket_logging_status.rng +swift/common/middleware/s3api/schema/common.rng +swift/common/middleware/s3api/schema/complete_multipart_upload.rng +swift/common/middleware/s3api/schema/complete_multipart_upload_result.rng +swift/common/middleware/s3api/schema/copy_object_result.rng +swift/common/middleware/s3api/schema/copy_part_result.rng +swift/common/middleware/s3api/schema/create_bucket_configuration.rng +swift/common/middleware/s3api/schema/delete.rng +swift/common/middleware/s3api/schema/delete_result.rng +swift/common/middleware/s3api/schema/error.rng +swift/common/middleware/s3api/schema/initiate_multipart_upload_result.rng +swift/common/middleware/s3api/schema/lifecycle_configuration.rng +swift/common/middleware/s3api/schema/list_all_my_buckets_result.rng +swift/common/middleware/s3api/schema/list_bucket_result.rng +swift/common/middleware/s3api/schema/list_multipart_uploads_result.rng +swift/common/middleware/s3api/schema/list_parts_result.rng +swift/common/middleware/s3api/schema/list_versions_result.rng +swift/common/middleware/s3api/schema/location_constraint.rng +swift/common/middleware/s3api/schema/versioning_configuration.rng swift/common/middleware/x_profile/__init__.py swift/common/middleware/x_profile/exceptions.py swift/common/middleware/x_profile/html_viewer.py @@ -404,6 +496,7 @@ swift/container/reconciler.py swift/container/replicator.py swift/container/server.py +swift/container/sharder.py swift/container/sync.py swift/container/sync_store.py swift/container/updater.py @@ -450,10 +543,21 @@ test/functional/test_dlo.py test/functional/test_object.py test/functional/test_slo.py +test/functional/test_staticweb.py test/functional/test_symlink.py test/functional/test_tempurl.py test/functional/test_versioned_writes.py test/functional/tests.py +test/functional/s3api/__init__.py +test/functional/s3api/s3_test_client.py +test/functional/s3api/test_acl.py +test/functional/s3api/test_bucket.py +test/functional/s3api/test_multi_delete.py +test/functional/s3api/test_multi_upload.py +test/functional/s3api/test_object.py +test/functional/s3api/test_presigned.py +test/functional/s3api/test_service.py +test/functional/s3api/utils.py test/probe/__init__.py test/probe/brain.py test/probe/common.py @@ -474,6 +578,7 @@ test/probe/test_reconstructor_rebuild.py test/probe/test_reconstructor_revert.py test/probe/test_replication_servers_working.py +test/probe/test_sharder.py test/probe/test_signals.py test/unit/__init__.py test/unit/helpers.py @@ -492,6 +597,7 @@ test/unit/cli/test_form_signature.py test/unit/cli/test_info.py test/unit/cli/test_ipv6_output.stub +test/unit/cli/test_manage_shard_ranges.py test/unit/cli/test_recon.py test/unit/cli/test_relinker.py test/unit/cli/test_ring_builder_analyzer.py @@ -500,6 +606,7 @@ test/unit/common/corrupted_example.db test/unit/common/malformed_example.db test/unit/common/malformed_schema_example.db +test/unit/common/missing_container_info.db test/unit/common/test_base_storage_server.py test/unit/common/test_bufferedhttp.py test/unit/common/test_constraints.py @@ -561,6 
+668,30 @@ test/unit/common/middleware/crypto/test_encryption.py test/unit/common/middleware/crypto/test_keymaster.py test/unit/common/middleware/crypto/test_kms_keymaster.py +test/unit/common/middleware/s3api/__init__.py +test/unit/common/middleware/s3api/exceptions.py +test/unit/common/middleware/s3api/helpers.py +test/unit/common/middleware/s3api/test_acl.py +test/unit/common/middleware/s3api/test_acl_handlers.py +test/unit/common/middleware/s3api/test_acl_utils.py +test/unit/common/middleware/s3api/test_bucket.py +test/unit/common/middleware/s3api/test_cfg.py +test/unit/common/middleware/s3api/test_etree.py +test/unit/common/middleware/s3api/test_helpers.py +test/unit/common/middleware/s3api/test_location.py +test/unit/common/middleware/s3api/test_logging.py +test/unit/common/middleware/s3api/test_multi_delete.py +test/unit/common/middleware/s3api/test_multi_upload.py +test/unit/common/middleware/s3api/test_obj.py +test/unit/common/middleware/s3api/test_s3_acl.py +test/unit/common/middleware/s3api/test_s3api.py +test/unit/common/middleware/s3api/test_s3request.py +test/unit/common/middleware/s3api/test_s3response.py +test/unit/common/middleware/s3api/test_s3token.py +test/unit/common/middleware/s3api/test_service.py +test/unit/common/middleware/s3api/test_subresource.py +test/unit/common/middleware/s3api/test_utils.py +test/unit/common/middleware/s3api/test_versioning.py test/unit/common/ring/__init__.py test/unit/common/ring/test_builder.py test/unit/common/ring/test_composite_builder.py @@ -572,6 +703,7 @@ test/unit/container/test_reconciler.py test/unit/container/test_replicator.py test/unit/container/test_server.py +test/unit/container/test_sharder.py test/unit/container/test_sync.py test/unit/container/test_sync_store.py test/unit/container/test_updater.py diff -Nru swift-2.17.0/test/functional/__init__.py swift-2.18.0/test/functional/__init__.py --- swift-2.17.0/test/functional/__init__.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/functional/__init__.py 2018-05-30 10:17:02.000000000 +0000 @@ -53,7 +53,8 @@ from swift.common import constraints, utils, ring, storage_policy from swift.common.ring import Ring -from swift.common.wsgi import monkey_patch_mimetools, loadapp +from swift.common.wsgi import ( + monkey_patch_mimetools, loadapp, SwiftHttpProtocol) from swift.common.utils import config_true_value, split_path from swift.account import server as account_server from swift.container import server as container_server @@ -369,6 +370,89 @@ return proxy_conf_file, swift_conf_file +def _load_domain_remap_staticweb(proxy_conf_file, swift_conf_file, **kwargs): + """ + Load domain_remap and staticweb into proxy server pipeline. + + :param proxy_conf_file: Source proxy conf filename + :param swift_conf_file: Source swift conf filename + :returns: Tuple of paths to the proxy conf file and swift conf file to use + :raises InProcessException: raised if proxy conf contents are invalid + """ + _debug('Setting configuration for domain_remap') + + # The global conf dict cannot be used to modify the pipeline. + # The pipeline loader requires the pipeline to be set in the local_conf. + # If pipeline is set in the global conf dict (which in turn populates the + # DEFAULTS options) then it prevents pipeline being loaded into the local + # conf during wsgi load_app. + # Therefore we must modify the [pipeline:main] section. 
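# For illustration only (the pipeline value below is an assumed example, not
# taken from this patch): the string replacement performed next turns a proxy
# pipeline such as
#     catch_errors cache tempauth proxy-server
# into
#     catch_errors cache domain_remap tempauth staticweb proxy-server
example_pipeline = "catch_errors cache tempauth proxy-server"
modified_pipeline = example_pipeline.replace(
    "tempauth", "domain_remap tempauth staticweb")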
+ + conf = ConfigParser() + conf.read(proxy_conf_file) + try: + section = 'pipeline:main' + old_pipeline = conf.get(section, 'pipeline') + pipeline = old_pipeline.replace( + "tempauth", + "domain_remap tempauth staticweb") + if pipeline == old_pipeline: + raise InProcessException( + "Failed to insert domain_remap and staticweb into pipeline: %s" + % old_pipeline) + conf.set(section, 'pipeline', pipeline) + except NoSectionError as err: + msg = 'Error problem with proxy conf file %s: %s' % \ + (proxy_conf_file, err) + raise InProcessException(msg) + + test_conf_file = os.path.join(_testdir, 'proxy-server.conf') + with open(test_conf_file, 'w') as fp: + conf.write(fp) + + return test_conf_file, swift_conf_file + + +def _load_s3api(proxy_conf_file, swift_conf_file, **kwargs): + """ + Load s3api configuration and override proxy-server.conf contents. + + :param proxy_conf_file: Source proxy conf filename + :param swift_conf_file: Source swift conf filename + :returns: Tuple of paths to the proxy conf file and swift conf file to use + :raises InProcessException: raised if proxy conf contents are invalid + """ + _debug('Setting configuration for s3api') + + # The global conf dict cannot be used to modify the pipeline. + # The pipeline loader requires the pipeline to be set in the local_conf. + # If pipeline is set in the global conf dict (which in turn populates the + # DEFAULTS options) then it prevents pipeline being loaded into the local + # conf during wsgi load_app. + # Therefore we must modify the [pipeline:main] section. + + conf = ConfigParser() + conf.read(proxy_conf_file) + try: + section = 'pipeline:main' + pipeline = conf.get(section, 'pipeline') + pipeline = pipeline.replace( + "tempauth", + "s3api tempauth") + conf.set(section, 'pipeline', pipeline) + conf.set('filter:s3api', 's3_acl', 'true') + except NoSectionError as err: + msg = 'Error problem with proxy conf file %s: %s' % \ + (proxy_conf_file, err) + raise InProcessException(msg) + + test_conf_file = os.path.join(_testdir, 'proxy-server.conf') + with open(test_conf_file, 'w') as fp: + conf.write(fp) + + return test_conf_file, swift_conf_file + + # Mapping from possible values of the variable # SWIFT_TEST_IN_PROCESS_CONF_LOADER # to the method to call for loading the associated configuration @@ -376,7 +460,9 @@ # conf_filename_to_use loader(input_conf_filename, **kwargs) conf_loaders = { 'encryption': _load_encryption, - 'ec': _load_ec_as_default_policy + 'ec': _load_ec_as_default_policy, + 'domain_remap_staticweb': _load_domain_remap_staticweb, + 's3api': _load_s3api, } @@ -476,6 +562,12 @@ 'account_autocreate': 'true', 'allow_versions': 'True', 'allow_versioned_writes': 'True', + # TODO: move this into s3api config loader because they are + # required by only s3api + 'allowed_headers': + "Content-Disposition, Content-Encoding, X-Delete-At, " + "X-Object-Manifest, X-Static-Large-Object, Cache-Control, " + "Content-Language, Expires, X-Robots-Tag", # Below are values used by the functional test framework, as well as # by the various in-process swift servers 'auth_host': '127.0.0.1', @@ -487,6 +579,8 @@ 'account': 'test', 'username': 'tester', 'password': 'testing', + 's3_access_key': 'test:tester', + 's3_secret_key': 'testing', # User on a second account (needs admin access to the account) 'account2': 'test2', 'username2': 'tester2', @@ -494,6 +588,8 @@ # User on same account as first, but without admin access 'username3': 'tester3', 'password3': 'testing3', + 's3_access_key2': 'test:tester3', + 's3_secret_key2': 'testing3', # 
Service user and prefix (emulates glance, cinder, etc. user) 'account5': 'test5', 'username5': 'tester5', @@ -531,13 +627,6 @@ 'port': con2lis.getsockname()[1]}], 30), f) - eventlet.wsgi.HttpProtocol.default_request_version = "HTTP/1.0" - # Turn off logging requests by the underlying WSGI software. - eventlet.wsgi.HttpProtocol.log_request = lambda *a: None - logger = utils.get_logger(config, 'wsgi-server', log_route='wsgi') - # Redirect logging other messages by the underlying WSGI software. - eventlet.wsgi.HttpProtocol.log_message = \ - lambda s, f, *a: logger.error('ERROR WSGI: ' + f % a) # Default to only 4 seconds for in-process functional test runs eventlet.wsgi.WRITE_TIMEOUT = 4 @@ -564,7 +653,9 @@ ] if show_debug_logs: - logger = debug_logger('proxy') + logger = get_logger_name('proxy') + else: + logger = utils.get_logger(config, 'wsgi-server', log_route='wsgi') def get_logger(name, *args, **kwargs): return logger @@ -580,13 +671,19 @@ nl = utils.NullLogger() global proxy_srv proxy_srv = prolis - prospa = eventlet.spawn(eventlet.wsgi.server, prolis, app, nl) - acc1spa = eventlet.spawn(eventlet.wsgi.server, acc1lis, acc1srv, nl) - acc2spa = eventlet.spawn(eventlet.wsgi.server, acc2lis, acc2srv, nl) - con1spa = eventlet.spawn(eventlet.wsgi.server, con1lis, con1srv, nl) - con2spa = eventlet.spawn(eventlet.wsgi.server, con2lis, con2srv, nl) + prospa = eventlet.spawn(eventlet.wsgi.server, prolis, app, nl, + protocol=SwiftHttpProtocol) + acc1spa = eventlet.spawn(eventlet.wsgi.server, acc1lis, acc1srv, nl, + protocol=SwiftHttpProtocol) + acc2spa = eventlet.spawn(eventlet.wsgi.server, acc2lis, acc2srv, nl, + protocol=SwiftHttpProtocol) + con1spa = eventlet.spawn(eventlet.wsgi.server, con1lis, con1srv, nl, + protocol=SwiftHttpProtocol) + con2spa = eventlet.spawn(eventlet.wsgi.server, con2lis, con2srv, nl, + protocol=SwiftHttpProtocol) - objspa = [eventlet.spawn(eventlet.wsgi.server, objsrv[0], objsrv[1], nl) + objspa = [eventlet.spawn(eventlet.wsgi.server, objsrv[0], objsrv[1], nl, + protocol=SwiftHttpProtocol) for objsrv in objsrvs] global _test_coros diff -Nru swift-2.17.0/test/functional/s3api/__init__.py swift-2.18.0/test/functional/s3api/__init__.py --- swift-2.17.0/test/functional/s3api/__init__.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/functional/s3api/__init__.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,61 @@ +# Copyright (c) 2011-2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest2 +import traceback +import test.functional as tf +from test.functional.s3api.s3_test_client import Connection + + +def setUpModule(): + tf.setup_package() + + +def tearDownModule(): + tf.teardown_package() + + +class S3ApiBase(unittest2.TestCase): + def __init__(self, method_name): + super(S3ApiBase, self).__init__(method_name) + self.method_name = method_name + + def setUp(self): + if 's3api' not in tf.cluster_info: + raise tf.SkipTest('s3api middleware is not enabled') + try: + self.conn = Connection() + self.conn.reset() + except Exception: + message = '%s got an error during initialize process.\n\n%s' % \ + (self.method_name, traceback.format_exc()) + # TODO: Find a way to make this go to FAIL instead of Error + self.fail(message) + + def assertCommonResponseHeaders(self, headers, etag=None): + """ + asserting common response headers with args + :param headers: a dict of response headers + :param etag: a string of md5(content).hexdigest() if not given, + this won't assert anything about etag. (e.g. DELETE obj) + """ + self.assertTrue(headers['x-amz-id-2'] is not None) + self.assertTrue(headers['x-amz-request-id'] is not None) + self.assertTrue(headers['date'] is not None) + # TODO; requires consideration + # self.assertTrue(headers['server'] is not None) + if etag is not None: + self.assertTrue('etag' in headers) # sanity + self.assertEqual(etag, headers['etag'].strip('"')) diff -Nru swift-2.17.0/test/functional/s3api/s3_test_client.py swift-2.18.0/test/functional/s3api/s3_test_client.py --- swift-2.17.0/test/functional/s3api/s3_test_client.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/functional/s3api/s3_test_client.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,139 @@ +# Copyright (c) 2015 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import test.functional as tf +from boto.s3.connection import S3Connection, OrdinaryCallingFormat, \ + BotoClientError, S3ResponseError + +RETRY_COUNT = 3 + + +def setUpModule(): + tf.setup_package() + + +def tearDownModule(): + tf.teardown_package() + + +class Connection(object): + """ + Connection class used for S3 functional testing. + """ + def __init__(self, aws_access_key='test:tester', + aws_secret_key='testing', + user_id='test:tester'): + """ + Initialize method. 
+ + :param aws_access_key: a string of aws access key + :param aws_secret_key: a string of aws secret key + :param user_id: a string consists of TENANT and USER name used for + asserting Owner ID (not required S3Connection) + + In default, Connection class will be initialized as tester user + behaves as: + user_test_tester = testing .admin + + """ + self.aws_access_key = aws_access_key + self.aws_secret_key = aws_secret_key + self.user_id = user_id + # NOTE: auth_host and auth_port can be different from storage location + self.host = tf.config['auth_host'] + self.port = int(tf.config['auth_port']) + self.conn = \ + S3Connection(aws_access_key, aws_secret_key, is_secure=False, + host=self.host, port=self.port, + calling_format=OrdinaryCallingFormat()) + self.conn.auth_region_name = 'US' + + def reset(self): + """ + Reset all swift environment to keep clean. As a result by calling this + method, we can assume the backend swift keeps no containers and no + objects on this connection's account. + """ + exceptions = [] + for i in range(RETRY_COUNT): + try: + buckets = self.conn.get_all_buckets() + if not buckets: + break + + for bucket in buckets: + try: + for upload in bucket.list_multipart_uploads(): + upload.cancel_upload() + + for obj in bucket.list(): + bucket.delete_key(obj.name) + + self.conn.delete_bucket(bucket.name) + except S3ResponseError as e: + # 404 means NoSuchBucket, NoSuchKey, or NoSuchUpload + if e.status != 404: + raise + except (BotoClientError, S3ResponseError) as e: + exceptions.append(e) + if exceptions: + # raise the first exception + raise exceptions.pop(0) + + def make_request(self, method, bucket='', obj='', headers=None, body='', + query=None): + """ + Wrapper method of S3Connection.make_request. + + :param method: a string of HTTP request method + :param bucket: a string of bucket name + :param obj: a string of object name + :param headers: a dictionary of headers + :param body: a string of data binary sent to S3 as a request body + :param query: a string of HTTP query argument + + :returns: a tuple of (int(status_code), headers dict, response body) + """ + response = \ + self.conn.make_request(method, bucket=bucket, key=obj, + headers=headers, data=body, + query_args=query, sender=None, + override_num_retries=RETRY_COUNT, + retry_handler=None) + return response.status, dict(response.getheaders()), response.read() + + def generate_url_and_headers(self, method, bucket='', obj='', + expires_in=3600): + url = self.conn.generate_url(expires_in, method, bucket, obj) + if os.environ.get('S3_USE_SIGV4') == "True": + # V4 signatures are known-broken in boto, but we can work around it + if url.startswith('https://'): + url = 'http://' + url[8:] + return url, {'Host': '%(host)s:%(port)d:%(port)d' % { + 'host': self.host, 'port': self.port}} + return url, {} + + +# TODO: make sure where this function is used +def get_admin_connection(): + """ + Return tester connection behaves as: + user_test_admin = admin .admin + """ + aws_access_key = tf.config['s3_access_key'] + aws_secret_key = tf.config['s3_secret_key'] + user_id = tf.config['s3_access_key'] + return Connection(aws_access_key, aws_secret_key, user_id) diff -Nru swift-2.17.0/test/functional/s3api/test_acl.py swift-2.18.0/test/functional/s3api/test_acl.py --- swift-2.17.0/test/functional/s3api/test_acl.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/functional/s3api/test_acl.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,156 @@ +# Copyright (c) 2015 OpenStack Foundation +# +# Licensed under the Apache 
License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest2 +import os +import test.functional as tf +from swift.common.middleware.s3api.etree import fromstring +from test.functional.s3api import S3ApiBase +from test.functional.s3api.s3_test_client import Connection +from test.functional.s3api.utils import get_error_code + + +def setUpModule(): + tf.setup_package() + + +def tearDownModule(): + tf.teardown_package() + + +class TestS3Acl(S3ApiBase): + def setUp(self): + super(TestS3Acl, self).setUp() + self.bucket = 'bucket' + self.obj = 'object' + if 's3_access_key2' not in tf.config or \ + 's3_secret_key2' not in tf.config: + raise tf.SkipTest( + 'TestS3Acl requires s3_access_key2 and s3_secret_key2 setting') + self.conn.make_request('PUT', self.bucket) + access_key2 = tf.config['s3_access_key2'] + secret_key2 = tf.config['s3_secret_key2'] + self.conn2 = Connection(access_key2, secret_key2, access_key2) + + def test_acl(self): + self.conn.make_request('PUT', self.bucket, self.obj) + query = 'acl' + + # PUT Bucket ACL + headers = {'x-amz-acl': 'public-read'} + status, headers, body = \ + self.conn.make_request('PUT', self.bucket, headers=headers, + query=query) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self.assertEqual(headers['content-length'], '0') + + # GET Bucket ACL + status, headers, body = \ + self.conn.make_request('GET', self.bucket, query=query) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + # TODO: Fix the response that last-modified must be in the response. + # self.assertTrue(headers['last-modified'] is not None) + self.assertEqual(headers['content-length'], str(len(body))) + self.assertTrue(headers['content-type'] is not None) + elem = fromstring(body, 'AccessControlPolicy') + owner = elem.find('Owner') + self.assertEqual(owner.find('ID').text, self.conn.user_id) + self.assertEqual(owner.find('DisplayName').text, self.conn.user_id) + acl = elem.find('AccessControlList') + self.assertTrue(acl.find('Grant') is not None) + + # GET Object ACL + status, headers, body = \ + self.conn.make_request('GET', self.bucket, self.obj, query=query) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + # TODO: Fix the response that last-modified must be in the response. 
+ # self.assertTrue(headers['last-modified'] is not None) + self.assertEqual(headers['content-length'], str(len(body))) + self.assertTrue(headers['content-type'] is not None) + elem = fromstring(body, 'AccessControlPolicy') + owner = elem.find('Owner') + self.assertEqual(owner.find('ID').text, self.conn.user_id) + self.assertEqual(owner.find('DisplayName').text, self.conn.user_id) + acl = elem.find('AccessControlList') + self.assertTrue(acl.find('Grant') is not None) + + def test_put_bucket_acl_error(self): + req_headers = {'x-amz-acl': 'public-read'} + aws_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + aws_error_conn.make_request('PUT', self.bucket, + headers=req_headers, query='acl') + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + + status, headers, body = \ + self.conn.make_request('PUT', 'nothing', + headers=req_headers, query='acl') + self.assertEqual(get_error_code(body), 'NoSuchBucket') + + status, headers, body = \ + self.conn2.make_request('PUT', self.bucket, + headers=req_headers, query='acl') + self.assertEqual(get_error_code(body), 'AccessDenied') + + def test_get_bucket_acl_error(self): + aws_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + aws_error_conn.make_request('GET', self.bucket, query='acl') + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + + status, headers, body = \ + self.conn.make_request('GET', 'nothing', query='acl') + self.assertEqual(get_error_code(body), 'NoSuchBucket') + + status, headers, body = \ + self.conn2.make_request('GET', self.bucket, query='acl') + self.assertEqual(get_error_code(body), 'AccessDenied') + + def test_get_object_acl_error(self): + self.conn.make_request('PUT', self.bucket, self.obj) + + aws_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + aws_error_conn.make_request('GET', self.bucket, self.obj, + query='acl') + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + + status, headers, body = \ + self.conn.make_request('GET', self.bucket, 'nothing', query='acl') + self.assertEqual(get_error_code(body), 'NoSuchKey') + + status, headers, body = \ + self.conn2.make_request('GET', self.bucket, self.obj, query='acl') + self.assertEqual(get_error_code(body), 'AccessDenied') + + +class TestS3AclSigV4(TestS3Acl): + @classmethod + def setUpClass(cls): + os.environ['S3_USE_SIGV4'] = "True" + + @classmethod + def tearDownClass(cls): + del os.environ['S3_USE_SIGV4'] + + def setUp(self): + super(TestS3AclSigV4, self).setUp() + + +if __name__ == '__main__': + unittest2.main() diff -Nru swift-2.17.0/test/functional/s3api/test_bucket.py swift-2.18.0/test/functional/s3api/test_bucket.py --- swift-2.17.0/test/functional/s3api/test_bucket.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/functional/s3api/test_bucket.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,487 @@ +# Copyright (c) 2015 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
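# Hypothetical usage sketch (not one of the shipped tests) of the client and
# helpers introduced above, showing the pattern the bucket tests below follow:
# create a Connection, issue bare S3 requests, and parse the XML listing. It
# assumes a cluster with the s3api middleware enabled and the default test
# credentials (test:tester / testing).
import test.functional as tf
from test.functional.s3api.s3_test_client import Connection
from swift.common.middleware.s3api.etree import fromstring

tf.setup_package()
conn = Connection()
status, headers, body = conn.make_request('PUT', 'sketch-bucket')
assert status == 200
status, headers, body = conn.make_request('GET', 'sketch-bucket')
elem = fromstring(body, 'ListBucketResult')
assert elem.find('Name').text == 'sketch-bucket'
tf.teardown_package()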
+ +import unittest2 +import os + +import test.functional as tf +from swift.common.middleware.s3api.etree import fromstring, tostring, Element, \ + SubElement +from test.functional.s3api import S3ApiBase +from test.functional.s3api.s3_test_client import Connection +from test.functional.s3api.utils import get_error_code + + +def setUpModule(): + tf.setup_package() + + +def tearDownModule(): + tf.teardown_package() + + +class TestS3ApiBucket(S3ApiBase): + def setUp(self): + super(TestS3ApiBucket, self).setUp() + + def _gen_location_xml(self, location): + elem = Element('CreateBucketConfiguration') + SubElement(elem, 'LocationConstraint').text = location + return tostring(elem) + + def test_bucket(self): + bucket = 'bucket' + max_bucket_listing = tf.cluster_info['s3api'].get( + 'max_bucket_listing', 1000) + + # PUT Bucket + status, headers, body = self.conn.make_request('PUT', bucket) + self.assertEqual(status, 200) + + self.assertCommonResponseHeaders(headers) + self.assertIn(headers['location'], ( + '/' + bucket, # swob won't touch it... + # but webob (which we get because of auth_token) *does* + 'http://%s%s/%s' % ( + self.conn.host, + '' if self.conn.port == 80 else ':%d' % self.conn.port, + bucket), + # This is all based on the Host header the client provided, + # and boto will double-up ports for sig v4. See + # - https://github.com/boto/boto/issues/2623 + # - https://github.com/boto/boto/issues/3716 + # with proposed fixes at + # - https://github.com/boto/boto/pull/3513 + # - https://github.com/boto/boto/pull/3676 + 'http://%s%s:%d/%s' % ( + self.conn.host, + '' if self.conn.port == 80 else ':%d' % self.conn.port, + self.conn.port, + bucket), + )) + self.assertEqual(headers['content-length'], '0') + + # GET Bucket(Without Object) + status, headers, body = self.conn.make_request('GET', bucket) + self.assertEqual(status, 200) + + self.assertCommonResponseHeaders(headers) + self.assertTrue(headers['content-type'] is not None) + self.assertEqual(headers['content-length'], str(len(body))) + # TODO; requires consideration + # self.assertEqual(headers['transfer-encoding'], 'chunked') + + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('Name').text, bucket) + self.assertIsNone(elem.find('Prefix').text) + self.assertIsNone(elem.find('Marker').text) + self.assertEqual( + elem.find('MaxKeys').text, str(max_bucket_listing)) + self.assertEqual(elem.find('IsTruncated').text, 'false') + objects = elem.findall('./Contents') + self.assertEqual(list(objects), []) + + # GET Bucket(With Object) + req_objects = ('object', 'object2') + for obj in req_objects: + self.conn.make_request('PUT', bucket, obj) + status, headers, body = self.conn.make_request('GET', bucket) + self.assertEqual(status, 200) + + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('Name').text, bucket) + self.assertIsNone(elem.find('Prefix').text) + self.assertIsNone(elem.find('Marker').text) + self.assertEqual(elem.find('MaxKeys').text, + str(max_bucket_listing)) + self.assertEqual(elem.find('IsTruncated').text, 'false') + resp_objects = elem.findall('./Contents') + self.assertEqual(len(list(resp_objects)), 2) + for o in resp_objects: + self.assertTrue(o.find('Key').text in req_objects) + self.assertTrue(o.find('LastModified').text is not None) + self.assertRegexpMatches( + o.find('LastModified').text, + r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$') + self.assertTrue(o.find('ETag').text is not None) + self.assertTrue(o.find('Size').text is not None) + 
self.assertTrue(o.find('StorageClass').text is not None) + self.assertTrue(o.find('Owner/ID').text, self.conn.user_id) + self.assertTrue(o.find('Owner/DisplayName').text, + self.conn.user_id) + + # HEAD Bucket + status, headers, body = self.conn.make_request('HEAD', bucket) + self.assertEqual(status, 200) + + self.assertCommonResponseHeaders(headers) + self.assertTrue(headers['content-type'] is not None) + self.assertEqual(headers['content-length'], str(len(body))) + # TODO; requires consideration + # self.assertEqual(headers['transfer-encoding'], 'chunked') + + # DELETE Bucket + for obj in req_objects: + self.conn.make_request('DELETE', bucket, obj) + status, headers, body = self.conn.make_request('DELETE', bucket) + self.assertEqual(status, 204) + + self.assertCommonResponseHeaders(headers) + + def test_put_bucket_error(self): + status, headers, body = \ + self.conn.make_request('PUT', 'bucket+invalid') + self.assertEqual(get_error_code(body), 'InvalidBucketName') + + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = auth_error_conn.make_request('PUT', 'bucket') + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + + self.conn.make_request('PUT', 'bucket') + status, headers, body = self.conn.make_request('PUT', 'bucket') + self.assertEqual(get_error_code(body), 'BucketAlreadyExists') + + def test_put_bucket_with_LocationConstraint(self): + bucket = 'bucket' + xml = self._gen_location_xml('US') + status, headers, body = \ + self.conn.make_request('PUT', bucket, body=xml) + self.assertEqual(status, 200) + + def test_get_bucket_error(self): + self.conn.make_request('PUT', 'bucket') + + status, headers, body = \ + self.conn.make_request('GET', 'bucket+invalid') + self.assertEqual(get_error_code(body), 'InvalidBucketName') + + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = auth_error_conn.make_request('GET', 'bucket') + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + + status, headers, body = self.conn.make_request('GET', 'nothing') + self.assertEqual(get_error_code(body), 'NoSuchBucket') + + def _prepare_test_get_bucket(self, bucket, objects): + self.conn.make_request('PUT', bucket) + for obj in objects: + self.conn.make_request('PUT', bucket, obj) + + def test_get_bucket_with_delimiter(self): + bucket = 'bucket' + put_objects = ('object', 'object2', 'subdir/object', 'subdir2/object', + 'dir/subdir/object') + self._prepare_test_get_bucket(bucket, put_objects) + + delimiter = '/' + query = 'delimiter=%s' % delimiter + expect_objects = ('object', 'object2') + expect_prefixes = ('dir/', 'subdir/', 'subdir2/') + status, headers, body = \ + self.conn.make_request('GET', bucket, query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('Delimiter').text, delimiter) + resp_objects = elem.findall('./Contents') + self.assertEqual(len(list(resp_objects)), len(expect_objects)) + for i, o in enumerate(resp_objects): + self.assertEqual(o.find('Key').text, expect_objects[i]) + self.assertTrue(o.find('LastModified').text is not None) + self.assertRegexpMatches( + o.find('LastModified').text, + r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$') + self.assertTrue(o.find('ETag').text is not None) + self.assertTrue(o.find('Size').text is not None) + self.assertEqual(o.find('StorageClass').text, 'STANDARD') + self.assertTrue(o.find('Owner/ID').text, self.conn.user_id) + self.assertTrue(o.find('Owner/DisplayName').text, + self.conn.user_id) + resp_prefixes 
= elem.findall('CommonPrefixes') + self.assertEqual(len(resp_prefixes), len(expect_prefixes)) + for i, p in enumerate(resp_prefixes): + self.assertEqual(p.find('./Prefix').text, expect_prefixes[i]) + + def test_get_bucket_with_encoding_type(self): + bucket = 'bucket' + put_objects = ('object', 'object2') + self._prepare_test_get_bucket(bucket, put_objects) + + encoding_type = 'url' + query = 'encoding-type=%s' % encoding_type + status, headers, body = \ + self.conn.make_request('GET', bucket, query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('EncodingType').text, encoding_type) + + def test_get_bucket_with_marker(self): + bucket = 'bucket' + put_objects = ('object', 'object2', 'subdir/object', 'subdir2/object', + 'dir/subdir/object') + self._prepare_test_get_bucket(bucket, put_objects) + + marker = 'object' + query = 'marker=%s' % marker + expect_objects = ('object2', 'subdir/object', 'subdir2/object') + status, headers, body = \ + self.conn.make_request('GET', bucket, query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('Marker').text, marker) + resp_objects = elem.findall('./Contents') + self.assertEqual(len(list(resp_objects)), len(expect_objects)) + for i, o in enumerate(resp_objects): + self.assertEqual(o.find('Key').text, expect_objects[i]) + self.assertTrue(o.find('LastModified').text is not None) + self.assertRegexpMatches( + o.find('LastModified').text, + r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$') + self.assertTrue(o.find('ETag').text is not None) + self.assertTrue(o.find('Size').text is not None) + self.assertEqual(o.find('StorageClass').text, 'STANDARD') + self.assertTrue(o.find('Owner/ID').text, self.conn.user_id) + self.assertTrue(o.find('Owner/DisplayName').text, + self.conn.user_id) + + def test_get_bucket_with_max_keys(self): + bucket = 'bucket' + put_objects = ('object', 'object2', 'subdir/object', 'subdir2/object', + 'dir/subdir/object') + self._prepare_test_get_bucket(bucket, put_objects) + + max_keys = '2' + query = 'max-keys=%s' % max_keys + expect_objects = ('dir/subdir/object', 'object') + status, headers, body = \ + self.conn.make_request('GET', bucket, query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('MaxKeys').text, max_keys) + resp_objects = elem.findall('./Contents') + self.assertEqual(len(list(resp_objects)), len(expect_objects)) + for i, o in enumerate(resp_objects): + self.assertEqual(o.find('Key').text, expect_objects[i]) + self.assertTrue(o.find('LastModified').text is not None) + self.assertRegexpMatches( + o.find('LastModified').text, + r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$') + self.assertTrue(o.find('ETag').text is not None) + self.assertTrue(o.find('Size').text is not None) + self.assertEqual(o.find('StorageClass').text, 'STANDARD') + self.assertTrue(o.find('Owner/ID').text, self.conn.user_id) + self.assertTrue(o.find('Owner/DisplayName').text, + self.conn.user_id) + + def test_get_bucket_with_prefix(self): + bucket = 'bucket' + req_objects = ('object', 'object2', 'subdir/object', 'subdir2/object', + 'dir/subdir/object') + self._prepare_test_get_bucket(bucket, req_objects) + + prefix = 'object' + query = 'prefix=%s' % prefix + expect_objects = ('object', 'object2') + status, headers, body = \ + self.conn.make_request('GET', bucket, query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'ListBucketResult') + 
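For reference, the listing assertions in these GET-bucket tests boil down to pulling a few child elements (Contents/Key, LastModified, ETag, Size, StorageClass, CommonPrefixes/Prefix) out of a ListBucketResult document. A stdlib-only sketch of that parsing on a hand-written sample response, using xml.etree.ElementTree rather than the middleware's etree helper or a live endpoint (the sample values are illustrative):

import xml.etree.ElementTree as ET

# Hand-written sample with the same shape the assertions expect
# (the real response also carries the S3 XML namespace).
sample = """
<ListBucketResult>
  <Prefix>object</Prefix>
  <Delimiter>/</Delimiter>
  <Contents>
    <Key>object</Key>
    <LastModified>2018-05-30T10:17:02.000Z</LastModified>
    <ETag>"0cc175b9c0f1b6a831c399e269772661"</ETag>
    <Size>1</Size>
    <StorageClass>STANDARD</StorageClass>
  </Contents>
  <CommonPrefixes>
    <Prefix>subdir/</Prefix>
  </CommonPrefixes>
</ListBucketResult>
""".strip()

elem = ET.fromstring(sample)
keys = [c.find('Key').text for c in elem.findall('Contents')]
prefixes = [p.find('Prefix').text for p in elem.findall('CommonPrefixes')]
print(keys)      # ['object']
print(prefixes)  # ['subdir/']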
self.assertEqual(elem.find('Prefix').text, prefix) + resp_objects = elem.findall('./Contents') + self.assertEqual(len(list(resp_objects)), len(expect_objects)) + for i, o in enumerate(resp_objects): + self.assertEqual(o.find('Key').text, expect_objects[i]) + self.assertTrue(o.find('LastModified').text is not None) + self.assertRegexpMatches( + o.find('LastModified').text, + r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$') + self.assertTrue(o.find('ETag').text is not None) + self.assertTrue(o.find('Size').text is not None) + self.assertEqual(o.find('StorageClass').text, 'STANDARD') + self.assertTrue(o.find('Owner/ID').text, self.conn.user_id) + self.assertTrue(o.find('Owner/DisplayName').text, + self.conn.user_id) + + def test_get_bucket_v2_with_start_after(self): + bucket = 'bucket' + put_objects = ('object', 'object2', 'subdir/object', 'subdir2/object', + 'dir/subdir/object') + self._prepare_test_get_bucket(bucket, put_objects) + + marker = 'object' + query = 'list-type=2&start-after=%s' % marker + expect_objects = ('object2', 'subdir/object', 'subdir2/object') + status, headers, body = \ + self.conn.make_request('GET', bucket, query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('StartAfter').text, marker) + resp_objects = elem.findall('./Contents') + self.assertEqual(len(list(resp_objects)), len(expect_objects)) + for i, o in enumerate(resp_objects): + self.assertEqual(o.find('Key').text, expect_objects[i]) + self.assertTrue(o.find('LastModified').text is not None) + self.assertRegexpMatches( + o.find('LastModified').text, + r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$') + self.assertTrue(o.find('ETag').text is not None) + self.assertTrue(o.find('Size').text is not None) + self.assertEqual(o.find('StorageClass').text, 'STANDARD') + self.assertIsNone(o.find('Owner/ID')) + self.assertIsNone(o.find('Owner/DisplayName')) + + def test_get_bucket_v2_with_fetch_owner(self): + bucket = 'bucket' + put_objects = ('object', 'object2', 'subdir/object', 'subdir2/object', + 'dir/subdir/object') + self._prepare_test_get_bucket(bucket, put_objects) + + query = 'list-type=2&fetch-owner=true' + expect_objects = ('dir/subdir/object', 'object', 'object2', + 'subdir/object', 'subdir2/object') + status, headers, body = \ + self.conn.make_request('GET', bucket, query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('KeyCount').text, '5') + resp_objects = elem.findall('./Contents') + self.assertEqual(len(list(resp_objects)), len(expect_objects)) + for i, o in enumerate(resp_objects): + self.assertEqual(o.find('Key').text, expect_objects[i]) + self.assertTrue(o.find('LastModified').text is not None) + self.assertRegexpMatches( + o.find('LastModified').text, + r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$') + self.assertTrue(o.find('ETag').text is not None) + self.assertTrue(o.find('Size').text is not None) + self.assertEqual(o.find('StorageClass').text, 'STANDARD') + self.assertTrue(o.find('Owner/ID').text, self.conn.user_id) + self.assertTrue(o.find('Owner/DisplayName').text, + self.conn.user_id) + + def test_get_bucket_v2_with_continuation_token(self): + bucket = 'bucket' + put_objects = ('object', 'object2', 'subdir/object', 'subdir2/object', + 'dir/subdir/object') + self._prepare_test_get_bucket(bucket, put_objects) + + query = 'list-type=2&max-keys=3' + expect_objects = ('dir/subdir/object', 'object', 'object2') + status, headers, body = \ + self.conn.make_request('GET', 
bucket, query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('MaxKeys').text, '3') + self.assertEqual(elem.find('KeyCount').text, '3') + self.assertEqual(elem.find('IsTruncated').text, 'true') + next_cont_token_elem = elem.find('NextContinuationToken') + self.assertIsNotNone(next_cont_token_elem) + resp_objects = elem.findall('./Contents') + self.assertEqual(len(list(resp_objects)), len(expect_objects)) + for i, o in enumerate(resp_objects): + self.assertEqual(o.find('Key').text, expect_objects[i]) + self.assertTrue(o.find('LastModified').text is not None) + self.assertRegexpMatches( + o.find('LastModified').text, + r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$') + self.assertTrue(o.find('ETag').text is not None) + self.assertTrue(o.find('Size').text is not None) + self.assertEqual(o.find('StorageClass').text, 'STANDARD') + self.assertIsNone(o.find('Owner/ID')) + self.assertIsNone(o.find('Owner/DisplayName')) + + query = 'list-type=2&max-keys=3&continuation-token=%s' % \ + next_cont_token_elem.text + expect_objects = ('subdir/object', 'subdir2/object') + status, headers, body = \ + self.conn.make_request('GET', bucket, query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('MaxKeys').text, '3') + self.assertEqual(elem.find('KeyCount').text, '2') + self.assertEqual(elem.find('IsTruncated').text, 'false') + self.assertIsNone(elem.find('NextContinuationToken')) + cont_token_elem = elem.find('ContinuationToken') + self.assertEqual(cont_token_elem.text, next_cont_token_elem.text) + resp_objects = elem.findall('./Contents') + self.assertEqual(len(list(resp_objects)), len(expect_objects)) + for i, o in enumerate(resp_objects): + self.assertEqual(o.find('Key').text, expect_objects[i]) + self.assertTrue(o.find('LastModified').text is not None) + self.assertRegexpMatches( + o.find('LastModified').text, + r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$') + self.assertTrue(o.find('ETag').text is not None) + self.assertTrue(o.find('Size').text is not None) + self.assertEqual(o.find('StorageClass').text, 'STANDARD') + self.assertIsNone(o.find('Owner/ID')) + self.assertIsNone(o.find('Owner/DisplayName')) + + def test_head_bucket_error(self): + self.conn.make_request('PUT', 'bucket') + + status, headers, body = \ + self.conn.make_request('HEAD', 'bucket+invalid') + self.assertEqual(status, 400) + self.assertEqual(body, '') # sanity + + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + auth_error_conn.make_request('HEAD', 'bucket') + self.assertEqual(status, 403) + self.assertEqual(body, '') # sanity + + status, headers, body = self.conn.make_request('HEAD', 'nothing') + self.assertEqual(status, 404) + self.assertEqual(body, '') # sanity + + def test_delete_bucket_error(self): + status, headers, body = \ + self.conn.make_request('DELETE', 'bucket+invalid') + self.assertEqual(get_error_code(body), 'InvalidBucketName') + + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + auth_error_conn.make_request('DELETE', 'bucket') + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + + status, headers, body = self.conn.make_request('DELETE', 'bucket') + self.assertEqual(get_error_code(body), 'NoSuchBucket') + + def test_bucket_invalid_method_error(self): + # non existed verb in the controller + status, headers, body = \ + self.conn.make_request('GETPUT', 'bucket') + self.assertEqual(get_error_code(body), 
'MethodNotAllowed') + # the method exists in the controller but deny as MethodNotAllowed + status, headers, body = \ + self.conn.make_request('_delete_segments_bucket', 'bucket') + self.assertEqual(get_error_code(body), 'MethodNotAllowed') + + +class TestS3ApiBucketSigV4(TestS3ApiBucket): + @classmethod + def setUpClass(cls): + os.environ['S3_USE_SIGV4'] = "True" + + @classmethod + def tearDownClass(cls): + del os.environ['S3_USE_SIGV4'] + + def setUp(self): + super(TestS3ApiBucket, self).setUp() + + +if __name__ == '__main__': + unittest2.main() diff -Nru swift-2.17.0/test/functional/s3api/test_multi_delete.py swift-2.18.0/test/functional/s3api/test_multi_delete.py --- swift-2.17.0/test/functional/s3api/test_multi_delete.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/functional/s3api/test_multi_delete.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,248 @@ +# Copyright (c) 2015 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest2 +import os +import test.functional as tf +from swift.common.middleware.s3api.etree import fromstring, tostring, Element, \ + SubElement +from swift.common.middleware.s3api.controllers.multi_delete import \ + MAX_MULTI_DELETE_BODY_SIZE + +from test.functional.s3api import S3ApiBase +from test.functional.s3api.s3_test_client import Connection +from test.functional.s3api.utils import get_error_code, calculate_md5 + + +def setUpModule(): + tf.setup_package() + + +def tearDownModule(): + tf.teardown_package() + + +class TestS3ApiMultiDelete(S3ApiBase): + def setUp(self): + super(TestS3ApiMultiDelete, self).setUp() + + def _prepare_test_delete_multi_objects(self, bucket, objects): + self.conn.make_request('PUT', bucket) + for obj in objects: + self.conn.make_request('PUT', bucket, obj) + + def _gen_multi_delete_xml(self, objects, quiet=None): + elem = Element('Delete') + if quiet: + SubElement(elem, 'Quiet').text = quiet + for key in objects: + obj = SubElement(elem, 'Object') + SubElement(obj, 'Key').text = key + + return tostring(elem, use_s3ns=False) + + def _gen_invalid_multi_delete_xml(self, hasObjectTag=False): + elem = Element('Delete') + if hasObjectTag: + obj = SubElement(elem, 'Object') + SubElement(obj, 'Key').text = '' + + return tostring(elem, use_s3ns=False) + + def test_delete_multi_objects(self): + bucket = 'bucket' + put_objects = ['obj%s' % var for var in xrange(4)] + self._prepare_test_delete_multi_objects(bucket, put_objects) + query = 'delete' + + # Delete an object via MultiDelete API + req_objects = ['obj0'] + xml = self._gen_multi_delete_xml(req_objects) + content_md5 = calculate_md5(xml) + status, headers, body = \ + self.conn.make_request('POST', bucket, body=xml, + headers={'Content-MD5': content_md5}, + query=query) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self.assertTrue(headers['content-type'] is not None) + self.assertEqual(headers['content-length'], str(len(body))) + elem = fromstring(body) + resp_objects = elem.findall('Deleted') + 
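For reference, the body POSTed to ?delete in these tests is a small Delete document with one Object/Key entry per key and an optional Quiet flag, sent together with a Content-MD5 header. A stdlib sketch of the same shape _gen_multi_delete_xml builds (namespace omitted; the helper name below is illustrative):

import xml.etree.ElementTree as ET

def multi_delete_body(keys, quiet=None):
    # Same element layout as _gen_multi_delete_xml above.
    delete = ET.Element('Delete')
    if quiet is not None:
        ET.SubElement(delete, 'Quiet').text = quiet   # 'true' or 'false'
    for key in keys:
        obj = ET.SubElement(delete, 'Object')
        ET.SubElement(obj, 'Key').text = key
    return ET.tostring(delete)

print(multi_delete_body(['obj0', 'obj1']))
# -> <Delete><Object><Key>obj0</Key></Object><Object><Key>obj1</Key></Object></Delete>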
self.assertEqual(len(resp_objects), len(req_objects)) + for o in resp_objects: + self.assertTrue(o.find('Key').text in req_objects) + + # Delete 2 objects via MultiDelete API + req_objects = ['obj1', 'obj2'] + xml = self._gen_multi_delete_xml(req_objects) + content_md5 = calculate_md5(xml) + status, headers, body = \ + self.conn.make_request('POST', bucket, body=xml, + headers={'Content-MD5': content_md5}, + query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'DeleteResult') + resp_objects = elem.findall('Deleted') + self.assertEqual(len(resp_objects), len(req_objects)) + for o in resp_objects: + self.assertTrue(o.find('Key').text in req_objects) + + # Delete 2 objects via MultiDelete API but one (obj4) doesn't exist. + req_objects = ['obj3', 'obj4'] + xml = self._gen_multi_delete_xml(req_objects) + content_md5 = calculate_md5(xml) + status, headers, body = \ + self.conn.make_request('POST', bucket, body=xml, + headers={'Content-MD5': content_md5}, + query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'DeleteResult') + resp_objects = elem.findall('Deleted') + # S3 assumes a NoSuchKey object as deleted. + self.assertEqual(len(resp_objects), len(req_objects)) + for o in resp_objects: + self.assertTrue(o.find('Key').text in req_objects) + + # Delete 2 objects via MultiDelete API but no objects exist + req_objects = ['obj4', 'obj5'] + xml = self._gen_multi_delete_xml(req_objects) + content_md5 = calculate_md5(xml) + status, headers, body = \ + self.conn.make_request('POST', bucket, body=xml, + headers={'Content-MD5': content_md5}, + query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'DeleteResult') + resp_objects = elem.findall('Deleted') + self.assertEqual(len(resp_objects), len(req_objects)) + for o in resp_objects: + self.assertTrue(o.find('Key').text in req_objects) + + def test_delete_multi_objects_error(self): + bucket = 'bucket' + put_objects = ['obj'] + self._prepare_test_delete_multi_objects(bucket, put_objects) + xml = self._gen_multi_delete_xml(put_objects) + content_md5 = calculate_md5(xml) + query = 'delete' + + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + auth_error_conn.make_request('POST', bucket, body=xml, + headers={ + 'Content-MD5': content_md5 + }, + query=query) + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + + status, headers, body = \ + self.conn.make_request('POST', 'nothing', body=xml, + headers={'Content-MD5': content_md5}, + query=query) + self.assertEqual(get_error_code(body), 'NoSuchBucket') + + # without Object tag + xml = self._gen_invalid_multi_delete_xml() + content_md5 = calculate_md5(xml) + status, headers, body = \ + self.conn.make_request('POST', bucket, body=xml, + headers={'Content-MD5': content_md5}, + query=query) + self.assertEqual(get_error_code(body), 'MalformedXML') + + # without value of Key tag + xml = self._gen_invalid_multi_delete_xml(hasObjectTag=True) + content_md5 = calculate_md5(xml) + status, headers, body = \ + self.conn.make_request('POST', bucket, body=xml, + headers={'Content-MD5': content_md5}, + query=query) + self.assertEqual(get_error_code(body), 'UserKeyMustBeSpecified') + + # specified number of objects are over max_multi_delete_objects + # (Default 1000), but xml size is smaller than 61365 bytes. 
+ req_objects = ['obj%s' for var in xrange(1001)] + xml = self._gen_multi_delete_xml(req_objects) + self.assertTrue(len(xml.encode('utf-8')) <= MAX_MULTI_DELETE_BODY_SIZE) + content_md5 = calculate_md5(xml) + status, headers, body = \ + self.conn.make_request('POST', bucket, body=xml, + headers={'Content-MD5': content_md5}, + query=query) + self.assertEqual(get_error_code(body), 'MalformedXML') + + # specified xml size is over 61365 bytes, but number of objects are + # smaller than max_multi_delete_objects. + obj = 'a' * 1024 + req_objects = [obj + str(var) for var in xrange(999)] + xml = self._gen_multi_delete_xml(req_objects) + self.assertTrue(len(xml.encode('utf-8')) > MAX_MULTI_DELETE_BODY_SIZE) + content_md5 = calculate_md5(xml) + status, headers, body = \ + self.conn.make_request('POST', bucket, body=xml, + headers={'Content-MD5': content_md5}, + query=query) + self.assertEqual(get_error_code(body), 'MalformedXML') + + def test_delete_multi_objects_with_quiet(self): + bucket = 'bucket' + put_objects = ['obj'] + query = 'delete' + + # with Quiet true + quiet = 'true' + self._prepare_test_delete_multi_objects(bucket, put_objects) + xml = self._gen_multi_delete_xml(put_objects, quiet) + content_md5 = calculate_md5(xml) + status, headers, body = \ + self.conn.make_request('POST', bucket, body=xml, + headers={'Content-MD5': content_md5}, + query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'DeleteResult') + resp_objects = elem.findall('Deleted') + self.assertEqual(len(resp_objects), 0) + + # with Quiet false + quiet = 'false' + self._prepare_test_delete_multi_objects(bucket, put_objects) + xml = self._gen_multi_delete_xml(put_objects, quiet) + content_md5 = calculate_md5(xml) + status, headers, body = \ + self.conn.make_request('POST', bucket, body=xml, + headers={'Content-MD5': content_md5}, + query=query) + self.assertEqual(status, 200) + elem = fromstring(body, 'DeleteResult') + resp_objects = elem.findall('Deleted') + self.assertEqual(len(resp_objects), 1) + + +class TestS3ApiMultiDeleteSigV4(TestS3ApiMultiDelete): + @classmethod + def setUpClass(cls): + os.environ['S3_USE_SIGV4'] = "True" + + @classmethod + def tearDownClass(cls): + del os.environ['S3_USE_SIGV4'] + + def setUp(self): + super(TestS3ApiMultiDeleteSigV4, self).setUp() + + +if __name__ == '__main__': + unittest2.main() diff -Nru swift-2.17.0/test/functional/s3api/test_multi_upload.py swift-2.18.0/test/functional/s3api/test_multi_upload.py --- swift-2.17.0/test/functional/s3api/test_multi_upload.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/functional/s3api/test_multi_upload.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,849 @@ +# Copyright (c) 2015 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
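The test class that follows exercises the three-step multipart protocol: POST ?uploads to obtain an UploadId, PUT each part with ?partNumber=N&uploadId=..., then POST a CompleteMultipartUpload document to ?uploadId=.... A compressed sketch of that flow, written against an assumed make_request(method, bucket, key, body=None, query=None) callable shaped like the test client's; the fake_request stub only records calls so the sketch stays self-contained:

import xml.etree.ElementTree as ET

def multipart_upload(make_request, bucket, key, parts):
    # 1. Initiate: the UploadId comes back in InitiateMultipartUploadResult.
    status, headers, body = make_request('POST', bucket, key, query='uploads')
    upload_id = ET.fromstring(body).find('UploadId').text

    # 2. Upload each part; the server hands back one ETag per part.
    etags = []
    for num, data in enumerate(parts, 1):
        query = 'partNumber=%s&uploadId=%s' % (num, upload_id)
        status, headers, body = make_request('PUT', bucket, key,
                                             body=data, query=query)
        etags.append(headers['etag'])

    # 3. Complete: POST the part list, same shape as _gen_comp_xml builds.
    comp = ET.Element('CompleteMultipartUpload')
    for num, etag in enumerate(etags, 1):
        part = ET.SubElement(comp, 'Part')
        ET.SubElement(part, 'PartNumber').text = str(num)
        ET.SubElement(part, 'ETag').text = etag
    return make_request('POST', bucket, key, body=ET.tostring(comp),
                        query='uploadId=%s' % upload_id)

def fake_request(method, bucket, key, body=None, query=None):
    # Stand-in transport so the sketch runs without a cluster.
    print('%s /%s/%s?%s' % (method, bucket, key, query))
    if query == 'uploads':
        return 200, {}, ('<InitiateMultipartUploadResult><UploadId>X'
                         '</UploadId></InitiateMultipartUploadResult>')
    return 200, {'etag': '"d41d8cd98f00b204e9800998ecf8427e"'}, ''

multipart_upload(fake_request, 'bucket', 'obj1', ['a' * 4, 'b' * 4])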
+ +import base64 +import unittest2 +import os +import boto + +# For an issue with venv and distutils, disable pylint message here +# pylint: disable-msg=E0611,F0401 +from distutils.version import StrictVersion + +from hashlib import md5 +from itertools import izip, izip_longest + +import test.functional as tf +from swift.common.middleware.s3api.etree import fromstring, tostring, Element, \ + SubElement +from swift.common.middleware.s3api.utils import mktime + +from test.functional.s3api import S3ApiBase +from test.functional.s3api.s3_test_client import Connection +from test.functional.s3api.utils import get_error_code, get_error_msg + + +def setUpModule(): + tf.setup_package() + + +def tearDownModule(): + tf.teardown_package() + + +class TestS3ApiMultiUpload(S3ApiBase): + def setUp(self): + super(TestS3ApiMultiUpload, self).setUp() + if not tf.cluster_info['s3api'].get('allow_multipart_uploads', False): + raise tf.SkipTest('multipart upload is not enebled') + + self.min_segment_size = int(tf.cluster_info['s3api'].get( + 'min_segment_size', 5242880)) + + def _gen_comp_xml(self, etags): + elem = Element('CompleteMultipartUpload') + for i, etag in enumerate(etags): + elem_part = SubElement(elem, 'Part') + SubElement(elem_part, 'PartNumber').text = str(i + 1) + SubElement(elem_part, 'ETag').text = etag + return tostring(elem) + + def _initiate_multi_uploads_result_generator(self, bucket, keys, + headers=None, trials=1): + if headers is None: + headers = [None] * len(keys) + self.conn.make_request('PUT', bucket) + query = 'uploads' + for key, key_headers in izip_longest(keys, headers): + for i in xrange(trials): + status, resp_headers, body = \ + self.conn.make_request('POST', bucket, key, + headers=key_headers, query=query) + yield status, resp_headers, body + + def _upload_part(self, bucket, key, upload_id, content=None, part_num=1): + query = 'partNumber=%s&uploadId=%s' % (part_num, upload_id) + content = content if content else 'a' * self.min_segment_size + status, headers, body = \ + self.conn.make_request('PUT', bucket, key, body=content, + query=query) + return status, headers, body + + def _upload_part_copy(self, src_bucket, src_obj, dst_bucket, dst_key, + upload_id, part_num=1, src_range=None): + + src_path = '%s/%s' % (src_bucket, src_obj) + query = 'partNumber=%s&uploadId=%s' % (part_num, upload_id) + req_headers = {'X-Amz-Copy-Source': src_path} + if src_range: + req_headers['X-Amz-Copy-Source-Range'] = src_range + status, headers, body = \ + self.conn.make_request('PUT', dst_bucket, dst_key, + headers=req_headers, + query=query) + elem = fromstring(body, 'CopyPartResult') + etag = elem.find('ETag').text.strip('"') + return status, headers, body, etag + + def _complete_multi_upload(self, bucket, key, upload_id, xml): + query = 'uploadId=%s' % upload_id + status, headers, body = \ + self.conn.make_request('POST', bucket, key, body=xml, + query=query) + return status, headers, body + + def test_object_multi_upload(self): + bucket = 'bucket' + keys = ['obj1', 'obj2', 'obj3'] + headers = [None, + {'Content-MD5': base64.b64encode('a' * 16).strip()}, + {'Etag': 'nonsense'}] + uploads = [] + + results_generator = self._initiate_multi_uploads_result_generator( + bucket, keys, headers=headers) + + # Initiate Multipart Upload + for expected_key, (status, headers, body) in \ + izip(keys, results_generator): + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self.assertTrue('content-type' in headers) + self.assertEqual(headers['content-type'], 'application/xml') + 
self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], str(len(body))) + elem = fromstring(body, 'InitiateMultipartUploadResult') + self.assertEqual(elem.find('Bucket').text, bucket) + key = elem.find('Key').text + self.assertEqual(expected_key, key) + upload_id = elem.find('UploadId').text + self.assertTrue(upload_id is not None) + self.assertTrue((key, upload_id) not in uploads) + uploads.append((key, upload_id)) + + self.assertEqual(len(uploads), len(keys)) # sanity + + # List Multipart Uploads + query = 'uploads' + status, headers, body = \ + self.conn.make_request('GET', bucket, query=query) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self.assertTrue('content-type' in headers) + self.assertEqual(headers['content-type'], 'application/xml') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], str(len(body))) + elem = fromstring(body, 'ListMultipartUploadsResult') + self.assertEqual(elem.find('Bucket').text, bucket) + self.assertIsNone(elem.find('KeyMarker').text) + self.assertEqual(elem.find('NextKeyMarker').text, uploads[-1][0]) + self.assertIsNone(elem.find('UploadIdMarker').text) + self.assertEqual(elem.find('NextUploadIdMarker').text, uploads[-1][1]) + self.assertEqual(elem.find('MaxUploads').text, '1000') + self.assertTrue(elem.find('EncodingType') is None) + self.assertEqual(elem.find('IsTruncated').text, 'false') + self.assertEqual(len(elem.findall('Upload')), 3) + for (expected_key, expected_upload_id), u in \ + izip(uploads, elem.findall('Upload')): + key = u.find('Key').text + upload_id = u.find('UploadId').text + self.assertEqual(expected_key, key) + self.assertEqual(expected_upload_id, upload_id) + self.assertEqual(u.find('Initiator/ID').text, + self.conn.user_id) + self.assertEqual(u.find('Initiator/DisplayName').text, + self.conn.user_id) + self.assertEqual(u.find('Owner/ID').text, self.conn.user_id) + self.assertEqual(u.find('Owner/DisplayName').text, + self.conn.user_id) + self.assertEqual(u.find('StorageClass').text, 'STANDARD') + self.assertTrue(u.find('Initiated').text is not None) + + # Upload Part + key, upload_id = uploads[0] + content = 'a' * self.min_segment_size + etag = md5(content).hexdigest() + status, headers, body = \ + self._upload_part(bucket, key, upload_id, content) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers, etag) + self.assertTrue('content-type' in headers) + self.assertEqual(headers['content-type'], 'text/html; charset=UTF-8') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], '0') + expected_parts_list = [(headers['etag'], mktime(headers['date']))] + + # Upload Part Copy + key, upload_id = uploads[1] + src_bucket = 'bucket2' + src_obj = 'obj3' + src_content = 'b' * self.min_segment_size + etag = md5(src_content).hexdigest() + + # prepare src obj + self.conn.make_request('PUT', src_bucket) + self.conn.make_request('PUT', src_bucket, src_obj, body=src_content) + _, headers, _ = self.conn.make_request('HEAD', src_bucket, src_obj) + self.assertCommonResponseHeaders(headers) + + status, headers, body, resp_etag = \ + self._upload_part_copy(src_bucket, src_obj, bucket, + key, upload_id) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self.assertTrue('content-type' in headers) + self.assertEqual(headers['content-type'], 'application/xml') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], str(len(body))) 
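Upload Part Copy, checked here, returns its ETag and LastModified in a CopyPartResult body rather than in response headers, which is why the test asserts that no etag header is present and why _upload_part_copy strips the surrounding quotes from the body value. A small illustration of that response handling on a canned, hand-written sample (stdlib only):

import xml.etree.ElementTree as ET

sample = ('<CopyPartResult>'
          '<LastModified>2018-05-30T10:17:02.000Z</LastModified>'
          '<ETag>"0cc175b9c0f1b6a831c399e269772661"</ETag>'
          '</CopyPartResult>')

elem = ET.fromstring(sample)
last_modified = elem.find('LastModified').text
etag = elem.find('ETag').text.strip('"')   # same strip as _upload_part_copy
print('%s %s' % (last_modified, etag))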
+ self.assertTrue('etag' not in headers) + elem = fromstring(body, 'CopyPartResult') + + last_modified = elem.find('LastModified').text + self.assertTrue(last_modified is not None) + + self.assertEqual(resp_etag, etag) + + # Check last-modified timestamp + key, upload_id = uploads[1] + query = 'uploadId=%s' % upload_id + status, headers, body = \ + self.conn.make_request('GET', bucket, key, query=query) + + self.assertEqual(200, status) + elem = fromstring(body, 'ListPartsResult') + + # FIXME: COPY result drops milli/microseconds but GET doesn't + last_modified_gets = [p.find('LastModified').text + for p in elem.iterfind('Part')] + self.assertEqual( + last_modified_gets[0].rsplit('.', 1)[0], + last_modified.rsplit('.', 1)[0], + '%r != %r' % (last_modified_gets[0], last_modified)) + # There should be *exactly* two parts in the result + self.assertEqual(1, len(last_modified_gets)) + + # List Parts + key, upload_id = uploads[0] + query = 'uploadId=%s' % upload_id + status, headers, body = \ + self.conn.make_request('GET', bucket, key, query=query) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self.assertTrue('content-type' in headers) + self.assertEqual(headers['content-type'], 'application/xml') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], str(len(body))) + elem = fromstring(body, 'ListPartsResult') + self.assertEqual(elem.find('Bucket').text, bucket) + self.assertEqual(elem.find('Key').text, key) + self.assertEqual(elem.find('UploadId').text, upload_id) + self.assertEqual(elem.find('Initiator/ID').text, self.conn.user_id) + self.assertEqual(elem.find('Initiator/DisplayName').text, + self.conn.user_id) + self.assertEqual(elem.find('Owner/ID').text, self.conn.user_id) + self.assertEqual(elem.find('Owner/DisplayName').text, + self.conn.user_id) + self.assertEqual(elem.find('StorageClass').text, 'STANDARD') + self.assertEqual(elem.find('PartNumberMarker').text, '0') + self.assertEqual(elem.find('NextPartNumberMarker').text, '1') + self.assertEqual(elem.find('MaxParts').text, '1000') + self.assertEqual(elem.find('IsTruncated').text, 'false') + self.assertEqual(len(elem.findall('Part')), 1) + + # etags will be used to generate xml for Complete Multipart Upload + etags = [] + for (expected_etag, expected_date), p in \ + izip(expected_parts_list, elem.findall('Part')): + last_modified = p.find('LastModified').text + self.assertTrue(last_modified is not None) + # TODO: sanity check + # (kota_) How do we check the sanity? + # the last-modified header drops milli-seconds info + # by the constraint of the format. 
+ # For now, we can do either the format check or round check + # last_modified_from_xml = mktime(last_modified) + # self.assertEqual(expected_date, + # last_modified_from_xml) + self.assertEqual(expected_etag, p.find('ETag').text) + self.assertEqual(self.min_segment_size, int(p.find('Size').text)) + etags.append(p.find('ETag').text) + + # Abort Multipart Uploads + # note that uploads[1] has part data while uploads[2] does not + for key, upload_id in uploads[1:]: + query = 'uploadId=%s' % upload_id + status, headers, body = \ + self.conn.make_request('DELETE', bucket, key, query=query) + self.assertEqual(status, 204) + self.assertCommonResponseHeaders(headers) + self.assertTrue('content-type' in headers) + self.assertEqual(headers['content-type'], + 'text/html; charset=UTF-8') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], '0') + + # Complete Multipart Upload + key, upload_id = uploads[0] + xml = self._gen_comp_xml(etags) + status, headers, body = \ + self._complete_multi_upload(bucket, key, upload_id, xml) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self.assertTrue('content-type' in headers) + self.assertEqual(headers['content-type'], 'application/xml') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], str(len(body))) + elem = fromstring(body, 'CompleteMultipartUploadResult') + # TODO: use tf.config value + self.assertEqual( + 'http://%s:%s/bucket/obj1' % (self.conn.host, self.conn.port), + elem.find('Location').text) + self.assertEqual(elem.find('Bucket').text, bucket) + self.assertEqual(elem.find('Key').text, key) + # TODO: confirm completed etag value + self.assertTrue(elem.find('ETag').text is not None) + + def test_initiate_multi_upload_error(self): + bucket = 'bucket' + key = 'obj' + self.conn.make_request('PUT', bucket) + query = 'uploads' + + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + auth_error_conn.make_request('POST', bucket, key, query=query) + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + + status, resp_headers, body = \ + self.conn.make_request('POST', 'nothing', key, query=query) + self.assertEqual(get_error_code(body), 'NoSuchBucket') + + def test_list_multi_uploads_error(self): + bucket = 'bucket' + self.conn.make_request('PUT', bucket) + query = 'uploads' + + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + auth_error_conn.make_request('GET', bucket, query=query) + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + + status, headers, body = \ + self.conn.make_request('GET', 'nothing', query=query) + self.assertEqual(get_error_code(body), 'NoSuchBucket') + + def test_upload_part_error(self): + bucket = 'bucket' + self.conn.make_request('PUT', bucket) + query = 'uploads' + key = 'obj' + status, headers, body = \ + self.conn.make_request('POST', bucket, key, query=query) + elem = fromstring(body, 'InitiateMultipartUploadResult') + upload_id = elem.find('UploadId').text + + query = 'partNumber=%s&uploadId=%s' % (1, upload_id) + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + auth_error_conn.make_request('PUT', bucket, key, query=query) + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + + status, headers, body = \ + self.conn.make_request('PUT', 'nothing', key, query=query) + self.assertEqual(get_error_code(body), 'NoSuchBucket') + + query = 'partNumber=%s&uploadId=%s' % (1, 'nothing') + 
status, headers, body = \ + self.conn.make_request('PUT', bucket, key, query=query) + self.assertEqual(get_error_code(body), 'NoSuchUpload') + + query = 'partNumber=%s&uploadId=%s' % (0, upload_id) + status, headers, body = \ + self.conn.make_request('PUT', bucket, key, query=query) + self.assertEqual(get_error_code(body), 'InvalidArgument') + err_msg = 'Part number must be an integer between 1 and' + self.assertTrue(err_msg in get_error_msg(body)) + + def test_upload_part_copy_error(self): + src_bucket = 'src' + src_obj = 'src' + self.conn.make_request('PUT', src_bucket) + self.conn.make_request('PUT', src_bucket, src_obj) + src_path = '%s/%s' % (src_bucket, src_obj) + + bucket = 'bucket' + self.conn.make_request('PUT', bucket) + key = 'obj' + query = 'uploads' + status, headers, body = \ + self.conn.make_request('POST', bucket, key, query=query) + elem = fromstring(body, 'InitiateMultipartUploadResult') + upload_id = elem.find('UploadId').text + + query = 'partNumber=%s&uploadId=%s' % (1, upload_id) + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + auth_error_conn.make_request('PUT', bucket, key, + headers={ + 'X-Amz-Copy-Source': src_path + }, + query=query) + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + + status, headers, body = \ + self.conn.make_request('PUT', 'nothing', key, + headers={'X-Amz-Copy-Source': src_path}, + query=query) + self.assertEqual(get_error_code(body), 'NoSuchBucket') + + query = 'partNumber=%s&uploadId=%s' % (1, 'nothing') + status, headers, body = \ + self.conn.make_request('PUT', bucket, key, + headers={'X-Amz-Copy-Source': src_path}, + query=query) + self.assertEqual(get_error_code(body), 'NoSuchUpload') + + src_path = '%s/%s' % (src_bucket, 'nothing') + query = 'partNumber=%s&uploadId=%s' % (1, upload_id) + status, headers, body = \ + self.conn.make_request('PUT', bucket, key, + headers={'X-Amz-Copy-Source': src_path}, + query=query) + self.assertEqual(get_error_code(body), 'NoSuchKey') + + def test_list_parts_error(self): + bucket = 'bucket' + self.conn.make_request('PUT', bucket) + key = 'obj' + query = 'uploads' + status, headers, body = \ + self.conn.make_request('POST', bucket, key, query=query) + elem = fromstring(body, 'InitiateMultipartUploadResult') + upload_id = elem.find('UploadId').text + + query = 'uploadId=%s' % upload_id + auth_error_conn = Connection(aws_secret_key='invalid') + + status, headers, body = \ + auth_error_conn.make_request('GET', bucket, key, query=query) + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + + status, headers, body = \ + self.conn.make_request('GET', 'nothing', key, query=query) + self.assertEqual(get_error_code(body), 'NoSuchBucket') + + query = 'uploadId=%s' % 'nothing' + status, headers, body = \ + self.conn.make_request('GET', bucket, key, query=query) + self.assertEqual(get_error_code(body), 'NoSuchUpload') + + def test_abort_multi_upload_error(self): + bucket = 'bucket' + self.conn.make_request('PUT', bucket) + key = 'obj' + query = 'uploads' + status, headers, body = \ + self.conn.make_request('POST', bucket, key, query=query) + elem = fromstring(body, 'InitiateMultipartUploadResult') + upload_id = elem.find('UploadId').text + self._upload_part(bucket, key, upload_id) + + query = 'uploadId=%s' % upload_id + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + auth_error_conn.make_request('DELETE', bucket, key, query=query) + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + + status, 
headers, body = \ + self.conn.make_request('DELETE', 'nothing', key, query=query) + self.assertEqual(get_error_code(body), 'NoSuchBucket') + + status, headers, body = \ + self.conn.make_request('DELETE', bucket, 'nothing', query=query) + self.assertEqual(get_error_code(body), 'NoSuchUpload') + + query = 'uploadId=%s' % 'nothing' + status, headers, body = \ + self.conn.make_request('DELETE', bucket, key, query=query) + self.assertEqual(get_error_code(body), 'NoSuchUpload') + + def test_complete_multi_upload_error(self): + bucket = 'bucket' + keys = ['obj', 'obj2'] + self.conn.make_request('PUT', bucket) + query = 'uploads' + status, headers, body = \ + self.conn.make_request('POST', bucket, keys[0], query=query) + elem = fromstring(body, 'InitiateMultipartUploadResult') + upload_id = elem.find('UploadId').text + + etags = [] + for i in xrange(1, 3): + query = 'partNumber=%s&uploadId=%s' % (i, upload_id) + status, headers, body = \ + self.conn.make_request('PUT', bucket, keys[0], query=query) + etags.append(headers['etag']) + xml = self._gen_comp_xml(etags) + + # part 1 too small + query = 'uploadId=%s' % upload_id + status, headers, body = \ + self.conn.make_request('POST', bucket, keys[0], body=xml, + query=query) + self.assertEqual(get_error_code(body), 'EntityTooSmall') + + # invalid credentials + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + auth_error_conn.make_request('POST', bucket, keys[0], body=xml, + query=query) + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + + # wrong/missing bucket + status, headers, body = \ + self.conn.make_request('POST', 'nothing', keys[0], query=query) + self.assertEqual(get_error_code(body), 'NoSuchBucket') + + # wrong upload ID + query = 'uploadId=%s' % 'nothing' + status, headers, body = \ + self.conn.make_request('POST', bucket, keys[0], body=xml, + query=query) + self.assertEqual(get_error_code(body), 'NoSuchUpload') + + # without Part tag in xml + query = 'uploadId=%s' % upload_id + xml = self._gen_comp_xml([]) + status, headers, body = \ + self.conn.make_request('POST', bucket, keys[0], body=xml, + query=query) + self.assertEqual(get_error_code(body), 'MalformedXML') + + # with invalid etag in xml + invalid_etag = 'invalid' + xml = self._gen_comp_xml([invalid_etag]) + status, headers, body = \ + self.conn.make_request('POST', bucket, keys[0], body=xml, + query=query) + self.assertEqual(get_error_code(body), 'InvalidPart') + + # without part in Swift + query = 'uploads' + status, headers, body = \ + self.conn.make_request('POST', bucket, keys[1], query=query) + elem = fromstring(body, 'InitiateMultipartUploadResult') + upload_id = elem.find('UploadId').text + query = 'uploadId=%s' % upload_id + xml = self._gen_comp_xml([etags[0]]) + status, headers, body = \ + self.conn.make_request('POST', bucket, keys[1], body=xml, + query=query) + self.assertEqual(get_error_code(body), 'InvalidPart') + + def test_complete_upload_min_segment_size(self): + bucket = 'bucket' + key = 'obj' + self.conn.make_request('PUT', bucket) + query = 'uploads' + status, headers, body = \ + self.conn.make_request('POST', bucket, key, query=query) + elem = fromstring(body, 'InitiateMultipartUploadResult') + upload_id = elem.find('UploadId').text + + # multi parts with no body + etags = [] + for i in xrange(1, 3): + query = 'partNumber=%s&uploadId=%s' % (i, upload_id) + status, headers, body = \ + self.conn.make_request('PUT', bucket, key, query=query) + etags.append(headers['etag']) + xml = self._gen_comp_xml(etags) + + 
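The surrounding test_complete_upload_min_segment_size exercises the rule that every part except the last must reach min_segment_size (5242880 bytes by default here), otherwise completion fails with EntityTooSmall, while a single short part is accepted. A tiny predicate capturing that rule as the test cases demonstrate it (a sketch of the constraint only, not the middleware's actual validation code):

MIN_SEGMENT_SIZE = 5242880  # the default min_segment_size used above

def parts_acceptable(part_sizes):
    # Every part except the last must reach the minimum; a lone short part
    # is fine, matching the 200 this test gets for a single 2-byte part.
    return all(size >= MIN_SEGMENT_SIZE for size in part_sizes[:-1])

print(parts_acceptable([2]))                        # True  -> completes
print(parts_acceptable([5242880, 5242880, 2]))      # True  -> completes
print(parts_acceptable([5242880, 5242879, 2]))      # False -> EntityTooSmall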
query = 'uploadId=%s' % upload_id + status, headers, body = \ + self.conn.make_request('POST', bucket, key, body=xml, + query=query) + self.assertEqual(get_error_code(body), 'EntityTooSmall') + + # multi parts with all parts less than min segment size + etags = [] + for i in xrange(1, 3): + query = 'partNumber=%s&uploadId=%s' % (i, upload_id) + status, headers, body = \ + self.conn.make_request('PUT', bucket, key, query=query, + body='AA') + etags.append(headers['etag']) + xml = self._gen_comp_xml(etags) + + query = 'uploadId=%s' % upload_id + status, headers, body = \ + self.conn.make_request('POST', bucket, key, body=xml, + query=query) + self.assertEqual(get_error_code(body), 'EntityTooSmall') + + # one part and less than min segment size + etags = [] + query = 'partNumber=1&uploadId=%s' % upload_id + status, headers, body = \ + self.conn.make_request('PUT', bucket, key, query=query, + body='AA') + etags.append(headers['etag']) + xml = self._gen_comp_xml(etags) + + query = 'uploadId=%s' % upload_id + status, headers, body = \ + self.conn.make_request('POST', bucket, key, body=xml, + query=query) + self.assertEqual(status, 200) + + # multi parts with all parts except the first part less than min + # segment size + query = 'uploads' + status, headers, body = \ + self.conn.make_request('POST', bucket, key, query=query) + elem = fromstring(body, 'InitiateMultipartUploadResult') + upload_id = elem.find('UploadId').text + + etags = [] + body_size = [self.min_segment_size, self.min_segment_size - 1, 2] + for i in xrange(1, 3): + query = 'partNumber=%s&uploadId=%s' % (i, upload_id) + status, headers, body = \ + self.conn.make_request('PUT', bucket, key, query=query, + body='A' * body_size[i]) + etags.append(headers['etag']) + xml = self._gen_comp_xml(etags) + + query = 'uploadId=%s' % upload_id + status, headers, body = \ + self.conn.make_request('POST', bucket, key, body=xml, + query=query) + self.assertEqual(get_error_code(body), 'EntityTooSmall') + + # multi parts with all parts except last part more than min segment + # size + query = 'uploads' + status, headers, body = \ + self.conn.make_request('POST', bucket, key, query=query) + elem = fromstring(body, 'InitiateMultipartUploadResult') + upload_id = elem.find('UploadId').text + + etags = [] + body_size = [self.min_segment_size, self.min_segment_size, 2] + for i in xrange(1, 3): + query = 'partNumber=%s&uploadId=%s' % (i, upload_id) + status, headers, body = \ + self.conn.make_request('PUT', bucket, key, query=query, + body='A' * body_size[i]) + etags.append(headers['etag']) + xml = self._gen_comp_xml(etags) + + query = 'uploadId=%s' % upload_id + status, headers, body = \ + self.conn.make_request('POST', bucket, key, body=xml, + query=query) + self.assertEqual(status, 200) + + def test_complete_upload_with_fewer_etags(self): + bucket = 'bucket' + key = 'obj' + self.conn.make_request('PUT', bucket) + query = 'uploads' + status, headers, body = \ + self.conn.make_request('POST', bucket, key, query=query) + elem = fromstring(body, 'InitiateMultipartUploadResult') + upload_id = elem.find('UploadId').text + + etags = [] + for i in xrange(1, 4): + query = 'partNumber=%s&uploadId=%s' % (i, upload_id) + status, headers, body = \ + self.conn.make_request('PUT', bucket, key, + body='A' * 1024 * 1024 * 5, query=query) + etags.append(headers['etag']) + query = 'uploadId=%s' % upload_id + xml = self._gen_comp_xml(etags[:-1]) + status, headers, body = \ + self.conn.make_request('POST', bucket, key, body=xml, + query=query) + self.assertEqual(status, 
200) + + def test_object_multi_upload_part_copy_range(self): + bucket = 'bucket' + keys = ['obj1'] + uploads = [] + + results_generator = self._initiate_multi_uploads_result_generator( + bucket, keys) + + # Initiate Multipart Upload + for expected_key, (status, headers, body) in \ + izip(keys, results_generator): + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self.assertTrue('content-type' in headers) + self.assertEqual(headers['content-type'], 'application/xml') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], str(len(body))) + elem = fromstring(body, 'InitiateMultipartUploadResult') + self.assertEqual(elem.find('Bucket').text, bucket) + key = elem.find('Key').text + self.assertEqual(expected_key, key) + upload_id = elem.find('UploadId').text + self.assertTrue(upload_id is not None) + self.assertTrue((key, upload_id) not in uploads) + uploads.append((key, upload_id)) + + self.assertEqual(len(uploads), len(keys)) # sanity + + # Upload Part Copy Range + key, upload_id = uploads[0] + src_bucket = 'bucket2' + src_obj = 'obj4' + src_content = 'y' * (self.min_segment_size / 2) + 'z' * \ + self.min_segment_size + src_range = 'bytes=0-%d' % (self.min_segment_size - 1) + etag = md5(src_content[:self.min_segment_size]).hexdigest() + + # prepare src obj + self.conn.make_request('PUT', src_bucket) + self.conn.make_request('PUT', src_bucket, src_obj, body=src_content) + _, headers, _ = self.conn.make_request('HEAD', src_bucket, src_obj) + self.assertCommonResponseHeaders(headers) + + status, headers, body, resp_etag = \ + self._upload_part_copy(src_bucket, src_obj, bucket, + key, upload_id, 1, src_range) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self.assertTrue('content-type' in headers) + self.assertEqual(headers['content-type'], 'application/xml') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], str(len(body))) + self.assertTrue('etag' not in headers) + elem = fromstring(body, 'CopyPartResult') + + last_modified = elem.find('LastModified').text + self.assertTrue(last_modified is not None) + + self.assertEqual(resp_etag, etag) + + # Check last-modified timestamp + key, upload_id = uploads[0] + query = 'uploadId=%s' % upload_id + status, headers, body = \ + self.conn.make_request('GET', bucket, key, query=query) + + elem = fromstring(body, 'ListPartsResult') + + # FIXME: COPY result drops milli/microseconds but GET doesn't + last_modified_gets = [p.find('LastModified').text + for p in elem.iterfind('Part')] + self.assertEqual( + last_modified_gets[0].rsplit('.', 1)[0], + last_modified.rsplit('.', 1)[0], + '%r != %r' % (last_modified_gets[0], last_modified)) + + # There should be *exactly* one parts in the result + self.assertEqual(1, len(last_modified_gets)) + + # Abort Multipart Upload + key, upload_id = uploads[0] + query = 'uploadId=%s' % upload_id + status, headers, body = \ + self.conn.make_request('DELETE', bucket, key, query=query) + + # sanity checks + self.assertEqual(status, 204) + self.assertCommonResponseHeaders(headers) + self.assertTrue('content-type' in headers) + self.assertEqual(headers['content-type'], 'text/html; charset=UTF-8') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], '0') + + +class TestS3ApiMultiUploadSigV4(TestS3ApiMultiUpload): + @classmethod + def setUpClass(cls): + os.environ['S3_USE_SIGV4'] = "True" + + @classmethod + def tearDownClass(cls): + del 
os.environ['S3_USE_SIGV4'] + + def setUp(self): + super(TestS3ApiMultiUploadSigV4, self).setUp() + + def test_object_multi_upload_part_copy_range(self): + if StrictVersion(boto.__version__) < StrictVersion('3.0'): + self.skipTest('This stuff got the issue of boto<=2.x') + + def test_delete_bucket_multi_upload_object_exisiting(self): + bucket = 'bucket' + keys = ['obj1'] + uploads = [] + + results_generator = self._initiate_multi_uploads_result_generator( + bucket, keys) + + # Initiate Multipart Upload + for expected_key, (status, _, body) in \ + izip(keys, results_generator): + self.assertEqual(status, 200) # sanity + elem = fromstring(body, 'InitiateMultipartUploadResult') + key = elem.find('Key').text + self.assertEqual(expected_key, key) # sanity + upload_id = elem.find('UploadId').text + self.assertTrue(upload_id is not None) # sanity + self.assertTrue((key, upload_id) not in uploads) + uploads.append((key, upload_id)) + + self.assertEqual(len(uploads), len(keys)) # sanity + + # Upload Part + key, upload_id = uploads[0] + content = 'a' * self.min_segment_size + status, headers, body = \ + self._upload_part(bucket, key, upload_id, content) + self.assertEqual(status, 200) + + # Complete Multipart Upload + key, upload_id = uploads[0] + etags = [md5(content).hexdigest()] + xml = self._gen_comp_xml(etags) + status, headers, body = \ + self._complete_multi_upload(bucket, key, upload_id, xml) + self.assertEqual(status, 200) # sanity + + # GET multipart object + status, headers, body = \ + self.conn.make_request('GET', bucket, key) + self.assertEqual(status, 200) # sanity + self.assertEqual(content, body) # sanity + + # DELETE bucket while the object existing + status, headers, body = \ + self.conn.make_request('DELETE', bucket) + self.assertEqual(status, 409) # sanity + + # The object must still be there. + status, headers, body = \ + self.conn.make_request('GET', bucket, key) + self.assertEqual(status, 200) # sanity + self.assertEqual(content, body) # sanity + + +if __name__ == '__main__': + unittest2.main() diff -Nru swift-2.17.0/test/functional/s3api/test_object.py swift-2.18.0/test/functional/s3api/test_object.py --- swift-2.17.0/test/functional/s3api/test_object.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/functional/s3api/test_object.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,873 @@ +# Copyright (c) 2015 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
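In the object tests that follow, the expected ETag of a plain (non-multipart) PUT is simply the hex MD5 digest of the uploaded body, which _assertObjectEtag then verifies via HEAD. A minimal stdlib illustration of that expectation (byte-string literals used so the snippet also runs on Python 3):

from hashlib import md5

# Expected ETag for a simple PUT is the hex digest of the request body ...
print(md5(b'abc123').hexdigest())   # 32-character lowercase hex string
# ... and for a zero-byte PUT it is the digest of the empty string:
print(md5(b'').hexdigest())         # d41d8cd98f00b204e9800998ecf8427e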
+ +import unittest2 +import os +import boto + +# For an issue with venv and distutils, disable pylint message here +# pylint: disable-msg=E0611,F0401 +from distutils.version import StrictVersion + +import email.parser +from email.utils import formatdate, parsedate +from time import mktime +from hashlib import md5 +from urllib import quote + +import test.functional as tf + +from swift.common.middleware.s3api.etree import fromstring + +from test.functional.s3api import S3ApiBase +from test.functional.s3api.s3_test_client import Connection +from test.functional.s3api.utils import get_error_code, calculate_md5 + +DAY = 86400.0 # 60 * 60 * 24 (sec) + + +def setUpModule(): + tf.setup_package() + + +def tearDownModule(): + tf.teardown_package() + + +class TestS3ApiObject(S3ApiBase): + def setUp(self): + super(TestS3ApiObject, self).setUp() + self.bucket = 'bucket' + self.conn.make_request('PUT', self.bucket) + + def _assertObjectEtag(self, bucket, obj, etag): + status, headers, _ = self.conn.make_request('HEAD', bucket, obj) + self.assertEqual(status, 200) # sanity + self.assertCommonResponseHeaders(headers, etag) + + def test_object(self): + obj = 'object name with %-sign' + content = 'abc123' + etag = md5(content).hexdigest() + + # PUT Object + status, headers, body = \ + self.conn.make_request('PUT', self.bucket, obj, body=content) + self.assertEqual(status, 200) + + self.assertCommonResponseHeaders(headers) + self.assertTrue('content-length' in headers) # sanity + self.assertEqual(headers['content-length'], '0') + self._assertObjectEtag(self.bucket, obj, etag) + + # PUT Object Copy + dst_bucket = 'dst-bucket' + dst_obj = 'dst_obj' + self.conn.make_request('PUT', dst_bucket) + headers = {'x-amz-copy-source': '/%s/%s' % (self.bucket, obj)} + status, headers, body = \ + self.conn.make_request('PUT', dst_bucket, dst_obj, + headers=headers) + self.assertEqual(status, 200) + + # PUT Object Copy with URL-encoded Source + dst_bucket = 'dst-bucket' + dst_obj = 'dst_obj' + self.conn.make_request('PUT', dst_bucket) + headers = {'x-amz-copy-source': quote('/%s/%s' % (self.bucket, obj))} + status, headers, body = \ + self.conn.make_request('PUT', dst_bucket, dst_obj, + headers=headers) + self.assertEqual(status, 200) + + self.assertCommonResponseHeaders(headers) + self.assertEqual(headers['content-length'], str(len(body))) + + elem = fromstring(body, 'CopyObjectResult') + self.assertTrue(elem.find('LastModified').text is not None) + last_modified_xml = elem.find('LastModified').text + self.assertTrue(elem.find('ETag').text is not None) + self.assertEqual(etag, elem.find('ETag').text.strip('"')) + self._assertObjectEtag(dst_bucket, dst_obj, etag) + + # Check timestamp on Copy: + status, headers, body = \ + self.conn.make_request('GET', dst_bucket) + self.assertEqual(status, 200) + elem = fromstring(body, 'ListBucketResult') + + # FIXME: COPY result drops milli/microseconds but GET doesn't + self.assertEqual( + elem.find('Contents').find("LastModified").text.rsplit('.', 1)[0], + last_modified_xml.rsplit('.', 1)[0]) + + # GET Object + status, headers, body = \ + self.conn.make_request('GET', self.bucket, obj) + self.assertEqual(status, 200) + + self.assertCommonResponseHeaders(headers, etag) + self.assertTrue(headers['last-modified'] is not None) + self.assertTrue(headers['content-type'] is not None) + self.assertEqual(headers['content-length'], str(len(content))) + + # HEAD Object + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, obj) + self.assertEqual(status, 200) + + 
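The URL-encoded copy earlier in this test works because x-amz-copy-source may be sent percent-encoded; for the key used here ('object name with %-sign') the encoded form looks like this, using urllib.quote as imported at the top of this module (urllib.parse.quote on Python 3):

from urllib import quote   # Python 2, as in this test module

src = '/%s/%s' % ('bucket', 'object name with %-sign')
print(quote(src))   # /bucket/object%20name%20with%20%25-sign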
self.assertCommonResponseHeaders(headers, etag) + self.assertTrue(headers['last-modified'] is not None) + self.assertTrue('content-type' in headers) + self.assertEqual(headers['content-length'], str(len(content))) + + # DELETE Object + status, headers, body = \ + self.conn.make_request('DELETE', self.bucket, obj) + self.assertEqual(status, 204) + self.assertCommonResponseHeaders(headers) + + def test_put_object_error(self): + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + auth_error_conn.make_request('PUT', self.bucket, 'object') + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + self.assertEqual(headers['content-type'], 'application/xml') + + status, headers, body = \ + self.conn.make_request('PUT', 'bucket2', 'object') + self.assertEqual(get_error_code(body), 'NoSuchBucket') + self.assertEqual(headers['content-type'], 'application/xml') + + def test_put_object_copy_error(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + dst_bucket = 'dst-bucket' + self.conn.make_request('PUT', dst_bucket) + dst_obj = 'dst_object' + + headers = {'x-amz-copy-source': '/%s/%s' % (self.bucket, obj)} + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + auth_error_conn.make_request('PUT', dst_bucket, dst_obj, headers) + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + self.assertEqual(headers['content-type'], 'application/xml') + + # /src/nothing -> /dst/dst + headers = {'X-Amz-Copy-Source': '/%s/%s' % (self.bucket, 'nothing')} + status, headers, body = \ + self.conn.make_request('PUT', dst_bucket, dst_obj, headers) + self.assertEqual(get_error_code(body), 'NoSuchKey') + self.assertEqual(headers['content-type'], 'application/xml') + + # /nothing/src -> /dst/dst + headers = {'X-Amz-Copy-Source': '/%s/%s' % ('nothing', obj)} + status, headers, body = \ + self.conn.make_request('PUT', dst_bucket, dst_obj, headers) + # TODO: source bucket is not check. 
+ # self.assertEqual(get_error_code(body), 'NoSuchBucket') + + # /src/src -> /nothing/dst + headers = {'X-Amz-Copy-Source': '/%s/%s' % (self.bucket, obj)} + status, headers, body = \ + self.conn.make_request('PUT', 'nothing', dst_obj, headers) + self.assertEqual(get_error_code(body), 'NoSuchBucket') + self.assertEqual(headers['content-type'], 'application/xml') + + def test_get_object_error(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + auth_error_conn.make_request('GET', self.bucket, obj) + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + self.assertEqual(headers['content-type'], 'application/xml') + + status, headers, body = \ + self.conn.make_request('GET', self.bucket, 'invalid') + self.assertEqual(get_error_code(body), 'NoSuchKey') + self.assertEqual(headers['content-type'], 'application/xml') + + status, headers, body = self.conn.make_request('GET', 'invalid', obj) + self.assertEqual(get_error_code(body), 'NoSuchBucket') + self.assertEqual(headers['content-type'], 'application/xml') + + def test_head_object_error(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + auth_error_conn.make_request('HEAD', self.bucket, obj) + self.assertEqual(status, 403) + self.assertEqual(body, '') # sanity + self.assertEqual(headers['content-type'], 'application/xml') + + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, 'invalid') + self.assertEqual(status, 404) + self.assertEqual(body, '') # sanity + self.assertEqual(headers['content-type'], 'application/xml') + + status, headers, body = \ + self.conn.make_request('HEAD', 'invalid', obj) + self.assertEqual(status, 404) + self.assertEqual(body, '') # sanity + self.assertEqual(headers['content-type'], 'application/xml') + + def test_delete_object_error(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = \ + auth_error_conn.make_request('DELETE', self.bucket, obj) + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + self.assertEqual(headers['content-type'], 'application/xml') + + status, headers, body = \ + self.conn.make_request('DELETE', self.bucket, 'invalid') + self.assertEqual(get_error_code(body), 'NoSuchKey') + self.assertEqual(headers['content-type'], 'application/xml') + + status, headers, body = \ + self.conn.make_request('DELETE', 'invalid', obj) + self.assertEqual(get_error_code(body), 'NoSuchBucket') + self.assertEqual(headers['content-type'], 'application/xml') + + def test_put_object_content_encoding(self): + obj = 'object' + etag = md5().hexdigest() + headers = {'Content-Encoding': 'gzip'} + status, headers, body = \ + self.conn.make_request('PUT', self.bucket, obj, headers) + self.assertEqual(status, 200) + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, obj) + self.assertTrue('content-encoding' in headers) # sanity + self.assertEqual(headers['content-encoding'], 'gzip') + self.assertCommonResponseHeaders(headers) + self._assertObjectEtag(self.bucket, obj, etag) + + def test_put_object_content_md5(self): + obj = 'object' + content = 'abcdefghij' + etag = md5(content).hexdigest() + headers = {'Content-MD5': calculate_md5(content)} + status, headers, body = \ + self.conn.make_request('PUT', self.bucket, obj, headers, content) + 
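The Content-MD5 sent in this test is not the same string as the ETag checked afterwards: the Content-MD5 request header carries the base64-encoded raw 16-byte digest (which is what calculate_md5 supplies), while the expected ETag is the hex digest of the same body. A quick stdlib illustration of the two encodings:

import base64
from hashlib import md5

content = b'abcdefghij'
digest = md5(content)

content_md5_header = base64.b64encode(digest.digest()).strip()   # request header
expected_etag = digest.hexdigest()                                # response ETag

print(content_md5_header)   # 24-character base64 string
print(expected_etag)        # 32-character hex string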
self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self._assertObjectEtag(self.bucket, obj, etag) + + def test_put_object_content_type(self): + obj = 'object' + content = 'abcdefghij' + etag = md5(content).hexdigest() + headers = {'Content-Type': 'text/plain'} + status, headers, body = \ + self.conn.make_request('PUT', self.bucket, obj, headers, content) + self.assertEqual(status, 200) + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, obj) + self.assertEqual(headers['content-type'], 'text/plain') + self.assertCommonResponseHeaders(headers) + self._assertObjectEtag(self.bucket, obj, etag) + + def test_put_object_conditional_requests(self): + obj = 'object' + content = 'abcdefghij' + headers = {'If-None-Match': '*'} + status, headers, body = \ + self.conn.make_request('PUT', self.bucket, obj, headers, content) + self.assertEqual(status, 501) + + headers = {'If-Match': '*'} + status, headers, body = \ + self.conn.make_request('PUT', self.bucket, obj, headers, content) + self.assertEqual(status, 501) + + headers = {'If-Modified-Since': 'Sat, 27 Jun 2015 00:00:00 GMT'} + status, headers, body = \ + self.conn.make_request('PUT', self.bucket, obj, headers, content) + self.assertEqual(status, 501) + + headers = {'If-Unmodified-Since': 'Sat, 27 Jun 2015 00:00:00 GMT'} + status, headers, body = \ + self.conn.make_request('PUT', self.bucket, obj, headers, content) + self.assertEqual(status, 501) + + # None of the above should actually have created an object + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, obj, {}, '') + self.assertEqual(status, 404) + + def test_put_object_expect(self): + obj = 'object' + content = 'abcdefghij' + etag = md5(content).hexdigest() + headers = {'Expect': '100-continue'} + status, headers, body = \ + self.conn.make_request('PUT', self.bucket, obj, headers, content) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self._assertObjectEtag(self.bucket, obj, etag) + + def _test_put_object_headers(self, req_headers, expected_headers=None): + if expected_headers is None: + expected_headers = req_headers + obj = 'object' + content = 'abcdefghij' + etag = md5(content).hexdigest() + status, headers, body = \ + self.conn.make_request('PUT', self.bucket, obj, + req_headers, content) + self.assertEqual(status, 200) + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, obj) + for header, value in expected_headers.items(): + self.assertIn(header.lower(), headers) + self.assertEqual(headers[header.lower()], value) + self.assertCommonResponseHeaders(headers) + self._assertObjectEtag(self.bucket, obj, etag) + + def test_put_object_metadata(self): + self._test_put_object_headers({ + 'X-Amz-Meta-Bar': 'foo', + 'X-Amz-Meta-Bar2': 'foo2'}) + + def test_put_object_weird_metadata(self): + req_headers = dict( + ('x-amz-meta-' + c, c) + for c in '!"#$%&\'()*+-./<=>?@[\\]^`{|}~') + exp_headers = dict( + ('x-amz-meta-' + c, c) + for c in '!#$%&\'(*+-.^`|~') + self._test_put_object_headers(req_headers, exp_headers) + + def test_put_object_underscore_in_metadata(self): + # Break this out separately for ease of testing pre-0.19.0 eventlet + self._test_put_object_headers({ + 'X-Amz-Meta-Foo-Bar': 'baz', + 'X-Amz-Meta-Foo_Bar': 'also baz'}) + + def test_put_object_content_headers(self): + self._test_put_object_headers({ + 'Content-Type': 'foo/bar', + 'Content-Encoding': 'baz', + 'Content-Disposition': 'attachment', + 'Content-Language': 'en'}) + + def 
test_put_object_cache_control(self): + self._test_put_object_headers({ + 'Cache-Control': 'private, some-extension'}) + + def test_put_object_expires(self): + self._test_put_object_headers({ + # We don't validate that the Expires header is a valid date + 'Expires': 'a valid HTTP-date timestamp'}) + + def test_put_object_robots_tag(self): + self._test_put_object_headers({ + 'X-Robots-Tag': 'googlebot: noarchive'}) + + def test_put_object_storage_class(self): + obj = 'object' + content = 'abcdefghij' + etag = md5(content).hexdigest() + headers = {'X-Amz-Storage-Class': 'STANDARD'} + status, headers, body = \ + self.conn.make_request('PUT', self.bucket, obj, headers, content) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self._assertObjectEtag(self.bucket, obj, etag) + + def test_put_object_copy_source_params(self): + obj = 'object' + src_headers = {'X-Amz-Meta-Test': 'src'} + src_body = 'some content' + dst_bucket = 'dst-bucket' + dst_obj = 'dst_object' + self.conn.make_request('PUT', self.bucket, obj, src_headers, src_body) + self.conn.make_request('PUT', dst_bucket) + + headers = {'X-Amz-Copy-Source': '/%s/%s?nonsense' % ( + self.bucket, obj)} + status, headers, body = \ + self.conn.make_request('PUT', dst_bucket, dst_obj, headers) + self.assertEqual(status, 400) + self.assertEqual(get_error_code(body), 'InvalidArgument') + + headers = {'X-Amz-Copy-Source': '/%s/%s?versionId=null&nonsense' % ( + self.bucket, obj)} + status, headers, body = \ + self.conn.make_request('PUT', dst_bucket, dst_obj, headers) + self.assertEqual(status, 400) + self.assertEqual(get_error_code(body), 'InvalidArgument') + + headers = {'X-Amz-Copy-Source': '/%s/%s?versionId=null' % ( + self.bucket, obj)} + status, headers, body = \ + self.conn.make_request('PUT', dst_bucket, dst_obj, headers) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + status, headers, body = \ + self.conn.make_request('GET', dst_bucket, dst_obj) + self.assertEqual(status, 200) + self.assertEqual(headers['x-amz-meta-test'], 'src') + self.assertEqual(body, src_body) + + def test_put_object_copy_source(self): + obj = 'object' + content = 'abcdefghij' + etag = md5(content).hexdigest() + self.conn.make_request('PUT', self.bucket, obj, body=content) + + dst_bucket = 'dst-bucket' + dst_obj = 'dst_object' + self.conn.make_request('PUT', dst_bucket) + + # /src/src -> /dst/dst + headers = {'X-Amz-Copy-Source': '/%s/%s' % (self.bucket, obj)} + status, headers, body = \ + self.conn.make_request('PUT', dst_bucket, dst_obj, headers) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self._assertObjectEtag(dst_bucket, dst_obj, etag) + + # /src/src -> /src/dst + headers = {'X-Amz-Copy-Source': '/%s/%s' % (self.bucket, obj)} + status, headers, body = \ + self.conn.make_request('PUT', self.bucket, dst_obj, headers) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self._assertObjectEtag(self.bucket, dst_obj, etag) + + # /src/src -> /src/src + # need changes to copy itself (e.g. 
metadata) + headers = {'X-Amz-Copy-Source': '/%s/%s' % (self.bucket, obj), + 'X-Amz-Meta-Foo': 'bar', + 'X-Amz-Metadata-Directive': 'REPLACE'} + status, headers, body = \ + self.conn.make_request('PUT', self.bucket, obj, headers) + self.assertEqual(status, 200) + self._assertObjectEtag(self.bucket, obj, etag) + self.assertCommonResponseHeaders(headers) + + def test_put_object_copy_metadata_directive(self): + obj = 'object' + src_headers = {'X-Amz-Meta-Test': 'src'} + dst_bucket = 'dst-bucket' + dst_obj = 'dst_object' + self.conn.make_request('PUT', self.bucket, obj, headers=src_headers) + self.conn.make_request('PUT', dst_bucket) + + headers = {'X-Amz-Copy-Source': '/%s/%s' % (self.bucket, obj), + 'X-Amz-Metadata-Directive': 'REPLACE', + 'X-Amz-Meta-Test': 'dst'} + status, headers, body = \ + self.conn.make_request('PUT', dst_bucket, dst_obj, headers) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + status, headers, body = \ + self.conn.make_request('HEAD', dst_bucket, dst_obj) + self.assertEqual(headers['x-amz-meta-test'], 'dst') + + def test_put_object_copy_source_if_modified_since(self): + obj = 'object' + dst_bucket = 'dst-bucket' + dst_obj = 'dst_object' + etag = md5().hexdigest() + self.conn.make_request('PUT', self.bucket, obj) + self.conn.make_request('PUT', dst_bucket) + + _, headers, _ = self.conn.make_request('HEAD', self.bucket, obj) + src_datetime = mktime(parsedate(headers['last-modified'])) + src_datetime = src_datetime - DAY + headers = {'X-Amz-Copy-Source': '/%s/%s' % (self.bucket, obj), + 'X-Amz-Copy-Source-If-Modified-Since': + formatdate(src_datetime)} + status, headers, body = \ + self.conn.make_request('PUT', dst_bucket, dst_obj, headers=headers) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self._assertObjectEtag(self.bucket, obj, etag) + + def test_put_object_copy_source_if_unmodified_since(self): + obj = 'object' + dst_bucket = 'dst-bucket' + dst_obj = 'dst_object' + etag = md5().hexdigest() + self.conn.make_request('PUT', self.bucket, obj) + self.conn.make_request('PUT', dst_bucket) + + _, headers, _ = self.conn.make_request('HEAD', self.bucket, obj) + src_datetime = mktime(parsedate(headers['last-modified'])) + src_datetime = src_datetime + DAY + headers = {'X-Amz-Copy-Source': '/%s/%s' % (self.bucket, obj), + 'X-Amz-Copy-Source-If-Unmodified-Since': + formatdate(src_datetime)} + status, headers, body = \ + self.conn.make_request('PUT', dst_bucket, dst_obj, headers=headers) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self._assertObjectEtag(self.bucket, obj, etag) + + def test_put_object_copy_source_if_match(self): + obj = 'object' + dst_bucket = 'dst-bucket' + dst_obj = 'dst_object' + etag = md5().hexdigest() + self.conn.make_request('PUT', self.bucket, obj) + self.conn.make_request('PUT', dst_bucket) + + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, obj) + + headers = {'X-Amz-Copy-Source': '/%s/%s' % (self.bucket, obj), + 'X-Amz-Copy-Source-If-Match': etag} + status, headers, body = \ + self.conn.make_request('PUT', dst_bucket, dst_obj, headers=headers) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self._assertObjectEtag(self.bucket, obj, etag) + + def test_put_object_copy_source_if_none_match(self): + obj = 'object' + dst_bucket = 'dst-bucket' + dst_obj = 'dst_object' + etag = md5().hexdigest() + self.conn.make_request('PUT', self.bucket, obj) + self.conn.make_request('PUT', dst_bucket) + + headers = 
{'X-Amz-Copy-Source': '/%s/%s' % (self.bucket, obj), + 'X-Amz-Copy-Source-If-None-Match': 'none-match'} + status, headers, body = \ + self.conn.make_request('PUT', dst_bucket, dst_obj, headers=headers) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self._assertObjectEtag(self.bucket, obj, etag) + + def test_get_object_response_content_type(self): + obj = 'obj' + self.conn.make_request('PUT', self.bucket, obj) + + query = 'response-content-type=text/plain' + status, headers, body = \ + self.conn.make_request('GET', self.bucket, obj, query=query) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self.assertEqual(headers['content-type'], 'text/plain') + + def test_get_object_response_content_language(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + + query = 'response-content-language=en' + status, headers, body = \ + self.conn.make_request('GET', self.bucket, obj, query=query) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self.assertEqual(headers['content-language'], 'en') + + def test_get_object_response_cache_control(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + + query = 'response-cache-control=private' + status, headers, body = \ + self.conn.make_request('GET', self.bucket, obj, query=query) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self.assertEqual(headers['cache-control'], 'private') + + def test_get_object_response_content_disposition(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + + query = 'response-content-disposition=inline' + status, headers, body = \ + self.conn.make_request('GET', self.bucket, obj, query=query) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self.assertEqual(headers['content-disposition'], 'inline') + + def test_get_object_response_content_encoding(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + + query = 'response-content-encoding=gzip' + status, headers, body = \ + self.conn.make_request('GET', self.bucket, obj, query=query) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + self.assertEqual(headers['content-encoding'], 'gzip') + + def test_get_object_range(self): + obj = 'object' + content = 'abcdefghij' + headers = {'x-amz-meta-test': 'swift'} + self.conn.make_request( + 'PUT', self.bucket, obj, headers=headers, body=content) + + headers = {'Range': 'bytes=1-5'} + status, headers, body = \ + self.conn.make_request('GET', self.bucket, obj, headers=headers) + self.assertEqual(status, 206) + self.assertCommonResponseHeaders(headers) + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], '5') + self.assertTrue('x-amz-meta-test' in headers) + self.assertEqual('swift', headers['x-amz-meta-test']) + self.assertEqual(body, 'bcdef') + + headers = {'Range': 'bytes=5-'} + status, headers, body = \ + self.conn.make_request('GET', self.bucket, obj, headers=headers) + self.assertEqual(status, 206) + self.assertCommonResponseHeaders(headers) + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], '5') + self.assertTrue('x-amz-meta-test' in headers) + self.assertEqual('swift', headers['x-amz-meta-test']) + self.assertEqual(body, 'fghij') + + headers = {'Range': 'bytes=-5'} + status, headers, body = \ + self.conn.make_request('GET', self.bucket, obj, headers=headers) + self.assertEqual(status, 206) + 
self.assertCommonResponseHeaders(headers) + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], '5') + self.assertTrue('x-amz-meta-test' in headers) + self.assertEqual('swift', headers['x-amz-meta-test']) + self.assertEqual(body, 'fghij') + + ranges = ['1-2', '4-5'] + + headers = {'Range': 'bytes=%s' % ','.join(ranges)} + status, headers, body = \ + self.conn.make_request('GET', self.bucket, obj, headers=headers) + self.assertEqual(status, 206) + self.assertCommonResponseHeaders(headers) + self.assertTrue('content-length' in headers) + + self.assertTrue('content-type' in headers) # sanity + content_type, boundary = headers['content-type'].split(';') + + self.assertEqual('multipart/byteranges', content_type) + self.assertTrue(boundary.startswith('boundary=')) # sanity + boundary_str = boundary[len('boundary='):] + + # TODO: Using swift.common.utils.multipart_byteranges_to_document_iters + # here would simplify this parsing. + parser = email.parser.FeedParser() + parser.feed( + "Content-Type: multipart/byterange; boundary=%s\r\n\r\n" % + boundary_str) + parser.feed(body) + message = parser.close() + + self.assertTrue(message.is_multipart()) # sanity check + mime_parts = message.get_payload() + self.assertEqual(len(mime_parts), len(ranges)) # sanity + + for index, range_value in enumerate(ranges): + start, end = map(int, range_value.split('-')) + # sanity check that this part of the response exists + self.assertTrue(mime_parts[index]) + + part = mime_parts[index] + self.assertEqual( + 'application/octet-stream', part.get_content_type()) + expected_range = 'bytes %s/%s' % (range_value, len(content)) + self.assertEqual( + expected_range, part.get('Content-Range')) + # check the payload of this part + payload = part.get_payload().strip() + self.assertEqual(content[start:end + 1], payload) + + def test_get_object_if_modified_since(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + + _, headers, _ = self.conn.make_request('HEAD', self.bucket, obj) + src_datetime = mktime(parsedate(headers['last-modified'])) + src_datetime = src_datetime - DAY + headers = {'If-Modified-Since': formatdate(src_datetime)} + status, headers, body = \ + self.conn.make_request('GET', self.bucket, obj, headers=headers) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + + def test_get_object_if_unmodified_since(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + + _, headers, _ = self.conn.make_request('HEAD', self.bucket, obj) + src_datetime = mktime(parsedate(headers['last-modified'])) + src_datetime = src_datetime + DAY + headers = \ + {'If-Unmodified-Since': formatdate(src_datetime)} + status, headers, body = \ + self.conn.make_request('GET', self.bucket, obj, headers=headers) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + + def test_get_object_if_match(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, obj) + etag = headers['etag'] + + headers = {'If-Match': etag} + status, headers, body = \ + self.conn.make_request('GET', self.bucket, obj, headers=headers) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + + def test_get_object_if_none_match(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + + headers = {'If-None-Match': 'none-match'} + status, headers, body = \ + self.conn.make_request('GET', self.bucket, obj, headers=headers) + self.assertEqual(status, 200) + 
self.assertCommonResponseHeaders(headers) + + def test_head_object_range(self): + obj = 'object' + content = 'abcdefghij' + self.conn.make_request('PUT', self.bucket, obj, body=content) + + headers = {'Range': 'bytes=1-5'} + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, obj, headers=headers) + self.assertEqual(headers['content-length'], '5') + self.assertCommonResponseHeaders(headers) + + headers = {'Range': 'bytes=5-'} + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, obj, headers=headers) + self.assertEqual(headers['content-length'], '5') + self.assertCommonResponseHeaders(headers) + + headers = {'Range': 'bytes=-5'} + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, obj, headers=headers) + self.assertEqual(headers['content-length'], '5') + self.assertCommonResponseHeaders(headers) + + def test_head_object_if_modified_since(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + + _, headers, _ = self.conn.make_request('HEAD', self.bucket, obj) + dt = mktime(parsedate(headers['last-modified'])) + dt = dt - DAY + + headers = {'If-Modified-Since': formatdate(dt)} + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, obj, headers=headers) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + + def test_head_object_if_unmodified_since(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + + _, headers, _ = self.conn.make_request('HEAD', self.bucket, obj) + dt = mktime(parsedate(headers['last-modified'])) + dt = dt + DAY + + headers = {'If-Unmodified-Since': formatdate(dt)} + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, obj, headers=headers) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + + def test_head_object_if_match(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, obj) + etag = headers['etag'] + + headers = {'If-Match': etag} + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, obj, headers=headers) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + + def test_head_object_if_none_match(self): + obj = 'object' + self.conn.make_request('PUT', self.bucket, obj) + + headers = {'If-None-Match': 'none-match'} + status, headers, body = \ + self.conn.make_request('HEAD', self.bucket, obj, headers=headers) + self.assertEqual(status, 200) + self.assertCommonResponseHeaders(headers) + + +class TestS3ApiObjectSigV4(TestS3ApiObject): + @classmethod + def setUpClass(cls): + os.environ['S3_USE_SIGV4'] = "True" + + @classmethod + def tearDownClass(cls): + del os.environ['S3_USE_SIGV4'] + + def setUp(self): + super(TestS3ApiObjectSigV4, self).setUp() + + @unittest2.skipIf(StrictVersion(boto.__version__) < StrictVersion('3.0'), + 'This stuff got the signing issue of boto<=2.x') + def test_put_object_metadata(self): + super(TestS3ApiObjectSigV4, self).test_put_object_metadata() + + @unittest2.skipIf(StrictVersion(boto.__version__) < StrictVersion('3.0'), + 'This stuff got the signing issue of boto<=2.x') + def test_put_object_copy_source_if_modified_since(self): + super(TestS3ApiObjectSigV4, self).\ + test_put_object_copy_source_if_modified_since() + + @unittest2.skipIf(StrictVersion(boto.__version__) < StrictVersion('3.0'), + 'This stuff got the signing issue of boto<=2.x') + def test_put_object_copy_source_if_unmodified_since(self): + 
super(TestS3ApiObjectSigV4, self).\ + test_put_object_copy_source_if_unmodified_since() + + @unittest2.skipIf(StrictVersion(boto.__version__) < StrictVersion('3.0'), + 'This stuff got the signing issue of boto<=2.x') + def test_put_object_copy_source_if_match(self): + super(TestS3ApiObjectSigV4, + self).test_put_object_copy_source_if_match() + + @unittest2.skipIf(StrictVersion(boto.__version__) < StrictVersion('3.0'), + 'This stuff got the signing issue of boto<=2.x') + def test_put_object_copy_source_if_none_match(self): + super(TestS3ApiObjectSigV4, + self).test_put_object_copy_source_if_none_match() + + +if __name__ == '__main__': + unittest2.main() diff -Nru swift-2.17.0/test/functional/s3api/test_presigned.py swift-2.18.0/test/functional/s3api/test_presigned.py --- swift-2.17.0/test/functional/s3api/test_presigned.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/functional/s3api/test_presigned.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,237 @@ +# Copyright (c) 2016 SwiftStack, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import requests + +from swift.common.middleware.s3api.etree import fromstring + +import test.functional as tf + +from test.functional.s3api import S3ApiBase +from test.functional.s3api.utils import get_error_code, get_error_msg + + +def setUpModule(): + tf.setup_package() + + +def tearDownModule(): + tf.teardown_package() + + +class TestS3ApiPresignedUrls(S3ApiBase): + def test_bucket(self): + bucket = 'test-bucket' + req_objects = ('object', 'object2') + max_bucket_listing = tf.cluster_info['s3api'].get( + 'max_bucket_listing', 1000) + + # GET Bucket (Without Object) + status, _junk, _junk = self.conn.make_request('PUT', bucket) + self.assertEqual(status, 200) + + url, headers = self.conn.generate_url_and_headers('GET', bucket) + resp = requests.get(url, headers=headers) + self.assertEqual(resp.status_code, 200, + 'Got %d %s' % (resp.status_code, resp.content)) + self.assertCommonResponseHeaders(resp.headers) + self.assertIsNotNone(resp.headers['content-type']) + self.assertEqual(resp.headers['content-length'], + str(len(resp.content))) + + elem = fromstring(resp.content, 'ListBucketResult') + self.assertEqual(elem.find('Name').text, bucket) + self.assertIsNone(elem.find('Prefix').text) + self.assertIsNone(elem.find('Marker').text) + self.assertEqual(elem.find('MaxKeys').text, + str(max_bucket_listing)) + self.assertEqual(elem.find('IsTruncated').text, 'false') + objects = elem.findall('./Contents') + self.assertEqual(list(objects), []) + + # GET Bucket (With Object) + for obj in req_objects: + status, _junk, _junk = self.conn.make_request('PUT', bucket, obj) + self.assertEqual( + status, 200, + 'Got %d response while creating %s' % (status, obj)) + + resp = requests.get(url, headers=headers) + self.assertEqual(resp.status_code, 200, + 'Got %d %s' % (resp.status_code, resp.content)) + self.assertCommonResponseHeaders(resp.headers) + self.assertIsNotNone(resp.headers['content-type']) + 
self.assertEqual(resp.headers['content-length'], + str(len(resp.content))) + + elem = fromstring(resp.content, 'ListBucketResult') + self.assertEqual(elem.find('Name').text, bucket) + self.assertIsNone(elem.find('Prefix').text) + self.assertIsNone(elem.find('Marker').text) + self.assertEqual(elem.find('MaxKeys').text, + str(max_bucket_listing)) + self.assertEqual(elem.find('IsTruncated').text, 'false') + resp_objects = elem.findall('./Contents') + self.assertEqual(len(list(resp_objects)), 2) + for o in resp_objects: + self.assertIn(o.find('Key').text, req_objects) + self.assertIsNotNone(o.find('LastModified').text) + self.assertRegexpMatches( + o.find('LastModified').text, + r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$') + self.assertIsNotNone(o.find('ETag').text) + self.assertEqual(o.find('Size').text, '0') + self.assertIsNotNone(o.find('StorageClass').text) + self.assertEqual(o.find('Owner/ID').text, self.conn.user_id) + self.assertEqual(o.find('Owner/DisplayName').text, + self.conn.user_id) + # DELETE Bucket + for obj in req_objects: + self.conn.make_request('DELETE', bucket, obj) + url, headers = self.conn.generate_url_and_headers('DELETE', bucket) + resp = requests.delete(url, headers=headers) + self.assertEqual(resp.status_code, 204, + 'Got %d %s' % (resp.status_code, resp.content)) + + def test_expiration_limits(self): + if os.environ.get('S3_USE_SIGV4'): + self._test_expiration_limits_v4() + else: + self._test_expiration_limits_v2() + + def _test_expiration_limits_v2(self): + bucket = 'test-bucket' + + # Expiration date is too far in the future + url, headers = self.conn.generate_url_and_headers( + 'GET', bucket, expires_in=2 ** 32) + resp = requests.get(url, headers=headers) + self.assertEqual(resp.status_code, 403, + 'Got %d %s' % (resp.status_code, resp.content)) + self.assertEqual(get_error_code(resp.content), + 'AccessDenied') + self.assertIn('Invalid date (should be seconds since epoch)', + get_error_msg(resp.content)) + + def _test_expiration_limits_v4(self): + bucket = 'test-bucket' + + # Expiration is negative + url, headers = self.conn.generate_url_and_headers( + 'GET', bucket, expires_in=-1) + resp = requests.get(url, headers=headers) + self.assertEqual(resp.status_code, 400, + 'Got %d %s' % (resp.status_code, resp.content)) + self.assertEqual(get_error_code(resp.content), + 'AuthorizationQueryParametersError') + self.assertIn('X-Amz-Expires must be non-negative', + get_error_msg(resp.content)) + + # Expiration date is too far in the future + for exp in (7 * 24 * 60 * 60 + 1, + 2 ** 63 - 1): + url, headers = self.conn.generate_url_and_headers( + 'GET', bucket, expires_in=exp) + resp = requests.get(url, headers=headers) + self.assertEqual(resp.status_code, 400, + 'Got %d %s' % (resp.status_code, resp.content)) + self.assertEqual(get_error_code(resp.content), + 'AuthorizationQueryParametersError') + self.assertIn('X-Amz-Expires must be less than 604800 seconds', + get_error_msg(resp.content)) + + # Expiration date is *way* too far in the future, or isn't a number + for exp in (2 ** 63, 'foo'): + url, headers = self.conn.generate_url_and_headers( + 'GET', bucket, expires_in=exp) + resp = requests.get(url, headers=headers) + self.assertEqual(resp.status_code, 400, + 'Got %d %s' % (resp.status_code, resp.content)) + self.assertEqual(get_error_code(resp.content), + 'AuthorizationQueryParametersError') + self.assertEqual('X-Amz-Expires should be a number', + get_error_msg(resp.content)) + + def test_object(self): + bucket = 'test-bucket' + obj = 'object' + + 
status, _junk, _junk = self.conn.make_request('PUT', bucket) + self.assertEqual(status, 200) + + # HEAD/missing object + head_url, headers = self.conn.generate_url_and_headers( + 'HEAD', bucket, obj) + resp = requests.head(head_url, headers=headers) + self.assertEqual(resp.status_code, 404, + 'Got %d %s' % (resp.status_code, resp.content)) + + # Wrong verb + resp = requests.get(head_url) + self.assertEqual(resp.status_code, 403, + 'Got %d %s' % (resp.status_code, resp.content)) + self.assertEqual(get_error_code(resp.content), + 'SignatureDoesNotMatch') + + # PUT empty object + put_url, headers = self.conn.generate_url_and_headers( + 'PUT', bucket, obj) + resp = requests.put(put_url, data='', headers=headers) + self.assertEqual(resp.status_code, 200, + 'Got %d %s' % (resp.status_code, resp.content)) + # GET empty object + get_url, headers = self.conn.generate_url_and_headers( + 'GET', bucket, obj) + resp = requests.get(get_url, headers=headers) + self.assertEqual(resp.status_code, 200, + 'Got %d %s' % (resp.status_code, resp.content)) + self.assertEqual(resp.content, '') + + # PUT over object + resp = requests.put(put_url, data='foobar', headers=headers) + self.assertEqual(resp.status_code, 200, + 'Got %d %s' % (resp.status_code, resp.content)) + + # GET non-empty object + resp = requests.get(get_url, headers=headers) + self.assertEqual(resp.status_code, 200, + 'Got %d %s' % (resp.status_code, resp.content)) + self.assertEqual(resp.content, 'foobar') + + # DELETE Object + delete_url, headers = self.conn.generate_url_and_headers( + 'DELETE', bucket, obj) + resp = requests.delete(delete_url, headers=headers) + self.assertEqual(resp.status_code, 204, + 'Got %d %s' % (resp.status_code, resp.content)) + + # Final cleanup + status, _junk, _junk = self.conn.make_request('DELETE', bucket) + self.assertEqual(status, 204) + + +class TestS3ApiPresignedUrlsSigV4(TestS3ApiPresignedUrls): + @classmethod + def setUpClass(cls): + os.environ['S3_USE_SIGV4'] = "True" + + @classmethod + def tearDownClass(cls): + del os.environ['S3_USE_SIGV4'] + + def setUp(self): + super(TestS3ApiPresignedUrlsSigV4, self).setUp() diff -Nru swift-2.17.0/test/functional/s3api/test_service.py swift-2.18.0/test/functional/s3api/test_service.py --- swift-2.17.0/test/functional/s3api/test_service.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/functional/s3api/test_service.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,100 @@ +# Copyright (c) 2015 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest2 +import os + +import test.functional as tf + +from swift.common.middleware.s3api.etree import fromstring + +from test.functional.s3api import S3ApiBase +from test.functional.s3api.s3_test_client import Connection +from test.functional.s3api.utils import get_error_code + + +def setUpModule(): + tf.setup_package() + + +def tearDownModule(): + tf.teardown_package() + + +class TestS3ApiService(S3ApiBase): + def setUp(self): + super(TestS3ApiService, self).setUp() + + def test_service(self): + # GET Service (without bucket) + status, headers, body = self.conn.make_request('GET') + self.assertEqual(status, 200) + + self.assertCommonResponseHeaders(headers) + self.assertTrue(headers['content-type'] is not None) + # TODO: requires consideration + # self.assertEqual(headers['transfer-encoding'], 'chunked') + + elem = fromstring(body, 'ListAllMyBucketsResult') + buckets = elem.findall('./Buckets/Bucket') + self.assertEqual(list(buckets), []) + owner = elem.find('Owner') + self.assertEqual(self.conn.user_id, owner.find('ID').text) + self.assertEqual(self.conn.user_id, owner.find('DisplayName').text) + + # GET Service (with bucket) + req_buckets = ('bucket', 'bucket2') + for bucket in req_buckets: + self.conn.make_request('PUT', bucket) + status, headers, body = self.conn.make_request('GET') + self.assertEqual(status, 200) + + elem = fromstring(body, 'ListAllMyBucketsResult') + resp_buckets = elem.findall('./Buckets/Bucket') + self.assertEqual(len(list(resp_buckets)), 2) + for b in resp_buckets: + self.assertTrue(b.find('Name').text in req_buckets) + self.assertTrue(b.find('CreationDate') is not None) + + def test_service_error_signature_not_match(self): + auth_error_conn = Connection(aws_secret_key='invalid') + status, headers, body = auth_error_conn.make_request('GET') + self.assertEqual(get_error_code(body), 'SignatureDoesNotMatch') + self.assertEqual(headers['content-type'], 'application/xml') + + def test_service_error_no_date_header(self): + # Without an x-amz-date/Date header, the request is rejected with 403 Forbidden + status, headers, body = self.conn.make_request( + 'GET', headers={'Date': '', 'x-amz-date': ''}) + self.assertEqual(status, 403) + self.assertEqual(get_error_code(body), 'AccessDenied') + self.assertIn('AWS authentication requires a valid Date ' + 'or x-amz-date header', body) + + +class TestS3ApiServiceSigV4(TestS3ApiService): + @classmethod + def setUpClass(cls): + os.environ['S3_USE_SIGV4'] = "True" + + @classmethod + def tearDownClass(cls): + del os.environ['S3_USE_SIGV4'] + + def setUp(self): + super(TestS3ApiServiceSigV4, self).setUp() + +if __name__ == '__main__': + unittest2.main() diff -Nru swift-2.17.0/test/functional/s3api/utils.py swift-2.18.0/test/functional/s3api/utils.py --- swift-2.17.0/test/functional/s3api/utils.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/functional/s3api/utils.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,31 @@ +# Copyright (c) 2015 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from hashlib import md5 +from swift.common.middleware.s3api.etree import fromstring + + +def get_error_code(body): + elem = fromstring(body, 'Error') + return elem.find('Code').text + + +def get_error_msg(body): + elem = fromstring(body, 'Error') + return elem.find('Message').text + + +def calculate_md5(body): + return md5(body).digest().encode('base64').strip() diff -Nru swift-2.17.0/test/functional/swift_test_client.py swift-2.18.0/test/functional/swift_test_client.py --- swift-2.17.0/test/functional/swift_test_client.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/functional/swift_test_client.py 2018-05-30 10:17:02.000000000 +0000 @@ -543,7 +543,7 @@ def delete_files(self): for f in listing_items(self.files): file_item = self.file(f) - if not file_item.delete(): + if not file_item.delete(tolerate_missing=True): return False return listing_empty(self.files) @@ -764,14 +764,19 @@ self.conn.make_path(self.path)) return True - def delete(self, hdrs=None, parms=None, cfg=None): + def delete(self, hdrs=None, parms=None, cfg=None, tolerate_missing=False): if hdrs is None: hdrs = {} if parms is None: parms = {} - if self.conn.make_request('DELETE', self.path, hdrs=hdrs, - cfg=cfg, parms=parms) != 204: + if tolerate_missing: + allowed_statuses = (204, 404) + else: + allowed_statuses = (204,) + if self.conn.make_request( + 'DELETE', self.path, hdrs=hdrs, cfg=cfg, + parms=parms) not in allowed_statuses: raise ResponseError(self.conn.response, 'DELETE', self.conn.make_path(self.path)) diff -Nru swift-2.17.0/test/functional/test_staticweb.py swift-2.18.0/test/functional/test_staticweb.py --- swift-2.17.0/test/functional/test_staticweb.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/functional/test_staticweb.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,400 @@ +#!/usr/bin/python -u +# Copyright (c) 2010-2017 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import functools +from unittest2 import SkipTest +import test.functional as tf +from test.functional import cluster_info +from test.functional.tests import Utils, Base, BaseEnv +from test.functional.swift_test_client import Account, Connection, \ + ResponseError + + +def setUpModule(): + tf.setup_package() + + +def tearDownModule(): + tf.teardown_package() + + +def requires_domain_remap(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + if 'domain_remap' not in cluster_info: + raise SkipTest('Domain Remap is not enabled') + return func(*args, **kwargs) + return wrapper + + +class TestStaticWebEnv(BaseEnv): + static_web_enabled = None # tri-state: None initially, then True/False + + @classmethod + def setUp(cls): + cls.conn = Connection(tf.config) + cls.conn.authenticate() + + if cls.static_web_enabled is None: + cls.static_web_enabled = 'staticweb' in cluster_info + if not cls.static_web_enabled: + return + + cls.account = Account( + cls.conn, tf.config.get('account', tf.config['username'])) + cls.account.delete_containers() + + cls.container = cls.account.container(Utils.create_name()) + if not cls.container.create( + hdrs={'X-Container-Read': '.r:*,.rlistings'}): + raise ResponseError(cls.conn.response) + + objects = ['index', + 'error', + 'listings_css', + 'dir/', + 'dir/obj', + 'dir/subdir/', + 'dir/subdir/obj'] + + cls.objects = {} + for item in sorted(objects): + parent = None + if '/' in item.rstrip('/'): + parent, _ = item.rstrip('/').rsplit('/', 1) + path = '%s/%s' % (cls.objects[parent + '/'].name, + Utils.create_name()) + else: + path = Utils.create_name() + + if item[-1] == '/': + cls.objects[item] = cls.container.file(path) + cls.objects[item].write(hdrs={ + 'Content-Type': 'application/directory'}) + else: + cls.objects[item] = cls.container.file(path) + cls.objects[item].write('%s contents' % item) + + +class TestStaticWeb(Base): + env = TestStaticWebEnv + set_up = False + + def setUp(self): + super(TestStaticWeb, self).setUp() + if self.env.static_web_enabled is False: + raise SkipTest("Static Web not enabled") + elif self.env.static_web_enabled is not True: + # just some sanity checking + raise Exception( + "Expected static_web_enabled to be True/False, got %r" % + (self.env.static_web_enabled,)) + + _, _, acct = self.env.account.conn.storage_url.split('/') + + self.domain_remap_acct = '%s.example.com' % acct + + self.domain_remap_cont = '%s.%s.example.com' % ( + self.env.container.name, acct) + + def _set_staticweb_headers(self, index=False, listings=False, + listings_css=False, error=False): + objects = self.env.objects + headers = {} + if index: + headers['X-Container-Meta-Web-Index'] = objects['index'].name + else: + headers['X-Remove-Container-Meta-Web-Index'] = 'true' + + if listings: + headers['X-Container-Meta-Web-Listings'] = 'true' + else: + headers['X-Remove-Container-Meta-Web-Listings'] = 'true' + + if listings_css: + headers['X-Container-Meta-Web-Listings-Css'] = \ + objects['listings_css'].name + else: + headers['X-Remove-Container-Meta-Web-Listings-Css'] = 'true' + + if error: + headers['X-Container-Meta-Web-Error'] = objects['error'].name + else: + headers['X-Remove-Container-Meta-Web-Error'] = 'true' + + self.assertTrue(self.env.container.update_metadata(hdrs=headers)) + + def _test_redirect_with_slash(self, host, path, anonymous=False): + self._set_staticweb_headers(listings=True) + self.env.account.conn.make_request('GET', path, + hdrs={'X-Web-Mode': not anonymous, + 'Host': host}, + cfg={'no_auth_token': anonymous, + 'absolute_path': 
True}) + + self.assert_status(301) + self.assertRegexpMatches(self.env.conn.response.getheader('location'), + 'http[s]?://%s%s/' % (host, path)) + + def _test_redirect_slash_direct(self, anonymous): + host = self.env.account.conn.storage_netloc + path = '%s/%s' % (self.env.account.conn.storage_url, + self.env.container.name) + self._test_redirect_with_slash(host, path, anonymous=anonymous) + + path = '%s/%s/%s' % (self.env.account.conn.storage_url, + self.env.container.name, + self.env.objects['dir/'].name) + self._test_redirect_with_slash(host, path, anonymous=anonymous) + + def test_redirect_slash_auth_direct(self): + self._test_redirect_slash_direct(False) + + def test_redirect_slash_anon_direct(self): + self._test_redirect_slash_direct(True) + + @requires_domain_remap + def _test_redirect_slash_remap_acct(self, anonymous): + host = self.domain_remap_acct + path = '/%s' % self.env.container.name + self._test_redirect_with_slash(host, path, anonymous=anonymous) + + path = '/%s/%s' % (self.env.container.name, + self.env.objects['dir/'].name) + self._test_redirect_with_slash(host, path, anonymous=anonymous) + + def test_redirect_slash_auth_remap_acct(self): + self._test_redirect_slash_remap_acct(False) + + def test_redirect_slash_anon_remap_acct(self): + self._test_redirect_slash_remap_acct(True) + + @requires_domain_remap + def _test_redirect_slash_remap_cont(self, anonymous): + host = self.domain_remap_cont + path = '/%s' % self.env.objects['dir/'].name + self._test_redirect_with_slash(host, path, anonymous=anonymous) + + def test_redirect_slash_auth_remap_cont(self): + self._test_redirect_slash_remap_cont(False) + + def test_redirect_slash_anon_remap_cont(self): + self._test_redirect_slash_remap_cont(True) + + def _test_get_path(self, host, path, anonymous=False, expected_status=200, + expected_in=[], expected_not_in=[]): + self.env.account.conn.make_request('GET', path, + hdrs={'X-Web-Mode': not anonymous, + 'Host': host}, + cfg={'no_auth_token': anonymous, + 'absolute_path': True}) + self.assert_status(expected_status) + body = self.env.account.conn.response.read() + for string in expected_in: + self.assertIn(string, body) + for string in expected_not_in: + self.assertNotIn(string, body) + + def _test_listing(self, host, path, title=None, links=[], notins=[], + css=None, anonymous=False): + self._set_staticweb_headers(listings=True, + listings_css=(css is not None)) + if title is None: + title = path + expected_in = ['Listing of %s' % title] + [ + '<a href="{0}">{0}</a>'.format(link) for link in links] + expected_not_in = notins + if css: + expected_in.append('<link rel="stylesheet" type="text/css" href="%s" />' % css) + self._test_get_path(host, path, anonymous=anonymous, + expected_in=expected_in, + expected_not_in=expected_not_in) + + def _test_listing_direct(self, anonymous, listings_css): + objects = self.env.objects + host = self.env.account.conn.storage_netloc + path = '%s/%s/' % (self.env.account.conn.storage_url, + self.env.container.name) + css = objects['listings_css'].name if listings_css else None + self._test_listing(host, path, anonymous=True, css=css, + links=[objects['index'].name, + objects['dir/'].name + '/'], + notins=[objects['dir/obj'].name]) + + path = '%s/%s/%s/' % (self.env.account.conn.storage_url, + self.env.container.name, + objects['dir/'].name) + css = '../%s' % objects['listings_css'].name if listings_css else None + self._test_listing(host, path, anonymous=anonymous, css=css, + links=[objects['dir/obj'].name.split('/')[-1], + objects['dir/subdir/'].name.split('/')[-1] + + '/'], + notins=[objects['index'].name, + 
objects['dir/subdir/obj'].name]) + + def test_listing_auth_direct_without_css(self): + self._test_listing_direct(False, False) + + def test_listing_anon_direct_without_css(self): + self._test_listing_direct(True, False) + + def test_listing_auth_direct_with_css(self): + self._test_listing_direct(False, True) + + def test_listing_anon_direct_with_css(self): + self._test_listing_direct(True, True) + + @requires_domain_remap + def _test_listing_remap_acct(self, anonymous, listings_css): + objects = self.env.objects + host = self.domain_remap_acct + path = '/%s/' % self.env.container.name + css = objects['listings_css'].name if listings_css else None + title = '%s/%s/' % (self.env.account.conn.storage_url, + self.env.container.name) + self._test_listing(host, path, title=title, anonymous=anonymous, + css=css, + links=[objects['index'].name, + objects['dir/'].name + '/'], + notins=[objects['dir/obj'].name]) + + path = '/%s/%s/' % (self.env.container.name, objects['dir/'].name) + css = '../%s' % objects['listings_css'].name if listings_css else None + title = '%s/%s/%s/' % (self.env.account.conn.storage_url, + self.env.container.name, + objects['dir/']) + self._test_listing(host, path, title=title, anonymous=anonymous, + css=css, + links=[objects['dir/obj'].name.split('/')[-1], + objects['dir/subdir/'].name.split('/')[-1] + + '/'], + notins=[objects['index'].name, + objects['dir/subdir/obj'].name]) + + def test_listing_auth_remap_acct_without_css(self): + self._test_listing_remap_acct(False, False) + + def test_listing_anon_remap_acct_without_css(self): + self._test_listing_remap_acct(True, False) + + def test_listing_auth_remap_acct_with_css(self): + self._test_listing_remap_acct(False, True) + + def test_listing_anon_remap_acct_with_css(self): + self._test_listing_remap_acct(True, True) + + @requires_domain_remap + def _test_listing_remap_cont(self, anonymous, listings_css): + objects = self.env.objects + host = self.domain_remap_cont + path = '/' + css = objects['listings_css'].name if listings_css else None + title = '%s/%s/' % (self.env.account.conn.storage_url, + self.env.container.name) + self._test_listing(host, path, title=title, anonymous=anonymous, + css=css, + links=[objects['index'].name, + objects['dir/'].name + '/'], + notins=[objects['dir/obj'].name]) + + path = '/%s/' % objects['dir/'].name + css = '../%s' % objects['listings_css'].name if listings_css else None + title = '%s/%s/%s/' % (self.env.account.conn.storage_url, + self.env.container.name, + objects['dir/']) + self._test_listing(host, path, title=title, anonymous=anonymous, + css=css, + links=[objects['dir/obj'].name.split('/')[-1], + objects['dir/subdir/'].name.split('/')[-1] + + '/'], + notins=[objects['index'].name, + objects['dir/subdir/obj'].name]) + + def test_listing_auth_remap_cont_without_css(self): + self._test_listing_remap_cont(False, False) + + def test_listing_anon_remap_cont_without_css(self): + self._test_listing_remap_cont(True, False) + + def test_listing_auth_remap_cont_with_css(self): + self._test_listing_remap_cont(False, True) + + def test_listing_anon_remap_cont_with_css(self): + self._test_listing_remap_cont(True, True) + + def _test_index(self, host, path, anonymous=False, expected_status=200): + self._set_staticweb_headers(index=True) + if expected_status == 200: + expected_in = ['index contents'] + expected_not_in = ['Listing'] + else: + expected_in = [] + expected_not_in = [] + self._test_get_path(host, path, anonymous=anonymous, + expected_status=expected_status, + expected_in=expected_in, + 
expected_not_in=expected_not_in) + + def _test_index_direct(self, anonymous): + objects = self.env.objects + host = self.env.account.conn.storage_netloc + path = '%s/%s/' % (self.env.account.conn.storage_url, + self.env.container.name) + self._test_index(host, path, anonymous=anonymous) + + path = '%s/%s/%s/' % (self.env.account.conn.storage_url, + self.env.container.name, + objects['dir/'].name) + self._test_index(host, path, anonymous=anonymous, expected_status=404) + + def test_index_auth_direct(self): + self._test_index_direct(False) + + def test_index_anon_direct(self): + self._test_index_direct(True) + + @requires_domain_remap + def _test_index_remap_acct(self, anonymous): + objects = self.env.objects + host = self.domain_remap_acct + path = '/%s/' % self.env.container.name + self._test_index(host, path, anonymous=anonymous) + + path = '/%s/%s/' % (self.env.container.name, objects['dir/'].name) + self._test_index(host, path, anonymous=anonymous, expected_status=404) + + def test_index_auth_remap_acct(self): + self._test_index_remap_acct(False) + + def test_index_anon_remap_acct(self): + self._test_index_remap_acct(True) + + @requires_domain_remap + def _test_index_remap_cont(self, anonymous): + objects = self.env.objects + host = self.domain_remap_cont + path = '/' + self._test_index(host, path, anonymous=anonymous) + + path = '/%s/' % objects['dir/'].name + self._test_index(host, path, anonymous=anonymous, expected_status=404) + + def test_index_auth_remap_cont(self): + self._test_index_remap_cont(False) + + def test_index_anon_remap_cont(self): + self._test_index_remap_cont(True) diff -Nru swift-2.17.0/test/functional/test_tempurl.py swift-2.18.0/test/functional/test_tempurl.py --- swift-2.17.0/test/functional/test_tempurl.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/functional/test_tempurl.py 2018-05-30 10:17:02.000000000 +0000 @@ -587,10 +587,8 @@ def test_tempurl_keys_hidden_from_acl_readonly(self): if not tf.cluster_info.get('tempauth'): raise SkipTest('TEMP AUTH SPECIFIC TEST') - original_token = self.env.container.conn.storage_token - self.env.container.conn.storage_token = self.env.conn2.storage_token - metadata = self.env.container.info() - self.env.container.conn.storage_token = original_token + metadata = self.env.container.info(cfg={ + 'use_token': self.env.conn2.storage_token}) self.assertNotIn( 'tempurl_key', metadata, diff -Nru swift-2.17.0/test/functional/test_versioned_writes.py swift-2.18.0/test/functional/test_versioned_writes.py --- swift-2.17.0/test/functional/test_versioned_writes.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/functional/test_versioned_writes.py 2018-05-30 10:17:02.000000000 +0000 @@ -14,12 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from copy import deepcopy import json import time import unittest2 +from six.moves.urllib.parse import quote import test.functional as tf -from copy import deepcopy from swift.common.utils import MD5_OF_EMPTY_STRING from test.functional.tests import Base, Base2, BaseEnv, Utils @@ -62,7 +63,7 @@ cls.container = cls.account.container(prefix + "-objs") container_headers = { - cls.location_header_key: cls.versions_container.name} + cls.location_header_key: quote(cls.versions_container.name)} if not cls.container.create(hdrs=container_headers): if cls.conn.response.status == 412: cls.versioning_enabled = False @@ -224,26 +225,24 @@ def test_clear_version_option(self): # sanity - self.assertEqual(self.env.container.info()['versions'], - self.env.versions_container.name) + header_val = quote(self.env.versions_container.name) + self.assertEqual(self.env.container.info()['versions'], header_val) self.env.container.update_metadata( hdrs={self.env.location_header_key: ''}) self.assertIsNone(self.env.container.info().get('versions')) # set location back to the way it was self.env.container.update_metadata( - hdrs={self.env.location_header_key: - self.env.versions_container.name}) - self.assertEqual(self.env.container.info()['versions'], - self.env.versions_container.name) + hdrs={self.env.location_header_key: header_val}) + self.assertEqual(self.env.container.info()['versions'], header_val) - def _test_overwriting_setup(self): + def _test_overwriting_setup(self, obj_name=None): container = self.env.container versions_container = self.env.versions_container cont_info = container.info() - self.assertEqual(cont_info['versions'], versions_container.name) + self.assertEqual(cont_info['versions'], quote(versions_container.name)) expected_content_types = [] - obj_name = Utils.create_name() + obj_name = obj_name or Utils.create_name() versioned_obj = container.file(obj_name) put_headers = {'Content-Type': 'text/jibberish01', @@ -291,11 +290,11 @@ # check that POST does not create a new version versioned_obj.sync_metadata(metadata={'fu': 'baz'}) self.assertEqual(1, versions_container.info()['object_count']) - expected_content_types.append('text/jibberish02') # if we overwrite it again, there are two versions versioned_obj.write("ccccc") self.assertEqual(2, versions_container.info()['object_count']) + expected_content_types.append('text/jibberish02') versioned_obj_name = versions_container.files()[1] prev_version = versions_container.file(versioned_obj_name) prev_version.initialize() @@ -371,6 +370,48 @@ versioned_obj.delete() self.assertRaises(ResponseError, versioned_obj.read) + def test_overwriting_with_url_encoded_object_name(self): + versions_container = self.env.versions_container + obj_name = Utils.create_name() + '%25ff' + versioned_obj, expected_headers, expected_content_types = \ + self._test_overwriting_setup(obj_name) + + # pop one for the current version + expected_content_types.pop() + self.assertEqual(expected_content_types, [ + o['content_type'] for o in versions_container.files( + parms={'format': 'json'})]) + + # test delete + versioned_obj.delete() + self.assertEqual("ccccc", versioned_obj.read()) + expected_content_types.pop() + self.assertEqual(expected_content_types, [ + o['content_type'] for o in versions_container.files( + parms={'format': 'json'})]) + + versioned_obj.delete() + self.assertEqual("bbbbb", versioned_obj.read()) + expected_content_types.pop() + self.assertEqual(expected_content_types, [ + o['content_type'] for o in versions_container.files( + parms={'format': 'json'})]) + 
+ versioned_obj.delete() + self.assertEqual("aaaaa", versioned_obj.read()) + self.assertEqual(0, versions_container.info()['object_count']) + + # verify that all the original object headers have been copied back + obj_info = versioned_obj.info() + self.assertEqual('text/jibberish01', obj_info['content_type']) + resp_headers = dict(versioned_obj.conn.response.getheaders()) + for k, v in expected_headers.items(): + self.assertIn(k.lower(), resp_headers) + self.assertEqual(v, resp_headers[k.lower()]) + + versioned_obj.delete() + self.assertRaises(ResponseError, versioned_obj.read) + def assert_most_recent_version(self, obj_name, content, should_be_dlo=False): archive_versions = self.env.versions_container.files(parms={ @@ -446,6 +487,7 @@ def test_versioning_container_acl(self): # create versions container and DO NOT give write access to account2 versions_container = self.env.account.container(Utils.create_name()) + location_header_val = quote(str(versions_container)) self.assertTrue(versions_container.create(hdrs={ 'X-Container-Write': '' })) @@ -464,7 +506,7 @@ # check account2 cannot set X-Versions-Location on container self.assertRaises(ResponseError, container.update_metadata, hdrs={ - self.env.location_header_key: versions_container}, + self.env.location_header_key: location_header_val}, cfg={'use_token': self.env.storage_token2}) # good! now let admin set the X-Versions-Location @@ -472,8 +514,8 @@ # of both headers. Setting the location should succeed. self.assertTrue(container.update_metadata(hdrs={ 'X-Remove-' + self.env.location_header_key[len('X-'):]: - versions_container, - self.env.location_header_key: versions_container})) + location_header_val, + self.env.location_header_key: location_header_val})) # write object twice to container and check version obj_name = Utils.create_name() @@ -784,6 +826,57 @@ self.assertEqual(404, cm.exception.status) self.assertEqual(11, versions_container.info()['object_count']) + def test_overwriting_with_url_encoded_object_name(self): + versions_container = self.env.versions_container + obj_name = Utils.create_name() + '%25ff' + versioned_obj, expected_headers, expected_content_types = \ + self._test_overwriting_setup(obj_name) + + # test delete + # at first, delete will succeed with 204 + versioned_obj.delete() + expected_content_types.append( + 'application/x-deleted;swift_versions_deleted=1') + # after that, any time the delete doesn't restore the old version + # and we will get 404 NotFound + for x in range(3): + with self.assertRaises(ResponseError) as cm: + versioned_obj.delete() + self.assertEqual(404, cm.exception.status) + expected_content_types.append( + 'application/x-deleted;swift_versions_deleted=1') + # finally, we have 4 versioned items and 4 delete markers total in + # the versions container + self.assertEqual(8, versions_container.info()['object_count']) + self.assertEqual(expected_content_types, [ + o['content_type'] for o in versions_container.files( + parms={'format': 'json'})]) + + # update versioned_obj + versioned_obj.write("eeee", hdrs={'Content-Type': 'text/thanksgiving', + 'X-Object-Meta-Bar': 'foo'}) + # verify the PUT object is kept successfully + obj_info = versioned_obj.info() + self.assertEqual('text/thanksgiving', obj_info['content_type']) + + # we still have delete-marker there + self.assertEqual(8, versions_container.info()['object_count']) + + # update versioned_obj + versioned_obj.write("ffff", hdrs={'Content-Type': 'text/teriyaki', + 'X-Object-Meta-Food': 'chickin'}) + # verify the PUT object is kept 
successfully + obj_info = versioned_obj.info() + self.assertEqual('text/teriyaki', obj_info['content_type']) + + # new obj will be inserted after delete-marker there + self.assertEqual(9, versions_container.info()['object_count']) + + versioned_obj.delete() + with self.assertRaises(ResponseError) as cm: + versioned_obj.read() + self.assertEqual(404, cm.exception.status) + def test_versioning_dlo(self): obj_name, man_file = \ self._test_versioning_dlo_setup() diff -Nru swift-2.17.0/test/__init__.py swift-2.18.0/test/__init__.py --- swift-2.17.0/test/__init__.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/__init__.py 2018-05-30 10:17:02.000000000 +0000 @@ -17,7 +17,11 @@ # The code below enables nosetests to work with i18n _() blocks from __future__ import print_function import sys +from contextlib import contextmanager + import os +from six import reraise + try: from unittest.util import safe_repr except ImportError: @@ -86,3 +90,26 @@ sock.bind(("127.0.0.1", 0)) sock.listen(50) return sock + + +@contextmanager +def annotate_failure(msg): + """ + Catch AssertionError and annotate it with a message. Useful when making + assertions in a loop where the message can indicate the loop index or + richer context about the failure. + + :param msg: A message to be prefixed to the AssertionError message. + """ + try: + yield + except AssertionError as err: + err_typ, err_val, err_tb = sys.exc_info() + if err_val.args: + msg = '%s Failed with %s' % (msg, err_val.args[0]) + err_val.args = (msg, ) + err_val.args[1:] + else: + # workaround for some IDE's raising custom AssertionErrors + err_val = '%s Failed with %s' % (msg, err) + err_typ = AssertionError + reraise(err_typ, err_val, err_tb) diff -Nru swift-2.17.0/test/probe/brain.py swift-2.18.0/test/probe/brain.py --- swift-2.17.0/test/probe/brain.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/probe/brain.py 2018-05-30 10:17:02.000000000 +0000 @@ -99,10 +99,10 @@ raise ValueError('Unknown server_type: %r' % server_type) self.server_type = server_type - part, nodes = self.ring.get_nodes(self.account, c, o) + self.part, self.nodes = self.ring.get_nodes(self.account, c, o) - node_ids = [n['id'] for n in nodes] - if all(n_id in node_ids for n_id in (0, 1)): + self.node_numbers = [n['id'] + 1 for n in self.nodes] + if 1 in self.node_numbers and 2 in self.node_numbers: self.primary_numbers = (1, 2) self.handoff_numbers = (3, 4) else: diff -Nru swift-2.17.0/test/probe/common.py swift-2.18.0/test/probe/common.py --- swift-2.17.0/test/probe/common.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/probe/common.py 2018-05-30 10:17:02.000000000 +0000 @@ -14,6 +14,8 @@ # limitations under the License. 
from __future__ import print_function + +import errno import os from subprocess import Popen, PIPE import sys @@ -125,13 +127,17 @@ if err: raise Exception('unable to kill %s' % (server if not number else '%s%s' % (server, number))) + return wait_for_server_to_hangup(ipport) + + +def wait_for_server_to_hangup(ipport): try_until = time() + 30 while True: try: conn = HTTPConnection(*ipport) conn.request('GET', '/') conn.getresponse() - except Exception as err: + except Exception: break if time() > try_until: raise Exception( @@ -266,18 +272,26 @@ raise unittest.SkipTest('No policy matching %s' % kwargs) -def resetswift(): - p = Popen("resetswift 2>&1", shell=True, stdout=PIPE) +def run_cleanup(cmd): + p = Popen(cmd + " 2>&1", shell=True, stdout=PIPE) stdout, _stderr = p.communicate() if p.returncode: raise AssertionError( - 'Cleanup with "resetswift" failed: stdout: %s, stderr: %s' - % (stdout, _stderr)) + 'Cleanup with %r failed: stdout: %s, stderr: %s' + % (cmd, stdout, _stderr)) print(stdout) Manager(['all']).stop() +def resetswift(): + run_cleanup("resetswift") + + +def kill_orphans(): + run_cleanup("swift-orphans -a 0 -k 9") + + class Body(object): def __init__(self, total=3.5 * 2 ** 20): @@ -326,32 +340,35 @@ Don't instantiate this directly, use a child class instead. """ + def _load_rings_and_configs(self): + self.ipport2server = {} + self.configs = defaultdict(dict) + self.account_ring = get_ring( + 'account', + self.acct_cont_required_replicas, + self.acct_cont_required_devices, + ipport2server=self.ipport2server, + config_paths=self.configs) + self.container_ring = get_ring( + 'container', + self.acct_cont_required_replicas, + self.acct_cont_required_devices, + ipport2server=self.ipport2server, + config_paths=self.configs) + self.policy = get_policy(**self.policy_requirements) + self.object_ring = get_ring( + self.policy.ring_name, + self.obj_required_replicas, + self.obj_required_devices, + server='object', + ipport2server=self.ipport2server, + config_paths=self.configs) + def setUp(self): resetswift() + kill_orphans() + self._load_rings_and_configs() try: - self.ipport2server = {} - self.configs = defaultdict(dict) - self.account_ring = get_ring( - 'account', - self.acct_cont_required_replicas, - self.acct_cont_required_devices, - ipport2server=self.ipport2server, - config_paths=self.configs) - self.container_ring = get_ring( - 'container', - self.acct_cont_required_replicas, - self.acct_cont_required_devices, - ipport2server=self.ipport2server, - config_paths=self.configs) - self.policy = get_policy(**self.policy_requirements) - self.object_ring = get_ring( - self.policy.ring_name, - self.obj_required_replicas, - self.obj_required_devices, - server='object', - ipport2server=self.ipport2server, - config_paths=self.configs) - self.servers_per_port = any( int(readconf(c, section_name='object-replicator').get( 'servers_per_port', '0')) @@ -480,6 +497,49 @@ finally: shutil.rmtree(tempdir) + def get_all_object_nodes(self): + """ + Returns a list of all nodes in all object storage policies. + + :return: a list of node dicts. + """ + all_obj_nodes = {} + for policy in ENABLED_POLICIES: + for dev in policy.object_ring.devs: + all_obj_nodes[dev['device']] = dev + return all_obj_nodes.values() + + def gather_async_pendings(self, onodes): + """ + Returns a list of paths to async pending files found on given nodes. + + :param onodes: a list of nodes. + :return: a list of file paths. 
+ """ + async_pendings = [] + for onode in onodes: + device_dir = self.device_dir('', onode) + for ap_pol_dir in os.listdir(device_dir): + if not ap_pol_dir.startswith('async_pending'): + # skip 'objects', 'containers', etc. + continue + async_pending_dir = os.path.join(device_dir, ap_pol_dir) + try: + ap_dirs = os.listdir(async_pending_dir) + except OSError as err: + if err.errno == errno.ENOENT: + pass + else: + raise + else: + for ap_dir in ap_dirs: + ap_dir_fullpath = os.path.join( + async_pending_dir, ap_dir) + async_pendings.extend([ + os.path.join(ap_dir_fullpath, ent) + for ent in os.listdir(ap_dir_fullpath)]) + return async_pendings + class ReplProbeTest(ProbeTest): diff -Nru swift-2.17.0/test/probe/test_db_replicator.py swift-2.18.0/test/probe/test_db_replicator.py --- swift-2.17.0/test/probe/test_db_replicator.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/probe/test_db_replicator.py 2018-05-30 10:17:02.000000000 +0000 @@ -90,12 +90,13 @@ expected_meta = { 'x-container-meta-a': '2', - 'x-container-meta-b': '2', + 'x-container-meta-b': '3', 'x-container-meta-c': '1', 'x-container-meta-d': '2', + 'x-container-meta-e': '3', } - # node that got the object updates still doesn't have the meta + # node that got the object updates now has the meta resp_headers = direct_client.direct_head_container( cnode, cpart, self.account, container) for header, value in expected_meta.items(): @@ -104,14 +105,6 @@ self.assertNotIn(resp_headers.get('x-container-object-count'), (None, '0', 0)) - expected_meta = { - 'x-container-meta-a': '2', - 'x-container-meta-b': '3', - 'x-container-meta-c': '1', - 'x-container-meta-d': '2', - 'x-container-meta-e': '3', - } - # other nodes still have the meta, as well as objects for node in cnodes: resp_headers = direct_client.direct_head_container( diff -Nru swift-2.17.0/test/probe/test_object_expirer.py swift-2.18.0/test/probe/test_object_expirer.py --- swift-2.17.0/test/probe/test_object_expirer.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/probe/test_object_expirer.py 2018-05-30 10:17:02.000000000 +0000 @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import errno -import os import random import time import uuid @@ -143,31 +141,6 @@ # tha the object server does not write out any async pendings; this # test asserts that this is the case. - def gather_async_pendings(onodes): - async_pendings = [] - for onode in onodes: - device_dir = self.device_dir('', onode) - for ap_pol_dir in os.listdir(device_dir): - if not ap_pol_dir.startswith('async_pending'): - # skip 'objects', 'containers', etc. - continue - async_pending_dir = os.path.join(device_dir, ap_pol_dir) - try: - ap_dirs = os.listdir(async_pending_dir) - except OSError as err: - if err.errno == errno.ENOENT: - pass - else: - raise - else: - for ap_dir in ap_dirs: - ap_dir_fullpath = os.path.join( - async_pending_dir, ap_dir) - async_pendings.extend([ - os.path.join(ap_dir_fullpath, ent) - for ent in os.listdir(ap_dir_fullpath)]) - return async_pendings - # Make an expiring object in each policy for policy in ENABLED_POLICIES: container_name = "expirer-test-%d" % policy.idx @@ -191,15 +164,12 @@ # Make sure there's no async_pendings anywhere. Probe tests only run # on single-node installs anyway, so this set should be small enough # that an exhaustive check doesn't take too long. 
- all_obj_nodes = {} - for policy in ENABLED_POLICIES: - for dev in policy.object_ring.devs: - all_obj_nodes[dev['device']] = dev - pendings_before = gather_async_pendings(all_obj_nodes.values()) + all_obj_nodes = self.get_all_object_nodes() + pendings_before = self.gather_async_pendings(all_obj_nodes) # expire the objects Manager(['object-expirer']).once() - pendings_after = gather_async_pendings(all_obj_nodes.values()) + pendings_after = self.gather_async_pendings(all_obj_nodes) self.assertEqual(pendings_after, pendings_before) def test_expirer_object_should_not_be_expired(self): diff -Nru swift-2.17.0/test/probe/test_sharder.py swift-2.18.0/test/probe/test_sharder.py --- swift-2.17.0/test/probe/test_sharder.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/probe/test_sharder.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,2034 @@ +# Copyright (c) 2017 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import hashlib +import json +import os +import shutil +import uuid + +from nose import SkipTest + +from swift.common import direct_client +from swift.common.direct_client import DirectClientException +from swift.common.utils import ShardRange, parse_db_filename, get_db_files, \ + quorum_size, config_true_value, Timestamp +from swift.container.backend import ContainerBroker, UNSHARDED, SHARDING +from swift.common import utils +from swift.common.manager import Manager +from swiftclient import client, get_auth, ClientException + +from swift.proxy.controllers.obj import num_container_updates +from test import annotate_failure +from test.probe.brain import BrainSplitter +from test.probe.common import ReplProbeTest, get_server_number, \ + wait_for_server_to_hangup + + +MIN_SHARD_CONTAINER_THRESHOLD = 4 +MAX_SHARD_CONTAINER_THRESHOLD = 100 + + +class ShardCollector(object): + """ + Returns map of node to tuples of (headers, shard ranges) returned from node + """ + def __init__(self): + self.ranges = {} + + def __call__(self, cnode, cpart, account, container): + self.ranges[cnode['id']] = direct_client.direct_get_container( + cnode, cpart, account, container, + headers={'X-Backend-Record-Type': 'shard'}) + + +class BaseTestContainerSharding(ReplProbeTest): + + def _maybe_skip_test(self): + try: + cont_configs = [utils.readconf(p, 'container-sharder') + for p in self.configs['container-server'].values()] + except ValueError: + raise SkipTest('No [container-sharder] section found in ' + 'container-server configs') + + skip_reasons = [] + auto_shard = all([config_true_value(c.get('auto_shard', False)) + for c in cont_configs]) + if not auto_shard: + skip_reasons.append( + 'auto_shard must be true in all container_sharder configs') + + self.max_shard_size = max( + int(c.get('shard_container_threshold', '1000000')) + for c in cont_configs) + + if not (MIN_SHARD_CONTAINER_THRESHOLD <= self.max_shard_size + <= MAX_SHARD_CONTAINER_THRESHOLD): + skip_reasons.append( + 'shard_container_threshold %d must be between %d and %d' % + (self.max_shard_size, 
MIN_SHARD_CONTAINER_THRESHOLD, + MAX_SHARD_CONTAINER_THRESHOLD)) + + def skip_check(reason_list, option, required): + values = set([int(c.get(option, required)) for c in cont_configs]) + if values != {required}: + reason_list.append('%s must be %s' % (option, required)) + + skip_check(skip_reasons, 'shard_scanner_batch_size', 10) + skip_check(skip_reasons, 'shard_batch_size', 2) + + if skip_reasons: + raise SkipTest(', '.join(skip_reasons)) + + def _load_rings_and_configs(self): + super(BaseTestContainerSharding, self)._load_rings_and_configs() + # perform checks for skipping test before starting services + self._maybe_skip_test() + + def _make_object_names(self, number): + return ['obj-%04d' % x for x in range(number)] + + def _setup_container_name(self): + self.container_name = 'container-%s' % uuid.uuid4() + + def setUp(self): + client.logger.setLevel(client.logging.WARNING) + client.requests.logging.getLogger().setLevel( + client.requests.logging.WARNING) + super(BaseTestContainerSharding, self).setUp() + _, self.admin_token = get_auth( + 'http://127.0.0.1:8080/auth/v1.0', 'admin:admin', 'admin') + self._setup_container_name() + self.brain = BrainSplitter(self.url, self.token, self.container_name, + None, 'container') + self.brain.put_container(policy_index=int(self.policy)) + self.sharders = Manager(['container-sharder']) + self.internal_client = self.make_internal_client() + + def stop_container_servers(self, node_numbers=None): + if node_numbers: + ipports = [] + server2ipport = {v: k for k, v in self.ipport2server.items()} + for number in self.brain.node_numbers[node_numbers]: + self.brain.servers.stop(number=number) + server = 'container%d' % number + ipports.append(server2ipport[server]) + else: + ipports = [k for k, v in self.ipport2server.items() + if v.startswith('container')] + self.brain.servers.stop() + for ipport in ipports: + wait_for_server_to_hangup(ipport) + + def put_objects(self, obj_names): + for obj in obj_names: + client.put_object(self.url, self.token, self.container_name, obj) + + def delete_objects(self, obj_names): + for obj in obj_names: + client.delete_object( + self.url, self.token, self.container_name, obj) + + def get_container_shard_ranges(self, account=None, container=None): + account = account if account else self.account + container = container if container else self.container_name + path = self.internal_client.make_path(account, container) + resp = self.internal_client.make_request( + 'GET', path + '?format=json', {'X-Backend-Record-Type': 'shard'}, + [200]) + return [ShardRange.from_dict(sr) for sr in json.loads(resp.body)] + + def direct_container_op(self, func, account=None, container=None, + expect_failure=False): + account = account if account else self.account + container = container if container else self.container_name + cpart, cnodes = self.container_ring.get_nodes(account, container) + unexpected_responses = [] + results = {} + for cnode in cnodes: + try: + results[cnode['id']] = func(cnode, cpart, account, container) + except DirectClientException as err: + if not expect_failure: + unexpected_responses.append((cnode, err)) + else: + if expect_failure: + unexpected_responses.append((cnode, 'success')) + if unexpected_responses: + self.fail('Unexpected responses: %s' % unexpected_responses) + return results + + def direct_get_container_shard_ranges(self, account=None, container=None, + expect_failure=False): + collector = ShardCollector() + self.direct_container_op( + collector, account, container, expect_failure) + return collector.ranges + 
+ def direct_delete_container(self, account=None, container=None, + expect_failure=False): + self.direct_container_op(direct_client.direct_delete_container, + account, container, expect_failure) + + def direct_head_container(self, account=None, container=None, + expect_failure=False): + return self.direct_container_op(direct_client.direct_head_container, + account, container, expect_failure) + + def get_storage_dir(self, part, node, account=None, container=None): + account = account or self.brain.account + container = container or self.container_name + server_type, config_number = get_server_number( + (node['ip'], node['port']), self.ipport2server) + assert server_type == 'container' + repl_server = '%s-replicator' % server_type + conf = utils.readconf(self.configs[repl_server][config_number], + section_name=repl_server) + datadir = os.path.join(conf['devices'], node['device'], 'containers') + container_hash = utils.hash_path(account, container) + return (utils.storage_directory(datadir, part, container_hash), + container_hash) + + def get_broker(self, part, node, account=None, container=None): + container_dir, container_hash = self.get_storage_dir( + part, node, account=account, container=container) + db_file = os.path.join(container_dir, container_hash + '.db') + self.assertTrue(get_db_files(db_file)) # sanity check + return ContainerBroker(db_file) + + def categorize_container_dir_content(self, account=None, container=None): + account = account or self.brain.account + container = container or self.container_name + part, nodes = self.brain.ring.get_nodes(account, container) + storage_dirs = [ + self.get_storage_dir(part, node, account=account, + container=container)[0] + for node in nodes] + result = { + 'shard_dbs': [], + 'normal_dbs': [], + 'pendings': [], + 'locks': [], + 'other': [], + } + for storage_dir in storage_dirs: + for f in os.listdir(storage_dir): + path = os.path.join(storage_dir, f) + if path.endswith('.db'): + hash_, epoch, ext = parse_db_filename(path) + if epoch: + result['shard_dbs'].append(path) + else: + result['normal_dbs'].append(path) + elif path.endswith('.db.pending'): + result['pendings'].append(path) + elif path.endswith('/.lock'): + result['locks'].append(path) + else: + result['other'].append(path) + if result['other']: + self.fail('Found unexpected files in storage directory:\n %s' % + '\n '.join(result['other'])) + return result + + def assertLengthEqual(self, obj, length): + obj_len = len(obj) + self.assertEqual(obj_len, length, 'len(%r) == %d, not %d' % ( + obj, obj_len, length)) + + def assert_dict_contains(self, expected_items, actual_dict): + ignored = set(expected_items) ^ set(actual_dict) + filtered_actual = dict((k, actual_dict[k]) + for k in actual_dict if k not in ignored) + self.assertEqual(expected_items, filtered_actual) + + def assert_shard_ranges_contiguous(self, expected_number, shard_ranges, + first_lower='', last_upper=''): + if shard_ranges and isinstance(shard_ranges[0], ShardRange): + actual_shard_ranges = sorted(shard_ranges) + else: + actual_shard_ranges = sorted([ShardRange.from_dict(d) + for d in shard_ranges]) + self.assertLengthEqual(actual_shard_ranges, expected_number) + if expected_number: + with annotate_failure('Ranges %s.' 
% actual_shard_ranges): + self.assertEqual(first_lower, actual_shard_ranges[0].lower_str) + for x, y in zip(actual_shard_ranges, actual_shard_ranges[1:]): + self.assertEqual(x.upper, y.lower) + self.assertEqual(last_upper, actual_shard_ranges[-1].upper_str) + + def assert_shard_range_equal(self, expected, actual, excludes=None): + excludes = excludes or [] + expected_dict = dict(expected) + actual_dict = dict(actual) + for k in excludes: + expected_dict.pop(k, None) + actual_dict.pop(k, None) + self.assertEqual(expected_dict, actual_dict) + + def assert_shard_range_lists_equal(self, expected, actual, excludes=None): + self.assertEqual(len(expected), len(actual)) + for expected, actual in zip(expected, actual): + self.assert_shard_range_equal(expected, actual, excludes=excludes) + + def assert_shard_range_state(self, expected_state, shard_ranges): + if shard_ranges and not isinstance(shard_ranges[0], ShardRange): + shard_ranges = [ShardRange.from_dict(data) + for data in shard_ranges] + self.assertEqual([expected_state] * len(shard_ranges), + [sr.state for sr in shard_ranges]) + + def assert_total_object_count(self, expected_object_count, shard_ranges): + actual = sum([sr['object_count'] for sr in shard_ranges]) + self.assertEqual(expected_object_count, actual) + + def assert_container_listing(self, expected_listing): + headers, actual_listing = client.get_container( + self.url, self.token, self.container_name) + self.assertIn('x-container-object-count', headers) + expected_obj_count = len(expected_listing) + self.assertEqual(expected_listing, [ + x['name'].encode('utf-8') for x in actual_listing]) + self.assertEqual(str(expected_obj_count), + headers['x-container-object-count']) + return headers, actual_listing + + def assert_container_object_count(self, expected_obj_count): + headers = client.head_container( + self.url, self.token, self.container_name) + self.assertIn('x-container-object-count', headers) + self.assertEqual(str(expected_obj_count), + headers['x-container-object-count']) + + def assert_container_post_ok(self, meta_value): + key = 'X-Container-Meta-Assert-Post-Works' + headers = {key: meta_value} + client.post_container( + self.url, self.token, self.container_name, headers=headers) + resp_headers = client.head_container( + self.url, self.token, self.container_name) + self.assertEqual(meta_value, resp_headers.get(key.lower())) + + def assert_container_post_fails(self, meta_value): + key = 'X-Container-Meta-Assert-Post-Works' + headers = {key: meta_value} + with self.assertRaises(ClientException) as cm: + client.post_container( + self.url, self.token, self.container_name, headers=headers) + self.assertEqual(404, cm.exception.http_status) + + def assert_container_delete_fails(self): + with self.assertRaises(ClientException) as cm: + client.delete_container(self.url, self.token, self.container_name) + self.assertEqual(409, cm.exception.http_status) + + def assert_container_not_found(self): + with self.assertRaises(ClientException) as cm: + client.get_container(self.url, self.token, self.container_name) + self.assertEqual(404, cm.exception.http_status) + # check for headers leaking out while deleted + resp_headers = cm.exception.http_response_headers + self.assertNotIn('X-Container-Object-Count', resp_headers) + self.assertNotIn('X-Container-Bytes-Used', resp_headers) + self.assertNotIn('X-Timestamp', resp_headers) + self.assertNotIn('X-PUT-Timestamp', resp_headers) + + def assert_container_has_shard_sysmeta(self): + node_headers = self.direct_head_container() + for node_id, 
headers in node_headers.items(): + with annotate_failure('%s in %s' % (node_id, node_headers.keys())): + for k, v in headers.items(): + if k.lower().startswith('x-container-sysmeta-shard'): + break + else: + self.fail('No shard sysmeta found in %s' % headers) + + def assert_container_state(self, node, expected_state, num_shard_ranges): + headers, shard_ranges = direct_client.direct_get_container( + node, self.brain.part, self.account, self.container_name, + headers={'X-Backend-Record-Type': 'shard'}) + self.assertEqual(num_shard_ranges, len(shard_ranges)) + self.assertIn('X-Backend-Sharding-State', headers) + self.assertEqual( + expected_state, headers['X-Backend-Sharding-State']) + return [ShardRange.from_dict(sr) for sr in shard_ranges] + + def get_part_and_node_numbers(self, shard_range): + """Return the partition and node numbers for a shard range.""" + part, nodes = self.brain.ring.get_nodes( + shard_range.account, shard_range.container) + return part, [n['id'] + 1 for n in nodes] + + def run_sharders(self, shard_ranges): + """Run the sharder on partitions for given shard ranges.""" + if not isinstance(shard_ranges, (list, tuple, set)): + shard_ranges = (shard_ranges,) + partitions = ','.join(str(self.get_part_and_node_numbers(sr)[0]) + for sr in shard_ranges) + self.sharders.once(additional_args='--partitions=%s' % partitions) + + def run_sharder_sequentially(self, shard_range=None): + """Run sharder node by node on partition for given shard range.""" + if shard_range: + part, node_numbers = self.get_part_and_node_numbers(shard_range) + else: + part, node_numbers = self.brain.part, self.brain.node_numbers + for node_number in node_numbers: + self.sharders.once(number=node_number, + additional_args='--partitions=%s' % part) + + +class TestContainerShardingNonUTF8(BaseTestContainerSharding): + def test_sharding_listing(self): + # verify parameterised listing of a container during sharding + all_obj_names = self._make_object_names(4 * self.max_shard_size) + obj_names = all_obj_names[::2] + self.put_objects(obj_names) + # choose some names approx in middle of each expected shard range + markers = [ + obj_names[i] for i in range(self.max_shard_size / 4, + 2 * self.max_shard_size, + self.max_shard_size / 2)] + + def check_listing(objects, **params): + qs = '&'.join(['%s=%s' % param for param in params.items()]) + headers, listing = client.get_container( + self.url, self.token, self.container_name, query_string=qs) + listing = [x['name'].encode('utf-8') for x in listing] + if params.get('reverse'): + marker = params.get('marker', ShardRange.MAX) + end_marker = params.get('end_marker', ShardRange.MIN) + expected = [o for o in objects if end_marker < o < marker] + expected.reverse() + else: + marker = params.get('marker', ShardRange.MIN) + end_marker = params.get('end_marker', ShardRange.MAX) + expected = [o for o in objects if marker < o < end_marker] + if 'limit' in params: + expected = expected[:params['limit']] + self.assertEqual(expected, listing) + + def check_listing_precondition_fails(**params): + qs = '&'.join(['%s=%s' % param for param in params.items()]) + with self.assertRaises(ClientException) as cm: + client.get_container( + self.url, self.token, self.container_name, query_string=qs) + self.assertEqual(412, cm.exception.http_status) + return cm.exception + + def do_listing_checks(objects): + check_listing(objects) + check_listing(objects, marker=markers[0], end_marker=markers[1]) + check_listing(objects, marker=markers[0], end_marker=markers[2]) + check_listing(objects, 
marker=markers[1], end_marker=markers[3]) + check_listing(objects, marker=markers[1], end_marker=markers[3], + limit=self.max_shard_size / 4) + check_listing(objects, marker=markers[1], end_marker=markers[3], + limit=self.max_shard_size / 4) + check_listing(objects, marker=markers[1], end_marker=markers[2], + limit=self.max_shard_size / 2) + check_listing(objects, marker=markers[1], end_marker=markers[1]) + check_listing(objects, reverse=True) + check_listing(objects, reverse=True, end_marker=markers[1]) + check_listing(objects, reverse=True, marker=markers[3], + end_marker=markers[1], limit=self.max_shard_size / 4) + check_listing(objects, reverse=True, marker=markers[3], + end_marker=markers[1], limit=0) + check_listing([], marker=markers[0], end_marker=markers[0]) + check_listing([], marker=markers[0], end_marker=markers[1], + reverse=True) + check_listing(objects, prefix='obj') + check_listing([], prefix='zzz') + # delimiter + headers, listing = client.get_container( + self.url, self.token, self.container_name, + query_string='delimiter=-') + self.assertEqual([{'subdir': 'obj-'}], listing) + + limit = self.cluster_info['swift']['container_listing_limit'] + exc = check_listing_precondition_fails(limit=limit + 1) + self.assertIn('Maximum limit', exc.http_response_content) + exc = check_listing_precondition_fails(delimiter='ab') + self.assertIn('Bad delimiter', exc.http_response_content) + + # sanity checks + do_listing_checks(obj_names) + + # Shard the container + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + # First run the 'leader' in charge of scanning, which finds all shard + # ranges and cleaves first two + self.sharders.once(number=self.brain.node_numbers[0], + additional_args='--partitions=%s' % self.brain.part) + # Then run sharder on other nodes which will also cleave first two + # shard ranges + for n in self.brain.node_numbers[1:]: + self.sharders.once( + number=n, additional_args='--partitions=%s' % self.brain.part) + + # sanity check shard range states + for node in self.brain.nodes: + self.assert_container_state(node, 'sharding', 4) + shard_ranges = self.get_container_shard_ranges() + self.assertLengthEqual(shard_ranges, 4) + self.assert_shard_range_state(ShardRange.CLEAVED, shard_ranges[:2]) + self.assert_shard_range_state(ShardRange.CREATED, shard_ranges[2:]) + + self.assert_container_delete_fails() + self.assert_container_has_shard_sysmeta() # confirm no sysmeta deleted + self.assert_container_post_ok('sharding') + do_listing_checks(obj_names) + + # put some new objects spread through entire namespace + new_obj_names = all_obj_names[1::4] + self.put_objects(new_obj_names) + + # new objects that fell into the first two cleaved shard ranges are + # reported in listing, new objects in the yet-to-be-cleaved shard + # ranges are not yet included in listing + exp_obj_names = [o for o in obj_names + new_obj_names + if o <= shard_ranges[1].upper] + exp_obj_names += [o for o in obj_names + if o > shard_ranges[1].upper] + exp_obj_names.sort() + do_listing_checks(exp_obj_names) + + # run all the sharders again and the last two shard ranges get cleaved + self.sharders.once(additional_args='--partitions=%s' % self.brain.part) + for node in self.brain.nodes: + self.assert_container_state(node, 'sharded', 4) + shard_ranges = self.get_container_shard_ranges() + self.assert_shard_range_state(ShardRange.ACTIVE, shard_ranges) + + exp_obj_names = obj_names + new_obj_names + exp_obj_names.sort() + 
do_listing_checks(exp_obj_names) + self.assert_container_delete_fails() + self.assert_container_has_shard_sysmeta() + self.assert_container_post_ok('sharded') + + # delete original objects + self.delete_objects(obj_names) + do_listing_checks(new_obj_names) + self.assert_container_delete_fails() + self.assert_container_has_shard_sysmeta() + self.assert_container_post_ok('sharded') + + +class TestContainerShardingUTF8(TestContainerShardingNonUTF8): + def _make_object_names(self, number): + # override default with names that include non-ascii chars + name_length = self.cluster_info['swift']['max_object_name_length'] + obj_names = [] + for x in range(number): + name = (u'obj-\u00e4\u00ea\u00ec\u00f2\u00fb-%04d' % x) + name = name.encode('utf8').ljust(name_length, 'o') + obj_names.append(name) + return obj_names + + def _setup_container_name(self): + # override default with max length name that includes non-ascii chars + super(TestContainerShardingUTF8, self)._setup_container_name() + name_length = self.cluster_info['swift']['max_container_name_length'] + cont_name = self.container_name + u'-\u00e4\u00ea\u00ec\u00f2\u00fb' + self.conainer_name = cont_name.encode('utf8').ljust(name_length, 'x') + + +class TestContainerSharding(BaseTestContainerSharding): + def _test_sharded_listing(self, run_replicators=False): + obj_names = self._make_object_names(self.max_shard_size) + self.put_objects(obj_names) + + # Verify that we start out with normal DBs, no shards + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['normal_dbs'], 3) + self.assertLengthEqual(found['shard_dbs'], 0) + for db_file in found['normal_dbs']: + broker = ContainerBroker(db_file) + self.assertIs(True, broker.is_root_container()) + self.assertEqual('unsharded', broker.get_db_state()) + self.assertLengthEqual(broker.get_shard_ranges(), 0) + + headers, pre_sharding_listing = client.get_container( + self.url, self.token, self.container_name) + self.assertEqual(obj_names, [x['name'].encode('utf-8') + for x in pre_sharding_listing]) # sanity + + # Shard it + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + pre_sharding_headers = client.head_container( + self.url, self.admin_token, self.container_name) + self.assertEqual('True', + pre_sharding_headers.get('x-container-sharding')) + + # Only run the one in charge of scanning + self.sharders.once(number=self.brain.node_numbers[0], + additional_args='--partitions=%s' % self.brain.part) + + # Verify that we have one sharded db -- though the other normal DBs + # received the shard ranges that got defined + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 1) + broker = ContainerBroker(found['shard_dbs'][0]) + # TODO: assert the shard db is on replica 0 + self.assertIs(True, broker.is_root_container()) + self.assertEqual('sharded', broker.get_db_state()) + orig_root_shard_ranges = [dict(sr) for sr in broker.get_shard_ranges()] + self.assertLengthEqual(orig_root_shard_ranges, 2) + self.assert_total_object_count(len(obj_names), orig_root_shard_ranges) + self.assert_shard_ranges_contiguous(2, orig_root_shard_ranges) + self.assertEqual([ShardRange.ACTIVE, ShardRange.ACTIVE], + [sr['state'] for sr in orig_root_shard_ranges]) + self.direct_delete_container(expect_failure=True) + + self.assertLengthEqual(found['normal_dbs'], 2) + for db_file in found['normal_dbs']: + broker = ContainerBroker(db_file) + self.assertIs(True, broker.is_root_container()) + 
self.assertEqual('unsharded', broker.get_db_state()) + # the sharded db had shard range meta_timestamps and state updated + # during cleaving, so we do not expect those to be equal on other + # nodes + self.assert_shard_range_lists_equal( + orig_root_shard_ranges, broker.get_shard_ranges(), + excludes=['meta_timestamp', 'state', 'state_timestamp']) + + if run_replicators: + Manager(['container-replicator']).once() + # replication doesn't change the db file names + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 1) + self.assertLengthEqual(found['normal_dbs'], 2) + + # Now that everyone has shard ranges, run *everyone* + self.sharders.once(additional_args='--partitions=%s' % self.brain.part) + + # Verify that we only have shard dbs now + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 3) + self.assertLengthEqual(found['normal_dbs'], 0) + # Shards stayed the same + for db_file in found['shard_dbs']: + broker = ContainerBroker(db_file) + self.assertIs(True, broker.is_root_container()) + self.assertEqual('sharded', broker.get_db_state()) + # Well, except for meta_timestamps, since the shards each reported + self.assert_shard_range_lists_equal( + orig_root_shard_ranges, broker.get_shard_ranges(), + excludes=['meta_timestamp', 'state_timestamp']) + for orig, updated in zip(orig_root_shard_ranges, + broker.get_shard_ranges()): + self.assertGreaterEqual(updated.state_timestamp, + orig['state_timestamp']) + self.assertGreaterEqual(updated.meta_timestamp, + orig['meta_timestamp']) + + # Check that entire listing is available + headers, actual_listing = self.assert_container_listing(obj_names) + # ... and check some other container properties + self.assertEqual(headers['last-modified'], + pre_sharding_headers['last-modified']) + + # It even works in reverse! + headers, listing = client.get_container(self.url, self.token, + self.container_name, + query_string='reverse=on') + self.assertEqual(pre_sharding_listing[::-1], listing) + + # Now put some new objects into first shard, taking its count to + # 3 shard ranges' worth + more_obj_names = [ + 'beta%03d' % x for x in range(self.max_shard_size)] + self.put_objects(more_obj_names) + + # The listing includes new objects... + headers, listing = self.assert_container_listing( + more_obj_names + obj_names) + self.assertEqual(pre_sharding_listing, listing[len(more_obj_names):]) + + # ...but root object count is out of date until the sharders run and + # update the root + self.assert_container_object_count(len(obj_names)) + + # run sharders on the shard to get root updated + shard_1 = ShardRange.from_dict(orig_root_shard_ranges[0]) + self.run_sharders(shard_1) + self.assert_container_object_count(len(more_obj_names + obj_names)) + + # we've added objects enough that we need to shard the first shard + # *again* into three new sub-shards, but nothing happens until the root + # leader identifies shard candidate... + root_shard_ranges = self.direct_get_container_shard_ranges() + for node, (hdrs, root_shards) in root_shard_ranges.items(): + self.assertLengthEqual(root_shards, 2) + with annotate_failure('node %s. 
' % node): + self.assertEqual( + [ShardRange.ACTIVE] * 2, + [sr['state'] for sr in root_shards]) + # orig shards 0, 1 should be contiguous + self.assert_shard_ranges_contiguous(2, root_shards) + + # Now run the root leader to identify shard candidate...while one of + # the shard container servers is down + shard_1_part, shard_1_nodes = self.get_part_and_node_numbers(shard_1) + self.brain.servers.stop(number=shard_1_nodes[2]) + self.sharders.once(number=self.brain.node_numbers[0], + additional_args='--partitions=%s' % self.brain.part) + + # ... so third replica of first shard state is not moved to sharding + found_for_shard = self.categorize_container_dir_content( + shard_1.account, shard_1.container) + self.assertLengthEqual(found_for_shard['normal_dbs'], 3) + self.assertEqual( + [ShardRange.SHARDING, ShardRange.SHARDING, ShardRange.ACTIVE], + [ContainerBroker(db_file).get_own_shard_range().state + for db_file in found_for_shard['normal_dbs']]) + + # ...then run first cycle of first shard sharders in order, leader + # first, to get to predictable state where all nodes have cleaved 2 out + # of 3 ranges...starting with first two nodes + for node_number in shard_1_nodes[:2]: + self.sharders.once( + number=node_number, + additional_args='--partitions=%s' % shard_1_part) + + # ... first two replicas start sharding to sub-shards + found_for_shard = self.categorize_container_dir_content( + shard_1.account, shard_1.container) + self.assertLengthEqual(found_for_shard['shard_dbs'], 2) + for db_file in found_for_shard['shard_dbs'][:2]: + broker = ContainerBroker(db_file) + with annotate_failure('shard db file %s. ' % db_file): + self.assertIs(False, broker.is_root_container()) + self.assertEqual('sharding', broker.get_db_state()) + self.assertEqual( + ShardRange.SHARDING, broker.get_own_shard_range().state) + shard_shards = broker.get_shard_ranges() + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CLEAVED, + ShardRange.CREATED], + [sr.state for sr in shard_shards]) + self.assert_shard_ranges_contiguous( + 3, shard_shards, + first_lower=orig_root_shard_ranges[0]['lower'], + last_upper=orig_root_shard_ranges[0]['upper']) + + # but third replica still has no idea it should be sharding + self.assertLengthEqual(found_for_shard['normal_dbs'], 3) + self.assertEqual( + ShardRange.ACTIVE, + ContainerBroker( + found_for_shard['normal_dbs'][2]).get_own_shard_range().state) + + # ...but once sharder runs on third replica it will learn its state; + # note that any root replica on the stopped container server also won't + # know about the shards being in sharding state, so leave that server + # stopped for now so that shard fetches its state from an up-to-date + # root replica + self.sharders.once( + number=shard_1_nodes[2], + additional_args='--partitions=%s' % shard_1_part) + + # third replica is sharding but has no sub-shard ranges yet... 
+ found_for_shard = self.categorize_container_dir_content( + shard_1.account, shard_1.container) + self.assertLengthEqual(found_for_shard['shard_dbs'], 2) + self.assertLengthEqual(found_for_shard['normal_dbs'], 3) + broker = ContainerBroker(found_for_shard['normal_dbs'][2]) + self.assertEqual('unsharded', broker.get_db_state()) + self.assertEqual( + ShardRange.SHARDING, broker.get_own_shard_range().state) + self.assertFalse(broker.get_shard_ranges()) + + # ...until sub-shard ranges are replicated from another shard replica; + # there may also be a sub-shard replica missing so run replicators on + # all nodes to fix that if necessary + self.brain.servers.start(number=shard_1_nodes[2]) + self.replicators.once() + + # now run sharder again on third replica + self.sharders.once( + number=shard_1_nodes[2], + additional_args='--partitions=%s' % shard_1_part) + + # check original first shard range state and sub-shards - all replicas + # should now be in consistent state + found_for_shard = self.categorize_container_dir_content( + shard_1.account, shard_1.container) + self.assertLengthEqual(found_for_shard['shard_dbs'], 3) + self.assertLengthEqual(found_for_shard['normal_dbs'], 3) + for db_file in found_for_shard['shard_dbs']: + broker = ContainerBroker(db_file) + with annotate_failure('shard db file %s. ' % db_file): + self.assertIs(False, broker.is_root_container()) + self.assertEqual('sharding', broker.get_db_state()) + self.assertEqual( + ShardRange.SHARDING, broker.get_own_shard_range().state) + shard_shards = broker.get_shard_ranges() + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CLEAVED, + ShardRange.CREATED], + [sr.state for sr in shard_shards]) + self.assert_shard_ranges_contiguous( + 3, shard_shards, + first_lower=orig_root_shard_ranges[0]['lower'], + last_upper=orig_root_shard_ranges[0]['upper']) + + # check third sub-shard is in created state + sub_shard = shard_shards[2] + found_for_sub_shard = self.categorize_container_dir_content( + sub_shard.account, sub_shard.container) + self.assertFalse(found_for_sub_shard['shard_dbs']) + self.assertLengthEqual(found_for_sub_shard['normal_dbs'], 3) + for db_file in found_for_sub_shard['normal_dbs']: + broker = ContainerBroker(db_file) + with annotate_failure('sub shard db file %s. ' % db_file): + self.assertIs(False, broker.is_root_container()) + self.assertEqual('unsharded', broker.get_db_state()) + self.assertEqual( + ShardRange.CREATED, broker.get_own_shard_range().state) + self.assertFalse(broker.get_shard_ranges()) + + # check root shard ranges + root_shard_ranges = self.direct_get_container_shard_ranges() + for node, (hdrs, root_shards) in root_shard_ranges.items(): + self.assertLengthEqual(root_shards, 5) + with annotate_failure('node %s. 
' % node): + # shard ranges are sorted by upper, state, lower, so expect: + # sub-shards, orig shard 0, orig shard 1 + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CLEAVED, + ShardRange.CREATED, ShardRange.SHARDING, + ShardRange.ACTIVE], + [sr['state'] for sr in root_shards]) + # sub-shards 0, 1, 2, orig shard 1 should be contiguous + self.assert_shard_ranges_contiguous( + 4, root_shards[:3] + root_shards[4:]) + # orig shards 0, 1 should be contiguous + self.assert_shard_ranges_contiguous(2, root_shards[3:]) + + self.assert_container_listing(more_obj_names + obj_names) + self.assert_container_object_count(len(more_obj_names + obj_names)) + + # add another object that lands in the first of the new sub-shards + self.put_objects(['alpha']) + + # TODO: assert that alpha is in the first new shard + self.assert_container_listing(['alpha'] + more_obj_names + obj_names) + # Run sharders again so things settle. + self.run_sharders(shard_1) + + # check original first shard range shards + for db_file in found_for_shard['shard_dbs']: + broker = ContainerBroker(db_file) + with annotate_failure('shard db file %s. ' % db_file): + self.assertIs(False, broker.is_root_container()) + self.assertEqual('sharded', broker.get_db_state()) + self.assertEqual( + [ShardRange.ACTIVE] * 3, + [sr.state for sr in broker.get_shard_ranges()]) + # check root shard ranges + root_shard_ranges = self.direct_get_container_shard_ranges() + for node, (hdrs, root_shards) in root_shard_ranges.items(): + # old first shard range should have been deleted + self.assertLengthEqual(root_shards, 4) + with annotate_failure('node %s. ' % node): + self.assertEqual( + [ShardRange.ACTIVE] * 4, + [sr['state'] for sr in root_shards]) + self.assert_shard_ranges_contiguous(4, root_shards) + + headers, final_listing = self.assert_container_listing( + ['alpha'] + more_obj_names + obj_names) + + # check root + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 3) + self.assertLengthEqual(found['normal_dbs'], 0) + new_shard_ranges = None + for db_file in found['shard_dbs']: + broker = ContainerBroker(db_file) + self.assertIs(True, broker.is_root_container()) + self.assertEqual('sharded', broker.get_db_state()) + if new_shard_ranges is None: + new_shard_ranges = broker.get_shard_ranges( + include_deleted=True) + self.assertLengthEqual(new_shard_ranges, 5) + # Second half is still there, and unchanged + self.assertIn( + dict(orig_root_shard_ranges[1], meta_timestamp=None, + state_timestamp=None), + [dict(sr, meta_timestamp=None, state_timestamp=None) + for sr in new_shard_ranges]) + # But the first half split in three, then deleted + by_name = {sr.name: sr for sr in new_shard_ranges} + self.assertIn(orig_root_shard_ranges[0]['name'], by_name) + old_shard_range = by_name.pop( + orig_root_shard_ranges[0]['name']) + self.assertTrue(old_shard_range.deleted) + self.assert_shard_ranges_contiguous(4, by_name.values()) + else: + # Everyone's on the same page. 
Well, except for + # meta_timestamps, since the shards each reported + other_shard_ranges = broker.get_shard_ranges( + include_deleted=True) + self.assert_shard_range_lists_equal( + new_shard_ranges, other_shard_ranges, + excludes=['meta_timestamp', 'state_timestamp']) + for orig, updated in zip(orig_root_shard_ranges, + other_shard_ranges): + self.assertGreaterEqual(updated.meta_timestamp, + orig['meta_timestamp']) + + self.assert_container_delete_fails() + + for obj in final_listing: + client.delete_object( + self.url, self.token, self.container_name, obj['name']) + + # the objects won't be listed anymore + self.assert_container_listing([]) + # but root container stats will not yet be aware of the deletions + self.assert_container_delete_fails() + + # One server was down while the shard sharded its first two sub-shards, + # so there may be undeleted handoff db(s) for sub-shard(s) that were + # not fully replicated; run replicators now to clean up so they no + # longer report bogus stats to root. + self.replicators.once() + + # Run sharder so that shard containers update the root. Do not run + # sharder on root container because that triggers shrinks which can + # cause root object count to temporarily be non-zero and prevent the + # final delete. + self.run_sharders(self.get_container_shard_ranges()) + # then root is empty and can be deleted + self.assert_container_listing([]) + self.assert_container_object_count(0) + client.delete_container(self.url, self.token, self.container_name) + + def test_sharded_listing_no_replicators(self): + self._test_sharded_listing() + + def test_sharded_listing_with_replicators(self): + self._test_sharded_listing(run_replicators=True) + + def test_async_pendings(self): + obj_names = self._make_object_names(self.max_shard_size * 2) + + # There are some updates *everyone* gets + self.put_objects(obj_names[::5]) + # But roll some outages so each container only get ~2/5 more object + # records i.e. total of 3/5 updates per container; and async pendings + # pile up + for i, n in enumerate(self.brain.node_numbers, start=1): + self.brain.servers.stop(number=n) + self.put_objects(obj_names[i::5]) + self.brain.servers.start(number=n) + + # But there are also 1/5 updates *no one* gets + self.brain.servers.stop() + self.put_objects(obj_names[4::5]) + self.brain.servers.start() + + # Shard it + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + headers = client.head_container(self.url, self.admin_token, + self.container_name) + self.assertEqual('True', headers.get('x-container-sharding')) + + # sanity check + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 0) + self.assertLengthEqual(found['normal_dbs'], 3) + for db_file in found['normal_dbs']: + broker = ContainerBroker(db_file) + self.assertIs(True, broker.is_root_container()) + self.assertEqual(len(obj_names) * 3 // 5, + broker.get_info()['object_count']) + + # Only run the 'leader' in charge of scanning. + # Each container has ~2 * max * 3/5 objects + # which are distributed from obj000 to obj<2 * max - 1>, + # so expect 3 shard ranges to be found: the first two will be complete + # shards with max/2 objects and lower/upper bounds spaced by approx: + # (2 * max - 1)/(2 * max * 3/5) * (max/2) =~ 5/6 * max + # + # Note that during this shard cycle the leader replicates to other + # nodes so they will end up with ~2 * max * 4/5 objects. 
+ self.sharders.once(number=self.brain.node_numbers[0], + additional_args='--partitions=%s' % self.brain.part) + + # Verify that we have one shard db -- though the other normal DBs + # received the shard ranges that got defined + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 1) + node_index_zero_db = found['shard_dbs'][0] + broker = ContainerBroker(node_index_zero_db) + self.assertIs(True, broker.is_root_container()) + self.assertEqual(SHARDING, broker.get_db_state()) + expected_shard_ranges = broker.get_shard_ranges() + self.assertLengthEqual(expected_shard_ranges, 3) + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CLEAVED, ShardRange.CREATED], + [sr.state for sr in expected_shard_ranges]) + + # Still have all three big DBs -- we've only cleaved 2 of the 3 shard + # ranges that got defined + self.assertLengthEqual(found['normal_dbs'], 3) + db_states = [] + for db_file in found['normal_dbs']: + broker = ContainerBroker(db_file) + self.assertIs(True, broker.is_root_container()) + db_states.append(broker.get_db_state()) + # the sharded db had shard range meta_timestamps updated during + # cleaving, so we do not expect those to be equal on other nodes + self.assert_shard_range_lists_equal( + expected_shard_ranges, broker.get_shard_ranges(), + excludes=['meta_timestamp', 'state_timestamp', 'state']) + self.assertEqual(len(obj_names) * 3 // 5, + broker.get_info()['object_count']) + self.assertEqual([SHARDING, UNSHARDED, UNSHARDED], sorted(db_states)) + + # Run the other sharders so we're all in (roughly) the same state + for n in self.brain.node_numbers[1:]: + self.sharders.once( + number=n, + additional_args='--partitions=%s' % self.brain.part) + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 3) + self.assertLengthEqual(found['normal_dbs'], 3) + for db_file in found['normal_dbs']: + broker = ContainerBroker(db_file) + self.assertEqual(SHARDING, broker.get_db_state()) + # no new rows + self.assertEqual(len(obj_names) * 3 // 5, + broker.get_info()['object_count']) + + # Run updaters to clear the async pendings + Manager(['object-updater']).once() + + # Our "big" dbs didn't take updates + for db_file in found['normal_dbs']: + broker = ContainerBroker(db_file) + self.assertEqual(len(obj_names) * 3 // 5, + broker.get_info()['object_count']) + + # TODO: confirm that the updates got redirected to the shards + + # The entire listing is not yet available - we have two cleaved shard + # ranges, complete with async updates, but for the remainder of the + # namespace only what landed in the original container + headers, listing = client.get_container(self.url, self.token, + self.container_name) + start_listing = [ + o for o in obj_names if o <= expected_shard_ranges[1].upper] + self.assertEqual( + [x['name'].encode('utf-8') for x in listing[:len(start_listing)]], + start_listing) + # we can't assert much about the remaining listing, other than that + # there should be something + self.assertTrue( + [x['name'].encode('utf-8') for x in listing[len(start_listing):]]) + # Object count is hard to reason about though! + # TODO: nail down what this *should* be and make sure all containers + # respond with it! Depending on what you're looking at, this + # could be 0, 1/2, 7/12 (!?), 3/5, 2/3, or 4/5 or all objects! + # Apparently, it may not even be present at all! 
+ # self.assertIn('x-container-object-count', headers) + # self.assertEqual(headers['x-container-object-count'], + # str(len(obj_names) - len(obj_names) // 6)) + + # TODO: Doesn't work in reverse, yet + # headers, listing = client.get_container(self.url, self.token, + # self.container_name, + # query_string='reverse=on') + # self.assertEqual([x['name'].encode('utf-8') for x in listing], + # obj_names[::-1]) + + # Run the sharders again to get everything to settle + self.sharders.once() + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 3) + self.assertLengthEqual(found['normal_dbs'], 0) + # now all shards have been cleaved we should get the complete listing + headers, listing = client.get_container(self.url, self.token, + self.container_name) + self.assertEqual([x['name'].encode('utf-8') for x in listing], + obj_names) + + def test_shrinking(self): + int_client = self.make_internal_client() + + def check_node_data(node_data, exp_hdrs, exp_obj_count, exp_shards): + hdrs, range_data = node_data + self.assert_dict_contains(exp_hdrs, hdrs) + self.assert_shard_ranges_contiguous(exp_shards, range_data) + self.assert_total_object_count(exp_obj_count, range_data) + + def check_shard_nodes_data(node_data, expected_state='unsharded', + expected_shards=0, exp_obj_count=0): + # checks that shard range is consistent on all nodes + root_path = '%s/%s' % (self.account, self.container_name) + exp_shard_hdrs = {'X-Container-Sysmeta-Shard-Root': root_path, + 'X-Backend-Sharding-State': expected_state} + object_counts = [] + bytes_used = [] + for node_id, node_data in node_data.items(): + with annotate_failure('Node id %s.' % node_id): + check_node_data( + node_data, exp_shard_hdrs, exp_obj_count, + expected_shards) + hdrs = node_data[0] + object_counts.append(int(hdrs['X-Container-Object-Count'])) + bytes_used.append(int(hdrs['X-Container-Bytes-Used'])) + if len(set(object_counts)) != 1: + self.fail('Inconsistent object counts: %s' % object_counts) + if len(set(bytes_used)) != 1: + self.fail('Inconsistent bytes used: %s' % bytes_used) + return object_counts[0], bytes_used[0] + + repeat = [0] + + def do_shard_then_shrink(): + repeat[0] += 1 + obj_names = ['obj-%s-%03d' % (repeat[0], x) + for x in range(self.max_shard_size)] + self.put_objects(obj_names) + # these two object names will fall at start of first shard range... 
+ alpha = 'alpha-%s' % repeat[0] + beta = 'beta-%s' % repeat[0] + + # Enable sharding + client.post_container( + self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + + # sanity check + self.assert_container_listing(obj_names) + + # Only run the one in charge of scanning + self.sharders.once( + number=self.brain.node_numbers[0], + additional_args='--partitions=%s' % self.brain.part) + + # check root container + root_nodes_data = self.direct_get_container_shard_ranges() + self.assertEqual(3, len(root_nodes_data)) + + # nodes on which sharder has not run are still in unsharded state + # but have had shard ranges replicated to them + exp_obj_count = len(obj_names) + exp_hdrs = {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': str(exp_obj_count)} + node_id = self.brain.node_numbers[1] - 1 + check_node_data( + root_nodes_data[node_id], exp_hdrs, exp_obj_count, 2) + node_id = self.brain.node_numbers[2] - 1 + check_node_data( + root_nodes_data[node_id], exp_hdrs, exp_obj_count, 2) + + # only one that ran sharder is in sharded state + exp_hdrs['X-Backend-Sharding-State'] = 'sharded' + node_id = self.brain.node_numbers[0] - 1 + check_node_data( + root_nodes_data[node_id], exp_hdrs, exp_obj_count, 2) + + orig_range_data = root_nodes_data[node_id][1] + orig_shard_ranges = [ShardRange.from_dict(r) + for r in orig_range_data] + + # check first shard + shard_nodes_data = self.direct_get_container_shard_ranges( + orig_shard_ranges[0].account, orig_shard_ranges[0].container) + obj_count, bytes_used = check_shard_nodes_data(shard_nodes_data) + total_shard_object_count = obj_count + + # check second shard + shard_nodes_data = self.direct_get_container_shard_ranges( + orig_shard_ranges[1].account, orig_shard_ranges[1].container) + obj_count, bytes_used = check_shard_nodes_data(shard_nodes_data) + total_shard_object_count += obj_count + self.assertEqual(exp_obj_count, total_shard_object_count) + + # Now that everyone has shard ranges, run *everyone* + self.sharders.once( + additional_args='--partitions=%s' % self.brain.part) + + # all root container nodes should now be in sharded state + root_nodes_data = self.direct_get_container_shard_ranges() + self.assertEqual(3, len(root_nodes_data)) + for node_id, node_data in root_nodes_data.items(): + with annotate_failure('Node id %s.' 
% node_id): + check_node_data(node_data, exp_hdrs, exp_obj_count, 2) + + # run updaters to update .sharded account; shard containers have + # not updated account since having objects replicated to them + self.updaters.once() + shard_cont_count, shard_obj_count = int_client.get_account_info( + orig_shard_ranges[0].account, [204]) + self.assertEqual(2 * repeat[0], shard_cont_count) + self.assertEqual(len(obj_names), shard_obj_count) + + # checking the listing also refreshes proxy container info cache so + # that the proxy becomes aware that container is sharded and will + # now look up the shard target for subsequent updates + self.assert_container_listing(obj_names) + + # delete objects from first shard range + first_shard_objects = [obj_name for obj_name in obj_names + if obj_name <= orig_shard_ranges[0].upper] + for obj in first_shard_objects: + client.delete_object( + self.url, self.token, self.container_name, obj) + with self.assertRaises(ClientException): + client.get_object( + self.url, self.token, self.container_name, obj) + + second_shard_objects = [obj_name for obj_name in obj_names + if obj_name > orig_shard_ranges[1].lower] + self.assert_container_listing(second_shard_objects) + + self.put_objects([alpha]) + second_shard_objects = [obj_name for obj_name in obj_names + if obj_name > orig_shard_ranges[1].lower] + self.assert_container_listing([alpha] + second_shard_objects) + + # while container servers are down, but proxy has container info in + # cache from recent listing, put another object; this update will + # lurk in async pending until the updaters run again + # TODO: because all the root container servers are down and + # therefore cannot respond to a GET for a redirect target, the + # object update will default to being targeted at the root + # container; can we provoke an object update that does get targeted + # to the shard, but fails to update shard, so that the async + # pending will first be directed to the shard when the updaters + # run? + self.stop_container_servers() + self.put_objects([beta]) + self.brain.servers.start() + async_pendings = self.gather_async_pendings( + self.get_all_object_nodes()) + num_container_replicas = len(self.brain.nodes) + num_obj_replicas = self.policy.object_ring.replica_count + expected_num_updates = num_container_updates( + num_container_replicas, quorum_size(num_container_replicas), + num_obj_replicas, self.policy.quorum) + expected_num_pendings = min(expected_num_updates, num_obj_replicas) + # sanity check + with annotate_failure('policy %s. ' % self.policy): + self.assertLengthEqual(async_pendings, expected_num_pendings) + + # root object count is not updated... + self.assert_container_object_count(len(obj_names)) + self.assert_container_listing([alpha] + second_shard_objects) + root_nodes_data = self.direct_get_container_shard_ranges() + self.assertEqual(3, len(root_nodes_data)) + for node_id, node_data in root_nodes_data.items(): + with annotate_failure('Node id %s.' 
% node_id): + check_node_data(node_data, exp_hdrs, exp_obj_count, 2) + range_data = node_data[1] + self.assert_shard_range_lists_equal( + orig_range_data, range_data, + excludes=['meta_timestamp', 'state_timestamp']) + + # ...until the sharders run and update root + self.run_sharders(orig_shard_ranges[0]) + exp_obj_count = len(second_shard_objects) + 1 + self.assert_container_object_count(exp_obj_count) + self.assert_container_listing([alpha] + second_shard_objects) + + # root sharder finds donor, acceptor pair and pushes changes + self.sharders.once( + additional_args='--partitions=%s' % self.brain.part) + self.assert_container_listing([alpha] + second_shard_objects) + # run sharder on donor to shrink and replicate to acceptor + self.run_sharders(orig_shard_ranges[0]) + self.assert_container_listing([alpha] + second_shard_objects) + # run sharder on acceptor to update root with stats + self.run_sharders(orig_shard_ranges[1]) + self.assert_container_listing([alpha] + second_shard_objects) + self.assert_container_object_count(len(second_shard_objects) + 1) + + # check root container + root_nodes_data = self.direct_get_container_shard_ranges() + self.assertEqual(3, len(root_nodes_data)) + exp_hdrs['X-Container-Object-Count'] = str(exp_obj_count) + for node_id, node_data in root_nodes_data.items(): + with annotate_failure('Node id %s.' % node_id): + # NB now only *one* shard range in root + check_node_data(node_data, exp_hdrs, exp_obj_count, 1) + + # the acceptor shard is intact.. + shard_nodes_data = self.direct_get_container_shard_ranges( + orig_shard_ranges[1].account, orig_shard_ranges[1].container) + obj_count, bytes_used = check_shard_nodes_data(shard_nodes_data) + # all objects should now be in this shard + self.assertEqual(exp_obj_count, obj_count) + + # the donor shard is also still intact + # TODO: once we have figured out when these redundant donors are + # deleted, test for deletion/clean up + shard_nodes_data = self.direct_get_container_shard_ranges( + orig_shard_ranges[0].account, orig_shard_ranges[0].container) + # the donor's shard range will have the acceptor's projected stats + obj_count, bytes_used = check_shard_nodes_data( + shard_nodes_data, expected_state='sharded', expected_shards=1, + exp_obj_count=len(second_shard_objects) + 1) + # but the donor is empty and so reports zero stats + self.assertEqual(0, obj_count) + self.assertEqual(0, bytes_used) + + # delete all the second shard's object apart from 'alpha' + for obj in second_shard_objects: + client.delete_object( + self.url, self.token, self.container_name, obj) + + self.assert_container_listing([alpha]) + + # runs sharders so second range shrinks away, requires up to 3 + # cycles + self.sharders.once() # shard updates root stats + self.assert_container_listing([alpha]) + self.sharders.once() # root finds shrinkable shard + self.assert_container_listing([alpha]) + self.sharders.once() # shards shrink themselves + self.assert_container_listing([alpha]) + + # the second shard range has sharded and is empty + shard_nodes_data = self.direct_get_container_shard_ranges( + orig_shard_ranges[1].account, orig_shard_ranges[1].container) + check_shard_nodes_data( + shard_nodes_data, expected_state='sharded', expected_shards=1, + exp_obj_count=1) + + # check root container + root_nodes_data = self.direct_get_container_shard_ranges() + self.assertEqual(3, len(root_nodes_data)) + exp_hdrs = {'X-Backend-Sharding-State': 'collapsed', + # just the alpha object + 'X-Container-Object-Count': '1'} + for node_id, node_data in 
root_nodes_data.items(): + with annotate_failure('Node id %s.' % node_id): + # NB now no shard ranges in root + check_node_data(node_data, exp_hdrs, 0, 0) + + # delete the alpha object + client.delete_object( + self.url, self.token, self.container_name, alpha) + # should now be able to delete the *apparently* empty container + client.delete_container(self.url, self.token, self.container_name) + self.assert_container_not_found() + self.direct_head_container(expect_failure=True) + + # and the container stays deleted even after sharders run and shard + # send updates + self.sharders.once() + self.assert_container_not_found() + self.direct_head_container(expect_failure=True) + + # now run updaters to deal with the async pending for the beta + # object + self.updaters.once() + # and the container is revived! + self.assert_container_listing([beta]) + + # finally, clear out the container + client.delete_object( + self.url, self.token, self.container_name, beta) + + do_shard_then_shrink() + # repeat from starting point of a collapsed and previously deleted + # container + do_shard_then_shrink() + + def _setup_replication_scenario(self, num_shards, extra_objs=('alpha',)): + # Get cluster to state where 2 replicas are sharding or sharded but 3rd + # replica is unsharded and has an object that the first 2 are missing. + + # put objects while all servers are up + obj_names = self._make_object_names( + num_shards * self.max_shard_size / 2) + self.put_objects(obj_names) + + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + node_numbers = self.brain.node_numbers + + # run replicators first time to get sync points set + self.replicators.once() + + # stop the leader node and one other server + self.stop_container_servers(slice(0, 2)) + + # ...then put one more object in first shard range namespace + self.put_objects(extra_objs) + + # start leader and first other server, stop third server + for number in node_numbers[:2]: + self.brain.servers.start(number=number) + self.brain.servers.stop(number=node_numbers[2]) + self.assert_container_listing(obj_names) # sanity check + + # shard the container - first two shard ranges are cleaved + for number in node_numbers[:2]: + self.sharders.once( + number=number, + additional_args='--partitions=%s' % self.brain.part) + + self.assert_container_listing(obj_names) # sanity check + return obj_names + + def test_replication_to_sharding_container(self): + # verify that replication from an unsharded replica to a sharding + # replica does not replicate rows but does replicate shard ranges + obj_names = self._setup_replication_scenario(3) + for node in self.brain.nodes[:2]: + self.assert_container_state(node, 'sharding', 3) + + # bring third server back up, run replicator + node_numbers = self.brain.node_numbers + self.brain.servers.start(number=node_numbers[2]) + # sanity check... 
+ self.assert_container_state(self.brain.nodes[2], 'unsharded', 0) + self.replicators.once(number=node_numbers[2]) + # check db files unchanged + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 2) + self.assertLengthEqual(found['normal_dbs'], 3) + + # the 'alpha' object is NOT replicated to the two sharded nodes + for node in self.brain.nodes[:2]: + broker = self.get_broker(self.brain.part, node) + with annotate_failure( + 'Node id %s in %s' % (node['id'], self.brain.nodes[:2])): + self.assertFalse(broker.get_objects()) + self.assert_container_state(node, 'sharding', 3) + self.brain.servers.stop(number=node_numbers[2]) + self.assert_container_listing(obj_names) + + # all nodes now have shard ranges + self.brain.servers.start(number=node_numbers[2]) + node_data = self.direct_get_container_shard_ranges() + for node, (hdrs, shard_ranges) in node_data.items(): + with annotate_failure(node): + self.assert_shard_ranges_contiguous(3, shard_ranges) + + # complete cleaving third shard range on first two nodes + self.brain.servers.stop(number=node_numbers[2]) + for number in node_numbers[:2]: + self.sharders.once( + number=number, + additional_args='--partitions=%s' % self.brain.part) + # ...and now they are in sharded state + self.assert_container_state(self.brain.nodes[0], 'sharded', 3) + self.assert_container_state(self.brain.nodes[1], 'sharded', 3) + # ...still no 'alpha' object in listing + self.assert_container_listing(obj_names) + + # run the sharder on the third server, alpha object is included in + # shards that it cleaves + self.brain.servers.start(number=node_numbers[2]) + self.assert_container_state(self.brain.nodes[2], 'unsharded', 3) + self.sharders.once(number=node_numbers[2], + additional_args='--partitions=%s' % self.brain.part) + self.assert_container_state(self.brain.nodes[2], 'sharding', 3) + self.sharders.once(number=node_numbers[2], + additional_args='--partitions=%s' % self.brain.part) + self.assert_container_state(self.brain.nodes[2], 'sharded', 3) + self.assert_container_listing(['alpha'] + obj_names) + + def test_replication_to_sharded_container(self): + # verify that replication from an unsharded replica to a sharded + # replica does not replicate rows but does replicate shard ranges + obj_names = self._setup_replication_scenario(2) + for node in self.brain.nodes[:2]: + self.assert_container_state(node, 'sharded', 2) + + # sanity check + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 2) + self.assertLengthEqual(found['normal_dbs'], 1) + for node in self.brain.nodes[:2]: + broker = self.get_broker(self.brain.part, node) + info = broker.get_info() + with annotate_failure( + 'Node id %s in %s' % (node['id'], self.brain.nodes[:2])): + self.assertEqual(len(obj_names), info['object_count']) + self.assertFalse(broker.get_objects()) + + # bring third server back up, run replicator + node_numbers = self.brain.node_numbers + self.brain.servers.start(number=node_numbers[2]) + # sanity check... 
+ self.assert_container_state(self.brain.nodes[2], 'unsharded', 0) + self.replicators.once(number=node_numbers[2]) + # check db files unchanged + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['shard_dbs'], 2) + self.assertLengthEqual(found['normal_dbs'], 1) + + # the 'alpha' object is NOT replicated to the two sharded nodes + for node in self.brain.nodes[:2]: + broker = self.get_broker(self.brain.part, node) + with annotate_failure( + 'Node id %s in %s' % (node['id'], self.brain.nodes[:2])): + self.assertFalse(broker.get_objects()) + self.assert_container_state(node, 'sharded', 2) + self.brain.servers.stop(number=node_numbers[2]) + self.assert_container_listing(obj_names) + + # all nodes now have shard ranges + self.brain.servers.start(number=node_numbers[2]) + node_data = self.direct_get_container_shard_ranges() + for node, (hdrs, shard_ranges) in node_data.items(): + with annotate_failure(node): + self.assert_shard_ranges_contiguous(2, shard_ranges) + + # run the sharder on the third server, alpha object is included in + # shards that it cleaves + self.assert_container_state(self.brain.nodes[2], 'unsharded', 2) + self.sharders.once(number=node_numbers[2], + additional_args='--partitions=%s' % self.brain.part) + self.assert_container_state(self.brain.nodes[2], 'sharded', 2) + self.assert_container_listing(['alpha'] + obj_names) + + def test_sharding_requires_sufficient_replication(self): + # verify that cleaving only progresses if each cleaved shard range is + # sufficiently replicated + + # put enough objects for 4 shard ranges + obj_names = self._make_object_names(2 * self.max_shard_size) + self.put_objects(obj_names) + + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + node_numbers = self.brain.node_numbers + leader_node = self.brain.nodes[0] + leader_num = node_numbers[0] + + # run replicators first time to get sync points set + self.replicators.once() + + # start sharding on the leader node + self.sharders.once(number=leader_num, + additional_args='--partitions=%s' % self.brain.part) + shard_ranges = self.assert_container_state(leader_node, 'sharding', 4) + self.assertEqual([ShardRange.CLEAVED] * 2 + [ShardRange.CREATED] * 2, + [sr.state for sr in shard_ranges]) + + # stop *all* container servers for third shard range + sr_part, sr_node_nums = self.get_part_and_node_numbers(shard_ranges[2]) + for node_num in sr_node_nums: + self.brain.servers.stop(number=node_num) + + # attempt to continue sharding on the leader node + self.sharders.once(number=leader_num, + additional_args='--partitions=%s' % self.brain.part) + + # no cleaving progress was made + for node_num in sr_node_nums: + self.brain.servers.start(number=node_num) + shard_ranges = self.assert_container_state(leader_node, 'sharding', 4) + self.assertEqual([ShardRange.CLEAVED] * 2 + [ShardRange.CREATED] * 2, + [sr.state for sr in shard_ranges]) + + # stop two of the servers for third shard range, not including any + # server that happens to be the leader node + stopped = [] + for node_num in sr_node_nums: + if node_num != leader_num: + self.brain.servers.stop(number=node_num) + stopped.append(node_num) + if len(stopped) >= 2: + break + self.assertLengthEqual(stopped, 2) # sanity check + + # attempt to continue sharding on the leader node + self.sharders.once(number=leader_num, + additional_args='--partitions=%s' % self.brain.part) + + # no cleaving progress was made + for node_num in stopped: + 
self.brain.servers.start(number=node_num) + shard_ranges = self.assert_container_state(leader_node, 'sharding', 4) + self.assertEqual([ShardRange.CLEAVED] * 2 + [ShardRange.CREATED] * 2, + [sr.state for sr in shard_ranges]) + + # stop just one of the servers for third shard range + stopped = [] + for node_num in sr_node_nums: + if node_num != leader_num: + self.brain.servers.stop(number=node_num) + stopped.append(node_num) + break + self.assertLengthEqual(stopped, 1) # sanity check + + # attempt to continue sharding the container + self.sharders.once(number=leader_num, + additional_args='--partitions=%s' % self.brain.part) + + # this time cleaving completed + self.brain.servers.start(number=stopped[0]) + shard_ranges = self.assert_container_state(leader_node, 'sharded', 4) + self.assertEqual([ShardRange.ACTIVE] * 4, + [sr.state for sr in shard_ranges]) + + def test_sharded_delete(self): + all_obj_names = self._make_object_names(self.max_shard_size) + self.put_objects(all_obj_names) + # Shard the container + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + for n in self.brain.node_numbers: + self.sharders.once( + number=n, additional_args='--partitions=%s' % self.brain.part) + # sanity checks + for node in self.brain.nodes: + self.assert_container_state(node, 'sharded', 2) + self.assert_container_delete_fails() + self.assert_container_has_shard_sysmeta() + self.assert_container_post_ok('sharded') + self.assert_container_listing(all_obj_names) + + # delete all objects - updates redirected to shards + self.delete_objects(all_obj_names) + self.assert_container_listing([]) + self.assert_container_post_ok('has objects') + # root not yet updated with shard stats + self.assert_container_object_count(len(all_obj_names)) + self.assert_container_delete_fails() + self.assert_container_has_shard_sysmeta() + + # run sharder on shard containers to update root stats + shard_ranges = self.get_container_shard_ranges() + self.assertLengthEqual(shard_ranges, 2) + self.run_sharders(shard_ranges) + self.assert_container_listing([]) + self.assert_container_post_ok('empty') + self.assert_container_object_count(0) + + # put a new object - update redirected to shard + self.put_objects(['alpha']) + self.assert_container_listing(['alpha']) + self.assert_container_object_count(0) + + # before root learns about new object in shard, delete the container + client.delete_container(self.url, self.token, self.container_name) + self.assert_container_post_fails('deleted') + self.assert_container_not_found() + + # run the sharders to update root with shard stats + self.run_sharders(shard_ranges) + + self.assert_container_listing(['alpha']) + self.assert_container_object_count(1) + self.assert_container_delete_fails() + self.assert_container_post_ok('revived') + + def test_object_update_redirection(self): + all_obj_names = self._make_object_names(self.max_shard_size) + self.put_objects(all_obj_names) + # Shard the container + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + for n in self.brain.node_numbers: + self.sharders.once( + number=n, additional_args='--partitions=%s' % self.brain.part) + # sanity checks + for node in self.brain.nodes: + self.assert_container_state(node, 'sharded', 2) + self.assert_container_delete_fails() + self.assert_container_has_shard_sysmeta() + self.assert_container_post_ok('sharded') + self.assert_container_listing(all_obj_names) + + # delete all objects - updates 
redirected to shards + self.delete_objects(all_obj_names) + self.assert_container_listing([]) + self.assert_container_post_ok('has objects') + + # run sharder on shard containers to update root stats + shard_ranges = self.get_container_shard_ranges() + self.assertLengthEqual(shard_ranges, 2) + self.run_sharders(shard_ranges) + self.assert_container_object_count(0) + + # First, test a misplaced object moving from one shard to another. + # with one shard server down, put a new 'alpha' object... + shard_part, shard_nodes = self.get_part_and_node_numbers( + shard_ranges[0]) + self.brain.servers.stop(number=shard_nodes[2]) + self.put_objects(['alpha']) + self.assert_container_listing(['alpha']) + self.assert_container_object_count(0) + self.assertLengthEqual( + self.gather_async_pendings(self.get_all_object_nodes()), 1) + self.brain.servers.start(number=shard_nodes[2]) + + # run sharder on root to discover first shrink candidate + self.sharders.once(additional_args='--partitions=%s' % self.brain.part) + # then run sharder on the shard node without the alpha object + self.sharders.once(additional_args='--partitions=%s' % shard_part, + number=shard_nodes[2]) + # root sees first shard has shrunk, only second shard range used for + # listing so alpha object not in listing + self.assertLengthEqual(self.get_container_shard_ranges(), 1) + self.assert_container_listing([]) + self.assert_container_object_count(0) + + # run the updaters: the async pending update will be redirected from + # shrunk shard to second shard + self.updaters.once() + self.assert_container_listing(['alpha']) + self.assert_container_object_count(0) # root not yet updated + + # then run sharder on other shard nodes to complete shrinking + for number in shard_nodes[:2]: + self.sharders.once(additional_args='--partitions=%s' % shard_part, + number=number) + # and get root updated + self.run_sharders(shard_ranges[1]) + self.assert_container_listing(['alpha']) + self.assert_container_object_count(1) + self.assertLengthEqual(self.get_container_shard_ranges(), 1) + + # Now we have just one active shard, test a misplaced object moving + # from that shard to the root. + # with one shard server down, delete 'alpha' and put a 'beta' object... 
+ shard_part, shard_nodes = self.get_part_and_node_numbers( + shard_ranges[1]) + self.brain.servers.stop(number=shard_nodes[2]) + self.delete_objects(['alpha']) + self.put_objects(['beta']) + self.assert_container_listing(['beta']) + self.assert_container_object_count(1) + self.assertLengthEqual( + self.gather_async_pendings(self.get_all_object_nodes()), 2) + self.brain.servers.start(number=shard_nodes[2]) + + # run sharder on root to discover second shrink candidate - root is not + # yet aware of the beta object + self.sharders.once(additional_args='--partitions=%s' % self.brain.part) + # then run sharder on the shard node without the beta object, to shrink + # it to root - note this moves stale copy of alpha to the root db + self.sharders.once(additional_args='--partitions=%s' % shard_part, + number=shard_nodes[2]) + # now there are no active shards + self.assertFalse(self.get_container_shard_ranges()) + + # with other two shard servers down, listing won't find beta object + for number in shard_nodes[:2]: + self.brain.servers.stop(number=number) + self.assert_container_listing(['alpha']) + self.assert_container_object_count(1) + + # run the updaters: the async pending update will be redirected from + # shrunk shard to the root + self.updaters.once() + self.assert_container_listing(['beta']) + self.assert_container_object_count(1) + + def test_misplaced_object_movement(self): + def merge_object(shard_range, name, deleted=0): + # it's hard to get a test to put a misplaced object into a shard, + # so this hack is used force an object record directly into a shard + # container db. Note: the actual object won't exist, we're just + # using this to test object records in container dbs. + shard_part, shard_nodes = self.brain.ring.get_nodes( + shard_range.account, shard_range.container) + shard_broker = self.get_broker( + shard_part, shard_nodes[0], shard_range.account, + shard_range.container) + shard_broker.merge_items( + [{'name': name, 'created_at': Timestamp.now().internal, + 'size': 0, 'content_type': 'text/plain', + 'etag': hashlib.md5().hexdigest(), 'deleted': deleted, + 'storage_policy_index': shard_broker.storage_policy_index}]) + return shard_nodes[0] + + all_obj_names = self._make_object_names(self.max_shard_size) + self.put_objects(all_obj_names) + # Shard the container + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + for n in self.brain.node_numbers: + self.sharders.once( + number=n, additional_args='--partitions=%s' % self.brain.part) + # sanity checks + for node in self.brain.nodes: + self.assert_container_state(node, 'sharded', 2) + self.assert_container_delete_fails() + self.assert_container_has_shard_sysmeta() + self.assert_container_post_ok('sharded') + self.assert_container_listing(all_obj_names) + + # delete all objects in first shard range - updates redirected to shard + shard_ranges = self.get_container_shard_ranges() + self.assertLengthEqual(shard_ranges, 2) + shard_0_objects = [name for name in all_obj_names + if name in shard_ranges[0]] + shard_1_objects = [name for name in all_obj_names + if name in shard_ranges[1]] + self.delete_objects(shard_0_objects) + self.assert_container_listing(shard_1_objects) + self.assert_container_post_ok('has objects') + + # run sharder on first shard container to update root stats + self.run_sharders(shard_ranges[0]) + self.assert_container_object_count(len(shard_1_objects)) + + # First, test a misplaced object moving from one shard to another. 
+ # run sharder on root to discover first shrink candidate + self.sharders.once(additional_args='--partitions=%s' % self.brain.part) + # then run sharder on first shard range to shrink it + self.run_sharders(shard_ranges[0]) + # force a misplaced object into the shrunken shard range to simulate + # a client put that was in flight when it started to shrink + misplaced_node = merge_object(shard_ranges[0], 'alpha', deleted=0) + # root sees first shard has shrunk, only second shard range used for + # listing so alpha object not in listing + self.assertLengthEqual(self.get_container_shard_ranges(), 1) + self.assert_container_listing(shard_1_objects) + self.assert_container_object_count(len(shard_1_objects)) + # until sharder runs on that node to move the misplaced object to the + # second shard range + shard_part, shard_nodes_numbers = self.get_part_and_node_numbers( + shard_ranges[0]) + self.sharders.once(additional_args='--partitions=%s' % shard_part, + number=misplaced_node['id'] + 1) + self.assert_container_listing(['alpha'] + shard_1_objects) + # root not yet updated + self.assert_container_object_count(len(shard_1_objects)) + + # run sharder to get root updated + self.run_sharders(shard_ranges[1]) + self.assert_container_listing(['alpha'] + shard_1_objects) + self.assert_container_object_count(len(shard_1_objects) + 1) + self.assertLengthEqual(self.get_container_shard_ranges(), 1) + + # Now we have just one active shard, test a misplaced object moving + # from that shard to the root. + # delete most objects from second shard range and run sharder on root + # to discover second shrink candidate + self.delete_objects(shard_1_objects) + self.run_sharders(shard_ranges[1]) + self.sharders.once(additional_args='--partitions=%s' % self.brain.part) + # then run sharder on the shard node to shrink it to root - note this + # moves alpha to the root db + self.run_sharders(shard_ranges[1]) + # now there are no active shards + self.assertFalse(self.get_container_shard_ranges()) + + # force some misplaced object updates into second shrunk shard range + merge_object(shard_ranges[1], 'alpha', deleted=1) + misplaced_node = merge_object(shard_ranges[1], 'beta', deleted=0) + # root is not yet aware of them + self.assert_container_listing(['alpha']) + self.assert_container_object_count(1) + # until sharder runs on that node to move the misplaced object + shard_part, shard_nodes_numbers = self.get_part_and_node_numbers( + shard_ranges[1]) + self.sharders.once(additional_args='--partitions=%s' % shard_part, + number=misplaced_node['id'] + 1) + self.assert_container_listing(['beta']) + self.assert_container_object_count(1) + self.assert_container_delete_fails() + + def test_replication_to_sharded_container_from_unsharded_old_primary(self): + primary_ids = [n['id'] for n in self.brain.nodes] + handoff_node = next(n for n in self.brain.ring.devs + if n['id'] not in primary_ids) + + # start with two sharded replicas and one unsharded with extra object + obj_names = self._setup_replication_scenario(2) + for node in self.brain.nodes[:2]: + self.assert_container_state(node, 'sharded', 2) + + # Fake a ring change - copy unsharded db which has no shard ranges to a + # handoff to create illusion of a new unpopulated primary node + node_numbers = self.brain.node_numbers + new_primary_node = self.brain.nodes[2] + new_primary_node_number = node_numbers[2] + new_primary_dir, container_hash = self.get_storage_dir( + self.brain.part, new_primary_node) + old_primary_dir, container_hash = self.get_storage_dir( + 
self.brain.part, handoff_node) + utils.mkdirs(os.path.dirname(old_primary_dir)) + os.rename(new_primary_dir, old_primary_dir) + + # make the cluster more or less "healthy" again + self.brain.servers.start(number=new_primary_node_number) + + # get a db on every node... + client.put_container(self.url, self.token, self.container_name) + self.assertTrue(os.path.exists(os.path.join( + new_primary_dir, container_hash + '.db'))) + found = self.categorize_container_dir_content() + self.assertLengthEqual(found['normal_dbs'], 1) # "new" primary + self.assertLengthEqual(found['shard_dbs'], 2) # existing primaries + + # catastrophic failure! drive dies and is replaced on unchanged primary + failed_node = self.brain.nodes[0] + failed_dir, _container_hash = self.get_storage_dir( + self.brain.part, failed_node) + shutil.rmtree(failed_dir) + + # replicate the "old primary" to everybody except the "new primary" + self.brain.servers.stop(number=new_primary_node_number) + self.replicators.once(number=handoff_node['id'] + 1) + + # We're willing to rsync the retiring db to the failed primary. + # This may or may not have shard ranges, depending on the order in + # which we hit the primaries, but it definitely *doesn't* have an + # epoch in its name yet. All objects are replicated. + self.assertTrue(os.path.exists(os.path.join( + failed_dir, container_hash + '.db'))) + self.assertLengthEqual(os.listdir(failed_dir), 1) + broker = self.get_broker(self.brain.part, failed_node) + self.assertLengthEqual(broker.get_objects(), len(obj_names) + 1) + + # The other out-of-date primary is within usync range but objects are + # not replicated to it because the handoff db learns about shard ranges + broker = self.get_broker(self.brain.part, self.brain.nodes[1]) + self.assertLengthEqual(broker.get_objects(), 0) + + # Handoff db still exists and now has shard ranges! 
+ self.assertTrue(os.path.exists(os.path.join( + old_primary_dir, container_hash + '.db'))) + broker = self.get_broker(self.brain.part, handoff_node) + shard_ranges = broker.get_shard_ranges() + self.assertLengthEqual(shard_ranges, 2) + self.assert_container_state(handoff_node, 'unsharded', 2) + + # Replicate again, this time *including* "new primary" + self.brain.servers.start(number=new_primary_node_number) + self.replicators.once(number=handoff_node['id'] + 1) + + # Ordinarily, we would have rsync_then_merge'd to "new primary" + # but instead we wait + broker = self.get_broker(self.brain.part, new_primary_node) + self.assertLengthEqual(broker.get_objects(), 0) + shard_ranges = broker.get_shard_ranges() + self.assertLengthEqual(shard_ranges, 2) + + # so the next time the sharder comes along, it can push rows out + # and delete the big db + self.sharders.once(number=handoff_node['id'] + 1, + additional_args='--partitions=%s' % self.brain.part) + self.assert_container_state(handoff_node, 'sharded', 2) + self.assertFalse(os.path.exists(os.path.join( + old_primary_dir, container_hash + '.db'))) + # the sharded db hangs around until replication confirms durability + # first attempt is not sufficiently successful + self.brain.servers.stop(number=node_numbers[0]) + self.replicators.once(number=handoff_node['id'] + 1) + self.assertTrue(os.path.exists(old_primary_dir)) + self.assert_container_state(handoff_node, 'sharded', 2) + # second attempt is successful and handoff db is deleted + self.brain.servers.start(number=node_numbers[0]) + self.replicators.once(number=handoff_node['id'] + 1) + self.assertFalse(os.path.exists(old_primary_dir)) + + # run all the sharders, get us into a consistent state + self.sharders.once(additional_args='--partitions=%s' % self.brain.part) + self.assert_container_listing(['alpha'] + obj_names) + + def test_replication_to_empty_new_primary_from_sharding_old_primary(self): + primary_ids = [n['id'] for n in self.brain.nodes] + handoff_node = next(n for n in self.brain.ring.devs + if n['id'] not in primary_ids) + num_shards = 3 + obj_names = self._make_object_names( + num_shards * self.max_shard_size / 2) + self.put_objects(obj_names) + client.post_container(self.url, self.admin_token, self.container_name, + headers={'X-Container-Sharding': 'on'}) + + # run replicators first time to get sync points set + self.replicators.once() + # start sharding on only the leader node + leader_node = self.brain.nodes[0] + leader_node_number = self.brain.node_numbers[0] + self.sharders.once(number=leader_node_number) + self.assert_container_state(leader_node, 'sharding', 3) + for node in self.brain.nodes[1:]: + self.assert_container_state(node, 'unsharded', 3) + + # Fake a ring change - copy leader node db to a handoff to create + # illusion of a new unpopulated primary leader node + new_primary_dir, container_hash = self.get_storage_dir( + self.brain.part, leader_node) + old_primary_dir, container_hash = self.get_storage_dir( + self.brain.part, handoff_node) + utils.mkdirs(os.path.dirname(old_primary_dir)) + os.rename(new_primary_dir, old_primary_dir) + self.assert_container_state(handoff_node, 'sharding', 3) + + # run replicator on handoff node to create a fresh db on new primary + self.assertFalse(os.path.exists(new_primary_dir)) + self.replicators.once(number=handoff_node['id'] + 1) + self.assertTrue(os.path.exists(new_primary_dir)) + self.assert_container_state(leader_node, 'sharded', 3) + broker = self.get_broker(self.brain.part, leader_node) + shard_ranges = 
broker.get_shard_ranges() + self.assertLengthEqual(shard_ranges, 3) + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CLEAVED, ShardRange.CREATED], + [sr.state for sr in shard_ranges]) + + # db still exists on handoff + self.assertTrue(os.path.exists(old_primary_dir)) + self.assert_container_state(handoff_node, 'sharding', 3) + # continue sharding it... + self.sharders.once(number=handoff_node['id'] + 1) + self.assert_container_state(leader_node, 'sharded', 3) + # now handoff is fully sharded the replicator will delete it + self.replicators.once(number=handoff_node['id'] + 1) + self.assertFalse(os.path.exists(old_primary_dir)) + + # all primaries now have active shard ranges but only one is in sharded + # state + self.assert_container_state(leader_node, 'sharded', 3) + for node in self.brain.nodes[1:]: + self.assert_container_state(node, 'unsharded', 3) + node_data = self.direct_get_container_shard_ranges() + for node_id, (hdrs, shard_ranges) in node_data.items(): + with annotate_failure( + 'node id %s from %s' % (node_id, node_data.keys)): + self.assert_shard_range_state(ShardRange.ACTIVE, shard_ranges) + + # check handoff cleaved all objects before it was deleted - stop all + # but leader node so that listing is fetched from shards + for number in self.brain.node_numbers[1:3]: + self.brain.servers.stop(number=number) + + self.assert_container_listing(obj_names) + + for number in self.brain.node_numbers[1:3]: + self.brain.servers.start(number=number) + + self.sharders.once() + self.assert_container_state(leader_node, 'sharded', 3) + for node in self.brain.nodes[1:]: + self.assert_container_state(node, 'sharding', 3) + self.sharders.once() + for node in self.brain.nodes: + self.assert_container_state(node, 'sharded', 3) + + self.assert_container_listing(obj_names) diff -Nru swift-2.17.0/test/sample.conf swift-2.18.0/test/sample.conf --- swift-2.17.0/test/sample.conf 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/sample.conf 2018-05-30 10:17:02.000000000 +0000 @@ -17,6 +17,8 @@ account = test username = tester password = testing +s3_access_key = test:tester +s3_secret_key = testing # User on a second account (needs admin access to the account) account2 = test2 @@ -26,6 +28,9 @@ # User on same account as first, but without admin access username3 = tester3 password3 = testing3 +# s3api requires the same account with the primary one and different users +s3_access_key2 = test:tester3 +s3_secret_key2 = testing3 # Fourth user is required for keystone v3 specific tests. # Account must be in a non-default domain. 
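[Editor's note: the probe tests above repeatedly assert that each node's shard ranges form a contiguous cover of the container namespace (assert_shard_ranges_contiguous). The following is only an illustrative, standalone sketch of that kind of check, assuming shard ranges are reduced to (lower, upper) string pairs with '' meaning unbounded; it is not Swift's actual helper implementation.]

    # Minimal sketch of a contiguity check over shard ranges.
    # Each range is a (lower, upper) tuple of object-name bounds;
    # '' stands for an unbounded end of the namespace.
    def assert_contiguous(shard_ranges):
        assert shard_ranges, 'no shard ranges found'
        ordered = sorted(shard_ranges, key=lambda r: r[0])
        # the first range must start at the beginning of the namespace
        assert ordered[0][0] == '', 'first range is not unbounded below'
        # the last range must run to the end of the namespace
        assert ordered[-1][1] == '', 'last range is not unbounded above'
        # each range must begin exactly where the previous one ends
        for (_, upper), (lower, _) in zip(ordered, ordered[1:]):
            assert upper == lower, 'gap or overlap: %r != %r' % (upper, lower)

    # example: the two ranges a freshly sharded container might end up with
    assert_contiguous([('', 'obj49'), ('obj49', '')])
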
diff -Nru swift-2.17.0/test/unit/account/test_server.py swift-2.18.0/test/unit/account/test_server.py --- swift-2.17.0/test/unit/account/test_server.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/account/test_server.py 2018-05-30 10:17:02.000000000 +0000 @@ -404,7 +404,7 @@ elif state[0] == 'race': # Save the original db_file attribute value self._saved_db_file = self.db_file - self.db_file += '.doesnotexist' + self._db_file += '.doesnotexist' def initialize(self, *args, **kwargs): if state[0] == 'initial': @@ -413,7 +413,7 @@ elif state[0] == 'race': # Restore the original db_file attribute to get the race # behavior - self.db_file = self._saved_db_file + self._db_file = self._saved_db_file return super(InterceptedAcBr, self).initialize(*args, **kwargs) with mock.patch("swift.account.server.AccountBroker", InterceptedAcBr): diff -Nru swift-2.17.0/test/unit/cli/test_form_signature.py swift-2.18.0/test/unit/cli/test_form_signature.py --- swift-2.17.0/test/unit/cli/test_form_signature.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/cli/test_form_signature.py 2018-05-30 10:17:02.000000000 +0000 @@ -17,7 +17,7 @@ import hashlib import hmac import mock -from six import StringIO +import six import unittest from swift.cli import form_signature @@ -33,14 +33,19 @@ max_file_size = str(int(1024 * 1024 * 1024 * 3.14159)) # π GiB max_file_count = '3' - expected_signature = hmac.new( - key, - "\n".join(( - path, redirect, max_file_size, max_file_count, - str(int(the_time + expires)))), - hashlib.sha1).hexdigest() + data = "\n".join(( + path, redirect, max_file_size, max_file_count, + str(int(the_time + expires)))) + + if six.PY3: + data = data if isinstance(data, six.binary_type) else \ + data.encode('utf8') + key = key if isinstance(key, six.binary_type) else \ + key.encode('utf8') - out = StringIO() + expected_signature = hmac.new(key, data, hashlib.sha1).hexdigest() + + out = six.StringIO() with mock.patch('swift.cli.form_signature.time', lambda: the_time): with mock.patch('sys.stdout', out): exitcode = form_signature.main([ @@ -59,7 +64,7 @@ self.assertIn(sig_input, out.getvalue()) def test_too_few_args(self): - out = StringIO() + out = six.StringIO() with mock.patch('sys.stdout', out): exitcode = form_signature.main([ '/path/to/swift-form-signature', @@ -70,7 +75,7 @@ self.assertIn(usage, out.getvalue()) def test_invalid_filesize_arg(self): - out = StringIO() + out = six.StringIO() key = 'secret squirrel' with mock.patch('sys.stdout', out): exitcode = form_signature.main([ @@ -79,7 +84,7 @@ self.assertNotEqual(exitcode, 0) def test_invalid_filecount_arg(self): - out = StringIO() + out = six.StringIO() key = 'secret squirrel' with mock.patch('sys.stdout', out): exitcode = form_signature.main([ @@ -88,7 +93,7 @@ self.assertNotEqual(exitcode, 0) def test_invalid_path_arg(self): - out = StringIO() + out = six.StringIO() key = 'secret squirrel' with mock.patch('sys.stdout', out): exitcode = form_signature.main([ @@ -97,7 +102,7 @@ self.assertNotEqual(exitcode, 0) def test_invalid_seconds_arg(self): - out = StringIO() + out = six.StringIO() key = 'secret squirrel' with mock.patch('sys.stdout', out): exitcode = form_signature.main([ diff -Nru swift-2.17.0/test/unit/cli/test_info.py swift-2.18.0/test/unit/cli/test_info.py --- swift-2.17.0/test/unit/cli/test_info.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/cli/test_info.py 2018-05-30 10:17:02.000000000 +0000 @@ -31,6 +31,7 @@ parse_get_node_args) from swift.account.server import AccountController 
from swift.container.server import ContainerController +from swift.container.backend import UNSHARDED, SHARDED from swift.obj.diskfile import write_metadata @@ -42,8 +43,8 @@ def setUp(self): skip_if_no_xattrs() self.orig_hp = utils.HASH_PATH_PREFIX, utils.HASH_PATH_SUFFIX - utils.HASH_PATH_PREFIX = 'info' - utils.HASH_PATH_SUFFIX = 'info' + utils.HASH_PATH_PREFIX = b'info' + utils.HASH_PATH_SUFFIX = b'info' self.testdir = os.path.join(mkdtemp(), 'tmp_test_cli_info') utils.mkdirs(self.testdir) rmtree(self.testdir) @@ -103,17 +104,18 @@ self.assertRaisesMessage(ValueError, 'Info is incomplete', print_db_info_metadata, 'container', {}, {}) - info = dict( - account='acct', - created_at=100.1, - put_timestamp=106.3, - delete_timestamp=107.9, - status_changed_at=108.3, - container_count='3', - object_count='20', - bytes_used='42') - info['hash'] = 'abaddeadbeefcafe' - info['id'] = 'abadf100d0ddba11' + info = { + 'account': 'acct', + 'created_at': 100.1, + 'put_timestamp': 106.3, + 'delete_timestamp': 107.9, + 'status_changed_at': 108.3, + 'container_count': '3', + 'object_count': '20', + 'bytes_used': '42', + 'hash': 'abaddeadbeefcafe', + 'id': 'abadf100d0ddba11', + } md = {'x-account-meta-mydata': ('swift', '0000000000.00000'), 'x-other-something': ('boo', '0000000000.00000')} out = StringIO() @@ -134,7 +136,7 @@ UUID: abadf100d0ddba11 X-Other-Something: boo No system metadata found in db file - User Metadata: {'mydata': 'swift'}''' + User Metadata: {'x-account-meta-mydata': 'swift'}''' self.assertEqual(sorted(out.getvalue().strip().split('\n')), sorted(exp_out.split('\n'))) @@ -154,13 +156,15 @@ reported_object_count='20', reported_bytes_used='42', x_container_foo='bar', - x_container_bar='goo') + x_container_bar='goo', + db_state=UNSHARDED, + is_root=True) info['hash'] = 'abaddeadbeefcafe' info['id'] = 'abadf100d0ddba11' md = {'x-container-sysmeta-mydata': ('swift', '0000000000.00000')} out = StringIO() with mock.patch('sys.stdout', out): - print_db_info_metadata('container', info, md) + print_db_info_metadata('container', info, md, True) exp_out = '''Path: /acct/cont Account: acct Container: cont @@ -182,10 +186,88 @@ X-Container-Bar: goo X-Container-Foo: bar System Metadata: {'mydata': 'swift'} -No user metadata found in db file''' % POLICIES[0].name +No user metadata found in db file +Sharding Metadata: + Type: root + State: unsharded''' % POLICIES[0].name self.assertEqual(sorted(out.getvalue().strip().split('\n')), sorted(exp_out.split('\n'))) + def test_print_db_info_metadata_with_shard_ranges(self): + + shard_ranges = [utils.ShardRange( + name='.sharded_a/shard_range_%s' % i, + timestamp=utils.Timestamp(i), lower='%da' % i, + upper='%dz' % i, object_count=i, bytes_used=i, + meta_timestamp=utils.Timestamp(i)) for i in range(1, 4)] + shard_ranges[0].state = utils.ShardRange.CLEAVED + shard_ranges[1].state = utils.ShardRange.CREATED + + info = dict( + account='acct', + container='cont', + storage_policy_index=0, + created_at='0000000100.10000', + put_timestamp='0000000106.30000', + delete_timestamp='0000000107.90000', + status_changed_at='0000000108.30000', + object_count='20', + bytes_used='42', + reported_put_timestamp='0000010106.30000', + reported_delete_timestamp='0000010107.90000', + reported_object_count='20', + reported_bytes_used='42', + db_state=SHARDED, + is_root=True, + shard_ranges=shard_ranges) + info['hash'] = 'abaddeadbeefcafe' + info['id'] = 'abadf100d0ddba11' + out = StringIO() + with mock.patch('sys.stdout', out): + print_db_info_metadata('container', info, {}) + 
exp_out = '''Path: /acct/cont + Account: acct + Container: cont + Container Hash: d49d0ecbb53be1fcc49624f2f7c7ccae +Metadata: + Created at: 1970-01-01T00:01:40.100000 (0000000100.10000) + Put Timestamp: 1970-01-01T00:01:46.300000 (0000000106.30000) + Delete Timestamp: 1970-01-01T00:01:47.900000 (0000000107.90000) + Status Timestamp: 1970-01-01T00:01:48.300000 (0000000108.30000) + Object Count: 20 + Bytes Used: 42 + Storage Policy: %s (0) + Reported Put Timestamp: 1970-01-01T02:48:26.300000 (0000010106.30000) + Reported Delete Timestamp: 1970-01-01T02:48:27.900000 (0000010107.90000) + Reported Object Count: 20 + Reported Bytes Used: 42 + Chexor: abaddeadbeefcafe + UUID: abadf100d0ddba11 +No system metadata found in db file +No user metadata found in db file +Sharding Metadata: + Type: root + State: sharded +Shard Ranges (3): + Name: .sharded_a/shard_range_1 + lower: '1a', upper: '1z' + Object Count: 1, Bytes Used: 1, State: cleaved (30) + Created at: 1970-01-01T00:00:01.000000 (0000000001.00000) + Meta Timestamp: 1970-01-01T00:00:01.000000 (0000000001.00000) + Name: .sharded_a/shard_range_2 + lower: '2a', upper: '2z' + Object Count: 2, Bytes Used: 2, State: created (20) + Created at: 1970-01-01T00:00:02.000000 (0000000002.00000) + Meta Timestamp: 1970-01-01T00:00:02.000000 (0000000002.00000) + Name: .sharded_a/shard_range_3 + lower: '3a', upper: '3z' + Object Count: 3, Bytes Used: 3, State: found (10) + Created at: 1970-01-01T00:00:03.000000 (0000000003.00000) + Meta Timestamp: 1970-01-01T00:00:03.000000 (0000000003.00000)''' %\ + POLICIES[0].name + self.assertEqual(sorted(out.getvalue().strip().split('\n')), + sorted(exp_out.strip().split('\n'))) + def test_print_ring_locations_invalid_args(self): self.assertRaises(ValueError, print_ring_locations, None, 'dir', 'acct') @@ -423,14 +505,8 @@ '1', 'b47', 'dc5be2aa4347a22a0fee6bc7de505b47', 'dc5be2aa4347a22a0fee6bc7de505b47.db') - try: - print_info('account', db_file, swift_dir=self.testdir) - except Exception: - exp_raised = True - if exp_raised: - self.fail("Unexpected exception raised") - else: - self.assertGreater(len(out.getvalue().strip()), 800) + print_info('account', db_file, swift_dir=self.testdir) + self.assertGreater(len(out.getvalue().strip()), 800) controller = ContainerController( {'devices': self.testdir, 'mount_check': 'false'}) @@ -875,7 +951,7 @@ self.assertRaises(InfoSystemExit, print_obj, datafile) with open(datafile, 'wb') as fp: - fp.write('1234') + fp.write(b'1234') out = StringIO() with mock.patch('sys.stdout', out): @@ -1129,7 +1205,7 @@ }) out = StringIO() with mock.patch('sys.stdout', out): - print_obj_metadata(metadata) + print_obj_metadata(metadata, True) exp_out = '''Path: /AUTH_admin/c/dummy Account: AUTH_admin Container: c @@ -1138,8 +1214,8 @@ Content-Type: application/octet-stream Timestamp: 1970-01-01T00:01:46.300000 (%s) System Metadata: - X-Object-Sysmeta-Mtime: 107.3 - X-Object-Sysmeta-Name: Obj name + Mtime: 107.3 + Name: Obj name Transient System Metadata: No metadata found User Metadata: @@ -1209,7 +1285,7 @@ del metadata['name'] out = StringIO() with mock.patch('sys.stdout', out): - print_obj_metadata(metadata) + print_obj_metadata(metadata, True) exp_out = '''Path: Not found in metadata Content-Type: application/octet-stream Timestamp: 1970-01-01T00:01:46.300000 (%s) @@ -1218,7 +1294,7 @@ Transient System Metadata: No metadata found User Metadata: - X-Object-Meta-Mtime: 107.3 + Mtime: 107.3 Other Metadata: No metadata found''' % ( utils.Timestamp(106.3).internal) @@ -1253,7 +1329,7 @@ del 
metadata['X-Timestamp'] out = StringIO() with mock.patch('sys.stdout', out): - print_obj_metadata(metadata) + print_obj_metadata(metadata, True) exp_out = '''Path: /AUTH_admin/c/dummy Account: AUTH_admin Container: c @@ -1266,7 +1342,7 @@ Transient System Metadata: No metadata found User Metadata: - X-Object-Meta-Mtime: 107.3 + Mtime: 107.3 Other Metadata: No metadata found''' @@ -1297,6 +1373,34 @@ Other Metadata: X-Object-Mtime: 104.3''' % ( utils.Timestamp(106.3).internal) + + self.assertEqual(out.getvalue().strip(), exp_out) + + metadata = get_metadata({ + 'X-Object-Meta-Mtime': '107.3', + 'X-Object-Sysmeta-Mtime': '106.3', + 'X-Object-Transient-Sysmeta-Mtime': '105.3', + 'X-Object-Mtime': '104.3', + }) + out = StringIO() + with mock.patch('sys.stdout', out): + print_obj_metadata(metadata, True) + exp_out = '''Path: /AUTH_admin/c/dummy + Account: AUTH_admin + Container: c + Object: dummy + Object hash: 128fdf98bddd1b1e8695f4340e67a67a +Content-Type: application/octet-stream +Timestamp: 1970-01-01T00:01:46.300000 (%s) +System Metadata: + Mtime: 106.3 +Transient System Metadata: + Mtime: 105.3 +User Metadata: + Mtime: 107.3 +Other Metadata: + X-Object-Mtime: 104.3''' % ( + utils.Timestamp(106.3).internal) self.assertEqual(out.getvalue().strip(), exp_out) diff -Nru swift-2.17.0/test/unit/cli/test_manage_shard_ranges.py swift-2.18.0/test/unit/cli/test_manage_shard_ranges.py --- swift-2.17.0/test/unit/cli/test_manage_shard_ranges.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/cli/test_manage_shard_ranges.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,362 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy +# of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ +from __future__ import unicode_literals + +import json +import os +import unittest +import mock +from shutil import rmtree +from tempfile import mkdtemp + +from six.moves import cStringIO as StringIO + +from swift.cli.manage_shard_ranges import main +from swift.common import utils +from swift.common.utils import Timestamp, ShardRange +from swift.container.backend import ContainerBroker +from test.unit import mock_timestamp_now + + +class TestManageShardRanges(unittest.TestCase): + def setUp(self): + self.testdir = os.path.join(mkdtemp(), 'tmp_test_cli_find_shards') + utils.mkdirs(self.testdir) + rmtree(self.testdir) + self.shard_data = [ + {'index': 0, 'lower': '', 'upper': 'obj09', 'object_count': 10}, + {'index': 1, 'lower': 'obj09', 'upper': 'obj19', + 'object_count': 10}, + {'index': 2, 'lower': 'obj19', 'upper': 'obj29', + 'object_count': 10}, + {'index': 3, 'lower': 'obj29', 'upper': 'obj39', + 'object_count': 10}, + {'index': 4, 'lower': 'obj39', 'upper': 'obj49', + 'object_count': 10}, + {'index': 5, 'lower': 'obj49', 'upper': 'obj59', + 'object_count': 10}, + {'index': 6, 'lower': 'obj59', 'upper': 'obj69', + 'object_count': 10}, + {'index': 7, 'lower': 'obj69', 'upper': 'obj79', + 'object_count': 10}, + {'index': 8, 'lower': 'obj79', 'upper': 'obj89', + 'object_count': 10}, + {'index': 9, 'lower': 'obj89', 'upper': '', 'object_count': 10}, + ] + + def tearDown(self): + rmtree(os.path.dirname(self.testdir)) + + def assert_starts_with(self, value, prefix): + self.assertTrue(value.startswith(prefix), + "%r does not start with %r" % (value, prefix)) + + def assert_formatted_json(self, output, expected): + try: + loaded = json.loads(output) + except ValueError as err: + self.fail('Invalid JSON: %s\n%r' % (err, output)) + # Check this one first, for a prettier diff + self.assertEqual(loaded, expected) + formatted = json.dumps(expected, sort_keys=True, indent=2) + '\n' + self.assertEqual(output, formatted) + + def _make_broker(self, account='a', container='c', + device='sda', part=0): + datadir = os.path.join( + self.testdir, device, 'containers', str(part), 'ash', 'hash') + db_file = os.path.join(datadir, 'hash.db') + broker = ContainerBroker( + db_file, account=account, container=container) + broker.initialize() + return broker + + def test_find_shard_ranges(self): + db_file = os.path.join(self.testdir, 'hash.db') + broker = ContainerBroker(db_file) + broker.account = 'a' + broker.container = 'c' + broker.initialize() + ts = utils.Timestamp.now() + broker.merge_items([ + {'name': 'obj%02d' % i, 'created_at': ts.internal, 'size': 0, + 'content_type': 'application/octet-stream', 'etag': 'not-really', + 'deleted': 0, 'storage_policy_index': 0, + 'ctype_timestamp': ts.internal, 'meta_timestamp': ts.internal} + for i in range(100)]) + + # Default uses a large enough value that sharding isn't required + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([db_file, 'find']) + self.assert_formatted_json(out.getvalue(), []) + err_lines = err.getvalue().split('\n') + self.assert_starts_with(err_lines[0], 'Loaded db broker for ') + self.assert_starts_with(err_lines[1], 'Found 0 ranges in ') + + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([db_file, 'find', '100']) + self.assert_formatted_json(out.getvalue(), []) + err_lines = err.getvalue().split('\n') + self.assert_starts_with(err_lines[0], 'Loaded db broker for ') + self.assert_starts_with(err_lines[1], 'Found 0 
ranges in ') + + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([db_file, 'find', '99']) + self.assert_formatted_json(out.getvalue(), [ + {'index': 0, 'lower': '', 'upper': 'obj98', 'object_count': 99}, + {'index': 1, 'lower': 'obj98', 'upper': '', 'object_count': 1}, + ]) + err_lines = err.getvalue().split('\n') + self.assert_starts_with(err_lines[0], 'Loaded db broker for ') + self.assert_starts_with(err_lines[1], 'Found 2 ranges in ') + + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([db_file, 'find', '10']) + self.assert_formatted_json(out.getvalue(), [ + {'index': 0, 'lower': '', 'upper': 'obj09', 'object_count': 10}, + {'index': 1, 'lower': 'obj09', 'upper': 'obj19', + 'object_count': 10}, + {'index': 2, 'lower': 'obj19', 'upper': 'obj29', + 'object_count': 10}, + {'index': 3, 'lower': 'obj29', 'upper': 'obj39', + 'object_count': 10}, + {'index': 4, 'lower': 'obj39', 'upper': 'obj49', + 'object_count': 10}, + {'index': 5, 'lower': 'obj49', 'upper': 'obj59', + 'object_count': 10}, + {'index': 6, 'lower': 'obj59', 'upper': 'obj69', + 'object_count': 10}, + {'index': 7, 'lower': 'obj69', 'upper': 'obj79', + 'object_count': 10}, + {'index': 8, 'lower': 'obj79', 'upper': 'obj89', + 'object_count': 10}, + {'index': 9, 'lower': 'obj89', 'upper': '', 'object_count': 10}, + ]) + err_lines = err.getvalue().split('\n') + self.assert_starts_with(err_lines[0], 'Loaded db broker for ') + self.assert_starts_with(err_lines[1], 'Found 10 ranges in ') + + def test_info(self): + broker = self._make_broker() + broker.update_metadata({'X-Container-Sysmeta-Sharding': + (True, Timestamp.now().internal)}) + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([broker.db_file, 'info']) + expected = ['Sharding enabled = True', + 'Own shard range: None', + 'db_state = unsharded', + 'Metadata:', + ' X-Container-Sysmeta-Sharding = True'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + err.getvalue().splitlines()) + + retiring_db_id = broker.get_info()['id'] + broker.merge_shard_ranges(ShardRange('.shards/cc', Timestamp.now())) + epoch = Timestamp.now() + with mock_timestamp_now(epoch) as now: + broker.enable_sharding(epoch) + self.assertTrue(broker.set_sharding_state()) + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + with mock_timestamp_now(now): + main([broker.db_file, 'info']) + expected = ['Sharding enabled = True', + 'Own shard range: {', + ' "bytes_used": 0, ', + ' "deleted": 0, ', + ' "epoch": "%s", ' % epoch.internal, + ' "lower": "", ', + ' "meta_timestamp": "%s", ' % now.internal, + ' "name": "a/c", ', + ' "object_count": 0, ', + ' "state": "sharding", ', + ' "state_timestamp": "%s", ' % now.internal, + ' "timestamp": "%s", ' % now.internal, + ' "upper": ""', + '}', + 'db_state = sharding', + 'Retiring db id: %s' % retiring_db_id, + 'Cleaving context: {', + ' "cleave_to_row": null, ', + ' "cleaving_done": false, ', + ' "cursor": "", ', + ' "last_cleave_to_row": null, ', + ' "max_row": -1, ', + ' "misplaced_done": false, ', + ' "ranges_done": 0, ', + ' "ranges_todo": 0, ', + ' "ref": "%s"' % retiring_db_id, + '}', + 'Metadata:', + ' X-Container-Sysmeta-Sharding = True'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + 
err.getvalue().splitlines()) + + self.assertTrue(broker.set_sharded_state()) + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + with mock_timestamp_now(now): + main([broker.db_file, 'info']) + expected = ['Sharding enabled = True', + 'Own shard range: {', + ' "bytes_used": 0, ', + ' "deleted": 0, ', + ' "epoch": "%s", ' % epoch.internal, + ' "lower": "", ', + ' "meta_timestamp": "%s", ' % now.internal, + ' "name": "a/c", ', + ' "object_count": 0, ', + ' "state": "sharding", ', + ' "state_timestamp": "%s", ' % now.internal, + ' "timestamp": "%s", ' % now.internal, + ' "upper": ""', + '}', + 'db_state = sharded', + 'Metadata:', + ' X-Container-Sysmeta-Sharding = True'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + err.getvalue().splitlines()) + + def test_replace(self): + broker = self._make_broker() + broker.update_metadata({'X-Container-Sysmeta-Sharding': + (True, Timestamp.now().internal)}) + input_file = os.path.join(self.testdir, 'shards') + with open(input_file, 'wb') as fd: + json.dump(self.shard_data, fd) + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([broker.db_file, 'replace', input_file]) + expected = [ + 'No shard ranges found to delete.', + 'Injected 10 shard ranges.', + 'Run container-replicator to replicate them to other nodes.', + 'Use the enable sub-command to enable sharding.'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + err.getvalue().splitlines()) + self.assertEqual( + [(data['lower'], data['upper']) for data in self.shard_data], + [(sr.lower_str, sr.upper_str) for sr in broker.get_shard_ranges()]) + + def _assert_enabled(self, broker, epoch): + own_sr = broker.get_own_shard_range() + self.assertEqual(ShardRange.SHARDING, own_sr.state) + self.assertEqual(epoch, own_sr.epoch) + self.assertEqual(ShardRange.MIN, own_sr.lower) + self.assertEqual(ShardRange.MAX, own_sr.upper) + self.assertEqual( + 'True', broker.metadata['X-Container-Sysmeta-Sharding'][0]) + + def test_enable(self): + broker = self._make_broker() + broker.update_metadata({'X-Container-Sysmeta-Sharding': + (True, Timestamp.now().internal)}) + # no shard ranges + out = StringIO() + err = StringIO() + with self.assertRaises(SystemExit): + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([broker.db_file, 'enable']) + expected = ["WARNING: invalid shard ranges: ['No shard ranges.'].", + 'Aborting.'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + err.getvalue().splitlines()) + + # success + shard_ranges = [] + for data in self.shard_data: + path = ShardRange.make_path( + '.shards_a', 'c', 'c', Timestamp.now(), data['index']) + shard_ranges.append( + ShardRange(path, Timestamp.now(), data['lower'], + data['upper'], data['object_count'])) + broker.merge_shard_ranges(shard_ranges) + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + with mock_timestamp_now() as now: + main([broker.db_file, 'enable']) + expected = [ + "Container moved to state 'sharding' with epoch %s." 
% + now.internal, + 'Run container-sharder on all nodes to shard the container.'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + err.getvalue().splitlines()) + self._assert_enabled(broker, now) + + # already enabled + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + main([broker.db_file, 'enable']) + expected = [ + "Container already in state 'sharding' with epoch %s." % + now.internal, + 'No action required.', + 'Run container-sharder on all nodes to shard the container.'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + err.getvalue().splitlines()) + self._assert_enabled(broker, now) + + def test_find_replace_enable(self): + db_file = os.path.join(self.testdir, 'hash.db') + broker = ContainerBroker(db_file) + broker.account = 'a' + broker.container = 'c' + broker.initialize() + ts = utils.Timestamp.now() + broker.merge_items([ + {'name': 'obj%02d' % i, 'created_at': ts.internal, 'size': 0, + 'content_type': 'application/octet-stream', 'etag': 'not-really', + 'deleted': 0, 'storage_policy_index': 0, + 'ctype_timestamp': ts.internal, 'meta_timestamp': ts.internal} + for i in range(100)]) + out = StringIO() + err = StringIO() + with mock.patch('sys.stdout', out), mock.patch('sys.stderr', err): + with mock_timestamp_now() as now: + main([broker.db_file, 'find_and_replace', '10', '--enable']) + expected = [ + 'No shard ranges found to delete.', + 'Injected 10 shard ranges.', + 'Run container-replicator to replicate them to other nodes.', + "Container moved to state 'sharding' with epoch %s." % + now.internal, + 'Run container-sharder on all nodes to shard the container.'] + self.assertEqual(expected, out.getvalue().splitlines()) + self.assertEqual(['Loaded db broker for a/c.'], + err.getvalue().splitlines()) + self._assert_enabled(broker, now) + self.assertEqual( + [(data['lower'], data['upper']) for data in self.shard_data], + [(sr.lower_str, sr.upper_str) for sr in broker.get_shard_ranges()]) diff -Nru swift-2.17.0/test/unit/cli/test_recon.py swift-2.18.0/test/unit/cli/test_recon.py --- swift-2.17.0/test/unit/cli/test_recon.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/cli/test_recon.py 2018-05-30 10:17:02.000000000 +0000 @@ -39,8 +39,10 @@ if six.PY3: from eventlet.green.urllib import request as urllib2 + GREEN_URLLIB_URLOPEN = 'eventlet.green.urllib.request.urlopen' else: from eventlet.green import urllib2 + GREEN_URLLIB_URLOPEN = 'eventlet.green.urllib2.urlopen' class TestHelpers(unittest.TestCase): @@ -68,7 +70,7 @@ self.url = 'http://127.0.0.1:8080/recon/type' self.server_type_url = 'http://127.0.0.1:8080/' - @mock.patch('eventlet.green.urllib2.urlopen') + @mock.patch(GREEN_URLLIB_URLOPEN) def test_scout_ok(self, mock_urlopen): mock_urlopen.return_value.read = lambda: json.dumps([]) url, content, status, ts_start, ts_end = self.scout_instance.scout( @@ -77,7 +79,7 @@ self.assertEqual(content, []) self.assertEqual(status, 200) - @mock.patch('eventlet.green.urllib2.urlopen') + @mock.patch(GREEN_URLLIB_URLOPEN) def test_scout_url_error(self, mock_urlopen): mock_urlopen.side_effect = urllib2.URLError("") url, content, status, ts_start, ts_end = self.scout_instance.scout( @@ -86,7 +88,7 @@ self.assertEqual(url, self.url) self.assertEqual(status, -1) - @mock.patch('eventlet.green.urllib2.urlopen') + @mock.patch(GREEN_URLLIB_URLOPEN) def test_scout_http_error(self, mock_urlopen): 
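[Editorial note] The test_recon.py hunks here replace the hard-coded 'eventlet.green.urllib2.urlopen' patch target with a GREEN_URLLIB_URLOPEN constant, because eventlet exposes its green urlopen under different module paths on Python 2 and 3. A minimal sketch of the same pattern, assuming eventlet, mock and six are importable as they are in Swift's test environment:

    import json
    import unittest

    import mock
    import six

    # choose the patch target once, based on the interpreter version
    if six.PY3:
        GREEN_URLLIB_URLOPEN = 'eventlet.green.urllib.request.urlopen'
    else:
        GREEN_URLLIB_URLOPEN = 'eventlet.green.urllib2.urlopen'


    class TestGreenUrlopenPatch(unittest.TestCase):
        @mock.patch(GREEN_URLLIB_URLOPEN)
        def test_urlopen_is_mocked(self, mock_urlopen):
            # any code importing the green urlopen now sees this mock
            mock_urlopen.return_value.read = lambda: json.dumps([])
            self.assertEqual(json.loads(mock_urlopen().read()), [])


    if __name__ == '__main__':
        unittest.main()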
mock_urlopen.side_effect = urllib2.HTTPError( self.url, 404, "Internal error", None, None) @@ -96,7 +98,7 @@ self.assertIsInstance(content, urllib2.HTTPError) self.assertEqual(status, 404) - @mock.patch('eventlet.green.urllib2.urlopen') + @mock.patch(GREEN_URLLIB_URLOPEN) def test_scout_socket_timeout(self, mock_urlopen): mock_urlopen.side_effect = socket.timeout("timeout") url, content, status, ts_start, ts_end = self.scout_instance.scout( @@ -105,19 +107,19 @@ self.assertEqual(url, self.url) self.assertEqual(status, -1) - @mock.patch('eventlet.green.urllib2.urlopen') + @mock.patch(GREEN_URLLIB_URLOPEN) def test_scout_server_type_ok(self, mock_urlopen): def getheader(name): d = {'Server': 'server-type'} return d.get(name) - mock_urlopen.return_value.info.return_value.getheader = getheader + mock_urlopen.return_value.info.return_value.get = getheader url, content, status = self.scout_instance.scout_server_type( ("127.0.0.1", "8080")) self.assertEqual(url, self.server_type_url) self.assertEqual(content, 'server-type') self.assertEqual(status, 200) - @mock.patch('eventlet.green.urllib2.urlopen') + @mock.patch(GREEN_URLLIB_URLOPEN) def test_scout_server_type_url_error(self, mock_urlopen): mock_urlopen.side_effect = urllib2.URLError("") url, content, status = self.scout_instance.scout_server_type( @@ -126,7 +128,7 @@ self.assertEqual(url, self.server_type_url) self.assertEqual(status, -1) - @mock.patch('eventlet.green.urllib2.urlopen') + @mock.patch(GREEN_URLLIB_URLOPEN) def test_scout_server_type_http_error(self, mock_urlopen): mock_urlopen.side_effect = urllib2.HTTPError( self.server_type_url, 404, "Internal error", None, None) @@ -136,7 +138,7 @@ self.assertIsInstance(content, urllib2.HTTPError) self.assertEqual(status, 404) - @mock.patch('eventlet.green.urllib2.urlopen') + @mock.patch(GREEN_URLLIB_URLOPEN) def test_scout_server_type_socket_timeout(self, mock_urlopen): mock_urlopen.side_effect = socket.timeout("timeout") url, content, status = self.scout_instance.scout_server_type( @@ -160,9 +162,8 @@ self.swift_dir, self.ring_name2 + '.ring.gz') swift_conf = os.path.join(self.swift_dir, 'swift.conf') - self.policy_name = ''.join(random.sample(string.letters, 20)) - with open(swift_conf, "wb") as sc: - sc.write(''' + self.policy_name = ''.join(random.sample(string.ascii_letters, 20)) + swift_conf_data = ''' [swift-hash] swift_hash_path_suffix = changeme @@ -173,7 +174,9 @@ [storage-policy:1] name = unu aliases = %s -''' % self.policy_name) +''' % self.policy_name + with open(swift_conf, "wb") as sc: + sc.write(swift_conf_data.encode('utf8')) def tearDown(self, *_args, **_kwargs): utils.SWIFT_CONF_FILE = self.swift_conf_file @@ -511,7 +514,7 @@ self.recon_instance.umount_check(hosts) output = stdout.getvalue() - r = re.compile("\Not mounted:|Device errors: .*") + r = re.compile("^Not mounted:|Device errors: .*") lines = output.splitlines() self.assertTrue(lines) for line in lines: @@ -668,10 +671,11 @@ response_body = resps[(host, port, path[7:])] resp = mock.MagicMock() - resp.read = mock.MagicMock(side_effect=[response_body]) + resp.read = mock.MagicMock(side_effect=[ + response_body if six.PY2 else response_body.encode('utf8')]) return resp - return mock.patch('eventlet.green.urllib2.urlopen', fake_urlopen) + return mock.patch(GREEN_URLLIB_URLOPEN, fake_urlopen) def test_server_type_check(self): hosts = [('127.0.0.1', 6010), ('127.0.0.1', 6011), @@ -870,7 +874,9 @@ mock.call('Disk usage: space used: 260 of 300'), mock.call('Disk usage: space free: 40 of 300'), mock.call('Disk usage: 
lowest: 85.0%, ' + - 'highest: 90.0%, avg: 86.6666666667%'), + 'highest: 90.0%%, avg: %s' % + ('86.6666666667%' if six.PY2 else + '86.66666666666667%')), mock.call('=' * 79), ] diff -Nru swift-2.17.0/test/unit/cli/test_relinker.py swift-2.18.0/test/unit/cli/test_relinker.py --- swift-2.17.0/test/unit/cli/test_relinker.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/cli/test_relinker.py 2018-05-30 10:17:02.000000000 +0000 @@ -62,7 +62,7 @@ self.object_fname = "1278553064.00000.data" self.objname = os.path.join(self.objdir, self.object_fname) with open(self.objname, "wb") as dummy: - dummy.write("Hello World!") + dummy.write(b"Hello World!") write_metadata(dummy, {'name': '/a/c/o', 'Content-Length': '12'}) test_policies = [StoragePolicy(0, 'platin', True)] @@ -164,7 +164,7 @@ self._common_test_cleanup() # Pretend the object in the new place got corrupted with open(self.expected_file, "wb") as obj: - obj.write('trash') + obj.write(b'trash') self.assertEqual( 1, relinker.cleanup(self.testdir, self.devices, True, self.logger)) diff -Nru swift-2.17.0/test/unit/cli/test_ringbuilder.py swift-2.18.0/test/unit/cli/test_ringbuilder.py --- swift-2.17.0/test/unit/cli/test_ringbuilder.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/cli/test_ringbuilder.py 2018-05-30 10:17:02.000000000 +0000 @@ -34,6 +34,11 @@ from test.unit import Timeout +try: + from itertools import zip_longest +except ImportError: + from itertools import izip_longest as zip_longest + class RunSwiftRingBuilderMixin(object): @@ -115,8 +120,7 @@ output = output.replace(self.tempfile, '__RINGFILE__') stub = stub.replace('__BUILDER_ID__', builder_id) for i, (value, expected) in enumerate( - itertools.izip_longest( - output.splitlines(), stub.splitlines())): + zip_longest(output.splitlines(), stub.splitlines())): # N.B. differences in trailing whitespace are ignored! 
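[Editorial note] The test_ringbuilder.py hunk above swaps itertools.izip_longest for a version-agnostic zip_longest via a try/except import, and the comparison loop that uses it deliberately ignores trailing whitespace. A condensed sketch of that comparison idiom (the helper name is made up for illustration):

    try:
        from itertools import zip_longest                   # Python 3
    except ImportError:
        from itertools import izip_longest as zip_longest   # Python 2


    def lines_match(output, expected):
        """Compare two blobs line by line, ignoring trailing whitespace."""
        for got, want in zip_longest(output.splitlines(),
                                     expected.splitlines()):
            if (got or '').rstrip() != (want or '').rstrip():
                return False
        return True


    assert lines_match('a  \nb', 'a\nb')
    assert not lines_match('a\nb\nc', 'a\nb')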
value = (value or '').rstrip() expected = (expected or '').rstrip() diff -Nru swift-2.17.0/test/unit/common/middleware/helpers.py swift-2.18.0/test/unit/common/middleware/helpers.py --- swift-2.17.0/test/unit/common/middleware/helpers.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/helpers.py 2018-05-30 10:17:02.000000000 +0000 @@ -20,7 +20,7 @@ from swift.common import swob from swift.common.header_key_dict import HeaderKeyDict from swift.common.request_helpers import is_user_meta, \ - is_object_transient_sysmeta + is_object_transient_sysmeta, resolve_etag_is_at_header from swift.common.swob import HTTPNotImplemented from swift.common.utils import split_path @@ -154,11 +154,8 @@ self._calls.append( FakeSwiftCall(method, path, HeaderKeyDict(req.headers))) - backend_etag_header = req.headers.get('X-Backend-Etag-Is-At') - conditional_etag = None - if backend_etag_header and backend_etag_header in headers: - # Apply conditional etag overrides - conditional_etag = headers[backend_etag_header] + # Apply conditional etag overrides + conditional_etag = resolve_etag_is_at_header(req, headers) # range requests ought to work, hence conditional_response=True if isinstance(body, list): diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/exceptions.py swift-2.18.0/test/unit/common/middleware/s3api/exceptions.py --- swift-2.17.0/test/unit/common/middleware/s3api/exceptions.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/exceptions.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,18 @@ +# Copyright (c) 2013 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class NotMethodException(Exception): + pass diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/helpers.py swift-2.18.0/test/unit/common/middleware/s3api/helpers.py --- swift-2.17.0/test/unit/common/middleware/s3api/helpers.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/helpers.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,185 @@ +# Copyright (c) 2013 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This stuff can't live in test/unit/__init__.py due to its swob dependency. + +from copy import deepcopy +from hashlib import md5 +from swift.common import swob +from swift.common.utils import split_path +from swift.common.request_helpers import is_sys_meta + + +class FakeSwift(object): + """ + A good-enough fake Swift proxy server to use in testing middleware. 
+ """ + + def __init__(self, s3_acl=False): + self._calls = [] + self.req_method_paths = [] + self.swift_sources = [] + self.uploaded = {} + # mapping of (method, path) --> (response class, headers, body) + self._responses = {} + self.s3_acl = s3_acl + + def _fake_auth_middleware(self, env): + if 'swift.authorize_override' in env: + return + + if 'HTTP_AUTHORIZATION' not in env: + return + + _, authorization = env['HTTP_AUTHORIZATION'].split(' ') + tenant_user, sign = authorization.rsplit(':', 1) + tenant, user = tenant_user.rsplit(':', 1) + + path = env['PATH_INFO'] + env['PATH_INFO'] = path.replace(tenant_user, 'AUTH_' + tenant) + + env['REMOTE_USER'] = 'authorized' + + if env['REQUEST_METHOD'] == 'TEST': + # AccessDenied by default at s3acl authenticate + env['swift.authorize'] = \ + lambda req: swob.HTTPForbidden(request=req) + else: + env['swift.authorize'] = lambda req: None + + def __call__(self, env, start_response): + if self.s3_acl: + self._fake_auth_middleware(env) + + req = swob.Request(env) + method = env['REQUEST_METHOD'] + path = env['PATH_INFO'] + _, acc, cont, obj = split_path(env['PATH_INFO'], 0, 4, + rest_with_last=True) + if env.get('QUERY_STRING'): + path += '?' + env['QUERY_STRING'] + + if 'swift.authorize' in env: + resp = env['swift.authorize'](req) + if resp: + return resp(env, start_response) + + headers = req.headers + self._calls.append((method, path, headers)) + self.swift_sources.append(env.get('swift.source')) + + try: + resp_class, raw_headers, body = self._responses[(method, path)] + headers = swob.HeaderKeyDict(raw_headers) + except KeyError: + # FIXME: suppress print state error for python3 compatibility. + # pylint: disable-msg=E1601 + if (env.get('QUERY_STRING') + and (method, env['PATH_INFO']) in self._responses): + resp_class, raw_headers, body = self._responses[ + (method, env['PATH_INFO'])] + headers = swob.HeaderKeyDict(raw_headers) + elif method == 'HEAD' and ('GET', path) in self._responses: + resp_class, raw_headers, _ = self._responses[('GET', path)] + body = None + headers = swob.HeaderKeyDict(raw_headers) + elif method == 'GET' and obj and path in self.uploaded: + resp_class = swob.HTTPOk + headers, body = self.uploaded[path] + else: + print("Didn't find %r in allowed responses" % + ((method, path),)) + raise + + # simulate object PUT + if method == 'PUT' and obj: + input = env['wsgi.input'].read() + etag = md5(input).hexdigest() + headers.setdefault('Etag', etag) + headers.setdefault('Content-Length', len(input)) + + # keep it for subsequent GET requests later + self.uploaded[path] = (deepcopy(headers), input) + if "CONTENT_TYPE" in env: + self.uploaded[path][0]['Content-Type'] = env["CONTENT_TYPE"] + + # range requests ought to work, but copies are special + support_range_and_conditional = not ( + method == 'PUT' and + 'X-Copy-From' in req.headers and + 'Range' in req.headers) + resp = resp_class(req=req, headers=headers, body=body, + conditional_response=support_range_and_conditional) + return resp(env, start_response) + + @property + def calls(self): + return [(method, path) for method, path, headers in self._calls] + + @property + def calls_with_headers(self): + return self._calls + + @property + def call_count(self): + return len(self._calls) + + def register(self, method, path, response_class, headers, body): + # assuming the path format like /v1/account/container/object + resource_map = ['account', 'container', 'object'] + acos = filter(None, split_path(path, 0, 4, True)[1:]) + index = len(acos) - 1 + resource = resource_map[index] + 
if (method, path) in self._responses: + old_headers = self._responses[(method, path)][1] + headers = headers.copy() + for key, value in old_headers.iteritems(): + if is_sys_meta(resource, key) and key not in headers: + # keep old sysmeta for s3acl + headers.update({key: value}) + + self._responses[(method, path)] = (response_class, headers, body) + + def register_unconditionally(self, method, path, response_class, headers, + body): + # register() keeps old sysmeta around, but + # register_unconditionally() keeps nothing. + self._responses[(method, path)] = (response_class, headers, body) + + def clear_calls(self): + del self._calls[:] + + +class UnreadableInput(object): + # Some clients will send neither a Content-Length nor a Transfer-Encoding + # header, which will cause (some versions of?) eventlet to bomb out on + # reads. This class helps us simulate that behavior. + def __init__(self, test_case): + self.calls = 0 + self.test_case = test_case + + def read(self, *a, **kw): + self.calls += 1 + # Calling wsgi.input.read with neither a Content-Length nor + # a Transfer-Encoding header will raise TypeError (See + # https://bugs.launchpad.net/swift3/+bug/1593870 in detail) + # This unreadable class emulates the behavior + raise TypeError + + def __enter__(self): + return self + + def __exit__(self, *args): + self.test_case.assertEqual(0, self.calls) diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/__init__.py swift-2.18.0/test/unit/common/middleware/s3api/__init__.py --- swift-2.17.0/test/unit/common/middleware/s3api/__init__.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/__init__.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,163 @@ +# Copyright (c) 2011-2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from datetime import datetime +import email +import time + +from swift.common import swob + +from swift.common.middleware.s3api.s3api import S3ApiMiddleware +from helpers import FakeSwift +from swift.common.middleware.s3api.etree import fromstring +from swift.common.middleware.s3api.utils import Config + + +class FakeApp(object): + def __init__(self): + self.swift = FakeSwift() + + def _update_s3_path_info(self, env): + """ + For S3 requests, Swift auth middleware replaces a user name in + env['PATH_INFO'] with a valid tenant id. + E.g. '/v1/test:tester/bucket/object' will become + '/v1/AUTH_test/bucket/object'. This method emulates the behavior. 
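[Editorial note] The docstring just above describes the rewrite that Swift's auth middleware performs for S3 requests: the tenant:user taken from the Authorization header is replaced in PATH_INFO by an AUTH_-prefixed account. Reduced to plain strings, the emulated transformation is roughly the following (the function name is only for illustration):

    def rewrite_s3_path(path_info, authorization):
        """Emulate the auth middleware's tenant -> AUTH_ account rewrite."""
        _, auth = authorization.split(' ')           # 'AWS test:tester:hmac'
        tenant_user, _sign = auth.rsplit(':', 1)     # -> 'test:tester'
        tenant, _user = tenant_user.rsplit(':', 1)   # -> 'test'
        return path_info.replace(tenant_user, 'AUTH_' + tenant)


    assert rewrite_s3_path('/v1/test:tester/bucket/object',
                           'AWS test:tester:hmac') == \
        '/v1/AUTH_test/bucket/object'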
+ """ + _, authorization = env['HTTP_AUTHORIZATION'].split(' ') + tenant_user, sign = authorization.rsplit(':', 1) + tenant, user = tenant_user.rsplit(':', 1) + + path = env['PATH_INFO'] + env['PATH_INFO'] = path.replace(tenant_user, 'AUTH_' + tenant) + + def __call__(self, env, start_response): + if 'HTTP_AUTHORIZATION' in env: + self._update_s3_path_info(env) + + return self.swift(env, start_response) + + +class S3ApiTestCase(unittest.TestCase): + def __init__(self, name): + unittest.TestCase.__init__(self, name) + + def setUp(self): + # setup default config + self.conf = Config({ + 'allow_no_owner': False, + 'location': 'US', + 'dns_compliant_bucket_names': True, + 'max_bucket_listing': 1000, + 'max_parts_listing': 1000, + 'max_multi_delete_objects': 1000, + 's3_acl': False, + 'storage_domain': 'localhost', + 'auth_pipeline_check': True, + 'max_upload_part_num': 1000, + 'check_bucket_owner': False, + 'force_swift_request_proxy_log': False, + 'allow_multipart_uploads': True, + 'min_segment_size': 5242880, + }) + # those 2 settings has existed the original test setup + self.conf.log_level = 'debug' + + self.app = FakeApp() + self.swift = self.app.swift + self.s3api = S3ApiMiddleware(self.app, self.conf) + + self.swift.register('HEAD', '/v1/AUTH_test', + swob.HTTPOk, {}, None) + self.swift.register('HEAD', '/v1/AUTH_test/bucket', + swob.HTTPNoContent, {}, None) + self.swift.register('PUT', '/v1/AUTH_test/bucket', + swob.HTTPCreated, {}, None) + self.swift.register('POST', '/v1/AUTH_test/bucket', + swob.HTTPNoContent, {}, None) + self.swift.register('DELETE', '/v1/AUTH_test/bucket', + swob.HTTPNoContent, {}, None) + + self.swift.register('GET', '/v1/AUTH_test/bucket/object', + swob.HTTPOk, {}, "") + self.swift.register('PUT', '/v1/AUTH_test/bucket/object', + swob.HTTPCreated, {}, None) + self.swift.register('DELETE', '/v1/AUTH_test/bucket/object', + swob.HTTPNoContent, {}, None) + + def _get_error_code(self, body): + elem = fromstring(body, 'Error') + return elem.find('./Code').text + + def _get_error_message(self, body): + elem = fromstring(body, 'Error') + return elem.find('./Message').text + + def _test_method_error(self, method, path, response_class, headers={}): + if not path.startswith('/'): + path = '/' + path # add a missing slash before the path + + uri = '/v1/AUTH_test' + if path != '/': + uri += path + + self.swift.register(method, uri, response_class, headers, None) + headers.update({'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + req = swob.Request.blank(path, environ={'REQUEST_METHOD': method}, + headers=headers) + status, headers, body = self.call_s3api(req) + return self._get_error_code(body) + + def get_date_header(self): + # email.utils.formatdate returns utc timestamp in default + return email.utils.formatdate(time.time()) + + def get_v4_amz_date_header(self): + return datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') + + def call_app(self, req, app=None, expect_exception=False): + if app is None: + app = self.app + + req.headers.setdefault("User-Agent", "Mozzarella Foxfire") + + status = [None] + headers = [None] + + def start_response(s, h, ei=None): + status[0] = s + headers[0] = swob.HeaderKeyDict(h) + + body_iter = app(req.environ, start_response) + body = '' + caught_exc = None + try: + for chunk in body_iter: + body += chunk + except Exception as exc: + if expect_exception: + caught_exc = exc + else: + raise + + if expect_exception: + return status[0], headers[0], body, caught_exc + else: + return status[0], headers[0], body + + def 
call_s3api(self, req, **kwargs): + return self.call_app(req, app=self.s3api, **kwargs) diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_acl_handlers.py swift-2.18.0/test/unit/common/middleware/s3api/test_acl_handlers.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_acl_handlers.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_acl_handlers.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,42 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from swift.common.middleware.s3api.acl_handlers import S3AclHandler, \ + BucketAclHandler, ObjectAclHandler, BaseAclHandler, PartAclHandler, \ + UploadAclHandler, UploadsAclHandler, get_acl_handler + + +class TestAclHandlers(unittest.TestCase): + def test_get_acl_handler(self): + expected_handlers = (('Bucket', BucketAclHandler), + ('Object', ObjectAclHandler), + ('S3Acl', S3AclHandler), + ('Part', PartAclHandler), + ('Upload', UploadAclHandler), + ('Uploads', UploadsAclHandler), + ('Foo', BaseAclHandler)) + for name, expected in expected_handlers: + handler = get_acl_handler(name) + self.assertTrue(issubclass(handler, expected)) + + def test_handle_acl(self): + # we have already have tests for s3_acl checking at test_s3_acl.py + pass + + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_acl.py swift-2.18.0/test/unit/common/middleware/s3api/test_acl.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_acl.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_acl.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,230 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
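[Editorial note] S3ApiTestCase.call_app above drives the middleware as a bare WSGI callable: it passes a start_response that captures status and headers, then joins the body iterator for assertions. The same driving pattern, stripped to its essentials (ok_app is a stand-in application used only for this sketch):

    def call_wsgi_app(app, environ):
        """Invoke a WSGI app and return (status, headers, body)."""
        captured = {}

        def start_response(status, headers, exc_info=None):
            captured['status'] = status
            captured['headers'] = dict(headers)

        body = b''.join(app(environ, start_response))
        return captured['status'], captured['headers'], body


    def ok_app(environ, start_response):
        start_response('200 OK', [('Content-Type', 'text/plain')])
        return [b'hello']


    status, headers, body = call_wsgi_app(ok_app, {'REQUEST_METHOD': 'GET'})
    assert (status, body) == ('200 OK', b'hello')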
+ +import unittest +import mock + +from cStringIO import StringIO +from hashlib import md5 + +from swift.common.swob import Request, HTTPAccepted +from swift.common.middleware.s3api.etree import fromstring, tostring, \ + Element, SubElement, XMLNS_XSI +from swift.common.middleware.s3api.s3response import InvalidArgument +from swift.common.middleware.s3api.acl_utils import handle_acl_header + +from test.unit.common.middleware.s3api import S3ApiTestCase +from test.unit.common.middleware.s3api.helpers import UnreadableInput +from test.unit.common.middleware.s3api.test_s3_acl import s3acl + + +class TestS3ApiAcl(S3ApiTestCase): + + def setUp(self): + super(TestS3ApiAcl, self).setUp() + # All ACL API should be called against to existing bucket. + self.swift.register('PUT', '/v1/AUTH_test/bucket', + HTTPAccepted, {}, None) + + def _check_acl(self, owner, body): + elem = fromstring(body, 'AccessControlPolicy') + permission = elem.find('./AccessControlList/Grant/Permission').text + self.assertEqual(permission, 'FULL_CONTROL') + name = elem.find('./AccessControlList/Grant/Grantee/ID').text + self.assertEqual(name, owner) + + def test_bucket_acl_GET(self): + req = Request.blank('/bucket?acl', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self._check_acl('test:tester', body) + + def test_bucket_acl_PUT(self): + elem = Element('AccessControlPolicy') + owner = SubElement(elem, 'Owner') + SubElement(owner, 'ID').text = 'id' + acl = SubElement(elem, 'AccessControlList') + grant = SubElement(acl, 'Grant') + grantee = SubElement(grant, 'Grantee', nsmap={'xsi': XMLNS_XSI}) + grantee.set('{%s}type' % XMLNS_XSI, 'Group') + SubElement(grantee, 'URI').text = \ + 'http://acs.amazonaws.com/groups/global/AllUsers' + SubElement(grant, 'Permission').text = 'READ' + + xml = tostring(elem) + req = Request.blank('/bucket?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body=xml) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + req = Request.blank('/bucket?acl', + environ={'REQUEST_METHOD': 'PUT', + 'wsgi.input': StringIO(xml)}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'Transfer-Encoding': 'chunked'}) + self.assertIsNone(req.content_length) + self.assertIsNone(req.message_length()) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + def test_bucket_canned_acl_PUT(self): + req = Request.blank('/bucket?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'X-AMZ-ACL': 'public-read'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_bucket_canned_acl_PUT_with_s3acl(self): + req = Request.blank('/bucket?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'X-AMZ-ACL': 'public-read'}) + with mock.patch('swift.common.middleware.s3api.s3request.' 
+ 'handle_acl_header') as mock_handler: + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + self.assertEqual(mock_handler.call_count, 0) + + def test_bucket_fails_with_both_acl_header_and_xml_PUT(self): + elem = Element('AccessControlPolicy') + owner = SubElement(elem, 'Owner') + SubElement(owner, 'ID').text = 'id' + acl = SubElement(elem, 'AccessControlList') + grant = SubElement(acl, 'Grant') + grantee = SubElement(grant, 'Grantee', nsmap={'xsi': XMLNS_XSI}) + grantee.set('{%s}type' % XMLNS_XSI, 'Group') + SubElement(grantee, 'URI').text = \ + 'http://acs.amazonaws.com/groups/global/AllUsers' + SubElement(grant, 'Permission').text = 'READ' + + xml = tostring(elem) + req = Request.blank('/bucket?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'X-AMZ-ACL': 'public-read'}, + body=xml) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), + 'UnexpectedContent') + + def _test_put_no_body(self, use_content_length=False, + use_transfer_encoding=False, string_to_md5=''): + content_md5 = md5(string_to_md5).digest().encode('base64').strip() + with UnreadableInput(self) as fake_input: + req = Request.blank( + '/bucket?acl', + environ={ + 'REQUEST_METHOD': 'PUT', + 'wsgi.input': fake_input}, + headers={ + 'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'Content-MD5': content_md5}, + body='') + if not use_content_length: + req.environ.pop('CONTENT_LENGTH') + if use_transfer_encoding: + req.environ['HTTP_TRANSFER_ENCODING'] = 'chunked' + status, headers, body = self.call_s3api(req) + self.assertEqual(status, '400 Bad Request') + self.assertEqual(self._get_error_code(body), 'MissingSecurityHeader') + self.assertEqual(self._get_error_message(body), + 'Your request was missing a required header.') + self.assertIn('x-amz-acl', body) + + @s3acl + def test_bucket_fails_with_neither_acl_header_nor_xml_PUT(self): + self._test_put_no_body() + self._test_put_no_body(string_to_md5='test') + self._test_put_no_body(use_content_length=True) + self._test_put_no_body(use_content_length=True, string_to_md5='test') + self._test_put_no_body(use_transfer_encoding=True) + self._test_put_no_body(use_transfer_encoding=True, string_to_md5='zz') + + def test_object_acl_GET(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self._check_acl('test:tester', body) + + def test_invalid_xml(self): + req = Request.blank('/bucket?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body='invalid') + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'MalformedACLError') + + def test_handle_acl_header(self): + def check_generated_acl_header(acl, targets): + req = Request.blank('/bucket', + headers={'X-Amz-Acl': acl}) + handle_acl_header(req) + for target in targets: + self.assertTrue(target[0] in req.headers) + self.assertEqual(req.headers[target[0]], target[1]) + + check_generated_acl_header('public-read', + [('X-Container-Read', '.r:*,.rlistings')]) + check_generated_acl_header('public-read-write', + [('X-Container-Read', '.r:*,.rlistings'), + ('X-Container-Write', '.r:*')]) + check_generated_acl_header('private', + [('X-Container-Read', '.'), + 
('X-Container-Write', '.')]) + + @s3acl(s3acl_only=True) + def test_handle_acl_header_with_s3acl(self): + def check_generated_acl_header(acl, targets): + req = Request.blank('/bucket', + headers={'X-Amz-Acl': acl}) + for target in targets: + self.assertTrue(target not in req.headers) + self.assertTrue('HTTP_X_AMZ_ACL' in req.environ) + # TODO: add transration and assertion for s3acl + + check_generated_acl_header('public-read', + ['X-Container-Read']) + check_generated_acl_header('public-read-write', + ['X-Container-Read', 'X-Container-Write']) + check_generated_acl_header('private', + ['X-Container-Read', 'X-Container-Write']) + + def test_handle_acl_with_invalid_header_string(self): + req = Request.blank('/bucket', headers={'X-Amz-Acl': 'invalid'}) + with self.assertRaises(InvalidArgument) as cm: + handle_acl_header(req) + self.assertTrue('argument_name' in cm.exception.info) + self.assertEqual(cm.exception.info['argument_name'], 'x-amz-acl') + self.assertTrue('argument_value' in cm.exception.info) + self.assertEqual(cm.exception.info['argument_value'], 'invalid') + + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_acl_utils.py swift-2.18.0/test/unit/common/middleware/s3api/test_acl_utils.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_acl_utils.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_acl_utils.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,49 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
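[Editorial note] Both test_acl.py above and test_acl_utils.py below assert the same translation: handle_acl_header maps an X-Amz-Acl canned ACL onto Swift container ACL headers ('public-read' opens X-Container-Read to '.r:*,.rlistings', 'public-read-write' additionally opens X-Container-Write, 'private' closes both). A table-driven sketch of that mapping exactly as the tests describe it (the helper below is illustrative, not the s3api implementation):

    CANNED_ACL_HEADERS = {
        'public-read': {'X-Container-Read': '.r:*,.rlistings'},
        'public-read-write': {'X-Container-Read': '.r:*,.rlistings',
                              'X-Container-Write': '.r:*'},
        'private': {'X-Container-Read': '.', 'X-Container-Write': '.'},
    }


    def translate_canned_acl(amz_acl):
        """Map an X-Amz-Acl value to Swift container headers, per the tests."""
        try:
            return dict(CANNED_ACL_HEADERS[amz_acl])
        except KeyError:
            raise ValueError('invalid x-amz-acl value: %r' % amz_acl)


    assert translate_canned_acl('public-read') == {
        'X-Container-Read': '.r:*,.rlistings'}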
+ +import unittest + +from swift.common.swob import Request +from swift.common.middleware.s3api.acl_utils import handle_acl_header + +from test.unit.common.middleware.s3api import S3ApiTestCase + + +class TestS3ApiAclUtils(S3ApiTestCase): + + def setUp(self): + super(TestS3ApiAclUtils, self).setUp() + + def test_handle_acl_header(self): + def check_generated_acl_header(acl, targets): + req = Request.blank('/bucket', + headers={'X-Amz-Acl': acl}) + handle_acl_header(req) + for target in targets: + self.assertTrue(target[0] in req.headers) + self.assertEqual(req.headers[target[0]], target[1]) + + check_generated_acl_header('public-read', + [('X-Container-Read', '.r:*,.rlistings')]) + check_generated_acl_header('public-read-write', + [('X-Container-Read', '.r:*,.rlistings'), + ('X-Container-Write', '.r:*')]) + check_generated_acl_header('private', + [('X-Container-Read', '.'), + ('X-Container-Write', '.')]) + + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_bucket.py swift-2.18.0/test/unit/common/middleware/s3api/test_bucket.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_bucket.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_bucket.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,755 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import cgi + +from swift.common import swob +from swift.common.swob import Request +from swift.common.utils import json + +from swift.common.middleware.s3api.etree import fromstring, tostring, \ + Element, SubElement +from swift.common.middleware.s3api.subresource import Owner, encode_acl, \ + ACLPublicRead +from swift.common.middleware.s3api.s3request import MAX_32BIT_INT + +from test.unit.common.middleware.s3api import S3ApiTestCase +from test.unit.common.middleware.s3api.test_s3_acl import s3acl +from test.unit.common.middleware.s3api.helpers import UnreadableInput + + +class TestS3ApiBucket(S3ApiTestCase): + def setup_objects(self): + self.objects = (('rose', '2011-01-05T02:19:14.275290', 0, 303), + ('viola', '2011-01-05T02:19:14.275290', '0', 3909), + ('lily', '2011-01-05T02:19:14.275290', '0', '3909'), + ('with space', '2011-01-05T02:19:14.275290', 0, 390), + ('with%20space', '2011-01-05T02:19:14.275290', 0, 390)) + + objects = map( + lambda item: {'name': str(item[0]), 'last_modified': str(item[1]), + 'hash': str(item[2]), 'bytes': str(item[3])}, + list(self.objects)) + object_list = json.dumps(objects) + + self.prefixes = ['rose', 'viola', 'lily'] + object_list_subdir = [] + for p in self.prefixes: + object_list_subdir.append({"subdir": p}) + + self.swift.register('DELETE', '/v1/AUTH_test/bucket+segments', + swob.HTTPNoContent, {}, json.dumps([])) + self.swift.register('DELETE', '/v1/AUTH_test/bucket+segments/rose', + swob.HTTPNoContent, {}, json.dumps([])) + self.swift.register('DELETE', '/v1/AUTH_test/bucket+segments/viola', + swob.HTTPNoContent, {}, json.dumps([])) + self.swift.register('DELETE', '/v1/AUTH_test/bucket+segments/lily', + swob.HTTPNoContent, {}, json.dumps([])) + self.swift.register('DELETE', '/v1/AUTH_test/bucket+segments/with' + ' space', swob.HTTPNoContent, {}, json.dumps([])) + self.swift.register('DELETE', '/v1/AUTH_test/bucket+segments/with%20' + 'space', swob.HTTPNoContent, {}, json.dumps([])) + self.swift.register('GET', '/v1/AUTH_test/bucket+segments?format=json' + '&marker=with%2520space', swob.HTTPOk, {}, + json.dumps([])) + self.swift.register('GET', '/v1/AUTH_test/bucket+segments?format=json' + '&marker=', swob.HTTPOk, {}, object_list) + self.swift.register('HEAD', '/v1/AUTH_test/junk', swob.HTTPNoContent, + {}, None) + self.swift.register('HEAD', '/v1/AUTH_test/nojunk', swob.HTTPNotFound, + {}, None) + self.swift.register('GET', '/v1/AUTH_test/junk', swob.HTTPOk, {}, + object_list) + self.swift.register( + 'GET', + '/v1/AUTH_test/junk?delimiter=a&format=json&limit=3&marker=viola', + swob.HTTPOk, {}, json.dumps(objects[2:])) + self.swift.register('GET', '/v1/AUTH_test/junk-subdir', swob.HTTPOk, + {}, json.dumps(object_list_subdir)) + self.swift.register( + 'GET', + '/v1/AUTH_test/subdirs?delimiter=/&format=json&limit=3', + swob.HTTPOk, {}, json.dumps([ + {'subdir': 'nothing/'}, + {'subdir': 'but/'}, + {'subdir': 'subdirs/'}, + ])) + + def setUp(self): + super(TestS3ApiBucket, self).setUp() + self.setup_objects() + + def test_bucket_HEAD(self): + req = Request.blank('/junk', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + def test_bucket_HEAD_error(self): + req = Request.blank('/nojunk', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + 
self.assertEqual(status.split()[0], '404') + self.assertEqual(body, '') # sanity + + def test_bucket_HEAD_slash(self): + req = Request.blank('/junk/', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + def test_bucket_HEAD_slash_error(self): + req = Request.blank('/nojunk/', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '404') + + @s3acl + def test_bucket_GET_error(self): + code = self._test_method_error('GET', '/bucket', swob.HTTPUnauthorized) + self.assertEqual(code, 'SignatureDoesNotMatch') + code = self._test_method_error('GET', '/bucket', swob.HTTPForbidden) + self.assertEqual(code, 'AccessDenied') + code = self._test_method_error('GET', '/bucket', swob.HTTPNotFound) + self.assertEqual(code, 'NoSuchBucket') + code = self._test_method_error('GET', '/bucket', swob.HTTPServerError) + self.assertEqual(code, 'InternalError') + + def test_bucket_GET(self): + bucket_name = 'junk' + req = Request.blank('/%s' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + elem = fromstring(body, 'ListBucketResult') + name = elem.find('./Name').text + self.assertEqual(name, bucket_name) + + objects = elem.iterchildren('Contents') + + names = [] + for o in objects: + names.append(o.find('./Key').text) + self.assertEqual('2011-01-05T02:19:14.275Z', + o.find('./LastModified').text) + self.assertEqual('"0"', o.find('./ETag').text) + + self.assertEqual(len(names), len(self.objects)) + for i in self.objects: + self.assertTrue(i[0] in names) + + def test_bucket_GET_subdir(self): + bucket_name = 'junk-subdir' + req = Request.blank('/%s' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + elem = fromstring(body, 'ListBucketResult') + name = elem.find('./Name').text + self.assertEqual(name, bucket_name) + + prefixes = elem.findall('CommonPrefixes') + + self.assertEqual(len(prefixes), len(self.prefixes)) + for p in prefixes: + self.assertTrue(p.find('./Prefix').text in self.prefixes) + + def test_bucket_GET_is_truncated(self): + bucket_name = 'junk' + + req = Request.blank('/%s?max-keys=5' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./IsTruncated').text, 'false') + + req = Request.blank('/%s?max-keys=4' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./IsTruncated').text, 'true') + + req = Request.blank('/subdirs?delimiter=/&max-keys=2', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + 
elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./IsTruncated').text, 'true') + self.assertEqual(elem.find('./NextMarker').text, 'but/') + + def test_bucket_GET_v2_is_truncated(self): + bucket_name = 'junk' + + req = Request.blank('/%s?list-type=2&max-keys=5' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./KeyCount').text, '5') + self.assertEqual(elem.find('./IsTruncated').text, 'false') + + req = Request.blank('/%s?list-type=2&max-keys=4' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListBucketResult') + self.assertIsNotNone(elem.find('./NextContinuationToken')) + self.assertEqual(elem.find('./KeyCount').text, '4') + self.assertEqual(elem.find('./IsTruncated').text, 'true') + + req = Request.blank('/subdirs?list-type=2&delimiter=/&max-keys=2', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListBucketResult') + self.assertIsNotNone(elem.find('./NextContinuationToken')) + self.assertEqual(elem.find('./KeyCount').text, '2') + self.assertEqual(elem.find('./IsTruncated').text, 'true') + + def test_bucket_GET_max_keys(self): + bucket_name = 'junk' + + req = Request.blank('/%s?max-keys=5' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./MaxKeys').text, '5') + _, path = self.swift.calls[-1] + _, query_string = path.split('?') + args = dict(cgi.parse_qsl(query_string)) + self.assertEqual(args['limit'], '6') + + req = Request.blank('/%s?max-keys=5000' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./MaxKeys').text, '5000') + _, path = self.swift.calls[-1] + _, query_string = path.split('?') + args = dict(cgi.parse_qsl(query_string)) + self.assertEqual(args['limit'], '1001') + + def test_bucket_GET_str_max_keys(self): + bucket_name = 'junk' + + req = Request.blank('/%s?max-keys=invalid' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def test_bucket_GET_negative_max_keys(self): + bucket_name = 'junk' + + req = Request.blank('/%s?max-keys=-1' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def test_bucket_GET_over_32bit_int_max_keys(self): + bucket_name = 'junk' + + req = Request.blank('/%s?max-keys=%s' % + (bucket_name, MAX_32BIT_INT + 1), + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS 
test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def test_bucket_GET_passthroughs(self): + bucket_name = 'junk' + req = Request.blank('/%s?delimiter=a&marker=b&prefix=c' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./Prefix').text, 'c') + self.assertEqual(elem.find('./Marker').text, 'b') + self.assertEqual(elem.find('./Delimiter').text, 'a') + _, path = self.swift.calls[-1] + _, query_string = path.split('?') + args = dict(cgi.parse_qsl(query_string)) + self.assertEqual(args['delimiter'], 'a') + self.assertEqual(args['marker'], 'b') + self.assertEqual(args['prefix'], 'c') + + def test_bucket_GET_v2_passthroughs(self): + bucket_name = 'junk' + req = Request.blank( + '/%s?list-type=2&delimiter=a&start-after=b&prefix=c' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./Prefix').text, 'c') + self.assertEqual(elem.find('./StartAfter').text, 'b') + self.assertEqual(elem.find('./Delimiter').text, 'a') + _, path = self.swift.calls[-1] + _, query_string = path.split('?') + args = dict(cgi.parse_qsl(query_string)) + self.assertEqual(args['delimiter'], 'a') + # "start-after" is converted to "marker" + self.assertEqual(args['marker'], 'b') + self.assertEqual(args['prefix'], 'c') + + def test_bucket_GET_with_nonascii_queries(self): + bucket_name = 'junk' + req = Request.blank( + '/%s?delimiter=\xef\xbc\xa1&marker=\xef\xbc\xa2&' + 'prefix=\xef\xbc\xa3' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./Prefix').text, '\xef\xbc\xa3') + self.assertEqual(elem.find('./Marker').text, '\xef\xbc\xa2') + self.assertEqual(elem.find('./Delimiter').text, '\xef\xbc\xa1') + _, path = self.swift.calls[-1] + _, query_string = path.split('?') + args = dict(cgi.parse_qsl(query_string)) + self.assertEqual(args['delimiter'], '\xef\xbc\xa1') + self.assertEqual(args['marker'], '\xef\xbc\xa2') + self.assertEqual(args['prefix'], '\xef\xbc\xa3') + + def test_bucket_GET_v2_with_nonascii_queries(self): + bucket_name = 'junk' + req = Request.blank( + '/%s?list-type=2&delimiter=\xef\xbc\xa1&start-after=\xef\xbc\xa2&' + 'prefix=\xef\xbc\xa3' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./Prefix').text, '\xef\xbc\xa3') + self.assertEqual(elem.find('./StartAfter').text, '\xef\xbc\xa2') + self.assertEqual(elem.find('./Delimiter').text, '\xef\xbc\xa1') + _, path = self.swift.calls[-1] + _, query_string = path.split('?') + args = dict(cgi.parse_qsl(query_string)) + self.assertEqual(args['delimiter'], '\xef\xbc\xa1') + self.assertEqual(args['marker'], '\xef\xbc\xa2') + self.assertEqual(args['prefix'], '\xef\xbc\xa3') + + def test_bucket_GET_with_delimiter_max_keys(self): + 
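[Editorial note] The bucket GET tests here assert how S3 listing parameters reach the backend container listing: max-keys is clamped to the configured maximum and forwarded as limit plus one (so truncation can be detected), the v2 start-after parameter lands on Swift's marker, and delimiter/prefix pass through unchanged. A rough sketch of that query translation as implied by the assertions (the function and its default limit are assumptions for illustration):

    try:
        from urllib.parse import parse_qsl      # Python 3
    except ImportError:
        from urlparse import parse_qsl          # Python 2


    def s3_listing_to_swift_query(query_string, listing_limit=1000):
        """Translate S3 bucket-listing params into Swift listing params."""
        params = dict(parse_qsl(query_string, keep_blank_values=True))
        max_keys = min(int(params.get('max-keys', listing_limit)),
                       listing_limit)
        # ask for one extra row so a truncated listing can be detected
        swift = {'format': 'json', 'limit': str(max_keys + 1)}
        if 'prefix' in params:
            swift['prefix'] = params['prefix']
        if 'delimiter' in params:
            swift['delimiter'] = params['delimiter']
        # v1 uses marker, v2 uses start-after; both land on Swift's marker
        marker = params.get('start-after', params.get('marker'))
        if marker is not None:
            swift['marker'] = marker
        return swift


    assert s3_listing_to_swift_query('max-keys=5&delimiter=a')['limit'] == '6'
    assert s3_listing_to_swift_query('max-keys=5000')['limit'] == '1001'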
bucket_name = 'junk' + req = Request.blank('/%s?delimiter=a&max-keys=2' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./NextMarker').text, 'viola') + self.assertEqual(elem.find('./MaxKeys').text, '2') + self.assertEqual(elem.find('./IsTruncated').text, 'true') + + def test_bucket_GET_v2_with_delimiter_max_keys(self): + bucket_name = 'junk' + req = Request.blank( + '/%s?list-type=2&delimiter=a&max-keys=2' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + elem = fromstring(body, 'ListBucketResult') + next_token = elem.find('./NextContinuationToken') + self.assertIsNotNone(next_token) + self.assertEqual(elem.find('./MaxKeys').text, '2') + self.assertEqual(elem.find('./IsTruncated').text, 'true') + + req = Request.blank( + '/%s?list-type=2&delimiter=a&max-keys=2&continuation-token=%s' % + (bucket_name, next_token.text), + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + elem = fromstring(body, 'ListBucketResult') + names = [o.find('./Key').text for o in elem.iterchildren('Contents')] + self.assertEqual(names[0], 'lily') + + def test_bucket_GET_subdir_with_delimiter_max_keys(self): + bucket_name = 'junk-subdir' + req = Request.blank('/%s?delimiter=a&max-keys=1' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + elem = fromstring(body, 'ListBucketResult') + self.assertEqual(elem.find('./NextMarker').text, 'rose') + self.assertEqual(elem.find('./MaxKeys').text, '1') + self.assertEqual(elem.find('./IsTruncated').text, 'true') + + def test_bucket_GET_v2_fetch_owner(self): + bucket_name = 'junk' + req = Request.blank('/%s?list-type=2' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + elem = fromstring(body, 'ListBucketResult') + name = elem.find('./Name').text + self.assertEqual(name, bucket_name) + + objects = elem.iterchildren('Contents') + for o in objects: + self.assertIsNone(o.find('./Owner')) + + req = Request.blank('/%s?list-type=2&fetch-owner=true' % bucket_name, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + elem = fromstring(body, 'ListBucketResult') + name = elem.find('./Name').text + self.assertEqual(name, bucket_name) + + objects = elem.iterchildren('Contents') + for o in objects: + self.assertIsNotNone(o.find('./Owner')) + + @s3acl + def test_bucket_PUT_error(self): + code = self._test_method_error('PUT', '/bucket', swob.HTTPCreated, + headers={'Content-Length': 'a'}) + self.assertEqual(code, 'InvalidArgument') + code = self._test_method_error('PUT', 
'/bucket', swob.HTTPCreated, + headers={'Content-Length': '-1'}) + self.assertEqual(code, 'InvalidArgument') + code = self._test_method_error('PUT', '/bucket', swob.HTTPUnauthorized) + self.assertEqual(code, 'SignatureDoesNotMatch') + code = self._test_method_error('PUT', '/bucket', swob.HTTPForbidden) + self.assertEqual(code, 'AccessDenied') + code = self._test_method_error('PUT', '/bucket', swob.HTTPAccepted) + self.assertEqual(code, 'BucketAlreadyExists') + code = self._test_method_error('PUT', '/bucket', swob.HTTPServerError) + self.assertEqual(code, 'InternalError') + code = self._test_method_error( + 'PUT', '/bucket+bucket', swob.HTTPCreated) + self.assertEqual(code, 'InvalidBucketName') + code = self._test_method_error( + 'PUT', '/192.168.11.1', swob.HTTPCreated) + self.assertEqual(code, 'InvalidBucketName') + code = self._test_method_error( + 'PUT', '/bucket.-bucket', swob.HTTPCreated) + self.assertEqual(code, 'InvalidBucketName') + code = self._test_method_error( + 'PUT', '/bucket-.bucket', swob.HTTPCreated) + self.assertEqual(code, 'InvalidBucketName') + code = self._test_method_error('PUT', '/bucket*', swob.HTTPCreated) + self.assertEqual(code, 'InvalidBucketName') + code = self._test_method_error('PUT', '/b', swob.HTTPCreated) + self.assertEqual(code, 'InvalidBucketName') + code = self._test_method_error( + 'PUT', '/%s' % ''.join(['b' for x in xrange(64)]), + swob.HTTPCreated) + self.assertEqual(code, 'InvalidBucketName') + + @s3acl + def test_bucket_PUT(self): + req = Request.blank('/bucket', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(body, '') + self.assertEqual(status.split()[0], '200') + self.assertEqual(headers['Location'], '/bucket') + + # Apparently some clients will include a chunked transfer-encoding + # even with no body + req = Request.blank('/bucket', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'Transfer-Encoding': 'chunked'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(body, '') + self.assertEqual(status.split()[0], '200') + self.assertEqual(headers['Location'], '/bucket') + + with UnreadableInput(self) as fake_input: + req = Request.blank( + '/bucket', + environ={'REQUEST_METHOD': 'PUT', + 'wsgi.input': fake_input}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(body, '') + self.assertEqual(status.split()[0], '200') + self.assertEqual(headers['Location'], '/bucket') + + def _test_bucket_PUT_with_location(self, root_element): + elem = Element(root_element) + SubElement(elem, 'LocationConstraint').text = 'US' + xml = tostring(elem) + + req = Request.blank('/bucket', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body=xml) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + @s3acl + def test_bucket_PUT_with_location(self): + self._test_bucket_PUT_with_location('CreateBucketConfiguration') + + @s3acl + def test_bucket_PUT_with_ami_location(self): + # ec2-ami-tools apparently uses CreateBucketConstraint instead? 
+ self._test_bucket_PUT_with_location('CreateBucketConstraint') + + @s3acl + def test_bucket_PUT_with_strange_location(self): + # Even crazier: it doesn't seem to matter + self._test_bucket_PUT_with_location('foo') + + def test_bucket_PUT_with_canned_acl(self): + req = Request.blank('/bucket', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'X-Amz-Acl': 'public-read'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + _, _, headers = self.swift.calls_with_headers[-1] + self.assertTrue('X-Container-Read' in headers) + self.assertEqual(headers.get('X-Container-Read'), '.r:*,.rlistings') + self.assertNotIn('X-Container-Sysmeta-S3api-Acl', headers) + + @s3acl(s3acl_only=True) + def test_bucket_PUT_with_canned_s3acl(self): + account = 'test:tester' + acl = \ + encode_acl('container', ACLPublicRead(Owner(account, account))) + req = Request.blank('/bucket', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'X-Amz-Acl': 'public-read'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + _, _, headers = self.swift.calls_with_headers[-1] + self.assertNotIn('X-Container-Read', headers) + self.assertIn('X-Container-Sysmeta-S3api-Acl', headers) + self.assertEqual(headers.get('X-Container-Sysmeta-S3api-Acl'), + acl['x-container-sysmeta-s3api-acl']) + + @s3acl + def test_bucket_PUT_with_location_error(self): + elem = Element('CreateBucketConfiguration') + SubElement(elem, 'LocationConstraint').text = 'XXX' + xml = tostring(elem) + + req = Request.blank('/bucket', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body=xml) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), + 'InvalidLocationConstraint') + + @s3acl + def test_bucket_PUT_with_location_invalid_xml(self): + req = Request.blank('/bucket', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body='invalid_xml') + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'MalformedXML') + + def _test_method_error_delete(self, path, sw_resp): + self.swift.register('HEAD', '/v1/AUTH_test' + path, sw_resp, {}, None) + return self._test_method_error('DELETE', path, sw_resp) + + @s3acl + def test_bucket_DELETE_error(self): + code = self._test_method_error_delete('/bucket', swob.HTTPUnauthorized) + self.assertEqual(code, 'SignatureDoesNotMatch') + code = self._test_method_error_delete('/bucket', swob.HTTPForbidden) + self.assertEqual(code, 'AccessDenied') + code = self._test_method_error_delete('/bucket', swob.HTTPNotFound) + self.assertEqual(code, 'NoSuchBucket') + code = self._test_method_error_delete('/bucket', swob.HTTPServerError) + self.assertEqual(code, 'InternalError') + + # bucket not empty is now validated at s3api + self.swift.register('HEAD', '/v1/AUTH_test/bucket', swob.HTTPNoContent, + {'X-Container-Object-Count': '1'}, None) + code = self._test_method_error('DELETE', '/bucket', swob.HTTPConflict) + self.assertEqual(code, 'BucketNotEmpty') + + @s3acl + def test_bucket_DELETE(self): + # overwrite default HEAD to return x-container-object-count + self.swift.register( + 'HEAD', '/v1/AUTH_test/bucket', swob.HTTPNoContent, + {'X-Container-Object-Count': 0}, None) + + req = 
Request.blank('/bucket', + environ={'REQUEST_METHOD': 'DELETE'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '204') + + @s3acl + def test_bucket_DELETE_error_while_segment_bucket_delete(self): + # An error occurred while deleting segment objects + self.swift.register('DELETE', '/v1/AUTH_test/bucket+segments/lily', + swob.HTTPServiceUnavailable, {}, json.dumps([])) + # overwrite default HEAD to return x-container-object-count + self.swift.register( + 'HEAD', '/v1/AUTH_test/bucket', swob.HTTPNoContent, + {'X-Container-Object-Count': 0}, None) + + req = Request.blank('/bucket', + environ={'REQUEST_METHOD': 'DELETE'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '503') + called = [(method, path) for method, path, _ in + self.swift.calls_with_headers] + # Don't delete original bucket when error occurred in segment container + self.assertNotIn(('DELETE', '/v1/AUTH_test/bucket'), called) + + def _test_bucket_for_s3acl(self, method, account): + req = Request.blank('/bucket', + environ={'REQUEST_METHOD': method}, + headers={'Authorization': 'AWS %s:hmac' % account, + 'Date': self.get_date_header()}) + + return self.call_s3api(req) + + @s3acl(s3acl_only=True) + def test_bucket_GET_without_permission(self): + status, headers, body = self._test_bucket_for_s3acl('GET', + 'test:other') + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + @s3acl(s3acl_only=True) + def test_bucket_GET_with_read_permission(self): + status, headers, body = self._test_bucket_for_s3acl('GET', + 'test:read') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_bucket_GET_with_fullcontrol_permission(self): + status, headers, body = \ + self._test_bucket_for_s3acl('GET', 'test:full_control') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_bucket_GET_with_owner_permission(self): + status, headers, body = self._test_bucket_for_s3acl('GET', + 'test:tester') + self.assertEqual(status.split()[0], '200') + + def _test_bucket_GET_canned_acl(self, bucket): + req = Request.blank('/%s' % bucket, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + + return self.call_s3api(req) + + @s3acl(s3acl_only=True) + def test_bucket_GET_authenticated_users(self): + status, headers, body = \ + self._test_bucket_GET_canned_acl('authenticated') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_bucket_GET_all_users(self): + status, headers, body = self._test_bucket_GET_canned_acl('public') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_bucket_DELETE_without_permission(self): + status, headers, body = self._test_bucket_for_s3acl('DELETE', + 'test:other') + self.assertEqual(self._get_error_code(body), 'AccessDenied') + # Don't delete anything in backend Swift + called = [method for method, _, _ in self.swift.calls_with_headers] + self.assertNotIn('DELETE', called) + + @s3acl(s3acl_only=True) + def test_bucket_DELETE_with_write_permission(self): + status, headers, body = self._test_bucket_for_s3acl('DELETE', + 'test:write') + self.assertEqual(self._get_error_code(body), 'AccessDenied') + # Don't delete anything in backend Swift + called = [method for method, _, _ in 
self.swift.calls_with_headers] + self.assertNotIn('DELETE', called) + + @s3acl(s3acl_only=True) + def test_bucket_DELETE_with_fullcontrol_permission(self): + status, headers, body = \ + self._test_bucket_for_s3acl('DELETE', 'test:full_control') + self.assertEqual(self._get_error_code(body), 'AccessDenied') + # Don't delete anything in backend Swift + called = [method for method, _, _ in self.swift.calls_with_headers] + self.assertNotIn('DELETE', called) + + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_cfg.py swift-2.18.0/test/unit/common/middleware/s3api/test_cfg.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_cfg.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_cfg.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,44 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from swift.common.middleware.s3api.utils import Config + + +class TestS3ApiCfg(unittest.TestCase): + def test_config(self): + conf = Config( + { + 'a': 'str', + 'b': 10, + 'c': True, + } + ) + + conf.update( + { + 'a': 'str2', + 'b': '100', + 'c': 'false', + } + ) + + self.assertEqual(conf['a'], 'str2') + self.assertEqual(conf['b'], 100) + self.assertEqual(conf['c'], False) + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_etree.py swift-2.18.0/test/unit/common/middleware/s3api/test_etree.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_etree.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_etree.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,73 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from swift.common.middleware.s3api import etree + + +class TestS3ApiEtree(unittest.TestCase): + def test_xml_namespace(self): + def test_xml(ns, prefix): + return '<A %(ns)s><%(prefix)sB>C</%(prefix)sB></A>' % \ + ({'ns': ns, 'prefix': prefix}) + + # No namespace is same as having the S3 namespace. + xml = test_xml('', '') + elem = etree.fromstring(xml) + self.assertEqual(elem.find('./B').text, 'C') + + # The S3 namespace is handled as no namespace.
+ xml = test_xml('xmlns="%s"' % etree.XMLNS_S3, '') + elem = etree.fromstring(xml) + self.assertEqual(elem.find('./B').text, 'C') + + xml = test_xml('xmlns:s3="%s"' % etree.XMLNS_S3, 's3:') + elem = etree.fromstring(xml) + self.assertEqual(elem.find('./B').text, 'C') + + # Any namespaces without a prefix work as no namespace. + xml = test_xml('xmlns="http://example.com/"', '') + elem = etree.fromstring(xml) + self.assertEqual(elem.find('./B').text, 'C') + + xml = test_xml('xmlns:s3="http://example.com/"', 's3:') + elem = etree.fromstring(xml) + self.assertIsNone(elem.find('./B')) + + def test_xml_with_comments(self): + xml = '<A><!-- comment --><B>C</B></A>' + elem = etree.fromstring(xml) + self.assertEqual(elem.find('./B').text, 'C') + + def test_tostring_with_nonascii_text(self): + elem = etree.Element('Test') + sub = etree.SubElement(elem, 'FOO') + sub.text = '\xef\xbc\xa1' + self.assertTrue(isinstance(sub.text, str)) + xml_string = etree.tostring(elem) + self.assertTrue(isinstance(xml_string, str)) + + def test_fromstring_with_nonascii_text(self): + input_str = '<?xml version="1.0" encoding="UTF-8"?>\n' \ + '<Test><FOO>\xef\xbc\xa1</FOO></Test>' + elem = etree.fromstring(input_str) + text = elem.find('FOO').text + self.assertEqual(text, '\xef\xbc\xa1') + self.assertTrue(isinstance(text, str)) + + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_helpers.py swift-2.18.0/test/unit/common/middleware/s3api/test_helpers.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_helpers.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_helpers.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,69 @@ +# Copyright (c) 2013 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This stuff can't live in test/unit/__init__.py due to its swob dependency.
+ +import unittest +from test.unit.common.middleware.s3api.helpers import FakeSwift +from swift.common.middleware.s3api.utils import sysmeta_header +from swift.common.swob import HeaderKeyDict +from mock import MagicMock + + +class S3ApiHelperTestCase(unittest.TestCase): + def setUp(self): + self.method = 'HEAD' + self.path = '/v1/AUTH_test/bucket' + + def _check_headers(self, swift, method, path, headers): + _, response_headers, _ = swift._responses[(method, path)] + self.assertEqual(headers, response_headers) + + def test_fake_swift_sysmeta(self): + swift = FakeSwift() + orig_headers = HeaderKeyDict() + orig_headers.update({sysmeta_header('container', 'acl'): 'test', + 'x-container-meta-foo': 'bar'}) + + swift.register(self.method, self.path, MagicMock(), orig_headers, None) + + self._check_headers(swift, self.method, self.path, orig_headers) + + new_headers = orig_headers.copy() + del new_headers[sysmeta_header('container', 'acl').title()] + swift.register(self.method, self.path, MagicMock(), new_headers, None) + + self._check_headers(swift, self.method, self.path, orig_headers) + + def test_fake_swift_sysmeta_overwrite(self): + swift = FakeSwift() + orig_headers = HeaderKeyDict() + orig_headers.update({sysmeta_header('container', 'acl'): 'test', + 'x-container-meta-foo': 'bar'}) + swift.register(self.method, self.path, MagicMock(), orig_headers, None) + + self._check_headers(swift, self.method, self.path, orig_headers) + + new_headers = orig_headers.copy() + new_headers[sysmeta_header('container', 'acl').title()] = 'bar' + + swift.register(self.method, self.path, MagicMock(), new_headers, None) + + self.assertFalse(orig_headers == new_headers) + self._check_headers(swift, self.method, self.path, new_headers) + + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_location.py swift-2.18.0/test/unit/common/middleware/s3api/test_location.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_location.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_location.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,51 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +from swift.common.swob import Request + +from test.unit.common.middleware.s3api import S3ApiTestCase +from swift.common.middleware.s3api.etree import fromstring + + +class TestS3ApiLocation(S3ApiTestCase): + + def test_object_location(self): + req = Request.blank('/bucket?location', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + elem = fromstring(body, 'LocationConstraint') + location = elem.text + self.assertIsNone(location) + + def test_object_location_setting_as_us_west_1(self): + self.s3api.conf.location = 'us-west-1' + req = Request.blank('/bucket?location', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + elem = fromstring(body, 'LocationConstraint') + location = elem.text + self.assertEqual(location, 'us-west-1') + + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_logging.py swift-2.18.0/test/unit/common/middleware/s3api/test_logging.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_logging.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_logging.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,66 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +from swift.common.swob import Request + +from test.unit.common.middleware.s3api import S3ApiTestCase +from swift.common.middleware.s3api.etree import fromstring + + +class TestS3ApiLogging(S3ApiTestCase): + + def setUp(self): + super(TestS3ApiLogging, self).setUp() + + def test_bucket_logging_GET(self): + req = Request.blank('/bucket?logging', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + xml = fromstring(body, 'BucketLoggingStatus') + self.assertEqual(xml.keys(), []) + self.assertEqual(status.split()[0], '200') + + def test_object_logging_GET_error(self): + req = Request.blank('/bucket/object?logging', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'NoLoggingStatusForKey') + + def test_bucket_logging_PUT(self): + req = Request.blank('/bucket?logging', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + # FIXME: Support PUT logging + # self.assertEqual(status, 201) + self.assertEqual(self._get_error_code(body), 'NotImplemented') + + def test_object_logging_PUT_error(self): + req = Request.blank('/bucket/object?logging', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'NoLoggingStatusForKey') + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_multi_delete.py swift-2.18.0/test/unit/common/middleware/s3api/test_multi_delete.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_multi_delete.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_multi_delete.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,284 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +from datetime import datetime +from hashlib import md5 + +from six.moves import urllib +from swift.common import swob +from swift.common.swob import Request + +from test.unit.common.middleware.s3api import S3ApiTestCase +from test.unit.common.middleware.s3api.helpers import UnreadableInput +from swift.common.middleware.s3api.etree import fromstring, tostring, Element, \ + SubElement +from test.unit.common.middleware.s3api.test_s3_acl import s3acl + + +class TestS3ApiMultiDelete(S3ApiTestCase): + + def setUp(self): + super(TestS3ApiMultiDelete, self).setUp() + self.swift.register('HEAD', '/v1/AUTH_test/bucket/Key1', + swob.HTTPOk, {}, None) + self.swift.register('HEAD', '/v1/AUTH_test/bucket/Key2', + swob.HTTPNotFound, {}, None) + + @s3acl + def test_object_multi_DELETE_to_object(self): + elem = Element('Delete') + obj = SubElement(elem, 'Object') + SubElement(obj, 'Key').text = 'object' + body = tostring(elem, use_s3ns=False) + content_md5 = md5(body).digest().encode('base64').strip() + + req = Request.blank('/bucket/object?delete', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'Content-MD5': content_md5}, + body=body) + + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + @s3acl + def test_object_multi_DELETE(self): + self.swift.register('HEAD', '/v1/AUTH_test/bucket/Key3', + swob.HTTPOk, + {'x-static-large-object': 'True'}, + None) + self.swift.register('DELETE', '/v1/AUTH_test/bucket/Key1', + swob.HTTPNoContent, {}, None) + self.swift.register('DELETE', '/v1/AUTH_test/bucket/Key2', + swob.HTTPNotFound, {}, None) + self.swift.register('DELETE', '/v1/AUTH_test/bucket/Key3', + swob.HTTPOk, {}, None) + + elem = Element('Delete') + for key in ['Key1', 'Key2', 'Key3']: + obj = SubElement(elem, 'Object') + SubElement(obj, 'Key').text = key + body = tostring(elem, use_s3ns=False) + content_md5 = md5(body).digest().encode('base64').strip() + + req = Request.blank('/bucket?delete', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Content-Type': 'multipart/form-data', + 'Date': self.get_date_header(), + 'Content-MD5': content_md5}, + body=body) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + elem = fromstring(body) + self.assertEqual(len(elem.findall('Deleted')), 3) + _, path, _ = self.swift.calls_with_headers[-1] + path, query_string = path.split('?', 1) + self.assertEqual(path, '/v1/AUTH_test/bucket/Key3') + query = dict(urllib.parse.parse_qsl(query_string)) + self.assertEqual(query['multipart-manifest'], 'delete') + + @s3acl + def test_object_multi_DELETE_quiet(self): + self.swift.register('DELETE', '/v1/AUTH_test/bucket/Key1', + swob.HTTPNoContent, {}, None) + self.swift.register('DELETE', '/v1/AUTH_test/bucket/Key2', + swob.HTTPNotFound, {}, None) + + elem = Element('Delete') + SubElement(elem, 'Quiet').text = 'true' + for key in ['Key1', 'Key2']: + obj = SubElement(elem, 'Object') + SubElement(obj, 'Key').text = key + body = tostring(elem, use_s3ns=False) + content_md5 = md5(body).digest().encode('base64').strip() + + req = Request.blank('/bucket?delete', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'Content-MD5': content_md5}, + body=body) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + elem = fromstring(body) + 
self.assertEqual(len(elem.findall('Deleted')), 0) + + @s3acl + def test_object_multi_DELETE_no_key(self): + self.swift.register('DELETE', '/v1/AUTH_test/bucket/Key1', + swob.HTTPNoContent, {}, None) + self.swift.register('DELETE', '/v1/AUTH_test/bucket/Key2', + swob.HTTPNotFound, {}, None) + + elem = Element('Delete') + SubElement(elem, 'Quiet').text = 'true' + for key in ['Key1', 'Key2']: + obj = SubElement(elem, 'Object') + SubElement(obj, 'Key') + body = tostring(elem, use_s3ns=False) + content_md5 = md5(body).digest().encode('base64').strip() + + req = Request.blank('/bucket?delete', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'Content-MD5': content_md5}, + body=body) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'UserKeyMustBeSpecified') + + @s3acl + def test_object_multi_DELETE_with_invalid_md5(self): + elem = Element('Delete') + for key in ['Key1', 'Key2']: + obj = SubElement(elem, 'Object') + SubElement(obj, 'Key').text = key + body = tostring(elem, use_s3ns=False) + + req = Request.blank('/bucket?delete', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'Content-MD5': 'XXXX'}, + body=body) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidDigest') + + @s3acl + def test_object_multi_DELETE_without_md5(self): + elem = Element('Delete') + for key in ['Key1', 'Key2']: + obj = SubElement(elem, 'Object') + SubElement(obj, 'Key').text = key + body = tostring(elem, use_s3ns=False) + + req = Request.blank('/bucket?delete', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body=body) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidRequest') + + @s3acl + def test_object_multi_DELETE_too_many_keys(self): + elem = Element('Delete') + for i in range(self.conf.max_multi_delete_objects + 1): + obj = SubElement(elem, 'Object') + SubElement(obj, 'Key').text = str(i) + body = tostring(elem, use_s3ns=False) + content_md5 = md5(body).digest().encode('base64').strip() + + req = Request.blank('/bucket?delete', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'Content-MD5': content_md5}, + body=body) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'MalformedXML') + + def _test_object_multi_DELETE(self, account): + self.keys = ['Key1', 'Key2'] + self.swift.register( + 'DELETE', '/v1/AUTH_test/bucket/%s' % self.keys[0], + swob.HTTPNoContent, {}, None) + self.swift.register( + 'DELETE', '/v1/AUTH_test/bucket/%s' % self.keys[1], + swob.HTTPNotFound, {}, None) + + elem = Element('Delete') + for key in self.keys: + obj = SubElement(elem, 'Object') + SubElement(obj, 'Key').text = key + body = tostring(elem, use_s3ns=False) + content_md5 = md5(body).digest().encode('base64').strip() + + req = Request.blank('/bucket?delete', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS %s:hmac' % account, + 'Date': self.get_date_header(), + 'Content-MD5': content_md5}, + body=body) + req.date = datetime.now() + req.content_type = 'text/plain' + + return self.call_s3api(req) + + @s3acl(s3acl_only=True) + def test_object_multi_DELETE_without_permission(self): + status, headers, body = 
self._test_object_multi_DELETE('test:other') + self.assertEqual(status.split()[0], '200') + elem = fromstring(body) + errors = elem.findall('Error') + self.assertEqual(len(errors), len(self.keys)) + for e in errors: + self.assertTrue(e.find('Key').text in self.keys) + self.assertEqual(e.find('Code').text, 'AccessDenied') + self.assertEqual(e.find('Message').text, 'Access Denied.') + + @s3acl(s3acl_only=True) + def test_object_multi_DELETE_with_write_permission(self): + status, headers, body = self._test_object_multi_DELETE('test:write') + self.assertEqual(status.split()[0], '200') + elem = fromstring(body) + self.assertEqual(len(elem.findall('Deleted')), len(self.keys)) + + @s3acl(s3acl_only=True) + def test_object_multi_DELETE_with_fullcontrol_permission(self): + status, headers, body = \ + self._test_object_multi_DELETE('test:full_control') + self.assertEqual(status.split()[0], '200') + elem = fromstring(body) + self.assertEqual(len(elem.findall('Deleted')), len(self.keys)) + + def _test_no_body(self, use_content_length=False, + use_transfer_encoding=False, string_to_md5=''): + content_md5 = md5(string_to_md5).digest().encode('base64').strip() + with UnreadableInput(self) as fake_input: + req = Request.blank( + '/bucket?delete', + environ={ + 'REQUEST_METHOD': 'POST', + 'wsgi.input': fake_input}, + headers={ + 'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'Content-MD5': content_md5}, + body='') + if not use_content_length: + req.environ.pop('CONTENT_LENGTH') + if use_transfer_encoding: + req.environ['HTTP_TRANSFER_ENCODING'] = 'chunked' + status, headers, body = self.call_s3api(req) + self.assertEqual(status, '400 Bad Request') + self.assertEqual(self._get_error_code(body), 'MissingRequestBodyError') + + @s3acl + def test_object_multi_DELETE_empty_body(self): + self._test_no_body() + self._test_no_body(string_to_md5='test') + self._test_no_body(use_content_length=True) + self._test_no_body(use_content_length=True, string_to_md5='test') + self._test_no_body(use_transfer_encoding=True) + self._test_no_body(use_transfer_encoding=True, string_to_md5='test') + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_multi_upload.py swift-2.18.0/test/unit/common/middleware/s3api/test_multi_upload.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_multi_upload.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_multi_upload.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,1742 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import base64 +from hashlib import md5 +from mock import patch +import os +import time +import unittest +from urllib import quote + +from swift.common import swob +from swift.common.swob import Request +from swift.common.utils import json + +from test.unit.common.middleware.s3api import S3ApiTestCase +from test.unit.common.middleware.s3api.helpers import UnreadableInput +from swift.common.middleware.s3api.etree import fromstring, tostring +from swift.common.middleware.s3api.subresource import Owner, Grant, User, ACL, \ + encode_acl, decode_acl, ACLPublicRead +from test.unit.common.middleware.s3api.test_s3_acl import s3acl +from swift.common.middleware.s3api.utils import sysmeta_header, mktime, \ + S3Timestamp +from swift.common.middleware.s3api.s3request import MAX_32BIT_INT +from swift.common.middleware.s3api.controllers.multi_upload import \ + MULTIUPLOAD_SUFFIX + +xml = '<CompleteMultipartUpload>' \ + '<Part>' \ + '<PartNumber>1</PartNumber>' \ + '<ETag>HASH</ETag>' \ + '</Part>' \ + '<Part>' \ + '<PartNumber>2</PartNumber>' \ + '<ETag>"HASH"</ETag>' \ + '</Part>' \ + '</CompleteMultipartUpload>' + +objects_template = \ + (('object/X/1', '2014-05-07T19:47:51.592270', 'HASH', 100), + ('object/X/2', '2014-05-07T19:47:52.592270', 'HASH', 200)) + +multiparts_template = \ + (('object/X', '2014-05-07T19:47:50.592270', 'HASH', 1), + ('object/X/1', '2014-05-07T19:47:51.592270', 'HASH', 11), + ('object/X/2', '2014-05-07T19:47:52.592270', 'HASH', 21), + ('object/Y', '2014-05-07T19:47:53.592270', 'HASH', 2), + ('object/Y/1', '2014-05-07T19:47:54.592270', 'HASH', 12), + ('object/Y/2', '2014-05-07T19:47:55.592270', 'HASH', 22), + ('object/Z', '2014-05-07T19:47:56.592270', 'HASH', 3), + ('object/Z/1', '2014-05-07T19:47:57.592270', 'HASH', 13), + ('object/Z/2', '2014-05-07T19:47:58.592270', 'HASH', 23), + ('subdir/object/Z', '2014-05-07T19:47:58.592270', 'HASH', 4), + ('subdir/object/Z/1', '2014-05-07T19:47:58.592270', 'HASH', 41), + ('subdir/object/Z/2', '2014-05-07T19:47:58.592270', 'HASH', 41)) + + +class TestS3ApiMultiUpload(S3ApiTestCase): + + def setUp(self): + super(TestS3ApiMultiUpload, self).setUp() + + segment_bucket = '/v1/AUTH_test/bucket+segments' + self.etag = '7dfa07a8e59ddbcd1dc84d4c4f82aea1' + self.last_modified = 'Fri, 01 Apr 2014 12:00:00 GMT' + put_headers = {'etag': self.etag, 'last-modified': self.last_modified} + + self.s3api.conf.min_segment_size = 1 + + objects = map(lambda item: {'name': item[0], 'last_modified': item[1], + 'hash': item[2], 'bytes': item[3]}, + objects_template) + object_list = json.dumps(objects) + + self.swift.register('PUT', segment_bucket, + swob.HTTPAccepted, {}, None) + self.swift.register('GET', segment_bucket, swob.HTTPOk, {}, + object_list) + self.swift.register('HEAD', segment_bucket + '/object/X', + swob.HTTPOk, + {'x-object-meta-foo': 'bar', + 'content-type': 'application/directory', + 'x-object-sysmeta-s3api-has-content-type': 'yes', + 'x-object-sysmeta-s3api-content-type': + 'baz/quux'}, None) + self.swift.register('PUT', segment_bucket + '/object/X', + swob.HTTPCreated, {}, None) + self.swift.register('DELETE', segment_bucket + '/object/X', + swob.HTTPNoContent, {}, None) + self.swift.register('GET', segment_bucket + '/object/invalid', + swob.HTTPNotFound, {}, None) + self.swift.register('PUT', segment_bucket + '/object/X/1', + swob.HTTPCreated, put_headers, None) + self.swift.register('DELETE', segment_bucket + '/object/X/1', + swob.HTTPNoContent, {}, None) + self.swift.register('DELETE', segment_bucket + '/object/X/2', + swob.HTTPNoContent, {}, None) + + @s3acl + def test_bucket_upload_part(self): + req = Request.blank('/bucket?partNumber=1&uploadId=x', + environ={'REQUEST_METHOD': 'PUT'},
+ headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidRequest') + + @s3acl + def test_object_multipart_uploads_list(self): + req = Request.blank('/bucket/object?uploads', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidRequest') + + @s3acl + def test_bucket_multipart_uploads_initiate(self): + req = Request.blank('/bucket?uploads', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidRequest') + + @s3acl + def test_bucket_list_parts(self): + req = Request.blank('/bucket?uploadId=x', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidRequest') + + @s3acl + def test_bucket_multipart_uploads_abort(self): + req = Request.blank('/bucket?uploadId=x', + environ={'REQUEST_METHOD': 'DELETE'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidRequest') + + @s3acl + def test_bucket_multipart_uploads_complete(self): + req = Request.blank('/bucket?uploadId=x', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidRequest') + + def _test_bucket_multipart_uploads_GET(self, query=None, + multiparts=None): + segment_bucket = '/v1/AUTH_test/bucket+segments' + objects = multiparts or multiparts_template + objects = map(lambda item: {'name': item[0], 'last_modified': item[1], + 'hash': item[2], 'bytes': item[3]}, + objects) + object_list = json.dumps(objects) + self.swift.register('GET', segment_bucket, swob.HTTPOk, {}, + object_list) + + query = '?uploads&' + query if query else '?uploads' + req = Request.blank('/bucket/%s' % query, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + return self.call_s3api(req) + + @s3acl + def test_bucket_multipart_uploads_GET(self): + status, headers, body = self._test_bucket_multipart_uploads_GET() + elem = fromstring(body, 'ListMultipartUploadsResult') + self.assertEqual(elem.find('Bucket').text, 'bucket') + self.assertIsNone(elem.find('KeyMarker').text) + self.assertIsNone(elem.find('UploadIdMarker').text) + self.assertEqual(elem.find('NextUploadIdMarker').text, 'Z') + self.assertEqual(elem.find('MaxUploads').text, '1000') + self.assertEqual(elem.find('IsTruncated').text, 'false') + self.assertEqual(len(elem.findall('Upload')), 4) + objects = [(o[0], o[1][:-3] + 'Z') for o in multiparts_template] + for u in elem.findall('Upload'): + name = u.find('Key').text + '/' + u.find('UploadId').text + initiated = u.find('Initiated').text + self.assertTrue((name, initiated) in objects) + self.assertEqual(u.find('Initiator/ID').text, 'test:tester') + self.assertEqual(u.find('Initiator/DisplayName').text, + 'test:tester') + 
self.assertEqual(u.find('Owner/ID').text, 'test:tester') + self.assertEqual(u.find('Owner/DisplayName').text, 'test:tester') + self.assertEqual(u.find('StorageClass').text, 'STANDARD') + self.assertEqual(status.split()[0], '200') + + @s3acl + def test_bucket_multipart_uploads_GET_without_segment_bucket(self): + segment_bucket = '/v1/AUTH_test/bucket+segments' + self.swift.register('GET', segment_bucket, swob.HTTPNotFound, {}, '') + + req = Request.blank('/bucket?uploads', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + + status, haeaders, body = self.call_s3api(req) + + self.assertEqual(status.split()[0], '200') + elem = fromstring(body, 'ListMultipartUploadsResult') + self.assertEqual(elem.find('Bucket').text, 'bucket') + self.assertIsNone(elem.find('KeyMarker').text) + self.assertIsNone(elem.find('UploadIdMarker').text) + self.assertIsNone(elem.find('NextUploadIdMarker').text) + self.assertEqual(elem.find('MaxUploads').text, '1000') + self.assertEqual(elem.find('IsTruncated').text, 'false') + self.assertEqual(len(elem.findall('Upload')), 0) + + @s3acl + @patch('swift.common.middleware.s3api.s3request.get_container_info', + lambda x, y: {'status': 404}) + def test_bucket_multipart_uploads_GET_without_bucket(self): + self.swift.register('HEAD', '/v1/AUTH_test/bucket', + swob.HTTPNotFound, {}, '') + req = Request.blank('/bucket?uploads', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, haeaders, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '404') + self.assertEqual(self._get_error_code(body), 'NoSuchBucket') + + @s3acl + def test_bucket_multipart_uploads_GET_encoding_type_error(self): + query = 'encoding-type=xml' + status, headers, body = \ + self._test_bucket_multipart_uploads_GET(query) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + @s3acl + def test_bucket_multipart_uploads_GET_maxuploads(self): + query = 'max-uploads=2' + status, headers, body = \ + self._test_bucket_multipart_uploads_GET(query) + elem = fromstring(body, 'ListMultipartUploadsResult') + self.assertEqual(len(elem.findall('Upload/UploadId')), 2) + self.assertEqual(elem.find('NextKeyMarker').text, 'object') + self.assertEqual(elem.find('NextUploadIdMarker').text, 'Y') + self.assertEqual(elem.find('MaxUploads').text, '2') + self.assertEqual(elem.find('IsTruncated').text, 'true') + self.assertEqual(status.split()[0], '200') + + @s3acl + def test_bucket_multipart_uploads_GET_str_maxuploads(self): + query = 'max-uploads=invalid' + status, headers, body = \ + self._test_bucket_multipart_uploads_GET(query) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + @s3acl + def test_bucket_multipart_uploads_GET_negative_maxuploads(self): + query = 'max-uploads=-1' + status, headers, body = \ + self._test_bucket_multipart_uploads_GET(query) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + @s3acl + def test_bucket_multipart_uploads_GET_maxuploads_over_default(self): + query = 'max-uploads=1001' + status, headers, body = \ + self._test_bucket_multipart_uploads_GET(query) + elem = fromstring(body, 'ListMultipartUploadsResult') + self.assertEqual(len(elem.findall('Upload/UploadId')), 4) + self.assertEqual(elem.find('NextKeyMarker').text, 'subdir/object') + self.assertEqual(elem.find('NextUploadIdMarker').text, 'Z') + self.assertEqual(elem.find('MaxUploads').text, '1000') + 
self.assertEqual(elem.find('IsTruncated').text, 'false') + self.assertEqual(status.split()[0], '200') + + @s3acl + def test_bucket_multipart_uploads_GET_maxuploads_over_max_32bit_int(self): + query = 'max-uploads=%s' % (MAX_32BIT_INT + 1) + status, headers, body = \ + self._test_bucket_multipart_uploads_GET(query) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + @s3acl + def test_bucket_multipart_uploads_GET_with_id_and_key_marker(self): + query = 'upload-id-marker=Y&key-marker=object' + multiparts = \ + (('object/Y', '2014-05-07T19:47:53.592270', 'HASH', 2), + ('object/Y/1', '2014-05-07T19:47:54.592270', 'HASH', 12), + ('object/Y/2', '2014-05-07T19:47:55.592270', 'HASH', 22)) + + status, headers, body = \ + self._test_bucket_multipart_uploads_GET(query, multiparts) + elem = fromstring(body, 'ListMultipartUploadsResult') + self.assertEqual(elem.find('KeyMarker').text, 'object') + self.assertEqual(elem.find('UploadIdMarker').text, 'Y') + self.assertEqual(len(elem.findall('Upload')), 1) + objects = [(o[0], o[1][:-3] + 'Z') for o in multiparts] + for u in elem.findall('Upload'): + name = u.find('Key').text + '/' + u.find('UploadId').text + initiated = u.find('Initiated').text + self.assertTrue((name, initiated) in objects) + self.assertEqual(status.split()[0], '200') + + _, path, _ = self.swift.calls_with_headers[-1] + path, query_string = path.split('?', 1) + query = {} + for q in query_string.split('&'): + key, arg = q.split('=') + query[key] = arg + self.assertEqual(query['format'], 'json') + self.assertEqual(query['limit'], '1001') + self.assertEqual(query['marker'], 'object/Y') + + @s3acl + def test_bucket_multipart_uploads_GET_with_key_marker(self): + query = 'key-marker=object' + multiparts = \ + (('object/X', '2014-05-07T19:47:50.592270', 'HASH', 1), + ('object/X/1', '2014-05-07T19:47:51.592270', 'HASH', 11), + ('object/X/2', '2014-05-07T19:47:52.592270', 'HASH', 21), + ('object/Y', '2014-05-07T19:47:53.592270', 'HASH', 2), + ('object/Y/1', '2014-05-07T19:47:54.592270', 'HASH', 12), + ('object/Y/2', '2014-05-07T19:47:55.592270', 'HASH', 22)) + status, headers, body = \ + self._test_bucket_multipart_uploads_GET(query, multiparts) + elem = fromstring(body, 'ListMultipartUploadsResult') + self.assertEqual(elem.find('KeyMarker').text, 'object') + self.assertEqual(elem.find('NextKeyMarker').text, 'object') + self.assertEqual(elem.find('NextUploadIdMarker').text, 'Y') + self.assertEqual(len(elem.findall('Upload')), 2) + objects = [(o[0], o[1][:-3] + 'Z') for o in multiparts] + for u in elem.findall('Upload'): + name = u.find('Key').text + '/' + u.find('UploadId').text + initiated = u.find('Initiated').text + self.assertTrue((name, initiated) in objects) + self.assertEqual(status.split()[0], '200') + + _, path, _ = self.swift.calls_with_headers[-1] + path, query_string = path.split('?', 1) + query = {} + for q in query_string.split('&'): + key, arg = q.split('=') + query[key] = arg + self.assertEqual(query['format'], 'json') + self.assertEqual(query['limit'], '1001') + self.assertEqual(query['marker'], quote('object/~')) + + @s3acl + def test_bucket_multipart_uploads_GET_with_prefix(self): + query = 'prefix=X' + multiparts = \ + (('object/X', '2014-05-07T19:47:50.592270', 'HASH', 1), + ('object/X/1', '2014-05-07T19:47:51.592270', 'HASH', 11), + ('object/X/2', '2014-05-07T19:47:52.592270', 'HASH', 21)) + status, headers, body = \ + self._test_bucket_multipart_uploads_GET(query, multiparts) + elem = fromstring(body, 'ListMultipartUploadsResult') + 
self.assertEqual(len(elem.findall('Upload')), 1) + objects = [(o[0], o[1][:-3] + 'Z') for o in multiparts] + for u in elem.findall('Upload'): + name = u.find('Key').text + '/' + u.find('UploadId').text + initiated = u.find('Initiated').text + self.assertTrue((name, initiated) in objects) + self.assertEqual(status.split()[0], '200') + + _, path, _ = self.swift.calls_with_headers[-1] + path, query_string = path.split('?', 1) + query = {} + for q in query_string.split('&'): + key, arg = q.split('=') + query[key] = arg + self.assertEqual(query['format'], 'json') + self.assertEqual(query['limit'], '1001') + self.assertEqual(query['prefix'], 'X') + + @s3acl + def test_bucket_multipart_uploads_GET_with_delimiter(self): + query = 'delimiter=/' + multiparts = \ + (('object/X', '2014-05-07T19:47:50.592270', 'HASH', 1), + ('object/X/1', '2014-05-07T19:47:51.592270', 'HASH', 11), + ('object/X/2', '2014-05-07T19:47:52.592270', 'HASH', 21), + ('object/Y', '2014-05-07T19:47:50.592270', 'HASH', 2), + ('object/Y/1', '2014-05-07T19:47:51.592270', 'HASH', 21), + ('object/Y/2', '2014-05-07T19:47:52.592270', 'HASH', 22), + ('object/Z', '2014-05-07T19:47:50.592270', 'HASH', 3), + ('object/Z/1', '2014-05-07T19:47:51.592270', 'HASH', 31), + ('object/Z/2', '2014-05-07T19:47:52.592270', 'HASH', 32), + ('subdir/object/X', '2014-05-07T19:47:50.592270', 'HASH', 4), + ('subdir/object/X/1', '2014-05-07T19:47:51.592270', 'HASH', 41), + ('subdir/object/X/2', '2014-05-07T19:47:52.592270', 'HASH', 42), + ('subdir/object/Y', '2014-05-07T19:47:50.592270', 'HASH', 5), + ('subdir/object/Y/1', '2014-05-07T19:47:51.592270', 'HASH', 51), + ('subdir/object/Y/2', '2014-05-07T19:47:52.592270', 'HASH', 52), + ('subdir2/object/Z', '2014-05-07T19:47:50.592270', 'HASH', 6), + ('subdir2/object/Z/1', '2014-05-07T19:47:51.592270', 'HASH', 61), + ('subdir2/object/Z/2', '2014-05-07T19:47:52.592270', 'HASH', 62)) + + status, headers, body = \ + self._test_bucket_multipart_uploads_GET(query, multiparts) + elem = fromstring(body, 'ListMultipartUploadsResult') + self.assertEqual(len(elem.findall('Upload')), 3) + self.assertEqual(len(elem.findall('CommonPrefixes')), 2) + objects = [(o[0], o[1][:-3] + 'Z') for o in multiparts + if o[0].startswith('o')] + prefixes = set([o[0].split('/')[0] + '/' for o in multiparts + if o[0].startswith('s')]) + for u in elem.findall('Upload'): + name = u.find('Key').text + '/' + u.find('UploadId').text + initiated = u.find('Initiated').text + self.assertTrue((name, initiated) in objects) + for p in elem.findall('CommonPrefixes'): + prefix = p.find('Prefix').text + self.assertTrue(prefix in prefixes) + + self.assertEqual(status.split()[0], '200') + _, path, _ = self.swift.calls_with_headers[-1] + path, query_string = path.split('?', 1) + query = {} + for q in query_string.split('&'): + key, arg = q.split('=') + query[key] = arg + self.assertEqual(query['format'], 'json') + self.assertEqual(query['limit'], '1001') + self.assertTrue(query.get('delimiter') is None) + + @s3acl + def test_bucket_multipart_uploads_GET_with_multi_chars_delimiter(self): + query = 'delimiter=subdir' + multiparts = \ + (('object/X', '2014-05-07T19:47:50.592270', 'HASH', 1), + ('object/X/1', '2014-05-07T19:47:51.592270', 'HASH', 11), + ('object/X/2', '2014-05-07T19:47:52.592270', 'HASH', 21), + ('dir/subdir/object/X', '2014-05-07T19:47:50.592270', + 'HASH', 3), + ('dir/subdir/object/X/1', '2014-05-07T19:47:51.592270', + 'HASH', 31), + ('dir/subdir/object/X/2', '2014-05-07T19:47:52.592270', + 'HASH', 32), + ('subdir/object/X', 
'2014-05-07T19:47:50.592270', 'HASH', 4), + ('subdir/object/X/1', '2014-05-07T19:47:51.592270', 'HASH', 41), + ('subdir/object/X/2', '2014-05-07T19:47:52.592270', 'HASH', 42), + ('subdir/object/Y', '2014-05-07T19:47:50.592270', 'HASH', 5), + ('subdir/object/Y/1', '2014-05-07T19:47:51.592270', 'HASH', 51), + ('subdir/object/Y/2', '2014-05-07T19:47:52.592270', 'HASH', 52), + ('subdir2/object/Z', '2014-05-07T19:47:50.592270', 'HASH', 6), + ('subdir2/object/Z/1', '2014-05-07T19:47:51.592270', 'HASH', 61), + ('subdir2/object/Z/2', '2014-05-07T19:47:52.592270', 'HASH', 62)) + + status, headers, body = \ + self._test_bucket_multipart_uploads_GET(query, multiparts) + elem = fromstring(body, 'ListMultipartUploadsResult') + self.assertEqual(len(elem.findall('Upload')), 1) + self.assertEqual(len(elem.findall('CommonPrefixes')), 2) + objects = [(o[0], o[1][:-3] + 'Z') for o in multiparts + if o[0].startswith('object')] + prefixes = ('dir/subdir', 'subdir') + for u in elem.findall('Upload'): + name = u.find('Key').text + '/' + u.find('UploadId').text + initiated = u.find('Initiated').text + self.assertTrue((name, initiated) in objects) + for p in elem.findall('CommonPrefixes'): + prefix = p.find('Prefix').text + self.assertTrue(prefix in prefixes) + + self.assertEqual(status.split()[0], '200') + _, path, _ = self.swift.calls_with_headers[-1] + path, query_string = path.split('?', 1) + query = {} + for q in query_string.split('&'): + key, arg = q.split('=') + query[key] = arg + self.assertEqual(query['format'], 'json') + self.assertEqual(query['limit'], '1001') + self.assertTrue(query.get('delimiter') is None) + + @s3acl + def test_bucket_multipart_uploads_GET_with_prefix_and_delimiter(self): + query = 'prefix=dir/&delimiter=/' + multiparts = \ + (('dir/subdir/object/X', '2014-05-07T19:47:50.592270', + 'HASH', 4), + ('dir/subdir/object/X/1', '2014-05-07T19:47:51.592270', + 'HASH', 41), + ('dir/subdir/object/X/2', '2014-05-07T19:47:52.592270', + 'HASH', 42), + ('dir/object/X', '2014-05-07T19:47:50.592270', 'HASH', 5), + ('dir/object/X/1', '2014-05-07T19:47:51.592270', 'HASH', 51), + ('dir/object/X/2', '2014-05-07T19:47:52.592270', 'HASH', 52)) + + status, headers, body = \ + self._test_bucket_multipart_uploads_GET(query, multiparts) + elem = fromstring(body, 'ListMultipartUploadsResult') + self.assertEqual(len(elem.findall('Upload')), 1) + self.assertEqual(len(elem.findall('CommonPrefixes')), 1) + objects = [(o[0], o[1][:-3] + 'Z') for o in multiparts + if o[0].startswith('dir/o')] + prefixes = ['dir/subdir/'] + for u in elem.findall('Upload'): + name = u.find('Key').text + '/' + u.find('UploadId').text + initiated = u.find('Initiated').text + self.assertTrue((name, initiated) in objects) + for p in elem.findall('CommonPrefixes'): + prefix = p.find('Prefix').text + self.assertTrue(prefix in prefixes) + + self.assertEqual(status.split()[0], '200') + _, path, _ = self.swift.calls_with_headers[-1] + path, query_string = path.split('?', 1) + query = {} + for q in query_string.split('&'): + key, arg = q.split('=') + query[key] = arg + self.assertEqual(query['format'], 'json') + self.assertEqual(query['limit'], '1001') + self.assertEqual(query['prefix'], 'dir/') + self.assertTrue(query.get('delimiter') is None) + + @patch('swift.common.middleware.s3api.controllers.' 
+ 'multi_upload.unique_id', lambda: 'X') + def _test_object_multipart_upload_initiate(self, headers): + headers.update({ + 'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-meta-foo': 'bar', + }) + req = Request.blank('/bucket/object?uploads', + environ={'REQUEST_METHOD': 'POST'}, + headers=headers) + status, headers, body = self.call_s3api(req) + fromstring(body, 'InitiateMultipartUploadResult') + self.assertEqual(status.split()[0], '200') + + _, _, req_headers = self.swift.calls_with_headers[-1] + self.assertEqual(req_headers.get('X-Object-Meta-Foo'), 'bar') + self.assertNotIn('Etag', req_headers) + self.assertNotIn('Content-MD5', req_headers) + method, path, _ = self.swift.calls_with_headers[-2] + self.assertEqual(method, 'PUT') + self.assertEqual( + path, + '/v1/AUTH_test/bucket%s' % MULTIUPLOAD_SUFFIX) + + def test_object_multipart_upload_initiate(self): + self._test_object_multipart_upload_initiate({}) + self._test_object_multipart_upload_initiate({'Etag': 'blahblahblah'}) + self._test_object_multipart_upload_initiate({ + 'Content-MD5': base64.b64encode('blahblahblahblah').strip()}) + + @s3acl(s3acl_only=True) + @patch('swift.common.middleware.s3api.controllers.multi_upload.' + 'unique_id', lambda: 'X') + def test_object_multipart_upload_initiate_s3acl(self): + req = Request.blank('/bucket/object?uploads', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': + 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-acl': 'public-read', + 'x-amz-meta-foo': 'bar', + 'Content-Type': 'cat/picture'}) + status, headers, body = self.call_s3api(req) + fromstring(body, 'InitiateMultipartUploadResult') + self.assertEqual(status.split()[0], '200') + + _, _, req_headers = self.swift.calls_with_headers[-1] + self.assertEqual(req_headers.get('X-Object-Meta-Foo'), 'bar') + self.assertEqual(req_headers.get( + 'X-Object-Sysmeta-S3api-Has-Content-Type'), 'yes') + self.assertEqual(req_headers.get( + 'X-Object-Sysmeta-S3api-Content-Type'), 'cat/picture') + tmpacl_header = req_headers.get(sysmeta_header('object', 'tmpacl')) + self.assertTrue(tmpacl_header) + acl_header = encode_acl('object', + ACLPublicRead(Owner('test:tester', + 'test:tester'))) + self.assertEqual(acl_header.get(sysmeta_header('object', 'acl')), + tmpacl_header) + + @s3acl(s3acl_only=True) + @patch('swift.common.middleware.s3api.controllers.' + 'multi_upload.unique_id', lambda: 'X') + def test_object_multipart_upload_initiate_no_content_type(self): + req = Request.blank('/bucket/object?uploads', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': + 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-acl': 'public-read', + 'x-amz-meta-foo': 'bar'}) + status, headers, body = self.call_s3api(req) + fromstring(body, 'InitiateMultipartUploadResult') + self.assertEqual(status.split()[0], '200') + + _, _, req_headers = self.swift.calls_with_headers[-1] + self.assertEqual(req_headers.get('X-Object-Meta-Foo'), 'bar') + self.assertEqual(req_headers.get( + 'X-Object-Sysmeta-S3api-Has-Content-Type'), 'no') + tmpacl_header = req_headers.get(sysmeta_header('object', 'tmpacl')) + self.assertTrue(tmpacl_header) + acl_header = encode_acl('object', + ACLPublicRead(Owner('test:tester', + 'test:tester'))) + self.assertEqual(acl_header.get(sysmeta_header('object', 'acl')), + tmpacl_header) + + @patch('swift.common.middleware.s3api.controllers.' 
+ 'multi_upload.unique_id', lambda: 'X') + def test_object_multipart_upload_initiate_without_bucket(self): + self.swift.register('HEAD', '/v1/AUTH_test/bucket', + swob.HTTPNotFound, {}, None) + req = Request.blank('/bucket/object?uploads', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': + 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '404') + self.assertEqual(self._get_error_code(body), 'NoSuchBucket') + + @s3acl + def test_object_multipart_upload_complete_error(self): + malformed_xml = 'malformed_XML' + req = Request.blank('/bucket/object?uploadId=X', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body=malformed_xml) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'MalformedXML') + + # without target bucket + req = Request.blank('/nobucket/object?uploadId=X', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), }, + body=xml) + with patch( + 'swift.common.middleware.s3api.s3request.get_container_info', + lambda x, y: {'status': 404}): + self.swift.register('HEAD', '/v1/AUTH_test/nobucket', + swob.HTTPNotFound, {}, None) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'NoSuchBucket') + + def test_object_multipart_upload_complete(self): + req = Request.blank('/bucket/object?uploadId=X', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), }, + body=xml) + status, headers, body = self.call_s3api(req) + fromstring(body, 'CompleteMultipartUploadResult') + self.assertEqual(status.split()[0], '200') + + _, _, headers = self.swift.calls_with_headers[-2] + self.assertEqual(headers.get('X-Object-Meta-Foo'), 'bar') + self.assertEqual(headers.get('Content-Type'), 'baz/quux') + + def test_object_multipart_upload_complete_404_on_marker_delete(self): + segment_bucket = '/v1/AUTH_test/bucket+segments' + self.swift.register('DELETE', segment_bucket + '/object/X', + swob.HTTPNotFound, {}, None) + req = Request.blank('/bucket/object?uploadId=X', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), }, + body=xml) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + fromstring(body, 'CompleteMultipartUploadResult') + + _, _, headers = self.swift.calls_with_headers[-2] + self.assertEqual(headers.get('X-Object-Meta-Foo'), 'bar') + self.assertEqual(headers.get('Content-Type'), 'baz/quux') + + def test_object_multipart_upload_complete_old_content_type(self): + self.swift.register_unconditionally( + 'HEAD', '/v1/AUTH_test/bucket+segments/object/X', + swob.HTTPOk, {"Content-Type": "thingy/dingy"}, None) + + req = Request.blank('/bucket/object?uploadId=X', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), }, + body=xml) + status, headers, body = self.call_s3api(req) + fromstring(body, 'CompleteMultipartUploadResult') + self.assertEqual(status.split()[0], '200') + + _, _, headers = self.swift.calls_with_headers[-2] + self.assertEqual(headers.get('Content-Type'), 'thingy/dingy') + + def test_object_multipart_upload_complete_no_content_type(self): + self.swift.register_unconditionally( + 'HEAD', 
'/v1/AUTH_test/bucket+segments/object/X',
+            swob.HTTPOk, {"X-Object-Sysmeta-S3api-Has-Content-Type": "no"},
+            None)
+
+        req = Request.blank('/bucket/object?uploadId=X',
+                            environ={'REQUEST_METHOD': 'POST'},
+                            headers={'Authorization': 'AWS test:tester:hmac',
+                                     'Date': self.get_date_header(), },
+                            body=xml)
+        status, headers, body = self.call_s3api(req)
+        fromstring(body, 'CompleteMultipartUploadResult')
+        self.assertEqual(status.split()[0], '200')
+
+        _, _, headers = self.swift.calls_with_headers[-2]
+        self.assertNotIn('Content-Type', headers)
+
+    def test_object_multipart_upload_complete_weird_host_name(self):
+        # This happens via boto signature v4
+        req = Request.blank('/bucket/object?uploadId=X',
+                            environ={'REQUEST_METHOD': 'POST',
+                                     'HTTP_HOST': 'localhost:8080:8080'},
+                            headers={'Authorization': 'AWS test:tester:hmac',
+                                     'Date': self.get_date_header(), },
+                            body=xml)
+        status, headers, body = self.call_s3api(req)
+        fromstring(body, 'CompleteMultipartUploadResult')
+        self.assertEqual(status.split()[0], '200')
+
+        _, _, headers = self.swift.calls_with_headers[-2]
+        self.assertEqual(headers.get('X-Object-Meta-Foo'), 'bar')
+
+    def test_object_multipart_upload_complete_segment_too_small(self):
+        msg = 'Index 0: too small; each segment must be at least 1 byte.'
+
+        req = Request.blank(
+            '/bucket/object?uploadId=X',
+            environ={'REQUEST_METHOD': 'POST'},
+            headers={'Authorization': 'AWS test:tester:hmac',
+                     'Date': self.get_date_header(), },
+            body=xml)
+
+        self.swift.register('PUT', '/v1/AUTH_test/bucket/object',
+                            swob.HTTPBadRequest, {}, msg)
+        status, headers, body = self.call_s3api(req)
+        self.assertEqual(status.split()[0], '400')
+        self.assertEqual(self._get_error_code(body), 'EntityTooSmall')
+        self.assertEqual(self._get_error_message(body), msg)
+
+        self.swift.clear_calls()
+        self.s3api.conf.min_segment_size = 5242880
+        req = Request.blank(
+            '/bucket/object?uploadId=X',
+            environ={'REQUEST_METHOD': 'POST'},
+            headers={'Authorization': 'AWS test:tester:hmac',
+                     'Date': self.get_date_header(), },
+            body=xml)
+
+        status, headers, body = self.call_s3api(req)
+        self.assertEqual(status.split()[0], '400')
+        self.assertEqual(self._get_error_code(body), 'EntityTooSmall')
+        self.assertEqual(self._get_error_message(body),
+                         'Your proposed upload is smaller than the minimum '
+                         'allowed object size.')
+        self.assertNotIn('PUT', [method for method, _ in self.swift.calls])
+
+    def test_object_multipart_upload_complete_single_zero_length_segment(self):
+        segment_bucket = '/v1/AUTH_test/empty-bucket+segments'
+        put_headers = {'etag': self.etag, 'last-modified': self.last_modified}
+
+        object_list = [{
+            'name': 'object/X/1',
+            'last_modified': self.last_modified,
+            'hash': 'd41d8cd98f00b204e9800998ecf8427e',
+            'bytes': '0',
+        }]
+
+        self.swift.register('GET', segment_bucket, swob.HTTPOk, {},
+                            json.dumps(object_list))
+        self.swift.register('HEAD', '/v1/AUTH_test/empty-bucket',
+                            swob.HTTPNoContent, {}, None)
+        self.swift.register('HEAD', segment_bucket + '/object/X',
+                            swob.HTTPOk, {'x-object-meta-foo': 'bar',
+                                          'content-type': 'baz/quux'}, None)
+        self.swift.register('PUT', '/v1/AUTH_test/empty-bucket/object',
+                            swob.HTTPCreated, {}, None)
+        self.swift.register('DELETE', segment_bucket + '/object/X/1',
+                            swob.HTTPOk, {}, None)
+        self.swift.register('DELETE', segment_bucket + '/object/X',
+                            swob.HTTPOk, {}, None)
+
+        xml = '<CompleteMultipartUpload>' \
+            '<Part>' \
+            '<PartNumber>1</PartNumber>' \
+            '<ETag>d41d8cd98f00b204e9800998ecf8427e</ETag>' \
+            '</Part>' \
+            '</CompleteMultipartUpload>'
+
+        req = Request.blank('/empty-bucket/object?uploadId=X',
+                            environ={'REQUEST_METHOD': 'POST'},
+                            headers={'Authorization': 'AWS test:tester:hmac',
+                                     'Date': self.get_date_header(), },
+                            body=xml)
+        status, headers, body = self.call_s3api(req)
+        fromstring(body, 'CompleteMultipartUploadResult')
+        self.assertEqual(status.split()[0], '200')
+
+        self.assertEqual(self.swift.calls, [
+            ('HEAD', '/v1/AUTH_test/empty-bucket'),
+            ('HEAD', '/v1/AUTH_test/empty-bucket+segments/object/X'),
+            ('GET', '/v1/AUTH_test/empty-bucket+segments?delimiter=/&'
+                    'format=json&prefix=object/X/'),
+            ('PUT',
+             '/v1/AUTH_test/empty-bucket/object?multipart-manifest=put'),
+            ('DELETE', '/v1/AUTH_test/empty-bucket+segments/object/X'),
+        ])
+        _, _, put_headers = self.swift.calls_with_headers[-2]
+        self.assertEqual(put_headers.get('X-Object-Meta-Foo'), 'bar')
+        self.assertEqual(put_headers.get('Content-Type'), 'baz/quux')
+
+    def test_object_multipart_upload_complete_double_zero_length_segment(self):
+        segment_bucket = '/v1/AUTH_test/empty-bucket+segments'
+
+        object_list = [{
+            'name': 'object/X/1',
+            'last_modified': self.last_modified,
+            'hash': 'd41d8cd98f00b204e9800998ecf8427e',
+            'bytes': '0',
+        }, {
+            'name': 'object/X/2',
+            'last_modified': self.last_modified,
+            'hash': 'd41d8cd98f00b204e9800998ecf8427e',
+            'bytes': '0',
+        }]
+
+        self.swift.register('GET', segment_bucket, swob.HTTPOk, {},
+                            json.dumps(object_list))
+        self.swift.register('HEAD', '/v1/AUTH_test/empty-bucket',
+                            swob.HTTPNoContent, {}, None)
+        self.swift.register('HEAD', segment_bucket + '/object/X',
+                            swob.HTTPOk, {'x-object-meta-foo': 'bar',
+                                          'content-type': 'baz/quux'}, None)
+
+        xml = '<CompleteMultipartUpload>' \
+            '<Part>' \
+            '<PartNumber>1</PartNumber>' \
+            '<ETag>d41d8cd98f00b204e9800998ecf8427e</ETag>' \
+            '</Part>' \
+            '<Part>' \
+            '<PartNumber>2</PartNumber>' \
+            '<ETag>d41d8cd98f00b204e9800998ecf8427e</ETag>' \
+            '</Part>' \
+            '</CompleteMultipartUpload>'
+
+        req = Request.blank('/empty-bucket/object?uploadId=X',
+                            environ={'REQUEST_METHOD': 'POST'},
+                            headers={'Authorization': 'AWS test:tester:hmac',
+                                     'Date': self.get_date_header(), },
+                            body=xml)
+        status, headers, body = self.call_s3api(req)
+        self.assertEqual(self._get_error_code(body), 'EntityTooSmall')
+        self.assertEqual(status.split()[0], '400')
+
+        self.assertEqual(self.swift.calls, [
+            ('HEAD', '/v1/AUTH_test/empty-bucket'),
+            ('HEAD', '/v1/AUTH_test/empty-bucket+segments/object/X'),
+            ('GET', '/v1/AUTH_test/empty-bucket+segments?delimiter=/&'
+                    'format=json&prefix=object/X/'),
+        ])
+
+    def test_object_multipart_upload_complete_zero_length_final_segment(self):
+        segment_bucket = '/v1/AUTH_test/bucket+segments'
+
+        object_list = [{
+            'name': 'object/X/1',
+            'last_modified': self.last_modified,
+            'hash': 'some hash',
+            'bytes': '100',
+        }, {
+            'name': 'object/X/2',
+            'last_modified': self.last_modified,
+            'hash': 'some other hash',
+            'bytes': '1',
+        }, {
+            'name': 'object/X/3',
+            'last_modified': self.last_modified,
+            'hash': 'd41d8cd98f00b204e9800998ecf8427e',
+            'bytes': '0',
+        }]
+
+        self.swift.register('GET', segment_bucket, swob.HTTPOk, {},
+                            json.dumps(object_list))
+        self.swift.register('HEAD', '/v1/AUTH_test/bucket',
+                            swob.HTTPNoContent, {}, None)
+        self.swift.register('HEAD', segment_bucket + '/object/X',
+                            swob.HTTPOk, {'x-object-meta-foo': 'bar',
+                                          'content-type': 'baz/quux'}, None)
+        self.swift.register('DELETE', segment_bucket + '/object/X/3',
+                            swob.HTTPNoContent, {}, None)
+
+        xml = '<CompleteMultipartUpload>' \
+            '<Part>' \
+            '<PartNumber>1</PartNumber>' \
+            '<ETag>some hash</ETag>' \
+            '</Part>' \
+            '<Part>' \
+            '<PartNumber>2</PartNumber>' \
+            '<ETag>some other hash</ETag>' \
+            '</Part>' \
+            '<Part>' \
+            '<PartNumber>3</PartNumber>' \
+            '<ETag>d41d8cd98f00b204e9800998ecf8427e</ETag>' \
+            '</Part>' \
+            '</CompleteMultipartUpload>'
+
+        req = Request.blank('/bucket/object?uploadId=X',
+                            environ={'REQUEST_METHOD': 'POST'},
+                            headers={'Authorization': 'AWS test:tester:hmac',
+                                     'Date': self.get_date_header(), },
+ body=xml) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + self.assertEqual(self.swift.calls, [ + ('HEAD', '/v1/AUTH_test/bucket'), + ('HEAD', '/v1/AUTH_test/bucket+segments/object/X'), + ('GET', '/v1/AUTH_test/bucket+segments?delimiter=/&' + 'format=json&prefix=object/X/'), + ('PUT', '/v1/AUTH_test/bucket/object?multipart-manifest=put'), + ('DELETE', '/v1/AUTH_test/bucket+segments/object/X'), + ]) + + @s3acl(s3acl_only=True) + def test_object_multipart_upload_complete_s3acl(self): + acl_headers = encode_acl('object', ACLPublicRead(Owner('test:tester', + 'test:tester'))) + headers = {} + headers[sysmeta_header('object', 'tmpacl')] = \ + acl_headers.get(sysmeta_header('object', 'acl')) + headers['X-Object-Meta-Foo'] = 'bar' + headers['Content-Type'] = 'baz/quux' + self.swift.register('HEAD', '/v1/AUTH_test/bucket+segments/object/X', + swob.HTTPOk, headers, None) + req = Request.blank('/bucket/object?uploadId=X', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body=xml) + status, headers, body = self.call_s3api(req) + fromstring(body, 'CompleteMultipartUploadResult') + self.assertEqual(status.split()[0], '200') + + _, _, headers = self.swift.calls_with_headers[-2] + self.assertEqual(headers.get('X-Object-Meta-Foo'), 'bar') + self.assertEqual(headers.get('Content-Type'), 'baz/quux') + self.assertEqual( + tostring(ACLPublicRead(Owner('test:tester', + 'test:tester')).elem()), + tostring(decode_acl('object', headers, False).elem())) + + @s3acl + def test_object_multipart_upload_abort_error(self): + req = Request.blank('/bucket/object?uploadId=invalid', + environ={'REQUEST_METHOD': 'DELETE'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'NoSuchUpload') + + # without target bucket + req = Request.blank('/nobucket/object?uploadId=X', + environ={'REQUEST_METHOD': 'DELETE'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + with patch( + 'swift.common.middleware.s3api.s3request.get_container_info', + lambda x, y: {'status': 404}): + self.swift.register('HEAD', '/v1/AUTH_test/nobucket', + swob.HTTPNotFound, {}, None) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'NoSuchBucket') + + @s3acl + def test_object_multipart_upload_abort(self): + req = Request.blank('/bucket/object?uploadId=X', + environ={'REQUEST_METHOD': 'DELETE'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '204') + + @s3acl + @patch('swift.common.middleware.s3api.s3request.' 
+ 'get_container_info', lambda x, y: {'status': 204}) + def test_object_upload_part_error(self): + # without upload id + req = Request.blank('/bucket/object?partNumber=1', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body='part object') + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + # invalid part number + req = Request.blank('/bucket/object?partNumber=invalid&uploadId=X', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body='part object') + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + # part number must be > 0 + req = Request.blank('/bucket/object?partNumber=0&uploadId=X', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body='part object') + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + # part number must be < 1001 + req = Request.blank('/bucket/object?partNumber=1001&uploadId=X', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body='part object') + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + # without target bucket + req = Request.blank('/nobucket/object?partNumber=1&uploadId=X', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body='part object') + with patch( + 'swift.common.middleware.s3api.s3request.get_container_info', + lambda x, y: {'status': 404}): + self.swift.register('HEAD', '/v1/AUTH_test/nobucket', + swob.HTTPNotFound, {}, None) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'NoSuchBucket') + + @s3acl + def test_object_upload_part(self): + req = Request.blank('/bucket/object?partNumber=1&uploadId=X', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body='part object') + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + @s3acl + def test_object_list_parts_error(self): + req = Request.blank('/bucket/object?uploadId=invalid', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'NoSuchUpload') + + # without target bucket + req = Request.blank('/nobucket/object?uploadId=X', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + with patch( + 'swift.common.middleware.s3api.s3request.get_container_info', + lambda x, y: {'status': 404}): + self.swift.register('HEAD', '/v1/AUTH_test/nobucket', + swob.HTTPNotFound, {}, None) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'NoSuchBucket') + + @s3acl + def test_object_list_parts(self): + req = Request.blank('/bucket/object?uploadId=X', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 
'ListPartsResult') + self.assertEqual(elem.find('Bucket').text, 'bucket') + self.assertEqual(elem.find('Key').text, 'object') + self.assertEqual(elem.find('UploadId').text, 'X') + self.assertEqual(elem.find('Initiator/ID').text, 'test:tester') + self.assertEqual(elem.find('Initiator/ID').text, 'test:tester') + self.assertEqual(elem.find('Owner/ID').text, 'test:tester') + self.assertEqual(elem.find('Owner/ID').text, 'test:tester') + self.assertEqual(elem.find('StorageClass').text, 'STANDARD') + self.assertEqual(elem.find('PartNumberMarker').text, '0') + self.assertEqual(elem.find('NextPartNumberMarker').text, '2') + self.assertEqual(elem.find('MaxParts').text, '1000') + self.assertEqual(elem.find('IsTruncated').text, 'false') + self.assertEqual(len(elem.findall('Part')), 2) + for p in elem.findall('Part'): + partnum = int(p.find('PartNumber').text) + self.assertEqual(p.find('LastModified').text, + objects_template[partnum - 1][1][:-3] + + 'Z') + self.assertEqual(p.find('ETag').text.strip(), + '"%s"' % objects_template[partnum - 1][2]) + self.assertEqual(p.find('Size').text, + str(objects_template[partnum - 1][3])) + self.assertEqual(status.split()[0], '200') + + def test_object_list_parts_encoding_type(self): + self.swift.register('HEAD', '/v1/AUTH_test/bucket+segments/object@@/X', + swob.HTTPOk, {}, None) + req = Request.blank('/bucket/object@@?uploadId=X&encoding-type=url', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListPartsResult') + self.assertEqual(elem.find('Key').text, quote('object@@')) + self.assertEqual(elem.find('EncodingType').text, 'url') + self.assertEqual(status.split()[0], '200') + + def test_object_list_parts_without_encoding_type(self): + self.swift.register('HEAD', '/v1/AUTH_test/bucket+segments/object@@/X', + swob.HTTPOk, {}, None) + req = Request.blank('/bucket/object@@?uploadId=X', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListPartsResult') + self.assertEqual(elem.find('Key').text, 'object@@') + self.assertEqual(status.split()[0], '200') + + def test_object_list_parts_encoding_type_error(self): + req = Request.blank('/bucket/object?uploadId=X&encoding-type=xml', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def test_object_list_parts_max_parts(self): + req = Request.blank('/bucket/object?uploadId=X&max-parts=1', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListPartsResult') + self.assertEqual(elem.find('IsTruncated').text, 'true') + self.assertEqual(len(elem.findall('Part')), 1) + self.assertEqual(status.split()[0], '200') + + def test_object_list_parts_str_max_parts(self): + req = Request.blank('/bucket/object?uploadId=X&max-parts=invalid', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def 
test_object_list_parts_negative_max_parts(self): + req = Request.blank('/bucket/object?uploadId=X&max-parts=-1', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def test_object_list_parts_over_max_parts(self): + req = Request.blank('/bucket/object?uploadId=X&max-parts=%d' % + (self.s3api.conf.max_parts_listing + 1), + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListPartsResult') + self.assertEqual(elem.find('Bucket').text, 'bucket') + self.assertEqual(elem.find('Key').text, 'object') + self.assertEqual(elem.find('UploadId').text, 'X') + self.assertEqual(elem.find('Initiator/ID').text, 'test:tester') + self.assertEqual(elem.find('Owner/ID').text, 'test:tester') + self.assertEqual(elem.find('StorageClass').text, 'STANDARD') + self.assertEqual(elem.find('PartNumberMarker').text, '0') + self.assertEqual(elem.find('NextPartNumberMarker').text, '2') + self.assertEqual(elem.find('MaxParts').text, '1000') + self.assertEqual(elem.find('IsTruncated').text, 'false') + self.assertEqual(len(elem.findall('Part')), 2) + for p in elem.findall('Part'): + partnum = int(p.find('PartNumber').text) + self.assertEqual(p.find('LastModified').text, + objects_template[partnum - 1][1][:-3] + + 'Z') + self.assertEqual(p.find('ETag').text, + '"%s"' % objects_template[partnum - 1][2]) + self.assertEqual(p.find('Size').text, + str(objects_template[partnum - 1][3])) + self.assertEqual(status.split()[0], '200') + + def test_object_list_parts_over_max_32bit_int(self): + req = Request.blank('/bucket/object?uploadId=X&max-parts=%d' % + (MAX_32BIT_INT + 1), + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def test_object_list_parts_with_part_number_marker(self): + req = Request.blank('/bucket/object?uploadId=X&' + 'part-number-marker=1', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListPartsResult') + self.assertEqual(len(elem.findall('Part')), 1) + self.assertEqual(elem.find('Part/PartNumber').text, '2') + self.assertEqual(elem.find('PartNumberMarker').text, '1') + self.assertEqual(status.split()[0], '200') + + def test_object_list_parts_str_part_number_marker(self): + req = Request.blank('/bucket/object?uploadId=X&part-number-marker=' + 'invalid', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def test_object_list_parts_negative_part_number_marker(self): + req = Request.blank('/bucket/object?uploadId=X&part-number-marker=' + '-1', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def test_object_list_parts_over_part_number_marker(self): + part_number_marker = 
str(self.s3api.conf.max_upload_part_num + 1) + req = Request.blank('/bucket/object?uploadId=X&' + 'part-number-marker=%s' % part_number_marker, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListPartsResult') + self.assertEqual(len(elem.findall('Part')), 0) + self.assertEqual(elem.find('PartNumberMarker').text, + part_number_marker) + self.assertEqual(status.split()[0], '200') + + def test_object_list_parts_over_max_32bit_int_part_number_marker(self): + req = Request.blank('/bucket/object?uploadId=X&part-number-marker=' + '%s' % ((MAX_32BIT_INT + 1)), + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def test_object_list_parts_same_max_marts_as_objects_num(self): + req = Request.blank('/bucket/object?uploadId=X&max-parts=2', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'ListPartsResult') + self.assertEqual(len(elem.findall('Part')), 2) + self.assertEqual(status.split()[0], '200') + + def _test_for_s3acl(self, method, query, account, hasObj=True, body=None): + path = '/bucket%s' % ('/object' + query if hasObj else query) + req = Request.blank(path, + environ={'REQUEST_METHOD': method}, + headers={'Authorization': 'AWS %s:hmac' % account, + 'Date': self.get_date_header()}, + body=body) + return self.call_s3api(req) + + @s3acl(s3acl_only=True) + def test_upload_part_acl_without_permission(self): + status, headers, body = \ + self._test_for_s3acl('PUT', '?partNumber=1&uploadId=X', + 'test:other') + self.assertEqual(status.split()[0], '403') + + @s3acl(s3acl_only=True) + def test_upload_part_acl_with_write_permission(self): + status, headers, body = \ + self._test_for_s3acl('PUT', '?partNumber=1&uploadId=X', + 'test:write') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_upload_part_acl_with_fullcontrol_permission(self): + status, headers, body = \ + self._test_for_s3acl('PUT', '?partNumber=1&uploadId=X', + 'test:full_control') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_list_multipart_uploads_acl_without_permission(self): + status, headers, body = \ + self._test_for_s3acl('GET', '?uploads', 'test:other', + hasObj=False) + self.assertEqual(status.split()[0], '403') + + @s3acl(s3acl_only=True) + def test_list_multipart_uploads_acl_with_read_permission(self): + status, headers, body = \ + self._test_for_s3acl('GET', '?uploads', 'test:read', + hasObj=False) + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_list_multipart_uploads_acl_with_fullcontrol_permission(self): + status, headers, body = \ + self._test_for_s3acl('GET', '?uploads', 'test:full_control', + hasObj=False) + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + @patch('swift.common.middleware.s3api.controllers.' 
+ 'multi_upload.unique_id', lambda: 'X') + def test_initiate_multipart_upload_acl_without_permission(self): + status, headers, body = \ + self._test_for_s3acl('POST', '?uploads', 'test:other') + self.assertEqual(status.split()[0], '403') + + @s3acl(s3acl_only=True) + @patch('swift.common.middleware.s3api.controllers.' + 'multi_upload.unique_id', lambda: 'X') + def test_initiate_multipart_upload_acl_with_write_permission(self): + status, headers, body = \ + self._test_for_s3acl('POST', '?uploads', 'test:write') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + @patch('swift.common.middleware.s3api.controllers.' + 'multi_upload.unique_id', lambda: 'X') + def test_initiate_multipart_upload_acl_with_fullcontrol_permission(self): + status, headers, body = \ + self._test_for_s3acl('POST', '?uploads', 'test:full_control') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_list_parts_acl_without_permission(self): + status, headers, body = \ + self._test_for_s3acl('GET', '?uploadId=X', 'test:other') + self.assertEqual(status.split()[0], '403') + + @s3acl(s3acl_only=True) + def test_list_parts_acl_with_read_permission(self): + status, headers, body = \ + self._test_for_s3acl('GET', '?uploadId=X', 'test:read') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_list_parts_acl_with_fullcontrol_permission(self): + status, headers, body = \ + self._test_for_s3acl('GET', '?uploadId=X', 'test:full_control') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_abort_multipart_upload_acl_without_permission(self): + status, headers, body = \ + self._test_for_s3acl('DELETE', '?uploadId=X', 'test:other') + self.assertEqual(status.split()[0], '403') + + @s3acl(s3acl_only=True) + def test_abort_multipart_upload_acl_with_write_permission(self): + status, headers, body = \ + self._test_for_s3acl('DELETE', '?uploadId=X', 'test:write') + self.assertEqual(status.split()[0], '204') + + @s3acl(s3acl_only=True) + def test_abort_multipart_upload_acl_with_fullcontrol_permission(self): + status, headers, body = \ + self._test_for_s3acl('DELETE', '?uploadId=X', 'test:full_control') + self.assertEqual(status.split()[0], '204') + + @s3acl(s3acl_only=True) + def test_complete_multipart_upload_acl_without_permission(self): + status, headers, body = \ + self._test_for_s3acl('POST', '?uploadId=X', 'test:other', + body=xml) + self.assertEqual(status.split()[0], '403') + + @s3acl(s3acl_only=True) + def test_complete_multipart_upload_acl_with_write_permission(self): + status, headers, body = \ + self._test_for_s3acl('POST', '?uploadId=X', 'test:write', + body=xml) + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_complete_multipart_upload_acl_with_fullcontrol_permission(self): + status, headers, body = \ + self._test_for_s3acl('POST', '?uploadId=X', 'test:full_control', + body=xml) + self.assertEqual(status.split()[0], '200') + + def _test_copy_for_s3acl(self, account, src_permission=None, + src_path='/src_bucket/src_obj', src_headers=None, + head_resp=swob.HTTPOk, put_header=None, + timestamp=None): + owner = 'test:tester' + grants = [Grant(User(account), src_permission)] \ + if src_permission else [Grant(User(owner), 'FULL_CONTROL')] + src_o_headers = encode_acl('object', ACL(Owner(owner, owner), grants)) + src_o_headers.update({'last-modified': self.last_modified}) + src_o_headers.update(src_headers or {}) + self.swift.register('HEAD', '/v1/AUTH_test/%s' % src_path.lstrip('/'), 
+ head_resp, src_o_headers, None) + put_header = put_header or {} + put_headers = {'Authorization': 'AWS %s:hmac' % account, + 'Date': self.get_date_header(), + 'X-Amz-Copy-Source': src_path} + put_headers.update(put_header) + req = Request.blank( + '/bucket/object?partNumber=1&uploadId=X', + environ={'REQUEST_METHOD': 'PUT'}, + headers=put_headers) + timestamp = timestamp or time.time() + with patch('swift.common.middleware.s3api.utils.time.time', + return_value=timestamp): + return self.call_s3api(req) + + @s3acl + def test_upload_part_copy(self): + date_header = self.get_date_header() + timestamp = mktime(date_header) + last_modified = S3Timestamp(timestamp).s3xmlformat + status, headers, body = self._test_copy_for_s3acl( + 'test:tester', put_header={'Date': date_header}, + timestamp=timestamp) + self.assertEqual(status.split()[0], '200') + self.assertEqual(headers['Content-Type'], 'application/xml') + self.assertTrue(headers.get('etag') is None) + elem = fromstring(body, 'CopyPartResult') + self.assertEqual(elem.find('LastModified').text, last_modified) + self.assertEqual(elem.find('ETag').text, '"%s"' % self.etag) + + _, _, headers = self.swift.calls_with_headers[-1] + self.assertEqual(headers['X-Copy-From'], '/src_bucket/src_obj') + self.assertEqual(headers['Content-Length'], '0') + + @s3acl(s3acl_only=True) + def test_upload_part_copy_acl_with_owner_permission(self): + status, headers, body = \ + self._test_copy_for_s3acl('test:tester') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_upload_part_copy_acl_without_permission(self): + status, headers, body = \ + self._test_copy_for_s3acl('test:other', 'READ') + self.assertEqual(status.split()[0], '403') + + @s3acl(s3acl_only=True) + def test_upload_part_copy_acl_with_write_permission(self): + status, headers, body = \ + self._test_copy_for_s3acl('test:write', 'READ') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_upload_part_copy_acl_with_fullcontrol_permission(self): + status, headers, body = \ + self._test_copy_for_s3acl('test:full_control', 'READ') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_upload_part_copy_acl_without_src_permission(self): + status, headers, body = \ + self._test_copy_for_s3acl('test:write', 'WRITE') + self.assertEqual(status.split()[0], '403') + + @s3acl(s3acl_only=True) + def test_upload_part_copy_acl_invalid_source(self): + status, headers, body = \ + self._test_copy_for_s3acl('test:write', 'WRITE', '') + self.assertEqual(status.split()[0], '400') + + status, headers, body = \ + self._test_copy_for_s3acl('test:write', 'WRITE', '/') + self.assertEqual(status.split()[0], '400') + + status, headers, body = \ + self._test_copy_for_s3acl('test:write', 'WRITE', '/bucket') + self.assertEqual(status.split()[0], '400') + + status, headers, body = \ + self._test_copy_for_s3acl('test:write', 'WRITE', '/bucket/') + self.assertEqual(status.split()[0], '400') + + @s3acl + def test_upload_part_copy_headers_error(self): + account = 'test:tester' + etag = '7dfa07a8e59ddbcd1dc84d4c4f82aea1' + last_modified_since = 'Fri, 01 Apr 2014 12:00:00 GMT' + + header = {'X-Amz-Copy-Source-If-Match': etag} + status, header, body = \ + self._test_copy_for_s3acl(account, + head_resp=swob.HTTPPreconditionFailed, + put_header=header) + self.assertEqual(self._get_error_code(body), 'PreconditionFailed') + + header = {'X-Amz-Copy-Source-If-None-Match': etag} + status, header, body = \ + self._test_copy_for_s3acl(account, + 
head_resp=swob.HTTPNotModified, + put_header=header) + self.assertEqual(self._get_error_code(body), 'PreconditionFailed') + + header = {'X-Amz-Copy-Source-If-Modified-Since': last_modified_since} + status, header, body = \ + self._test_copy_for_s3acl(account, + head_resp=swob.HTTPNotModified, + put_header=header) + self.assertEqual(self._get_error_code(body), 'PreconditionFailed') + + header = \ + {'X-Amz-Copy-Source-If-Unmodified-Since': last_modified_since} + status, header, body = \ + self._test_copy_for_s3acl(account, + head_resp=swob.HTTPPreconditionFailed, + put_header=header) + self.assertEqual(self._get_error_code(body), 'PreconditionFailed') + + def test_upload_part_copy_headers_with_match(self): + account = 'test:tester' + etag = '7dfa07a8e59ddbcd1dc84d4c4f82aea1' + last_modified_since = 'Fri, 01 Apr 2014 11:00:00 GMT' + + header = {'X-Amz-Copy-Source-If-Match': etag, + 'X-Amz-Copy-Source-If-Modified-Since': last_modified_since} + status, header, body = \ + self._test_copy_for_s3acl(account, put_header=header) + + self.assertEqual(status.split()[0], '200') + + self.assertEqual(len(self.swift.calls_with_headers), 4) + _, _, headers = self.swift.calls_with_headers[-2] + self.assertEqual(headers['If-Match'], etag) + self.assertEqual(headers['If-Modified-Since'], last_modified_since) + _, _, headers = self.swift.calls_with_headers[-1] + self.assertTrue(headers.get('If-Match') is None) + self.assertTrue(headers.get('If-Modified-Since') is None) + _, _, headers = self.swift.calls_with_headers[0] + self.assertTrue(headers.get('If-Match') is None) + self.assertTrue(headers.get('If-Modified-Since') is None) + + @s3acl(s3acl_only=True) + def test_upload_part_copy_headers_with_match_and_s3acl(self): + account = 'test:tester' + etag = '7dfa07a8e59ddbcd1dc84d4c4f82aea1' + last_modified_since = 'Fri, 01 Apr 2014 11:00:00 GMT' + + header = {'X-Amz-Copy-Source-If-Match': etag, + 'X-Amz-Copy-Source-If-Modified-Since': last_modified_since} + status, header, body = \ + self._test_copy_for_s3acl(account, put_header=header) + + self.assertEqual(status.split()[0], '200') + self.assertEqual(len(self.swift.calls_with_headers), 4) + # Before the check of the copy source in the case of s3acl is valid, + # s3api check the bucket write permissions and the object existence + # of the destination. 
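+        # The four backend calls made by this request are expected to be,
+        # in order: HEAD of the destination bucket (ACL check), HEAD of the
+        # in-progress upload marker object/X, HEAD of the copy source (the
+        # only request that should carry the conditional headers), and the
+        # PUT of the part itself; hence the [-3]/[-2]/[-1] indexing below.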
+ _, _, headers = self.swift.calls_with_headers[-3] + self.assertTrue(headers.get('If-Match') is None) + self.assertTrue(headers.get('If-Modified-Since') is None) + _, _, headers = self.swift.calls_with_headers[-2] + self.assertEqual(headers['If-Match'], etag) + self.assertEqual(headers['If-Modified-Since'], last_modified_since) + _, _, headers = self.swift.calls_with_headers[-1] + self.assertTrue(headers.get('If-Match') is None) + self.assertTrue(headers.get('If-Modified-Since') is None) + _, _, headers = self.swift.calls_with_headers[0] + self.assertTrue(headers.get('If-Match') is None) + self.assertTrue(headers.get('If-Modified-Since') is None) + + def test_upload_part_copy_headers_with_not_match(self): + account = 'test:tester' + etag = '7dfa07a8e59ddbcd1dc84d4c4f82aea1' + last_modified_since = 'Fri, 01 Apr 2014 12:00:00 GMT' + + header = {'X-Amz-Copy-Source-If-None-Match': etag, + 'X-Amz-Copy-Source-If-Unmodified-Since': last_modified_since} + status, header, body = \ + self._test_copy_for_s3acl(account, put_header=header) + + self.assertEqual(status.split()[0], '200') + self.assertEqual(len(self.swift.calls_with_headers), 4) + _, _, headers = self.swift.calls_with_headers[-2] + self.assertEqual(headers['If-None-Match'], etag) + self.assertEqual(headers['If-Unmodified-Since'], last_modified_since) + _, _, headers = self.swift.calls_with_headers[-1] + self.assertTrue(headers.get('If-None-Match') is None) + self.assertTrue(headers.get('If-Unmodified-Since') is None) + _, _, headers = self.swift.calls_with_headers[0] + self.assertTrue(headers.get('If-None-Match') is None) + self.assertTrue(headers.get('If-Unmodified-Since') is None) + + @s3acl(s3acl_only=True) + def test_upload_part_copy_headers_with_not_match_and_s3acl(self): + account = 'test:tester' + etag = '7dfa07a8e59ddbcd1dc84d4c4f82aea1' + last_modified_since = 'Fri, 01 Apr 2014 12:00:00 GMT' + + header = {'X-Amz-Copy-Source-If-None-Match': etag, + 'X-Amz-Copy-Source-If-Unmodified-Since': last_modified_since} + status, header, body = \ + self._test_copy_for_s3acl(account, put_header=header) + + self.assertEqual(status.split()[0], '200') + self.assertEqual(len(self.swift.calls_with_headers), 4) + # Before the check of the copy source in the case of s3acl is valid, + # s3api check the bucket write permissions and the object existence + # of the destination. 
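+        # Same backend call order as the If-Match variant above; only the
+        # conditional header names (If-None-Match / If-Unmodified-Since)
+        # differ.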
+ _, _, headers = self.swift.calls_with_headers[-3] + self.assertTrue(headers.get('If-Match') is None) + self.assertTrue(headers.get('If-Modified-Since') is None) + _, _, headers = self.swift.calls_with_headers[-2] + self.assertEqual(headers['If-None-Match'], etag) + self.assertEqual(headers['If-Unmodified-Since'], last_modified_since) + self.assertTrue(headers.get('If-Match') is None) + self.assertTrue(headers.get('If-Modified-Since') is None) + _, _, headers = self.swift.calls_with_headers[-1] + self.assertTrue(headers.get('If-None-Match') is None) + self.assertTrue(headers.get('If-Unmodified-Since') is None) + _, _, headers = self.swift.calls_with_headers[0] + + def test_upload_part_copy_range_unsatisfiable(self): + account = 'test:tester' + + header = {'X-Amz-Copy-Source-Range': 'bytes=1000-'} + status, header, body = self._test_copy_for_s3acl( + account, src_headers={'Content-Length': '10'}, put_header=header) + + self.assertEqual(status.split()[0], '400') + self.assertIn('Range specified is not valid for ' + 'source object of size: 10', body) + + self.assertEqual([ + ('HEAD', '/v1/AUTH_test/bucket'), + ('HEAD', '/v1/AUTH_test/bucket+segments/object/X'), + ('HEAD', '/v1/AUTH_test/src_bucket/src_obj'), + ], self.swift.calls) + + def test_upload_part_copy_range_invalid(self): + account = 'test:tester' + + header = {'X-Amz-Copy-Source-Range': '0-9'} + status, header, body = \ + self._test_copy_for_s3acl(account, put_header=header) + + self.assertEqual(status.split()[0], '400', body) + + header = {'X-Amz-Copy-Source-Range': 'asdf'} + status, header, body = \ + self._test_copy_for_s3acl(account, put_header=header) + + self.assertEqual(status.split()[0], '400', body) + + def test_upload_part_copy_range(self): + account = 'test:tester' + + header = {'X-Amz-Copy-Source-Range': 'bytes=0-9'} + status, header, body = self._test_copy_for_s3acl( + account, src_headers={'Content-Length': '20'}, put_header=header) + + self.assertEqual(status.split()[0], '200', body) + + self.assertEqual([ + ('HEAD', '/v1/AUTH_test/bucket'), + ('HEAD', '/v1/AUTH_test/bucket+segments/object/X'), + ('HEAD', '/v1/AUTH_test/src_bucket/src_obj'), + ('PUT', '/v1/AUTH_test/bucket+segments/object/X/1'), + ], self.swift.calls) + put_headers = self.swift.calls_with_headers[-1][2] + self.assertEqual('bytes=0-9', put_headers['Range']) + self.assertEqual('/src_bucket/src_obj', put_headers['X-Copy-From']) + + def _test_no_body(self, use_content_length=False, + use_transfer_encoding=False, string_to_md5=''): + content_md5 = md5(string_to_md5).digest().encode('base64').strip() + with UnreadableInput(self) as fake_input: + req = Request.blank( + '/bucket/object?uploadId=X', + environ={ + 'REQUEST_METHOD': 'POST', + 'wsgi.input': fake_input}, + headers={ + 'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'Content-MD5': content_md5}, + body='') + if not use_content_length: + req.environ.pop('CONTENT_LENGTH') + if use_transfer_encoding: + req.environ['HTTP_TRANSFER_ENCODING'] = 'chunked' + status, headers, body = self.call_s3api(req) + self.assertEqual(status, '400 Bad Request') + self.assertEqual(self._get_error_code(body), 'InvalidRequest') + self.assertEqual(self._get_error_message(body), + 'You must specify at least one part') + + @s3acl + def test_object_multi_upload_empty_body(self): + self._test_no_body() + self._test_no_body(string_to_md5='test') + self._test_no_body(use_content_length=True) + self._test_no_body(use_content_length=True, string_to_md5='test') + 
self._test_no_body(use_transfer_encoding=True) + self._test_no_body(use_transfer_encoding=True, string_to_md5='test') + + +class TestS3ApiMultiUploadNonUTC(TestS3ApiMultiUpload): + def setUp(self): + self.orig_tz = os.environ.get('TZ', '') + os.environ['TZ'] = 'EST+05EDT,M4.1.0,M10.5.0' + time.tzset() + super(TestS3ApiMultiUploadNonUTC, self).setUp() + + def tearDown(self): + super(TestS3ApiMultiUploadNonUTC, self).tearDown() + os.environ['TZ'] = self.orig_tz + time.tzset() + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_obj.py swift-2.18.0/test/unit/common/middleware/s3api/test_obj.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_obj.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_obj.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,1010 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from datetime import datetime +import hashlib +import os +from os.path import join +import time +from mock import patch + +from swift.common import swob +from swift.common.swob import Request + +from test.unit.common.middleware.s3api import S3ApiTestCase +from test.unit.common.middleware.s3api.test_s3_acl import s3acl +from swift.common.middleware.s3api.subresource import ACL, User, encode_acl, \ + Owner, Grant +from swift.common.middleware.s3api.etree import fromstring +from swift.common.middleware.s3api.utils import mktime, S3Timestamp +from test.unit.common.middleware.s3api.helpers import FakeSwift + + +def _wrap_fake_auth_middleware(org_func): + def fake_fake_auth_middleware(self, env): + org_func(env) + + if 'swift.authorize_override' in env: + return + + if 'HTTP_AUTHORIZATION' not in env: + return + + _, authorization = env['HTTP_AUTHORIZATION'].split(' ') + tenant_user, sign = authorization.rsplit(':', 1) + tenant, user = tenant_user.rsplit(':', 1) + + env['HTTP_X_TENANT_NAME'] = tenant + env['HTTP_X_USER_NAME'] = user + + return fake_fake_auth_middleware + + +class TestS3ApiObj(S3ApiTestCase): + + def setUp(self): + super(TestS3ApiObj, self).setUp() + + self.object_body = 'hello' + self.etag = hashlib.md5(self.object_body).hexdigest() + self.last_modified = 'Fri, 01 Apr 2014 12:00:00 GMT' + + self.response_headers = {'Content-Type': 'text/html', + 'Content-Length': len(self.object_body), + 'Content-Disposition': 'inline', + 'Content-Language': 'en', + 'x-object-meta-test': 'swift', + 'etag': self.etag, + 'last-modified': self.last_modified, + 'expires': 'Mon, 21 Sep 2015 12:00:00 GMT', + 'x-robots-tag': 'nofollow', + 'cache-control': 'private'} + + self.swift.register('GET', '/v1/AUTH_test/bucket/object', + swob.HTTPOk, self.response_headers, + self.object_body) + self.swift.register('PUT', '/v1/AUTH_test/bucket/object', + swob.HTTPCreated, + {'etag': self.etag, + 'last-modified': self.last_modified, + 'x-object-meta-something': 'oh hai'}, + None) + + def _test_object_GETorHEAD(self, method): + req = 
Request.blank('/bucket/object', + environ={'REQUEST_METHOD': method}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + unexpected_headers = [] + for key, val in self.response_headers.iteritems(): + if key in ('Content-Length', 'Content-Type', 'content-encoding', + 'last-modified', 'cache-control', 'Content-Disposition', + 'Content-Language', 'expires', 'x-robots-tag'): + self.assertIn(key, headers) + self.assertEqual(headers[key], str(val)) + + elif key == 'etag': + self.assertEqual(headers[key], '"%s"' % val) + + elif key.startswith('x-object-meta-'): + self.assertIn('x-amz-meta-' + key[14:], headers) + self.assertEqual(headers['x-amz-meta-' + key[14:]], val) + + else: + unexpected_headers.append((key, val)) + + if unexpected_headers: + self.fail('unexpected headers: %r' % unexpected_headers) + + self.assertEqual(headers['etag'], + '"%s"' % self.response_headers['etag']) + + if method == 'GET': + self.assertEqual(body, self.object_body) + + @s3acl + def test_object_HEAD_error(self): + # HEAD does not return the body even an error response in the + # specifications of the REST API. + # So, check the response code for error test of HEAD. + req = Request.blank('/bucket/object', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + self.swift.register('HEAD', '/v1/AUTH_test/bucket/object', + swob.HTTPUnauthorized, {}, None) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '403') + self.assertEqual(body, '') # sanity + + req = Request.blank('/bucket/object', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + self.swift.register('HEAD', '/v1/AUTH_test/bucket/object', + swob.HTTPForbidden, {}, None) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '403') + self.assertEqual(body, '') # sanity + + req = Request.blank('/bucket/object', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + self.swift.register('HEAD', '/v1/AUTH_test/bucket/object', + swob.HTTPNotFound, {}, None) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '404') + self.assertEqual(body, '') # sanity + + req = Request.blank('/bucket/object', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + self.swift.register('HEAD', '/v1/AUTH_test/bucket/object', + swob.HTTPPreconditionFailed, {}, None) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '412') + self.assertEqual(body, '') # sanity + + req = Request.blank('/bucket/object', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + self.swift.register('HEAD', '/v1/AUTH_test/bucket/object', + swob.HTTPServerError, {}, None) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '500') + self.assertEqual(body, '') # sanity + + req = Request.blank('/bucket/object', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + self.swift.register('HEAD', '/v1/AUTH_test/bucket/object', + swob.HTTPServiceUnavailable, {}, None) + status, headers, body = 
self.call_s3api(req) + self.assertEqual(status.split()[0], '500') + self.assertEqual(body, '') # sanity + + def test_object_HEAD(self): + self._test_object_GETorHEAD('HEAD') + + def _test_object_HEAD_Range(self, range_value): + req = Request.blank('/bucket/object', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Range': range_value, + 'Date': self.get_date_header()}) + return self.call_s3api(req) + + @s3acl + def test_object_HEAD_Range_with_invalid_value(self): + range_value = '' + status, headers, body = self._test_object_HEAD_Range(range_value) + self.assertEqual(status.split()[0], '200') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], '5') + self.assertTrue('content-range' not in headers) + + range_value = 'hoge' + status, headers, body = self._test_object_HEAD_Range(range_value) + self.assertEqual(status.split()[0], '200') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], '5') + self.assertTrue('content-range' not in headers) + + range_value = 'bytes=' + status, headers, body = self._test_object_HEAD_Range(range_value) + self.assertEqual(status.split()[0], '200') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], '5') + self.assertTrue('content-range' not in headers) + + range_value = 'bytes=1' + status, headers, body = self._test_object_HEAD_Range(range_value) + self.assertEqual(status.split()[0], '200') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], '5') + self.assertTrue('content-range' not in headers) + + range_value = 'bytes=5-1' + status, headers, body = self._test_object_HEAD_Range(range_value) + self.assertEqual(status.split()[0], '200') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], '5') + self.assertTrue('content-range' not in headers) + + range_value = 'bytes=5-10' + status, headers, body = self._test_object_HEAD_Range(range_value) + self.assertEqual(status.split()[0], '416') + + @s3acl + def test_object_HEAD_Range(self): + # update response headers + self.swift.register('HEAD', '/v1/AUTH_test/bucket/object', + swob.HTTPOk, self.response_headers, + self.object_body) + range_value = 'bytes=0-3' + status, headers, body = self._test_object_HEAD_Range(range_value) + self.assertEqual(status.split()[0], '206') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], '4') + self.assertTrue('content-range' in headers) + self.assertTrue(headers['content-range'].startswith('bytes 0-3')) + self.assertTrue('x-amz-meta-test' in headers) + self.assertEqual('swift', headers['x-amz-meta-test']) + + range_value = 'bytes=3-3' + status, headers, body = self._test_object_HEAD_Range(range_value) + self.assertEqual(status.split()[0], '206') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], '1') + self.assertTrue('content-range' in headers) + self.assertTrue(headers['content-range'].startswith('bytes 3-3')) + self.assertTrue('x-amz-meta-test' in headers) + self.assertEqual('swift', headers['x-amz-meta-test']) + + range_value = 'bytes=1-' + status, headers, body = self._test_object_HEAD_Range(range_value) + self.assertEqual(status.split()[0], '206') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], '4') + self.assertTrue('content-range' in headers) + self.assertTrue(headers['content-range'].startswith('bytes 1-4')) + 
self.assertTrue('x-amz-meta-test' in headers) + self.assertEqual('swift', headers['x-amz-meta-test']) + + range_value = 'bytes=-3' + status, headers, body = self._test_object_HEAD_Range(range_value) + self.assertEqual(status.split()[0], '206') + self.assertTrue('content-length' in headers) + self.assertEqual(headers['content-length'], '3') + self.assertTrue('content-range' in headers) + self.assertTrue(headers['content-range'].startswith('bytes 2-4')) + self.assertTrue('x-amz-meta-test' in headers) + self.assertEqual('swift', headers['x-amz-meta-test']) + + @s3acl + def test_object_GET_error(self): + code = self._test_method_error('GET', '/bucket/object', + swob.HTTPUnauthorized) + self.assertEqual(code, 'SignatureDoesNotMatch') + code = self._test_method_error('GET', '/bucket/object', + swob.HTTPForbidden) + self.assertEqual(code, 'AccessDenied') + code = self._test_method_error('GET', '/bucket/object', + swob.HTTPNotFound) + self.assertEqual(code, 'NoSuchKey') + code = self._test_method_error('GET', '/bucket/object', + swob.HTTPServerError) + self.assertEqual(code, 'InternalError') + code = self._test_method_error('GET', '/bucket/object', + swob.HTTPPreconditionFailed) + self.assertEqual(code, 'PreconditionFailed') + code = self._test_method_error('GET', '/bucket/object', + swob.HTTPServiceUnavailable) + self.assertEqual(code, 'InternalError') + + @s3acl + def test_object_GET(self): + self._test_object_GETorHEAD('GET') + + @s3acl(s3acl_only=True) + def test_object_GET_with_s3acl_and_keystone(self): + # for passing keystone authentication root + fake_auth = self.swift._fake_auth_middleware + with patch.object(FakeSwift, '_fake_auth_middleware', + _wrap_fake_auth_middleware(fake_auth)): + + self._test_object_GETorHEAD('GET') + _, _, headers = self.swift.calls_with_headers[-1] + self.assertNotIn('Authorization', headers) + _, _, headers = self.swift.calls_with_headers[0] + self.assertNotIn('Authorization', headers) + + @s3acl + def test_object_GET_Range(self): + req = Request.blank('/bucket/object', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Range': 'bytes=0-3', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '206') + + self.assertTrue('content-range' in headers) + self.assertTrue(headers['content-range'].startswith('bytes 0-3')) + + @s3acl + def test_object_GET_Range_error(self): + code = self._test_method_error('GET', '/bucket/object', + swob.HTTPRequestedRangeNotSatisfiable) + self.assertEqual(code, 'InvalidRange') + + @s3acl + def test_object_GET_Response(self): + req = Request.blank('/bucket/object', + environ={'REQUEST_METHOD': 'GET', + 'QUERY_STRING': + 'response-content-type=%s&' + 'response-content-language=%s&' + 'response-expires=%s&' + 'response-cache-control=%s&' + 'response-content-disposition=%s&' + 'response-content-encoding=%s&' + % ('text/plain', 'en', + 'Fri, 01 Apr 2014 12:00:00 GMT', + 'no-cache', + 'attachment', + 'gzip')}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + self.assertTrue('content-type' in headers) + self.assertEqual(headers['content-type'], 'text/plain') + self.assertTrue('content-language' in headers) + self.assertEqual(headers['content-language'], 'en') + self.assertTrue('expires' in headers) + self.assertEqual(headers['expires'], 'Fri, 01 Apr 2014 12:00:00 GMT') + self.assertTrue('cache-control' in 
headers) + self.assertEqual(headers['cache-control'], 'no-cache') + self.assertTrue('content-disposition' in headers) + self.assertEqual(headers['content-disposition'], + 'attachment') + self.assertTrue('content-encoding' in headers) + self.assertEqual(headers['content-encoding'], 'gzip') + + @s3acl + def test_object_PUT_error(self): + code = self._test_method_error('PUT', '/bucket/object', + swob.HTTPUnauthorized) + self.assertEqual(code, 'SignatureDoesNotMatch') + code = self._test_method_error('PUT', '/bucket/object', + swob.HTTPForbidden) + self.assertEqual(code, 'AccessDenied') + code = self._test_method_error('PUT', '/bucket/object', + swob.HTTPNotFound) + self.assertEqual(code, 'NoSuchBucket') + code = self._test_method_error('PUT', '/bucket/object', + swob.HTTPRequestEntityTooLarge) + self.assertEqual(code, 'EntityTooLarge') + code = self._test_method_error('PUT', '/bucket/object', + swob.HTTPServerError) + self.assertEqual(code, 'InternalError') + code = self._test_method_error('PUT', '/bucket/object', + swob.HTTPUnprocessableEntity) + self.assertEqual(code, 'BadDigest') + code = self._test_method_error('PUT', '/bucket/object', + swob.HTTPLengthRequired) + self.assertEqual(code, 'MissingContentLength') + code = self._test_method_error('PUT', '/bucket/object', + swob.HTTPPreconditionFailed) + self.assertEqual(code, 'InternalError') + code = self._test_method_error('PUT', '/bucket/object', + swob.HTTPServiceUnavailable) + self.assertEqual(code, 'InternalError') + code = self._test_method_error('PUT', '/bucket/object', + swob.HTTPCreated, + {'X-Amz-Copy-Source': ''}) + self.assertEqual(code, 'InvalidArgument') + code = self._test_method_error('PUT', '/bucket/object', + swob.HTTPCreated, + {'X-Amz-Copy-Source': '/'}) + self.assertEqual(code, 'InvalidArgument') + code = self._test_method_error('PUT', '/bucket/object', + swob.HTTPCreated, + {'X-Amz-Copy-Source': '/bucket'}) + self.assertEqual(code, 'InvalidArgument') + code = self._test_method_error('PUT', '/bucket/object', + swob.HTTPCreated, + {'X-Amz-Copy-Source': '/bucket/'}) + self.assertEqual(code, 'InvalidArgument') + code = self._test_method_error( + 'PUT', '/bucket/object', + swob.HTTPCreated, + {'X-Amz-Copy-Source': '/bucket/src_obj?foo=bar'}) + self.assertEqual(code, 'InvalidArgument') + # adding other query paramerters will cause an error + code = self._test_method_error( + 'PUT', '/bucket/object', + swob.HTTPCreated, + {'X-Amz-Copy-Source': '/bucket/src_obj?versionId=foo&bar=baz'}) + self.assertEqual(code, 'InvalidArgument') + # ...even versionId appears in the last + code = self._test_method_error( + 'PUT', '/bucket/object', + swob.HTTPCreated, + {'X-Amz-Copy-Source': '/bucket/src_obj?bar=baz&versionId=foo'}) + self.assertEqual(code, 'InvalidArgument') + code = self._test_method_error( + 'PUT', '/bucket/object', + swob.HTTPCreated, + {'X-Amz-Copy-Source': '/bucket/src_obj?versionId=foo'}) + self.assertEqual(code, 'NotImplemented') + code = self._test_method_error( + 'PUT', '/bucket/object', + swob.HTTPCreated, + {'X-Amz-Copy-Source': '/src_bucket/src_object', + 'X-Amz-Copy-Source-Range': 'bytes=0-0'}) + self.assertEqual(code, 'InvalidArgument') + code = self._test_method_error('PUT', '/bucket/object', + swob.HTTPRequestTimeout) + self.assertEqual(code, 'RequestTimeout') + + @s3acl + def test_object_PUT(self): + etag = self.response_headers['etag'] + content_md5 = etag.decode('hex').encode('base64').strip() + + req = Request.blank( + '/bucket/object', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS 
test:tester:hmac', + 'x-amz-storage-class': 'STANDARD', + 'Content-MD5': content_md5, + 'Date': self.get_date_header()}, + body=self.object_body) + req.date = datetime.now() + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + # Check that s3api returns an etag header. + self.assertEqual(headers['etag'], '"%s"' % etag) + + _, _, headers = self.swift.calls_with_headers[-1] + # Check that s3api converts a Content-MD5 header into an etag. + self.assertEqual(headers['etag'], etag) + + def test_object_PUT_headers(self): + content_md5 = self.etag.decode('hex').encode('base64').strip() + + self.swift.register('HEAD', '/v1/AUTH_test/some/source', + swob.HTTPOk, {'last-modified': self.last_modified}, + None) + req = Request.blank( + '/bucket/object', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'X-Amz-Storage-Class': 'STANDARD', + 'X-Amz-Meta-Something': 'oh hai', + 'X-Amz-Meta-Unreadable-Prefix': '\x04w', + 'X-Amz-Meta-Unreadable-Suffix': 'h\x04', + 'X-Amz-Meta-Lots-Of-Unprintable': 5 * '\x04', + 'X-Amz-Copy-Source': '/some/source', + 'Content-MD5': content_md5, + 'Date': self.get_date_header()}) + req.date = datetime.now() + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + # Check that s3api does not return an etag header, + # specified copy source. + self.assertTrue(headers.get('etag') is None) + # Check that s3api does not return custom metadata in response + self.assertTrue(headers.get('x-amz-meta-something') is None) + + _, _, headers = self.swift.calls_with_headers[-1] + # Check that s3api converts a Content-MD5 header into an etag. + self.assertEqual(headers['ETag'], self.etag) + self.assertEqual(headers['X-Object-Meta-Something'], 'oh hai') + self.assertEqual(headers['X-Object-Meta-Unreadable-Prefix'], + '=?UTF-8?Q?=04w?=') + self.assertEqual(headers['X-Object-Meta-Unreadable-Suffix'], + '=?UTF-8?Q?h=04?=') + self.assertEqual(headers['X-Object-Meta-Lots-Of-Unprintable'], + '=?UTF-8?B?BAQEBAQ=?=') + self.assertEqual(headers['X-Copy-From'], '/some/source') + self.assertEqual(headers['Content-Length'], '0') + + def _test_object_PUT_copy(self, head_resp, put_header=None, + src_path='/some/source', timestamp=None): + account = 'test:tester' + grants = [Grant(User(account), 'FULL_CONTROL')] + head_headers = \ + encode_acl('object', + ACL(Owner(account, account), grants)) + head_headers.update({'last-modified': self.last_modified}) + self.swift.register('HEAD', '/v1/AUTH_test/some/source', + head_resp, head_headers, None) + put_header = put_header or {} + return self._call_object_copy(src_path, put_header, timestamp) + + def _test_object_PUT_copy_self(self, head_resp, + put_header=None, timestamp=None): + account = 'test:tester' + grants = [Grant(User(account), 'FULL_CONTROL')] + head_headers = \ + encode_acl('object', + ACL(Owner(account, account), grants)) + head_headers.update({'last-modified': self.last_modified}) + self.swift.register('HEAD', '/v1/AUTH_test/bucket/object', + head_resp, head_headers, None) + put_header = put_header or {} + return self._call_object_copy('/bucket/object', put_header, timestamp) + + def _call_object_copy(self, src_path, put_header, timestamp=None): + put_headers = {'Authorization': 'AWS test:tester:hmac', + 'X-Amz-Copy-Source': src_path, + 'Date': self.get_date_header()} + put_headers.update(put_header) + + req = Request.blank('/bucket/object', + environ={'REQUEST_METHOD': 'PUT'}, + headers=put_headers) + + 
req.date = datetime.now() + req.content_type = 'text/plain' + timestamp = timestamp or time.time() + with patch('swift.common.middleware.s3api.utils.time.time', + return_value=timestamp): + return self.call_s3api(req) + + @s3acl + def test_object_PUT_copy(self): + def do_test(src_path=None): + date_header = self.get_date_header() + timestamp = mktime(date_header) + last_modified = S3Timestamp(timestamp).s3xmlformat + status, headers, body = self._test_object_PUT_copy( + swob.HTTPOk, put_header={'Date': date_header}, + timestamp=timestamp, src_path=src_path) + self.assertEqual(status.split()[0], '200') + self.assertEqual(headers['Content-Type'], 'application/xml') + + self.assertTrue(headers.get('etag') is None) + self.assertTrue(headers.get('x-amz-meta-something') is None) + elem = fromstring(body, 'CopyObjectResult') + self.assertEqual(elem.find('LastModified').text, last_modified) + self.assertEqual(elem.find('ETag').text, '"%s"' % self.etag) + + _, _, headers = self.swift.calls_with_headers[-1] + self.assertEqual(headers['X-Copy-From'], '/some/source') + self.assertEqual(headers['Content-Length'], '0') + + do_test('/some/source') + do_test('/some/source?') + do_test('/some/source?versionId=null') + # Some clients (like Boto) don't include the leading slash; + # AWS seems to tolerate this so we should, too + do_test('some/source') + + @s3acl + def test_object_PUT_copy_self(self): + status, headers, body = \ + self._test_object_PUT_copy_self(swob.HTTPOk) + self.assertEqual(status.split()[0], '400') + elem = fromstring(body, 'Error') + err_msg = ("This copy request is illegal because it is trying to copy " + "an object to itself without changing the object's " + "metadata, storage class, website redirect location or " + "encryption attributes.") + self.assertEqual(elem.find('Code').text, 'InvalidRequest') + self.assertEqual(elem.find('Message').text, err_msg) + + @s3acl + def test_object_PUT_copy_self_metadata_copy(self): + header = {'x-amz-metadata-directive': 'COPY'} + status, headers, body = \ + self._test_object_PUT_copy_self(swob.HTTPOk, header) + self.assertEqual(status.split()[0], '400') + elem = fromstring(body, 'Error') + err_msg = ("This copy request is illegal because it is trying to copy " + "an object to itself without changing the object's " + "metadata, storage class, website redirect location or " + "encryption attributes.") + self.assertEqual(elem.find('Code').text, 'InvalidRequest') + self.assertEqual(elem.find('Message').text, err_msg) + + @s3acl + def test_object_PUT_copy_self_metadata_replace(self): + date_header = self.get_date_header() + timestamp = mktime(date_header) + last_modified = S3Timestamp(timestamp).s3xmlformat + header = {'x-amz-metadata-directive': 'REPLACE', + 'Date': date_header} + status, headers, body = self._test_object_PUT_copy_self( + swob.HTTPOk, header, timestamp=timestamp) + self.assertEqual(status.split()[0], '200') + self.assertEqual(headers['Content-Type'], 'application/xml') + self.assertTrue(headers.get('etag') is None) + elem = fromstring(body, 'CopyObjectResult') + self.assertEqual(elem.find('LastModified').text, last_modified) + self.assertEqual(elem.find('ETag').text, '"%s"' % self.etag) + + _, _, headers = self.swift.calls_with_headers[-1] + self.assertEqual(headers['X-Copy-From'], '/bucket/object') + self.assertEqual(headers['Content-Length'], '0') + + @s3acl + def test_object_PUT_copy_headers_error(self): + etag = '7dfa07a8e59ddbcd1dc84d4c4f82aea1' + last_modified_since = 'Fri, 01 Apr 2014 12:00:00 GMT' + + header = 
{'X-Amz-Copy-Source-If-Match': etag, + 'Date': self.get_date_header()} + status, header, body = \ + self._test_object_PUT_copy(swob.HTTPPreconditionFailed, + header) + self.assertEqual(self._get_error_code(body), 'PreconditionFailed') + + header = {'X-Amz-Copy-Source-If-None-Match': etag} + status, header, body = \ + self._test_object_PUT_copy(swob.HTTPNotModified, + header) + self.assertEqual(self._get_error_code(body), 'PreconditionFailed') + + header = {'X-Amz-Copy-Source-If-Modified-Since': last_modified_since} + status, header, body = \ + self._test_object_PUT_copy(swob.HTTPNotModified, + header) + self.assertEqual(self._get_error_code(body), 'PreconditionFailed') + + header = \ + {'X-Amz-Copy-Source-If-Unmodified-Since': last_modified_since} + status, header, body = \ + self._test_object_PUT_copy(swob.HTTPPreconditionFailed, + header) + self.assertEqual(self._get_error_code(body), 'PreconditionFailed') + + def test_object_PUT_copy_headers_with_match(self): + etag = '7dfa07a8e59ddbcd1dc84d4c4f82aea1' + last_modified_since = 'Fri, 01 Apr 2014 11:00:00 GMT' + + header = {'X-Amz-Copy-Source-If-Match': etag, + 'X-Amz-Copy-Source-If-Modified-Since': last_modified_since, + 'Date': self.get_date_header()} + status, header, body = \ + self._test_object_PUT_copy(swob.HTTPOk, header) + self.assertEqual(status.split()[0], '200') + self.assertEqual(len(self.swift.calls_with_headers), 2) + _, _, headers = self.swift.calls_with_headers[-1] + self.assertTrue(headers.get('If-Match') is None) + self.assertTrue(headers.get('If-Modified-Since') is None) + _, _, headers = self.swift.calls_with_headers[0] + self.assertEqual(headers['If-Match'], etag) + self.assertEqual(headers['If-Modified-Since'], last_modified_since) + + @s3acl(s3acl_only=True) + def test_object_PUT_copy_headers_with_match_and_s3acl(self): + etag = '7dfa07a8e59ddbcd1dc84d4c4f82aea1' + last_modified_since = 'Fri, 01 Apr 2014 11:00:00 GMT' + + header = {'X-Amz-Copy-Source-If-Match': etag, + 'X-Amz-Copy-Source-If-Modified-Since': last_modified_since, + 'Date': self.get_date_header()} + status, header, body = \ + self._test_object_PUT_copy(swob.HTTPOk, header) + + self.assertEqual(status.split()[0], '200') + self.assertEqual(len(self.swift.calls_with_headers), 3) + # Once the copy-source check passes in the s3acl case, + # s3api checks the bucket write permissions of the destination. 
+ _, _, headers = self.swift.calls_with_headers[-2] + self.assertTrue(headers.get('If-Match') is None) + self.assertTrue(headers.get('If-Modified-Since') is None) + _, _, headers = self.swift.calls_with_headers[-1] + self.assertTrue(headers.get('If-Match') is None) + self.assertTrue(headers.get('If-Modified-Since') is None) + _, _, headers = self.swift.calls_with_headers[0] + self.assertEqual(headers['If-Match'], etag) + self.assertEqual(headers['If-Modified-Since'], last_modified_since) + + def test_object_PUT_copy_headers_with_not_match(self): + etag = '7dfa07a8e59ddbcd1dc84d4c4f82aea1' + last_modified_since = 'Fri, 01 Apr 2014 12:00:00 GMT' + + header = {'X-Amz-Copy-Source-If-None-Match': etag, + 'X-Amz-Copy-Source-If-Unmodified-Since': last_modified_since, + 'Date': self.get_date_header()} + status, header, body = \ + self._test_object_PUT_copy(swob.HTTPOk, header) + + self.assertEqual(status.split()[0], '200') + self.assertEqual(len(self.swift.calls_with_headers), 2) + _, _, headers = self.swift.calls_with_headers[-1] + self.assertTrue(headers.get('If-None-Match') is None) + self.assertTrue(headers.get('If-Unmodified-Since') is None) + _, _, headers = self.swift.calls_with_headers[0] + self.assertEqual(headers['If-None-Match'], etag) + self.assertEqual(headers['If-Unmodified-Since'], last_modified_since) + + @s3acl(s3acl_only=True) + def test_object_PUT_copy_headers_with_not_match_and_s3acl(self): + etag = '7dfa07a8e59ddbcd1dc84d4c4f82aea1' + last_modified_since = 'Fri, 01 Apr 2014 12:00:00 GMT' + + header = {'X-Amz-Copy-Source-If-None-Match': etag, + 'X-Amz-Copy-Source-If-Unmodified-Since': last_modified_since, + 'Date': self.get_date_header()} + status, header, body = \ + self._test_object_PUT_copy(swob.HTTPOk, header) + self.assertEqual(status.split()[0], '200') + # Once the copy-source check passes in the s3acl case, + # s3api checks the bucket write permissions of the destination. 
+ self.assertEqual(len(self.swift.calls_with_headers), 3) + _, _, headers = self.swift.calls_with_headers[-1] + self.assertTrue(headers.get('If-None-Match') is None) + self.assertTrue(headers.get('If-Unmodified-Since') is None) + _, _, headers = self.swift.calls_with_headers[0] + self.assertEqual(headers['If-None-Match'], etag) + self.assertEqual(headers['If-Unmodified-Since'], last_modified_since) + + @s3acl + def test_object_POST_error(self): + code = self._test_method_error('POST', '/bucket/object', None) + self.assertEqual(code, 'NotImplemented') + + @s3acl + def test_object_DELETE_error(self): + code = self._test_method_error('DELETE', '/bucket/object', + swob.HTTPUnauthorized) + self.assertEqual(code, 'SignatureDoesNotMatch') + code = self._test_method_error('DELETE', '/bucket/object', + swob.HTTPForbidden) + self.assertEqual(code, 'AccessDenied') + code = self._test_method_error('DELETE', '/bucket/object', + swob.HTTPServerError) + self.assertEqual(code, 'InternalError') + code = self._test_method_error('DELETE', '/bucket/object', + swob.HTTPServiceUnavailable) + self.assertEqual(code, 'InternalError') + + with patch( + 'swift.common.middleware.s3api.s3request.get_container_info', + return_value={'status': 204}): + code = self._test_method_error('DELETE', '/bucket/object', + swob.HTTPNotFound) + self.assertEqual(code, 'NoSuchKey') + + with patch( + 'swift.common.middleware.s3api.s3request.get_container_info', + return_value={'status': 404}): + code = self._test_method_error('DELETE', '/bucket/object', + swob.HTTPNotFound) + self.assertEqual(code, 'NoSuchBucket') + + @s3acl + def test_object_DELETE_no_multipart(self): + self.s3api.conf.allow_multipart_uploads = False + req = Request.blank('/bucket/object', + environ={'REQUEST_METHOD': 'DELETE'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '204') + + self.assertNotIn(('HEAD', '/v1/AUTH_test/bucket/object'), + self.swift.calls) + self.assertIn(('DELETE', '/v1/AUTH_test/bucket/object'), + self.swift.calls) + _, path = self.swift.calls[-1] + self.assertEqual(path.count('?'), 0) + + @s3acl + def test_object_DELETE_multipart(self): + req = Request.blank('/bucket/object', + environ={'REQUEST_METHOD': 'DELETE'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '204') + + self.assertIn(('HEAD', '/v1/AUTH_test/bucket/object'), + self.swift.calls) + self.assertIn(('DELETE', '/v1/AUTH_test/bucket/object'), + self.swift.calls) + _, path = self.swift.calls[-1] + self.assertEqual(path.count('?'), 0) + + @s3acl + def test_slo_object_DELETE(self): + self.swift.register('HEAD', '/v1/AUTH_test/bucket/object', + swob.HTTPOk, + {'x-static-large-object': 'True'}, + None) + self.swift.register('DELETE', '/v1/AUTH_test/bucket/object', + swob.HTTPOk, {}, '') + req = Request.blank('/bucket/object', + environ={'REQUEST_METHOD': 'DELETE'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'Content-Type': 'foo/bar'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '204') + self.assertEqual(body, '') + + self.assertIn(('HEAD', '/v1/AUTH_test/bucket/object'), + self.swift.calls) + self.assertIn(('DELETE', '/v1/AUTH_test/bucket/object' + '?multipart-manifest=delete'), + self.swift.calls) + _, path, headers = self.swift.calls_with_headers[-1] + 
path, query_string = path.split('?', 1) + query = {} + for q in query_string.split('&'): + key, arg = q.split('=') + query[key] = arg + self.assertEqual(query['multipart-manifest'], 'delete') + self.assertNotIn('Content-Type', headers) + + def _test_object_for_s3acl(self, method, account): + req = Request.blank('/bucket/object', + environ={'REQUEST_METHOD': method}, + headers={'Authorization': 'AWS %s:hmac' % account, + 'Date': self.get_date_header()}) + return self.call_s3api(req) + + def _test_set_container_permission(self, account, permission): + grants = [Grant(User(account), permission)] + headers = \ + encode_acl('container', + ACL(Owner('test:tester', 'test:tester'), grants)) + self.swift.register('HEAD', '/v1/AUTH_test/bucket', + swob.HTTPNoContent, headers, None) + + @s3acl(s3acl_only=True) + def test_object_GET_without_permission(self): + status, headers, body = self._test_object_for_s3acl('GET', + 'test:other') + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + @s3acl(s3acl_only=True) + def test_object_GET_with_read_permission(self): + status, headers, body = self._test_object_for_s3acl('GET', + 'test:read') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_object_GET_with_fullcontrol_permission(self): + status, headers, body = \ + self._test_object_for_s3acl('GET', 'test:full_control') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_object_PUT_without_permission(self): + status, headers, body = self._test_object_for_s3acl('PUT', + 'test:other') + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + @s3acl(s3acl_only=True) + def test_object_PUT_with_owner_permission(self): + status, headers, body = self._test_object_for_s3acl('PUT', + 'test:tester') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_object_PUT_with_write_permission(self): + account = 'test:other' + self._test_set_container_permission(account, 'WRITE') + status, headers, body = self._test_object_for_s3acl('PUT', account) + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_object_PUT_with_fullcontrol_permission(self): + account = 'test:other' + self._test_set_container_permission(account, 'FULL_CONTROL') + status, headers, body = \ + self._test_object_for_s3acl('PUT', account) + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_object_DELETE_without_permission(self): + account = 'test:other' + status, headers, body = self._test_object_for_s3acl('DELETE', + account) + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + @s3acl(s3acl_only=True) + def test_object_DELETE_with_owner_permission(self): + status, headers, body = self._test_object_for_s3acl('DELETE', + 'test:tester') + self.assertEqual(status.split()[0], '204') + + @s3acl(s3acl_only=True) + def test_object_DELETE_with_write_permission(self): + account = 'test:other' + self._test_set_container_permission(account, 'WRITE') + status, headers, body = self._test_object_for_s3acl('DELETE', + account) + self.assertEqual(status.split()[0], '204') + + @s3acl(s3acl_only=True) + def test_object_DELETE_with_fullcontrol_permission(self): + account = 'test:other' + self._test_set_container_permission(account, 'FULL_CONTROL') + status, headers, body = self._test_object_for_s3acl('DELETE', account) + self.assertEqual(status.split()[0], '204') + + def _test_object_copy_for_s3acl(self, account, src_permission=None, + src_path='/src_bucket/src_obj'): + owner = 
'test:tester' + grants = [Grant(User(account), src_permission)] \ + if src_permission else [Grant(User(owner), 'FULL_CONTROL')] + src_o_headers = \ + encode_acl('object', ACL(Owner(owner, owner), grants)) + src_o_headers.update({'last-modified': self.last_modified}) + self.swift.register( + 'HEAD', join('/v1/AUTH_test', src_path.lstrip('/')), + swob.HTTPOk, src_o_headers, None) + + req = Request.blank( + '/bucket/object', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS %s:hmac' % account, + 'X-Amz-Copy-Source': src_path, + 'Date': self.get_date_header()}) + + return self.call_s3api(req) + + @s3acl(s3acl_only=True) + def test_object_PUT_copy_with_owner_permission(self): + status, headers, body = \ + self._test_object_copy_for_s3acl('test:tester') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_object_PUT_copy_with_fullcontrol_permission(self): + status, headers, body = \ + self._test_object_copy_for_s3acl('test:full_control', + 'FULL_CONTROL') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_object_PUT_copy_with_grantee_permission(self): + status, headers, body = \ + self._test_object_copy_for_s3acl('test:write', 'READ') + self.assertEqual(status.split()[0], '200') + + @s3acl(s3acl_only=True) + def test_object_PUT_copy_without_src_obj_permission(self): + status, headers, body = \ + self._test_object_copy_for_s3acl('test:write') + self.assertEqual(status.split()[0], '403') + + @s3acl(s3acl_only=True) + def test_object_PUT_copy_without_dst_container_permission(self): + status, headers, body = \ + self._test_object_copy_for_s3acl('test:other', 'READ') + self.assertEqual(status.split()[0], '403') + + @s3acl(s3acl_only=True) + def test_object_PUT_copy_empty_src_path(self): + self.swift.register('PUT', '/v1/AUTH_test/bucket/object', + swob.HTTPPreconditionFailed, {}, None) + status, headers, body = self._test_object_copy_for_s3acl( + 'test:write', 'READ', src_path='') + self.assertEqual(status.split()[0], '400') + + +class TestS3ApiObjNonUTC(TestS3ApiObj): + def setUp(self): + self.orig_tz = os.environ.get('TZ', '') + os.environ['TZ'] = 'EST+05EDT,M4.1.0,M10.5.0' + time.tzset() + super(TestS3ApiObjNonUTC, self).setUp() + + def tearDown(self): + super(TestS3ApiObjNonUTC, self).tearDown() + os.environ['TZ'] = self.orig_tz + time.tzset() + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_s3_acl.py swift-2.18.0/test/unit/common/middleware/s3api/test_s3_acl.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_s3_acl.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_s3_acl.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,540 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import functools +import sys +import traceback +from mock import patch, MagicMock + +from swift.common import swob +from swift.common.swob import Request +from swift.common.utils import json + +from swift.common.middleware.s3api.etree import tostring, Element, SubElement +from swift.common.middleware.s3api.subresource import ACL, ACLPrivate, User, \ + encode_acl, AuthenticatedUsers, AllUsers, Owner, Grant, PERMISSIONS +from test.unit.common.middleware.s3api.test_s3api import S3ApiTestCase +from test.unit.common.middleware.s3api.exceptions import NotMethodException +from test.unit.common.middleware.s3api import FakeSwift + + +XMLNS_XSI = 'http://www.w3.org/2001/XMLSchema-instance' + + +def s3acl(func=None, s3acl_only=False): + """ + NOTE: The s3acl decorator needs an instance of the s3api testing + framework (i.e. the decorated method's first argument must be one). + """ + if func is None: + return functools.partial(s3acl, s3acl_only=s3acl_only) + + @functools.wraps(func) + def s3acl_decorator(*args, **kwargs): + if not args and not kwargs: + raise NotMethodException('Use s3acl decorator for a method') + + def call_func(failing_point=''): + try: + # For maintainability, we patch a 204 status for every + # get_container_info call. If preferred, this could instead + # be written with nested decorators, e.g.: + # + # @s3acl + # @patch(xxx) + # def test_xxxx(self) + + with patch('swift.common.middleware.s3api.s3request.' + 'get_container_info', + return_value={'status': 204}): + func(*args, **kwargs) + except AssertionError: + # Build a traceback message to clarify the assertion + exc_type, exc_instance, exc_traceback = sys.exc_info() + formatted_traceback = ''.join(traceback.format_tb( + exc_traceback)) + message = '\n%s\n%s:\n%s' % (formatted_traceback, + exc_type.__name__, + exc_instance.message) + message += failing_point + raise exc_type(message) + + instance = args[0] + + if not s3acl_only: + call_func() + instance.swift._calls = [] + + instance.s3api.conf.s3_acl = True + instance.swift.s3_acl = True + owner = Owner('test:tester', 'test:tester') + generate_s3acl_environ('test', instance.swift, owner) + call_func(' (fail at s3_acl)') + + return s3acl_decorator + + +def _gen_test_headers(owner, grants=[], resource='container'): + if not grants: + grants = [Grant(User('test:tester'), 'FULL_CONTROL')] + return encode_acl(resource, ACL(owner, grants)) + + +def _make_xml(grantee): + owner = 'test:tester' + permission = 'READ' + elem = Element('AccessControlPolicy') + elem_owner = SubElement(elem, 'Owner') + SubElement(elem_owner, 'ID').text = owner + SubElement(elem_owner, 'DisplayName').text = owner + acl_list_elem = SubElement(elem, 'AccessControlList') + elem_grant = SubElement(acl_list_elem, 'Grant') + elem_grant.append(grantee) + SubElement(elem_grant, 'Permission').text = permission + return tostring(elem) + + +def generate_s3acl_environ(account, swift, owner): + + def gen_grant(permission): + # generate a Grant with a grantee named after "permission" + account_name = '%s:%s' % (account, permission.lower()) + return Grant(User(account_name), permission) + + grants = map(gen_grant, PERMISSIONS) + container_headers = _gen_test_headers(owner, grants) + object_headers = _gen_test_headers(owner, grants, 'object') + object_body = 'hello' + object_headers['Content-Length'] = len(object_body) + + # The TEST method is used to resolve a tenant name + swift.register('TEST', '/v1/AUTH_test', swob.HTTPMethodNotAllowed, + {}, None) + swift.register('TEST', '/v1/AUTH_X', swob.HTTPMethodNotAllowed, + {}, None) 
+ + # for bucket + swift.register('HEAD', '/v1/AUTH_test/bucket', swob.HTTPNoContent, + container_headers, None) + swift.register('HEAD', '/v1/AUTH_test/bucket+segments', swob.HTTPNoContent, + container_headers, None) + swift.register('PUT', '/v1/AUTH_test/bucket', + swob.HTTPCreated, {}, None) + swift.register('GET', '/v1/AUTH_test/bucket', swob.HTTPNoContent, + container_headers, json.dumps([])) + swift.register('POST', '/v1/AUTH_test/bucket', + swob.HTTPNoContent, {}, None) + swift.register('DELETE', '/v1/AUTH_test/bucket', + swob.HTTPNoContent, {}, None) + + # necessary for canned-acl tests + public_headers = _gen_test_headers(owner, [Grant(AllUsers(), 'READ')]) + swift.register('GET', '/v1/AUTH_test/public', swob.HTTPNoContent, + public_headers, json.dumps([])) + authenticated_headers = _gen_test_headers( + owner, [Grant(AuthenticatedUsers(), 'READ')], 'bucket') + swift.register('GET', '/v1/AUTH_test/authenticated', + swob.HTTPNoContent, authenticated_headers, + json.dumps([])) + + # for object + swift.register('HEAD', '/v1/AUTH_test/bucket/object', swob.HTTPOk, + object_headers, None) + + +class TestS3ApiS3Acl(S3ApiTestCase): + + def setUp(self): + super(TestS3ApiS3Acl, self).setUp() + + self.s3api.conf.s3_acl = True + self.swift.s3_acl = True + + account = 'test' + owner_name = '%s:tester' % account + self.default_owner = Owner(owner_name, owner_name) + generate_s3acl_environ(account, self.swift, self.default_owner) + + def tearDown(self): + self.s3api.conf.s3_acl = False + + def test_bucket_acl_PUT_with_other_owner(self): + req = Request.blank('/bucket?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body=tostring( + ACLPrivate( + Owner(id='test:other', + name='test:other')).elem())) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + def test_object_acl_PUT_xml_error(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body="invalid xml") + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'MalformedACLError') + + def test_canned_acl_private(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-acl': 'private'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + def test_canned_acl_public_read(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-acl': 'public-read'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + def test_canned_acl_public_read_write(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-acl': 'public-read-write'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + def test_canned_acl_authenticated_read(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-acl': 'authenticated-read'}) + status, headers, body = 
self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + def test_canned_acl_bucket_owner_read(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-acl': 'bucket-owner-read'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + def test_canned_acl_bucket_owner_full_control(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-acl': 'bucket-owner-full-control'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + def test_invalid_canned_acl(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-acl': 'invalid'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def _test_grant_header(self, permission): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-grant-' + permission: + 'id=test:tester'}) + return self.call_s3api(req) + + def test_grant_read(self): + status, headers, body = self._test_grant_header('read') + self.assertEqual(status.split()[0], '200') + + def test_grant_write(self): + status, headers, body = self._test_grant_header('write') + self.assertEqual(status.split()[0], '200') + + def test_grant_read_acp(self): + status, headers, body = self._test_grant_header('read-acp') + self.assertEqual(status.split()[0], '200') + + def test_grant_write_acp(self): + status, headers, body = self._test_grant_header('write-acp') + self.assertEqual(status.split()[0], '200') + + def test_grant_full_control(self): + status, headers, body = self._test_grant_header('full-control') + self.assertEqual(status.split()[0], '200') + + def test_grant_invalid_permission(self): + status, headers, body = self._test_grant_header('invalid') + self.assertEqual(self._get_error_code(body), 'MissingSecurityHeader') + + def test_grant_with_both_header_and_xml(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-grant-full-control': + 'id=test:tester'}, + body=tostring( + ACLPrivate( + Owner(id='test:tester', + name='test:tester')).elem())) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'UnexpectedContent') + + def test_grant_with_both_header_and_canned_acl(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-grant-full-control': + 'id=test:tester', + 'x-amz-acl': 'public-read'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidRequest') + + def test_grant_email(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-grant-read': 'emailAddress=a@b.c'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'NotImplemented') + + def 
test_grant_email_xml(self): + grantee = Element('Grantee', nsmap={'xsi': XMLNS_XSI}) + grantee.set('{%s}type' % XMLNS_XSI, 'AmazonCustomerByEmail') + SubElement(grantee, 'EmailAddress').text = 'Grantees@email.com' + xml = _make_xml(grantee=grantee) + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body=xml) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'NotImplemented') + + def test_grant_invalid_group_xml(self): + grantee = Element('Grantee', nsmap={'xsi': XMLNS_XSI}) + grantee.set('{%s}type' % XMLNS_XSI, 'Invalid') + xml = _make_xml(grantee=grantee) + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body=xml) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'MalformedACLError') + + def test_grant_authenticated_users(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-grant-read': + 'uri="http://acs.amazonaws.com/groups/' + 'global/AuthenticatedUsers"'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + def test_grant_all_users(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-grant-read': + 'uri="http://acs.amazonaws.com/groups/' + 'global/AllUsers"'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + def test_grant_invalid_uri(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-grant-read': + 'uri="http://localhost/"'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def test_grant_invalid_uri_xml(self): + grantee = Element('Grantee', nsmap={'xsi': XMLNS_XSI}) + grantee.set('{%s}type' % XMLNS_XSI, 'Group') + SubElement(grantee, 'URI').text = 'invalid' + xml = _make_xml(grantee) + + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}, + body=xml) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def test_grant_invalid_target(self): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-grant-read': 'key=value'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def _test_bucket_acl_GET(self, account): + req = Request.blank('/bucket?acl', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS %s:hmac' % account, + 'Date': self.get_date_header()}) + return self.call_s3api(req) + + def test_bucket_acl_GET_without_permission(self): + status, headers, body = self._test_bucket_acl_GET('test:other') + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + def test_bucket_acl_GET_with_read_acp_permission(self): + status, headers, body = 
self._test_bucket_acl_GET('test:read_acp') + self.assertEqual(status.split()[0], '200') + + def test_bucket_acl_GET_with_fullcontrol_permission(self): + status, headers, body = self._test_bucket_acl_GET('test:full_control') + self.assertEqual(status.split()[0], '200') + + def test_bucket_acl_GET_with_owner_permission(self): + status, headers, body = self._test_bucket_acl_GET('test:tester') + self.assertEqual(status.split()[0], '200') + + def _test_bucket_acl_PUT(self, account, permission='FULL_CONTROL'): + acl = ACL(self.default_owner, [Grant(User(account), permission)]) + req = Request.blank('/bucket?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS %s:hmac' % account, + 'Date': self.get_date_header()}, + body=tostring(acl.elem())) + + return self.call_s3api(req) + + def test_bucket_acl_PUT_without_permission(self): + status, headers, body = self._test_bucket_acl_PUT('test:other') + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + def test_bucket_acl_PUT_with_write_acp_permission(self): + status, headers, body = self._test_bucket_acl_PUT('test:write_acp') + self.assertEqual(status.split()[0], '200') + + def test_bucket_acl_PUT_with_fullcontrol_permission(self): + status, headers, body = self._test_bucket_acl_PUT('test:full_control') + self.assertEqual(status.split()[0], '200') + + def test_bucket_acl_PUT_with_owner_permission(self): + status, headers, body = self._test_bucket_acl_PUT('test:tester') + self.assertEqual(status.split()[0], '200') + + def _test_object_acl_GET(self, account): + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS %s:hmac' % account, + 'Date': self.get_date_header()}) + return self.call_s3api(req) + + def test_object_acl_GET_without_permission(self): + status, headers, body = self._test_object_acl_GET('test:other') + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + def test_object_acl_GET_with_read_acp_permission(self): + status, headers, body = self._test_object_acl_GET('test:read_acp') + self.assertEqual(status.split()[0], '200') + + def test_object_acl_GET_with_fullcontrol_permission(self): + status, headers, body = self._test_object_acl_GET('test:full_control') + self.assertEqual(status.split()[0], '200') + + def test_object_acl_GET_with_owner_permission(self): + status, headers, body = self._test_object_acl_GET('test:tester') + self.assertEqual(status.split()[0], '200') + + def _test_object_acl_PUT(self, account, permission='FULL_CONTROL'): + acl = ACL(self.default_owner, [Grant(User(account), permission)]) + req = Request.blank('/bucket/object?acl', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS %s:hmac' % account, + 'Date': self.get_date_header()}, + body=tostring(acl.elem())) + + return self.call_s3api(req) + + def test_object_acl_PUT_without_permission(self): + status, headers, body = self._test_object_acl_PUT('test:other') + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + def test_object_acl_PUT_with_write_acp_permission(self): + status, headers, body = self._test_object_acl_PUT('test:write_acp') + self.assertEqual(status.split()[0], '200') + + def test_object_acl_PUT_with_fullcontrol_permission(self): + status, headers, body = self._test_object_acl_PUT('test:full_control') + self.assertEqual(status.split()[0], '200') + + def test_object_acl_PUT_with_owner_permission(self): + status, headers, body = self._test_object_acl_PUT('test:tester') + self.assertEqual(status.split()[0], '200') + + def 
test_s3acl_decorator(self): + @s3acl + def non_class_s3acl_error(): + raise TypeError() + + class FakeClass(object): + def __init__(self): + self.s3api = MagicMock() + self.swift = FakeSwift() + + @s3acl + def s3acl_error(self): + raise TypeError() + + @s3acl + def s3acl_assert_fail(self): + assert False + + @s3acl(s3acl_only=True) + def s3acl_s3only_error(self): + if self.s3api.conf.s3_acl: + raise TypeError() + + @s3acl(s3acl_only=True) + def s3acl_s3only_no_error(self): + if not self.s3api.conf.s3_acl: + raise TypeError() + + fake_class = FakeClass() + + self.assertRaises(NotMethodException, non_class_s3acl_error) + self.assertRaises(TypeError, fake_class.s3acl_error) + self.assertRaises(AssertionError, fake_class.s3acl_assert_fail) + self.assertRaises(TypeError, fake_class.s3acl_s3only_error) + self.assertIsNone(fake_class.s3acl_s3only_no_error()) + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_s3api.py swift-2.18.0/test/unit/common/middleware/s3api/test_s3api.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_s3api.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_s3api.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,1049 @@ +# Copyright (c) 2011-2014 OpenStack Foundation. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +from mock import patch, MagicMock +from datetime import datetime +import hashlib +import mock +import requests +import json +import copy +from urllib import unquote, quote + +import swift.common.middleware.s3api +from swift.common.middleware.keystoneauth import KeystoneAuth +from swift.common import swob, utils +from swift.common.swob import Request + +from keystonemiddleware.auth_token import AuthProtocol +from keystoneauth1.access import AccessInfoV2 + +from test.unit.common.middleware.s3api import S3ApiTestCase +from test.unit.common.middleware.s3api.helpers import FakeSwift +from test.unit.common.middleware.s3api.test_s3token import \ + GOOD_RESPONSE_V2, GOOD_RESPONSE_V3 +from swift.common.middleware.s3api.s3request import SigV4Request, S3Request +from swift.common.middleware.s3api.etree import fromstring +from swift.common.middleware.s3api.s3api import filter_factory, \ + S3ApiMiddleware +from swift.common.middleware.s3api.s3token import S3Token + + +class TestS3ApiMiddleware(S3ApiTestCase): + def setUp(self): + super(TestS3ApiMiddleware, self).setUp() + + self.swift.register('GET', '/something', swob.HTTPOk, {}, 'FAKE APP') + + def test_non_s3_request_passthrough(self): + req = Request.blank('/something') + status, headers, body = self.call_s3api(req) + self.assertEqual(body, 'FAKE APP') + + def test_bad_format_authorization(self): + req = Request.blank('/something', + headers={'Authorization': 'hoge', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + def test_bad_method(self): + req = Request.blank('/', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'MethodNotAllowed') + + def test_bad_method_but_method_exists_in_controller(self): + req = Request.blank( + '/bucket', + environ={'REQUEST_METHOD': '_delete_segments_bucket'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'MethodNotAllowed') + + def test_path_info_encode(self): + bucket_name = 'b%75cket' + object_name = 'ob%6aect:1' + self.swift.register('GET', '/v1/AUTH_test/bucket/object:1', + swob.HTTPOk, {}, None) + req = Request.blank('/%s/%s' % (bucket_name, object_name), + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + raw_path_info = "/%s/%s" % (bucket_name, object_name) + path_info = req.environ['PATH_INFO'] + self.assertEqual(path_info, unquote(raw_path_info)) + self.assertEqual(req.path, quote(path_info)) + + def test_canonical_string_v2(self): + """ + The hashes here were generated by running the same requests against + boto.utils.canonical_string + """ + def canonical_string(path, headers): + if '?' in path: + path, query_string = path.split('?', 1) + else: + query_string = '' + env = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': path, + 'QUERY_STRING': query_string, + 'HTTP_AUTHORIZATION': 'AWS X:Y:Z', + } + for header, value in headers.items(): + header = 'HTTP_' + header.replace('-', '_').upper() + if header in ('HTTP_CONTENT_TYPE', 'HTTP_CONTENT_LENGTH'): + header = header[5:] + env[header] = value + + with patch('swift.common.middleware.s3api.s3request.' 
+ 'S3Request._validate_headers'): + req = S3Request(env) + return req.environ['s3api.auth_details']['string_to_sign'] + + def verify(hash, path, headers): + s = canonical_string(path, headers) + self.assertEqual(hash, hashlib.md5(s).hexdigest()) + + verify('6dd08c75e42190a1ce9468d1fd2eb787', '/bucket/object', + {'Content-Type': 'text/plain', 'X-Amz-Something': 'test', + 'Date': 'whatever'}) + + verify('c8447135da232ae7517328f3429df481', '/bucket/object', + {'Content-Type': 'text/plain', 'X-Amz-Something': 'test'}) + + verify('bf49304103a4de5c325dce6384f2a4a2', '/bucket/object', + {'content-type': 'text/plain'}) + + verify('be01bd15d8d47f9fe5e2d9248cc6f180', '/bucket/object', {}) + + verify('e9ec7dca45eef3e2c7276af23135e896', '/bucket/object', + {'Content-MD5': 'somestuff'}) + + verify('a822deb31213ad09af37b5a7fe59e55e', '/bucket/object?acl', {}) + + verify('cce5dd1016595cb706c93f28d3eaa18f', '/bucket/object', + {'Content-Type': 'text/plain', 'X-Amz-A': 'test', + 'X-Amz-Z': 'whatever', 'X-Amz-B': 'lalala', + 'X-Amz-Y': 'lalalalalalala'}) + + verify('7506d97002c7d2de922cc0ec34af8846', '/bucket/object', + {'Content-Type': None, 'X-Amz-Something': 'test'}) + + verify('28f76d6162444a193b612cd6cb20e0be', '/bucket/object', + {'Content-Type': None, + 'X-Amz-Date': 'Mon, 11 Jul 2011 10:52:57 +0000', + 'Date': 'Tue, 12 Jul 2011 10:52:57 +0000'}) + + verify('ed6971e3eca5af4ee361f05d7c272e49', '/bucket/object', + {'Content-Type': None, + 'Date': 'Tue, 12 Jul 2011 10:52:57 +0000'}) + + verify('41ecd87e7329c33fea27826c1c9a6f91', '/bucket/object?cors', {}) + + verify('d91b062f375d8fab407d6dab41fd154e', '/bucket/object?tagging', + {}) + + verify('ebab878a96814b30eb178e27efb3973f', '/bucket/object?restore', + {}) + + verify('f6bf1b2d92b054350d3679d28739fc69', '/bucket/object?' + 'response-cache-control&response-content-disposition&' + 'response-content-encoding&response-content-language&' + 'response-content-type&response-expires', {}) + + str1 = canonical_string('/', headers={'Content-Type': None, + 'X-Amz-Something': 'test'}) + str2 = canonical_string('/', headers={'Content-Type': '', + 'X-Amz-Something': 'test'}) + str3 = canonical_string('/', headers={'X-Amz-Something': 'test'}) + + self.assertEqual(str1, str2) + self.assertEqual(str2, str3) + + # Note that boto does not do proper stripping (as of 2.42.0). + # These were determined by examining the StringToSignBytes element of + # resulting SignatureDoesNotMatch errors from AWS. 
+ str1 = canonical_string('/', {'Content-Type': 'text/plain', + 'Content-MD5': '##'}) + str2 = canonical_string('/', {'Content-Type': '\x01\x02text/plain', + 'Content-MD5': '\x1f ##'}) + str3 = canonical_string('/', {'Content-Type': 'text/plain \x10', + 'Content-MD5': '##\x18'}) + + self.assertEqual(str1, str2) + self.assertEqual(str2, str3) + + def test_signed_urls_expired(self): + expire = '1000000000' + req = Request.blank('/bucket/object?Signature=X&Expires=%s&' + 'AWSAccessKeyId=test:tester' % expire, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Date': self.get_date_header()}) + req.headers['Date'] = datetime.utcnow() + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + def test_signed_urls(self): + # Set expire to last 32b timestamp value + # This number can't be higher, because it breaks tests on 32b systems + expire = '2147483647' # 19 Jan 2038 03:14:07 + utc_date = datetime.utcnow() + req = Request.blank('/bucket/object?Signature=X&Expires=%s&' + 'AWSAccessKeyId=test:tester&Timestamp=%s' % + (expire, utc_date.isoformat().rsplit('.')[0]), + environ={'REQUEST_METHOD': 'GET'}, + headers={'Date': self.get_date_header()}) + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + for _, _, headers in self.swift.calls_with_headers: + self.assertEqual(headers['Authorization'], 'AWS test:tester:X') + + def test_signed_urls_no_timestamp(self): + expire = '2147483647' # 19 Jan 2038 03:14:07 + req = Request.blank('/bucket/object?Signature=X&Expires=%s&' + 'AWSAccessKeyId=test:tester' % expire, + environ={'REQUEST_METHOD': 'GET'}) + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + # Curious! 
But actually S3 doesn't verify any x-amz-date/date headers + # for signed_url access and it also doesn't check timestamp + self.assertEqual(status.split()[0], '200') + for _, _, headers in self.swift.calls_with_headers: + self.assertEqual(headers['Authorization'], 'AWS test:tester:X') + + def test_signed_urls_invalid_expire(self): + expire = 'invalid' + req = Request.blank('/bucket/object?Signature=X&Expires=%s&' + 'AWSAccessKeyId=test:tester' % expire, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Date': self.get_date_header()}) + req.headers['Date'] = datetime.utcnow() + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + def test_signed_urls_no_sign(self): + expire = '2147483647' # 19 Jan 2038 03:14:07 + req = Request.blank('/bucket/object?Expires=%s&' + 'AWSAccessKeyId=test:tester' % expire, + environ={'REQUEST_METHOD': 'GET'}, + headers={'Date': self.get_date_header()}) + req.headers['Date'] = datetime.utcnow() + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + def test_signed_urls_no_access(self): + expire = '2147483647' # 19 Jan 2038 03:14:07 + req = Request.blank('/bucket/object?Expires=%s&' + 'AWSAccessKeyId=' % expire, + environ={'REQUEST_METHOD': 'GET'}) + req.headers['Date'] = datetime.utcnow() + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + def test_signed_urls_v4(self): + req = Request.blank( + '/bucket/object' + '?X-Amz-Algorithm=AWS4-HMAC-SHA256' + '&X-Amz-Credential=test:tester/20T20Z/US/s3/aws4_request' + '&X-Amz-Date=%s' + '&X-Amz-Expires=1000' + '&X-Amz-SignedHeaders=host' + '&X-Amz-Signature=X' % + self.get_v4_amz_date_header(), + headers={'Date': self.get_date_header()}, + environ={'REQUEST_METHOD': 'GET'}) + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200', body) + for _, _, headers in self.swift.calls_with_headers: + self.assertEqual('AWS test:tester:X', headers['Authorization']) + self.assertIn('X-Auth-Token', headers) + + def test_signed_urls_v4_missing_x_amz_date(self): + req = Request.blank('/bucket/object' + '?X-Amz-Algorithm=AWS4-HMAC-SHA256' + '&X-Amz-Credential=test/20T20Z/US/s3/aws4_request' + '&X-Amz-Expires=1000' + '&X-Amz-SignedHeaders=host' + '&X-Amz-Signature=X', + environ={'REQUEST_METHOD': 'GET'}) + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + def test_signed_urls_v4_invalid_algorithm(self): + req = Request.blank('/bucket/object' + '?X-Amz-Algorithm=FAKE' + '&X-Amz-Credential=test/20T20Z/US/s3/aws4_request' + '&X-Amz-Date=%s' + '&X-Amz-Expires=1000' + '&X-Amz-SignedHeaders=host' + '&X-Amz-Signature=X' % + self.get_v4_amz_date_header(), + environ={'REQUEST_METHOD': 'GET'}) + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def test_signed_urls_v4_missing_signed_headers(self): + req = Request.blank('/bucket/object' + '?X-Amz-Algorithm=AWS4-HMAC-SHA256' + '&X-Amz-Credential=test/20T20Z/US/s3/aws4_request' + '&X-Amz-Date=%s' + '&X-Amz-Expires=1000' + '&X-Amz-Signature=X' % + self.get_v4_amz_date_header(), + environ={'REQUEST_METHOD': 'GET'}) + req.content_type = 'text/plain' + status, headers, 
body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), + 'AuthorizationHeaderMalformed') + + def test_signed_urls_v4_invalid_credentials(self): + req = Request.blank('/bucket/object' + '?X-Amz-Algorithm=AWS4-HMAC-SHA256' + '&X-Amz-Credential=test' + '&X-Amz-Date=%s' + '&X-Amz-Expires=1000' + '&X-Amz-SignedHeaders=host' + '&X-Amz-Signature=X' % + self.get_v4_amz_date_header(), + environ={'REQUEST_METHOD': 'GET'}) + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + def test_signed_urls_v4_missing_signature(self): + req = Request.blank('/bucket/object' + '?X-Amz-Algorithm=AWS4-HMAC-SHA256' + '&X-Amz-Credential=test/20T20Z/US/s3/aws4_request' + '&X-Amz-Date=%s' + '&X-Amz-Expires=1000' + '&X-Amz-SignedHeaders=host' % + self.get_v4_amz_date_header(), + environ={'REQUEST_METHOD': 'GET'}) + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + def test_bucket_virtual_hosted_style(self): + req = Request.blank('/', + environ={'HTTP_HOST': 'bucket.localhost:80', + 'REQUEST_METHOD': 'HEAD', + 'HTTP_AUTHORIZATION': + 'AWS test:tester:hmac'}, + headers={'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + def test_object_virtual_hosted_style(self): + req = Request.blank('/object', + environ={'HTTP_HOST': 'bucket.localhost:80', + 'REQUEST_METHOD': 'HEAD', + 'HTTP_AUTHORIZATION': + 'AWS test:tester:hmac'}, + headers={'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + def test_token_generation(self): + self.swift.register('HEAD', '/v1/AUTH_test/bucket+segments/' + 'object/123456789abcdef', + swob.HTTPOk, {}, None) + self.swift.register('PUT', '/v1/AUTH_test/bucket+segments/' + 'object/123456789abcdef/1', + swob.HTTPCreated, {}, None) + req = Request.blank('/bucket/object?uploadId=123456789abcdef' + '&partNumber=1', + environ={'REQUEST_METHOD': 'PUT'}) + req.headers['Authorization'] = 'AWS test:tester:hmac' + date_header = self.get_date_header() + req.headers['Date'] = date_header + with mock.patch('swift.common.middleware.s3api.s3request.' 
+ 'S3Request.check_signature') as mock_cs: + status, headers, body = self.call_s3api(req) + _, _, headers = self.swift.calls_with_headers[-1] + self.assertEqual(req.environ['s3api.auth_details'], { + 'access_key': 'test:tester', + 'signature': 'hmac', + 'string_to_sign': '\n'.join([ + 'PUT', '', '', date_header, + '/bucket/object?partNumber=1&uploadId=123456789abcdef']), + 'check_signature': mock_cs}) + + def test_invalid_uri(self): + req = Request.blank('/bucket/invalid\xffname', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidURI') + + def test_object_create_bad_md5_unreadable(self): + req = Request.blank('/bucket/object', + environ={'REQUEST_METHOD': 'PUT', + 'HTTP_AUTHORIZATION': 'AWS X:Y:Z', + 'HTTP_CONTENT_MD5': '#'}, + headers={'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidDigest') + + def test_object_create_bad_md5_too_short(self): + too_short_digest = hashlib.md5('hey').hexdigest()[:-1] + md5_str = too_short_digest.encode('base64').strip() + req = Request.blank( + '/bucket/object', + environ={'REQUEST_METHOD': 'PUT', + 'HTTP_AUTHORIZATION': 'AWS X:Y:Z', + 'HTTP_CONTENT_MD5': md5_str}, + headers={'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidDigest') + + def test_object_create_bad_md5_too_long(self): + too_long_digest = hashlib.md5('hey').hexdigest() + 'suffix' + md5_str = too_long_digest.encode('base64').strip() + req = Request.blank( + '/bucket/object', + environ={'REQUEST_METHOD': 'PUT', + 'HTTP_AUTHORIZATION': 'AWS X:Y:Z', + 'HTTP_CONTENT_MD5': md5_str}, + headers={'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidDigest') + + def test_invalid_metadata_directive(self): + req = Request.blank('/', + environ={'REQUEST_METHOD': 'GET', + 'HTTP_AUTHORIZATION': 'AWS X:Y:Z', + 'HTTP_X_AMZ_METADATA_DIRECTIVE': + 'invalid'}, + headers={'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidArgument') + + def test_invalid_storage_class(self): + req = Request.blank('/', + environ={'REQUEST_METHOD': 'GET', + 'HTTP_AUTHORIZATION': 'AWS X:Y:Z', + 'HTTP_X_AMZ_STORAGE_CLASS': 'INVALID'}, + headers={'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'InvalidStorageClass') + + def _test_unsupported_header(self, header): + req = Request.blank('/error', + environ={'REQUEST_METHOD': 'GET', + 'HTTP_AUTHORIZATION': 'AWS X:Y:Z'}, + headers={'x-amz-' + header: 'value', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'NotImplemented') + + def test_mfa(self): + self._test_unsupported_header('mfa') + + def test_server_side_encryption(self): + self._test_unsupported_header('server-side-encryption') + + def test_website_redirect_location(self): + self._test_unsupported_header('website-redirect-location') + + def _test_unsupported_resource(self, resource): + req = Request.blank('/error?' 
+ resource, + environ={'REQUEST_METHOD': 'GET', + 'HTTP_AUTHORIZATION': 'AWS X:Y:Z'}, + headers={'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'NotImplemented') + + def test_notification(self): + self._test_unsupported_resource('notification') + + def test_policy(self): + self._test_unsupported_resource('policy') + + def test_request_payment(self): + self._test_unsupported_resource('requestPayment') + + def test_torrent(self): + self._test_unsupported_resource('torrent') + + def test_website(self): + self._test_unsupported_resource('website') + + def test_cors(self): + self._test_unsupported_resource('cors') + + def test_tagging(self): + self._test_unsupported_resource('tagging') + + def test_restore(self): + self._test_unsupported_resource('restore') + + def test_unsupported_method(self): + req = Request.blank('/bucket?acl', + environ={'REQUEST_METHOD': 'POST'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + elem = fromstring(body, 'Error') + self.assertEqual(elem.find('./Code').text, 'MethodNotAllowed') + self.assertEqual(elem.find('./Method').text, 'POST') + self.assertEqual(elem.find('./ResourceType').text, 'ACL') + + def test_registered_defaults(self): + filter_factory(self.conf) + swift_info = utils.get_swift_info() + self.assertTrue('s3api' in swift_info) + self.assertEqual(swift_info['s3api'].get('max_bucket_listing'), + self.conf.max_bucket_listing) + self.assertEqual(swift_info['s3api'].get('max_parts_listing'), + self.conf.max_parts_listing) + self.assertEqual(swift_info['s3api'].get('max_upload_part_num'), + self.conf.max_upload_part_num) + self.assertEqual(swift_info['s3api'].get('max_multi_delete_objects'), + self.conf.max_multi_delete_objects) + + def test_check_pipeline(self): + with patch("swift.common.middleware.s3api.s3api.loadcontext"), \ + patch("swift.common.middleware.s3api.s3api.PipelineWrapper") \ + as pipeline: + self.conf.auth_pipeline_check = True + self.conf.__file__ = '' + + pipeline.return_value = 's3api tempauth proxy-server' + self.s3api.check_pipeline(self.conf) + + # This *should* still work; authtoken will remove our auth details, + # but the X-Auth-Token we drop in will remain + # if we found one in the response + pipeline.return_value = 's3api s3token authtoken keystoneauth ' \ + 'proxy-server' + self.s3api.check_pipeline(self.conf) + + # This should work now; no more doubled-up requests to keystone! 
+ pipeline.return_value = 's3api s3token keystoneauth proxy-server' + self.s3api.check_pipeline(self.conf) + + pipeline.return_value = 's3api swauth proxy-server' + self.s3api.check_pipeline(self.conf) + + # Note that authtoken would need to have delay_auth_decision=True + pipeline.return_value = 's3api authtoken s3token keystoneauth ' \ + 'proxy-server' + self.s3api.check_pipeline(self.conf) + + pipeline.return_value = 's3api proxy-server' + with self.assertRaises(ValueError) as cm: + self.s3api.check_pipeline(self.conf) + self.assertIn('expected auth between s3api and proxy-server', + cm.exception.message) + + pipeline.return_value = 'proxy-server' + with self.assertRaises(ValueError) as cm: + self.s3api.check_pipeline(self.conf) + self.assertIn("missing filters ['s3api']", + cm.exception.message) + + def test_s3api_initialization_with_disabled_pipeline_check(self): + with patch("swift.common.middleware.s3api.s3api.loadcontext"), \ + patch("swift.common.middleware.s3api.s3api.PipelineWrapper") \ + as pipeline: + # Disable pipeline check + self.conf.auth_pipeline_check = False + self.conf.__file__ = '' + + pipeline.return_value = 's3api tempauth proxy-server' + self.s3api.check_pipeline(self.conf) + + pipeline.return_value = 's3api s3token authtoken keystoneauth ' \ + 'proxy-server' + self.s3api.check_pipeline(self.conf) + + pipeline.return_value = 's3api swauth proxy-server' + self.s3api.check_pipeline(self.conf) + + pipeline.return_value = 's3api authtoken s3token keystoneauth ' \ + 'proxy-server' + self.s3api.check_pipeline(self.conf) + + pipeline.return_value = 's3api proxy-server' + self.s3api.check_pipeline(self.conf) + + pipeline.return_value = 'proxy-server' + with self.assertRaises(ValueError): + self.s3api.check_pipeline(self.conf) + + def test_signature_v4(self): + environ = { + 'REQUEST_METHOD': 'GET'} + headers = { + 'Authorization': + 'AWS4-HMAC-SHA256 ' + 'Credential=test:tester/20130524/US/s3/aws4_request, ' + 'SignedHeaders=host;x-amz-date,' + 'Signature=X', + 'X-Amz-Date': self.get_v4_amz_date_header(), + 'X-Amz-Content-SHA256': '0123456789'} + req = Request.blank('/bucket/object', environ=environ, headers=headers) + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200', body) + for _, _, headers in self.swift.calls_with_headers: + self.assertEqual('AWS test:tester:X', headers['Authorization']) + self.assertIn('X-Auth-Token', headers) + + def test_signature_v4_no_date(self): + environ = { + 'REQUEST_METHOD': 'GET'} + headers = { + 'Authorization': + 'AWS4-HMAC-SHA256 ' + 'Credential=test:tester/20130524/US/s3/aws4_request, ' + 'SignedHeaders=host;range;x-amz-date,' + 'Signature=X', + 'X-Amz-Content-SHA256': '0123456789'} + req = Request.blank('/bucket/object', environ=environ, headers=headers) + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '403') + self.assertEqual(self._get_error_code(body), 'AccessDenied') + + def test_signature_v4_no_payload(self): + environ = { + 'REQUEST_METHOD': 'GET'} + headers = { + 'Authorization': + 'AWS4-HMAC-SHA256 ' + 'Credential=test:tester/20130524/US/s3/aws4_request, ' + 'SignedHeaders=host;x-amz-date,' + 'Signature=X', + 'X-Amz-Date': self.get_v4_amz_date_header()} + req = Request.blank('/bucket/object', environ=environ, headers=headers) + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '400') + 
self.assertEqual(self._get_error_code(body), 'InvalidRequest') + self.assertEqual( + self._get_error_message(body), + 'Missing required header for this request: x-amz-content-sha256') + + def test_signature_v4_bad_authorization_string(self): + def test(auth_str, error, msg): + environ = { + 'REQUEST_METHOD': 'GET'} + headers = { + 'Authorization': auth_str, + 'X-Amz-Date': self.get_v4_amz_date_header(), + 'X-Amz-Content-SHA256': '0123456789'} + req = Request.blank('/bucket/object', environ=environ, + headers=headers) + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), error) + self.assertEqual(self._get_error_message(body), msg) + + auth_str = ('AWS4-HMAC-SHA256 ' + 'SignedHeaders=host;x-amz-date,' + 'Signature=X') + test(auth_str, 'AccessDenied', 'Access Denied.') + + auth_str = ('AWS4-HMAC-SHA256 ' + 'Credential=test:tester/20130524/US/s3/aws4_request, ' + 'Signature=X') + test(auth_str, 'AuthorizationHeaderMalformed', + 'The authorization header is malformed; the authorization ' + 'header requires three components: Credential, SignedHeaders, ' + 'and Signature.') + + auth_str = ('AWS4-HMAC-SHA256 ' + 'Credential=test:tester/20130524/US/s3/aws4_request, ' + 'SignedHeaders=host;x-amz-date') + test(auth_str, 'AccessDenied', 'Access Denied.') + + def test_canonical_string_v4(self): + def _get_req(path, environ): + if '?' in path: + path, query_string = path.split('?', 1) + else: + query_string = '' + + env = { + 'REQUEST_METHOD': 'GET', + 'PATH_INFO': path, + 'QUERY_STRING': query_string, + 'HTTP_DATE': 'Mon, 09 Sep 2011 23:36:00 GMT', + 'HTTP_X_AMZ_CONTENT_SHA256': + 'e3b0c44298fc1c149afbf4c8996fb924' + '27ae41e4649b934ca495991b7852b855', + 'HTTP_AUTHORIZATION': + 'AWS4-HMAC-SHA256 ' + 'Credential=X:Y/dt/reg/host/blah, ' + 'SignedHeaders=content-md5;content-type;date, ' + 'Signature=x', + } + env.update(environ) + with patch('swift.common.middleware.s3api.s3request.' + 'S3Request._validate_headers'): + req = SigV4Request(env, location=self.conf.location) + return req + + def canonical_string(path, environ): + return _get_req(path, environ)._canonical_request() + + def verify(hash_val, path, environ): + # See http://docs.aws.amazon.com/general/latest/gr + # /signature-v4-test-suite.html for where location, service, and + # signing key came from + with patch.object(self.conf, 'location', 'us-east-1'), \ + patch.object(swift.common.middleware.s3api.s3request, + 'SERVICE', 'host'): + req = _get_req(path, environ) + hash_in_sts = req._string_to_sign().split('\n')[3] + self.assertEqual(hash_val, hash_in_sts) + self.assertTrue(req.check_signature( + 'wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY')) + + # all next data got from aws4_testsuite from Amazon + # http://docs.aws.amazon.com/general/latest/gr/samples + # /aws4_testsuite.zip + # Each *expected* hash value is the 4th line in .sts in the + # test suite. 
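Illustrative aside (a sketch, not code from the Swift tree): the "4th line" that verify() above pulls out of the string-to-sign is the hex SHA-256 of the canonical request, because an AWS Signature Version 4 string-to-sign is built from exactly four lines. A minimal helper assuming only hashlib; the function and argument names are hypothetical:

    import hashlib

    def sigv4_string_to_sign(amz_date, credential_scope, canonical_request):
        # AWS Signature Version 4 layout; split('\n')[3] in the test above
        # selects the final line, the canonical-request hash.
        return '\n'.join([
            'AWS4-HMAC-SHA256',
            amz_date,              # e.g. 20110909T233600Z
            credential_scope,      # e.g. 20110909/us-east-1/host/aws4_request
            hashlib.sha256(canonical_request.encode('utf-8')).hexdigest(),
        ])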
+ + # get-vanilla + env = { + 'HTTP_AUTHORIZATION': ( + 'AWS4-HMAC-SHA256 ' + 'Credential=AKIDEXAMPLE/20110909/us-east-1/host/aws4_request, ' + 'SignedHeaders=date;host, ' + 'Signature=b27ccfbfa7df52a200ff74193ca6e32d' + '4b48b8856fab7ebf1c595d0670a7e470'), + 'HTTP_HOST': 'host.foo.com'} + verify('366b91fb121d72a00f46bbe8d395f53a' + '102b06dfb7e79636515208ed3fa606b1', + '/', env) + + # get-header-value-trim + env = { + 'REQUEST_METHOD': 'POST', + 'HTTP_AUTHORIZATION': ( + 'AWS4-HMAC-SHA256 ' + 'Credential=AKIDEXAMPLE/20110909/us-east-1/host/aws4_request, ' + 'SignedHeaders=date;host;p, ' + 'Signature=debf546796015d6f6ded8626f5ce9859' + '7c33b47b9164cf6b17b4642036fcb592'), + 'HTTP_HOST': 'host.foo.com', + 'HTTP_P': 'phfft'} + verify('dddd1902add08da1ac94782b05f9278c' + '08dc7468db178a84f8950d93b30b1f35', + '/', env) + + # get-utf8 (not exact) + env = { + 'HTTP_AUTHORIZATION': ( + 'AWS4-HMAC-SHA256 ' + 'Credential=AKIDEXAMPLE/20110909/us-east-1/host/aws4_request, ' + 'SignedHeaders=date;host, ' + 'Signature=8d6634c189aa8c75c2e51e106b6b5121' + 'bed103fdb351f7d7d4381c738823af74'), + 'HTTP_HOST': 'host.foo.com', + 'RAW_PATH_INFO': '/%E1%88%B4'} + + # This might look weird because actually S3 doesn't care about utf-8 + # encoded multi-byte bucket name from bucket-in-host name constraint. + # However, aws4_testsuite has only a sample hash with utf-8 *bucket* + # name to make sure the correctness (probably it can be used in other + # aws resource except s3) so, to test also utf-8, skip the bucket name + # validation in the following test. + + # NOTE: eventlet's PATH_INFO is unquoted + with patch('swift.common.middleware.s3api.s3request.' + 'validate_bucket_name'): + verify('27ba31df5dbc6e063d8f87d62eb07143' + 'f7f271c5330a917840586ac1c85b6f6b', + unquote('/%E1%88%B4'), env) + + # get-vanilla-query-order-key + env = { + 'HTTP_AUTHORIZATION': ( + 'AWS4-HMAC-SHA256 ' + 'Credential=AKIDEXAMPLE/20110909/us-east-1/host/aws4_request, ' + 'SignedHeaders=date;host, ' + 'Signature=0dc122f3b28b831ab48ba65cb47300de' + '53fbe91b577fe113edac383730254a3b'), + 'HTTP_HOST': 'host.foo.com'} + verify('2f23d14fe13caebf6dfda346285c6d9c' + '14f49eaca8f5ec55c627dd7404f7a727', + '/?a=foo&b=foo', env) + + # post-header-value-case + env = { + 'REQUEST_METHOD': 'POST', + 'HTTP_AUTHORIZATION': ( + 'AWS4-HMAC-SHA256 ' + 'Credential=AKIDEXAMPLE/20110909/us-east-1/host/aws4_request, ' + 'SignedHeaders=date;host;zoo, ' + 'Signature=273313af9d0c265c531e11db70bbd653' + 'f3ba074c1009239e8559d3987039cad7'), + 'HTTP_HOST': 'host.foo.com', + 'HTTP_ZOO': 'ZOOBAR'} + verify('3aae6d8274b8c03e2cc96fc7d6bda4b9' + 'bd7a0a184309344470b2c96953e124aa', + '/', env) + + # post-x-www-form-urlencoded-parameters + env = { + 'REQUEST_METHOD': 'POST', + 'HTTP_AUTHORIZATION': ( + 'AWS4-HMAC-SHA256 ' + 'Credential=AKIDEXAMPLE/20110909/us-east-1/host/aws4_request, ' + 'SignedHeaders=date;host;content-type, ' + 'Signature=b105eb10c6d318d2294de9d49dd8b031' + 'b55e3c3fe139f2e637da70511e9e7b71'), + 'HTTP_HOST': 'host.foo.com', + 'HTTP_X_AMZ_CONTENT_SHA256': + '3ba8907e7a252327488df390ed517c45' + 'b96dead033600219bdca7107d1d3f88a', + 'CONTENT_TYPE': + 'application/x-www-form-urlencoded; charset=utf8'} + verify('c4115f9e54b5cecf192b1eaa23b8e88e' + 'd8dc5391bd4fde7b3fff3d9c9fe0af1f', + '/', env) + + # post-x-www-form-urlencoded + env = { + 'REQUEST_METHOD': 'POST', + 'HTTP_AUTHORIZATION': ( + 'AWS4-HMAC-SHA256 ' + 'Credential=AKIDEXAMPLE/20110909/us-east-1/host/aws4_request, ' + 'SignedHeaders=date;host;content-type, ' + 
'Signature=5a15b22cf462f047318703b92e6f4f38' + '884e4a7ab7b1d6426ca46a8bd1c26cbc'), + 'HTTP_HOST': 'host.foo.com', + 'HTTP_X_AMZ_CONTENT_SHA256': + '3ba8907e7a252327488df390ed517c45' + 'b96dead033600219bdca7107d1d3f88a', + 'CONTENT_TYPE': + 'application/x-www-form-urlencoded'} + verify('4c5c6e4b52fb5fb947a8733982a8a5a6' + '1b14f04345cbfe6e739236c76dd48f74', + '/', env) + + # Note that boto does not do proper stripping (as of 2.42.0). + # These were determined by examining the StringToSignBytes element of + # resulting SignatureDoesNotMatch errors from AWS. + str1 = canonical_string('/', {'CONTENT_TYPE': 'text/plain', + 'HTTP_CONTENT_MD5': '##'}) + str2 = canonical_string('/', {'CONTENT_TYPE': '\x01\x02text/plain', + 'HTTP_CONTENT_MD5': '\x1f ##'}) + str3 = canonical_string('/', {'CONTENT_TYPE': 'text/plain \x10', + 'HTTP_CONTENT_MD5': '##\x18'}) + + self.assertEqual(str1, str2) + self.assertEqual(str2, str3) + + def test_mixture_param_v4(self): + # now we have an Authorization header + headers = { + 'Authorization': + 'AWS4-HMAC-SHA256 ' + 'Credential=test/20130524/US/s3/aws4_request_A, ' + 'SignedHeaders=hostA;rangeA;x-amz-dateA,' + 'Signature=X', + 'X-Amz-Date': self.get_v4_amz_date_header(), + 'X-Amz-Content-SHA256': '0123456789'} + + # and then, different auth info (Credential, SignedHeaders, Signature) + # in query + req = Request.blank('/bucket/object' + '?X-Amz-Algorithm=AWS4-HMAC-SHA256' + '&X-Amz-Credential=test/20T20Z/US/s3/aws4_requestB' + '&X-Amz-SignedHeaders=hostB' + '&X-Amz-Signature=Y', + environ={'REQUEST_METHOD': 'GET'}, + headers=headers) + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + # FIXME: should this failed as 400 or pass via query auth? + # for now, 403 forbidden for safety + self.assertEqual(status.split()[0], '403', body) + + # But if we are missing Signature in query param + req = Request.blank('/bucket/object' + '?X-Amz-Algorithm=AWS4-HMAC-SHA256' + '&X-Amz-Credential=test/20T20Z/US/s3/aws4_requestB' + '&X-Amz-SignedHeaders=hostB', + environ={'REQUEST_METHOD': 'GET'}, + headers=headers) + req.content_type = 'text/plain' + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '403', body) + + def test_s3api_with_only_s3_token(self): + self.swift = FakeSwift() + self.keystone_auth = KeystoneAuth( + self.swift, {'operator_roles': 'swift-user'}) + self.s3_token = S3Token( + self.keystone_auth, {'auth_uri': 'https://fakehost/identity'}) + self.s3api = S3ApiMiddleware(self.s3_token, self.conf) + req = Request.blank( + '/bucket', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS access:signature', + 'Date': self.get_date_header()}) + self.swift.register('PUT', '/v1/AUTH_TENANT_ID/bucket', + swob.HTTPCreated, {}, None) + self.swift.register('HEAD', '/v1/AUTH_TENANT_ID', + swob.HTTPOk, {}, None) + with patch.object(self.s3_token, '_json_request') as mock_req: + mock_resp = requests.Response() + mock_resp._content = json.dumps(GOOD_RESPONSE_V2) + mock_resp.status_code = 201 + mock_req.return_value = mock_resp + + status, headers, body = self.call_s3api(req) + self.assertEqual(body, '') + self.assertEqual(1, mock_req.call_count) + + def test_s3api_with_only_s3_token_v3(self): + self.swift = FakeSwift() + self.keystone_auth = KeystoneAuth( + self.swift, {'operator_roles': 'swift-user'}) + self.s3_token = S3Token( + self.keystone_auth, {'auth_uri': 'https://fakehost/identity'}) + self.s3api = S3ApiMiddleware(self.s3_token, self.conf) + req = Request.blank( + '/bucket', + 
environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS access:signature', + 'Date': self.get_date_header()}) + self.swift.register('PUT', '/v1/AUTH_PROJECT_ID/bucket', + swob.HTTPCreated, {}, None) + self.swift.register('HEAD', '/v1/AUTH_PROJECT_ID', + swob.HTTPOk, {}, None) + with patch.object(self.s3_token, '_json_request') as mock_req: + mock_resp = requests.Response() + mock_resp._content = json.dumps(GOOD_RESPONSE_V3) + mock_resp.status_code = 200 + mock_req.return_value = mock_resp + + status, headers, body = self.call_s3api(req) + self.assertEqual(body, '') + self.assertEqual(1, mock_req.call_count) + + def test_s3api_with_s3_token_and_auth_token(self): + self.swift = FakeSwift() + self.keystone_auth = KeystoneAuth( + self.swift, {'operator_roles': 'swift-user'}) + self.auth_token = AuthProtocol( + self.keystone_auth, {'delay_auth_decision': 'True'}) + self.s3_token = S3Token( + self.auth_token, {'auth_uri': 'https://fakehost/identity'}) + self.s3api = S3ApiMiddleware(self.s3_token, self.conf) + req = Request.blank( + '/bucket', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS access:signature', + 'Date': self.get_date_header()}) + self.swift.register('PUT', '/v1/AUTH_TENANT_ID/bucket', + swob.HTTPCreated, {}, None) + self.swift.register('HEAD', '/v1/AUTH_TENANT_ID', + swob.HTTPOk, {}, None) + with patch.object(self.s3_token, '_json_request') as mock_req: + with patch.object(self.auth_token, + '_do_fetch_token') as mock_fetch: + mock_resp = requests.Response() + mock_resp._content = json.dumps(GOOD_RESPONSE_V2) + mock_resp.status_code = 201 + mock_req.return_value = mock_resp + + mock_access_info = AccessInfoV2(GOOD_RESPONSE_V2) + mock_access_info.will_expire_soon = \ + lambda stale_duration: False + mock_fetch.return_value = (MagicMock(), mock_access_info) + + status, headers, body = self.call_s3api(req) + self.assertEqual(body, '') + self.assertEqual(1, mock_req.call_count) + # With X-Auth-Token, auth_token will call _do_fetch_token to + # connect to keystone in auth_token, again + self.assertEqual(1, mock_fetch.call_count) + + def test_s3api_with_s3_token_no_pass_token_to_auth_token(self): + self.swift = FakeSwift() + self.keystone_auth = KeystoneAuth( + self.swift, {'operator_roles': 'swift-user'}) + self.auth_token = AuthProtocol( + self.keystone_auth, {'delay_auth_decision': 'True'}) + self.s3_token = S3Token( + self.auth_token, {'auth_uri': 'https://fakehost/identity'}) + self.s3api = S3ApiMiddleware(self.s3_token, self.conf) + req = Request.blank( + '/bucket', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS access:signature', + 'Date': self.get_date_header()}) + self.swift.register('PUT', '/v1/AUTH_TENANT_ID/bucket', + swob.HTTPCreated, {}, None) + self.swift.register('HEAD', '/v1/AUTH_TENANT_ID', + swob.HTTPOk, {}, None) + with patch.object(self.s3_token, '_json_request') as mock_req: + with patch.object(self.auth_token, + '_do_fetch_token') as mock_fetch: + mock_resp = requests.Response() + no_token_id_good_resp = copy.deepcopy(GOOD_RESPONSE_V2) + # delete token id + del no_token_id_good_resp['access']['token']['id'] + mock_resp._content = json.dumps(no_token_id_good_resp) + mock_resp.status_code = 201 + mock_req.return_value = mock_resp + + mock_access_info = AccessInfoV2(GOOD_RESPONSE_V2) + mock_access_info.will_expire_soon = \ + lambda stale_duration: False + mock_fetch.return_value = (MagicMock(), mock_access_info) + + status, headers, body = self.call_s3api(req) + # No token provided from keystone result in 401 
Unauthorized + # at `swift.common.middleware.keystoneauth` because auth_token + # will remove all auth headers including 'X-Identity-Status'[1] + # and then, set X-Identity-Status: Invalid at [2] + # + # 1: https://github.com/openstack/keystonemiddleware/blob/ + # master/keystonemiddleware/auth_token/__init__.py#L620 + # 2: https://github.com/openstack/keystonemiddleware/blob/ + # master/keystonemiddleware/auth_token/__init__.py#L627-L629 + + self.assertEqual('403 Forbidden', status) + self.assertEqual(1, mock_req.call_count) + # if no token provided from keystone, we can skip the call to + # fetch the token + self.assertEqual(0, mock_fetch.call_count) + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_s3request.py swift-2.18.0/test/unit/common/middleware/s3api/test_s3request.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_s3request.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_s3request.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,765 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from mock import patch, MagicMock +import unittest + +from swift.common import swob +from swift.common.swob import Request, HTTPNoContent +from swift.common.middleware.s3api.utils import mktime +from swift.common.middleware.s3api.acl_handlers import get_acl_handler +from swift.common.middleware.s3api.subresource import ACL, User, Owner, \ + Grant, encode_acl +from test.unit.common.middleware.s3api.test_s3api import S3ApiTestCase +from swift.common.middleware.s3api.s3request import S3Request, \ + S3AclRequest, SigV4Request, SIGV4_X_AMZ_DATE_FORMAT +from swift.common.middleware.s3api.s3response import InvalidArgument, \ + NoSuchBucket, InternalError, \ + AccessDenied, SignatureDoesNotMatch, RequestTimeTooSkewed + +from test.unit import DebugLogger + +Fake_ACL_MAP = { + # HEAD Bucket + ('HEAD', 'HEAD', 'container'): + {'Resource': 'container', + 'Permission': 'READ'}, + # GET Bucket + ('GET', 'GET', 'container'): + {'Resource': 'container', + 'Permission': 'READ'}, + # HEAD Object + ('HEAD', 'HEAD', 'object'): + {'Resource': 'object', + 'Permission': 'READ'}, + # GET Object + ('GET', 'GET', 'object'): + {'Resource': 'object', + 'Permission': 'READ'}, +} + + +def _gen_test_acl_header(owner, permission=None, grantee=None, + resource='container'): + if permission is None: + return ACL(owner, []) + + if grantee is None: + grantee = User('test:tester') + return encode_acl(resource, ACL(owner, [Grant(grantee, permission)])) + + +class FakeResponse(object): + def __init__(self, s3_acl): + self.sysmeta_headers = {} + if s3_acl: + owner = Owner(id='test:tester', name='test:tester') + self.sysmeta_headers.update( + _gen_test_acl_header(owner, 'FULL_CONTROL', + resource='container')) + self.sysmeta_headers.update( + _gen_test_acl_header(owner, 'FULL_CONTROL', + resource='object')) + + +class FakeSwiftResponse(object): + def __init__(self): + self.environ = 
{ + 'PATH_INFO': '/v1/AUTH_test', + 'HTTP_X_TENANT_NAME': 'test', + 'HTTP_X_USER_NAME': 'tester', + 'HTTP_X_AUTH_TOKEN': 'token', + } + + +class TestRequest(S3ApiTestCase): + + def setUp(self): + super(TestRequest, self).setUp() + self.s3api.conf.s3_acl = True + self.swift.s3_acl = True + + @patch('swift.common.middleware.s3api.acl_handlers.ACL_MAP', Fake_ACL_MAP) + @patch('swift.common.middleware.s3api.s3request.S3AclRequest.authenticate', + lambda x, y: None) + def _test_get_response(self, method, container='bucket', obj=None, + permission=None, skip_check=False, + req_klass=S3Request, fake_swift_resp=None): + path = '/' + container + ('/' + obj if obj else '') + req = Request.blank(path, + environ={'REQUEST_METHOD': method}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + if issubclass(req_klass, S3AclRequest): + s3_req = req_klass( + req.environ, MagicMock(), + True, self.conf.storage_domain, + self.conf.location, self.conf.force_swift_request_proxy_log, + self.conf.dns_compliant_bucket_names, + self.conf.allow_multipart_uploads, self.conf.allow_no_owner) + else: + s3_req = req_klass( + req.environ, MagicMock(), + True, self.conf.storage_domain, + self.conf.location, self.conf.force_swift_request_proxy_log, + self.conf.dns_compliant_bucket_names, + self.conf.allow_multipart_uploads, self.conf.allow_no_owner) + s3_req.set_acl_handler( + get_acl_handler(s3_req.controller_name)(s3_req, DebugLogger())) + with patch('swift.common.middleware.s3api.s3request.S3Request.' + '_get_response') as mock_get_resp, \ + patch('swift.common.middleware.s3api.subresource.ACL.' + 'check_permission') as m_check_permission: + mock_get_resp.return_value = fake_swift_resp \ + or FakeResponse(self.conf.s3_acl) + return mock_get_resp, m_check_permission,\ + s3_req.get_response(self.s3api) + + def test_get_response_without_s3_acl(self): + self.s3api.conf.s3_acl = False + self.swift.s3_acl = False + mock_get_resp, m_check_permission, s3_resp = \ + self._test_get_response('HEAD') + self.assertFalse(hasattr(s3_resp, 'bucket_acl')) + self.assertFalse(hasattr(s3_resp, 'object_acl')) + self.assertEqual(mock_get_resp.call_count, 1) + self.assertEqual(m_check_permission.call_count, 0) + + def test_get_response_without_match_ACL_MAP(self): + with self.assertRaises(Exception) as e: + self._test_get_response('POST', req_klass=S3AclRequest) + self.assertEqual(e.exception.message, + 'No permission to be checked exists') + + def test_get_response_without_duplication_HEAD_request(self): + obj = 'object' + mock_get_resp, m_check_permission, s3_resp = \ + self._test_get_response('HEAD', obj=obj, + req_klass=S3AclRequest) + self.assertTrue(s3_resp.bucket_acl is not None) + self.assertTrue(s3_resp.object_acl is not None) + self.assertEqual(mock_get_resp.call_count, 1) + args, kargs = mock_get_resp.call_args_list[0] + get_resp_obj = args[3] + self.assertEqual(get_resp_obj, obj) + self.assertEqual(m_check_permission.call_count, 1) + args, kargs = m_check_permission.call_args + permission = args[1] + self.assertEqual(permission, 'READ') + + def test_get_response_with_check_object_permission(self): + obj = 'object' + mock_get_resp, m_check_permission, s3_resp = \ + self._test_get_response('GET', obj=obj, + req_klass=S3AclRequest) + self.assertTrue(s3_resp.bucket_acl is not None) + self.assertTrue(s3_resp.object_acl is not None) + self.assertEqual(mock_get_resp.call_count, 2) + args, kargs = mock_get_resp.call_args_list[0] + get_resp_obj = args[3] + self.assertEqual(get_resp_obj, obj) + 
self.assertEqual(m_check_permission.call_count, 1) + args, kargs = m_check_permission.call_args + permission = args[1] + self.assertEqual(permission, 'READ') + + def test_get_response_with_check_container_permission(self): + mock_get_resp, m_check_permission, s3_resp = \ + self._test_get_response('GET', + req_klass=S3AclRequest) + self.assertTrue(s3_resp.bucket_acl is not None) + self.assertTrue(s3_resp.object_acl is not None) + self.assertEqual(mock_get_resp.call_count, 2) + args, kargs = mock_get_resp.call_args_list[0] + get_resp_obj = args[3] + self.assertTrue(get_resp_obj is '') + self.assertEqual(m_check_permission.call_count, 1) + args, kargs = m_check_permission.call_args + permission = args[1] + self.assertEqual(permission, 'READ') + + def test_get_validate_param(self): + def create_s3request_with_param(param, value): + req = Request.blank( + '/bucket?%s=%s' % (param, value), + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + return S3Request(req.environ) + + s3req = create_s3request_with_param('max-keys', '1') + + # a param in the range + self.assertEqual(s3req.get_validated_param('max-keys', 1000, 1000), 1) + self.assertEqual(s3req.get_validated_param('max-keys', 0, 1), 1) + + # a param in the out of the range + self.assertEqual(s3req.get_validated_param('max-keys', 0, 0), 0) + + # a param in the out of the integer range + s3req = create_s3request_with_param('max-keys', '1' * 30) + with self.assertRaises(InvalidArgument) as result: + s3req.get_validated_param('max-keys', 1) + self.assertTrue( + 'not an integer or within integer range' in result.exception.body) + self.assertEqual( + result.exception.headers['content-type'], 'application/xml') + + # a param is negative integer + s3req = create_s3request_with_param('max-keys', '-1') + with self.assertRaises(InvalidArgument) as result: + s3req.get_validated_param('max-keys', 1) + self.assertTrue( + 'must be an integer between 0 and' in result.exception.body) + self.assertEqual( + result.exception.headers['content-type'], 'application/xml') + + # a param is not integer + s3req = create_s3request_with_param('max-keys', 'invalid') + with self.assertRaises(InvalidArgument) as result: + s3req.get_validated_param('max-keys', 1) + self.assertTrue( + 'not an integer or within integer range' in result.exception.body) + self.assertEqual( + result.exception.headers['content-type'], 'application/xml') + + def test_authenticate_delete_Authorization_from_s3req(self): + req = Request.blank('/bucket/obj', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + with patch.object(Request, 'get_response') as m_swift_resp, \ + patch.object(Request, 'remote_user', 'authorized'): + + m_swift_resp.return_value = FakeSwiftResponse() + s3_req = S3AclRequest(req.environ, MagicMock()) + self.assertNotIn('s3api.auth_details', s3_req.environ) + self.assertNotIn('HTTP_AUTHORIZATION', s3_req.environ) + self.assertNotIn('Authorization', s3_req.headers) + self.assertEqual(s3_req.token, 'token') + + def test_to_swift_req_Authorization_not_exist_in_swreq(self): + container = 'bucket' + obj = 'obj' + method = 'GET' + req = Request.blank('/%s/%s' % (container, obj), + environ={'REQUEST_METHOD': method}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + with patch.object(Request, 'get_response') as m_swift_resp, \ + patch.object(Request, 'remote_user', 'authorized'): + + 
m_swift_resp.return_value = FakeSwiftResponse() + s3_req = S3AclRequest(req.environ, MagicMock()) + sw_req = s3_req.to_swift_req(method, container, obj) + self.assertNotIn('s3api.auth_details', sw_req.environ) + self.assertNotIn('HTTP_AUTHORIZATION', sw_req.environ) + self.assertNotIn('Authorization', sw_req.headers) + self.assertEqual(sw_req.headers['X-Auth-Token'], 'token') + + def test_to_swift_req_subrequest_proxy_access_log(self): + container = 'bucket' + obj = 'obj' + method = 'GET' + + # force_swift_request_proxy_log is True + req = Request.blank('/%s/%s' % (container, obj), + environ={'REQUEST_METHOD': method, + 'swift.proxy_access_log_made': True}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + with patch.object(Request, 'get_response') as m_swift_resp, \ + patch.object(Request, 'remote_user', 'authorized'): + m_swift_resp.return_value = FakeSwiftResponse() + s3_req = S3AclRequest( + req.environ, MagicMock(), force_request_log=True) + sw_req = s3_req.to_swift_req(method, container, obj) + self.assertFalse(sw_req.environ['swift.proxy_access_log_made']) + + # force_swift_request_proxy_log is False + req = Request.blank('/%s/%s' % (container, obj), + environ={'REQUEST_METHOD': method, + 'swift.proxy_access_log_made': True}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + with patch.object(Request, 'get_response') as m_swift_resp, \ + patch.object(Request, 'remote_user', 'authorized'): + m_swift_resp.return_value = FakeSwiftResponse() + s3_req = S3AclRequest( + req.environ, MagicMock(), force_request_log=False) + sw_req = s3_req.to_swift_req(method, container, obj) + self.assertTrue(sw_req.environ['swift.proxy_access_log_made']) + + def test_get_container_info(self): + self.swift.register('HEAD', '/v1/AUTH_test/bucket', HTTPNoContent, + {'x-container-read': 'foo', + 'X-container-object-count': 5, + 'X-container-meta-foo': 'bar'}, None) + req = Request.blank('/bucket', environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + s3_req = S3Request(req.environ) + # first, call get_response('HEAD') + info = s3_req.get_container_info(self.app) + self.assertTrue('status' in info) # sanity + self.assertEqual(204, info['status']) # sanity + self.assertEqual('foo', info['read_acl']) # sanity + self.assertEqual('5', info['object_count']) # sanity + self.assertEqual({'foo': 'bar'}, info['meta']) # sanity + with patch( + 'swift.common.middleware.s3api.s3request.get_container_info', + return_value={'status': 204}) as mock_info: + # Then all calls goes to get_container_info + for x in xrange(10): + info = s3_req.get_container_info(self.swift) + self.assertTrue('status' in info) # sanity + self.assertEqual(204, info['status']) # sanity + self.assertEqual(10, mock_info.call_count) + + expected_errors = [(404, NoSuchBucket), (0, InternalError)] + for status, expected_error in expected_errors: + with patch('swift.common.middleware.s3api.s3request.' 
+ 'get_container_info', + return_value={'status': status}): + self.assertRaises( + expected_error, s3_req.get_container_info, MagicMock()) + + def test_date_header_missing(self): + self.swift.register('HEAD', '/v1/AUTH_test/nojunk', swob.HTTPNotFound, + {}, None) + req = Request.blank('/nojunk', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={'Authorization': 'AWS test:tester:hmac'}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '403') + self.assertEqual(body, '') + + def test_date_header_expired(self): + self.swift.register('HEAD', '/v1/AUTH_test/nojunk', swob.HTTPNotFound, + {}, None) + req = Request.blank('/nojunk', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': 'Fri, 01 Apr 2014 12:00:00 GMT'}) + + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '403') + self.assertEqual(body, '') + + def test_date_header_with_x_amz_date_valid(self): + self.swift.register('HEAD', '/v1/AUTH_test/nojunk', swob.HTTPNotFound, + {}, None) + req = Request.blank('/nojunk', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': 'Fri, 01 Apr 2014 12:00:00 GMT', + 'x-amz-date': self.get_date_header()}) + + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '404') + self.assertEqual(body, '') + + def test_date_header_with_x_amz_date_expired(self): + self.swift.register('HEAD', '/v1/AUTH_test/nojunk', swob.HTTPNotFound, + {}, None) + req = Request.blank('/nojunk', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header(), + 'x-amz-date': + 'Fri, 01 Apr 2014 12:00:00 GMT'}) + + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '403') + self.assertEqual(body, '') + + def _test_request_timestamp_sigv4(self, date_header): + # signature v4 here + environ = { + 'REQUEST_METHOD': 'GET'} + + if 'X-Amz-Date' in date_header: + included_header = 'x-amz-date' + elif 'Date' in date_header: + included_header = 'date' + else: + self.fail('Invalid date header specified as test') + + headers = { + 'Authorization': + 'AWS4-HMAC-SHA256 ' + 'Credential=test/20130524/US/s3/aws4_request, ' + 'SignedHeaders=%s,' + 'Signature=X' % ';'.join(sorted(['host', included_header])), + 'X-Amz-Content-SHA256': '0123456789'} + + headers.update(date_header) + req = Request.blank('/', environ=environ, headers=headers) + sigv4_req = SigV4Request(req.environ) + + if 'X-Amz-Date' in date_header: + timestamp = mktime( + date_header['X-Amz-Date'], SIGV4_X_AMZ_DATE_FORMAT) + elif 'Date' in date_header: + timestamp = mktime(date_header['Date']) + + self.assertEqual(timestamp, int(sigv4_req.timestamp)) + + def test_request_timestamp_sigv4(self): + access_denied_message = \ + 'AWS authentication requires a valid Date or x-amz-date header' + + # normal X-Amz-Date header + date_header = {'X-Amz-Date': self.get_v4_amz_date_header()} + self._test_request_timestamp_sigv4(date_header) + + # normal Date header + date_header = {'Date': self.get_date_header()} + self._test_request_timestamp_sigv4(date_header) + + # mangled X-Amz-Date header + date_header = {'X-Amz-Date': self.get_v4_amz_date_header()[:-1]} + with self.assertRaises(AccessDenied) as cm: + self._test_request_timestamp_sigv4(date_header) + + self.assertEqual('403 Forbidden', cm.exception.message) + self.assertIn(access_denied_message, cm.exception.body) + + # mangled Date header + date_header = {'Date': 
self.get_date_header()[20:]} + with self.assertRaises(AccessDenied) as cm: + self._test_request_timestamp_sigv4(date_header) + + self.assertEqual('403 Forbidden', cm.exception.message) + self.assertIn(access_denied_message, cm.exception.body) + + # Negative timestamp + date_header = {'X-Amz-Date': '00160523T054055Z'} + with self.assertRaises(AccessDenied) as cm: + self._test_request_timestamp_sigv4(date_header) + + self.assertEqual('403 Forbidden', cm.exception.message) + self.assertIn(access_denied_message, cm.exception.body) + + # far-past Date header + date_header = {'Date': 'Tue, 07 Jul 999 21:53:04 GMT'} + with self.assertRaises(AccessDenied) as cm: + self._test_request_timestamp_sigv4(date_header) + + self.assertEqual('403 Forbidden', cm.exception.message) + self.assertIn(access_denied_message, cm.exception.body) + + # far-future Date header + date_header = {'Date': 'Tue, 07 Jul 9999 21:53:04 GMT'} + with self.assertRaises(RequestTimeTooSkewed) as cm: + self._test_request_timestamp_sigv4(date_header) + + self.assertEqual('403 Forbidden', cm.exception.message) + self.assertIn('The difference between the request time and the ' + 'current time is too large.', cm.exception.body) + + def _test_request_timestamp_sigv2(self, date_header): + # signature v4 here + environ = { + 'REQUEST_METHOD': 'GET'} + + headers = {'Authorization': 'AWS test:tester:hmac'} + headers.update(date_header) + req = Request.blank('/', environ=environ, headers=headers) + sigv2_req = S3Request(req.environ) + + if 'X-Amz-Date' in date_header: + timestamp = mktime(req.headers.get('X-Amz-Date')) + elif 'Date' in date_header: + timestamp = mktime(req.headers.get('Date')) + else: + self.fail('Invalid date header specified as test') + self.assertEqual(timestamp, int(sigv2_req.timestamp)) + + def test_request_timestamp_sigv2(self): + access_denied_message = \ + 'AWS authentication requires a valid Date or x-amz-date header' + + # In v2 format, normal X-Amz-Date header is same + date_header = {'X-Amz-Date': self.get_date_header()} + self._test_request_timestamp_sigv2(date_header) + + # normal Date header + date_header = {'Date': self.get_date_header()} + self._test_request_timestamp_sigv2(date_header) + + # mangled X-Amz-Date header + date_header = {'X-Amz-Date': self.get_date_header()[:-20]} + with self.assertRaises(AccessDenied) as cm: + self._test_request_timestamp_sigv2(date_header) + + self.assertEqual('403 Forbidden', cm.exception.message) + self.assertIn(access_denied_message, cm.exception.body) + + # mangled Date header + date_header = {'Date': self.get_date_header()[:-20]} + with self.assertRaises(AccessDenied) as cm: + self._test_request_timestamp_sigv2(date_header) + + self.assertEqual('403 Forbidden', cm.exception.message) + self.assertIn(access_denied_message, cm.exception.body) + + # Negative timestamp + date_header = {'X-Amz-Date': '00160523T054055Z'} + with self.assertRaises(AccessDenied) as cm: + self._test_request_timestamp_sigv2(date_header) + + self.assertEqual('403 Forbidden', cm.exception.message) + self.assertIn(access_denied_message, cm.exception.body) + + # far-past Date header + date_header = {'Date': 'Tue, 07 Jul 999 21:53:04 GMT'} + with self.assertRaises(AccessDenied) as cm: + self._test_request_timestamp_sigv2(date_header) + + self.assertEqual('403 Forbidden', cm.exception.message) + self.assertIn(access_denied_message, cm.exception.body) + + # far-future Date header + date_header = {'Date': 'Tue, 07 Jul 9999 21:53:04 GMT'} + with self.assertRaises(RequestTimeTooSkewed) as cm: + 
self._test_request_timestamp_sigv2(date_header) + + self.assertEqual('403 Forbidden', cm.exception.message) + self.assertIn('The difference between the request time and the ' + 'current time is too large.', cm.exception.body) + + def test_headers_to_sign_sigv4(self): + environ = { + 'REQUEST_METHOD': 'GET'} + + # host and x-amz-date + x_amz_date = self.get_v4_amz_date_header() + headers = { + 'Authorization': + 'AWS4-HMAC-SHA256 ' + 'Credential=test/20130524/US/s3/aws4_request, ' + 'SignedHeaders=host;x-amz-content-sha256;x-amz-date,' + 'Signature=X', + 'X-Amz-Content-SHA256': '0123456789', + 'Date': self.get_date_header(), + 'X-Amz-Date': x_amz_date} + + req = Request.blank('/', environ=environ, headers=headers) + sigv4_req = SigV4Request(req.environ) + + headers_to_sign = sigv4_req._headers_to_sign() + self.assertEqual(headers_to_sign, [ + ('host', 'localhost:80'), + ('x-amz-content-sha256', '0123456789'), + ('x-amz-date', x_amz_date)]) + + # no x-amz-date + headers = { + 'Authorization': + 'AWS4-HMAC-SHA256 ' + 'Credential=test/20130524/US/s3/aws4_request, ' + 'SignedHeaders=host;x-amz-content-sha256,' + 'Signature=X', + 'X-Amz-Content-SHA256': '0123456789', + 'Date': self.get_date_header()} + + req = Request.blank('/', environ=environ, headers=headers) + sigv4_req = SigV4Request(req.environ) + + headers_to_sign = sigv4_req._headers_to_sign() + self.assertEqual(headers_to_sign, [ + ('host', 'localhost:80'), + ('x-amz-content-sha256', '0123456789')]) + + # SignedHeaders says, host and x-amz-date included but there is not + # X-Amz-Date header + headers = { + 'Authorization': + 'AWS4-HMAC-SHA256 ' + 'Credential=test/20130524/US/s3/aws4_request, ' + 'SignedHeaders=host;x-amz-content-sha256;x-amz-date,' + 'Signature=X', + 'X-Amz-Content-SHA256': '0123456789', + 'Date': self.get_date_header()} + + req = Request.blank('/', environ=environ, headers=headers) + with self.assertRaises(SignatureDoesNotMatch): + sigv4_req = SigV4Request(req.environ) + sigv4_req._headers_to_sign() + + def test_canonical_uri_sigv2(self): + environ = { + 'HTTP_HOST': 'bucket1.s3.test.com', + 'REQUEST_METHOD': 'GET'} + + headers = {'Authorization': 'AWS test:tester:hmac', + 'X-Amz-Date': self.get_date_header()} + + # Virtual hosted-style + req = Request.blank('/', environ=environ, headers=headers) + sigv2_req = S3Request( + req.environ, storage_domain='s3.test.com') + uri = sigv2_req._canonical_uri() + self.assertEqual(uri, '/bucket1/') + self.assertEqual(req.environ['PATH_INFO'], '/') + + req = Request.blank('/obj1', environ=environ, headers=headers) + sigv2_req = S3Request( + req.environ, storage_domain='s3.test.com') + uri = sigv2_req._canonical_uri() + self.assertEqual(uri, '/bucket1/obj1') + self.assertEqual(req.environ['PATH_INFO'], '/obj1') + + environ = { + 'HTTP_HOST': 's3.test.com', + 'REQUEST_METHOD': 'GET'} + + # Path-style + req = Request.blank('/', environ=environ, headers=headers) + sigv2_req = S3Request(req.environ, storage_domain='') + uri = sigv2_req._canonical_uri() + + self.assertEqual(uri, '/') + self.assertEqual(req.environ['PATH_INFO'], '/') + + req = Request.blank('/bucket1/obj1', + environ=environ, + headers=headers) + sigv2_req = S3Request(req.environ, storage_domain='') + uri = sigv2_req._canonical_uri() + self.assertEqual(uri, '/bucket1/obj1') + self.assertEqual(req.environ['PATH_INFO'], '/bucket1/obj1') + + def test_canonical_uri_sigv4(self): + environ = { + 'HTTP_HOST': 'bucket.s3.test.com', + 'REQUEST_METHOD': 'GET'} + + # host and x-amz-date + x_amz_date = self.get_v4_amz_date_header() + 
headers = { + 'Authorization': + 'AWS4-HMAC-SHA256 ' + 'Credential=test/20130524/US/s3/aws4_request, ' + 'SignedHeaders=host;x-amz-content-sha256;x-amz-date,' + 'Signature=X', + 'X-Amz-Content-SHA256': '0123456789', + 'Date': self.get_date_header(), + 'X-Amz-Date': x_amz_date} + + # Virtual hosted-style + self.conf.storage_domain = 's3.test.com' + req = Request.blank('/', environ=environ, headers=headers) + sigv4_req = SigV4Request(req.environ) + uri = sigv4_req._canonical_uri() + + self.assertEqual(uri, '/') + self.assertEqual(req.environ['PATH_INFO'], '/') + + req = Request.blank('/obj1', environ=environ, headers=headers) + sigv4_req = SigV4Request(req.environ) + uri = sigv4_req._canonical_uri() + + self.assertEqual(uri, '/obj1') + self.assertEqual(req.environ['PATH_INFO'], '/obj1') + + environ = { + 'HTTP_HOST': 's3.test.com', + 'REQUEST_METHOD': 'GET'} + + # Path-style + self.conf.storage_domain = '' + req = Request.blank('/', environ=environ, headers=headers) + sigv4_req = SigV4Request(req.environ) + uri = sigv4_req._canonical_uri() + + self.assertEqual(uri, '/') + self.assertEqual(req.environ['PATH_INFO'], '/') + + req = Request.blank('/bucket/obj1', + environ=environ, + headers=headers) + sigv4_req = SigV4Request(req.environ) + uri = sigv4_req._canonical_uri() + + self.assertEqual(uri, '/bucket/obj1') + self.assertEqual(req.environ['PATH_INFO'], '/bucket/obj1') + + @patch.object(S3Request, '_validate_headers', lambda *a: None) + def test_check_signature_sigv2(self): + # See https://web.archive.org/web/20151226025049/http:// + # docs.aws.amazon.com//AmazonS3/latest/dev/RESTAuthentication.html + req = Request.blank('/photos/puppy.jpg', headers={ + 'Host': 'johnsmith.s3.amazonaws.com', + 'Date': 'Tue, 27 Mar 2007 19:36:42 +0000', + 'Authorization': ('AWS AKIAIOSFODNN7EXAMPLE:' + 'bWq2s1WEIj+Ydj0vQ697zp+IXMU='), + }) + sigv2_req = S3Request(req.environ, storage_domain='s3.amazonaws.com') + expected_sts = '\n'.join([ + 'GET', + '', + '', + 'Tue, 27 Mar 2007 19:36:42 +0000', + '/johnsmith/photos/puppy.jpg', + ]) + self.assertEqual(expected_sts, sigv2_req._string_to_sign()) + self.assertTrue(sigv2_req.check_signature( + 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY')) + + req = Request.blank('/photos/puppy.jpg', method='PUT', headers={ + 'Content-Type': 'image/jpeg', + 'Content-Length': '94328', + 'Host': 'johnsmith.s3.amazonaws.com', + 'Date': 'Tue, 27 Mar 2007 21:15:45 +0000', + 'Authorization': ('AWS AKIAIOSFODNN7EXAMPLE:' + 'MyyxeRY7whkBe+bq8fHCL/2kKUg='), + }) + sigv2_req = S3Request(req.environ, storage_domain='s3.amazonaws.com') + expected_sts = '\n'.join([ + 'PUT', + '', + 'image/jpeg', + 'Tue, 27 Mar 2007 21:15:45 +0000', + '/johnsmith/photos/puppy.jpg', + ]) + self.assertEqual(expected_sts, sigv2_req._string_to_sign()) + self.assertTrue(sigv2_req.check_signature( + 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY')) + + req = Request.blank( + '/?prefix=photos&max-keys=50&marker=puppy', + headers={ + 'User-Agent': 'Mozilla/5.0', + 'Host': 'johnsmith.s3.amazonaws.com', + 'Date': 'Tue, 27 Mar 2007 19:42:41 +0000', + 'Authorization': ('AWS AKIAIOSFODNN7EXAMPLE:' + 'htDYFYduRNen8P9ZfE/s9SuKy0U='), + }) + sigv2_req = S3Request(req.environ, storage_domain='s3.amazonaws.com') + expected_sts = '\n'.join([ + 'GET', + '', + '', + 'Tue, 27 Mar 2007 19:42:41 +0000', + '/johnsmith/', + ]) + self.assertEqual(expected_sts, sigv2_req._string_to_sign()) + self.assertTrue(sigv2_req.check_signature( + 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY')) + +if __name__ == '__main__': + unittest.main() diff -Nru 
swift-2.17.0/test/unit/common/middleware/s3api/test_s3response.py swift-2.18.0/test/unit/common/middleware/s3api/test_s3response.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_s3response.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_s3response.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,80 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from swift.common.swob import Response +from swift.common.utils import HeaderKeyDict +from swift.common.middleware.s3api.s3response import S3Response +from swift.common.middleware.s3api.utils import sysmeta_prefix + + +class TestResponse(unittest.TestCase): + def test_from_swift_resp_slo(self): + for expected, header_vals in \ + ((True, ('true', '1')), (False, ('false', 'ugahhh', None))): + for val in header_vals: + resp = Response(headers={'X-Static-Large-Object': val}) + s3resp = S3Response.from_swift_resp(resp) + self.assertEqual(expected, s3resp.is_slo) + + def test_response_s3api_sysmeta_headers(self): + for _server_type in ('object', 'container'): + swift_headers = HeaderKeyDict( + {sysmeta_prefix(_server_type) + 'test': 'ok'}) + resp = Response(headers=swift_headers) + s3resp = S3Response.from_swift_resp(resp) + self.assertEqual(swift_headers, s3resp.sysmeta_headers) + + def test_response_s3api_sysmeta_headers_ignore_other_sysmeta(self): + for _server_type in ('object', 'container'): + swift_headers = HeaderKeyDict( + # sysmeta not leading sysmeta_prefix even including s3api word + {'x-%s-sysmeta-test-s3api' % _server_type: 'ok', + sysmeta_prefix(_server_type) + 'test': 'ok'}) + resp = Response(headers=swift_headers) + s3resp = S3Response.from_swift_resp(resp) + expected_headers = HeaderKeyDict( + {sysmeta_prefix(_server_type) + 'test': 'ok'}) + self.assertEqual(expected_headers, s3resp.sysmeta_headers) + + def test_response_s3api_sysmeta_from_swift3_sysmeta(self): + for _server_type in ('object', 'container'): + # swift could return older swift3 sysmeta + swift_headers = HeaderKeyDict( + {('x-%s-sysmeta-swift3-' % _server_type) + 'test': 'ok'}) + resp = Response(headers=swift_headers) + s3resp = S3Response.from_swift_resp(resp) + expected_headers = HeaderKeyDict( + {sysmeta_prefix(_server_type) + 'test': 'ok'}) + # but Response class should translates as s3api sysmeta + self.assertEqual(expected_headers, s3resp.sysmeta_headers) + + def test_response_swift3_sysmeta_does_not_overwrite_s3api_sysmeta(self): + for _server_type in ('object', 'container'): + # same key name except sysmeta prefix + swift_headers = HeaderKeyDict( + {('x-%s-sysmeta-swift3-' % _server_type) + 'test': 'ng', + sysmeta_prefix(_server_type) + 'test': 'ok'}) + resp = Response(headers=swift_headers) + s3resp = S3Response.from_swift_resp(resp) + expected_headers = HeaderKeyDict( + {sysmeta_prefix(_server_type) + 'test': 'ok'}) + # but only s3api sysmeta remains in the response sysmeta_headers + self.assertEqual(expected_headers, s3resp.sysmeta_headers) 
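Illustrative aside (a sketch reusing only names already imported in this test module, not code from the Swift tree): the legacy-sysmeta translation that these TestResponse cases assert, shown end to end.

    from swift.common.swob import Response
    from swift.common.utils import HeaderKeyDict
    from swift.common.middleware.s3api.s3response import S3Response
    from swift.common.middleware.s3api.utils import sysmeta_prefix

    # A Swift response still carrying older swift3 sysmeta...
    legacy = Response(headers={'x-container-sysmeta-swift3-test': 'ok'})
    s3resp = S3Response.from_swift_resp(legacy)
    # ...is re-keyed under the current s3api sysmeta prefix, as asserted above.
    assert s3resp.sysmeta_headers == HeaderKeyDict(
        {sysmeta_prefix('container') + 'test': 'ok'})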
+ + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_s3token.py swift-2.18.0/test/unit/common/middleware/s3api/test_s3token.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_s3token.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_s3token.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,821 @@ +# Copyright 2012 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import copy +import base64 +import json +import logging +import time +import unittest +import uuid + +import fixtures +import mock +import requests +from requests_mock.contrib import fixture as rm_fixture +from six.moves import urllib + +from swift.common.middleware.s3api import s3token +from swift.common.swob import Request, Response +from swift.common.wsgi import ConfigFileError + +GOOD_RESPONSE_V2 = {'access': { + 'user': { + 'username': 'S3_USER', + 'name': 'S3_USER', + 'id': 'USER_ID', + 'roles': [ + {'name': 'swift-user'}, + {'name': '_member_'}, + ], + }, + 'token': { + 'id': 'TOKEN_ID', + 'tenant': { + 'id': 'TENANT_ID', + 'name': 'TENANT_NAME' + } + } +}} +GOOD_RESPONSE_V3 = {'token': { + 'user': { + 'domain': { + 'name': 'Default', + 'id': 'default', + }, + 'name': 'S3_USER', + 'id': 'USER_ID', + }, + 'project': { + 'domain': { + 'name': 'PROJECT_DOMAIN_NAME', + 'id': 'PROJECT_DOMAIN_ID', + }, + 'name': 'PROJECT_NAME', + 'id': 'PROJECT_ID', + }, + 'roles': [ + {'name': 'swift-user'}, + {'name': '_member_'}, + ], +}} + + +class TestResponse(requests.Response): + """Utility class to wrap requests.Response. + + Class used to wrap requests.Response and provide some convenience to + initialize with a dict. 
+ """ + + def __init__(self, data): + self._text = None + super(TestResponse, self).__init__() + if isinstance(data, dict): + self.status_code = data.get('status_code', 200) + headers = data.get('headers') + if headers: + self.headers.update(headers) + # Fake the text attribute to streamline Response creation + # _content is defined by requests.Response + self._content = data.get('text') + else: + self.status_code = data + + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + @property + def text(self): + return self.content + + +class FakeApp(object): + calls = 0 + """This represents a WSGI app protected by the auth_token middleware.""" + def __call__(self, env, start_response): + self.calls += 1 + resp = Response() + resp.environ = env + return resp(env, start_response) + + +class S3TokenMiddlewareTestBase(unittest.TestCase): + + TEST_AUTH_URI = 'https://fakehost/identity/v2.0' + TEST_URL = '%s/s3tokens' % (TEST_AUTH_URI, ) + TEST_DOMAIN_ID = '1' + TEST_DOMAIN_NAME = 'aDomain' + TEST_GROUP_ID = uuid.uuid4().hex + TEST_ROLE_ID = uuid.uuid4().hex + TEST_TENANT_ID = '1' + TEST_TENANT_NAME = 'aTenant' + TEST_TOKEN = 'aToken' + TEST_TRUST_ID = 'aTrust' + TEST_USER = 'test' + TEST_USER_ID = uuid.uuid4().hex + + TEST_ROOT_URL = 'http://127.0.0.1:5000/' + + def setUp(self): + super(S3TokenMiddlewareTestBase, self).setUp() + self.logger = fixtures.FakeLogger(level=logging.DEBUG) + self.logger.setUp() + self.time_patcher = mock.patch.object(time, 'time', lambda: 1234) + self.time_patcher.start() + + self.app = FakeApp() + self.conf = { + 'auth_uri': self.TEST_AUTH_URI, + } + self.middleware = s3token.S3Token(self.app, self.conf) + + self.requests_mock = rm_fixture.Fixture() + self.requests_mock.setUp() + + def tearDown(self): + self.requests_mock.cleanUp() + self.time_patcher.stop() + self.logger.cleanUp() + super(S3TokenMiddlewareTestBase, self).tearDown() + + def start_fake_response(self, status, headers): + self.response_status = int(status.split(' ', 1)[0]) + self.response_headers = dict(headers) + + +class S3TokenMiddlewareTestGood(S3TokenMiddlewareTestBase): + + def setUp(self): + super(S3TokenMiddlewareTestGood, self).setUp() + + self.requests_mock.post(self.TEST_URL, + status_code=201, + json=GOOD_RESPONSE_V2) + + # Ignore the request and pass to the next middleware in the + # pipeline if no path has been specified. 
+ def test_no_path_request(self): + req = Request.blank('/') + self.middleware(req.environ, self.start_fake_response) + self.assertEqual(self.response_status, 200) + + # Ignore the request and pass to the next middleware in the + # pipeline if no Authorization header has been specified + def test_without_authorization(self): + req = Request.blank('/v1/AUTH_cfa/c/o') + self.middleware(req.environ, self.start_fake_response) + self.assertEqual(self.response_status, 200) + + def test_nukes_auth_headers(self): + client_env = { + 'HTTP_X_IDENTITY_STATUS': 'Confirmed', + 'HTTP_X_ROLES': 'admin,_member_,swift-user', + 'HTTP_X_TENANT_ID': 'cfa' + } + req = Request.blank('/v1/AUTH_cfa/c/o', environ=client_env) + self.middleware(req.environ, self.start_fake_response) + self.assertEqual(self.response_status, 200) + for key in client_env: + self.assertNotIn(key, req.environ) + + def test_without_auth_storage_token(self): + req = Request.blank('/v1/AUTH_cfa/c/o') + req.headers['Authorization'] = 'AWS badboy' + self.middleware(req.environ, self.start_fake_response) + self.assertEqual(self.response_status, 200) + + def _assert_authorized(self, req, expect_token=True, + account_path='/v1/AUTH_TENANT_ID/'): + self.assertTrue( + req.path.startswith(account_path), + '%r does not start with %r' % (req.path, account_path)) + expected_headers = { + 'X-Identity-Status': 'Confirmed', + 'X-Roles': 'swift-user,_member_', + 'X-User-Id': 'USER_ID', + 'X-User-Name': 'S3_USER', + 'X-Tenant-Id': 'TENANT_ID', + 'X-Tenant-Name': 'TENANT_NAME', + 'X-Project-Id': 'TENANT_ID', + 'X-Project-Name': 'TENANT_NAME', + 'X-Auth-Token': 'TOKEN_ID', + } + for header, value in expected_headers.items(): + if header == 'X-Auth-Token' and not expect_token: + self.assertNotIn(header, req.headers) + continue + self.assertIn(header, req.headers) + self.assertEqual(value, req.headers[header]) + # WSGI wants native strings for headers + self.assertIsInstance(req.headers[header], str) + self.assertEqual(1, self.middleware._app.calls) + + self.assertEqual(1, self.requests_mock.call_count) + request_call = self.requests_mock.request_history[0] + self.assertEqual(json.loads(request_call.body), {'credentials': { + 'access': 'access', + 'signature': 'signature', + 'token': base64.urlsafe_b64encode(b'token').decode('ascii')}}) + + def test_authorized(self): + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + req.get_response(self.middleware) + self._assert_authorized(req) + + def test_tolerate_missing_token_id(self): + resp = copy.deepcopy(GOOD_RESPONSE_V2) + del resp['access']['token']['id'] + self.requests_mock.post(self.TEST_URL, + status_code=201, + json=resp) + + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + req.get_response(self.middleware) + self._assert_authorized(req, expect_token=False) + + def test_authorized_bytes(self): + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': b'access', + 'signature': b'signature', + 'string_to_sign': b'token', + } + req.get_response(self.middleware) + self._assert_authorized(req) + + def test_authorized_http(self): + auth_uri = 'http://fakehost:35357/v2.0' + self.requests_mock.post( + '%s/s3tokens' % auth_uri, + status_code=201, json=GOOD_RESPONSE_V2) + + self.middleware = s3token.filter_factory({ + 'auth_uri': 
auth_uri})(self.app) + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + req.get_response(self.middleware) + self._assert_authorized(req) + + def test_authorized_v3(self): + # Prior to https://github.com/openstack/keystone/commit/dd1e705 + # even v3 URLs would respond with a v2-format response + auth_uri = 'http://fakehost:35357/v3' + self.requests_mock.post( + '%s/s3tokens' % auth_uri, + status_code=201, json=GOOD_RESPONSE_V2) + + self.middleware = s3token.filter_factory({ + 'auth_uri': auth_uri})(self.app) + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + req.get_response(self.middleware) + self._assert_authorized(req) + + def test_authorized_trailing_slash(self): + self.middleware = s3token.filter_factory({ + 'auth_uri': self.TEST_AUTH_URI + '/'})(self.app) + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + req.get_response(self.middleware) + self._assert_authorized(req) + + def test_authorization_nova_toconnect(self): + req = Request.blank('/v1/AUTH_swiftint/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access:FORCED_TENANT_ID', + 'signature': u'signature', + 'string_to_sign': u'token', + } + req.get_response(self.middleware) + self._assert_authorized(req, account_path='/v1/AUTH_FORCED_TENANT_ID/') + + @mock.patch.object(requests, 'post') + def test_insecure(self, MOCK_REQUEST): + self.middleware = s3token.filter_factory( + {'insecure': 'True', 'auth_uri': 'http://example.com'})(self.app) + + text_return_value = json.dumps(GOOD_RESPONSE_V2) + MOCK_REQUEST.return_value = TestResponse({ + 'status_code': 201, + 'text': text_return_value}) + + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + req.get_response(self.middleware) + + self.assertTrue(MOCK_REQUEST.called) + mock_args, mock_kwargs = MOCK_REQUEST.call_args + self.assertIs(mock_kwargs['verify'], False) + + def test_insecure_option(self): + # insecure is passed as a string. + + # Some non-secure values. + true_values = ['true', 'True', '1', 'yes'] + for val in true_values: + config = {'insecure': val, + 'certfile': 'false_ind', + 'auth_uri': 'http://example.com'} + middleware = s3token.filter_factory(config)(self.app) + self.assertIs(False, middleware._verify) + + # Some "secure" values, including unexpected value. + false_values = ['false', 'False', '0', 'no', 'someweirdvalue'] + for val in false_values: + config = {'insecure': val, + 'certfile': 'false_ind', + 'auth_uri': 'http://example.com'} + middleware = s3token.filter_factory(config)(self.app) + self.assertEqual('false_ind', middleware._verify) + + # Default is secure. 
+ config = {'certfile': 'false_ind', + 'auth_uri': 'http://example.com'} + middleware = s3token.filter_factory(config)(self.app) + self.assertIs('false_ind', middleware._verify) + + def test_auth_uris(self): + for conf, expected in [ + ({'auth_uri': 'https://example.com/v2.0'}, + 'https://example.com/v2.0/s3tokens'), + # Trailing slash doesn't interfere + ({'auth_uri': 'https://example.com/v2.0/'}, + 'https://example.com/v2.0/s3tokens'), + # keystone running under mod_wsgi often has a path prefix + ({'auth_uri': 'https://example.com/identity/v2.0'}, + 'https://example.com/identity/v2.0/s3tokens'), + ({'auth_uri': 'https://example.com/identity/v2.0/'}, + 'https://example.com/identity/v2.0/s3tokens'), + # IPv4 addresses are fine + ({'auth_uri': 'http://127.0.0.1:35357/v3'}, + 'http://127.0.0.1:35357/v3/s3tokens'), + ({'auth_uri': 'http://127.0.0.1:35357/v3/'}, + 'http://127.0.0.1:35357/v3/s3tokens'), + # IPv6 addresses need [brackets] per RFC 3986 + ({'auth_uri': 'https://[::FFFF:129.144.52.38]:5000/v3'}, + 'https://[::FFFF:129.144.52.38]:5000/v3/s3tokens'), + ({'auth_uri': 'https://[::FFFF:129.144.52.38]:5000/v3/'}, + 'https://[::FFFF:129.144.52.38]:5000/v3/s3tokens'), + ]: + middleware = s3token.filter_factory(conf)(self.app) + self.assertEqual(expected, middleware._request_uri) + + @mock.patch.object(requests, 'post') + def test_http_timeout(self, MOCK_REQUEST): + self.middleware = s3token.filter_factory({ + 'http_timeout': '2', + 'auth_uri': 'http://example.com', + })(FakeApp()) + + MOCK_REQUEST.return_value = TestResponse({ + 'status_code': 201, + 'text': json.dumps(GOOD_RESPONSE_V2)}) + + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + req.get_response(self.middleware) + + self.assertTrue(MOCK_REQUEST.called) + mock_args, mock_kwargs = MOCK_REQUEST.call_args + self.assertEqual(mock_kwargs['timeout'], 2) + + def test_http_timeout_option(self): + good_values = ['1', '5.3', '10', '.001'] + for val in good_values: + middleware = s3token.filter_factory({ + 'http_timeout': val, + 'auth_uri': 'http://example.com', + })(FakeApp()) + self.assertEqual(float(val), middleware._timeout) + + bad_values = ['1, 4', '-3', '100', 'foo', '0'] + for val in bad_values: + with self.assertRaises(ValueError) as ctx: + s3token.filter_factory({ + 'http_timeout': val, + 'auth_uri': 'http://example.com', + })(FakeApp()) + self.assertTrue(ctx.exception.args[0].startswith(( + 'invalid literal for float():', + 'could not convert string to float:', + 'http_timeout must be between 0 and 60 seconds', + )), 'Unexpected error message: %s' % ctx.exception) + + # default is 10 seconds + middleware = s3token.filter_factory({ + 'auth_uri': 'http://example.com'})(FakeApp()) + self.assertEqual(10, middleware._timeout) + + def test_bad_auth_uris(self): + for auth_uri in [ + '/not/a/uri', + 'http://', + '//example.com/path']: + with self.assertRaises(ConfigFileError) as cm: + s3token.filter_factory({'auth_uri': auth_uri})(self.app) + self.assertEqual('Invalid auth_uri; must include scheme and host', + cm.exception.message) + with self.assertRaises(ConfigFileError) as cm: + s3token.filter_factory({ + 'auth_uri': 'nonhttp://example.com'})(self.app) + self.assertEqual('Invalid auth_uri; scheme must be http or https', + cm.exception.message) + for auth_uri in [ + 'http://user@example.com/', + 'http://example.com/?with=query', + 'http://example.com/#with-fragment']: + with self.assertRaises(ConfigFileError) as 
cm: + s3token.filter_factory({'auth_uri': auth_uri})(self.app) + self.assertEqual('Invalid auth_uri; must not include username, ' + 'query, or fragment', cm.exception.message) + + def test_unicode_path(self): + url = u'/v1/AUTH_cfa/c/euro\u20ac'.encode('utf8') + req = Request.blank(urllib.parse.quote(url)) + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + req.get_response(self.middleware) + self._assert_authorized(req) + + +class S3TokenMiddlewareTestBad(S3TokenMiddlewareTestBase): + def test_unauthorized_token(self): + ret = {"error": + {"message": "EC2 access key not found.", + "code": 401, + "title": "Unauthorized"}} + self.requests_mock.post(self.TEST_URL, status_code=403, json=ret) + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + resp = req.get_response(self.middleware) + s3_denied_req = self.middleware._deny_request('AccessDenied') + self.assertEqual(resp.body, s3_denied_req.body) + self.assertEqual( + resp.status_int, # pylint: disable-msg=E1101 + s3_denied_req.status_int) # pylint: disable-msg=E1101 + self.assertEqual(0, self.middleware._app.calls) + + self.assertEqual(1, self.requests_mock.call_count) + request_call = self.requests_mock.request_history[0] + self.assertEqual(json.loads(request_call.body), {'credentials': { + 'access': 'access', + 'signature': 'signature', + 'token': base64.urlsafe_b64encode(b'token').decode('ascii')}}) + + def test_no_s3_creds_defers_to_auth_middleware(self): + # Without an Authorization header, we should just pass through to the + # auth system to make a decision. + req = Request.blank('/v1/AUTH_cfa/c/o') + resp = req.get_response(self.middleware) + self.assertEqual(resp.status_int, 200) # pylint: disable-msg=E1101 + self.assertEqual(1, self.middleware._app.calls) + + def test_fail_to_connect_to_keystone(self): + with mock.patch.object(self.middleware, '_json_request') as o: + s3_invalid_resp = self.middleware._deny_request('InvalidURI') + o.side_effect = s3_invalid_resp + + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + resp = req.get_response(self.middleware) + self.assertEqual(resp.body, s3_invalid_resp.body) + self.assertEqual( + resp.status_int, # pylint: disable-msg=E1101 + s3_invalid_resp.status_int) # pylint: disable-msg=E1101 + self.assertEqual(0, self.middleware._app.calls) + + def _test_bad_reply(self, response_body): + self.requests_mock.post(self.TEST_URL, + status_code=201, + text=response_body) + + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + resp = req.get_response(self.middleware) + s3_invalid_resp = self.middleware._deny_request('InvalidURI') + self.assertEqual(resp.body, s3_invalid_resp.body) + self.assertEqual( + resp.status_int, # pylint: disable-msg=E1101 + s3_invalid_resp.status_int) # pylint: disable-msg=E1101 + self.assertEqual(0, self.middleware._app.calls) + + def test_bad_reply_not_json(self): + self._test_bad_reply('') + + def _test_bad_reply_missing_parts(self, *parts): + resp = copy.deepcopy(GOOD_RESPONSE_V2) + part_dict = resp + for part in parts[:-1]: + part_dict = part_dict[part] + del part_dict[parts[-1]] + self._test_bad_reply(json.dumps(resp)) + + def 
test_bad_reply_missing_token_dict(self): + self._test_bad_reply_missing_parts('access', 'token') + + def test_bad_reply_missing_user_dict(self): + self._test_bad_reply_missing_parts('access', 'user') + + def test_bad_reply_missing_user_roles(self): + self._test_bad_reply_missing_parts('access', 'user', 'roles') + + def test_bad_reply_missing_user_name(self): + self._test_bad_reply_missing_parts('access', 'user', 'name') + + def test_bad_reply_missing_user_id(self): + self._test_bad_reply_missing_parts('access', 'user', 'id') + + def test_bad_reply_missing_tenant_dict(self): + self._test_bad_reply_missing_parts('access', 'token', 'tenant') + + def test_bad_reply_missing_tenant_id(self): + self._test_bad_reply_missing_parts('access', 'token', 'tenant', 'id') + + def test_bad_reply_missing_tenant_name(self): + self._test_bad_reply_missing_parts('access', 'token', 'tenant', 'name') + + def test_bad_reply_valid_but_bad_json(self): + self._test_bad_reply('{}') + self._test_bad_reply('[]') + self._test_bad_reply('null') + self._test_bad_reply('"foo"') + self._test_bad_reply('1') + self._test_bad_reply('true') + + +class S3TokenMiddlewareTestDeferredAuth(S3TokenMiddlewareTestBase): + def setUp(self): + super(S3TokenMiddlewareTestDeferredAuth, self).setUp() + self.conf['delay_auth_decision'] = 'yes' + self.middleware = s3token.S3Token(FakeApp(), self.conf) + + def test_unauthorized_token(self): + ret = {"error": + {"message": "EC2 access key not found.", + "code": 401, + "title": "Unauthorized"}} + self.requests_mock.post(self.TEST_URL, status_code=403, json=ret) + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + resp = req.get_response(self.middleware) + self.assertEqual( + resp.status_int, # pylint: disable-msg=E1101 + 200) + self.assertNotIn('X-Auth-Token', req.headers) + self.assertEqual(1, self.middleware._app.calls) + + self.assertEqual(1, self.requests_mock.call_count) + request_call = self.requests_mock.request_history[0] + self.assertEqual(json.loads(request_call.body), {'credentials': { + 'access': 'access', + 'signature': 'signature', + 'token': base64.urlsafe_b64encode(b'token').decode('ascii')}}) + + def test_fail_to_connect_to_keystone(self): + with mock.patch.object(self.middleware, '_json_request') as o: + o.side_effect = self.middleware._deny_request('InvalidURI') + + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + resp = req.get_response(self.middleware) + self.assertEqual( + resp.status_int, # pylint: disable-msg=E1101 + 200) + self.assertNotIn('X-Auth-Token', req.headers) + self.assertEqual(1, self.middleware._app.calls) + + def test_bad_reply(self): + self.requests_mock.post(self.TEST_URL, + status_code=201, + text="") + + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + resp = req.get_response(self.middleware) + self.assertEqual( + resp.status_int, # pylint: disable-msg=E1101 + 200) + self.assertNotIn('X-Auth-Token', req.headers) + self.assertEqual(1, self.middleware._app.calls) + + +class S3TokenMiddlewareTestV3(S3TokenMiddlewareTestBase): + + def setUp(self): + super(S3TokenMiddlewareTestV3, self).setUp() + + self.requests_mock.post(self.TEST_URL, + status_code=200, + json=GOOD_RESPONSE_V3) + + def 
_assert_authorized(self, req, + account_path='/v1/AUTH_PROJECT_ID/'): + self.assertTrue(req.path.startswith(account_path)) + expected_headers = { + 'X-Identity-Status': 'Confirmed', + 'X-Roles': 'swift-user,_member_', + 'X-User-Id': 'USER_ID', + 'X-User-Name': 'S3_USER', + 'X-User-Domain-Id': 'default', + 'X-User-Domain-Name': 'Default', + 'X-Tenant-Id': 'PROJECT_ID', + 'X-Tenant-Name': 'PROJECT_NAME', + 'X-Project-Id': 'PROJECT_ID', + 'X-Project-Name': 'PROJECT_NAME', + 'X-Project-Domain-Id': 'PROJECT_DOMAIN_ID', + 'X-Project-Domain-Name': 'PROJECT_DOMAIN_NAME', + } + for header, value in expected_headers.items(): + self.assertIn(header, req.headers) + self.assertEqual(value, req.headers[header]) + # WSGI wants native strings for headers + self.assertIsInstance(req.headers[header], str) + self.assertNotIn('X-Auth-Token', req.headers) + self.assertEqual(1, self.middleware._app.calls) + + def test_authorized(self): + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + req.get_response(self.middleware) + self._assert_authorized(req) + + def test_authorized_bytes(self): + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': b'access', + 'signature': b'signature', + 'string_to_sign': b'token', + } + req.get_response(self.middleware) + self._assert_authorized(req) + + def test_authorized_http(self): + # Following https://github.com/openstack/keystone/commit/3ec1aa4 + # even v2 URLs would respond with a v3-format response + auth_uri = 'http://fakehost:35357/v2.0/' + self.requests_mock.post( + auth_uri + 's3tokens', + status_code=201, json=GOOD_RESPONSE_V3) + + self.middleware = s3token.filter_factory({ + 'auth_uri': auth_uri})(self.app) + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + req.get_response(self.middleware) + self._assert_authorized(req) + + def test_authorized_v3(self): + auth_uri = 'http://fakehost:35357/v3/' + self.requests_mock.post( + auth_uri + 's3tokens', + status_code=201, json=GOOD_RESPONSE_V3) + + self.middleware = s3token.filter_factory({ + 'auth_uri': auth_uri})(self.app) + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + req.get_response(self.middleware) + self._assert_authorized(req) + + def test_authorized_trailing_slash(self): + self.middleware = s3token.filter_factory({ + 'auth_uri': self.TEST_AUTH_URI + '/'})(self.app) + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + req.get_response(self.middleware) + self._assert_authorized(req) + + def test_authorization_nova_toconnect(self): + req = Request.blank('/v1/AUTH_swiftint/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access:FORCED_TENANT_ID', + 'signature': u'signature', + 'string_to_sign': u'token', + } + req.get_response(self.middleware) + self._assert_authorized(req, account_path='/v1/AUTH_FORCED_TENANT_ID/') + + def _test_bad_reply_missing_parts(self, *parts): + resp = copy.deepcopy(GOOD_RESPONSE_V3) + part_dict = resp + for part in parts[:-1]: + part_dict = part_dict[part] + del part_dict[parts[-1]] + self.requests_mock.post(self.TEST_URL, + status_code=201, + 
text=json.dumps(resp)) + + req = Request.blank('/v1/AUTH_cfa/c/o') + req.environ['s3api.auth_details'] = { + 'access_key': u'access', + 'signature': u'signature', + 'string_to_sign': u'token', + } + resp = req.get_response(self.middleware) + s3_invalid_resp = self.middleware._deny_request('InvalidURI') + self.assertEqual(resp.body, s3_invalid_resp.body) + self.assertEqual( + resp.status_int, # pylint: disable-msg=E1101 + s3_invalid_resp.status_int) # pylint: disable-msg=E1101 + self.assertEqual(0, self.middleware._app.calls) + + def test_bad_reply_missing_parts(self): + self._test_bad_reply_missing_parts('token', 'user', 'id') + self._test_bad_reply_missing_parts('token', 'user', 'name') + self._test_bad_reply_missing_parts('token', 'user', 'domain', 'id') + self._test_bad_reply_missing_parts('token', 'user', 'domain', 'name') + self._test_bad_reply_missing_parts('token', 'user', 'domain') + self._test_bad_reply_missing_parts('token', 'user') + self._test_bad_reply_missing_parts('token', 'project', 'id') + self._test_bad_reply_missing_parts('token', 'project', 'name') + self._test_bad_reply_missing_parts('token', 'project', 'domain', 'id') + self._test_bad_reply_missing_parts('token', 'project', 'domain', + 'name') + self._test_bad_reply_missing_parts('token', 'project', 'domain') + self._test_bad_reply_missing_parts('token', 'project') + self._test_bad_reply_missing_parts('token', 'roles') diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_service.py swift-2.18.0/test/unit/common/middleware/s3api/test_service.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_service.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_service.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,235 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
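Editor's note on the s3token tests above: they pin down how the middleware is expected to normalise its auth_uri option. The scheme and host are mandatory, only http and https are accepted, userinfo, query, and fragment are rejected, a trailing slash is ignored, and '/s3tokens' is appended to whatever path remains (including mod_wsgi-style prefixes such as /identity/v2.0). The sketch below reconstructs that behaviour from the test expectations alone; the function name build_s3tokens_uri and the standalone ConfigFileError class are illustrative stand-ins, not the middleware's actual internals.

    from urllib.parse import urlsplit


    class ConfigFileError(Exception):
        """Stand-in for the error the real filter_factory raises."""


    def build_s3tokens_uri(auth_uri):
        # Mirrors the checks exercised by test_auth_uris / test_bad_auth_uris.
        parts = urlsplit(auth_uri)
        if not parts.scheme or not parts.netloc:
            raise ConfigFileError('Invalid auth_uri; must include scheme and host')
        if parts.scheme not in ('http', 'https'):
            raise ConfigFileError('Invalid auth_uri; scheme must be http or https')
        if parts.username or parts.query or parts.fragment:
            raise ConfigFileError('Invalid auth_uri; must not include username, '
                                  'query, or fragment')
        # A trailing slash does not interfere, and path prefixes are preserved.
        return auth_uri.rstrip('/') + '/s3tokens'


    # Example: IPv6 hosts keep their RFC 3986 brackets.
    # build_s3tokens_uri('https://[::FFFF:129.144.52.38]:5000/v3/')
    #   -> 'https://[::FFFF:129.144.52.38]:5000/v3/s3tokens'
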
+ +import unittest + +from swift.common import swob +from swift.common.swob import Request +from swift.common.utils import json + +from test.unit.common.middleware.s3api.test_s3_acl import s3acl +from test.unit.common.middleware.s3api import S3ApiTestCase +from swift.common.middleware.s3api.etree import fromstring +from swift.common.middleware.s3api.subresource import ACL, Owner, encode_acl + + +def create_bucket_list_json(buckets): + """ + Create a json from bucket list + :param buckets: a list of tuples (or lists) consist of elements orderd as + name, count, bytes + """ + bucket_list = map( + lambda item: {'name': item[0], 'count': item[1], 'bytes': item[2]}, + list(buckets)) + return json.dumps(bucket_list) + + +class TestS3ApiService(S3ApiTestCase): + def setup_buckets(self): + self.buckets = (('apple', 1, 200), ('orange', 3, 430)) + bucket_list = create_bucket_list_json(self.buckets) + self.swift.register('GET', '/v1/AUTH_test', swob.HTTPOk, {}, + bucket_list) + + def setUp(self): + super(TestS3ApiService, self).setUp() + + self.setup_buckets() + + def test_service_GET_error(self): + code = self._test_method_error('GET', '', swob.HTTPUnauthorized) + self.assertEqual(code, 'SignatureDoesNotMatch') + code = self._test_method_error('GET', '', swob.HTTPForbidden) + self.assertEqual(code, 'AccessDenied') + code = self._test_method_error('GET', '', swob.HTTPServerError) + self.assertEqual(code, 'InternalError') + + @s3acl + def test_service_GET(self): + req = Request.blank('/', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + elem = fromstring(body, 'ListAllMyBucketsResult') + + all_buckets = elem.find('./Buckets') + buckets = all_buckets.iterchildren('Bucket') + listing = list(list(buckets)[0]) + self.assertEqual(len(listing), 2) + + names = [] + for b in all_buckets.iterchildren('Bucket'): + names.append(b.find('./Name').text) + + self.assertEqual(len(names), len(self.buckets)) + for i in self.buckets: + self.assertTrue(i[0] in names) + + @s3acl + def test_service_GET_subresource(self): + req = Request.blank('/?acl', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + elem = fromstring(body, 'ListAllMyBucketsResult') + + all_buckets = elem.find('./Buckets') + buckets = all_buckets.iterchildren('Bucket') + listing = list(list(buckets)[0]) + self.assertEqual(len(listing), 2) + + names = [] + for b in all_buckets.iterchildren('Bucket'): + names.append(b.find('./Name').text) + + self.assertEqual(len(names), len(self.buckets)) + for i in self.buckets: + self.assertTrue(i[0] in names) + + def test_service_GET_with_blind_resource(self): + buckets = (('apple', 1, 200), ('orange', 3, 430), + ('apple+segment', 1, 200)) + expected = buckets[:-1] + bucket_list = create_bucket_list_json(buckets) + self.swift.register('GET', '/v1/AUTH_test', swob.HTTPOk, {}, + bucket_list) + + req = Request.blank('/', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + + elem = fromstring(body, 'ListAllMyBucketsResult') + all_buckets = elem.find('./Buckets') + buckets = all_buckets.iterchildren('Bucket') + listing = 
list(list(buckets)[0]) + self.assertEqual(len(listing), 2) + + names = [] + for b in all_buckets.iterchildren('Bucket'): + names.append(b.find('./Name').text) + + self.assertEqual(len(names), len(expected)) + for i in expected: + self.assertTrue(i[0] in names) + + def _test_service_GET_for_check_bucket_owner(self, buckets): + self.s3api.conf.check_bucket_owner = True + bucket_list = create_bucket_list_json(buckets) + self.swift.register('GET', '/v1/AUTH_test', swob.HTTPOk, {}, + bucket_list) + + req = Request.blank('/', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + return self.call_s3api(req) + + @s3acl(s3acl_only=True) + def test_service_GET_without_bucket(self): + bucket_list = [] + for var in range(0, 10): + bucket = 'bucket%s' % var + self.swift.register('HEAD', '/v1/AUTH_test/%s' % bucket, + swob.HTTPNotFound, {}, None) + bucket_list.append((bucket, var, 300 + var)) + + status, headers, body = \ + self._test_service_GET_for_check_bucket_owner(bucket_list) + self.assertEqual(status.split()[0], '200') + + elem = fromstring(body, 'ListAllMyBucketsResult') + + resp_buckets = elem.find('./Buckets') + buckets = resp_buckets.iterchildren('Bucket') + self.assertEqual(len(list(buckets)), 0) + + @s3acl(s3acl_only=True) + def test_service_GET_without_owner_bucket(self): + bucket_list = [] + for var in range(0, 10): + user_id = 'test:other' + bucket = 'bucket%s' % var + owner = Owner(user_id, user_id) + headers = encode_acl('container', ACL(owner, [])) + self.swift.register('HEAD', '/v1/AUTH_test/%s' % bucket, + swob.HTTPNoContent, headers, None) + bucket_list.append((bucket, var, 300 + var)) + + status, headers, body = \ + self._test_service_GET_for_check_bucket_owner(bucket_list) + self.assertEqual(status.split()[0], '200') + + elem = fromstring(body, 'ListAllMyBucketsResult') + + resp_buckets = elem.find('./Buckets') + buckets = resp_buckets.iterchildren('Bucket') + self.assertEqual(len(list(buckets)), 0) + + @s3acl(s3acl_only=True) + def test_service_GET_bucket_list(self): + bucket_list = [] + for var in range(0, 10): + if var % 3 == 0: + user_id = 'test:tester' + else: + user_id = 'test:other' + bucket = 'bucket%s' % var + owner = Owner(user_id, user_id) + headers = encode_acl('container', ACL(owner, [])) + # set register to get owner of buckets + if var % 3 == 2: + self.swift.register('HEAD', '/v1/AUTH_test/%s' % bucket, + swob.HTTPNotFound, {}, None) + else: + self.swift.register('HEAD', '/v1/AUTH_test/%s' % bucket, + swob.HTTPNoContent, headers, None) + bucket_list.append((bucket, var, 300 + var)) + + status, headers, body = \ + self._test_service_GET_for_check_bucket_owner(bucket_list) + self.assertEqual(status.split()[0], '200') + + elem = fromstring(body, 'ListAllMyBucketsResult') + resp_buckets = elem.find('./Buckets') + buckets = resp_buckets.iterchildren('Bucket') + listing = list(list(buckets)[0]) + self.assertEqual(len(listing), 2) + + names = [] + for b in resp_buckets.iterchildren('Bucket'): + names.append(b.find('./Name').text) + + # Check whether getting bucket only locate in multiples of 3 in + # bucket_list which mean requested user is owner. 
+ expected_buckets = [b for i, b in enumerate(bucket_list) + if i % 3 == 0] + self.assertEqual(len(names), len(expected_buckets)) + for i in expected_buckets: + self.assertTrue(i[0] in names) + self.assertEqual(len(self.swift.calls_with_headers), 11) + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_subresource.py swift-2.18.0/test/unit/common/middleware/s3api/test_subresource.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_subresource.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_subresource.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,367 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from swift.common.utils import json + +from swift.common.middleware.s3api.s3response import AccessDenied, \ + InvalidArgument, S3NotImplemented +from swift.common.middleware.s3api.subresource import User, \ + AuthenticatedUsers, AllUsers, \ + ACLPrivate, ACLPublicRead, ACLPublicReadWrite, ACLAuthenticatedRead, \ + ACLBucketOwnerRead, ACLBucketOwnerFullControl, Owner, ACL, encode_acl, \ + decode_acl, canned_acl_grantees, Grantee +from swift.common.middleware.s3api.utils import sysmeta_header +from swift.common.middleware.s3api.exception import InvalidSubresource + + +class TestS3ApiSubresource(unittest.TestCase): + + def setUp(self): + self.s3_acl = True + self.allow_no_owner = False + + def test_acl_canonical_user(self): + grantee = User('test:tester') + + self.assertTrue('test:tester' in grantee) + self.assertTrue('test:tester2' not in grantee) + self.assertEqual(str(grantee), 'test:tester') + self.assertEqual(grantee.elem().find('./ID').text, 'test:tester') + + def test_acl_authenticated_users(self): + grantee = AuthenticatedUsers() + + self.assertTrue('test:tester' in grantee) + self.assertTrue('test:tester2' in grantee) + uri = 'http://acs.amazonaws.com/groups/global/AuthenticatedUsers' + self.assertEqual(grantee.elem().find('./URI').text, uri) + + def test_acl_all_users(self): + grantee = AllUsers() + + self.assertTrue('test:tester' in grantee) + self.assertTrue('test:tester2' in grantee) + uri = 'http://acs.amazonaws.com/groups/global/AllUsers' + self.assertEqual(grantee.elem().find('./URI').text, uri) + + def check_permission(self, acl, user_id, permission): + try: + acl.check_permission(user_id, permission) + return True + except AccessDenied: + return False + + def test_acl_private(self): + acl = ACLPrivate(Owner(id='test:tester', + name='test:tester'), + s3_acl=self.s3_acl, + allow_no_owner=self.allow_no_owner) + + self.assertTrue(self.check_permission(acl, 'test:tester', 'READ')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'READ_ACP')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE_ACP')) + self.assertFalse(self.check_permission(acl, 'test:tester2', 'READ')) + self.assertFalse(self.check_permission(acl, 
'test:tester2', 'WRITE')) + self.assertFalse(self.check_permission(acl, 'test:tester2', + 'READ_ACP')) + self.assertFalse(self.check_permission(acl, 'test:tester2', + 'WRITE_ACP')) + + def test_acl_public_read(self): + acl = ACLPublicRead(Owner(id='test:tester', + name='test:tester'), + s3_acl=self.s3_acl, + allow_no_owner=self.allow_no_owner) + + self.assertTrue(self.check_permission(acl, 'test:tester', 'READ')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'READ_ACP')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE_ACP')) + self.assertTrue(self.check_permission(acl, 'test:tester2', 'READ')) + self.assertFalse(self.check_permission(acl, 'test:tester2', 'WRITE')) + self.assertFalse(self.check_permission(acl, 'test:tester2', + 'READ_ACP')) + self.assertFalse(self.check_permission(acl, 'test:tester2', + 'WRITE_ACP')) + + def test_acl_public_read_write(self): + acl = ACLPublicReadWrite(Owner(id='test:tester', + name='test:tester'), + s3_acl=self.s3_acl, + allow_no_owner=self.allow_no_owner) + + self.assertTrue(self.check_permission(acl, 'test:tester', 'READ')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'READ_ACP')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE_ACP')) + self.assertTrue(self.check_permission(acl, 'test:tester2', 'READ')) + self.assertTrue(self.check_permission(acl, 'test:tester2', 'WRITE')) + self.assertFalse(self.check_permission(acl, 'test:tester2', + 'READ_ACP')) + self.assertFalse(self.check_permission(acl, 'test:tester2', + 'WRITE_ACP')) + + def test_acl_authenticated_read(self): + acl = ACLAuthenticatedRead(Owner(id='test:tester', + name='test:tester'), + s3_acl=self.s3_acl, + allow_no_owner=self.allow_no_owner) + + self.assertTrue(self.check_permission(acl, 'test:tester', 'READ')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'READ_ACP')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE_ACP')) + self.assertTrue(self.check_permission(acl, 'test:tester2', 'READ')) + self.assertFalse(self.check_permission(acl, 'test:tester2', 'WRITE')) + self.assertFalse(self.check_permission(acl, 'test:tester2', + 'READ_ACP')) + self.assertFalse(self.check_permission(acl, 'test:tester2', + 'WRITE_ACP')) + + def test_acl_bucket_owner_read(self): + acl = ACLBucketOwnerRead( + bucket_owner=Owner('test:tester2', 'test:tester2'), + object_owner=Owner('test:tester', 'test:tester'), + s3_acl=self.s3_acl, + allow_no_owner=self.allow_no_owner) + + self.assertTrue(self.check_permission(acl, 'test:tester', 'READ')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'READ_ACP')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE_ACP')) + self.assertTrue(self.check_permission(acl, 'test:tester2', 'READ')) + self.assertFalse(self.check_permission(acl, 'test:tester2', 'WRITE')) + self.assertFalse(self.check_permission(acl, 'test:tester2', + 'READ_ACP')) + self.assertFalse(self.check_permission(acl, 'test:tester2', + 'WRITE_ACP')) + + def test_acl_bucket_owner_full_control(self): + acl = ACLBucketOwnerFullControl( + bucket_owner=Owner('test:tester2', 'test:tester2'), + object_owner=Owner('test:tester', 'test:tester'), + s3_acl=self.s3_acl, + allow_no_owner=self.allow_no_owner) + + 
self.assertTrue(self.check_permission(acl, 'test:tester', 'READ')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'READ_ACP')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE_ACP')) + self.assertTrue(self.check_permission(acl, 'test:tester2', 'READ')) + self.assertTrue(self.check_permission(acl, 'test:tester2', 'WRITE')) + self.assertTrue(self.check_permission(acl, 'test:tester2', 'READ_ACP')) + self.assertTrue(self.check_permission(acl, 'test:tester2', + 'WRITE_ACP')) + + def test_acl_elem(self): + acl = ACLPrivate(Owner(id='test:tester', + name='test:tester'), + s3_acl=self.s3_acl, + allow_no_owner=self.allow_no_owner) + elem = acl.elem() + self.assertTrue(elem.find('./Owner') is not None) + self.assertTrue(elem.find('./AccessControlList') is not None) + grants = [e for e in elem.findall('./AccessControlList/Grant')] + self.assertEqual(len(grants), 1) + self.assertEqual(grants[0].find('./Grantee/ID').text, 'test:tester') + self.assertEqual( + grants[0].find('./Grantee/DisplayName').text, 'test:tester') + + def test_acl_from_elem(self): + # check translation from element + acl = ACLPrivate(Owner(id='test:tester', + name='test:tester'), + s3_acl=self.s3_acl, + allow_no_owner=self.allow_no_owner) + elem = acl.elem() + acl = ACL.from_elem(elem, self.s3_acl, self.allow_no_owner) + self.assertTrue(self.check_permission(acl, 'test:tester', 'READ')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'READ_ACP')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE_ACP')) + self.assertFalse(self.check_permission(acl, 'test:tester2', 'READ')) + self.assertFalse(self.check_permission(acl, 'test:tester2', 'WRITE')) + self.assertFalse(self.check_permission(acl, 'test:tester2', + 'READ_ACP')) + self.assertFalse(self.check_permission(acl, 'test:tester2', + 'WRITE_ACP')) + + def test_acl_from_elem_by_id_only(self): + elem = ACLPrivate(Owner(id='test:tester', + name='test:tester'), + s3_acl=self.s3_acl, + allow_no_owner=self.allow_no_owner).elem() + elem.find('./Owner').remove(elem.find('./Owner/DisplayName')) + acl = ACL.from_elem(elem, self.s3_acl, self.allow_no_owner) + self.assertTrue(self.check_permission(acl, 'test:tester', 'READ')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'READ_ACP')) + self.assertTrue(self.check_permission(acl, 'test:tester', 'WRITE_ACP')) + self.assertFalse(self.check_permission(acl, 'test:tester2', 'READ')) + self.assertFalse(self.check_permission(acl, 'test:tester2', 'WRITE')) + self.assertFalse(self.check_permission(acl, 'test:tester2', + 'READ_ACP')) + self.assertFalse(self.check_permission(acl, 'test:tester2', + 'WRITE_ACP')) + + def test_decode_acl_container(self): + access_control_policy = \ + {'Owner': 'test:tester', + 'Grant': [{'Permission': 'FULL_CONTROL', + 'Grantee': 'test:tester'}]} + headers = {sysmeta_header('container', 'acl'): + json.dumps(access_control_policy)} + acl = decode_acl('container', headers, self.allow_no_owner) + + self.assertEqual(type(acl), ACL) + self.assertEqual(acl.owner.id, 'test:tester') + self.assertEqual(len(acl.grants), 1) + self.assertEqual(str(acl.grants[0].grantee), 'test:tester') + self.assertEqual(acl.grants[0].permission, 'FULL_CONTROL') + + def test_decode_acl_object(self): + access_control_policy = \ + {'Owner': 'test:tester', + 'Grant': 
[{'Permission': 'FULL_CONTROL', + 'Grantee': 'test:tester'}]} + headers = {sysmeta_header('object', 'acl'): + json.dumps(access_control_policy)} + acl = decode_acl('object', headers, self.allow_no_owner) + + self.assertEqual(type(acl), ACL) + self.assertEqual(acl.owner.id, 'test:tester') + self.assertEqual(len(acl.grants), 1) + self.assertEqual(str(acl.grants[0].grantee), 'test:tester') + self.assertEqual(acl.grants[0].permission, 'FULL_CONTROL') + + def test_decode_acl_undefined(self): + headers = {} + acl = decode_acl('container', headers, self.allow_no_owner) + + self.assertEqual(type(acl), ACL) + self.assertIsNone(acl.owner.id) + self.assertEqual(len(acl.grants), 0) + + def test_decode_acl_empty_list(self): + headers = {sysmeta_header('container', 'acl'): '[]'} + acl = decode_acl('container', headers, self.allow_no_owner) + self.assertEqual(type(acl), ACL) + self.assertIsNone(acl.owner.id) + self.assertEqual(len(acl.grants), 0) + + def test_decode_acl_with_invalid_json(self): + headers = {sysmeta_header('container', 'acl'): '['} + self.assertRaises( + InvalidSubresource, decode_acl, 'container', + headers, self.allow_no_owner) + + def test_encode_acl_container(self): + acl = ACLPrivate(Owner(id='test:tester', + name='test:tester')) + acp = encode_acl('container', acl) + header_value = json.loads(acp[sysmeta_header('container', 'acl')]) + + self.assertTrue('Owner' in header_value) + self.assertTrue('Grant' in header_value) + self.assertEqual('test:tester', header_value['Owner']) + self.assertEqual(len(header_value['Grant']), 1) + + def test_encode_acl_object(self): + acl = ACLPrivate(Owner(id='test:tester', + name='test:tester')) + acp = encode_acl('object', acl) + header_value = json.loads(acp[sysmeta_header('object', 'acl')]) + + self.assertTrue('Owner' in header_value) + self.assertTrue('Grant' in header_value) + self.assertEqual('test:tester', header_value['Owner']) + self.assertEqual(len(header_value['Grant']), 1) + + def test_encode_acl_many_grant(self): + headers = {} + users = [] + for i in range(0, 99): + users.append('id=test:tester%s' % str(i)) + users = ','.join(users) + headers['x-amz-grant-read'] = users + acl = ACL.from_headers(headers, Owner('test:tester', 'test:tester')) + acp = encode_acl('container', acl) + + header_value = acp[sysmeta_header('container', 'acl')] + header_value = json.loads(header_value) + + self.assertTrue('Owner' in header_value) + self.assertTrue('Grant' in header_value) + self.assertEqual('test:tester', header_value['Owner']) + self.assertEqual(len(header_value['Grant']), 99) + + def test_from_headers_x_amz_acl(self): + canned_acls = ['public-read', 'public-read-write', + 'authenticated-read', 'bucket-owner-read', + 'bucket-owner-full-control', 'log-delivery-write'] + + owner = Owner('test:tester', 'test:tester') + grantee_map = canned_acl_grantees(owner) + + for acl_str in canned_acls: + acl = ACL.from_headers({'x-amz-acl': acl_str}, owner) + expected = grantee_map[acl_str] + + self.assertEqual(len(acl.grants), len(expected)) # sanity + + # parse Grant object to permission and grantee + actual_grants = [(grant.permission, grant.grantee) + for grant in acl.grants] + + assertions = zip(sorted(expected), sorted(actual_grants)) + + for (expected_permission, expected_grantee), \ + (permission, grantee) in assertions: + self.assertEqual(expected_permission, permission) + self.assertTrue( + isinstance(grantee, expected_grantee.__class__)) + if isinstance(grantee, User): + self.assertEqual(expected_grantee.id, grantee.id) + 
self.assertEqual(expected_grantee.display_name, + grantee.display_name) + + def test_from_headers_x_amz_acl_invalid(self): + with self.assertRaises(InvalidArgument) as cm: + ACL.from_headers({'x-amz-acl': 'invalid'}, + Owner('test:tester', 'test:tester')) + self.assertTrue('argument_name' in cm.exception.info) + self.assertEqual(cm.exception.info['argument_name'], 'x-amz-acl') + self.assertTrue('argument_value' in cm.exception.info) + self.assertEqual(cm.exception.info['argument_value'], 'invalid') + + def test_canned_acl_grantees(self): + grantee_map = canned_acl_grantees(Owner('test:tester', 'test:tester')) + canned_acls = ['private', 'public-read', 'public-read-write', + 'authenticated-read', 'bucket-owner-read', + 'bucket-owner-full-control', 'log-delivery-write'] + for canned_acl in canned_acls: + self.assertTrue(canned_acl in grantee_map) + self.assertEqual(len(canned_acls), len(grantee_map)) # sanity + + def test_base_grantee(self): + grantee = Grantee() + func = lambda: '' in grantee + self.assertRaises(S3NotImplemented, func) + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_utils.py swift-2.18.0/test/unit/common/middleware/s3api/test_utils.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_utils.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_utils.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,133 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
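Editor's note on the test_utils.py cases that follow: they check a camel_to_snake / snake_to_camel round trip against pairs such as ('DisplayName', 'display_name') and ('AccessControlPolicy', 'access_control_policy'). A minimal sketch of helpers satisfying exactly those pairs is shown below; it is an illustration written for this note, not Swift's own utils implementation.

    import re


    def camel_to_snake(name):
        # 'AccessControlPolicy' -> 'access_control_policy'
        return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', name).lower()


    def snake_to_camel(name):
        # 'access_control_policy' -> 'AccessControlPolicy'
        return ''.join(part.capitalize() for part in name.split('_'))


    assert camel_to_snake('DisplayName') == 'display_name'
    assert snake_to_camel('owner') == 'Owner'
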
+ +import os +import time +import unittest + +from swift.common.middleware.s3api import utils, s3request + +strs = [ + ('Owner', 'owner'), + ('DisplayName', 'display_name'), + ('AccessControlPolicy', 'access_control_policy'), +] + + +class TestS3ApiUtils(unittest.TestCase): + def test_camel_to_snake(self): + for s1, s2 in strs: + self.assertEqual(utils.camel_to_snake(s1), s2) + + def test_snake_to_camel(self): + for s1, s2 in strs: + self.assertEqual(s1, utils.snake_to_camel(s2)) + + def test_validate_bucket_name(self): + # good cases + self.assertTrue(utils.validate_bucket_name('bucket', True)) + self.assertTrue(utils.validate_bucket_name('bucket1', True)) + self.assertTrue(utils.validate_bucket_name('bucket-1', True)) + self.assertTrue(utils.validate_bucket_name('b.u.c.k.e.t', True)) + self.assertTrue(utils.validate_bucket_name('a' * 63, True)) + # bad cases + self.assertFalse(utils.validate_bucket_name('a', True)) + self.assertFalse(utils.validate_bucket_name('aa', True)) + self.assertFalse(utils.validate_bucket_name('a+a', True)) + self.assertFalse(utils.validate_bucket_name('a_a', True)) + self.assertFalse(utils.validate_bucket_name('Bucket', True)) + self.assertFalse(utils.validate_bucket_name('BUCKET', True)) + self.assertFalse(utils.validate_bucket_name('bucket-', True)) + self.assertFalse(utils.validate_bucket_name('bucket.', True)) + self.assertFalse(utils.validate_bucket_name('bucket_', True)) + self.assertFalse(utils.validate_bucket_name('bucket.-bucket', True)) + self.assertFalse(utils.validate_bucket_name('bucket-.bucket', True)) + self.assertFalse(utils.validate_bucket_name('bucket..bucket', True)) + self.assertFalse(utils.validate_bucket_name('a' * 64, True)) + + def test_validate_bucket_name_with_dns_compliant_bucket_names_false(self): + # good cases + self.assertTrue(utils.validate_bucket_name('bucket', False)) + self.assertTrue(utils.validate_bucket_name('bucket1', False)) + self.assertTrue(utils.validate_bucket_name('bucket-1', False)) + self.assertTrue(utils.validate_bucket_name('b.u.c.k.e.t', False)) + self.assertTrue(utils.validate_bucket_name('a' * 63, False)) + self.assertTrue(utils.validate_bucket_name('a' * 255, False)) + self.assertTrue(utils.validate_bucket_name('a_a', False)) + self.assertTrue(utils.validate_bucket_name('Bucket', False)) + self.assertTrue(utils.validate_bucket_name('BUCKET', False)) + self.assertTrue(utils.validate_bucket_name('bucket-', False)) + self.assertTrue(utils.validate_bucket_name('bucket_', False)) + self.assertTrue(utils.validate_bucket_name('bucket.-bucket', False)) + self.assertTrue(utils.validate_bucket_name('bucket-.bucket', False)) + self.assertTrue(utils.validate_bucket_name('bucket..bucket', False)) + # bad cases + self.assertFalse(utils.validate_bucket_name('a', False)) + self.assertFalse(utils.validate_bucket_name('aa', False)) + self.assertFalse(utils.validate_bucket_name('a+a', False)) + # ending with dot seems invalid in US standard, too + self.assertFalse(utils.validate_bucket_name('bucket.', False)) + self.assertFalse(utils.validate_bucket_name('a' * 256, False)) + + def test_s3timestamp(self): + expected = '1970-01-01T00:00:01.000Z' + # integer + ts = utils.S3Timestamp(1) + self.assertEqual(expected, ts.s3xmlformat) + # milliseconds unit should be floored + ts = utils.S3Timestamp(1.1) + self.assertEqual(expected, ts.s3xmlformat) + # float (microseconds) should be floored too + ts = utils.S3Timestamp(1.000001) + self.assertEqual(expected, ts.s3xmlformat) + # Bigger float (milliseconds) should be floored too + ts = 
utils.S3Timestamp(1.9) + self.assertEqual(expected, ts.s3xmlformat) + + def test_mktime(self): + date_headers = [ + 'Thu, 01 Jan 1970 00:00:00 -0000', + 'Thu, 01 Jan 1970 00:00:00 GMT', + 'Thu, 01 Jan 1970 00:00:00 UTC', + 'Thu, 01 Jan 1970 08:00:00 +0800', + 'Wed, 31 Dec 1969 16:00:00 -0800', + 'Wed, 31 Dec 1969 16:00:00 PST', + ] + for header in date_headers: + ts = utils.mktime(header) + self.assertEqual(0, ts, 'Got %r for header %s' % (ts, header)) + + # Last-Modified response style + self.assertEqual(0, utils.mktime('1970-01-01T00:00:00')) + + # X-Amz-Date style + self.assertEqual(0, utils.mktime('19700101T000000Z', + s3request.SIGV4_X_AMZ_DATE_FORMAT)) + + def test_mktime_weird_tz(self): + orig_tz = os.environ.get('TZ', '') + try: + os.environ['TZ'] = 'EST+05EDT,M4.1.0,M10.5.0' + time.tzset() + os.environ['TZ'] = '+0000' + # No tzset! Simulating what Swift would do. + self.assertNotEqual(0, time.timezone) + self.test_mktime() + finally: + os.environ['TZ'] = orig_tz + time.tzset() + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/s3api/test_versioning.py swift-2.18.0/test/unit/common/middleware/s3api/test_versioning.py --- swift-2.17.0/test/unit/common/middleware/s3api/test_versioning.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/s3api/test_versioning.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,56 @@ +# Copyright (c) 2014 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
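Editor's note on the mktime tests above: they feed utils.mktime a range of RFC 1123-style Date headers in different timezones and expect every one of them to resolve to Unix time 0, independently of the process's TZ setting. As a hedged illustration only (not the middleware's own parser, and covering just the email-style formats, not the ISO 8601 or X-Amz-Date forms the tests also exercise), the standard library reproduces that behaviour:

    from email.utils import mktime_tz, parsedate_tz


    def to_epoch(date_header):
        # parsedate_tz understands numeric offsets like -0800 as well as
        # names like GMT/PST; mktime_tz converts the parsed tuple to seconds
        # since the epoch in UTC, so the result does not depend on local TZ.
        parsed = parsedate_tz(date_header)
        if parsed is None:
            raise ValueError('could not parse %r' % date_header)
        return mktime_tz(parsed)


    assert to_epoch('Thu, 01 Jan 1970 00:00:00 GMT') == 0
    assert to_epoch('Wed, 31 Dec 1969 16:00:00 -0800') == 0
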
+ +import unittest + +from swift.common.swob import Request + +from test.unit.common.middleware.s3api import S3ApiTestCase +from swift.common.middleware.s3api.etree import fromstring + + +class TestS3ApiVersioning(S3ApiTestCase): + + def setUp(self): + super(TestS3ApiVersioning, self).setUp() + + def test_object_versioning_GET(self): + req = Request.blank('/bucket/object?versioning', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + + status, headers, body = self.call_s3api(req) + self.assertEqual(status.split()[0], '200') + fromstring(body, 'VersioningConfiguration') + + def test_object_versioning_PUT(self): + req = Request.blank('/bucket/object?versioning', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + self.assertEqual(self._get_error_code(body), 'NotImplemented') + + def test_bucket_versioning_GET(self): + req = Request.blank('/bucket?versioning', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Authorization': 'AWS test:tester:hmac', + 'Date': self.get_date_header()}) + status, headers, body = self.call_s3api(req) + fromstring(body, 'VersioningConfiguration') + +if __name__ == '__main__': + unittest.main() diff -Nru swift-2.17.0/test/unit/common/middleware/test_gatekeeper.py swift-2.18.0/test/unit/common/middleware/test_gatekeeper.py --- swift-2.17.0/test/unit/common/middleware/test_gatekeeper.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/test_gatekeeper.py 2018-05-30 10:17:02.000000000 +0000 @@ -104,7 +104,7 @@ app = self.get_app(fake_app, {}) resp = req.get_response(app) self.assertEqual('200 OK', resp.status) - self.assertEqual(resp.body, 'FAKE APP') + self.assertEqual(resp.body, b'FAKE APP') self._assertHeadersEqual(self.allowed_headers, fake_app.req.headers) def _test_reserved_header_removed_inbound(self, method): diff -Nru swift-2.17.0/test/unit/common/middleware/test_slo.py swift-2.18.0/test/unit/common/middleware/test_slo.py --- swift-2.17.0/test/unit/common/middleware/test_slo.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/test_slo.py 2018-05-30 10:17:02.000000000 +0000 @@ -1039,6 +1039,12 @@ json.dumps([{'name': '/deltest/b_2', 'hash': 'a', 'bytes': '1'}, {'name': '/deltest/c_3', 'hash': 'b', 'bytes': '2'}])) self.app.register( + 'GET', '/v1/AUTH_test-un\xc3\xafcode/deltest/man-all-there', + swob.HTTPOk, {'Content-Type': 'application/json', + 'X-Static-Large-Object': 'true'}, + json.dumps([{'name': '/deltest/b_2', 'hash': 'a', 'bytes': '1'}, + {'name': '/deltest/c_3', 'hash': 'b', 'bytes': '2'}])) + self.app.register( 'DELETE', '/v1/AUTH_test/deltest/man-all-there', swob.HTTPNoContent, {}, None) self.app.register( @@ -1059,6 +1065,15 @@ self.app.register( 'DELETE', '/v1/AUTH_test/deltest/d_3', swob.HTTPNoContent, {}, None) + self.app.register( + 'DELETE', '/v1/AUTH_test-un\xc3\xafcode/deltest/man-all-there', + swob.HTTPNoContent, {}, None) + self.app.register( + 'DELETE', '/v1/AUTH_test-un\xc3\xafcode/deltest/b_2', + swob.HTTPNoContent, {}, None) + self.app.register( + 'DELETE', '/v1/AUTH_test-un\xc3\xafcode/deltest/c_3', + swob.HTTPNoContent, {}, None) self.app.register( 'GET', '/v1/AUTH_test/deltest/manifest-with-submanifest', @@ -1190,6 +1205,31 @@ ('DELETE', ('/v1/AUTH_test/deltest/' + 'man-all-there?multipart-manifest=delete'))])) + def test_handle_multipart_delete_non_ascii(self): + acct = 
u'AUTH_test-un\u00efcode'.encode('utf-8') + req = Request.blank( + '/v1/%s/deltest/man-all-there?multipart-manifest=delete' % acct, + environ={'REQUEST_METHOD': 'DELETE'}) + status, _, body = self.call_slo(req) + self.assertEqual('200 OK', status) + lines = body.split('\n') + for l in lines: + parts = l.split(':') + if len(parts) == 1: + continue + key, value = parts + if key == 'Response Status': + delete_status = int(value.split()[0]) + self.assertEqual(200, delete_status) + + self.assertEqual(set(self.app.calls), set([ + ('GET', + '/v1/%s/deltest/man-all-there?multipart-manifest=get' % acct), + ('DELETE', '/v1/%s/deltest/b_2?multipart-manifest=delete' % acct), + ('DELETE', '/v1/%s/deltest/c_3?multipart-manifest=delete' % acct), + ('DELETE', ('/v1/%s/deltest/' + 'man-all-there?multipart-manifest=delete' % acct))])) + def test_handle_multipart_delete_nested(self): req = Request.blank( '/v1/AUTH_test/deltest/manifest-with-submanifest?' + @@ -1392,6 +1432,7 @@ 'Content-Length': str(len(manifest_json)), 'Content-Type': 'test/data', 'X-Static-Large-Object': 'true', + 'X-Object-Sysmeta-Artisanal-Etag': 'bespoke', 'Etag': md5hex(manifest_json)} manifest_headers.update(getattr(self, 'extra_manifest_headers', {})) self.manifest_has_sysmeta = all(h in manifest_headers for h in ( @@ -1449,6 +1490,46 @@ expected_app_calls.append(('GET', '/v1/AUTH_test/headtest/man')) self.assertEqual(self.app.calls, expected_app_calls) + def test_if_none_match_etag_matching_with_override(self): + req = Request.blank( + '/v1/AUTH_test/headtest/man', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={ + 'If-None-Match': 'bespoke', + 'X-Backend-Etag-Is-At': 'X-Object-Sysmeta-Artisanal-Etag'}) + status, headers, body = self.call_slo(req) + self.assertEqual(status, '304 Not Modified') + # We *are not* responsible for replacing the etag; whoever set + # x-backend-etag-is-at is responsible + self.assertIn(('Etag', '"%s"' % self.slo_etag), headers) + self.assertIn(('Content-Length', '0'), headers) + self.assertIn(('Content-Type', 'test/data'), headers) + + expected_app_calls = [('HEAD', '/v1/AUTH_test/headtest/man')] + if not self.manifest_has_sysmeta: + expected_app_calls.append(('GET', '/v1/AUTH_test/headtest/man')) + self.assertEqual(self.app.calls, expected_app_calls) + + def test_if_match_etag_not_matching_with_override(self): + req = Request.blank( + '/v1/AUTH_test/headtest/man', + environ={'REQUEST_METHOD': 'HEAD'}, + headers={ + 'If-Match': self.slo_etag, + 'X-Backend-Etag-Is-At': 'X-Object-Sysmeta-Artisanal-Etag'}) + status, headers, body = self.call_slo(req) + self.assertEqual(status, '412 Precondition Failed') + # We *are not* responsible for replacing the etag; whoever set + # x-backend-etag-is-at is responsible + self.assertIn(('Etag', '"%s"' % self.slo_etag), headers) + self.assertIn(('Content-Length', '0'), headers) + self.assertIn(('Content-Type', 'test/data'), headers) + + expected_app_calls = [('HEAD', '/v1/AUTH_test/headtest/man')] + if not self.manifest_has_sysmeta: + expected_app_calls.append(('GET', '/v1/AUTH_test/headtest/man')) + self.assertEqual(self.app.calls, expected_app_calls) + class TestSloHeadManifest(TestSloHeadOldManifest): def setUp(self): @@ -3040,14 +3121,9 @@ def test_download_takes_too_long(self, mock_time): mock_time.time.side_effect = [ 0, # start time - 1, # just building the first segment request; purely local - 2, # build the second segment request object, too, so we know we - # can't coalesce and should instead go fetch the first segment - 7 * 3600, # that takes a while, but gets 
serviced; we build the - # third request and service the second - 21 * 3600, # which takes *even longer* (ostensibly something to - # do with submanifests), but we build the fourth... - 28 * 3600, # and before we go to service it we time out + 10 * 3600, # a_5 + 20 * 3600, # b_10 + 30 * 3600, # c_15, but then we time out ] req = Request.blank( '/v1/AUTH_test/gettest/manifest-abcd', @@ -3415,7 +3491,8 @@ 'Content-Length': str(len(_abcd_manifest_json)), 'Content-Type': 'application/json', 'X-Static-Large-Object': 'true', - 'Etag': md5hex(_abcd_manifest_json)} + 'Etag': md5hex(_abcd_manifest_json), + 'X-Object-Sysmeta-Custom-Etag': 'a custom etag'} manifest_headers.update(getattr(self, 'extra_manifest_headers', {})) self.manifest_has_sysmeta = all(h in manifest_headers for h in ( 'X-Object-Sysmeta-Slo-Etag', 'X-Object-Sysmeta-Slo-Size')) @@ -3526,6 +3603,128 @@ self.assertEqual(self.app.headers[0].get('X-Backend-Etag-Is-At'), 'x-object-sysmeta-slo-etag') + def test_if_none_match_matches_with_override(self): + req = Request.blank( + '/v1/AUTH_test/gettest/manifest-abcd', + environ={'REQUEST_METHOD': 'GET'}, + headers={'If-None-Match': '"a custom etag"', + 'X-Backend-Etag-Is-At': 'X-Object-Sysmeta-Custom-Etag'}) + status, headers, body = self.call_slo(req) + + self.assertEqual(status, '304 Not Modified') + self.assertIn(('Content-Length', '0'), headers) + self.assertIn(('Etag', '"%s"' % self.slo_etag), headers) + self.assertIn(('X-Object-Sysmeta-Custom-Etag', 'a custom etag'), + headers) + self.assertEqual(body, '') + + expected_app_calls = [('GET', '/v1/AUTH_test/gettest/manifest-abcd')] + if not self.manifest_has_sysmeta: + # NB: no known middleware would have written a custom etag with + # old-style manifests. but if there *was*, here's what'd happen + expected_app_calls.extend([ + # 304, so gotta refetch + ('GET', '/v1/AUTH_test/gettest/manifest-abcd'), + # Since the "authoritative" etag didn't come from slo, we still + # verify the first segment + ('GET', '/v1/AUTH_test/gettest/manifest-bc'), + ('GET', '/v1/AUTH_test/gettest/a_5?multipart-manifest=get'), + ]) + self.assertEqual(self.app.calls, expected_app_calls) + self.assertEqual( + self.app.headers[0].get('X-Backend-Etag-Is-At'), + 'X-Object-Sysmeta-Custom-Etag,x-object-sysmeta-slo-etag') + + def test_if_none_match_does_not_match_with_override(self): + req = Request.blank( + '/v1/AUTH_test/gettest/manifest-abcd', + environ={'REQUEST_METHOD': 'GET'}, + headers={'If-None-Match': "%s" % self.slo_etag, + 'X-Backend-Etag-Is-At': 'X-Object-Sysmeta-Custom-Etag'}) + status, headers, body = self.call_slo(req) + + self.assertEqual(status, '200 OK') + self.assertIn(('Content-Length', '50'), headers) + self.assertIn(('Etag', '"%s"' % self.slo_etag), headers) + self.assertIn(('X-Object-Sysmeta-Custom-Etag', 'a custom etag'), + headers) + self.assertEqual( + body, 'aaaaabbbbbbbbbbcccccccccccccccdddddddddddddddddddd') + + expected_app_calls = [ + ('GET', '/v1/AUTH_test/gettest/manifest-abcd'), + ('GET', '/v1/AUTH_test/gettest/manifest-bc'), + ('GET', '/v1/AUTH_test/gettest/a_5?multipart-manifest=get'), + ('GET', '/v1/AUTH_test/gettest/b_10?multipart-manifest=get'), + ('GET', '/v1/AUTH_test/gettest/c_15?multipart-manifest=get'), + ('GET', '/v1/AUTH_test/gettest/d_20?multipart-manifest=get'), + ] + self.assertEqual(self.app.calls, expected_app_calls) + self.assertEqual( + self.app.headers[0].get('X-Backend-Etag-Is-At'), + 'X-Object-Sysmeta-Custom-Etag,x-object-sysmeta-slo-etag') + + def test_if_match_matches_with_override(self): + req = Request.blank( + 
'/v1/AUTH_test/gettest/manifest-abcd', + environ={'REQUEST_METHOD': 'GET'}, + headers={'If-Match': '"a custom etag"', + 'X-Backend-Etag-Is-At': 'X-Object-Sysmeta-Custom-Etag'}) + status, headers, body = self.call_slo(req) + + self.assertEqual(status, '200 OK') + self.assertIn(('Content-Length', '50'), headers) + self.assertIn(('Etag', '"%s"' % self.slo_etag), headers) + self.assertIn(('X-Object-Sysmeta-Custom-Etag', 'a custom etag'), + headers) + self.assertEqual( + body, 'aaaaabbbbbbbbbbcccccccccccccccdddddddddddddddddddd') + + expected_app_calls = [ + ('GET', '/v1/AUTH_test/gettest/manifest-abcd'), + # Match on the override from left of us; no need to refetch + ('GET', '/v1/AUTH_test/gettest/manifest-bc'), + ('GET', '/v1/AUTH_test/gettest/a_5?multipart-manifest=get'), + ('GET', '/v1/AUTH_test/gettest/b_10?multipart-manifest=get'), + ('GET', '/v1/AUTH_test/gettest/c_15?multipart-manifest=get'), + ('GET', '/v1/AUTH_test/gettest/d_20?multipart-manifest=get'), + ] + self.assertEqual(self.app.calls, expected_app_calls) + self.assertEqual( + self.app.headers[0].get('X-Backend-Etag-Is-At'), + 'X-Object-Sysmeta-Custom-Etag,x-object-sysmeta-slo-etag') + + def test_if_match_does_not_match_with_override(self): + req = Request.blank( + '/v1/AUTH_test/gettest/manifest-abcd', + environ={'REQUEST_METHOD': 'GET'}, + headers={'If-Match': "%s" % self.slo_etag, + 'X-Backend-Etag-Is-At': 'X-Object-Sysmeta-Custom-Etag'}) + status, headers, body = self.call_slo(req) + + self.assertEqual(status, '412 Precondition Failed') + self.assertIn(('Content-Length', '0'), headers) + self.assertIn(('Etag', '"%s"' % self.slo_etag), headers) + self.assertIn(('X-Object-Sysmeta-Custom-Etag', 'a custom etag'), + headers) + self.assertEqual(body, '') + + expected_app_calls = [('GET', '/v1/AUTH_test/gettest/manifest-abcd')] + if not self.manifest_has_sysmeta: + # NB: no known middleware would have written a custom etag with + # old-style manifests. 
but if there *was*, here's what'd happen + expected_app_calls.extend([ + # Manifest never matches -> got back a 412; need to re-fetch + ('GET', '/v1/AUTH_test/gettest/manifest-abcd'), + # We *still* verify the first segment, even though we'll 412 + ('GET', '/v1/AUTH_test/gettest/manifest-bc'), + ('GET', '/v1/AUTH_test/gettest/a_5?multipart-manifest=get'), + ]) + self.assertEqual(self.app.calls, expected_app_calls) + self.assertEqual( + self.app.headers[0].get('X-Backend-Etag-Is-At'), + 'X-Object-Sysmeta-Custom-Etag,x-object-sysmeta-slo-etag') + def test_if_match_matches_and_range(self): req = Request.blank( '/v1/AUTH_test/gettest/manifest-abcd', diff -Nru swift-2.17.0/test/unit/common/middleware/test_tempauth.py swift-2.18.0/test/unit/common/middleware/test_tempauth.py --- swift-2.17.0/test/unit/common/middleware/test_tempauth.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/middleware/test_tempauth.py 2018-05-30 10:17:02.000000000 +0000 @@ -37,6 +37,14 @@ return self.store.get(key) def set(self, key, value, time=0): + if isinstance(value, (tuple, list)): + decoded = [] + for elem in value: + if type(elem) == str: + decoded.append(elem.decode('utf8')) + else: + decoded.append(elem) + value = tuple(decoded) self.store[key] = value return True @@ -269,7 +277,7 @@ local_auth = auth.filter_factory( {'user_s3_s3': 'secret .admin'})(local_app) req = self._make_request('/v1/s3:s3', environ={ - 'swift3.auth_details': { + 's3api.auth_details': { 'access_key': 's3:s3', 'signature': b64encode('sig'), 'string_to_sign': 't', @@ -287,7 +295,7 @@ local_auth = auth.filter_factory( {'user_s3_s3': 'secret .admin'})(local_app) req = self._make_request('/v1/s3:s3', environ={ - 'swift3.auth_details': { + 's3api.auth_details': { 'access_key': 's3:s3', 'signature': b64encode('sig'), 'string_to_sign': 't', @@ -305,7 +313,7 @@ local_auth = auth.filter_factory( {'user_s3_s3': 'secret .admin'})(local_app) req = self._make_request('/v1/s3:s3', environ={ - 'swift3.auth_details': { + 's3api.auth_details': { 'access_key': 's3:s3', 'signature': b64encode('sig'), 'string_to_sign': 't'}}) @@ -908,6 +916,37 @@ self.assertEqual(resp.headers.get('Www-Authenticate'), 'Swift realm="BLAH_account"') + def test_successful_token_unicode_user(self): + app = FakeApp(iter(NO_CONTENT_RESP)) + ath = auth.filter_factory( + {u'user_t\u00e9st_t\u00e9ster'.encode('utf8'): + u'p\u00e1ss .admin'.encode('utf8')})(app) + memcache = FakeMemcache() + + req = self._make_request( + '/auth/v1.0', + headers={'X-Auth-User': u't\u00e9st:t\u00e9ster', + 'X-Auth-Key': u'p\u00e1ss'}) + req.environ['swift.cache'] = memcache + resp = req.get_response(ath) + self.assertEqual(resp.status_int, 200) + auth_token = resp.headers['X-Auth-Token'] + + req = self._make_request( + '/auth/v1.0', + headers={'X-Auth-User': u't\u00e9st:t\u00e9ster', + 'X-Auth-Key': u'p\u00e1ss'}) + req.environ['swift.cache'] = memcache + resp = req.get_response(ath) + self.assertEqual(resp.status_int, 200) + self.assertEqual(auth_token, resp.headers['X-Auth-Token']) + + req = self._make_request( + u'/v1/AUTH_t\u00e9st', headers={'X-Auth-Token': auth_token}) + req.environ['swift.cache'] = memcache + resp = req.get_response(ath) + self.assertEqual(204, resp.status_int) + class TestAuthWithMultiplePrefixes(TestAuth): """ diff -Nru swift-2.17.0/test/unit/common/middleware/test_versioned_writes.py swift-2.18.0/test/unit/common/middleware/test_versioned_writes.py --- swift-2.17.0/test/unit/common/middleware/test_versioned_writes.py 2018-02-05 14:00:48.000000000 +0000 +++ 
swift-2.18.0/test/unit/common/middleware/test_versioned_writes.py 2018-05-30 10:17:02.000000000 +0000 @@ -354,6 +354,30 @@ self.assertEqual(['VW', None], self.app.swift_sources) self.assertEqual({'fake_trans_id'}, set(self.app.txn_ids)) + def test_put_versioned_object_including_url_encoded_name_success(self): + self.app.register( + 'PUT', '/v1/a/c/%ff', swob.HTTPOk, {}, 'passed') + self.app.register( + 'GET', '/v1/a/c/%ff', swob.HTTPNotFound, {}, None) + + cache = FakeCache({'sysmeta': {'versions-location': 'ver_cont'}}) + req = Request.blank( + '/v1/a/c/%25ff', + environ={'REQUEST_METHOD': 'PUT', 'swift.cache': cache, + 'CONTENT_LENGTH': '100', + 'swift.trans_id': 'fake_trans_id'}) + status, headers, body = self.call_vw(req) + self.assertEqual(status, '200 OK') + self.assertEqual(len(self.authorized), 2) + # Versioned writes middleware now calls auth on the incoming request + # before we try the GET and then at the proxy, so there are 2 + # authorized for the same request. + self.assertRequestEqual(req, self.authorized[0]) + self.assertRequestEqual(req, self.authorized[1]) + self.assertEqual(2, self.app.call_count) + self.assertEqual(['VW', None], self.app.swift_sources) + self.assertEqual({'fake_trans_id'}, set(self.app.txn_ids)) + def test_put_object_no_versioning_with_container_config_true(self): # set False to versions_write and expect no GET occurred self.vw.conf = {'allow_versioned_writes': 'false'} Binary files /tmp/tmppq8kS4/4Rf6ZdDa06/swift-2.17.0/test/unit/common/missing_container_info.db and /tmp/tmppq8kS4/CFwch7SxMx/swift-2.18.0/test/unit/common/missing_container_info.db differ diff -Nru swift-2.17.0/test/unit/common/ring/test_builder.py swift-2.18.0/test/unit/common/ring/test_builder.py --- swift-2.17.0/test/unit/common/ring/test_builder.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/ring/test_builder.py 2018-05-30 10:17:02.000000000 +0000 @@ -25,6 +25,7 @@ from math import ceil from tempfile import mkdtemp from shutil import rmtree +import sys import random import uuid import itertools @@ -649,7 +650,7 @@ self.assertEqual({0: 2, 1: 2, 2: 2}, dict(counts['zone'])) # each part is assigned once to six unique devices - self.assertEqual((counts['dev_id'].values()), [1] * 6) + self.assertEqual(list(counts['dev_id'].values()), [1] * 6) self.assertEqual(len(set(counts['dev_id'].keys())), 6) def test_multitier_part_moves_with_0_min_part_hours(self): @@ -2114,7 +2115,7 @@ with self.assertRaises(AttributeError) as cm: rb.id self.assertIn('id attribute has not been initialised', - cm.exception.message) + cm.exception.args[0]) builder_file = os.path.join(self.testdir, 'test_save.builder') orig_rb.save(builder_file) @@ -2131,7 +2132,7 @@ with self.assertRaises(AttributeError) as cm: loaded_rb.id self.assertIn('id attribute has not been initialised', - cm.exception.message) + cm.exception.args[0]) # check saving assigns an id, and that it is persisted loaded_rb.save(builder_file) @@ -2169,7 +2170,7 @@ with self.assertRaises(AttributeError) as cm: rb.id self.assertIn('id attribute has not been initialised', - cm.exception.message) + cm.exception.args[0]) # save must succeed for id to be assigned with self.assertRaises(IOError): rb.save(os.path.join( @@ -2177,7 +2178,7 @@ with self.assertRaises(AttributeError) as cm: rb.id self.assertIn('id attribute has not been initialised', - cm.exception.message) + cm.exception.args[0]) def test_search_devs(self): rb = ring.RingBuilder(8, 3, 1) @@ -2480,6 +2481,8 @@ (0, 0, '127.0.0.1', 3): [0, 256, 0, 0], }) +
@unittest.skipIf(sys.version_info >= (3,), + "Seed-specific tests don't work well on py3") def test_undispersable_zone_converge_on_balance(self): rb = ring.RingBuilder(8, 6, 0) dev_id = 0 @@ -2535,6 +2538,8 @@ self.assertEqual(rb.get_balance(), 0.390625) self.assertEqual(rb.dispersion, 16.6015625) + @unittest.skipIf(sys.version_info >= (3,), + "Seed-specific tests don't work well on py3") def test_undispersable_server_converge_on_balance(self): rb = ring.RingBuilder(8, 6, 0) dev_id = 0 diff -Nru swift-2.17.0/test/unit/common/ring/test_composite_builder.py swift-2.18.0/test/unit/common/ring/test_composite_builder.py --- swift-2.17.0/test/unit/common/ring/test_composite_builder.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/ring/test_composite_builder.py 2018-05-30 10:17:02.000000000 +0000 @@ -230,7 +230,7 @@ with self.assertRaises(ValueError) as cm: compose_rings(builders) self.assertIn('Same region found in different rings', - cm.exception.message) + cm.exception.args[0]) def test_composite_only_one_ring_in_the_args_error(self): builders = self.create_sample_ringbuilders(1) @@ -238,7 +238,7 @@ compose_rings(builders) self.assertIn( 'Two or more component builders are required.', - cm.exception.message) + cm.exception.args[0]) def test_composite_same_device_in_the_different_rings_error(self): builders = self.create_sample_ringbuilders(2) @@ -267,7 +267,7 @@ self.assertIn( 'Duplicate ip/port/device combination %(ip)s/%(port)s/%(device)s ' 'found in builders at indexes 0 and 2' % - same_device, cm.exception.message) + same_device, cm.exception.args[0]) def test_different_part_power_error(self): # create a ring builder @@ -296,7 +296,7 @@ with self.assertRaises(ValueError) as cm: compose_rings(builders) self.assertIn("All builders must have same value for 'part_power'", - cm.exception.message) + cm.exception.args[0]) def test_compose_rings_float_replica_count_builder_error(self): builders = self.create_sample_ringbuilders(1) @@ -322,8 +322,8 @@ with self.assertRaises(ValueError) as cm: compose_rings(builders) - self.assertIn("Problem with builders", cm.exception.message) - self.assertIn("Non integer replica count", cm.exception.message) + self.assertIn("Problem with builders", cm.exception.args[0]) + self.assertIn("Non integer replica count", cm.exception.args[0]) def test_compose_rings_rebalance_needed(self): builders = self.create_sample_ringbuilders(2) @@ -334,8 +334,8 @@ self.assertTrue(builders[1].devs_changed) # sanity check with self.assertRaises(ValueError) as cm: compose_rings(builders) - self.assertIn("Problem with builders", cm.exception.message) - self.assertIn("Builder needs rebalance", cm.exception.message) + self.assertIn("Problem with builders", cm.exception.args[0]) + self.assertIn("Builder needs rebalance", cm.exception.args[0]) # after rebalance, that works (sanity) builders[1].rebalance() compose_rings(builders) @@ -367,7 +367,7 @@ def test_ring_swap(self): # sanity - builders = sorted(self.create_sample_ringbuilders(2)) + builders = self.create_sample_ringbuilders(2) rd = compose_rings(builders) rd.save(self.output_ring) got_ring = Ring(self.output_ring) @@ -377,7 +377,7 @@ self.assertDevices(got_ring, builders) # even if swapped, it works - reverse_builders = sorted(builders, reverse=True) + reverse_builders = builders[::-1] self.assertNotEqual(reverse_builders, builders) rd = compose_rings(reverse_builders) rd.save(self.output_ring) @@ -396,7 +396,7 @@ self.assertDevices(got_ring, builders) self.assertIn("composite ring is not ordered by ring 
order", - cm.exception.message) + cm.exception.args[0]) class TestCompositeRingBuilder(BaseTestCompositeBuilder): @@ -429,7 +429,7 @@ with self.assertRaises(ValueError) as cm: cb.compose(require_modified=True) self.assertIn('None of the component builders has been modified', - cm.exception.message) + cm.exception.args[0]) self.assertEqual(1, cb.version) # ...but by default will compose again despite no changes to components cb.compose(force=True).save(self.output_ring) @@ -530,12 +530,12 @@ CompositeRingBuilder.load(bad_file) self.assertIn( "File does not contain valid composite ring data", - cm.exception.message) + cm.exception.args[0]) except AssertionError as err: raise AssertionError('With content %r: %s' % (content, err)) for content in ('', 'not json', json.dumps({}), json.dumps([])): - check_bad_content(content) + check_bad_content(content.encode('ascii')) good_content = { 'components': [ @@ -548,7 +548,7 @@ for missing in good_content: bad_content = dict(good_content) bad_content.pop(missing) - check_bad_content(json.dumps(bad_content)) + check_bad_content(json.dumps(bad_content).encode('ascii')) def test_save_errors(self): cb_file = os.path.join(self.tmpdir, 'test-composite-ring.json') @@ -556,7 +556,7 @@ def do_test(cb): with self.assertRaises(ValueError) as cm: cb.save(cb_file) - self.assertIn("No composed ring to save", cm.exception.message) + self.assertIn("No composed ring to save", cm.exception.args[0]) do_test(CompositeRingBuilder()) do_test(CompositeRingBuilder([])) @@ -635,7 +635,7 @@ with self.assertRaises(ValueError) as cm: cb.rebalance() self.assertIn('Two or more component builders are required', - cm.exception.message) + cm.exception.args[0]) builders = self.create_sample_ringbuilders(2) cb, builder_files = self._make_composite_builder(builders) @@ -668,7 +668,7 @@ # sanity, it is impossible to compose un-rebalanced component rings with self.assertRaises(ValueError) as cm: cb.compose() - self.assertIn("Builder needs rebalance", cm.exception.message) + self.assertIn("Builder needs rebalance", cm.exception.args[0]) # but ok to compose after rebalance cb.rebalance() rd = cb.compose() @@ -727,14 +727,14 @@ self._call_method_under_test(cb, builder_files, force=force) self.assertIn('Two or more component builders are required', - cm.exception.message) + cm.exception.args[0]) cb = CompositeRingBuilder() with self.assertRaises(ValueError) as cm: self._call_method_under_test(cb, builder_files, force=force) self.assertIn('Two or more component builders are required', - cm.exception.message) + cm.exception.args[0]) builders = self.create_sample_ringbuilders(3) builder_files = self.save_builders(builders) @@ -755,7 +755,7 @@ with self.assertRaises(ValueError) as cm: self._call_method_under_test(cb, builder_files, force=force) - error_lines = cm.exception.message.split('\n') + error_lines = cm.exception.args[0].split('\n') self.assertIn("Problem with builder at index %s" % no_id, error_lines[0]) self.assertIn("id attribute has not been initialised", @@ -785,7 +785,7 @@ def do_check(force): with self.assertRaises(ValueError) as cm: self._call_method_under_test(cb, force=force) - error_lines = cm.exception.message.split('\n') + error_lines = cm.exception.args[0].split('\n') self.assertIn("Builder id %r used at indexes 0, 2" % builders[0].id, error_lines[0]) self.assertFalse(error_lines[1:]) @@ -799,7 +799,7 @@ orig_version = cb.version with self.assertRaises(ValueError) as cm: self._call_method_under_test(cb, builder_files, **kwargs) - error_lines = cm.exception.message.split('\n') 
+ error_lines = cm.exception.args[0].split('\n') self.assertIn("None of the component builders has been modified", error_lines[0]) self.assertFalse(error_lines[1:]) @@ -839,7 +839,7 @@ self.save_builders([old_builders[0], builders[1]]) with self.assertRaises(ValueError) as cm: self._call_method_under_test(cb) - error_lines = cm.exception.message.split('\n') + error_lines = cm.exception.args[0].split('\n') self.assertIn("Invalid builder change at index 0", error_lines[0]) self.assertIn("Older builder version", error_lines[0]) self.assertFalse(error_lines[1:]) @@ -849,7 +849,7 @@ self.save_builders([old_builders[0], builders[1]]) with self.assertRaises(ValueError) as cm: self._call_method_under_test(cb) - error_lines = cm.exception.message.split('\n') + error_lines = cm.exception.args[0].split('\n') self.assertIn("Invalid builder change at index 0", error_lines[0]) self.assertIn("Older builder version", error_lines[0]) self.assertFalse(error_lines[1:]) @@ -869,7 +869,7 @@ with self.assertRaises(ValueError) as cm: self._call_method_under_test( cb, self.save_builders(bad_builders)) - error_lines = cm.exception.message.split('\n') + error_lines = cm.exception.args[0].split('\n') self.assertFalse(error_lines[1:]) self.assertEqual(1, cb.version) # unless we ignore errors @@ -892,7 +892,7 @@ different_files = self.save_builders([builders[0], builders[2]]) with self.assertRaises(ValueError) as cm: self._call_method_under_test(cb, different_files) - error_lines = cm.exception.message.split('\n') + error_lines = cm.exception.args[0].split('\n') self.assertIn("Invalid builder change at index 1", error_lines[0]) self.assertIn("Attribute mismatch for id", error_lines[0]) self.assertFalse(error_lines[1:]) @@ -908,7 +908,7 @@ builder_files.reverse() with self.assertRaises(ValueError) as cm: self._call_method_under_test(cb, builder_files) - error_lines = cm.exception.message.split('\n') + error_lines = cm.exception.args[0].split('\n') for i, line in enumerate(error_lines): self.assertIn("Invalid builder change at index %s" % i, line) self.assertIn("Attribute mismatch for id", line) @@ -925,7 +925,7 @@ self.save_builders(builders) with self.assertRaises(ValueError) as cm: self._call_method_under_test(cb) - error_lines = cm.exception.message.split('\n') + error_lines = cm.exception.args[0].split('\n') for i, line in enumerate(error_lines): self.assertIn("Invalid builder change at index 0", line) self.assertIn("Attribute mismatch for replicas", line) @@ -949,7 +949,7 @@ self.save_builders(builders) with self.assertRaises(ValueError) as cm: self._call_method_under_test(cb) - error_lines = cm.exception.message.split('\n') + error_lines = cm.exception.args[0].split('\n') for i, line in enumerate(error_lines): self.assertIn("Invalid builder change at index 0", line) self.assertIn("Attribute mismatch for replicas", line) @@ -958,7 +958,7 @@ # validate will fail because the builder needs rebalancing with self.assertRaises(ValueError) as cm: self._call_method_under_test(cb, force=True) - error_lines = cm.exception.message.split('\n') + error_lines = cm.exception.args[0].split('\n') self.assertIn("Problem with builders", error_lines[0]) self.assertIn("Builder needs rebalance", error_lines[1]) self.assertFalse(error_lines[2:]) @@ -976,17 +976,18 @@ if rebalance: rb.rebalance() self.assertEqual(self._partition_counts(rb), - {0: 256, 1: 256, 2: 256}) # sanity check + [256, 256, 256]) # sanity check return rb def _partition_counts(self, builder): """ - Returns a dictionary mapping device id's to (number of + Returns an 
array mapping device id's to (number of partitions assigned to that device). """ - return Counter(builder.devs[dev_id]['id'] - for part2dev_id in builder._replica2part2dev - for dev_id in part2dev_id) + c = Counter(builder.devs[dev_id]['id'] + for part2dev_id in builder._replica2part2dev + for dev_id in part2dev_id) + return [c[d['id']] for d in builder.devs] def get_moved_parts(self, after, before): def uniqueness(dev): @@ -1020,7 +1021,7 @@ # all cobuilders can perform initial rebalance cb.rebalance() - exp = {0: 256, 1: 256, 2: 256} + exp = [256, 256, 256] self.assertEqual(exp, self._partition_counts(builders[0])) self.assertEqual(exp, self._partition_counts(builders[1])) self.assertEqual(exp, self._partition_counts(builders[2])) @@ -1057,13 +1058,13 @@ rb1_parts_moved = self.get_moved_parts(builders[0], old_builders[0]) self.assertEqual(192, len(rb1_parts_moved)) self.assertEqual(self._partition_counts(builders[0]), - {0: 192, 1: 192, 2: 192, 3: 192}) + [192, 192, 192, 192]) rb2_parts_moved = self.get_moved_parts(builders[1], old_builders[1]) self.assertEqual(64, len(rb2_parts_moved)) counts = self._partition_counts(builders[1]) self.assertEqual(counts[3], 64) - self.assertEqual([234, 235, 235], sorted(counts.values()[:3])) + self.assertEqual([234, 235, 235], sorted(counts[:3])) self.assertFalse(rb2_parts_moved.intersection(rb1_parts_moved)) # rb3 can't rebalance - all parts moved while rebalancing rb1 and rb2 @@ -1191,7 +1192,7 @@ rb1_parts_moved = self.get_moved_parts(rb1s[1], rb1s[0]) self.assertEqual(192, len(rb1_parts_moved)) self.assertEqual(self._partition_counts(rb1s[1]), - {0: 192, 1: 192, 2: 192, 3: 192}) + [192, 192, 192, 192]) # rebalancing rb2 - rb2 in isolation could potentially move all parts # so would move 192 parts to new device, but it is constrained by rb1 @@ -1200,7 +1201,7 @@ self.assertEqual(64, len(rb2_parts_moved)) counts = self._partition_counts(rb2s[3]) self.assertEqual(counts[3], 64) - self.assertEqual([234, 235, 235], sorted(counts.values()[:3])) + self.assertEqual([234, 235, 235], sorted(counts[:3])) self.assertFalse(rb2_parts_moved.intersection(rb1_parts_moved)) self.assertEqual(192, self.num_parts_can_move(rb2s[3])) self.assertEqual(64, self.num_parts_can_move(rb1s[3])) @@ -1255,7 +1256,7 @@ # rebalance - after that expect no more updates with mock_update_last_part_moves() as update_calls: cb.update_last_part_moves() - self.assertEqual(sorted([rb1, rb2]), sorted(update_calls)) + self.assertEqual({rb1, rb2}, set(update_calls)) with mock_update_last_part_moves() as update_calls: with mock_can_part_move() as can_part_move_calls: @@ -1263,14 +1264,14 @@ self.assertFalse(update_calls) # rb1 has never been rebalanced so no calls propagate from its # can_part_move method to its superclass _can_part_move method - self.assertEqual([rb2], can_part_move_calls.keys()) + self.assertEqual({rb2}, set(can_part_move_calls)) with mock_update_last_part_moves() as update_calls: with mock_can_part_move() as can_part_move_calls: rb1.rebalance() self.assertFalse(update_calls) # rb1 is being rebalanced so gets checked, and rb2 also gets checked - self.assertEqual(sorted([rb1, rb2]), sorted(can_part_move_calls)) + self.assertEqual({rb1, rb2}, set(can_part_move_calls)) self.assertEqual(768, len(can_part_move_calls[rb1])) self.assertEqual(768, len(can_part_move_calls[rb2])) diff -Nru swift-2.17.0/test/unit/common/ring/test_ring.py swift-2.18.0/test/unit/common/ring/test_ring.py --- swift-2.17.0/test/unit/common/ring/test_ring.py 2018-02-05 14:00:48.000000000 +0000 +++ 
swift-2.18.0/test/unit/common/ring/test_ring.py 2018-05-30 10:17:02.000000000 +0000 @@ -40,8 +40,8 @@ def setUp(self): self._orig_hash_suffix = utils.HASH_PATH_SUFFIX self._orig_hash_prefix = utils.HASH_PATH_PREFIX - utils.HASH_PATH_SUFFIX = 'endcap' - utils.HASH_PATH_PREFIX = '' + utils.HASH_PATH_SUFFIX = b'endcap' + utils.HASH_PATH_PREFIX = b'' def tearDown(self): utils.HASH_PATH_SUFFIX = self._orig_hash_suffix @@ -150,8 +150,8 @@ [{'id': 0, 'zone': 0}, {'id': 1, 'zone': 1}], 30) rd.save(ring_fname1) rd.save(ring_fname2) - with open(ring_fname1) as ring1: - with open(ring_fname2) as ring2: + with open(ring_fname1, 'rb') as ring1: + with open(ring_fname2, 'rb') as ring2: self.assertEqual(ring1.read(), ring2.read()) def test_permissions(self): @@ -160,8 +160,12 @@ [array.array('H', [0, 1, 0, 1]), array.array('H', [0, 1, 0, 1])], [{'id': 0, 'zone': 0}, {'id': 1, 'zone': 1}], 30) rd.save(ring_fname) - self.assertEqual(oct(stat.S_IMODE(os.stat(ring_fname).st_mode)), - '0644') + ring_mode = stat.S_IMODE(os.stat(ring_fname).st_mode) + expected_mode = (stat.S_IRUSR | stat.S_IWUSR | + stat.S_IRGRP | stat.S_IROTH) + self.assertEqual( + ring_mode, expected_mode, + 'Ring has mode 0%o, expected 0%o' % (ring_mode, expected_mode)) def test_replica_count(self): rd = ring.RingData( @@ -227,8 +231,8 @@ self.assertEqual(self.ring.reload_time, self.intended_reload_time) self.assertEqual(self.ring.serialized_path, self.testgz) # test invalid endcap - with mock.patch.object(utils, 'HASH_PATH_SUFFIX', ''), \ - mock.patch.object(utils, 'HASH_PATH_PREFIX', ''), \ + with mock.patch.object(utils, 'HASH_PATH_SUFFIX', b''), \ + mock.patch.object(utils, 'HASH_PATH_PREFIX', b''), \ mock.patch.object(utils, 'SWIFT_CONF_FILE', ''): self.assertRaises(SystemExit, ring.Ring, self.testdir, 'whatever') @@ -490,6 +494,8 @@ self.ring.devs.append(new_dev) self.ring._rebuild_tier_data() + @unittest.skipIf(sys.version_info >= (3,), + "Seed-specific tests don't work well on py3") def test_get_more_nodes(self): # Yes, these tests are deliberately very fragile. We want to make sure # that if someone changes the results the ring produces, they know it. diff -Nru swift-2.17.0/test/unit/common/ring/test_utils.py swift-2.18.0/test/unit/common/ring/test_utils.py --- swift-2.17.0/test/unit/common/ring/test_utils.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/ring/test_utils.py 2018-05-30 10:17:02.000000000 +0000 @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import unittest from collections import defaultdict @@ -663,6 +664,8 @@ } self.assertEqual(device, expected) + @unittest.skipIf(sys.version_info >= (3,), + "Seed-specific tests don't work well on py3") def test_dispersion_report(self): rb = ring.RingBuilder(8, 3, 0) rb.add_dev({'id': 0, 'region': 1, 'zone': 0, 'weight': 100, diff -Nru swift-2.17.0/test/unit/common/test_constraints.py swift-2.18.0/test/unit/common/test_constraints.py --- swift-2.17.0/test/unit/common/test_constraints.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/test_constraints.py 2018-05-30 10:17:02.000000000 +0000 @@ -360,13 +360,13 @@ self.assertEqual(req.headers['X-Delete-At'], str(int(ts) + 42)) def test_check_delete_headers_sets_delete_at(self): - t = time.time() - expected = str(int(t) + 1000) + ts = utils.Timestamp.now() + expected = str(int(ts) + 1000) # check delete-at is passed through headers = {'Content-Length': '0', 'Content-Type': 'text/plain', 'X-Delete-At': expected, - 'X-Timestamp': str(t)} + 'X-Timestamp': ts.internal} req = Request.blank('/', headers=headers) constraints.check_delete_headers(req) self.assertIn('X-Delete-At', req.headers) @@ -376,19 +376,19 @@ headers = {'Content-Length': '0', 'Content-Type': 'text/plain', 'X-Delete-After': '42', - 'X-Timestamp': str(t)} + 'X-Timestamp': ts.internal} req = Request.blank('/', headers=headers) constraints.check_delete_headers(req) self.assertIn('X-Delete-At', req.headers) - expected = str(int(t) + 42) + expected = str(int(ts) + 42) self.assertEqual(req.headers['X-Delete-At'], expected) # check delete-after takes precedence over delete-at headers = {'Content-Length': '0', 'Content-Type': 'text/plain', 'X-Delete-After': '42', - 'X-Delete-At': str(int(t) + 40), - 'X-Timestamp': str(t)} + 'X-Delete-At': str(int(ts) + 40), + 'X-Timestamp': ts.internal} req = Request.blank('/', headers=headers) constraints.check_delete_headers(req) self.assertIn('X-Delete-At', req.headers) @@ -397,8 +397,8 @@ headers = {'Content-Length': '0', 'Content-Type': 'text/plain', 'X-Delete-After': '42', - 'X-Delete-At': str(int(t) + 44), - 'X-Timestamp': str(t)} + 'X-Delete-At': str(int(ts) + 44), + 'X-Timestamp': ts.internal} req = Request.blank('/', headers=headers) constraints.check_delete_headers(req) self.assertIn('X-Delete-At', req.headers) diff -Nru swift-2.17.0/test/unit/common/test_daemon.py swift-2.18.0/test/unit/common/test_daemon.py --- swift-2.17.0/test/unit/common/test_daemon.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/test_daemon.py 2018-05-30 10:17:02.000000000 +0000 @@ -67,6 +67,10 @@ class MyWorkerDaemon(MyDaemon): + def __init__(self, *a, **kw): + super(MyWorkerDaemon, self).__init__(*a, **kw) + MyWorkerDaemon.post_multiprocess_run_called = False + def get_worker_args(self, once=False, **kwargs): return [kwargs for i in range(int(self.conf.get('workers', 0)))] @@ -76,6 +80,9 @@ except IndexError: return True + def post_multiprocess_run(self): + MyWorkerDaemon.post_multiprocess_run_called = True + class TestWorkerDaemon(unittest.TestCase): @@ -231,6 +238,7 @@ }) self.assertEqual([], self.mock_kill.call_args_list) self.assertIn('Finished', d.logger.get_lines_for_level('notice')[-1]) + self.assertTrue(MyWorkerDaemon.post_multiprocess_run_called) def test_forked_worker(self): d = MyWorkerDaemon({'workers': 3}) diff -Nru swift-2.17.0/test/unit/common/test_db.py swift-2.18.0/test/unit/common/test_db.py --- swift-2.17.0/test/unit/common/test_db.py 2018-02-05 14:00:48.000000000 +0000 +++ 
swift-2.18.0/test/unit/common/test_db.py 2018-05-30 10:17:02.000000000 +0000 @@ -38,7 +38,7 @@ MAX_META_VALUE_LENGTH, MAX_META_COUNT, MAX_META_OVERALL_SIZE from swift.common.db import chexor, dict_factory, get_db_connection, \ DatabaseBroker, DatabaseConnectionError, DatabaseAlreadyExists, \ - GreenDBConnection, PICKLE_PROTOCOL + GreenDBConnection, PICKLE_PROTOCOL, zero_like from swift.common.utils import normalize_timestamp, mkdirs, Timestamp from swift.common.exceptions import LockTimeout from swift.common.swob import HTTPException @@ -46,6 +46,30 @@ from test.unit import with_tempdir +class TestHelperFunctions(unittest.TestCase): + + def test_zero_like(self): + expectations = { + # value => expected + None: True, + True: False, + '': True, + 'asdf': False, + 0: True, + 1: False, + '0': True, + '1': False, + } + errors = [] + for value, expected in expectations.items(): + rv = zero_like(value) + if rv != expected: + errors.append('zero_like(%r) => %r expected %r' % ( + value, rv, expected)) + if errors: + self.fail('Some unexpected return values:\n' + '\n'.join(errors)) + + class TestDatabaseConnectionError(unittest.TestCase): def test_str(self): @@ -580,6 +604,25 @@ broker.get_info() self.assertEqual(1, broker.get_info()[count_key]) + @with_tempdir + def test_maybe_get(self, tempdir): + broker = self.broker_class(os.path.join(tempdir, 'test.db'), + account='a', container='c') + broker.initialize(next(self.ts), + storage_policy_index=int(self.policy)) + qry = 'select account from %s_stat' % broker.db_type + with broker.maybe_get(None) as conn: + rows = [dict(x) for x in conn.execute(qry)] + self.assertEqual([{'account': 'a'}], rows) + self.assertEqual(conn, broker.conn) + with broker.get() as other_conn: + self.assertEqual(broker.conn, None) + with broker.maybe_get(other_conn) as identity_conn: + self.assertIs(other_conn, identity_conn) + self.assertEqual(broker.conn, None) + self.assertEqual(broker.conn, None) + self.assertEqual(broker.conn, conn) + class TestDatabaseBroker(unittest.TestCase): @@ -697,20 +740,20 @@ def test_get(self): broker = DatabaseBroker(':memory:') - got_exc = False - try: - with broker.get() as conn: - conn.execute('SELECT 1') - except Exception: - got_exc = True + with self.assertRaises(DatabaseConnectionError) as raised, \ + broker.get() as conn: + conn.execute('SELECT 1') + self.assertEqual( + str(raised.exception), + "DB connection error (:memory:, 0):\nDB doesn't exist") + broker = DatabaseBroker(os.path.join(self.testdir, '1.db')) - got_exc = False - try: - with broker.get() as conn: - conn.execute('SELECT 1') - except Exception: - got_exc = True - self.assertTrue(got_exc) + with self.assertRaises(DatabaseConnectionError) as raised, \ + broker.get() as conn: + conn.execute('SELECT 1') + self.assertEqual( + str(raised.exception), + "DB connection error (%s, 0):\nDB doesn't exist" % broker.db_file) def stub(*args, **kwargs): pass @@ -748,14 +791,11 @@ os.path.join(dbpath, '1.db')) broker = DatabaseBroker(os.path.join(dbpath, '1.db')) broker.db_type = 'test' - exc = None - try: - with broker.get() as conn: - conn.execute('SELECT * FROM test') - except Exception as err: - exc = err + with self.assertRaises(sqlite3.DatabaseError) as raised, \ + broker.get() as conn: + conn.execute('SELECT * FROM test') self.assertEqual( - str(exc), + str(raised.exception), 'Quarantined %s to %s due to malformed database' % (dbpath, qpath)) # Test malformed schema database @@ -764,14 +804,11 @@ os.path.join(dbpath, '1.db')) broker = DatabaseBroker(os.path.join(dbpath, '1.db')) 
broker.db_type = 'test' - exc = None - try: - with broker.get() as conn: - conn.execute('SELECT * FROM test') - except Exception as err: - exc = err + with self.assertRaises(sqlite3.DatabaseError) as raised, \ + broker.get() as conn: + conn.execute('SELECT * FROM test') self.assertEqual( - str(exc), + str(raised.exception), 'Quarantined %s to %s due to malformed database' % (dbpath, qpath)) # Test corrupted database @@ -780,26 +817,42 @@ os.path.join(dbpath, '1.db')) broker = DatabaseBroker(os.path.join(dbpath, '1.db')) broker.db_type = 'test' - exc = None - try: - with broker.get() as conn: - conn.execute('SELECT * FROM test') - except Exception as err: - exc = err + with self.assertRaises(sqlite3.DatabaseError) as raised, \ + broker.get() as conn: + conn.execute('SELECT * FROM test') self.assertEqual( - str(exc), + str(raised.exception), 'Quarantined %s to %s due to corrupted database' % (dbpath, qpath)) + def test_get_raw_metadata_missing_container_info(self): + # Test missing container_info/container_stat row + dbpath = os.path.join(self.testdir, 'dev', 'dbs', 'par', 'pre', 'db') + mkdirs(dbpath) + qpath = os.path.join(self.testdir, 'dev', 'quarantined', 'containers', + 'db') + copy(os.path.join(os.path.dirname(__file__), + 'missing_container_info.db'), + os.path.join(dbpath, '1.db')) + + broker = DatabaseBroker(os.path.join(dbpath, '1.db')) + broker.db_type = 'container' + + with self.assertRaises(sqlite3.DatabaseError) as raised: + broker.get_raw_metadata() + self.assertEqual( + str(raised.exception), + 'Quarantined %s to %s due to missing row in container_stat table' % + (dbpath, qpath)) + def test_lock(self): broker = DatabaseBroker(os.path.join(self.testdir, '1.db'), timeout=.1) - got_exc = False - try: - with broker.lock(): - pass - except Exception: - got_exc = True - self.assertTrue(got_exc) + with self.assertRaises(DatabaseConnectionError) as raised, \ + broker.lock(): + pass + self.assertEqual( + str(raised.exception), + "DB connection error (%s, 0):\nDB doesn't exist" % broker.db_file) def stub(*args, **kwargs): pass @@ -813,13 +866,12 @@ timeout=.1) broker2._initialize = stub with broker.lock(): - got_exc = False - try: - with broker2.lock(): - pass - except LockTimeout: - got_exc = True - self.assertTrue(got_exc) + with self.assertRaises(LockTimeout) as raised, \ + broker2.lock(): + pass + self.assertEqual(str(raised.exception), + '0.1 seconds: %s' % broker.db_file) + try: with broker.lock(): raise Exception('test') @@ -966,6 +1018,19 @@ self.assertEqual(broker.get_sync(uuid3), 2) broker.merge_syncs([{'sync_point': 5, 'remote_id': uuid2}]) self.assertEqual(broker.get_sync(uuid2), 5) + # max sync point sticks + broker.merge_syncs([{'sync_point': 5, 'remote_id': uuid2}]) + self.assertEqual(broker.get_sync(uuid2), 5) + self.assertEqual(broker.get_sync(uuid3), 2) + broker.merge_syncs([{'sync_point': 4, 'remote_id': uuid2}]) + self.assertEqual(broker.get_sync(uuid2), 5) + self.assertEqual(broker.get_sync(uuid3), 2) + broker.merge_syncs([{'sync_point': -1, 'remote_id': uuid2}, + {'sync_point': 3, 'remote_id': uuid3}]) + self.assertEqual(broker.get_sync(uuid2), 5) + self.assertEqual(broker.get_sync(uuid3), 3) + self.assertEqual(broker.get_sync(uuid2, incoming=False), 3) + self.assertEqual(broker.get_sync(uuid3, incoming=False), 4) def test_get_replication_info(self): self.get_replication_info_tester(metadata=False) @@ -1066,11 +1131,9 @@ 'max_row': 1, 'id': broker_uuid, 'metadata': broker_metadata}) return broker - def test_metadata(self): - def reclaim(broker, timestamp): - with 
broker.get() as conn: - broker._reclaim(conn, timestamp) - conn.commit() + # only testing _reclaim_metadata here + @patch.object(DatabaseBroker, '_reclaim') + def test_metadata(self, mock_reclaim): # Initializes a good broker for us broker = self.get_replication_info_tester(metadata=True) # Add our first item @@ -1111,7 +1174,7 @@ self.assertEqual(broker.metadata['Second'], [second_value, second_timestamp]) # Reclaim at point before second item was deleted - reclaim(broker, normalize_timestamp(3)) + broker.reclaim(normalize_timestamp(3), normalize_timestamp(3)) self.assertIn('First', broker.metadata) self.assertEqual(broker.metadata['First'], [first_value, first_timestamp]) @@ -1119,7 +1182,7 @@ self.assertEqual(broker.metadata['Second'], [second_value, second_timestamp]) # Reclaim at point second item was deleted - reclaim(broker, normalize_timestamp(4)) + broker.reclaim(normalize_timestamp(4), normalize_timestamp(4)) self.assertIn('First', broker.metadata) self.assertEqual(broker.metadata['First'], [first_value, first_timestamp]) @@ -1127,11 +1190,59 @@ self.assertEqual(broker.metadata['Second'], [second_value, second_timestamp]) # Reclaim after point second item was deleted - reclaim(broker, normalize_timestamp(5)) + broker.reclaim(normalize_timestamp(5), normalize_timestamp(5)) self.assertIn('First', broker.metadata) self.assertEqual(broker.metadata['First'], [first_value, first_timestamp]) self.assertNotIn('Second', broker.metadata) + # Delete first item (by setting to empty string) + first_timestamp = normalize_timestamp(6) + broker.update_metadata({'First': ['', first_timestamp]}) + self.assertIn('First', broker.metadata) + # Check that sync_timestamp doesn't cause item to be reclaimed + broker.reclaim(normalize_timestamp(5), normalize_timestamp(99)) + self.assertIn('First', broker.metadata) + + def test_update_metadata_missing_container_info(self): + # Test missing container_info/container_stat row + dbpath = os.path.join(self.testdir, 'dev', 'dbs', 'par', 'pre', 'db') + mkdirs(dbpath) + qpath = os.path.join(self.testdir, 'dev', 'quarantined', 'containers', + 'db') + copy(os.path.join(os.path.dirname(__file__), + 'missing_container_info.db'), + os.path.join(dbpath, '1.db')) + + broker = DatabaseBroker(os.path.join(dbpath, '1.db')) + broker.db_type = 'container' + + with self.assertRaises(sqlite3.DatabaseError) as raised: + broker.update_metadata({'First': ['1', normalize_timestamp(1)]}) + self.assertEqual( + str(raised.exception), + 'Quarantined %s to %s due to missing row in container_stat table' % + (dbpath, qpath)) + + def test_reclaim_missing_container_info(self): + # Test missing container_info/container_stat row + dbpath = os.path.join(self.testdir, 'dev', 'dbs', 'par', 'pre', 'db') + mkdirs(dbpath) + qpath = os.path.join(self.testdir, 'dev', 'quarantined', 'containers', + 'db') + copy(os.path.join(os.path.dirname(__file__), + 'missing_container_info.db'), + os.path.join(dbpath, '1.db')) + + broker = DatabaseBroker(os.path.join(dbpath, '1.db')) + broker.db_type = 'container' + + with self.assertRaises(sqlite3.DatabaseError) as raised, \ + broker.get() as conn: + broker._reclaim_metadata(conn, 0) + self.assertEqual( + str(raised.exception), + 'Quarantined %s to %s due to missing row in container_stat table' % + (dbpath, qpath)) @patch.object(DatabaseBroker, 'validate_metadata') def test_validate_metadata_is_called_from_update_metadata(self, mock): @@ -1158,19 +1269,13 @@ metadata[key] = ('B', normalize_timestamp(1)) key = 'X-Account-Meta-Foo'.format(c) metadata[key] = ('', 
normalize_timestamp(1)) - try: - DatabaseBroker.validate_metadata(metadata) - except HTTPException: - self.fail('Unexpected HTTPException') + self.assertIsNone(DatabaseBroker.validate_metadata(metadata)) def test_metadata_raises_exception_on_non_utf8(self): def try_validate(metadata): - try: + with self.assertRaises(HTTPException) as raised: DatabaseBroker.validate_metadata(metadata) - except HTTPException as e: - self.assertEqual(str(e), '400 Bad Request') - else: - self.fail('HTTPException not raised') + self.assertEqual(str(raised.exception), '400 Bad Request') ts = normalize_timestamp(1) try_validate({'X-Account-Meta-Foo': (b'\xff', ts)}) try_validate({b'X-Container-Meta-\xff': ('bar', ts)}) @@ -1202,10 +1307,7 @@ metadata['X-Account-Meta-k'] = ( 'v' * (MAX_META_OVERALL_SIZE - size - 1), normalize_timestamp(1)) - try: - DatabaseBroker.validate_metadata(metadata) - except HTTPException: - self.fail('Unexpected HTTPException') + self.assertIsNone(DatabaseBroker.validate_metadata(metadata)) def test_metadata_raises_exception_over_max_overall_size(self): metadata = {} @@ -1250,16 +1352,149 @@ broker.db_type = 'test' try: raise ex - except (sqlite3.DatabaseError, DatabaseConnectionError): - try: + except sqlite3.DatabaseError: + with self.assertRaises(sqlite3.DatabaseError) as raised: broker.possibly_quarantine(*sys.exc_info()) - except Exception as exc: - self.assertEqual( - str(exc), - 'Quarantined %s to %s due to %s database' % - (dbpath, qpath, hint)) - else: - self.fail('Expected an exception to be raised') + self.assertEqual( + str(raised.exception), + 'Quarantined %s to %s due to %s database' % + (dbpath, qpath, hint)) + + def test_skip_commits(self): + broker = DatabaseBroker(':memory:') + self.assertTrue(broker._skip_commit_puts()) + broker._initialize = MagicMock() + broker.initialize(Timestamp.now()) + self.assertTrue(broker._skip_commit_puts()) + + # not initialized + db_file = os.path.join(self.testdir, '1.db') + broker = DatabaseBroker(db_file) + self.assertFalse(os.path.exists(broker.db_file)) # sanity check + self.assertTrue(broker._skip_commit_puts()) + + # no pending file + broker._initialize = MagicMock() + broker.initialize(Timestamp.now()) + self.assertTrue(os.path.exists(broker.db_file)) # sanity check + self.assertFalse(os.path.exists(broker.pending_file)) # sanity check + self.assertTrue(broker._skip_commit_puts()) + + # pending file exists + with open(broker.pending_file, 'wb'): + pass + self.assertTrue(os.path.exists(broker.pending_file)) # sanity check + self.assertFalse(broker._skip_commit_puts()) + + # skip_commits is True + broker.skip_commits = True + self.assertTrue(broker._skip_commit_puts()) + + # re-init + broker = DatabaseBroker(db_file) + self.assertFalse(broker._skip_commit_puts()) + + # constructor can override + broker = DatabaseBroker(db_file, skip_commits=True) + self.assertTrue(broker._skip_commit_puts()) + + def test_commit_puts(self): + db_file = os.path.join(self.testdir, '1.db') + broker = DatabaseBroker(db_file) + broker._initialize = MagicMock() + broker.initialize(Timestamp.now()) + with open(broker.pending_file, 'wb'): + pass + + # merge given list + with patch.object(broker, 'merge_items') as mock_merge_items: + broker._commit_puts(['test']) + mock_merge_items.assert_called_once_with(['test']) + + # load file and merge + with open(broker.pending_file, 'wb') as fd: + fd.write(':1:2:99') + with patch.object(broker, 'merge_items') as mock_merge_items: + broker._commit_puts_load = lambda l, e: l.append(e) + broker._commit_puts() + 
mock_merge_items.assert_called_once_with(['1', '2', '99']) + self.assertEqual(0, os.path.getsize(broker.pending_file)) + + # load file and merge with given list + with open(broker.pending_file, 'wb') as fd: + fd.write(':bad') + with patch.object(broker, 'merge_items') as mock_merge_items: + broker._commit_puts_load = lambda l, e: l.append(e) + broker._commit_puts(['not']) + mock_merge_items.assert_called_once_with(['not', 'bad']) + self.assertEqual(0, os.path.getsize(broker.pending_file)) + + # skip_commits True - no merge + db_file = os.path.join(self.testdir, '2.db') + broker = DatabaseBroker(db_file, skip_commits=True) + broker._initialize = MagicMock() + broker.initialize(Timestamp.now()) + with open(broker.pending_file, 'wb') as fd: + fd.write(':ignored') + with patch.object(broker, 'merge_items') as mock_merge_items: + with self.assertRaises(DatabaseConnectionError) as cm: + broker._commit_puts(['hmmm']) + mock_merge_items.assert_not_called() + self.assertIn('commits not accepted', str(cm.exception)) + with open(broker.pending_file, 'rb') as fd: + self.assertEqual(':ignored', fd.read()) + + def test_put_record(self): + db_file = os.path.join(self.testdir, '1.db') + broker = DatabaseBroker(db_file) + broker._initialize = MagicMock() + broker.initialize(Timestamp.now()) + + # pending file created and record written + broker.make_tuple_for_pickle = lambda x: x.upper() + with patch.object(broker, '_commit_puts') as mock_commit_puts: + broker.put_record('pinky') + mock_commit_puts.assert_not_called() + with open(broker.pending_file, 'rb') as fd: + pending = fd.read() + items = pending.split(':') + self.assertEqual(['PINKY'], + [pickle.loads(i.decode('base64')) for i in items[1:]]) + + # record appended + with patch.object(broker, '_commit_puts') as mock_commit_puts: + broker.put_record('perky') + mock_commit_puts.assert_not_called() + with open(broker.pending_file, 'rb') as fd: + pending = fd.read() + items = pending.split(':') + self.assertEqual(['PINKY', 'PERKY'], + [pickle.loads(i.decode('base64')) for i in items[1:]]) + + # pending file above cap + cap = swift.common.db.PENDING_CAP + while os.path.getsize(broker.pending_file) < cap: + with open(broker.pending_file, 'ab') as fd: + fd.write('x' * 100000) + with patch.object(broker, '_commit_puts') as mock_commit_puts: + broker.put_record('direct') + mock_commit_puts.called_once_with(['direct']) + + # records shouldn't be put to brokers with skip_commits True because + # they cannot be accepted if the pending file is full + broker.skip_commits = True + with open(broker.pending_file, 'wb'): + # empty the pending file + pass + with patch.object(broker, '_commit_puts') as mock_commit_puts: + with self.assertRaises(DatabaseConnectionError) as cm: + broker.put_record('unwelcome') + self.assertIn('commits not accepted', str(cm.exception)) + mock_commit_puts.assert_not_called() + with open(broker.pending_file, 'rb') as fd: + pending = fd.read() + self.assertFalse(pending) + if __name__ == '__main__': unittest.main() diff -Nru swift-2.17.0/test/unit/common/test_db_replicator.py swift-2.18.0/test/unit/common/test_db_replicator.py --- swift-2.17.0/test/unit/common/test_db_replicator.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/test_db_replicator.py 2018-05-30 10:17:02.000000000 +0000 @@ -16,6 +16,8 @@ from __future__ import print_function import unittest from contextlib import contextmanager + +import eventlet import os import logging import errno @@ -37,6 +39,7 @@ from swift.common.swob import HTTPException from test 
import unit +from test.unit import FakeLogger, attach_fake_replication_rpc from test.unit.common.test_db import ExampleBroker @@ -160,6 +163,11 @@ self.set_status = set_status replicated = False host = 'localhost' + node = { + 'ip': '127.0.0.1', + 'port': '6000', + 'device': 'sdb', + } def replicate(self, *args): self.replicated = True @@ -230,11 +238,27 @@ 'put_timestamp': 1, 'created_at': 1, 'count': 0, + 'max_row': 99, + 'id': 'ID', + 'metadata': {} }) if self.stub_replication_info: info.update(self.stub_replication_info) return info + def get_max_row(self, table=None): + return self.get_replication_info()['max_row'] + + def is_reclaimable(self, now, reclaim_age): + info = self.get_replication_info() + return info['count'] == 0 and ( + (now - reclaim_age) > + info['delete_timestamp'] > + info['put_timestamp']) + + def get_other_replication_items(self): + return None + def reclaim(self, item_timestamp, sync_timestamp): pass @@ -249,6 +273,9 @@ self.put_timestamp = put_timestamp self.delete_timestamp = delete_timestamp + def get_brokers(self): + return [self] + class FakeAccountBroker(FakeBroker): db_type = 'account' @@ -273,6 +300,7 @@ self.recon_cache = mkdtemp() rmtree(self.recon_cache, ignore_errors=1) os.mkdir(self.recon_cache) + self.logger = unit.debug_logger('test-replicator') def tearDown(self): for patcher in self._patchers: @@ -287,6 +315,7 @@ def stub_delete_db(self, broker): self.delete_db_calls.append('/path/to/file') + return True def test_creation(self): # later config should be extended to assert more config options @@ -647,11 +676,107 @@ }) def test_replicate_object(self): + # verify return values from replicate_object db_replicator.ring = FakeRingWithNodes() - replicator = TestReplicator({}) - replicator.delete_db = self.stub_delete_db - replicator._replicate_object('0', '/path/to/file', 'node_id') - self.assertEqual([], self.delete_db_calls) + db_path = '/path/to/file' + replicator = TestReplicator({}, logger=FakeLogger()) + info = FakeBroker().get_replication_info() + # make remote appear to be in sync + rinfo = {'point': info['max_row'], 'id': 'remote_id'} + + class FakeResponse(object): + def __init__(self, status, rinfo): + self._status = status + self.data = json.dumps(rinfo) + + @property + def status(self): + if isinstance(self._status, (Exception, eventlet.Timeout)): + raise self._status + return self._status + + # all requests fail + replicate = 'swift.common.db_replicator.ReplConnection.replicate' + with mock.patch(replicate) as fake_replicate: + fake_replicate.side_effect = [ + FakeResponse(500, None), + FakeResponse(500, None), + FakeResponse(500, None)] + with mock.patch.object(replicator, 'delete_db') as mock_delete: + res = replicator._replicate_object('0', db_path, 'node_id') + self.assertRaises(StopIteration, next, fake_replicate.side_effect) + self.assertEqual((False, [False, False, False]), res) + self.assertEqual(0, mock_delete.call_count) + self.assertFalse(replicator.logger.get_lines_for_level('error')) + self.assertFalse(replicator.logger.get_lines_for_level('warning')) + replicator.logger.clear() + + with mock.patch(replicate) as fake_replicate: + fake_replicate.side_effect = [ + FakeResponse(Exception('ugh'), None), + FakeResponse(eventlet.Timeout(), None), + FakeResponse(200, rinfo)] + with mock.patch.object(replicator, 'delete_db') as mock_delete: + res = replicator._replicate_object('0', db_path, 'node_id') + self.assertRaises(StopIteration, next, fake_replicate.side_effect) + self.assertEqual((False, [False, False, True]), res) + 
self.assertEqual(0, mock_delete.call_count) + lines = replicator.logger.get_lines_for_level('error') + self.assertIn('ERROR syncing', lines[0]) + self.assertIn('ERROR syncing', lines[1]) + self.assertFalse(lines[2:]) + self.assertFalse(replicator.logger.get_lines_for_level('warning')) + replicator.logger.clear() + + # partial success + with mock.patch(replicate) as fake_replicate: + fake_replicate.side_effect = [ + FakeResponse(200, rinfo), + FakeResponse(200, rinfo), + FakeResponse(500, None)] + with mock.patch.object(replicator, 'delete_db') as mock_delete: + res = replicator._replicate_object('0', db_path, 'node_id') + self.assertRaises(StopIteration, next, fake_replicate.side_effect) + self.assertEqual((False, [True, True, False]), res) + self.assertEqual(0, mock_delete.call_count) + self.assertFalse(replicator.logger.get_lines_for_level('error')) + self.assertFalse(replicator.logger.get_lines_for_level('warning')) + replicator.logger.clear() + + # 507 triggers additional requests + with mock.patch(replicate) as fake_replicate: + fake_replicate.side_effect = [ + FakeResponse(200, rinfo), + FakeResponse(200, rinfo), + FakeResponse(507, None), + FakeResponse(507, None), + FakeResponse(200, rinfo)] + with mock.patch.object(replicator, 'delete_db') as mock_delete: + res = replicator._replicate_object('0', db_path, 'node_id') + self.assertRaises(StopIteration, next, fake_replicate.side_effect) + self.assertEqual((False, [True, True, False, False, True]), res) + self.assertEqual(0, mock_delete.call_count) + lines = replicator.logger.get_lines_for_level('error') + self.assertIn('Remote drive not mounted', lines[0]) + self.assertIn('Remote drive not mounted', lines[1]) + self.assertFalse(lines[2:]) + self.assertFalse(replicator.logger.get_lines_for_level('warning')) + replicator.logger.clear() + + # all requests succeed; node id == 'node_id' causes node to be + # considered a handoff so expect the db to be deleted + with mock.patch(replicate) as fake_replicate: + fake_replicate.side_effect = [ + FakeResponse(200, rinfo), + FakeResponse(200, rinfo), + FakeResponse(200, rinfo)] + with mock.patch.object(replicator, 'delete_db') as mock_delete: + res = replicator._replicate_object('0', db_path, 'node_id') + self.assertRaises(StopIteration, next, fake_replicate.side_effect) + self.assertEqual((True, [True, True, True]), res) + self.assertEqual(1, mock_delete.call_count) + self.assertFalse(replicator.logger.get_lines_for_level('error')) + self.assertFalse(replicator.logger.get_lines_for_level('warning')) def test_replicate_object_quarantine(self): replicator = TestReplicator({}) @@ -695,8 +820,122 @@ replicator.brokerclass = FakeAccountBroker replicator._repl_to_node = lambda *args: True replicator.delete_db = self.stub_delete_db - replicator._replicate_object('0', '/path/to/file', 'node_id') + orig_cleanup = replicator.cleanup_post_replicate + with mock.patch.object(replicator, 'cleanup_post_replicate', + side_effect=orig_cleanup) as mock_cleanup: + replicator._replicate_object('0', '/path/to/file', 'node_id') + mock_cleanup.assert_called_once_with(mock.ANY, mock.ANY, [True] * 3) + self.assertIsInstance(mock_cleanup.call_args[0][0], + replicator.brokerclass) self.assertEqual(['/path/to/file'], self.delete_db_calls) + self.assertEqual(0, replicator.stats['failure']) + + def test_replicate_object_delete_delegated_to_cleanup_post_replicate(self): + replicator = TestReplicator({}) + replicator.ring = FakeRingWithNodes().Ring('path') + replicator.brokerclass = FakeAccountBroker + 
replicator._repl_to_node = lambda *args: True + replicator.delete_db = self.stub_delete_db + + # cleanup succeeds + with mock.patch.object(replicator, 'cleanup_post_replicate', + return_value=True) as mock_cleanup: + replicator._replicate_object('0', '/path/to/file', 'node_id') + mock_cleanup.assert_called_once_with(mock.ANY, mock.ANY, [True] * 3) + self.assertIsInstance(mock_cleanup.call_args[0][0], + replicator.brokerclass) + self.assertFalse(self.delete_db_calls) + self.assertEqual(0, replicator.stats['failure']) + self.assertEqual(3, replicator.stats['success']) + + # cleanup fails + replicator._zero_stats() + with mock.patch.object(replicator, 'cleanup_post_replicate', + return_value=False) as mock_cleanup: + replicator._replicate_object('0', '/path/to/file', 'node_id') + mock_cleanup.assert_called_once_with(mock.ANY, mock.ANY, [True] * 3) + self.assertIsInstance(mock_cleanup.call_args[0][0], + replicator.brokerclass) + self.assertFalse(self.delete_db_calls) + self.assertEqual(3, replicator.stats['failure']) + self.assertEqual(0, replicator.stats['success']) + + # shouldbehere True - cleanup not required + replicator._zero_stats() + primary_node_id = replicator.ring.get_part_nodes('0')[0]['id'] + with mock.patch.object(replicator, 'cleanup_post_replicate', + return_value=True) as mock_cleanup: + replicator._replicate_object('0', '/path/to/file', primary_node_id) + mock_cleanup.assert_not_called() + self.assertFalse(self.delete_db_calls) + self.assertEqual(0, replicator.stats['failure']) + self.assertEqual(2, replicator.stats['success']) + + def test_cleanup_post_replicate(self): + replicator = TestReplicator({}, logger=self.logger) + replicator.ring = FakeRingWithNodes().Ring('path') + broker = FakeBroker() + replicator._repl_to_node = lambda *args: True + info = broker.get_replication_info() + + with mock.patch.object(replicator, 'delete_db') as mock_delete_db: + res = replicator.cleanup_post_replicate( + broker, info, [False] * 3) + mock_delete_db.assert_not_called() + self.assertTrue(res) + self.assertEqual(['Not deleting db %s (0/3 success)' % broker.db_file], + replicator.logger.get_lines_for_level('debug')) + replicator.logger.clear() + + with mock.patch.object(replicator, 'delete_db') as mock_delete_db: + res = replicator.cleanup_post_replicate( + broker, info, [True, False, True]) + mock_delete_db.assert_not_called() + self.assertTrue(res) + self.assertEqual(['Not deleting db %s (2/3 success)' % broker.db_file], + replicator.logger.get_lines_for_level('debug')) + replicator.logger.clear() + + broker.stub_replication_info = {'max_row': 101} + with mock.patch.object(replicator, 'delete_db') as mock_delete_db: + res = replicator.cleanup_post_replicate( + broker, info, [True] * 3) + mock_delete_db.assert_not_called() + self.assertTrue(res) + self.assertEqual(['Not deleting db %s (2 new rows)' % broker.db_file], + replicator.logger.get_lines_for_level('debug')) + replicator.logger.clear() + + broker.stub_replication_info = {'max_row': 98} + with mock.patch.object(replicator, 'delete_db') as mock_delete_db: + res = replicator.cleanup_post_replicate( + broker, info, [True] * 3) + mock_delete_db.assert_not_called() + self.assertTrue(res) + broker.stub_replication_info = None + self.assertEqual(['Not deleting db %s (negative max_row_delta: -1)' % + broker.db_file], + replicator.logger.get_lines_for_level('error')) + replicator.logger.clear() + + with mock.patch.object(replicator, 'delete_db') as mock_delete_db: + res = replicator.cleanup_post_replicate( + broker, info, [True] * 3) + 
mock_delete_db.assert_called_once_with(broker) + self.assertTrue(res) + self.assertEqual(['Successfully deleted db %s' % broker.db_file], + replicator.logger.get_lines_for_level('debug')) + replicator.logger.clear() + + with mock.patch.object(replicator, 'delete_db', + return_value=False) as mock_delete_db: + res = replicator.cleanup_post_replicate( + broker, info, [True] * 3) + mock_delete_db.assert_called_once_with(broker) + self.assertFalse(res) + self.assertEqual(['Failed to delete db %s' % broker.db_file], + replicator.logger.get_lines_for_level('debug')) + replicator.logger.clear() def test_replicate_object_with_exception(self): replicator = TestReplicator({}) @@ -949,6 +1188,8 @@ response = rpc.dispatch(('drive', 'part', 'hash'), ['rsync_then_merge', 'arg1', 'arg2']) expected_calls = [call('/part/ash/hash/hash.db'), + call('/drive/tmp/arg1'), + call(FakeBroker.db_file), call('/drive/tmp/arg1')] self.assertEqual(mock_os.path.exists.call_args_list, expected_calls) @@ -966,7 +1207,7 @@ unit.mock_check_drive(isdir=True): mock_os.path.exists.side_effect = [False, True] response = rpc.dispatch(('drive', 'part', 'hash'), - ['complete_rsync', 'arg1', 'arg2']) + ['complete_rsync', 'arg1']) expected_calls = [call('/part/ash/hash/hash.db'), call('/drive/tmp/arg1')] self.assertEqual(mock_os.path.exists.call_args_list, @@ -974,6 +1215,19 @@ self.assertEqual('204 No Content', response.status) self.assertEqual(204, response.status_int) + with patch('swift.common.db_replicator.os', + new=mock.MagicMock(wraps=os)) as mock_os, \ + unit.mock_check_drive(isdir=True): + mock_os.path.exists.side_effect = [False, True] + response = rpc.dispatch(('drive', 'part', 'hash'), + ['complete_rsync', 'arg1', 'arg2']) + expected_calls = [call('/part/ash/hash/arg2'), + call('/drive/tmp/arg1')] + self.assertEqual(mock_os.path.exists.call_args_list, + expected_calls) + self.assertEqual('204 No Content', response.status) + self.assertEqual(204, response.status_int) + def test_rsync_then_merge_db_does_not_exist(self): rpc = db_replicator.ReplicatorRpc('/', '/', FakeBroker, mount_check=False) @@ -1010,7 +1264,8 @@ def mock_renamer(old, new): self.assertEqual('/drive/tmp/arg1', old) - self.assertEqual('/data/db.db', new) + # FakeBroker uses module filename as db_file! 
+ self.assertEqual(__file__, new) self._patch(patch.object, db_replicator, 'renamer', mock_renamer) @@ -1023,7 +1278,7 @@ self.assertEqual('204 No Content', response.status) self.assertEqual(204, response.status_int) - def test_complete_rsync_db_does_not_exist(self): + def test_complete_rsync_db_exists(self): rpc = db_replicator.ReplicatorRpc('/', '/', FakeBroker, mount_check=False) @@ -1031,9 +1286,18 @@ new=mock.MagicMock(wraps=os)) as mock_os, \ unit.mock_check_drive(isdir=True): mock_os.path.exists.return_value = True + response = rpc.complete_rsync('drive', '/data/db.db', ['arg1']) + mock_os.path.exists.assert_called_with('/data/db.db') + self.assertEqual('404 Not Found', response.status) + self.assertEqual(404, response.status_int) + + with patch('swift.common.db_replicator.os', + new=mock.MagicMock(wraps=os)) as mock_os, \ + unit.mock_check_drive(isdir=True): + mock_os.path.exists.return_value = True response = rpc.complete_rsync('drive', '/data/db.db', ['arg1', 'arg2']) - mock_os.path.exists.assert_called_with('/data/db.db') + mock_os.path.exists.assert_called_with('/data/arg2') self.assertEqual('404 Not Found', response.status) self.assertEqual(404, response.status_int) @@ -1046,37 +1310,57 @@ unit.mock_check_drive(isdir=True): mock_os.path.exists.return_value = False response = rpc.complete_rsync('drive', '/data/db.db', - ['arg1', 'arg2']) + ['arg1']) expected_calls = [call('/data/db.db'), call('/drive/tmp/arg1')] self.assertEqual(expected_calls, mock_os.path.exists.call_args_list) self.assertEqual('404 Not Found', response.status) self.assertEqual(404, response.status_int) + with patch('swift.common.db_replicator.os', + new=mock.MagicMock(wraps=os)) as mock_os, \ + unit.mock_check_drive(isdir=True): + mock_os.path.exists.return_value = False + response = rpc.complete_rsync('drive', '/data/db.db', + ['arg1', 'arg2']) + expected_calls = [call('/data/arg2'), call('/drive/tmp/arg1')] + self.assertEqual(expected_calls, + mock_os.path.exists.call_args_list) + self.assertEqual('404 Not Found', response.status) + self.assertEqual(404, response.status_int) + def test_complete_rsync_rename(self): rpc = db_replicator.ReplicatorRpc('/', '/', FakeBroker, mount_check=False) - def mock_exists(path): - if path == '/data/db.db': - return False - self.assertEqual('/drive/tmp/arg1', path) - return True - def mock_renamer(old, new): - self.assertEqual('/drive/tmp/arg1', old) - self.assertEqual('/data/db.db', new) + renamer_calls.append((old, new)) self._patch(patch.object, db_replicator, 'renamer', mock_renamer) + renamer_calls = [] + with patch('swift.common.db_replicator.os', + new=mock.MagicMock(wraps=os)) as mock_os, \ + unit.mock_check_drive(isdir=True): + mock_os.path.exists.side_effect = [False, True] + response = rpc.complete_rsync('drive', '/data/db.db', + ['arg1']) + self.assertEqual('204 No Content', response.status) + self.assertEqual(204, response.status_int) + self.assertEqual(('/drive/tmp/arg1', '/data/db.db'), renamer_calls[0]) + self.assertFalse(renamer_calls[1:]) + + renamer_calls = [] with patch('swift.common.db_replicator.os', new=mock.MagicMock(wraps=os)) as mock_os, \ unit.mock_check_drive(isdir=True): mock_os.path.exists.side_effect = [False, True] response = rpc.complete_rsync('drive', '/data/db.db', ['arg1', 'arg2']) - self.assertEqual('204 No Content', response.status) - self.assertEqual(204, response.status_int) + self.assertEqual('204 No Content', response.status) + self.assertEqual(204, response.status_int) + self.assertEqual(('/drive/tmp/arg1', '/data/arg2'), 
renamer_calls[0]) + self.assertFalse(renamer_calls[1:]) def test_replicator_sync_with_broker_replication_missing_table(self): rpc = db_replicator.ReplicatorRpc('/', '/', FakeBroker, @@ -1220,7 +1504,8 @@ self.assertTrue(os.path.isdir(dirpath)) node_id = 1 - results = list(db_replicator.roundrobin_datadirs([(datadir, node_id)])) + results = list(db_replicator.roundrobin_datadirs( + [(datadir, node_id, lambda p: True)])) expected = [ ('450', os.path.join(datadir, db_path), node_id), ] @@ -1241,12 +1526,14 @@ self.assertEqual({'18', '1054', '1060', '450'}, set(os.listdir(datadir))) - results = list(db_replicator.roundrobin_datadirs([(datadir, node_id)])) + results = list(db_replicator.roundrobin_datadirs( + [(datadir, node_id, lambda p: True)])) self.assertEqual(results, expected) self.assertEqual({'1054', '1060', '450'}, set(os.listdir(datadir))) - results = list(db_replicator.roundrobin_datadirs([(datadir, node_id)])) + results = list(db_replicator.roundrobin_datadirs( + [(datadir, node_id, lambda p: True)])) self.assertEqual(results, expected) # non db file in '1060' dir is not deleted and exception is handled self.assertEqual({'1060', '450'}, @@ -1266,9 +1553,11 @@ return [] path = path[len('/srv/node/sdx/containers'):] if path == '': - return ['123', '456', '789', '9999'] + return ['123', '456', '789', '9999', "-5", "not-a-partition"] # 456 will pretend to be a file # 9999 will be an empty partition with no contents + # -5 and not-a-partition were created by something outside + # Swift elif path == '/123': return ['abc', 'def.db'] # def.db will pretend to be a file elif path == '/123/abc': @@ -1292,6 +1581,10 @@ 'weird2'] # weird2 will pretend to be a dir, if asked elif path == '9999': return [] + elif path == 'not-a-partition': + raise Exception("shouldn't look in not-a-partition") + elif path == '-5': + raise Exception("shouldn't look in -5") return [] def _isdir(path): @@ -1327,8 +1620,8 @@ mock.patch(base + 'random.shuffle', _shuffle), \ mock.patch(base + 'os.rmdir', _rmdir): - datadirs = [('/srv/node/sda/containers', 1), - ('/srv/node/sdb/containers', 2)] + datadirs = [('/srv/node/sda/containers', 1, lambda p: True), + ('/srv/node/sdb/containers', 2, lambda p: True)] results = list(db_replicator.roundrobin_datadirs(datadirs)) # The results show that the .db files are returned, the devices # interleaved. 
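
The "devices interleaved" assertion above depends on round-robin interleaving across per-device iterators. A minimal standalone sketch of that interleaving pattern, with made-up data (an illustration of the idea, not Swift's roundrobin_datadirs):

# Round-robin interleaving sketch; the function name and sample data are
# illustrative, not Swift's implementation.
def round_robin(iterables):
    iters = [iter(it) for it in iterables]
    while iters:
        still_going = []
        for it in iters:
            try:
                yield next(it)
            except StopIteration:
                continue
            still_going.append(it)
        iters = still_going

# One item is drawn from each source in turn until all are exhausted:
assert list(round_robin([[1, 2, 3], ['a', 'b'], [10]])) == \
    [1, 'a', 10, 2, 'b', 3]
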
@@ -1426,10 +1719,219 @@ db_file = __file__ replicator = TestReplicator({}) replicator._http_connect(node, partition, db_file) + expected_hsh = os.path.basename(db_file).split('.', 1)[0] + expected_hsh = expected_hsh.split('_', 1)[0] db_replicator.ReplConnection.assert_has_calls([ - mock.call(node, partition, - os.path.basename(db_file).split('.', 1)[0], - replicator.logger)]) + mock.call(node, partition, expected_hsh, replicator.logger)]) + + +class TestHandoffsOnly(unittest.TestCase): + class FakeRing3Nodes(object): + _replicas = 3 + + # Three nodes, two disks each + devs = [ + dict(id=0, region=1, zone=1, + meta='', weight=500.0, ip='10.0.0.1', port=6201, + replication_ip='10.0.0.1', replication_port=6201, + device='sdp'), + dict(id=1, region=1, zone=1, + meta='', weight=500.0, ip='10.0.0.1', port=6201, + replication_ip='10.0.0.1', replication_port=6201, + device='sdq'), + + dict(id=2, region=1, zone=1, + meta='', weight=500.0, ip='10.0.0.2', port=6201, + replication_ip='10.0.0.2', replication_port=6201, + device='sdp'), + dict(id=3, region=1, zone=1, + meta='', weight=500.0, ip='10.0.0.2', port=6201, + replication_ip='10.0.0.2', replication_port=6201, + device='sdq'), + + dict(id=4, region=1, zone=1, + meta='', weight=500.0, ip='10.0.0.3', port=6201, + replication_ip='10.0.0.3', replication_port=6201, + device='sdp'), + dict(id=5, region=1, zone=1, + meta='', weight=500.0, ip='10.0.0.3', port=6201, + replication_ip='10.0.0.3', replication_port=6201, + device='sdq'), + ] + + def __init__(self, *a, **kw): + pass + + def get_part(self, account, container=None, obj=None): + return 0 + + def get_part_nodes(self, part): + nodes = [] + for offset in range(self._replicas): + i = (part + offset) % len(self.devs) + nodes.append(self.devs[i]) + return nodes + + def get_more_nodes(self, part): + for offset in range(self._replicas, len(self.devs)): + i = (part + offset) % len(self.devs) + yield self.devs[i] + + def _make_fake_db(self, disk, partition, db_hash): + directories = [ + os.path.join(self.root, disk), + os.path.join(self.root, disk, 'containers'), + os.path.join(self.root, disk, 'containers', str(partition)), + os.path.join(self.root, disk, 'containers', str(partition), + db_hash[-3:]), + os.path.join(self.root, disk, 'containers', str(partition), + db_hash[-3:], db_hash)] + + for d in directories: + try: + os.mkdir(d) + except OSError as err: + if err.errno != errno.EEXIST: + raise + file_path = os.path.join(directories[-1], db_hash + ".db") + with open(file_path, 'w'): + pass + + def setUp(self): + self.root = mkdtemp() + + # object disks; they're just here to make sure they don't trip us up + os.mkdir(os.path.join(self.root, 'sdc')) + os.mkdir(os.path.join(self.root, 'sdc', 'objects')) + os.mkdir(os.path.join(self.root, 'sdd')) + os.mkdir(os.path.join(self.root, 'sdd', 'objects')) + + # part 0 belongs on sdp + self._make_fake_db('sdp', 0, '010101013cf2b7979af9eaa71cb67220') + + # part 1 does not belong on sdp + self._make_fake_db('sdp', 1, 'abababab2b5368158355e799323b498d') + + # part 1 belongs on sdq + self._make_fake_db('sdq', 1, '02020202e30f696a3cfa63d434a3c94e') + + # part 2 does not belong on sdq + self._make_fake_db('sdq', 2, 'bcbcbcbc15d3835053d568c57e2c83b5') + + def cleanUp(self): + rmtree(self.root, ignore_errors=True) + + def test_scary_warnings(self): + logger = unit.FakeLogger() + replicator = TestReplicator({ + 'handoffs_only': 'yes', + 'devices': self.root, + 'bind_port': 6201, + 'mount_check': 'no', + }, logger=logger) + + with patch.object(db_replicator, 
'whataremyips', + return_value=['10.0.0.1']), \ + patch.object(replicator, '_replicate_object'), \ + patch.object(replicator, 'ring', self.FakeRing3Nodes()): + replicator.run_once() + + self.assertEqual( + logger.get_lines_for_level('warning'), + [('Starting replication pass with handoffs_only enabled. This ' + 'mode is not intended for normal operation; use ' + 'handoffs_only with care.'), + ('Finished replication pass with handoffs_only enabled. ' + 'If handoffs_only is no longer required, disable it.')]) + + def test_skips_primary_partitions(self): + replicator = TestReplicator({ + 'handoffs_only': 'yes', + 'devices': self.root, + 'bind_port': 6201, + 'mount_check': 'no', + }) + + with patch.object(db_replicator, 'whataremyips', + return_value=['10.0.0.1']), \ + patch.object(replicator, '_replicate_object') as mock_repl, \ + patch.object(replicator, 'ring', self.FakeRing3Nodes()): + replicator.run_once() + + self.assertEqual(sorted(mock_repl.mock_calls), [ + mock.call('1', os.path.join( + self.root, 'sdp', 'containers', '1', '98d', + 'abababab2b5368158355e799323b498d', + 'abababab2b5368158355e799323b498d.db'), 0), + mock.call('2', os.path.join( + self.root, 'sdq', 'containers', '2', '3b5', + 'bcbcbcbc15d3835053d568c57e2c83b5', + 'bcbcbcbc15d3835053d568c57e2c83b5.db'), 1)]) + + def test_override_partitions(self): + replicator = TestReplicator({ + 'devices': self.root, + 'bind_port': 6201, + 'mount_check': 'no', + }) + + with patch.object(db_replicator, 'whataremyips', + return_value=['10.0.0.1']), \ + patch.object(replicator, '_replicate_object') as mock_repl, \ + patch.object(replicator, 'ring', self.FakeRing3Nodes()): + replicator.run_once(partitions="0,2") + + self.assertEqual(sorted(mock_repl.mock_calls), [ + mock.call('0', os.path.join( + self.root, 'sdp', 'containers', '0', '220', + '010101013cf2b7979af9eaa71cb67220', + '010101013cf2b7979af9eaa71cb67220.db'), 0), + mock.call('2', os.path.join( + self.root, 'sdq', 'containers', '2', '3b5', + 'bcbcbcbc15d3835053d568c57e2c83b5', + 'bcbcbcbc15d3835053d568c57e2c83b5.db'), 1)]) + + def test_override_devices(self): + replicator = TestReplicator({ + 'devices': self.root, + 'bind_port': 6201, + 'mount_check': 'no', + }) + + with patch.object(db_replicator, 'whataremyips', + return_value=['10.0.0.1']), \ + patch.object(replicator, '_replicate_object') as mock_repl, \ + patch.object(replicator, 'ring', self.FakeRing3Nodes()): + replicator.run_once(devices="sdp") + + self.assertEqual(sorted(mock_repl.mock_calls), [ + mock.call('0', os.path.join( + self.root, 'sdp', 'containers', '0', '220', + '010101013cf2b7979af9eaa71cb67220', + '010101013cf2b7979af9eaa71cb67220.db'), 0), + mock.call('1', os.path.join( + self.root, 'sdp', 'containers', '1', '98d', + 'abababab2b5368158355e799323b498d', + 'abababab2b5368158355e799323b498d.db'), 0)]) + + def test_override_devices_and_partitions(self): + replicator = TestReplicator({ + 'devices': self.root, + 'bind_port': 6201, + 'mount_check': 'no', + }) + + with patch.object(db_replicator, 'whataremyips', + return_value=['10.0.0.1']), \ + patch.object(replicator, '_replicate_object') as mock_repl, \ + patch.object(replicator, 'ring', self.FakeRing3Nodes()): + replicator.run_once(partitions="0,2", devices="sdp") + + self.assertEqual(sorted(mock_repl.mock_calls), [ + mock.call('0', os.path.join( + self.root, 'sdp', 'containers', '0', '220', + '010101013cf2b7979af9eaa71cb67220', + '010101013cf2b7979af9eaa71cb67220.db'), 0)]) class TestReplToNode(unittest.TestCase): @@ -1497,7 +1999,9 @@ 
self.assertEqual(self.replicator._repl_to_node( self.fake_node, self.broker, '0', self.fake_info), True) metadata = self.broker.metadata - self.assertEqual({}, metadata) + self.assertIn("X-Container-Sysmeta-Test", metadata) + self.assertEqual("XYZ", metadata["X-Container-Sysmeta-Test"][0]) + self.assertEqual(now, metadata["X-Container-Sysmeta-Test"][1]) def test_repl_to_node_not_found(self): self.http = ReplHttp('{"id": 3, "point": -1}', set_status=404) @@ -1520,7 +2024,7 @@ def test_repl_to_node_300_status(self): self.http = ReplHttp('{"id": 3, "point": -1}', set_status=300) - self.assertIsNone(self.replicator._repl_to_node( + self.assertFalse(self.replicator._repl_to_node( self.fake_node, FakeBroker(), '0', self.fake_info)) def test_repl_to_node_not_response(self): @@ -1549,45 +2053,6 @@ ]) -class FakeHTTPResponse(object): - - def __init__(self, resp): - self.resp = resp - - @property - def status(self): - return self.resp.status_int - - @property - def data(self): - return self.resp.body - - -def attach_fake_replication_rpc(rpc, replicate_hook=None): - class FakeReplConnection(object): - - def __init__(self, node, partition, hash_, logger): - self.logger = logger - self.node = node - self.partition = partition - self.path = '/%s/%s/%s' % (node['device'], partition, hash_) - self.host = node['replication_ip'] - - def replicate(self, op, *sync_args): - print('REPLICATE: %s, %s, %r' % (self.path, op, sync_args)) - replicate_args = self.path.lstrip('/').split('/') - args = [op] + list(sync_args) - with unit.mock_check_drive(isdir=not rpc.mount_check, - ismount=rpc.mount_check): - swob_response = rpc.dispatch(replicate_args, args) - resp = FakeHTTPResponse(swob_response) - if replicate_hook: - replicate_hook(op, *sync_args) - return resp - - return FakeReplConnection - - class ExampleReplicator(db_replicator.Replicator): server_type = 'fake' brokerclass = ExampleBroker @@ -1652,15 +2117,19 @@ conf.update(conf_updates) return self.replicator_daemon(conf, logger=self.logger) - def _run_once(self, node, conf_updates=None, daemon=None): - daemon = daemon or self._get_daemon(node, conf_updates) - + def _install_fake_rsync_file(self, daemon, captured_calls=None): def _rsync_file(db_file, remote_file, **kwargs): + if captured_calls is not None: + captured_calls.append((db_file, remote_file, kwargs)) remote_server, remote_path = remote_file.split('/', 1) dest_path = os.path.join(self.root, remote_path) copy(db_file, dest_path) return True daemon._rsync_file = _rsync_file + + def _run_once(self, node, conf_updates=None, daemon=None): + daemon = daemon or self._get_daemon(node, conf_updates) + self._install_fake_rsync_file(daemon) with mock.patch('swift.common.db_replicator.whataremyips', new=lambda *a, **kw: [node['replication_ip']]), \ unit.mock_check_drive(isdir=not daemon.mount_check, diff -Nru swift-2.17.0/test/unit/common/test_direct_client.py swift-2.18.0/test/unit/common/test_direct_client.py --- swift-2.17.0/test/unit/common/test_direct_client.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/test_direct_client.py 2018-05-30 10:17:02.000000000 +0000 @@ -95,6 +95,11 @@ yield fake_conn +@contextmanager +def noop_timeout(duration): + yield + + @patch_policies class TestDirectClient(unittest.TestCase): @@ -117,6 +122,10 @@ self.account, self.container, self.obj)) self.user_agent = 'direct-client %s' % os.getpid() + patcher = mock.patch.object(direct_client, 'Timeout', noop_timeout) + patcher.start() + self.addCleanup(patcher.stop) + def test_gen_headers(self): 
stub_user_agent = 'direct-client %s' % os.getpid() @@ -450,6 +459,67 @@ self.assertEqual(err.http_status, 500) self.assertTrue('DELETE' in str(err)) + def test_direct_put_container(self): + body = 'Let us begin with a quick introduction' + headers = {'x-foo': 'bar', 'Content-Length': str(len(body)), + 'Content-Type': 'application/json', + 'User-Agent': 'my UA'} + + with mocked_http_conn(204) as conn: + rv = direct_client.direct_put_container( + self.node, self.part, self.account, self.container, + contents=body, headers=headers) + self.assertEqual(conn.host, self.node['ip']) + self.assertEqual(conn.port, self.node['port']) + self.assertEqual(conn.method, 'PUT') + self.assertEqual(conn.path, self.container_path) + self.assertEqual(conn.req_headers['Content-Length'], + str(len(body))) + self.assertEqual(conn.req_headers['Content-Type'], + 'application/json') + self.assertEqual(conn.req_headers['User-Agent'], 'my UA') + self.assertTrue('x-timestamp' in conn.req_headers) + self.assertEqual('bar', conn.req_headers.get('x-foo')) + self.assertEqual(md5(body).hexdigest(), conn.etag.hexdigest()) + self.assertIsNone(rv) + + def test_direct_put_container_chunked(self): + body = 'Let us begin with a quick introduction' + headers = {'x-foo': 'bar', 'Content-Type': 'application/json'} + + with mocked_http_conn(204) as conn: + rv = direct_client.direct_put_container( + self.node, self.part, self.account, self.container, + contents=body, headers=headers) + self.assertEqual(conn.host, self.node['ip']) + self.assertEqual(conn.port, self.node['port']) + self.assertEqual(conn.method, 'PUT') + self.assertEqual(conn.path, self.container_path) + self.assertEqual(conn.req_headers['Transfer-Encoding'], 'chunked') + self.assertEqual(conn.req_headers['Content-Type'], + 'application/json') + self.assertTrue('x-timestamp' in conn.req_headers) + self.assertEqual('bar', conn.req_headers.get('x-foo')) + self.assertNotIn('Content-Length', conn.req_headers) + expected_sent = '%0x\r\n%s\r\n0\r\n\r\n' % (len(body), body) + self.assertEqual(md5(expected_sent).hexdigest(), + conn.etag.hexdigest()) + self.assertIsNone(rv) + + def test_direct_put_container_fail(self): + with mock.patch('swift.common.bufferedhttp.http_connect_raw', + side_effect=Exception('conn failed')): + with self.assertRaises(Exception) as cm: + direct_client.direct_put_container( + self.node, self.part, self.account, self.container) + self.assertEqual('conn failed', str(cm.exception)) + + with mocked_http_conn(Exception('resp failed')): + with self.assertRaises(Exception) as cm: + direct_client.direct_put_container( + self.node, self.part, self.account, self.container) + self.assertEqual('resp failed', str(cm.exception)) + def test_direct_put_container_object(self): headers = {'x-foo': 'bar'} diff -Nru swift-2.17.0/test/unit/common/test_linkat.py swift-2.18.0/test/unit/common/test_linkat.py --- swift-2.17.0/test/unit/common/test_linkat.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/test_linkat.py 2018-05-30 10:17:02.000000000 +0000 @@ -25,7 +25,7 @@ from swift.common.linkat import linkat from swift.common.utils import O_TMPFILE -from test.unit import requires_o_tmpfile_support +from test.unit import requires_o_tmpfile_support_in_tmp class TestLinkat(unittest.TestCase): @@ -38,7 +38,7 @@ def test_available(self): self.assertFalse(linkat.available) - @requires_o_tmpfile_support + @requires_o_tmpfile_support_in_tmp def test_errno(self): with open('/dev/null', 'r') as fd: self.assertRaises(IOError, linkat, @@ -77,7 +77,7 @@ 
mock_cdll.assert_called_once_with(libc_name, use_errno=True) self.assertTrue(libc.linkat_retrieved) - @requires_o_tmpfile_support + @requires_o_tmpfile_support_in_tmp def test_linkat_success(self): fd = None diff -Nru swift-2.17.0/test/unit/common/test_memcached.py swift-2.18.0/test/unit/common/test_memcached.py --- swift-2.17.0/test/unit/common/test_memcached.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/test_memcached.py 2018-05-30 10:17:02.000000000 +0000 @@ -71,8 +71,8 @@ # In particular, the "Storage commands" section may be interesting. def __init__(self): - self.inbuf = '' - self.outbuf = '' + self.inbuf = b'' + self.outbuf = b'' self.cache = {} self.down = False self.exc_on_delete = False @@ -84,81 +84,86 @@ if self.down: raise Exception('mock is down') self.inbuf += string - while '\n' in self.inbuf: - cmd, self.inbuf = self.inbuf.split('\n', 1) + while b'\n' in self.inbuf: + cmd, self.inbuf = self.inbuf.split(b'\n', 1) parts = cmd.split() - handler = getattr(self, 'handle_%s' % parts[0].lower(), None) + cmd_name = parts[0].decode('ascii').lower() + handler = getattr(self, 'handle_%s' % cmd_name, None) if handler: handler(*parts[1:]) else: raise ValueError('Unhandled command: %s' % parts[0]) - def handle_set(self, key, flags, exptime, num_bytes, noreply=''): + def handle_set(self, key, flags, exptime, num_bytes, noreply=b''): self.cache[key] = flags, exptime, self.inbuf[:int(num_bytes)] self.inbuf = self.inbuf[int(num_bytes) + 2:] - if noreply != 'noreply': - self.outbuf += 'STORED\r\n' + if noreply != b'noreply': + self.outbuf += b'STORED\r\n' - def handle_add(self, key, flags, exptime, num_bytes, noreply=''): + def handle_add(self, key, flags, exptime, num_bytes, noreply=b''): value = self.inbuf[:int(num_bytes)] self.inbuf = self.inbuf[int(num_bytes) + 2:] if key in self.cache: - if noreply != 'noreply': - self.outbuf += 'NOT_STORED\r\n' + if noreply != b'noreply': + self.outbuf += b'NOT_STORED\r\n' else: self.cache[key] = flags, exptime, value - if noreply != 'noreply': - self.outbuf += 'STORED\r\n' + if noreply != b'noreply': + self.outbuf += b'STORED\r\n' - def handle_delete(self, key, noreply=''): + def handle_delete(self, key, noreply=b''): if self.exc_on_delete: raise Exception('mock is has exc_on_delete set') if key in self.cache: del self.cache[key] - if noreply != 'noreply': - self.outbuf += 'DELETED\r\n' - elif noreply != 'noreply': - self.outbuf += 'NOT_FOUND\r\n' + if noreply != b'noreply': + self.outbuf += b'DELETED\r\n' + elif noreply != b'noreply': + self.outbuf += b'NOT_FOUND\r\n' def handle_get(self, *keys): for key in keys: if key in self.cache: val = self.cache[key] - self.outbuf += 'VALUE %s %s %s\r\n' % ( - key, val[0], len(val[2])) - self.outbuf += val[2] + '\r\n' - self.outbuf += 'END\r\n' + self.outbuf += b' '.join([ + b'VALUE', + key, + val[0], + str(len(val[2])).encode('ascii') + ]) + b'\r\n' + self.outbuf += val[2] + b'\r\n' + self.outbuf += b'END\r\n' - def handle_incr(self, key, value, noreply=''): + def handle_incr(self, key, value, noreply=b''): if key in self.cache: current = self.cache[key][2] - new_val = str(int(current) + int(value)) + new_val = str(int(current) + int(value)).encode('ascii') self.cache[key] = self.cache[key][:2] + (new_val, ) - self.outbuf += str(new_val) + '\r\n' + self.outbuf += new_val + b'\r\n' else: - self.outbuf += 'NOT_FOUND\r\n' + self.outbuf += b'NOT_FOUND\r\n' - def handle_decr(self, key, value, noreply=''): + def handle_decr(self, key, value, noreply=b''): if key in self.cache: current = 
self.cache[key][2] - new_val = str(int(current) - int(value)) - if new_val[0] == '-': # ie, val is negative - new_val = '0' + new_val = str(int(current) - int(value)).encode('ascii') + if new_val[:1] == b'-': # ie, val is negative + new_val = b'0' self.cache[key] = self.cache[key][:2] + (new_val, ) - self.outbuf += str(new_val) + '\r\n' + self.outbuf += new_val + b'\r\n' else: - self.outbuf += 'NOT_FOUND\r\n' + self.outbuf += b'NOT_FOUND\r\n' def readline(self): if self.read_return_empty_str: - return '' + return b'' if self.read_return_none: return None if self.down: raise Exception('mock is down') - if '\n' in self.outbuf: - response, self.outbuf = self.outbuf.split('\n', 1) - return response + '\n' + if b'\n' in self.outbuf: + response, self.outbuf = self.outbuf.split(b'\n', 1) + return response + b'\n' def read(self, size): if self.down: @@ -199,7 +204,7 @@ memcache_client = memcached.MemcacheRing([sock1ipport, sock2ip]) one = two = True while one or two: # Run until we match hosts one and two - key = uuid4().hex + key = uuid4().hex.encode('ascii') for conn in memcache_client._get_conns(key): peeripport = '%s:%s' % conn[2].getpeername() self.assertTrue(peeripport in (sock1ipport, sock2ipport)) @@ -222,7 +227,7 @@ sock_addr = sock.getsockname() server_socket = '[%s]:%s' % (sock_addr[0], sock_addr[1]) memcache_client = memcached.MemcacheRing([server_socket]) - key = uuid4().hex + key = uuid4().hex.encode('ascii') for conn in memcache_client._get_conns(key): peer_sockaddr = conn[2].getpeername() peer_socket = '[%s]:%s' % (peer_sockaddr[0], peer_sockaddr[1]) @@ -243,7 +248,7 @@ server_host = '[%s]' % sock_addr[0] memcached.DEFAULT_MEMCACHED_PORT = sock_addr[1] memcache_client = memcached.MemcacheRing([server_host]) - key = uuid4().hex + key = uuid4().hex.encode('ascii') for conn in memcache_client._get_conns(key): peer_sockaddr = conn[2].getpeername() peer_socket = '[%s]:%s' % (peer_sockaddr[0], peer_sockaddr[1]) @@ -271,7 +276,7 @@ socket.SOCK_STREAM, 0, '', ('127.0.0.1', sock_addr[1]))] memcache_client = memcached.MemcacheRing([server_socket]) - key = uuid4().hex + key = uuid4().hex.encode('ascii') for conn in memcache_client._get_conns(key): peer_sockaddr = conn[2].getpeername() peer_socket = '%s:%s' % (peer_sockaddr[0], @@ -296,7 +301,7 @@ socket.SOCK_STREAM, 0, '', ('::1', sock_addr[1]))] memcache_client = memcached.MemcacheRing([server_socket]) - key = uuid4().hex + key = uuid4().hex.encode('ascii') for conn in memcache_client._get_conns(key): peer_sockaddr = conn[2].getpeername() peer_socket = '[%s]:%s' % (peer_sockaddr[0], @@ -312,16 +317,16 @@ mock = MockMemcached() memcache_client._client_cache['1.2.3.4:11211'] = MockedMemcachePool( [(mock, mock)] * 2) - cache_key = md5('some_key').hexdigest() + cache_key = md5(b'some_key').hexdigest().encode('ascii') memcache_client.set('some_key', [1, 2, 3]) self.assertEqual(memcache_client.get('some_key'), [1, 2, 3]) # See JSON_FLAG - self.assertEqual(mock.cache, {cache_key: ('2', '0', '[1, 2, 3]')}) + self.assertEqual(mock.cache, {cache_key: (b'2', b'0', b'[1, 2, 3]')}) memcache_client.set('some_key', [4, 5, 6]) self.assertEqual(memcache_client.get('some_key'), [4, 5, 6]) - self.assertEqual(mock.cache, {cache_key: ('2', '0', '[4, 5, 6]')}) + self.assertEqual(mock.cache, {cache_key: (b'2', b'0', b'[4, 5, 6]')}) memcache_client.set('some_key', ['simple str', 'utf8 str éà']) # As per http://wiki.openstack.org/encoding, @@ -329,10 +334,10 @@ self.assertEqual( memcache_client.get('some_key'), ['simple str', u'utf8 str éà']) 
self.assertEqual(mock.cache, {cache_key: ( - '2', '0', '["simple str", "utf8 str \\u00e9\\u00e0"]')}) + b'2', b'0', b'["simple str", "utf8 str \\u00e9\\u00e0"]')}) memcache_client.set('some_key', [1, 2, 3], time=20) - self.assertEqual(mock.cache, {cache_key: ('2', '20', '[1, 2, 3]')}) + self.assertEqual(mock.cache, {cache_key: (b'2', b'20', b'[1, 2, 3]')}) sixtydays = 60 * 24 * 60 * 60 esttimeout = time.time() + sixtydays @@ -347,7 +352,8 @@ [(mock, mock)] * 2) memcache_client.set('some_key', [1, 2, 3]) self.assertEqual(memcache_client.get('some_key'), [1, 2, 3]) - self.assertEqual(mock.cache.values()[0][1], '0') + self.assertEqual(list(mock.cache.values()), + [(b'2', b'0', b'[1, 2, 3]')]) # Now lets return an empty string, and make sure we aren't logging # the error. @@ -359,7 +365,7 @@ with patch("sys.stdout", fake_stdout),\ patch('swift.common.memcached.logging', logger): mock.read_return_empty_str = True - self.assertEqual(memcache_client.get('some_key'), None) + self.assertIsNone(memcache_client.get('some_key')) log_lines = logger.get_lines_for_level('error') self.assertIn('Error talking to memcached', log_lines[0]) self.assertFalse(log_lines[1:]) @@ -371,15 +377,15 @@ memcache_client._client_cache['1.2.3.4:11211'] = MockedMemcachePool( [(mock, mock)] * 2) self.assertEqual(memcache_client.incr('some_key', delta=5), 5) - self.assertEqual(memcache_client.get('some_key'), '5') + self.assertEqual(memcache_client.get('some_key'), b'5') self.assertEqual(memcache_client.incr('some_key', delta=5), 10) - self.assertEqual(memcache_client.get('some_key'), '10') + self.assertEqual(memcache_client.get('some_key'), b'10') self.assertEqual(memcache_client.incr('some_key', delta=1), 11) - self.assertEqual(memcache_client.get('some_key'), '11') + self.assertEqual(memcache_client.get('some_key'), b'11') self.assertEqual(memcache_client.incr('some_key', delta=-5), 6) - self.assertEqual(memcache_client.get('some_key'), '6') + self.assertEqual(memcache_client.get('some_key'), b'6') self.assertEqual(memcache_client.incr('some_key', delta=-15), 0) - self.assertEqual(memcache_client.get('some_key'), '0') + self.assertEqual(memcache_client.get('some_key'), b'0') mock.read_return_none = True self.assertRaises(memcached.MemcacheConnectionError, memcache_client.incr, 'some_key', delta=-15) @@ -391,9 +397,9 @@ memcache_client._client_cache['1.2.3.4:11211'] = MockedMemcachePool( [(mock, mock)] * 2) self.assertEqual(memcache_client.incr('some_key', delta=5), 5) - self.assertEqual(memcache_client.get('some_key'), '5') + self.assertEqual(memcache_client.get('some_key'), b'5') self.assertEqual(memcache_client.incr('some_key', delta=5), 10) - self.assertEqual(memcache_client.get('some_key'), '10') + self.assertEqual(memcache_client.get('some_key'), b'10') # Now lets return an empty string, and make sure we aren't logging # the error. 
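
The mock above speaks the memcached ASCII protocol, and the Python 3 port frames everything explicitly as bytes (hence the '' to b'' changes). A small sketch of that framing, using an illustrative key and value:

# Memcached text-protocol framing as bytes; the key and value here are
# example data, not taken from the tests.
key = b'6d0bb00954ceb7fbee436bb55a8397a9'
value = b'[1, 2, 3]'
flags, exptime = b'2', b'0'   # JSON flag, no expiry

set_cmd = b' '.join([b'set', key, flags, exptime,
                     str(len(value)).encode('ascii')]) + b'\r\n'
set_cmd += value + b'\r\n'
# expected reply: b'STORED\r\n'

get_cmd = b'get ' + key + b'\r\n'
# expected reply: b'VALUE ' + key + b' 2 9\r\n' + value + b'\r\nEND\r\n'
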
@@ -417,11 +423,11 @@ mock = MockMemcached() memcache_client._client_cache['1.2.3.4:11211'] = MockedMemcachePool( [(mock, mock)] * 2) - cache_key = md5('some_key').hexdigest() + cache_key = md5(b'some_key').hexdigest().encode('ascii') memcache_client.incr('some_key', delta=5, time=55) - self.assertEqual(memcache_client.get('some_key'), '5') - self.assertEqual(mock.cache, {cache_key: ('0', '55', '5')}) + self.assertEqual(memcache_client.get('some_key'), b'5') + self.assertEqual(mock.cache, {cache_key: (b'0', b'55', b'5')}) memcache_client.delete('some_key') self.assertIsNone(memcache_client.get('some_key')) @@ -429,7 +435,7 @@ fiftydays = 50 * 24 * 60 * 60 esttimeout = time.time() + fiftydays memcache_client.incr('some_key', delta=5, time=fiftydays) - self.assertEqual(memcache_client.get('some_key'), '5') + self.assertEqual(memcache_client.get('some_key'), b'5') _junk, cache_timeout, _junk = mock.cache[cache_key] self.assertAlmostEqual(float(cache_timeout), esttimeout, delta=1) @@ -437,12 +443,12 @@ self.assertIsNone(memcache_client.get('some_key')) memcache_client.incr('some_key', delta=5) - self.assertEqual(memcache_client.get('some_key'), '5') - self.assertEqual(mock.cache, {cache_key: ('0', '0', '5')}) + self.assertEqual(memcache_client.get('some_key'), b'5') + self.assertEqual(mock.cache, {cache_key: (b'0', b'0', b'5')}) memcache_client.incr('some_key', delta=5, time=55) - self.assertEqual(memcache_client.get('some_key'), '10') - self.assertEqual(mock.cache, {cache_key: ('0', '0', '10')}) + self.assertEqual(memcache_client.get('some_key'), b'10') + self.assertEqual(mock.cache, {cache_key: (b'0', b'0', b'10')}) def test_decr(self): memcache_client = memcached.MemcacheRing(['1.2.3.4:11211']) @@ -450,13 +456,13 @@ memcache_client._client_cache['1.2.3.4:11211'] = MockedMemcachePool( [(mock, mock)] * 2) self.assertEqual(memcache_client.decr('some_key', delta=5), 0) - self.assertEqual(memcache_client.get('some_key'), '0') + self.assertEqual(memcache_client.get('some_key'), b'0') self.assertEqual(memcache_client.incr('some_key', delta=15), 15) - self.assertEqual(memcache_client.get('some_key'), '15') + self.assertEqual(memcache_client.get('some_key'), b'15') self.assertEqual(memcache_client.decr('some_key', delta=4), 11) - self.assertEqual(memcache_client.get('some_key'), '11') + self.assertEqual(memcache_client.get('some_key'), b'11') self.assertEqual(memcache_client.decr('some_key', delta=15), 0) - self.assertEqual(memcache_client.get('some_key'), '0') + self.assertEqual(memcache_client.get('some_key'), b'0') mock.read_return_none = True self.assertRaises(memcached.MemcacheConnectionError, memcache_client.decr, 'some_key', delta=15) @@ -510,27 +516,27 @@ self.assertEqual( memcache_client.get_multi(('some_key2', 'some_key1'), 'multi_key'), [[4, 5, 6], [1, 2, 3]]) - for key in ('some_key1', 'some_key2'): - key = md5(key).hexdigest() + for key in (b'some_key1', b'some_key2'): + key = md5(key).hexdigest().encode('ascii') self.assertIn(key, mock.cache) _junk, cache_timeout, _junk = mock.cache[key] - self.assertEqual(cache_timeout, '0') + self.assertEqual(cache_timeout, b'0') memcache_client.set_multi( {'some_key1': [1, 2, 3], 'some_key2': [4, 5, 6]}, 'multi_key', time=20) - for key in ('some_key1', 'some_key2'): - key = md5(key).hexdigest() + for key in (b'some_key1', b'some_key2'): + key = md5(key).hexdigest().encode('ascii') _junk, cache_timeout, _junk = mock.cache[key] - self.assertEqual(cache_timeout, '20') + self.assertEqual(cache_timeout, b'20') fortydays = 50 * 24 * 60 * 60 esttimeout = 
time.time() + fortydays memcache_client.set_multi( {'some_key1': [1, 2, 3], 'some_key2': [4, 5, 6]}, 'multi_key', time=fortydays) - for key in ('some_key1', 'some_key2'): - key = md5(key).hexdigest() + for key in (b'some_key1', b'some_key2'): + key = md5(key).hexdigest().encode('ascii') _junk, cache_timeout, _junk = mock.cache[key] self.assertAlmostEqual(float(cache_timeout), esttimeout, delta=1) self.assertEqual(memcache_client.get_multi( diff -Nru swift-2.17.0/test/unit/common/test_storage_policy.py swift-2.18.0/test/unit/common/test_storage_policy.py --- swift-2.17.0/test/unit/common/test_storage_policy.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/test_storage_policy.py 2018-05-30 10:17:02.000000000 +0000 @@ -70,7 +70,10 @@ class TestStoragePolicies(unittest.TestCase): def _conf(self, conf_str): conf_str = "\n".join(line.strip() for line in conf_str.split("\n")) - conf = ConfigParser() + if six.PY2: + conf = ConfigParser() + else: + conf = ConfigParser(strict=False) conf.readfp(six.StringIO(conf_str)) return conf @@ -679,7 +682,7 @@ with capture_logging('swift.common.storage_policy') as records, \ self.assertRaises(PolicyError) as exc_mgr: parse_storage_policies(bad_conf) - self.assertEqual(exc_mgr.exception.message, + self.assertEqual(exc_mgr.exception.args[0], 'Storage policy bad-policy uses an EC ' 'configuration known to harm data durability. This ' 'policy MUST be deprecated.') @@ -1048,7 +1051,7 @@ [storage-policy:00] name = double-zero """) - with NamedTemporaryFile() as f: + with NamedTemporaryFile(mode='w+t') as f: conf.write(f) f.flush() with mock.patch('swift.common.utils.SWIFT_CONF_FILE', diff -Nru swift-2.17.0/test/unit/common/test_swob.py swift-2.18.0/test/unit/common/test_swob.py --- swift-2.17.0/test/unit/common/test_swob.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/test_swob.py 2018-05-30 10:17:02.000000000 +0000 @@ -271,18 +271,34 @@ self.assertIn('a', match) self.assertIn('b', match) self.assertNotIn('c', match) + self.assertEqual(repr(match), "Match('a, b')") def test_match_star(self): match = swift.common.swob.Match('"a", "*"') self.assertIn('a', match) self.assertIn('b', match) self.assertIn('c', match) + self.assertEqual(repr(match), "Match('*, a')") def test_match_noquote(self): match = swift.common.swob.Match('a, b') self.assertEqual(match.tags, set(('a', 'b'))) self.assertIn('a', match) self.assertIn('b', match) + self.assertNotIn('c', match) + + def test_match_no_optional_white_space(self): + match = swift.common.swob.Match('"a","b"') + self.assertEqual(match.tags, set(('a', 'b'))) + self.assertIn('a', match) + self.assertIn('b', match) + self.assertNotIn('c', match) + + def test_match_lots_of_optional_white_space(self): + match = swift.common.swob.Match('"a" , , "b" ') + self.assertEqual(match.tags, set(('a', 'b'))) + self.assertIn('a', match) + self.assertIn('b', match) self.assertNotIn('c', match) diff -Nru swift-2.17.0/test/unit/common/test_utils.py swift-2.18.0/test/unit/common/test_utils.py --- swift-2.17.0/test/unit/common/test_utils.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/test_utils.py 2018-05-30 10:17:02.000000000 +0000 @@ -15,7 +15,11 @@ """Tests for swift.common.utils""" from __future__ import print_function -from test.unit import temptree, debug_logger, make_timestamp_iter, with_tempdir + +import hashlib + +from test.unit import temptree, debug_logger, make_timestamp_iter, \ + with_tempdir, mock_timestamp_now import ctypes import contextlib @@ -43,6 +47,7 @@ 
import six from six import BytesIO, StringIO from six.moves.queue import Queue, Empty +from six.moves import http_client from six.moves import range from textwrap import dedent @@ -72,7 +77,7 @@ from swift.common.storage_policy import POLICIES, reload_storage_policies from swift.common.swob import Request, Response from test.unit import FakeLogger, requires_o_tmpfile_support, \ - quiet_eventlet_exceptions + requires_o_tmpfile_support_in_tmp, quiet_eventlet_exceptions threading = eventlet.patcher.original('threading') @@ -183,6 +188,7 @@ def test_invalid_input(self): self.assertRaises(ValueError, utils.Timestamp, time.time(), offset=-1) + self.assertRaises(ValueError, utils.Timestamp, '123.456_78_90') def test_invalid_string_conversion(self): t = utils.Timestamp.now() @@ -390,6 +396,8 @@ expected = '1402436408.91203_00000000000000f0' test_values = ( '1402436408.91203_000000f0', + u'1402436408.91203_000000f0', + b'1402436408.91203_000000f0', '1402436408.912030000_0000000000f0', '1402436408.912029_000000f0', '1402436408.91202999999_0000000000f0', @@ -621,16 +629,7 @@ '%r is not greater than %r given %r' % ( timestamp, int(other), value)) - def test_greater_with_offset(self): - now = time.time() - older = now - 1 - test_values = ( - 0, '0', 0.0, '0.0', '0000.0000', '000.000_000', - 1, '1', 1.1, '1.1', '1111.1111', '111.111_111', - 1402443346.935174, '1402443346.93517', '1402443346.935169_ffff', - older, '%f' % older, '%f_0000ffff' % older, - now, '%f' % now, '%f_00000000' % now, - ) + def _test_greater_with_offset(self, now, test_values): for offset in range(1, 1000, 100): timestamp = utils.Timestamp(now, offset=offset) for value in test_values: @@ -655,6 +654,43 @@ '%r is not greater than %r given %r' % ( timestamp, int(other), value)) + def test_greater_with_offset(self): + # Part 1: use the natural time of the Python. This is deliciously + # unpredictable, but completely legitimate and realistic. Finds bugs! + now = time.time() + older = now - 1 + test_values = ( + 0, '0', 0.0, '0.0', '0000.0000', '000.000_000', + 1, '1', 1.1, '1.1', '1111.1111', '111.111_111', + 1402443346.935174, '1402443346.93517', '1402443346.935169_ffff', + older, now, + ) + self._test_greater_with_offset(now, test_values) + # Part 2: Same as above, but with fixed time values that reproduce + # specific corner cases. + now = 1519830570.6949348 + older = now - 1 + test_values = ( + 0, '0', 0.0, '0.0', '0000.0000', '000.000_000', + 1, '1', 1.1, '1.1', '1111.1111', '111.111_111', + 1402443346.935174, '1402443346.93517', '1402443346.935169_ffff', + older, now, + ) + self._test_greater_with_offset(now, test_values) + # Part 3: The '%f' problem. Timestamps cannot be converted to %f + # strings, then back to timestamps, then compared with originals. + # You can only "import" a floating point representation once. 
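
A quick worked example of the "%f problem" described in the comment above: %f keeps only six decimal places, so formatting a timestamp and parsing it back does not reproduce the original float.

# Worked example of the precision loss the comment above describes.
now = 1519830570.6949348
as_text = '%f' % now           # '1519830570.694935', six decimals only
assert float(as_text) != now   # the round-trip loses the original value
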
+ now = 1519830570.6949348 + now = float('%f' % now) + older = now - 1 + test_values = ( + 0, '0', 0.0, '0.0', '0000.0000', '000.000_000', + 1, '1', 1.1, '1.1', '1111.1111', '111.111_111', + older, '%f' % older, '%f_0000ffff' % older, + now, '%f' % now, '%s_00000000' % now, + ) + self._test_greater_with_offset(now, test_values) + def test_smaller_no_offset(self): now = time.time() newer = now + 1 @@ -921,8 +957,8 @@ """Tests for swift.common.utils """ def setUp(self): - utils.HASH_PATH_SUFFIX = 'endcap' - utils.HASH_PATH_PREFIX = 'startcap' + utils.HASH_PATH_SUFFIX = b'endcap' + utils.HASH_PATH_PREFIX = b'startcap' def test_get_zero_indexed_base_string(self): self.assertEqual(utils.get_zero_indexed_base_string('something', 0), @@ -1422,6 +1458,15 @@ with open(testcache_file) as fd: file_dict = json.loads(fd.readline()) self.assertEqual(expect_dict, file_dict) + # nested dict items are not sticky + submit_dict = {'key1': {'key2': {'value3': 3}}} + expect_dict = {'key0': 101, + 'key1': {'key2': {'value3': 3}, + 'value1': 1, 'value2': 2}} + utils.dump_recon_cache(submit_dict, testcache_file, logger) + with open(testcache_file) as fd: + file_dict = json.loads(fd.readline()) + self.assertEqual(expect_dict, file_dict) # cached entries are sticky submit_dict = {} utils.dump_recon_cache(submit_dict, testcache_file, logger) @@ -1501,6 +1546,25 @@ finally: rmtree(testdir_base) + def test_load_recon_cache(self): + stub_data = {'test': 'foo'} + with NamedTemporaryFile() as f: + f.write(json.dumps(stub_data).encode("utf-8")) + f.flush() + self.assertEqual(stub_data, utils.load_recon_cache(f.name)) + + # missing files are treated as empty + self.assertFalse(os.path.exists(f.name)) # sanity + self.assertEqual({}, utils.load_recon_cache(f.name)) + + # Corrupt files are treated as empty. We could crash and make an + # operator fix the corrupt file, but they'll "fix" it with "rm -f + # /var/cache/swift/*.recon", so let's just do it for them. + with NamedTemporaryFile() as f: + f.write(b"{not [valid (json") + f.flush() + self.assertEqual({}, utils.load_recon_cache(f.name)) + def test_get_logger(self): sio = StringIO() logger = logging.getLogger('server') @@ -1675,6 +1739,13 @@ self.assertTrue('my error message' in log_msg) message_timeout.cancel() + # test BadStatusLine + log_exception(http_client.BadStatusLine('')) + log_msg = strip_value(sio) + self.assertNotIn('Traceback', log_msg) + self.assertIn('BadStatusLine', log_msg) + self.assertIn("''", log_msg) + # test unhandled log_exception(Exception('my error message')) log_msg = strip_value(sio) @@ -1938,7 +2009,7 @@ def test_hash_path(self): # Yes, these tests are deliberately very fragile. 
We want to make sure # that if someones changes the results hash_path produces, they know it - with mock.patch('swift.common.utils.HASH_PATH_PREFIX', ''): + with mock.patch('swift.common.utils.HASH_PATH_PREFIX', b''): self.assertEqual(utils.hash_path('a'), '1c84525acb02107ea475dcd3d09c2c58') self.assertEqual(utils.hash_path('a', 'c'), @@ -1948,10 +2019,10 @@ self.assertEqual(utils.hash_path('a', 'c', 'o', raw_digest=False), '06fbf0b514e5199dfc4e00f42eb5ea83') self.assertEqual(utils.hash_path('a', 'c', 'o', raw_digest=True), - '\x06\xfb\xf0\xb5\x14\xe5\x19\x9d\xfcN' - '\x00\xf4.\xb5\xea\x83') + b'\x06\xfb\xf0\xb5\x14\xe5\x19\x9d\xfcN' + b'\x00\xf4.\xb5\xea\x83') self.assertRaises(ValueError, utils.hash_path, 'a', object='o') - utils.HASH_PATH_PREFIX = 'abcdef' + utils.HASH_PATH_PREFIX = b'abcdef' self.assertEqual(utils.hash_path('a', 'c', 'o', raw_digest=False), '363f9b535bfb7d17a43a46a358afca0e') @@ -1985,8 +2056,8 @@ def _test_validate_hash_conf(self, sections, options, should_raise_error): class FakeConfigParser(object): - def read(self, conf_path): - return True + def read(self, conf_path, encoding=None): + return [conf_path] def get(self, section, option): if section not in sections: @@ -1996,8 +2067,8 @@ else: return 'some_option_value' - with mock.patch('swift.common.utils.HASH_PATH_PREFIX', ''), \ - mock.patch('swift.common.utils.HASH_PATH_SUFFIX', ''), \ + with mock.patch('swift.common.utils.HASH_PATH_PREFIX', b''), \ + mock.patch('swift.common.utils.HASH_PATH_SUFFIX', b''), \ mock.patch('swift.common.utils.ConfigParser', FakeConfigParser): try: @@ -2026,7 +2097,7 @@ log_name = yarr''' # setup a real file fd, temppath = tempfile.mkstemp() - with os.fdopen(fd, 'wb') as f: + with os.fdopen(fd, 'w') as f: f.write(conf) make_filename = lambda: temppath # setup a file stream @@ -2075,7 +2146,7 @@ log_name = %(yarr)s''' # setup a real file fd, temppath = tempfile.mkstemp() - with os.fdopen(fd, 'wb') as f: + with os.fdopen(fd, 'w') as f: f.write(conf) make_filename = lambda: temppath # setup a file stream @@ -2661,18 +2732,26 @@ def test_config_positive_int_value(self): expectations = { # value : expected, - '1': 1, + u'1': 1, + b'1': 1, 1: 1, - '2': 2, - '1024': 1024, - '0': ValueError, - '-1': ValueError, - '0x01': ValueError, - 'asdf': ValueError, + u'2': 2, + b'2': 2, + u'1024': 1024, + b'1024': 1024, + u'0': ValueError, + b'0': ValueError, + u'-1': ValueError, + b'-1': ValueError, + u'0x01': ValueError, + b'0x01': ValueError, + u'asdf': ValueError, + b'asdf': ValueError, None: ValueError, 0: ValueError, -1: ValueError, - '1.2': ValueError, # string expresses float should be value error + u'1.2': ValueError, # string expresses float should be value error + b'1.2': ValueError, # string expresses float should be value error } for value, expected in expectations.items(): try: @@ -2683,10 +2762,57 @@ else: self.assertEqual( 'Config option must be an positive int number, ' - 'not "%s".' % value, e.message) + 'not "%s".' 
% value, e.args[0]) else: self.assertEqual(expected, rv) + def test_config_float_value(self): + for args, expected in ( + ((99, None, None), 99.0), + ((99.01, None, None), 99.01), + (('99', None, None), 99.0), + (('99.01', None, None), 99.01), + ((99, 99, None), 99.0), + ((99.01, 99.01, None), 99.01), + (('99', 99, None), 99.0), + (('99.01', 99.01, None), 99.01), + ((99, None, 99), 99.0), + ((99.01, None, 99.01), 99.01), + (('99', None, 99), 99.0), + (('99.01', None, 99.01), 99.01), + ((-99, -99, -99), -99.0), + ((-99.01, -99.01, -99.01), -99.01), + (('-99', -99, -99), -99.0), + (('-99.01', -99.01, -99.01), -99.01),): + actual = utils.config_float_value(*args) + self.assertEqual(expected, actual) + + for val, minimum in ((99, 100), + ('99', 100), + (-99, -98), + ('-98.01', -98)): + with self.assertRaises(ValueError) as cm: + utils.config_float_value(val, minimum=minimum) + self.assertIn('greater than %s' % minimum, cm.exception.args[0]) + self.assertNotIn('less than', cm.exception.args[0]) + + for val, maximum in ((99, 98), + ('99', 98), + (-99, -100), + ('-97.9', -98)): + with self.assertRaises(ValueError) as cm: + utils.config_float_value(val, maximum=maximum) + self.assertIn('less than %s' % maximum, cm.exception.args[0]) + self.assertNotIn('greater than', cm.exception.args[0]) + + for val, minimum, maximum in ((99, 99, 98), + ('99', 100, 100), + (99, 98, 98),): + with self.assertRaises(ValueError) as cm: + utils.config_float_value(val, minimum=minimum, maximum=maximum) + self.assertIn('greater than %s' % minimum, cm.exception.args[0]) + self.assertIn('less than %s' % maximum, cm.exception.args[0]) + def test_config_auto_int_value(self): expectations = { # (value, default) : expected, @@ -2940,7 +3066,7 @@ fallocate(0, 1, 0, ctypes.c_uint64(0)) self.assertEqual( str(catcher.exception), - '[Errno %d] FALLOCATE_RESERVE fail 100.0 <= 100.0' + '[Errno %d] FALLOCATE_RESERVE fail 100 <= 100' % errno.ENOSPC) self.assertEqual(catcher.exception.errno, errno.ENOSPC) @@ -2955,7 +3081,7 @@ fallocate(0, 1, 0, ctypes.c_uint64(101)) self.assertEqual( str(catcher.exception), - '[Errno %d] FALLOCATE_RESERVE fail 0.99 <= 1.0' + '[Errno %d] FALLOCATE_RESERVE fail 0.99 <= 1' % errno.ENOSPC) self.assertEqual(catcher.exception.errno, errno.ENOSPC) @@ -2969,7 +3095,7 @@ fallocate(0, 1, 0, ctypes.c_uint64(100)) self.assertEqual( str(catcher.exception), - '[Errno %d] FALLOCATE_RESERVE fail 98.0 <= 98.0' + '[Errno %d] FALLOCATE_RESERVE fail 98 <= 98' % errno.ENOSPC) self.assertEqual(catcher.exception.errno, errno.ENOSPC) @@ -2993,7 +3119,7 @@ fallocate(0, 1, 0, ctypes.c_uint64(1000)) self.assertEqual( str(catcher.exception), - '[Errno %d] FALLOCATE_RESERVE fail 2.0 <= 2.0' + '[Errno %d] FALLOCATE_RESERVE fail 2 <= 2' % errno.ENOSPC) self.assertEqual(catcher.exception.errno, errno.ENOSPC) @@ -3126,11 +3252,11 @@ def test_lock_file(self): flags = os.O_CREAT | os.O_RDWR with NamedTemporaryFile(delete=False) as nt: - nt.write("test string") + nt.write(b"test string") nt.flush() nt.close() with utils.lock_file(nt.name, unlink=False) as f: - self.assertEqual(f.read(), "test string") + self.assertEqual(f.read(), b"test string") # we have a lock, now let's try to get a newer one fd = os.open(nt.name, flags) self.assertRaises(IOError, fcntl.flock, fd, @@ -3138,12 +3264,12 @@ with utils.lock_file(nt.name, unlink=False, append=True) as f: f.seek(0) - self.assertEqual(f.read(), "test string") + self.assertEqual(f.read(), b"test string") f.seek(0) - f.write("\nanother string") + f.write(b"\nanother string") f.flush() 
f.seek(0) - self.assertEqual(f.read(), "test string\nanother string") + self.assertEqual(f.read(), b"test string\nanother string") # we have a lock, now let's try to get a newer one fd = os.open(nt.name, flags) @@ -3160,7 +3286,7 @@ pass with utils.lock_file(nt.name, unlink=True) as f: - self.assertEqual(f.read(), "test string\nanother string") + self.assertEqual(f.read(), b"test string\nanother string") # we have a lock, now let's try to get a newer one fd = os.open(nt.name, flags) self.assertRaises( @@ -3482,28 +3608,80 @@ do_test(b'\xf0\x9f\x82\xa1', b'\xf0\x9f\x82\xa1'), do_test(b'\xed\xa0\xbc\xed\xb2\xa1', b'\xf0\x9f\x82\xa1'), - def test_quote(self): - res = utils.quote('/v1/a/c3/subdirx/') - assert res == '/v1/a/c3/subdirx/' - res = utils.quote('/v1/a&b/c3/subdirx/') - assert res == '/v1/a%26b/c3/subdirx/' - res = utils.quote('/v1/a&b/c3/subdirx/', safe='&') - assert res == '%2Fv1%2Fa&b%2Fc3%2Fsubdirx%2F' - unicode_sample = u'\uc77c\uc601' - account = 'abc_' + unicode_sample - valid_utf8_str = utils.get_valid_utf8_str(account) - account = 'abc_' + unicode_sample.encode('utf-8')[::-1] - invalid_utf8_str = utils.get_valid_utf8_str(account) - self.assertEqual('abc_%EC%9D%BC%EC%98%81', - utils.quote(valid_utf8_str)) - self.assertEqual('abc_%EF%BF%BD%EF%BF%BD%EC%BC%9D%EF%BF%BD', - utils.quote(invalid_utf8_str)) + def test_quote_bytes(self): + self.assertEqual(b'/v1/a/c3/subdirx/', + utils.quote(b'/v1/a/c3/subdirx/')) + self.assertEqual(b'/v1/a%26b/c3/subdirx/', + utils.quote(b'/v1/a&b/c3/subdirx/')) + self.assertEqual(b'%2Fv1%2Fa&b%2Fc3%2Fsubdirx%2F', + utils.quote(b'/v1/a&b/c3/subdirx/', safe='&')) + self.assertEqual(b'abc_%EC%9D%BC%EC%98%81', + utils.quote(u'abc_\uc77c\uc601'.encode('utf8'))) + # Invalid utf8 is parsed as latin1, then re-encoded as utf8?? + self.assertEqual(b'%EF%BF%BD%EF%BF%BD%EC%BC%9D%EF%BF%BD', + utils.quote(u'\uc77c\uc601'.encode('utf8')[::-1])) + + def test_quote_unicode(self): + self.assertEqual(u'/v1/a/c3/subdirx/', + utils.quote(u'/v1/a/c3/subdirx/')) + self.assertEqual(u'/v1/a%26b/c3/subdirx/', + utils.quote(u'/v1/a&b/c3/subdirx/')) + self.assertEqual(u'%2Fv1%2Fa&b%2Fc3%2Fsubdirx%2F', + utils.quote(u'/v1/a&b/c3/subdirx/', safe='&')) + self.assertEqual(u'abc_%EC%9D%BC%EC%98%81', + utils.quote(u'abc_\uc77c\uc601')) def test_get_hmac(self): self.assertEqual( utils.get_hmac('GET', '/path', 1, 'abc'), 'b17f6ff8da0e251737aa9e3ee69a881e3e092e2f') + def test_parse_override_options(self): + # When override_ is passed in, it takes precedence. + opts = utils.parse_override_options( + override_policies=[0, 1], + override_devices=['sda', 'sdb'], + override_partitions=[100, 200], + policies='0,1,2,3', + devices='sda,sdb,sdc,sdd', + partitions='100,200,300,400') + self.assertEqual(opts.policies, [0, 1]) + self.assertEqual(opts.devices, ['sda', 'sdb']) + self.assertEqual(opts.partitions, [100, 200]) + + # When override_ is passed in, it applies even in run-once + # mode. + opts = utils.parse_override_options( + once=True, + override_policies=[0, 1], + override_devices=['sda', 'sdb'], + override_partitions=[100, 200], + policies='0,1,2,3', + devices='sda,sdb,sdc,sdd', + partitions='100,200,300,400') + self.assertEqual(opts.policies, [0, 1]) + self.assertEqual(opts.devices, ['sda', 'sdb']) + self.assertEqual(opts.partitions, [100, 200]) + + # In run-once mode, we honor the passed-in overrides. 
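
The precedence exercised by this test: explicit override values always win, command-line style values are honored only in run-once mode, and run-forever mode ignores both. A simplified sketch of that rule, with a made-up helper name (not Swift's parse_override_options):

# Simplified precedence sketch; pick_devices is an illustrative helper,
# not part of Swift.
def pick_devices(override=None, cli_value='', once=False):
    if override:
        return override              # explicit overrides always win
    if once and cli_value:
        return cli_value.split(',')  # honored only in run-once mode
    return []                        # run-forever ignores CLI-style values

assert pick_devices(override=['sda', 'sdb']) == ['sda', 'sdb']
assert pick_devices(cli_value='sda,sdb,sdc', once=True) == \
    ['sda', 'sdb', 'sdc']
assert pick_devices(cli_value='sda,sdb,sdc') == []
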
+ opts = utils.parse_override_options( + once=True, + policies='0,1,2,3', + devices='sda,sdb,sdc,sdd', + partitions='100,200,300,400') + self.assertEqual(opts.policies, [0, 1, 2, 3]) + self.assertEqual(opts.devices, ['sda', 'sdb', 'sdc', 'sdd']) + self.assertEqual(opts.partitions, [100, 200, 300, 400]) + + # In run-forever mode, we ignore the passed-in overrides. + opts = utils.parse_override_options( + policies='0,1,2,3', + devices='sda,sdb,sdc,sdd', + partitions='100,200,300,400') + self.assertEqual(opts.policies, []) + self.assertEqual(opts.devices, []) + self.assertEqual(opts.partitions, []) + def test_get_policy_index(self): # Account has no information about a policy req = Request.blank( @@ -3689,6 +3867,107 @@ if tempdir: shutil.rmtree(tempdir) + def test_find_shard_range(self): + ts = utils.Timestamp.now().internal + start = utils.ShardRange('a/-a', ts, '', 'a') + atof = utils.ShardRange('a/a-f', ts, 'a', 'f') + ftol = utils.ShardRange('a/f-l', ts, 'f', 'l') + ltor = utils.ShardRange('a/l-r', ts, 'l', 'r') + rtoz = utils.ShardRange('a/r-z', ts, 'r', 'z') + end = utils.ShardRange('a/z-', ts, 'z', '') + ranges = [start, atof, ftol, ltor, rtoz, end] + + found = utils.find_shard_range('', ranges) + self.assertEqual(found, None) + found = utils.find_shard_range(' ', ranges) + self.assertEqual(found, start) + found = utils.find_shard_range(' ', ranges[1:]) + self.assertEqual(found, None) + found = utils.find_shard_range('b', ranges) + self.assertEqual(found, atof) + found = utils.find_shard_range('f', ranges) + self.assertEqual(found, atof) + found = utils.find_shard_range('f\x00', ranges) + self.assertEqual(found, ftol) + found = utils.find_shard_range('x', ranges) + self.assertEqual(found, rtoz) + found = utils.find_shard_range('r', ranges) + self.assertEqual(found, ltor) + found = utils.find_shard_range('}', ranges) + self.assertEqual(found, end) + found = utils.find_shard_range('}', ranges[:-1]) + self.assertEqual(found, None) + # remove l-r from list of ranges and try and find a shard range for an + # item in that range. 
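
The shard range lookups being tested treat each range as a (lower, upper] interval over the namespace: 'f' still belongs to a-f, while 'f\x00' falls into f-l. A minimal sketch of a lookup over such intervals, with made-up data (an illustration of the bounds convention, not Swift's find_shard_range):

# (lower, upper] interval lookup; find_range and the sample ranges are
# illustrative, not Swift's implementation.
def find_range(item, ranges):
    # ranges are (lower, upper) pairs sorted by upper bound, with ''
    # meaning unbounded; an item belongs to the first range satisfying
    # lower < item <= upper.
    for lower, upper in ranges:
        if item > lower and (upper == '' or item <= upper):
            return (lower, upper)
    return None

ranges = [('', 'a'), ('a', 'f'), ('f', 'l'), ('l', 'r'), ('r', 'z'),
          ('z', '')]
assert find_range('f', ranges) == ('a', 'f')
assert find_range('f\x00', ranges) == ('f', 'l')
assert find_range('}', ranges) == ('z', '')
assert find_range('', ranges) is None
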
+ found = utils.find_shard_range('p', ranges[:-3] + ranges[-2:]) + self.assertEqual(found, None) + + # add some sub-shards; a sub-shard's state is less than its parent + # while the parent is undeleted, so insert these ahead of the + # overlapping parent in the list of ranges + ftoh = utils.ShardRange('a/f-h', ts, 'f', 'h') + htok = utils.ShardRange('a/h-k', ts, 'h', 'k') + + overlapping_ranges = ranges[:2] + [ftoh, htok] + ranges[2:] + found = utils.find_shard_range('g', overlapping_ranges) + self.assertEqual(found, ftoh) + found = utils.find_shard_range('h', overlapping_ranges) + self.assertEqual(found, ftoh) + found = utils.find_shard_range('k', overlapping_ranges) + self.assertEqual(found, htok) + found = utils.find_shard_range('l', overlapping_ranges) + self.assertEqual(found, ftol) + found = utils.find_shard_range('m', overlapping_ranges) + self.assertEqual(found, ltor) + + ktol = utils.ShardRange('a/k-l', ts, 'k', 'l') + overlapping_ranges = ranges[:2] + [ftoh, htok, ktol] + ranges[2:] + found = utils.find_shard_range('l', overlapping_ranges) + self.assertEqual(found, ktol) + + def test_parse_db_filename(self): + actual = utils.parse_db_filename('hash.db') + self.assertEqual(('hash', None, '.db'), actual) + actual = utils.parse_db_filename('hash_1234567890.12345.db') + self.assertEqual(('hash', '1234567890.12345', '.db'), actual) + actual = utils.parse_db_filename( + '/dev/containers/part/ash/hash/hash_1234567890.12345.db') + self.assertEqual(('hash', '1234567890.12345', '.db'), actual) + self.assertRaises(ValueError, utils.parse_db_filename, '/path/to/dir/') + # These shouldn't come up in practice; included for completeness + self.assertEqual(utils.parse_db_filename('hashunder_.db'), + ('hashunder', '', '.db')) + self.assertEqual(utils.parse_db_filename('lots_of_underscores.db'), + ('lots', 'of', '.db')) + + def test_make_db_file_path(self): + epoch = utils.Timestamp.now() + actual = utils.make_db_file_path('hash.db', epoch) + self.assertEqual('hash_%s.db' % epoch.internal, actual) + + actual = utils.make_db_file_path('hash_oldepoch.db', epoch) + self.assertEqual('hash_%s.db' % epoch.internal, actual) + + actual = utils.make_db_file_path('/path/to/hash.db', epoch) + self.assertEqual('/path/to/hash_%s.db' % epoch.internal, actual) + + epoch = utils.Timestamp.now() + actual = utils.make_db_file_path(actual, epoch) + self.assertEqual('/path/to/hash_%s.db' % epoch.internal, actual) + + # None strips epoch + self.assertEqual('hash.db', utils.make_db_file_path('hash.db', None)) + self.assertEqual('/path/to/hash.db', utils.make_db_file_path( + '/path/to/hash_withepoch.db', None)) + + # epochs shouldn't have offsets + epoch = utils.Timestamp.now(offset=10) + actual = utils.make_db_file_path(actual, epoch) + self.assertEqual('/path/to/hash_%s.db' % epoch.normal, actual) + + self.assertRaises(ValueError, utils.make_db_file_path, + '/path/to/hash.db', 'bad epoch') + def test_modify_priority(self): pid = os.getpid() logger = debug_logger() @@ -3793,11 +4072,11 @@ patch('platform.architecture', return_value=('64bit', '')): self.assertRaises(OSError, utils.NR_ioprio_set) - @requires_o_tmpfile_support + @requires_o_tmpfile_support_in_tmp def test_link_fd_to_path_linkat_success(self): tempdir = mkdtemp() fd = os.open(tempdir, utils.O_TMPFILE | os.O_WRONLY) - data = "I'm whatever Gotham needs me to be" + data = b"I'm whatever Gotham needs me to be" _m_fsync_dir = mock.Mock() try: os.write(fd, data) @@ -3805,32 +4084,32 @@ self.assertRaises(OSError, os.read, fd, 1) file_path = os.path.join(tempdir, 
uuid4().hex) with mock.patch('swift.common.utils.fsync_dir', _m_fsync_dir): - utils.link_fd_to_path(fd, file_path, 1) - with open(file_path, 'r') as f: + utils.link_fd_to_path(fd, file_path, 1) + with open(file_path, 'rb') as f: self.assertEqual(f.read(), data) self.assertEqual(_m_fsync_dir.call_count, 2) finally: os.close(fd) shutil.rmtree(tempdir) - @requires_o_tmpfile_support + @requires_o_tmpfile_support_in_tmp def test_link_fd_to_path_target_exists(self): tempdir = mkdtemp() # Create and write to a file fd, path = tempfile.mkstemp(dir=tempdir) - os.write(fd, "hello world") + os.write(fd, b"hello world") os.fsync(fd) os.close(fd) self.assertTrue(os.path.exists(path)) fd = os.open(tempdir, utils.O_TMPFILE | os.O_WRONLY) try: - os.write(fd, "bye world") + os.write(fd, b"bye world") os.fsync(fd) utils.link_fd_to_path(fd, path, 0, fsync=False) # Original file now should have been over-written - with open(path, 'r') as f: - self.assertEqual(f.read(), "bye world") + with open(path, 'rb') as f: + self.assertEqual(f.read(), b"bye world") finally: os.close(fd) shutil.rmtree(tempdir) @@ -3848,7 +4127,7 @@ self.fail("Expecting IOError exception") self.assertTrue(_m_linkat.called) - @requires_o_tmpfile_support + @requires_o_tmpfile_support_in_tmp def test_linkat_race_dir_not_exists(self): tempdir = mkdtemp() target_dir = os.path.join(tempdir, uuid4().hex) @@ -3966,6 +4245,128 @@ self.assertEqual(utils.replace_partition_in_path(old, 10), old) self.assertEqual(utils.replace_partition_in_path(new, 11), new) + def test_round_robin_iter(self): + it1 = iter([1, 2, 3]) + it2 = iter([4, 5]) + it3 = iter([6, 7, 8, 9]) + it4 = iter([]) + + rr_its = utils.round_robin_iter([it1, it2, it3, it4]) + got = list(rr_its) + + # Expect that items get fetched in a round-robin fashion from the + # iterators + self.assertListEqual([1, 4, 6, 2, 5, 7, 3, 8, 9], got) + + @with_tempdir + def test_get_db_files(self, tempdir): + dbdir = os.path.join(tempdir, 'dbdir') + self.assertEqual([], utils.get_db_files(dbdir)) + path_1 = os.path.join(dbdir, 'dbfile.db') + self.assertEqual([], utils.get_db_files(path_1)) + os.mkdir(dbdir) + self.assertEqual([], utils.get_db_files(path_1)) + with open(path_1, 'wb'): + pass + self.assertEqual([path_1], utils.get_db_files(path_1)) + + path_2 = os.path.join(dbdir, 'dbfile_2.db') + self.assertEqual([path_1], utils.get_db_files(path_2)) + + with open(path_2, 'wb'): + pass + + self.assertEqual([path_1, path_2], utils.get_db_files(path_1)) + self.assertEqual([path_1, path_2], utils.get_db_files(path_2)) + + path_3 = os.path.join(dbdir, 'dbfile_3.db') + self.assertEqual([path_1, path_2], utils.get_db_files(path_3)) + + with open(path_3, 'wb'): + pass + + self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_1)) + self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_2)) + self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_3)) + + other_hash = os.path.join(dbdir, 'other.db') + self.assertEqual([], utils.get_db_files(other_hash)) + other_hash = os.path.join(dbdir, 'other_1.db') + self.assertEqual([], utils.get_db_files(other_hash)) + + pending = os.path.join(dbdir, 'dbfile.pending') + self.assertEqual([path_1, path_2, path_3], utils.get_db_files(pending)) + + with open(pending, 'wb'): + pass + self.assertEqual([path_1, path_2, path_3], utils.get_db_files(pending)) + + self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_1)) + self.assertEqual([path_1, path_2, path_3], utils.get_db_files(path_2)) + self.assertEqual([path_1, path_2, path_3], 
utils.get_db_files(path_3)) + self.assertEqual([], utils.get_db_files(dbdir)) + + os.unlink(path_1) + self.assertEqual([path_2, path_3], utils.get_db_files(path_1)) + self.assertEqual([path_2, path_3], utils.get_db_files(path_2)) + self.assertEqual([path_2, path_3], utils.get_db_files(path_3)) + + os.unlink(path_2) + self.assertEqual([path_3], utils.get_db_files(path_1)) + self.assertEqual([path_3], utils.get_db_files(path_2)) + self.assertEqual([path_3], utils.get_db_files(path_3)) + + os.unlink(path_3) + self.assertEqual([], utils.get_db_files(path_1)) + self.assertEqual([], utils.get_db_files(path_2)) + self.assertEqual([], utils.get_db_files(path_3)) + self.assertEqual([], utils.get_db_files('/path/to/nowhere')) + + def test_get_redirect_data(self): + ts_now = utils.Timestamp.now() + headers = {'X-Backend-Redirect-Timestamp': ts_now.internal} + response = FakeResponse(200, headers, '') + self.assertIsNone(utils.get_redirect_data(response)) + + headers = {'Location': '/a/c/o', + 'X-Backend-Redirect-Timestamp': ts_now.internal} + response = FakeResponse(200, headers, '') + path, ts = utils.get_redirect_data(response) + self.assertEqual('a/c', path) + self.assertEqual(ts_now, ts) + + headers = {'Location': '/a/c', + 'X-Backend-Redirect-Timestamp': ts_now.internal} + response = FakeResponse(200, headers, '') + path, ts = utils.get_redirect_data(response) + self.assertEqual('a/c', path) + self.assertEqual(ts_now, ts) + + def do_test(headers): + response = FakeResponse(200, headers, '') + with self.assertRaises(ValueError) as cm: + utils.get_redirect_data(response) + return cm.exception + + exc = do_test({'Location': '/a', + 'X-Backend-Redirect-Timestamp': ts_now.internal}) + self.assertIn('Invalid path', str(exc)) + + exc = do_test({'Location': '', + 'X-Backend-Redirect-Timestamp': ts_now.internal}) + self.assertIn('Invalid path', str(exc)) + + exc = do_test({'Location': '/a/c', + 'X-Backend-Redirect-Timestamp': 'bad'}) + self.assertIn('Invalid timestamp', str(exc)) + + exc = do_test({'Location': '/a/c'}) + self.assertIn('Invalid timestamp', str(exc)) + + exc = do_test({'Location': '/a/c', + 'X-Backend-Redirect-Timestamp': '-1'}) + self.assertIn('Invalid timestamp', str(exc)) + class ResellerConfReader(unittest.TestCase): @@ -4809,6 +5210,13 @@ self.assertEqual(mock_controller.args[0], 'METHOD.timing') self.assertTrue(mock_controller.args[1] > 0) + mock_controller = MockController(400) + METHOD(mock_controller) + self.assertEqual(len(mock_controller.args), 2) + self.assertEqual(mock_controller.called, 'timing') + self.assertEqual(mock_controller.args[0], 'METHOD.timing') + self.assertTrue(mock_controller.args[1] > 0) + mock_controller = MockController(404) METHOD(mock_controller) self.assertEqual(len(mock_controller.args), 2) @@ -4830,7 +5238,14 @@ self.assertEqual(mock_controller.args[0], 'METHOD.timing') self.assertTrue(mock_controller.args[1] > 0) - mock_controller = MockController(401) + mock_controller = MockController(500) + METHOD(mock_controller) + self.assertEqual(len(mock_controller.args), 2) + self.assertEqual(mock_controller.called, 'timing') + self.assertEqual(mock_controller.args[0], 'METHOD.errors.timing') + self.assertTrue(mock_controller.args[1] > 0) + + mock_controller = MockController(507) METHOD(mock_controller) self.assertEqual(len(mock_controller.args), 2) self.assertEqual(mock_controller.called, 'timing') @@ -5904,7 +6319,7 @@ class TestIterMultipartMimeDocuments(unittest.TestCase): def test_bad_start(self): - it = utils.iter_multipart_mime_documents(StringIO('blah'), 
'unique') + it = utils.iter_multipart_mime_documents(BytesIO(b'blah'), b'unique') exc = None try: next(it) @@ -5914,144 +6329,104 @@ self.assertTrue('--unique' in str(exc)) def test_empty(self): - it = utils.iter_multipart_mime_documents(StringIO('--unique'), - 'unique') + it = utils.iter_multipart_mime_documents(BytesIO(b'--unique'), + b'unique') fp = next(it) - self.assertEqual(fp.read(), '') - exc = None - try: - next(it) - except StopIteration as err: - exc = err - self.assertTrue(exc is not None) + self.assertEqual(fp.read(), b'') + self.assertRaises(StopIteration, next, it) def test_basic(self): it = utils.iter_multipart_mime_documents( - StringIO('--unique\r\nabcdefg\r\n--unique--'), 'unique') + BytesIO(b'--unique\r\nabcdefg\r\n--unique--'), b'unique') fp = next(it) - self.assertEqual(fp.read(), 'abcdefg') - exc = None - try: - next(it) - except StopIteration as err: - exc = err - self.assertTrue(exc is not None) + self.assertEqual(fp.read(), b'abcdefg') + self.assertRaises(StopIteration, next, it) def test_basic2(self): it = utils.iter_multipart_mime_documents( - StringIO('--unique\r\nabcdefg\r\n--unique\r\nhijkl\r\n--unique--'), - 'unique') + BytesIO(b'--unique\r\nabcdefg\r\n--unique\r\nhijkl\r\n--unique--'), + b'unique') fp = next(it) - self.assertEqual(fp.read(), 'abcdefg') + self.assertEqual(fp.read(), b'abcdefg') fp = next(it) - self.assertEqual(fp.read(), 'hijkl') - exc = None - try: - next(it) - except StopIteration as err: - exc = err - self.assertTrue(exc is not None) + self.assertEqual(fp.read(), b'hijkl') + self.assertRaises(StopIteration, next, it) def test_tiny_reads(self): it = utils.iter_multipart_mime_documents( - StringIO('--unique\r\nabcdefg\r\n--unique\r\nhijkl\r\n--unique--'), - 'unique') + BytesIO(b'--unique\r\nabcdefg\r\n--unique\r\nhijkl\r\n--unique--'), + b'unique') fp = next(it) - self.assertEqual(fp.read(2), 'ab') - self.assertEqual(fp.read(2), 'cd') - self.assertEqual(fp.read(2), 'ef') - self.assertEqual(fp.read(2), 'g') - self.assertEqual(fp.read(2), '') + self.assertEqual(fp.read(2), b'ab') + self.assertEqual(fp.read(2), b'cd') + self.assertEqual(fp.read(2), b'ef') + self.assertEqual(fp.read(2), b'g') + self.assertEqual(fp.read(2), b'') fp = next(it) - self.assertEqual(fp.read(), 'hijkl') - exc = None - try: - next(it) - except StopIteration as err: - exc = err - self.assertTrue(exc is not None) + self.assertEqual(fp.read(), b'hijkl') + self.assertRaises(StopIteration, next, it) def test_big_reads(self): it = utils.iter_multipart_mime_documents( - StringIO('--unique\r\nabcdefg\r\n--unique\r\nhijkl\r\n--unique--'), - 'unique') + BytesIO(b'--unique\r\nabcdefg\r\n--unique\r\nhijkl\r\n--unique--'), + b'unique') fp = next(it) - self.assertEqual(fp.read(65536), 'abcdefg') - self.assertEqual(fp.read(), '') + self.assertEqual(fp.read(65536), b'abcdefg') + self.assertEqual(fp.read(), b'') fp = next(it) - self.assertEqual(fp.read(), 'hijkl') - exc = None - try: - next(it) - except StopIteration as err: - exc = err - self.assertTrue(exc is not None) + self.assertEqual(fp.read(), b'hijkl') + self.assertRaises(StopIteration, next, it) def test_leading_crlfs(self): it = utils.iter_multipart_mime_documents( - StringIO('\r\n\r\n\r\n--unique\r\nabcdefg\r\n' - '--unique\r\nhijkl\r\n--unique--'), - 'unique') + BytesIO(b'\r\n\r\n\r\n--unique\r\nabcdefg\r\n' + b'--unique\r\nhijkl\r\n--unique--'), + b'unique') fp = next(it) - self.assertEqual(fp.read(65536), 'abcdefg') - self.assertEqual(fp.read(), '') + self.assertEqual(fp.read(65536), b'abcdefg') + 
self.assertEqual(fp.read(), b'') fp = next(it) - self.assertEqual(fp.read(), 'hijkl') - self.assertRaises(StopIteration, it.next) + self.assertEqual(fp.read(), b'hijkl') + self.assertRaises(StopIteration, next, it) def test_broken_mid_stream(self): # We go ahead and accept whatever is sent instead of rejecting the # whole request, in case the partial form is still useful. it = utils.iter_multipart_mime_documents( - StringIO('--unique\r\nabc'), 'unique') + BytesIO(b'--unique\r\nabc'), b'unique') fp = next(it) - self.assertEqual(fp.read(), 'abc') - exc = None - try: - next(it) - except StopIteration as err: - exc = err - self.assertTrue(exc is not None) + self.assertEqual(fp.read(), b'abc') + self.assertRaises(StopIteration, next, it) def test_readline(self): it = utils.iter_multipart_mime_documents( - StringIO('--unique\r\nab\r\ncd\ref\ng\r\n--unique\r\nhi\r\n\r\n' - 'jkl\r\n\r\n--unique--'), 'unique') + BytesIO(b'--unique\r\nab\r\ncd\ref\ng\r\n--unique\r\nhi\r\n\r\n' + b'jkl\r\n\r\n--unique--'), b'unique') fp = next(it) - self.assertEqual(fp.readline(), 'ab\r\n') - self.assertEqual(fp.readline(), 'cd\ref\ng') - self.assertEqual(fp.readline(), '') + self.assertEqual(fp.readline(), b'ab\r\n') + self.assertEqual(fp.readline(), b'cd\ref\ng') + self.assertEqual(fp.readline(), b'') fp = next(it) - self.assertEqual(fp.readline(), 'hi\r\n') - self.assertEqual(fp.readline(), '\r\n') - self.assertEqual(fp.readline(), 'jkl\r\n') - exc = None - try: - next(it) - except StopIteration as err: - exc = err - self.assertTrue(exc is not None) + self.assertEqual(fp.readline(), b'hi\r\n') + self.assertEqual(fp.readline(), b'\r\n') + self.assertEqual(fp.readline(), b'jkl\r\n') + self.assertRaises(StopIteration, next, it) def test_readline_with_tiny_chunks(self): it = utils.iter_multipart_mime_documents( - StringIO('--unique\r\nab\r\ncd\ref\ng\r\n--unique\r\nhi\r\n' - '\r\njkl\r\n\r\n--unique--'), - 'unique', + BytesIO(b'--unique\r\nab\r\ncd\ref\ng\r\n--unique\r\nhi\r\n' + b'\r\njkl\r\n\r\n--unique--'), + b'unique', read_chunk_size=2) fp = next(it) - self.assertEqual(fp.readline(), 'ab\r\n') - self.assertEqual(fp.readline(), 'cd\ref\ng') - self.assertEqual(fp.readline(), '') + self.assertEqual(fp.readline(), b'ab\r\n') + self.assertEqual(fp.readline(), b'cd\ref\ng') + self.assertEqual(fp.readline(), b'') fp = next(it) - self.assertEqual(fp.readline(), 'hi\r\n') - self.assertEqual(fp.readline(), '\r\n') - self.assertEqual(fp.readline(), 'jkl\r\n') - exc = None - try: - next(it) - except StopIteration as err: - exc = err - self.assertTrue(exc is not None) + self.assertEqual(fp.readline(), b'hi\r\n') + self.assertEqual(fp.readline(), b'\r\n') + self.assertEqual(fp.readline(), b'jkl\r\n') + self.assertRaises(StopIteration, next, it) class TestParseMimeHeaders(unittest.TestCase): @@ -6230,7 +6605,7 @@ pass def test_hash_for_file_smallish(self): - stub_data = 'some data' + stub_data = b'some data' with open(self.tempfilename, 'wb') as fd: fd.write(stub_data) with mock.patch('swift.common.utils.md5') as mock_md5: @@ -6246,9 +6621,9 @@ block_size = utils.MD5_BLOCK_READ_BYTES truncate = 523 start_char = ord('a') - expected_blocks = [chr(i) * block_size + expected_blocks = [chr(i).encode('utf8') * block_size for i in range(start_char, start_char + num_blocks)] - full_data = ''.join(expected_blocks) + full_data = b''.join(expected_blocks) trimmed_data = full_data[:-truncate] # sanity self.assertEqual(len(trimmed_data), block_size * num_blocks - truncate) @@ -6272,7 +6647,7 @@ else: self.assertEqual(block, 
expected_block[:-truncate]) found_blocks.append(block) - self.assertEqual(''.join(found_blocks), trimmed_data) + self.assertEqual(b''.join(found_blocks), trimmed_data) def test_hash_for_file_empty(self): with open(self.tempfilename, 'wb'): @@ -6281,14 +6656,14 @@ mock_hasher = mock_md5.return_value rv = utils.md5_hash_for_file(self.tempfilename) self.assertTrue(mock_hasher.hexdigest.called) - self.assertEqual(rv, mock_hasher.hexdigest.return_value) + self.assertIs(rv, mock_hasher.hexdigest.return_value) self.assertEqual([], mock_hasher.update.call_args_list) def test_hash_for_file_brittle(self): data_to_expected_hash = { - '': 'd41d8cd98f00b204e9800998ecf8427e', - 'some data': '1e50210a0202497fb79bc38b6ade6c34', - ('a' * 4096 * 10)[:-523]: '06a41551609656c85f14f659055dc6d3', + b'': 'd41d8cd98f00b204e9800998ecf8427e', + b'some data': '1e50210a0202497fb79bc38b6ade6c34', + (b'a' * 4096 * 10)[:-523]: '06a41551609656c85f14f659055dc6d3', } # unlike some other places where the concrete implementation really # matters for backwards compatibility these brittle tests are probably @@ -6316,8 +6691,8 @@ def setUp(self): self.swift_dir = tempfile.mkdtemp() self.swift_conf = os.path.join(self.swift_dir, 'swift.conf') - self.policy_name = ''.join(random.sample(string.letters, 20)) - with open(self.swift_conf, "wb") as sc: + self.policy_name = ''.join(random.sample(string.ascii_letters, 20)) + with open(self.swift_conf, "wt") as sc: sc.write(''' [swift-hash] swift_hash_path_suffix = changeme @@ -6517,5 +6892,862 @@ eventlet.debug.hub_prevent_multiple_readers(True) +class TestDistributeEvenly(unittest.TestCase): + def test_evenly_divided(self): + out = utils.distribute_evenly(range(12), 3) + self.assertEqual(out, [ + [0, 3, 6, 9], + [1, 4, 7, 10], + [2, 5, 8, 11], + ]) + + out = utils.distribute_evenly(range(12), 4) + self.assertEqual(out, [ + [0, 4, 8], + [1, 5, 9], + [2, 6, 10], + [3, 7, 11], + ]) + + def test_uneven(self): + out = utils.distribute_evenly(range(11), 3) + self.assertEqual(out, [ + [0, 3, 6, 9], + [1, 4, 7, 10], + [2, 5, 8], + ]) + + def test_just_one(self): + out = utils.distribute_evenly(range(5), 1) + self.assertEqual(out, [[0, 1, 2, 3, 4]]) + + def test_more_buckets_than_items(self): + out = utils.distribute_evenly(range(5), 7) + self.assertEqual(out, [[0], [1], [2], [3], [4], [], []]) + + +class TestShardRange(unittest.TestCase): + def setUp(self): + self.ts_iter = make_timestamp_iter() + + def test_min_max_bounds(self): + # max + self.assertEqual(utils.ShardRange.MAX, utils.ShardRange.MAX) + self.assertFalse(utils.ShardRange.MAX > utils.ShardRange.MAX) + self.assertFalse(utils.ShardRange.MAX < utils.ShardRange.MAX) + + for val in 'z', u'\u00e4': + self.assertFalse(utils.ShardRange.MAX == val) + self.assertFalse(val > utils.ShardRange.MAX) + self.assertTrue(val < utils.ShardRange.MAX) + self.assertTrue(utils.ShardRange.MAX > val) + self.assertFalse(utils.ShardRange.MAX < val) + + self.assertEqual('', str(utils.ShardRange.MAX)) + self.assertFalse(utils.ShardRange.MAX) + self.assertTrue(utils.ShardRange.MAX == utils.ShardRange.MAX) + self.assertFalse(utils.ShardRange.MAX != utils.ShardRange.MAX) + self.assertTrue( + utils.ShardRange.MaxBound() == utils.ShardRange.MaxBound()) + self.assertFalse( + utils.ShardRange.MaxBound() != utils.ShardRange.MaxBound()) + + # min + self.assertEqual(utils.ShardRange.MIN, utils.ShardRange.MIN) + self.assertFalse(utils.ShardRange.MIN > utils.ShardRange.MIN) + self.assertFalse(utils.ShardRange.MIN < utils.ShardRange.MIN) + + for val in 'z', u'\u00e4': + 
self.assertFalse(utils.ShardRange.MIN == val) + self.assertFalse(val < utils.ShardRange.MIN) + self.assertTrue(val > utils.ShardRange.MIN) + self.assertTrue(utils.ShardRange.MIN < val) + self.assertFalse(utils.ShardRange.MIN > val) + self.assertFalse(utils.ShardRange.MIN) + + self.assertEqual('', str(utils.ShardRange.MIN)) + self.assertFalse(utils.ShardRange.MIN) + self.assertTrue(utils.ShardRange.MIN == utils.ShardRange.MIN) + self.assertFalse(utils.ShardRange.MIN != utils.ShardRange.MIN) + self.assertTrue( + utils.ShardRange.MinBound() == utils.ShardRange.MinBound()) + self.assertFalse( + utils.ShardRange.MinBound() != utils.ShardRange.MinBound()) + + self.assertFalse(utils.ShardRange.MAX == utils.ShardRange.MIN) + self.assertFalse(utils.ShardRange.MIN == utils.ShardRange.MAX) + self.assertTrue(utils.ShardRange.MAX != utils.ShardRange.MIN) + self.assertTrue(utils.ShardRange.MIN != utils.ShardRange.MAX) + + self.assertEqual(utils.ShardRange.MAX, + max(utils.ShardRange.MIN, utils.ShardRange.MAX)) + self.assertEqual(utils.ShardRange.MIN, + min(utils.ShardRange.MIN, utils.ShardRange.MAX)) + + def test_shard_range_initialisation(self): + def assert_initialisation_ok(params, expected): + pr = utils.ShardRange(**params) + self.assertDictEqual(dict(pr), expected) + + def assert_initialisation_fails(params, err_type=ValueError): + with self.assertRaises(err_type): + utils.ShardRange(**params) + + ts_1 = next(self.ts_iter) + ts_2 = next(self.ts_iter) + ts_3 = next(self.ts_iter) + ts_4 = next(self.ts_iter) + empty_run = dict(name=None, timestamp=None, lower=None, + upper=None, object_count=0, bytes_used=0, + meta_timestamp=None, deleted=0, + state=utils.ShardRange.FOUND, state_timestamp=None, + epoch=None) + # name, timestamp must be given + assert_initialisation_fails(empty_run.copy()) + assert_initialisation_fails(dict(empty_run, name='a/c'), TypeError) + assert_initialisation_fails(dict(empty_run, timestamp=ts_1)) + # name must be form a/c + assert_initialisation_fails(dict(empty_run, name='c', timestamp=ts_1)) + assert_initialisation_fails(dict(empty_run, name='', timestamp=ts_1)) + assert_initialisation_fails(dict(empty_run, name='/a/c', + timestamp=ts_1)) + assert_initialisation_fails(dict(empty_run, name='/c', + timestamp=ts_1)) + # lower, upper can be None + expect = dict(name='a/c', timestamp=ts_1.internal, lower='', + upper='', object_count=0, bytes_used=0, + meta_timestamp=ts_1.internal, deleted=0, + state=utils.ShardRange.FOUND, + state_timestamp=ts_1.internal, epoch=None) + assert_initialisation_ok(dict(empty_run, name='a/c', timestamp=ts_1), + expect) + assert_initialisation_ok(dict(name='a/c', timestamp=ts_1), expect) + + good_run = dict(name='a/c', timestamp=ts_1, lower='l', + upper='u', object_count=2, bytes_used=10, + meta_timestamp=ts_2, deleted=0, + state=utils.ShardRange.CREATED, + state_timestamp=ts_3.internal, epoch=ts_4) + expect.update({'lower': 'l', 'upper': 'u', 'object_count': 2, + 'bytes_used': 10, 'meta_timestamp': ts_2.internal, + 'state': utils.ShardRange.CREATED, + 'state_timestamp': ts_3.internal, 'epoch': ts_4}) + assert_initialisation_ok(good_run.copy(), expect) + + # obj count and bytes used as int strings + good_str_run = good_run.copy() + good_str_run.update({'object_count': '2', 'bytes_used': '10'}) + assert_initialisation_ok(good_str_run, expect) + + good_no_meta = good_run.copy() + good_no_meta.pop('meta_timestamp') + assert_initialisation_ok(good_no_meta, + dict(expect, meta_timestamp=ts_1.internal)) + + good_deleted = good_run.copy() + 
good_deleted['deleted'] = 1 + assert_initialisation_ok(good_deleted, + dict(expect, deleted=1)) + + assert_initialisation_fails(dict(good_run, timestamp='water balloon')) + + assert_initialisation_fails( + dict(good_run, meta_timestamp='water balloon')) + + assert_initialisation_fails(dict(good_run, lower='water balloon')) + + assert_initialisation_fails(dict(good_run, upper='balloon')) + + assert_initialisation_fails( + dict(good_run, object_count='water balloon')) + + assert_initialisation_fails(dict(good_run, bytes_used='water ballon')) + + assert_initialisation_fails(dict(good_run, object_count=-1)) + + assert_initialisation_fails(dict(good_run, bytes_used=-1)) + assert_initialisation_fails(dict(good_run, state=-1)) + assert_initialisation_fails(dict(good_run, state_timestamp='not a ts')) + assert_initialisation_fails(dict(good_run, name='/a/c')) + assert_initialisation_fails(dict(good_run, name='/a/c/')) + assert_initialisation_fails(dict(good_run, name='a/c/')) + assert_initialisation_fails(dict(good_run, name='a')) + assert_initialisation_fails(dict(good_run, name='')) + + def _check_to_from_dict(self, lower, upper): + ts_1 = next(self.ts_iter) + ts_2 = next(self.ts_iter) + ts_3 = next(self.ts_iter) + ts_4 = next(self.ts_iter) + sr = utils.ShardRange('a/test', ts_1, lower, upper, 10, 100, ts_2, + state=None, state_timestamp=ts_3, epoch=ts_4) + sr_dict = dict(sr) + expected = { + 'name': 'a/test', 'timestamp': ts_1.internal, 'lower': lower, + 'upper': upper, 'object_count': 10, 'bytes_used': 100, + 'meta_timestamp': ts_2.internal, 'deleted': 0, + 'state': utils.ShardRange.FOUND, 'state_timestamp': ts_3.internal, + 'epoch': ts_4} + self.assertEqual(expected, sr_dict) + self.assertIsInstance(sr_dict['lower'], six.string_types) + self.assertIsInstance(sr_dict['upper'], six.string_types) + sr_new = utils.ShardRange.from_dict(sr_dict) + self.assertEqual(sr, sr_new) + self.assertEqual(sr_dict, dict(sr_new)) + + sr_new = utils.ShardRange(**sr_dict) + self.assertEqual(sr, sr_new) + self.assertEqual(sr_dict, dict(sr_new)) + + for key in sr_dict: + bad_dict = dict(sr_dict) + bad_dict.pop(key) + with self.assertRaises(KeyError): + utils.ShardRange.from_dict(bad_dict) + # But __init__ still (generally) works! 
+ if key not in ('name', 'timestamp'): + utils.ShardRange(**bad_dict) + else: + with self.assertRaises(TypeError): + utils.ShardRange(**bad_dict) + + def test_to_from_dict(self): + self._check_to_from_dict('l', 'u') + self._check_to_from_dict('', '') + + def test_timestamp_setter(self): + ts_1 = next(self.ts_iter) + sr = utils.ShardRange('a/test', ts_1, 'l', 'u', 0, 0, None) + self.assertEqual(ts_1, sr.timestamp) + + ts_2 = next(self.ts_iter) + sr.timestamp = ts_2 + self.assertEqual(ts_2, sr.timestamp) + + sr.timestamp = 0 + self.assertEqual(utils.Timestamp(0), sr.timestamp) + + with self.assertRaises(TypeError): + sr.timestamp = None + + def test_meta_timestamp_setter(self): + ts_1 = next(self.ts_iter) + sr = utils.ShardRange('a/test', ts_1, 'l', 'u', 0, 0, None) + self.assertEqual(ts_1, sr.timestamp) + self.assertEqual(ts_1, sr.meta_timestamp) + + ts_2 = next(self.ts_iter) + sr.meta_timestamp = ts_2 + self.assertEqual(ts_1, sr.timestamp) + self.assertEqual(ts_2, sr.meta_timestamp) + + ts_3 = next(self.ts_iter) + sr.timestamp = ts_3 + self.assertEqual(ts_3, sr.timestamp) + self.assertEqual(ts_2, sr.meta_timestamp) + + # meta_timestamp defaults to tracking timestamp + sr.meta_timestamp = None + self.assertEqual(ts_3, sr.timestamp) + self.assertEqual(ts_3, sr.meta_timestamp) + ts_4 = next(self.ts_iter) + sr.timestamp = ts_4 + self.assertEqual(ts_4, sr.timestamp) + self.assertEqual(ts_4, sr.meta_timestamp) + + sr.meta_timestamp = 0 + self.assertEqual(ts_4, sr.timestamp) + self.assertEqual(utils.Timestamp(0), sr.meta_timestamp) + + def test_update_meta(self): + ts_1 = next(self.ts_iter) + sr = utils.ShardRange('a/test', ts_1, 'l', 'u', 0, 0, None) + with mock_timestamp_now(next(self.ts_iter)) as now: + sr.update_meta(9, 99) + self.assertEqual(9, sr.object_count) + self.assertEqual(99, sr.bytes_used) + self.assertEqual(now, sr.meta_timestamp) + + with mock_timestamp_now(next(self.ts_iter)) as now: + sr.update_meta(99, 999, None) + self.assertEqual(99, sr.object_count) + self.assertEqual(999, sr.bytes_used) + self.assertEqual(now, sr.meta_timestamp) + + ts_2 = next(self.ts_iter) + sr.update_meta(21, 2112, ts_2) + self.assertEqual(21, sr.object_count) + self.assertEqual(2112, sr.bytes_used) + self.assertEqual(ts_2, sr.meta_timestamp) + + sr.update_meta('11', '12') + self.assertEqual(11, sr.object_count) + self.assertEqual(12, sr.bytes_used) + + def check_bad_args(*args): + with self.assertRaises(ValueError): + sr.update_meta(*args) + check_bad_args('bad', 10) + check_bad_args(10, 'bad') + check_bad_args(10, 11, 'bad') + + def test_increment_meta(self): + ts_1 = next(self.ts_iter) + sr = utils.ShardRange('a/test', ts_1, 'l', 'u', 1, 2, None) + with mock_timestamp_now(next(self.ts_iter)) as now: + sr.increment_meta(9, 99) + self.assertEqual(10, sr.object_count) + self.assertEqual(101, sr.bytes_used) + self.assertEqual(now, sr.meta_timestamp) + + sr.increment_meta('11', '12') + self.assertEqual(21, sr.object_count) + self.assertEqual(113, sr.bytes_used) + + def check_bad_args(*args): + with self.assertRaises(ValueError): + sr.increment_meta(*args) + check_bad_args('bad', 10) + check_bad_args(10, 'bad') + + def test_state_timestamp_setter(self): + ts_1 = next(self.ts_iter) + sr = utils.ShardRange('a/test', ts_1, 'l', 'u', 0, 0, None) + self.assertEqual(ts_1, sr.timestamp) + self.assertEqual(ts_1, sr.state_timestamp) + + ts_2 = next(self.ts_iter) + sr.state_timestamp = ts_2 + self.assertEqual(ts_1, sr.timestamp) + self.assertEqual(ts_2, sr.state_timestamp) + + ts_3 = next(self.ts_iter) + sr.timestamp 
= ts_3 + self.assertEqual(ts_3, sr.timestamp) + self.assertEqual(ts_2, sr.state_timestamp) + + # state_timestamp defaults to tracking timestamp + sr.state_timestamp = None + self.assertEqual(ts_3, sr.timestamp) + self.assertEqual(ts_3, sr.state_timestamp) + ts_4 = next(self.ts_iter) + sr.timestamp = ts_4 + self.assertEqual(ts_4, sr.timestamp) + self.assertEqual(ts_4, sr.state_timestamp) + + sr.state_timestamp = 0 + self.assertEqual(ts_4, sr.timestamp) + self.assertEqual(utils.Timestamp(0), sr.state_timestamp) + + def test_state_setter(self): + for state in utils.ShardRange.STATES: + for test_value in (state, str(state)): + sr = utils.ShardRange('a/test', next(self.ts_iter), 'l', 'u') + sr.state = test_value + actual = sr.state + self.assertEqual( + state, actual, + 'Expected %s but got %s for %s' % + (state, actual, test_value) + ) + + for bad_state in (max(utils.ShardRange.STATES) + 1, + -1, 99, None, 'stringy', 1.1): + sr = utils.ShardRange('a/test', next(self.ts_iter), 'l', 'u') + with self.assertRaises(ValueError) as cm: + sr.state = bad_state + self.assertIn('Invalid state', str(cm.exception)) + + def test_update_state(self): + sr = utils.ShardRange('a/c', next(self.ts_iter)) + old_sr = sr.copy() + self.assertEqual(utils.ShardRange.FOUND, sr.state) + self.assertEqual(dict(sr), dict(old_sr)) # sanity check + + for state in utils.ShardRange.STATES: + if state == utils.ShardRange.FOUND: + continue + self.assertTrue(sr.update_state(state)) + self.assertEqual(dict(old_sr, state=state), dict(sr)) + self.assertFalse(sr.update_state(state)) + self.assertEqual(dict(old_sr, state=state), dict(sr)) + + sr = utils.ShardRange('a/c', next(self.ts_iter)) + old_sr = sr.copy() + for state in utils.ShardRange.STATES: + ts = next(self.ts_iter) + self.assertTrue(sr.update_state(state, state_timestamp=ts)) + self.assertEqual(dict(old_sr, state=state, state_timestamp=ts), + dict(sr)) + + def test_resolve_state(self): + for name, number in utils.ShardRange.STATES_BY_NAME.items(): + self.assertEqual( + (number, name), utils.ShardRange.resolve_state(name)) + self.assertEqual( + (number, name), utils.ShardRange.resolve_state(name.upper())) + self.assertEqual( + (number, name), utils.ShardRange.resolve_state(name.title())) + self.assertEqual( + (number, name), utils.ShardRange.resolve_state(number)) + + def check_bad_value(value): + with self.assertRaises(ValueError) as cm: + utils.ShardRange.resolve_state(value) + self.assertIn('Invalid state %r' % value, str(cm.exception)) + + check_bad_value(min(utils.ShardRange.STATES) - 1) + check_bad_value(max(utils.ShardRange.STATES) + 1) + check_bad_value('badstate') + + def test_epoch_setter(self): + sr = utils.ShardRange('a/c', next(self.ts_iter)) + self.assertIsNone(sr.epoch) + ts = next(self.ts_iter) + sr.epoch = ts + self.assertEqual(ts, sr.epoch) + ts = next(self.ts_iter) + sr.epoch = ts.internal + self.assertEqual(ts, sr.epoch) + sr.epoch = None + self.assertIsNone(sr.epoch) + with self.assertRaises(ValueError): + sr.epoch = 'bad' + + def test_deleted_setter(self): + sr = utils.ShardRange('a/c', next(self.ts_iter)) + for val in (True, 1): + sr.deleted = val + self.assertIs(True, sr.deleted) + for val in (False, 0, None): + sr.deleted = val + self.assertIs(False, sr.deleted) + + def test_set_deleted(self): + sr = utils.ShardRange('a/c', next(self.ts_iter)) + # initialise other timestamps + sr.update_state(utils.ShardRange.ACTIVE, + state_timestamp=utils.Timestamp.now()) + sr.update_meta(1, 2) + old_sr = sr.copy() + self.assertIs(False, sr.deleted) # sanity check + 
self.assertEqual(dict(sr), dict(old_sr)) # sanity check + + with mock_timestamp_now(next(self.ts_iter)) as now: + self.assertTrue(sr.set_deleted()) + self.assertEqual(now, sr.timestamp) + self.assertIs(True, sr.deleted) + old_sr_dict = dict(old_sr) + old_sr_dict.pop('deleted') + old_sr_dict.pop('timestamp') + sr_dict = dict(sr) + sr_dict.pop('deleted') + sr_dict.pop('timestamp') + self.assertEqual(old_sr_dict, sr_dict) + + # no change + self.assertFalse(sr.set_deleted()) + self.assertEqual(now, sr.timestamp) + self.assertIs(True, sr.deleted) + + # force timestamp change + with mock_timestamp_now(next(self.ts_iter)) as now: + self.assertTrue(sr.set_deleted(timestamp=now)) + self.assertEqual(now, sr.timestamp) + self.assertIs(True, sr.deleted) + + def test_lower_setter(self): + sr = utils.ShardRange('a/c', utils.Timestamp.now(), 'b', '') + # sanity checks + self.assertEqual('b', sr.lower) + self.assertEqual(sr.MAX, sr.upper) + + def do_test(good_value, expected): + sr.lower = good_value + self.assertEqual(expected, sr.lower) + self.assertEqual(sr.MAX, sr.upper) + + do_test(utils.ShardRange.MIN, utils.ShardRange.MIN) + do_test(utils.ShardRange.MAX, utils.ShardRange.MAX) + do_test('', utils.ShardRange.MIN) + do_test(u'', utils.ShardRange.MIN) + do_test(None, utils.ShardRange.MIN) + do_test('a', 'a') + do_test('y', 'y') + + sr = utils.ShardRange('a/c', utils.Timestamp.now(), 'b', 'y') + sr.lower = '' + self.assertEqual(sr.MIN, sr.lower) + + sr = utils.ShardRange('a/c', utils.Timestamp.now(), 'b', 'y') + with self.assertRaises(ValueError) as cm: + sr.lower = 'z' + self.assertIn("lower ('z') must be less than or equal to upper ('y')", + str(cm.exception)) + self.assertEqual('b', sr.lower) + self.assertEqual('y', sr.upper) + + def do_test(bad_value): + with self.assertRaises(TypeError) as cm: + sr.lower = bad_value + self.assertIn("lower must be a string", str(cm.exception)) + self.assertEqual('b', sr.lower) + self.assertEqual('y', sr.upper) + + do_test(1) + do_test(1.234) + + def test_upper_setter(self): + sr = utils.ShardRange('a/c', utils.Timestamp.now(), '', 'y') + # sanity checks + self.assertEqual(sr.MIN, sr.lower) + self.assertEqual('y', sr.upper) + + def do_test(good_value, expected): + sr.upper = good_value + self.assertEqual(expected, sr.upper) + self.assertEqual(sr.MIN, sr.lower) + + do_test(utils.ShardRange.MIN, utils.ShardRange.MIN) + do_test(utils.ShardRange.MAX, utils.ShardRange.MAX) + do_test('', utils.ShardRange.MAX) + do_test(u'', utils.ShardRange.MAX) + do_test(None, utils.ShardRange.MAX) + do_test('z', 'z') + do_test('b', 'b') + + sr = utils.ShardRange('a/c', utils.Timestamp.now(), 'b', 'y') + sr.upper = '' + self.assertEqual(sr.MAX, sr.upper) + + sr = utils.ShardRange('a/c', utils.Timestamp.now(), 'b', 'y') + with self.assertRaises(ValueError) as cm: + sr.upper = 'a' + self.assertIn( + "upper ('a') must be greater than or equal to lower ('b')", + str(cm.exception)) + self.assertEqual('b', sr.lower) + self.assertEqual('y', sr.upper) + + def do_test(bad_value): + with self.assertRaises(TypeError) as cm: + sr.upper = bad_value + self.assertIn("upper must be a string", str(cm.exception)) + self.assertEqual('b', sr.lower) + self.assertEqual('y', sr.upper) + + do_test(1) + do_test(1.234) + + def test_end_marker(self): + sr = utils.ShardRange('a/c', utils.Timestamp.now(), '', 'y') + self.assertEqual('y\x00', sr.end_marker) + sr = utils.ShardRange('a/c', utils.Timestamp.now(), '', '') + self.assertEqual('', sr.end_marker) + + def test_bounds_serialization(self): + sr = 
utils.ShardRange('a/c', utils.Timestamp.now()) + self.assertEqual('a/c', sr.name) + self.assertEqual(utils.ShardRange.MIN, sr.lower) + self.assertEqual('', sr.lower_str) + self.assertEqual(utils.ShardRange.MAX, sr.upper) + self.assertEqual('', sr.upper_str) + self.assertEqual('', sr.end_marker) + + lower = u'\u00e4' + upper = u'\u00fb' + sr = utils.ShardRange('a/%s-%s' % (lower, upper), + utils.Timestamp.now(), lower, upper) + if six.PY3: + self.assertEqual(u'\u00e4', sr.lower) + self.assertEqual(u'\u00e4', sr.lower_str) + self.assertEqual(u'\u00fb', sr.upper) + self.assertEqual(u'\u00fb', sr.upper_str) + self.assertEqual(u'\u00fb\x00', sr.end_marker) + else: + self.assertEqual(u'\u00e4'.encode('utf8'), sr.lower) + self.assertEqual(u'\u00e4'.encode('utf8'), sr.lower_str) + self.assertEqual(u'\u00fb'.encode('utf8'), sr.upper) + self.assertEqual(u'\u00fb'.encode('utf8'), sr.upper_str) + self.assertEqual(u'\u00fb\x00'.encode('utf8'), sr.end_marker) + + def test_entire_namespace(self): + # test entire range (no boundaries) + entire = utils.ShardRange('a/test', utils.Timestamp.now()) + self.assertEqual(utils.ShardRange.MAX, entire.upper) + self.assertEqual(utils.ShardRange.MIN, entire.lower) + self.assertIs(True, entire.entire_namespace()) + + for x in range(100): + self.assertTrue(str(x) in entire) + self.assertTrue(chr(x) in entire) + + for x in ('a', 'z', 'zzzz', '124fsdf', u'\u00e4'): + self.assertTrue(x in entire, '%r should be in %r' % (x, entire)) + + entire.lower = 'a' + self.assertIs(False, entire.entire_namespace()) + + def test_comparisons(self): + ts = utils.Timestamp.now().internal + + # upper (if provided) *must* be greater than lower + with self.assertRaises(ValueError): + utils.ShardRange('f-a', ts, 'f', 'a') + + # test basic boundaries + btoc = utils.ShardRange('a/b-c', ts, 'b', 'c') + atof = utils.ShardRange('a/a-f', ts, 'a', 'f') + ftol = utils.ShardRange('a/f-l', ts, 'f', 'l') + ltor = utils.ShardRange('a/l-r', ts, 'l', 'r') + rtoz = utils.ShardRange('a/r-z', ts, 'r', 'z') + lower = utils.ShardRange('a/lower', ts, '', 'mid') + upper = utils.ShardRange('a/upper', ts, 'mid', '') + entire = utils.ShardRange('a/test', utils.Timestamp.now()) + + # overlapping ranges + dtof = utils.ShardRange('a/d-f', ts, 'd', 'f') + dtom = utils.ShardRange('a/d-m', ts, 'd', 'm') + + # test range > and < + # non-adjacent + self.assertFalse(rtoz < atof) + self.assertTrue(atof < ltor) + self.assertTrue(ltor > atof) + self.assertFalse(ftol > rtoz) + + # adjacent + self.assertFalse(rtoz < ltor) + self.assertTrue(ltor < rtoz) + self.assertFalse(ltor > rtoz) + self.assertTrue(rtoz > ltor) + + # wholly within + self.assertFalse(btoc < atof) + self.assertFalse(btoc > atof) + self.assertFalse(atof < btoc) + self.assertFalse(atof > btoc) + + self.assertFalse(atof < dtof) + self.assertFalse(dtof > atof) + self.assertFalse(atof > dtof) + self.assertFalse(dtof < atof) + + self.assertFalse(dtof < dtom) + self.assertFalse(dtof > dtom) + self.assertFalse(dtom > dtof) + self.assertFalse(dtom < dtof) + + # overlaps + self.assertFalse(atof < dtom) + self.assertFalse(atof > dtom) + self.assertFalse(ltor > dtom) + + # ranges including min/max bounds + self.assertTrue(upper > lower) + self.assertTrue(lower < upper) + self.assertFalse(upper < lower) + self.assertFalse(lower > upper) + + self.assertFalse(lower < entire) + self.assertFalse(entire > lower) + self.assertFalse(lower > entire) + self.assertFalse(entire < lower) + + self.assertFalse(upper < entire) + self.assertFalse(entire > upper) + self.assertFalse(upper > 
entire) + self.assertFalse(entire < upper) + + self.assertFalse(entire < entire) + self.assertFalse(entire > entire) + + # test range < and > to an item + # range is > lower and <= upper to lower boundary isn't + # actually included + self.assertTrue(ftol > 'f') + self.assertFalse(atof < 'f') + self.assertTrue(ltor < 'y') + + self.assertFalse(ftol < 'f') + self.assertFalse(atof > 'f') + self.assertFalse(ltor > 'y') + + self.assertTrue('f' < ftol) + self.assertFalse('f' > atof) + self.assertTrue('y' > ltor) + + self.assertFalse('f' > ftol) + self.assertFalse('f' < atof) + self.assertFalse('y' < ltor) + + # Now test ranges with only 1 boundary + start_to_l = utils.ShardRange('a/None-l', ts, '', 'l') + l_to_end = utils.ShardRange('a/l-None', ts, 'l', '') + + for x in ('l', 'm', 'z', 'zzz1231sd'): + if x == 'l': + self.assertFalse(x in l_to_end) + self.assertFalse(start_to_l < x) + self.assertFalse(x > start_to_l) + else: + self.assertTrue(x in l_to_end) + self.assertTrue(start_to_l < x) + self.assertTrue(x > start_to_l) + + # Now test some of the range to range checks with missing boundaries + self.assertFalse(atof < start_to_l) + self.assertFalse(start_to_l < entire) + + # Now test ShardRange.overlaps(other) + self.assertTrue(atof.overlaps(atof)) + self.assertFalse(atof.overlaps(ftol)) + self.assertFalse(ftol.overlaps(atof)) + self.assertTrue(atof.overlaps(dtof)) + self.assertTrue(dtof.overlaps(atof)) + self.assertFalse(dtof.overlaps(ftol)) + self.assertTrue(dtom.overlaps(ftol)) + self.assertTrue(ftol.overlaps(dtom)) + self.assertFalse(start_to_l.overlaps(l_to_end)) + + def test_contains(self): + ts = utils.Timestamp.now().internal + lower = utils.ShardRange('a/-h', ts, '', 'h') + mid = utils.ShardRange('a/h-p', ts, 'h', 'p') + upper = utils.ShardRange('a/p-', ts, 'p', '') + entire = utils.ShardRange('a/all', ts, '', '') + + self.assertTrue('a' in entire) + self.assertTrue('x' in entire) + + # the empty string is not a valid object name, so it cannot be in any + # range + self.assertFalse('' in lower) + self.assertFalse('' in upper) + self.assertFalse('' in entire) + + self.assertTrue('a' in lower) + self.assertTrue('h' in lower) + self.assertFalse('i' in lower) + + self.assertFalse('h' in mid) + self.assertTrue('p' in mid) + + self.assertFalse('p' in upper) + self.assertTrue('x' in upper) + + self.assertIn(utils.ShardRange.MAX, entire) + self.assertNotIn(utils.ShardRange.MAX, lower) + self.assertIn(utils.ShardRange.MAX, upper) + + # lower bound is excluded so MIN cannot be in any range. 
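
[Editor's aside, not part of the upstream diff: the containment assertions in test_contains pin down ShardRange's half-open interval semantics, i.e. the lower bound is excluded, the upper bound is included, and '' stands for the unbounded MIN/MAX sentinels. A minimal sketch of that rule follows, using a hypothetical in_range() helper for illustration rather than Swift's real ShardRange code.

def in_range(name, lower, upper):
    # Illustrative only: membership as the tests above assert it.
    # '' means "unbounded" on that side; the empty string itself maps to
    # the MIN sentinel, so it is never contained in any range.
    if name == '':
        return False
    above_lower = lower == '' or name > lower
    below_upper = upper == '' or name <= upper
    return above_lower and below_upper

assert in_range('h', '', 'h')        # upper bound is inclusive
assert not in_range('h', 'h', 'p')   # lower bound is exclusive
assert in_range('x', 'p', '')        # open-ended upper range
]
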
+ self.assertNotIn(utils.ShardRange.MIN, entire) + self.assertNotIn(utils.ShardRange.MIN, upper) + self.assertNotIn(utils.ShardRange.MIN, lower) + + def test_includes(self): + ts = utils.Timestamp.now().internal + _to_h = utils.ShardRange('a/-h', ts, '', 'h') + d_to_t = utils.ShardRange('a/d-t', ts, 'd', 't') + d_to_k = utils.ShardRange('a/d-k', ts, 'd', 'k') + e_to_l = utils.ShardRange('a/e-l', ts, 'e', 'l') + k_to_t = utils.ShardRange('a/k-t', ts, 'k', 't') + p_to_ = utils.ShardRange('a/p-', ts, 'p', '') + t_to_ = utils.ShardRange('a/t-', ts, 't', '') + entire = utils.ShardRange('a/all', ts, '', '') + + self.assertTrue(entire.includes(entire)) + self.assertTrue(d_to_t.includes(d_to_t)) + self.assertTrue(_to_h.includes(_to_h)) + self.assertTrue(p_to_.includes(p_to_)) + + self.assertTrue(entire.includes(_to_h)) + self.assertTrue(entire.includes(d_to_t)) + self.assertTrue(entire.includes(p_to_)) + + self.assertTrue(d_to_t.includes(d_to_k)) + self.assertTrue(d_to_t.includes(e_to_l)) + self.assertTrue(d_to_t.includes(k_to_t)) + self.assertTrue(p_to_.includes(t_to_)) + + self.assertFalse(_to_h.includes(d_to_t)) + self.assertFalse(p_to_.includes(d_to_t)) + self.assertFalse(k_to_t.includes(d_to_k)) + self.assertFalse(d_to_k.includes(e_to_l)) + self.assertFalse(k_to_t.includes(e_to_l)) + self.assertFalse(t_to_.includes(p_to_)) + + self.assertFalse(_to_h.includes(entire)) + self.assertFalse(p_to_.includes(entire)) + self.assertFalse(d_to_t.includes(entire)) + + def test_repr(self): + ts = next(self.ts_iter) + ts.offset = 1234 + meta_ts = next(self.ts_iter) + state_ts = next(self.ts_iter) + sr = utils.ShardRange('a/c', ts, 'l', 'u', 100, 1000, + meta_timestamp=meta_ts, + state=utils.ShardRange.ACTIVE, + state_timestamp=state_ts) + self.assertEqual( + "ShardRange<'l' to 'u' as of %s, (100, 1000) as of %s, " + "active as of %s>" + % (ts.internal, meta_ts.internal, state_ts.internal), str(sr)) + + ts.offset = 0 + meta_ts.offset = 2 + state_ts.offset = 3 + sr = utils.ShardRange('a/c', ts, '', '', 100, 1000, + meta_timestamp=meta_ts, + state=utils.ShardRange.FOUND, + state_timestamp=state_ts) + self.assertEqual( + "ShardRange" + % (ts.internal, meta_ts.internal, state_ts.internal), str(sr)) + + def test_copy(self): + sr = utils.ShardRange('a/c', next(self.ts_iter), 'x', 'y', 99, 99000, + meta_timestamp=next(self.ts_iter), + state=utils.ShardRange.CREATED, + state_timestamp=next(self.ts_iter)) + new = sr.copy() + self.assertEqual(dict(sr), dict(new)) + + new = sr.copy(deleted=1) + self.assertEqual(dict(sr, deleted=1), dict(new)) + + new_timestamp = next(self.ts_iter) + new = sr.copy(timestamp=new_timestamp) + self.assertEqual(dict(sr, timestamp=new_timestamp.internal, + meta_timestamp=new_timestamp.internal, + state_timestamp=new_timestamp.internal), + dict(new)) + + new = sr.copy(timestamp=new_timestamp, object_count=99) + self.assertEqual(dict(sr, timestamp=new_timestamp.internal, + meta_timestamp=new_timestamp.internal, + state_timestamp=new_timestamp.internal, + object_count=99), + dict(new)) + + def test_make_path(self): + ts = utils.Timestamp.now() + actual = utils.ShardRange.make_path('a', 'root', 'parent', ts, 0) + parent_hash = hashlib.md5(b'parent').hexdigest() + self.assertEqual('a/root-%s-%s-0' % (parent_hash, ts.internal), actual) + actual = utils.ShardRange.make_path('a', 'root', 'parent', ts, 3) + self.assertEqual('a/root-%s-%s-3' % (parent_hash, ts.internal), actual) + actual = utils.ShardRange.make_path('a', 'root', 'parent', ts, '3') + self.assertEqual('a/root-%s-%s-3' % (parent_hash, 
ts.internal), actual) + actual = utils.ShardRange.make_path( + 'a', 'root', 'parent', ts.internal, '3') + self.assertEqual('a/root-%s-%s-3' % (parent_hash, ts.internal), actual) + actual = utils.ShardRange.make_path('a', 'root', 'parent', ts, 'foo') + self.assertEqual('a/root-%s-%s-foo' % (parent_hash, ts.internal), + actual) + + if __name__ == '__main__': unittest.main() diff -Nru swift-2.17.0/test/unit/common/test_wsgi.py swift-2.18.0/test/unit/common/test_wsgi.py --- swift-2.17.0/test/unit/common/test_wsgi.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/common/test_wsgi.py 2018-05-30 10:17:02.000000000 +0000 @@ -15,6 +15,7 @@ """Tests for swift.common.wsgi""" +from argparse import Namespace import errno import logging import socket @@ -22,6 +23,9 @@ import os from textwrap import dedent from collections import defaultdict +import types + +import eventlet.wsgi import six from six import BytesIO @@ -203,7 +207,7 @@ conf_file = os.path.join(tempdir, 'file.conf') def _write_and_load_conf_file(conf): - with open(conf_file, 'wb') as fd: + with open(conf_file, 'wt') as fd: fd.write(dedent(conf)) return wsgi.load_app_config(conf_file) @@ -474,21 +478,13 @@ with mock.patch('swift.proxy.server.Application.' 'modify_wsgi_pipeline'), \ mock.patch('swift.common.wsgi.wsgi') as _wsgi, \ - mock.patch('swift.common.wsgi.eventlet') as _wsgi_evt, \ - mock.patch('swift.common.utils.eventlet') as _utils_evt, \ - mock.patch('swift.common.wsgi.inspect'): + mock.patch('swift.common.wsgi.eventlet') as _wsgi_evt: conf = wsgi.appconfig(conf_file) logger = logging.getLogger('test') sock = listen_zero() wsgi.run_server(conf, logger, sock) - self.assertEqual('HTTP/1.0', - _wsgi.HttpProtocol.default_request_version) self.assertEqual(30, _wsgi.WRITE_TIMEOUT) _wsgi_evt.hubs.use_hub.assert_called_with(utils.get_hub()) - _utils_evt.patcher.monkey_patch.assert_called_with(all=False, - socket=True, - select=True, - thread=True) _wsgi_evt.debug.hub_exceptions.assert_called_with(False) self.assertTrue(_wsgi.server.called) args, kwargs = _wsgi.server.call_args @@ -500,9 +496,15 @@ self.assertTrue('custom_pool' in kwargs) self.assertEqual(1000, kwargs['custom_pool'].size) - def test_run_server_with_latest_eventlet(self): + proto_class = kwargs['protocol'] + self.assertEqual(proto_class, wsgi.SwiftHttpProtocol) + self.assertEqual('HTTP/1.0', proto_class.default_request_version) + + def test_run_server_proxied(self): config = """ [DEFAULT] + client_timeout = 30 + max_clients = 1000 swift_dir = TEMPDIR [pipeline:main] @@ -510,10 +512,55 @@ [app:proxy-server] use = egg:swift#proxy + # these "set" values override defaults + set client_timeout = 20 + set max_clients = 10 + require_proxy_protocol = true """ - def argspec_stub(server): - return mock.MagicMock(args=['capitalize_response_headers']) + contents = dedent(config) + with temptree(['proxy-server.conf']) as t: + conf_file = os.path.join(t, 'proxy-server.conf') + with open(conf_file, 'w') as f: + f.write(contents.replace('TEMPDIR', t)) + _fake_rings(t) + with mock.patch('swift.proxy.server.Application.' 
+ 'modify_wsgi_pipeline'), \ + mock.patch('swift.common.wsgi.wsgi') as _wsgi, \ + mock.patch('swift.common.wsgi.eventlet') as _eventlet: + conf = wsgi.appconfig(conf_file, + name='proxy-server') + logger = logging.getLogger('test') + sock = listen_zero() + wsgi.run_server(conf, logger, sock) + self.assertEqual(20, _wsgi.WRITE_TIMEOUT) + _eventlet.hubs.use_hub.assert_called_with(utils.get_hub()) + _eventlet.debug.hub_exceptions.assert_called_with(False) + self.assertTrue(_wsgi.server.called) + args, kwargs = _wsgi.server.call_args + server_sock, server_app, server_logger = args + self.assertEqual(sock, server_sock) + self.assertTrue(isinstance(server_app, swift.proxy.server.Application)) + self.assertEqual(20, server_app.client_timeout) + self.assertTrue(isinstance(server_logger, wsgi.NullLogger)) + self.assertTrue('custom_pool' in kwargs) + self.assertEqual(10, kwargs['custom_pool'].size) + + proto_class = kwargs['protocol'] + self.assertEqual(proto_class, wsgi.SwiftHttpProxiedProtocol) + self.assertEqual('HTTP/1.0', proto_class.default_request_version) + + def test_run_server_with_latest_eventlet(self): + config = """ + [DEFAULT] + swift_dir = TEMPDIR + + [pipeline:main] + pipeline = proxy-server + + [app:proxy-server] + use = egg:swift#proxy + """ contents = dedent(config) with temptree(['proxy-server.conf']) as t: @@ -524,9 +571,7 @@ with mock.patch('swift.proxy.server.Application.' 'modify_wsgi_pipeline'), \ mock.patch('swift.common.wsgi.wsgi') as _wsgi, \ - mock.patch('swift.common.wsgi.eventlet'), \ - mock.patch('swift.common.wsgi.inspect', - getargspec=argspec_stub): + mock.patch('swift.common.wsgi.eventlet'): conf = wsgi.appconfig(conf_file) logger = logging.getLogger('test') sock = listen_zero() @@ -535,6 +580,9 @@ self.assertTrue(_wsgi.server.called) args, kwargs = _wsgi.server.call_args self.assertEqual(kwargs.get('capitalize_response_headers'), False) + self.assertTrue('protocol' in kwargs) + self.assertEqual('HTTP/1.0', + kwargs['protocol'].default_request_version) def test_run_server_conf_dir(self): config_dir = { @@ -562,9 +610,7 @@ 'modify_wsgi_pipeline'), \ mock.patch('swift.common.wsgi.wsgi') as _wsgi, \ mock.patch('swift.common.wsgi.eventlet') as _wsgi_evt, \ - mock.patch('swift.common.utils.eventlet') as _utils_evt, \ mock.patch.dict('os.environ', {'TZ': ''}), \ - mock.patch('swift.common.wsgi.inspect'), \ mock.patch('time.tzset'): conf = wsgi.appconfig(conf_dir) logger = logging.getLogger('test') @@ -572,14 +618,8 @@ wsgi.run_server(conf, logger, sock) self.assertTrue(os.environ['TZ'] is not '') - self.assertEqual('HTTP/1.0', - _wsgi.HttpProtocol.default_request_version) self.assertEqual(30, _wsgi.WRITE_TIMEOUT) _wsgi_evt.hubs.use_hub.assert_called_with(utils.get_hub()) - _utils_evt.patcher.monkey_patch.assert_called_with(all=False, - socket=True, - select=True, - thread=True) _wsgi_evt.debug.hub_exceptions.assert_called_with(False) self.assertTrue(_wsgi.server.called) args, kwargs = _wsgi.server.call_args @@ -588,6 +628,9 @@ self.assertTrue(isinstance(server_app, swift.proxy.server.Application)) self.assertTrue(isinstance(server_logger, wsgi.NullLogger)) self.assertTrue('custom_pool' in kwargs) + self.assertTrue('protocol' in kwargs) + self.assertEqual('HTTP/1.0', + kwargs['protocol'].default_request_version) def test_run_server_debug(self): config = """ @@ -617,7 +660,6 @@ with mock.patch('swift.proxy.server.Application.' 
'modify_wsgi_pipeline'), \ mock.patch('swift.common.wsgi.wsgi') as _wsgi, \ - mock.patch('swift.common.utils.eventlet') as _utils_evt, \ mock.patch('swift.common.wsgi.eventlet') as _wsgi_evt: mock_server = _wsgi.server _wsgi.server = lambda *args, **kwargs: mock_server( @@ -626,14 +668,8 @@ logger = logging.getLogger('test') sock = listen_zero() wsgi.run_server(conf, logger, sock) - self.assertEqual('HTTP/1.0', - _wsgi.HttpProtocol.default_request_version) self.assertEqual(30, _wsgi.WRITE_TIMEOUT) _wsgi_evt.hubs.use_hub.assert_called_with(utils.get_hub()) - _utils_evt.patcher.monkey_patch.assert_called_with(all=False, - socket=True, - select=True, - thread=True) _wsgi_evt.debug.hub_exceptions.assert_called_with(True) self.assertTrue(mock_server.called) args, kwargs = mock_server.call_args @@ -644,6 +680,9 @@ self.assertIsNone(server_logger) self.assertTrue('custom_pool' in kwargs) self.assertEqual(1000, kwargs['custom_pool'].size) + self.assertTrue('protocol' in kwargs) + self.assertEqual('HTTP/1.0', + kwargs['protocol'].default_request_version) def test_appconfig_dir_ignores_hidden_files(self): config_dir = { @@ -674,12 +713,12 @@ oldenv = {} newenv = wsgi.make_pre_authed_env(oldenv) self.assertTrue('wsgi.input' in newenv) - self.assertEqual(newenv['wsgi.input'].read(), '') + self.assertEqual(newenv['wsgi.input'].read(), b'') oldenv = {'wsgi.input': BytesIO(b'original wsgi.input')} newenv = wsgi.make_pre_authed_env(oldenv) self.assertTrue('wsgi.input' in newenv) - self.assertEqual(newenv['wsgi.input'].read(), '') + self.assertEqual(newenv['wsgi.input'].read(), b'') oldenv = {'swift.source': 'UT'} newenv = wsgi.make_pre_authed_env(oldenv) @@ -692,7 +731,7 @@ def test_pre_auth_req(self): class FakeReq(object): @classmethod - def fake_blank(cls, path, environ=None, body='', headers=None): + def fake_blank(cls, path, environ=None, body=b'', headers=None): if environ is None: environ = {} if headers is None: @@ -702,7 +741,7 @@ was_blank = Request.blank Request.blank = FakeReq.fake_blank wsgi.make_pre_authed_request({'HTTP_X_TRANS_ID': '1234'}, - 'PUT', '/', body='tester', headers={}) + 'PUT', '/', body=b'tester', headers={}) wsgi.make_pre_authed_request({'HTTP_X_TRANS_ID': '1234'}, 'PUT', '/', headers={}) Request.blank = was_blank @@ -710,7 +749,7 @@ def test_pre_auth_req_with_quoted_path(self): r = wsgi.make_pre_authed_request( {'HTTP_X_TRANS_ID': '1234'}, 'PUT', path=quote('/a space'), - body='tester', headers={}) + body=b'tester', headers={}) self.assertEqual(r.path, quote('/a space')) def test_pre_auth_req_drops_query(self): @@ -726,8 +765,8 @@ def test_pre_auth_req_with_body(self): r = wsgi.make_pre_authed_request( - {'QUERY_STRING': 'original'}, 'GET', 'path', 'the body') - self.assertEqual(r.body, 'the body') + {'QUERY_STRING': 'original'}, 'GET', 'path', b'the body') + self.assertEqual(r.body, b'the body') def test_pre_auth_creates_script_name(self): e = wsgi.make_pre_authed_env({}) @@ -745,9 +784,9 @@ def test_pre_auth_req_swift_source(self): r = wsgi.make_pre_authed_request( - {'QUERY_STRING': 'original'}, 'GET', 'path', 'the body', + {'QUERY_STRING': 'original'}, 'GET', 'path', b'the body', swift_source='UT') - self.assertEqual(r.body, 'the body') + self.assertEqual(r.body, b'the body') self.assertEqual(r.environ['swift.source'], 'UT') def test_run_server_global_conf_callback(self): @@ -777,12 +816,17 @@ mock.patch.object(wsgi, 'drop_privileges'), \ mock.patch.object(wsgi, 'loadapp', _loadapp), \ mock.patch.object(wsgi, 'capture_stdio'), \ - mock.patch.object(wsgi, 'run_server'): + 
mock.patch.object(wsgi, 'run_server'), \ + mock.patch('swift.common.utils.eventlet') as _utils_evt: wsgi.run_wsgi('conf_file', 'app_section', global_conf_callback=_global_conf_callback) self.assertEqual(calls['_global_conf_callback'], 1) self.assertEqual(calls['_loadapp'], 1) + _utils_evt.patcher.monkey_patch.assert_called_with(all=False, + socket=True, + select=True, + thread=True) def test_run_server_success(self): calls = defaultdict(lambda: 0) @@ -802,11 +846,16 @@ mock.patch.object(wsgi, 'drop_privileges'), \ mock.patch.object(wsgi, 'loadapp', _loadapp), \ mock.patch.object(wsgi, 'capture_stdio'), \ - mock.patch.object(wsgi, 'run_server'): + mock.patch.object(wsgi, 'run_server'), \ + mock.patch('swift.common.utils.eventlet') as _utils_evt: rc = wsgi.run_wsgi('conf_file', 'app_section') self.assertEqual(calls['_initrp'], 1) self.assertEqual(calls['_loadapp'], 1) self.assertEqual(rc, 0) + _utils_evt.patcher.monkey_patch.assert_called_with(all=False, + socket=True, + select=True, + thread=True) @mock.patch('swift.common.wsgi.run_server') @mock.patch('swift.common.wsgi.WorkersStrategy') @@ -953,6 +1002,193 @@ self.assertIs(newenv.get('swift.infocache'), oldenv['swift.infocache']) +class TestSwiftHttpProtocol(unittest.TestCase): + def setUp(self): + patcher = mock.patch('swift.common.wsgi.wsgi.HttpProtocol') + self.mock_super = patcher.start() + self.addCleanup(patcher.stop) + + def _proto_obj(self): + # Make an object we can exercise... note the base class's __init__() + # does a bunch of work, so we just new up an object like eventlet.wsgi + # does. + proto_class = wsgi.SwiftHttpProtocol + try: + the_obj = types.InstanceType(proto_class) + except AttributeError: + the_obj = proto_class.__new__(proto_class) + # Install some convenience mocks + the_obj.server = Namespace(app=Namespace(logger=mock.Mock()), + url_length_limit=777, + log=mock.Mock()) + the_obj.send_error = mock.Mock() + + return the_obj + + def test_swift_http_protocol_log_request(self): + proto_obj = self._proto_obj() + self.assertEqual(None, proto_obj.log_request('ignored')) + + def test_swift_http_protocol_log_message(self): + proto_obj = self._proto_obj() + + proto_obj.log_message('a%sc', 'b') + self.assertEqual([mock.call.error('ERROR WSGI: a%sc', 'b')], + proto_obj.server.app.logger.mock_calls) + + def test_swift_http_protocol_log_message_no_logger(self): + # If the app somehow had no logger attribute or it was None, don't blow + # up + proto_obj = self._proto_obj() + delattr(proto_obj.server.app, 'logger') + + proto_obj.log_message('a%sc', 'b') + self.assertEqual([mock.call.error('ERROR WSGI: a%sc', 'b')], + proto_obj.server.log.mock_calls) + + proto_obj.server.log.reset_mock() + proto_obj.server.app.logger = None + + proto_obj.log_message('a%sc', 'b') + self.assertEqual([mock.call.error('ERROR WSGI: a%sc', 'b')], + proto_obj.server.log.mock_calls) + + def test_swift_http_protocol_parse_request_no_proxy(self): + proto_obj = self._proto_obj() + proto_obj.raw_requestline = b'jimmy jam' + proto_obj.client_address = ('a', '123') + + self.assertEqual(False, proto_obj.parse_request()) + + self.assertEqual([], self.mock_super.mock_calls) + self.assertEqual([ + mock.call(400, "Bad HTTP/0.9 request type ('jimmy')"), + ], proto_obj.send_error.mock_calls) + self.assertEqual(('a', '123'), proto_obj.client_address) + + +class TestProxyProtocol(unittest.TestCase): + def _run_bytes_through_protocol(self, bytes_from_client, protocol_class): + rfile = BytesIO(bytes_from_client) + wfile = BytesIO() + + # All this fakery is needed to 
make the WSGI server process one + # connection, possibly with multiple requests, in the main + # greenthread. It doesn't hurt correctness if the function is called + # in a separate greenthread, but it makes using the debugger harder. + class FakeGreenthread(object): + def link(self, a_callable, *args): + a_callable(self, *args) + + class FakePool(object): + def spawn(self, a_callable, *args, **kwargs): + a_callable(*args, **kwargs) + return FakeGreenthread() + + def spawn_n(self, a_callable, *args, **kwargs): + a_callable(*args, **kwargs) + + def waitall(self): + pass + + def dinky_app(env, start_response): + start_response("200 OK", []) + body = "got addr: %s %s\r\n" % ( + env.get("REMOTE_ADDR", ""), + env.get("REMOTE_PORT", "")) + return [body.encode("utf-8")] + + fake_tcp_socket = mock.Mock( + setsockopt=lambda *a: None, + makefile=lambda mode, bufsize: rfile if 'r' in mode else wfile, + ) + fake_listen_socket = mock.Mock(accept=mock.MagicMock( + side_effect=[[fake_tcp_socket, ('127.0.0.1', 8359)], + # KeyboardInterrupt breaks the WSGI server out of + # its infinite accept-process-close loop. + KeyboardInterrupt])) + + # If we let the WSGI server close rfile/wfile then we can't access + # their contents any more. + with mock.patch.object(wfile, 'close', lambda: None), \ + mock.patch.object(rfile, 'close', lambda: None): + eventlet.wsgi.server( + fake_listen_socket, dinky_app, + protocol=protocol_class, + custom_pool=FakePool(), + log_output=False, # quiet the test run + ) + return wfile.getvalue() + + def test_request_with_proxy(self): + bytes_out = self._run_bytes_through_protocol(( + b"PROXY TCP4 192.168.0.1 192.168.0.11 56423 443\r\n" + b"GET /someurl HTTP/1.0\r\n" + b"User-Agent: something or other\r\n" + b"\r\n" + ), wsgi.SwiftHttpProxiedProtocol) + + lines = [l for l in bytes_out.split(b"\r\n") if l] + self.assertEqual(lines[0], b"HTTP/1.1 200 OK") # sanity check + self.assertEqual(lines[-1], b"got addr: 192.168.0.1 56423") + + def test_multiple_requests_with_proxy(self): + bytes_out = self._run_bytes_through_protocol(( + b"PROXY TCP4 192.168.0.1 192.168.0.11 56423 443\r\n" + b"GET /someurl HTTP/1.1\r\n" + b"User-Agent: something or other\r\n" + b"\r\n" + b"GET /otherurl HTTP/1.1\r\n" + b"User-Agent: something or other\r\n" + b"Connection: close\r\n" + b"\r\n" + ), wsgi.SwiftHttpProxiedProtocol) + + lines = bytes_out.split(b"\r\n") + self.assertEqual(lines[0], b"HTTP/1.1 200 OK") # sanity check + + # the address in the PROXY line is applied to every request + addr_lines = [l for l in lines if l.startswith(b"got addr")] + self.assertEqual(addr_lines, [b"got addr: 192.168.0.1 56423"] * 2) + + def test_missing_proxy_line(self): + bytes_out = self._run_bytes_through_protocol(( + # whoops, no PROXY line here + b"GET /someurl HTTP/1.0\r\n" + b"User-Agent: something or other\r\n" + b"\r\n" + ), wsgi.SwiftHttpProxiedProtocol) + + lines = [l for l in bytes_out.split(b"\r\n") if l] + self.assertIn(b"400 Invalid PROXY line", lines[0]) + + def test_malformed_proxy_lines(self): + for bad_line in [b'PROXY jojo', + b'PROXYjojo a b c d e', + b'PROXY a b c d e', # bad INET protocol and family + ]: + bytes_out = self._run_bytes_through_protocol( + bad_line, wsgi.SwiftHttpProxiedProtocol) + lines = [l for l in bytes_out.split(b"\r\n") if l] + self.assertIn(b"400 Invalid PROXY line", lines[0]) + + def test_unknown_client_addr(self): + # For "UNKNOWN", the rest of the line before the CRLF may be omitted by + # the sender, and the receiver must ignore anything presented before + # the CRLF is found. 
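The PROXY protocol handling these tests exercise can be summarised in a short, self-contained sketch. The function below is purely illustrative and is not Swift's SwiftHttpProxiedProtocol (the name parse_proxy_v1_line is an assumption): a TCP4/TCP6 header must carry exactly four address/port fields, whereas an UNKNOWN header is accepted with any trailing bytes, which are ignored.

def parse_proxy_v1_line(line):
    # 'line' is the header without its terminating CRLF, e.g.
    # b"PROXY TCP4 192.168.0.1 192.168.0.11 56423 443"
    if not line.startswith(b'PROXY '):
        raise ValueError('Invalid PROXY line')
    rest = line[len(b'PROXY '):]
    if rest.startswith(b'UNKNOWN'):
        # for UNKNOWN, anything up to the CRLF is ignored, so even
        # b'UNKNOWNblahblah' is acceptable
        return None
    fields = rest.split(b' ')
    if len(fields) != 5 or fields[0] not in (b'TCP4', b'TCP6'):
        raise ValueError('Invalid PROXY line')
    _proto, src_addr, _dst_addr, src_port, _dst_port = fields
    return src_addr.decode('latin-1'), int(src_port)

For the well-formed request in test_request_with_proxy this yields ('192.168.0.1', 56423), which is what dinky_app echoes back as "got addr: 192.168.0.1 56423", while the malformed lines in test_malformed_proxy_lines all fail validation.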
+ for unknown_line in [b'PROXY UNKNOWN', # mimimal valid unknown + b'PROXY UNKNOWNblahblah', # also valid + b'PROXY UNKNOWN a b c d']: + bytes_out = self._run_bytes_through_protocol(( + unknown_line + (b"\r\n" + b"GET /someurl HTTP/1.0\r\n" + b"User-Agent: something or other\r\n" + b"\r\n") + ), wsgi.SwiftHttpProxiedProtocol) + lines = [l for l in bytes_out.split(b"\r\n") if l] + self.assertIn(b"200 OK", lines[0]) + + class TestServersPerPortStrategy(unittest.TestCase): def setUp(self): self.logger = FakeLogger() @@ -1275,9 +1511,10 @@ pid += 1 sock_count += 1 + mypid = os.getpid() self.assertEqual([ - 'Started child %s' % 88, - 'Started child %s' % 89, + 'Started child %s from parent %s' % (88, mypid), + 'Started child %s from parent %s' % (89, mypid), ], self.logger.get_lines_for_level('notice')) self.assertEqual(2, sock_count) @@ -1287,7 +1524,7 @@ self.strategy.register_worker_exit(88) self.assertEqual([ - 'Removing dead child %s' % 88, + 'Removing dead child %s from parent %s' % (88, mypid) ], self.logger.get_lines_for_level('error')) for s, i in self.strategy.new_worker_socks(): @@ -1299,9 +1536,9 @@ self.assertEqual(1, sock_count) self.assertEqual([ - 'Started child %s' % 88, - 'Started child %s' % 89, - 'Started child %s' % 90, + 'Started child %s from parent %s' % (88, mypid), + 'Started child %s from parent %s' % (89, mypid), + 'Started child %s from parent %s' % (90, mypid), ], self.logger.get_lines_for_level('notice')) def test_post_fork_hook(self): @@ -1368,7 +1605,8 @@ self.assertEqual('aaaaa', next(iterator)) self.assertEqual('bbbbb', next(iterator)) iterable.close() - self.assertRaises(StopIteration, iterator.next) + with self.assertRaises(StopIteration): + next(iterator) def test_update_content_length(self): statuses = ['200 Ok'] diff -Nru swift-2.17.0/test/unit/container/test_backend.py swift-2.18.0/test/unit/container/test_backend.py --- swift-2.17.0/test/unit/container/test_backend.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/container/test_backend.py 2018-05-30 10:17:02.000000000 +0000 @@ -14,13 +14,13 @@ # limitations under the License. 
""" Tests for swift.container.backend """ - +import errno import os import hashlib +import inspect import unittest from time import sleep, time from uuid import uuid4 -import itertools import random from collections import defaultdict from contextlib import contextmanager @@ -28,38 +28,69 @@ import pickle import json +from swift.common.exceptions import LockTimeout from swift.container.backend import ContainerBroker, \ - update_new_item_from_existing -from swift.common.utils import Timestamp, encode_timestamps + update_new_item_from_existing, UNSHARDED, SHARDING, SHARDED, \ + COLLAPSED, SHARD_LISTING_STATES, SHARD_UPDATE_STATES +from swift.common.db import DatabaseAlreadyExists, GreenDBConnection +from swift.common.utils import Timestamp, encode_timestamps, hash_path, \ + ShardRange, make_db_file_path from swift.common.storage_policy import POLICIES import mock +from test import annotate_failure from test.unit import (patch_policies, with_tempdir, make_timestamp_iter, - EMPTY_ETAG) + EMPTY_ETAG, FakeLogger, mock_timestamp_now) from test.unit.common import test_db class TestContainerBroker(unittest.TestCase): """Tests for ContainerBroker""" + expected_db_tables = {'outgoing_sync', 'incoming_sync', 'object', + 'sqlite_sequence', 'policy_stat', + 'container_info', 'shard_range'} + + def _assert_shard_ranges(self, broker, expected, include_own=False): + actual = broker.get_shard_ranges(include_deleted=True, + include_own=include_own) + self.assertEqual([dict(sr) for sr in expected], + [dict(sr) for sr in actual]) def test_creation(self): # Test ContainerBroker.__init__ broker = ContainerBroker(':memory:', account='a', container='c') - self.assertEqual(broker.db_file, ':memory:') + self.assertEqual(broker._db_file, ':memory:') broker.initialize(Timestamp('1').internal, 0) with broker.get() as conn: curs = conn.cursor() curs.execute('SELECT 1') self.assertEqual(curs.fetchall()[0][0], 1) + curs.execute("SELECT name FROM sqlite_master WHERE type='table';") + self.assertEqual(self.expected_db_tables, + {row[0] for row in curs.fetchall()}) + # check the update trigger + broker.put_object('blah', Timestamp.now().internal, 0, 'text/plain', + 'etag', 0, 0) + with broker.get() as conn: + with self.assertRaises(sqlite3.DatabaseError) as cm: + conn.execute('UPDATE object SET name="blah";') + self.assertIn('UPDATE not allowed', str(cm.exception)) + if 'shard_range' in self.expected_db_tables: + # check the update trigger + broker.merge_shard_ranges(broker.get_own_shard_range()) + with broker.get() as conn: + with self.assertRaises(sqlite3.DatabaseError) as cm: + conn.execute('UPDATE shard_range SET name="blah";') + self.assertIn('UPDATE not allowed', str(cm.exception)) @patch_policies def test_storage_policy_property(self): - ts = (Timestamp(t).internal for t in itertools.count(int(time()))) + ts = make_timestamp_iter() for policy in POLICIES: broker = ContainerBroker(':memory:', account='a', container='policy_%s' % policy.name) - broker.initialize(next(ts), policy.idx) + broker.initialize(next(ts).internal, policy.idx) with broker.get() as conn: try: conn.execute('''SELECT storage_policy_index @@ -92,16 +123,296 @@ pass self.assertTrue(broker.conn is None) - def test_empty(self): + @with_tempdir + def test_is_deleted(self, tempdir): + # Test ContainerBroker.is_deleted() and get_info_is_deleted() + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + 
broker.initialize(next(ts_iter).internal, 0) + + self.assertFalse(broker.is_deleted()) + broker.delete_db(next(ts_iter).internal) + self.assertTrue(broker.is_deleted()) + + def check_object_counted(broker_to_test, broker_with_object): + obj = {'name': 'o', 'created_at': next(ts_iter).internal, + 'size': 0, 'content_type': 'text/plain', 'etag': EMPTY_ETAG, + 'deleted': 0} + broker_with_object.merge_items([dict(obj)]) + self.assertFalse(broker_to_test.is_deleted()) + info, deleted = broker_to_test.get_info_is_deleted() + self.assertFalse(deleted) + self.assertEqual(1, info['object_count']) + obj.update({'created_at': next(ts_iter).internal, 'deleted': 1}) + broker_with_object.merge_items([dict(obj)]) + self.assertTrue(broker_to_test.is_deleted()) + info, deleted = broker_to_test.get_info_is_deleted() + self.assertTrue(deleted) + self.assertEqual(0, info['object_count']) + + def check_object_not_counted(broker): + obj = {'name': 'o', 'created_at': next(ts_iter).internal, + 'size': 0, 'content_type': 'text/plain', 'etag': EMPTY_ETAG, + 'deleted': 0} + broker.merge_items([dict(obj)]) + self.assertTrue(broker.is_deleted()) + info, deleted = broker.get_info_is_deleted() + self.assertTrue(deleted) + self.assertEqual(0, info['object_count']) + obj.update({'created_at': next(ts_iter).internal, 'deleted': 1}) + broker.merge_items([dict(obj)]) + self.assertTrue(broker.is_deleted()) + info, deleted = broker.get_info_is_deleted() + self.assertTrue(deleted) + self.assertEqual(0, info['object_count']) + + def check_shard_ranges_not_counted(): + sr = ShardRange('.shards_a/shard_c', next(ts_iter), object_count=0) + sr.update_meta(13, 99, meta_timestamp=next(ts_iter)) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + self.assertTrue(broker.is_deleted()) + info, deleted = broker.get_info_is_deleted() + self.assertTrue(deleted) + self.assertEqual(0, info['object_count']) + + def check_shard_ranges_counted(): + sr = ShardRange('.shards_a/shard_c', next(ts_iter), object_count=0) + sr.update_meta(13, 99, meta_timestamp=next(ts_iter)) + counted_states = (ShardRange.ACTIVE, ShardRange.SHARDING, + ShardRange.SHRINKING) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + expected = state not in counted_states + self.assertEqual(expected, broker.is_deleted()) + info, deleted = broker.get_info_is_deleted() + self.assertEqual(expected, deleted) + self.assertEqual(0 if expected else 13, info['object_count']) + + sr.update_meta(0, 0, meta_timestamp=next(ts_iter)) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + self.assertTrue(broker.is_deleted()) + info, deleted = broker.get_info_is_deleted() + self.assertTrue(deleted) + self.assertEqual(0, info['object_count']) + + # unsharded + check_object_counted(broker, broker) + check_shard_ranges_not_counted() + + # move to sharding state + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + broker.delete_db(next(ts_iter).internal) + self.assertTrue(broker.is_deleted()) + + # check object in retiring db is considered + check_object_counted(broker, broker.get_brokers()[0]) + self.assertTrue(broker.is_deleted()) + check_shard_ranges_not_counted() + # misplaced object in fresh db is not considered + check_object_not_counted(broker) + + # move to sharded state + self.assertTrue(broker.set_sharded_state()) + 
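The check_object_counted / check_shard_ranges_not_counted / check_shard_ranges_counted helpers in these tests all encode one rule: a root container only folds the usage of other shard ranges into its own accounting once sharding has been enabled, and only while those ranges are in ACTIVE, SHARDING or SHRINKING state. A much-simplified model of that rule (the helper name and arguments are assumptions, not broker API):

from swift.common.utils import ShardRange

def counted_object_total(is_root, sharding_enabled, own_object_count,
                         other_shard_ranges):
    # own_object_count: rows in this broker's own object table
    # other_shard_ranges: ShardRange instances, excluding the own shard range
    total = own_object_count
    if is_root and sharding_enabled:
        counted_states = (ShardRange.ACTIVE, ShardRange.SHARDING,
                          ShardRange.SHRINKING)
        total += sum(sr.object_count for sr in other_shard_ranges
                     if sr.state in counted_states)
    return total

is_deleted() and empty() then roughly ask whether that total is zero, which is why a shard range in any other state has no influence in the checks above.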
check_object_not_counted(broker) + check_shard_ranges_counted() + + # own shard range has no influence + own_sr = broker.get_own_shard_range() + own_sr.update_meta(3, 4, meta_timestamp=next(ts_iter)) + broker.merge_shard_ranges([own_sr]) + self.assertTrue(broker.is_deleted()) + + @with_tempdir + def test_empty(self, tempdir): # Test ContainerBroker.empty - broker = ContainerBroker(':memory:', account='a', container='c') - broker.initialize(Timestamp('1').internal, 0) + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + self.assertTrue(broker.is_root_container()) + + def check_object_counted(broker_to_test, broker_with_object): + obj = {'name': 'o', 'created_at': next(ts_iter).internal, + 'size': 0, 'content_type': 'text/plain', 'etag': EMPTY_ETAG, + 'deleted': 0} + broker_with_object.merge_items([dict(obj)]) + self.assertFalse(broker_to_test.empty()) + # and delete it + obj.update({'created_at': next(ts_iter).internal, 'deleted': 1}) + broker_with_object.merge_items([dict(obj)]) + self.assertTrue(broker_to_test.empty()) + + def check_shard_ranges_not_counted(): + sr = ShardRange('.shards_a/shard_c', next(ts_iter), object_count=0) + sr.update_meta(13, 99, meta_timestamp=next(ts_iter)) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + self.assertTrue(broker.empty()) + + # empty other shard ranges do not influence result + sr.update_meta(0, 0, meta_timestamp=next(ts_iter)) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + self.assertTrue(broker.empty()) + self.assertTrue(broker.empty()) - broker.put_object('o', Timestamp.now().internal, 0, 'text/plain', - 'd41d8cd98f00b204e9800998ecf8427e') - self.assertTrue(not broker.empty()) - sleep(.00001) - broker.delete_object('o', Timestamp.now().internal) + check_object_counted(broker, broker) + check_shard_ranges_not_counted() + + # own shard range is not considered for object count + own_sr = broker.get_own_shard_range() + self.assertEqual(0, own_sr.object_count) + broker.merge_shard_ranges([own_sr]) + self.assertTrue(broker.empty()) + + broker.put_object('o', next(ts_iter).internal, 0, 'text/plain', + EMPTY_ETAG) + own_sr = broker.get_own_shard_range() + self.assertEqual(1, own_sr.object_count) + broker.merge_shard_ranges([own_sr]) + self.assertFalse(broker.empty()) + broker.delete_object('o', next(ts_iter).internal) + self.assertTrue(broker.empty()) + + # have own shard range but in state ACTIVE + self.assertEqual(ShardRange.ACTIVE, own_sr.state) + check_object_counted(broker, broker) + check_shard_ranges_not_counted() + + def check_shard_ranges_counted(): + # other shard range is considered + sr = ShardRange('.shards_a/shard_c', next(ts_iter), object_count=0) + sr.update_meta(13, 99, meta_timestamp=next(ts_iter)) + counted_states = (ShardRange.ACTIVE, ShardRange.SHARDING, + ShardRange.SHRINKING) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + self.assertEqual(state not in counted_states, broker.empty()) + + # empty other shard ranges do not influence result + sr.update_meta(0, 0, meta_timestamp=next(ts_iter)) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + 
self.assertTrue(broker.empty()) + + # enable sharding + broker.enable_sharding(next(ts_iter)) + check_object_counted(broker, broker) + check_shard_ranges_counted() + + # move to sharding state + self.assertTrue(broker.set_sharding_state()) + # check object in retiring db is considered + check_object_counted(broker, broker.get_brokers()[0]) + self.assertTrue(broker.empty()) + # as well as misplaced objects in fresh db + check_object_counted(broker, broker) + check_shard_ranges_counted() + + # move to sharded state + self.assertTrue(broker.set_sharded_state()) + self.assertTrue(broker.empty()) + check_object_counted(broker, broker) + check_shard_ranges_counted() + + # own shard range still has no influence + own_sr = broker.get_own_shard_range() + own_sr.update_meta(3, 4, meta_timestamp=next(ts_iter)) + broker.merge_shard_ranges([own_sr]) + self.assertTrue(broker.empty()) + + @with_tempdir + def test_empty_shard_container(self, tempdir): + # Test ContainerBroker.empty for a shard container where shard range + # usage should not be considered + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(db_path, account='.shards_a', container='cc') + broker.initialize(next(ts_iter).internal, 0) + broker.set_sharding_sysmeta('Root', 'a/c') + self.assertFalse(broker.is_root_container()) + + def check_object_counted(broker_to_test, broker_with_object): + obj = {'name': 'o', 'created_at': next(ts_iter).internal, + 'size': 0, 'content_type': 'text/plain', 'etag': EMPTY_ETAG, + 'deleted': 0} + broker_with_object.merge_items([dict(obj)]) + self.assertFalse(broker_to_test.empty()) + # and delete it + obj.update({'created_at': next(ts_iter).internal, 'deleted': 1}) + broker_with_object.merge_items([dict(obj)]) + self.assertTrue(broker_to_test.empty()) + + self.assertTrue(broker.empty()) + check_object_counted(broker, broker) + + # own shard range is not considered for object count + own_sr = broker.get_own_shard_range() + self.assertEqual(0, own_sr.object_count) + broker.merge_shard_ranges([own_sr]) + self.assertTrue(broker.empty()) + + broker.put_object('o', next(ts_iter).internal, 0, 'text/plain', + EMPTY_ETAG) + own_sr = broker.get_own_shard_range() + self.assertEqual(1, own_sr.object_count) + broker.merge_shard_ranges([own_sr]) + self.assertFalse(broker.empty()) + broker.delete_object('o', next(ts_iter).internal) + self.assertTrue(broker.empty()) + + def check_shard_ranges_not_counted(): + sr = ShardRange('.shards_a/shard_c', next(ts_iter), object_count=0) + sr.update_meta(13, 99, meta_timestamp=next(ts_iter)) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + self.assertTrue(broker.empty()) + + # empty other shard ranges do not influence result + sr.update_meta(0, 0, meta_timestamp=next(ts_iter)) + for state in ShardRange.STATES: + sr.update_state(state, state_timestamp=next(ts_iter)) + broker.merge_shard_ranges([sr]) + self.assertTrue(broker.empty()) + + check_shard_ranges_not_counted() + + # move to sharding state + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + + # check object in retiring db is considered + check_object_counted(broker, broker.get_brokers()[0]) + self.assertTrue(broker.empty()) + # as well as misplaced objects in fresh db + check_object_counted(broker, broker) + check_shard_ranges_not_counted() + + # move to sharded state + self.assertTrue(broker.set_sharded_state()) + 
self.assertTrue(broker.empty()) + check_object_counted(broker, broker) + check_shard_ranges_not_counted() + + # own shard range still has no influence + own_sr = broker.get_own_shard_range() + own_sr.update_meta(3, 4, meta_timestamp=next(ts_iter)) + broker.merge_shard_ranges([own_sr]) self.assertTrue(broker.empty()) def test_reclaim(self): @@ -164,48 +475,120 @@ broker.reclaim(Timestamp.now().internal, time()) broker.delete_db(Timestamp.now().internal) + @with_tempdir + def test_reclaim_deadlock(self, tempdir): + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', '%s.db' % uuid4()) + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(Timestamp(100).internal, 0) + # there's some magic count here that causes the failure, something + # about the size of object records and sqlite page size maybe? + count = 23000 + for i in range(count): + obj_name = 'o%d' % i + ts = Timestamp(200).internal + broker.delete_object(obj_name, ts) + broker._commit_puts() + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT count(*) FROM object").fetchone()[0], count) + # make a broker whose container attribute is not yet set so that + # reclaim will need to query info to set it + broker = ContainerBroker(db_path, timeout=1) + # verify that reclaim doesn't get deadlocked and timeout + broker.reclaim(300, 300) + # check all objects were reclaimed + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT count(*) FROM object" + ).fetchone()[0], 0) + + @with_tempdir + def test_reclaim_shard_ranges(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', '%s.db' % uuid4()) + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + older = next(ts_iter) + same = next(ts_iter) + newer = next(ts_iter) + shard_ranges = [ + ShardRange('.shards_a/older_deleted', older.internal, '', 'a', + deleted=True), + ShardRange('.shards_a/same_deleted', same.internal, 'a', 'b', + deleted=True), + ShardRange('.shards_a/newer_deleted', newer.internal, 'b', 'c', + deleted=True), + ShardRange('.shards_a/older', older.internal, 'c', 'd'), + ShardRange('.shards_a/same', same.internal, 'd', 'e'), + ShardRange('.shards_a/newer', newer.internal, 'e', 'f'), + # own shard range is never reclaimed, even if deleted + ShardRange('a/c', older.internal, '', '', deleted=True)] + broker.merge_shard_ranges( + random.sample(shard_ranges, len(shard_ranges))) + + def assert_row_count(expected): + with broker.get() as conn: + res = conn.execute("SELECT count(*) FROM shard_range") + self.assertEqual(expected, res.fetchone()[0]) + + broker.reclaim(older.internal, older.internal) + assert_row_count(7) + self._assert_shard_ranges(broker, shard_ranges, include_own=True) + broker.reclaim(older.internal, same.internal) + assert_row_count(6) + self._assert_shard_ranges(broker, shard_ranges[1:], include_own=True) + broker.reclaim(older.internal, newer.internal) + assert_row_count(5) + self._assert_shard_ranges(broker, shard_ranges[2:], include_own=True) + broker.reclaim(older.internal, next(ts_iter).internal) + assert_row_count(4) + self._assert_shard_ranges(broker, shard_ranges[3:], include_own=True) + def test_get_info_is_deleted(self): - start = int(time()) - ts = (Timestamp(t).internal for t in itertools.count(start)) + ts = make_timestamp_iter() + start = next(ts) broker = ContainerBroker(':memory:', account='test_account', container='test_container') # create it - 
broker.initialize(next(ts), POLICIES.default.idx) + broker.initialize(start.internal, POLICIES.default.idx) info, is_deleted = broker.get_info_is_deleted() self.assertEqual(is_deleted, broker.is_deleted()) self.assertEqual(is_deleted, False) # sanity self.assertEqual(info, broker.get_info()) - self.assertEqual(info['put_timestamp'], Timestamp(start).internal) + self.assertEqual(info['put_timestamp'], start.internal) self.assertTrue(Timestamp(info['created_at']) >= start) self.assertEqual(info['delete_timestamp'], '0') if self.__class__ in (TestContainerBrokerBeforeMetadata, TestContainerBrokerBeforeXSync, - TestContainerBrokerBeforeSPI): + TestContainerBrokerBeforeSPI, + TestContainerBrokerBeforeShardRanges): self.assertEqual(info['status_changed_at'], '0') else: self.assertEqual(info['status_changed_at'], - Timestamp(start).internal) + start.internal) # delete it delete_timestamp = next(ts) - broker.delete_db(delete_timestamp) + broker.delete_db(delete_timestamp.internal) info, is_deleted = broker.get_info_is_deleted() self.assertEqual(is_deleted, True) # sanity self.assertEqual(is_deleted, broker.is_deleted()) self.assertEqual(info, broker.get_info()) - self.assertEqual(info['put_timestamp'], Timestamp(start).internal) + self.assertEqual(info['put_timestamp'], start.internal) self.assertTrue(Timestamp(info['created_at']) >= start) self.assertEqual(info['delete_timestamp'], delete_timestamp) self.assertEqual(info['status_changed_at'], delete_timestamp) # bring back to life - broker.put_object('obj', next(ts), 0, 'text/plain', 'etag', + broker.put_object('obj', next(ts).internal, 0, 'text/plain', 'etag', storage_policy_index=broker.storage_policy_index) info, is_deleted = broker.get_info_is_deleted() self.assertEqual(is_deleted, False) # sanity self.assertEqual(is_deleted, broker.is_deleted()) self.assertEqual(info, broker.get_info()) - self.assertEqual(info['put_timestamp'], Timestamp(start).internal) + self.assertEqual(info['put_timestamp'], start.internal) self.assertTrue(Timestamp(info['created_at']) >= start) self.assertEqual(info['delete_timestamp'], delete_timestamp) self.assertEqual(info['status_changed_at'], delete_timestamp) @@ -432,6 +815,273 @@ self.assertEqual(conn.execute( "SELECT deleted FROM object").fetchone()[0], 0) + def test_merge_shard_range_single_record(self): + # Test ContainerBroker.merge_shard_range + broker = ContainerBroker(':memory:', account='a', container='c') + broker.initialize(Timestamp('1').internal, 0) + + ts_iter = make_timestamp_iter() + # Stash these for later + old_put_timestamp = next(ts_iter).internal + old_delete_timestamp = next(ts_iter).internal + + # Create initial object + timestamp = next(ts_iter).internal + meta_timestamp = next(ts_iter).internal + broker.merge_shard_ranges( + ShardRange('"a/{}"', timestamp, + 'low', 'up', meta_timestamp=meta_timestamp)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) + self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'low') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'up') + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 0) + 
self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 0) + + # Reput same event + broker.merge_shard_ranges( + ShardRange('"a/{}"', timestamp, + 'low', 'up', meta_timestamp=meta_timestamp)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) + self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'low') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'up') + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 0) + + # Put new event + timestamp = next(ts_iter).internal + meta_timestamp = next(ts_iter).internal + broker.merge_shard_ranges( + ShardRange('"a/{}"', timestamp, + 'lower', 'upper', 1, 2, meta_timestamp=meta_timestamp)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) + self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'lower') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'upper') + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 1) + self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 2) + + # Put old event + broker.merge_shard_ranges( + ShardRange('"a/{}"', old_put_timestamp, + 'lower', 'upper', 1, 2, meta_timestamp=meta_timestamp)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) # Not old_put_timestamp! + self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'lower') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'upper') + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 1) + self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 2) + + # Put old delete event + broker.merge_shard_ranges( + ShardRange('"a/{}"', old_delete_timestamp, + 'lower', 'upper', meta_timestamp=meta_timestamp, + deleted=1)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) # Not old_delete_timestamp! 
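What these assertions step through is the broker's newest-timestamp-wins merge behaviour for shard range rows. A simplified model of just that behaviour (the helper name and the reduced column set are assumptions; the real broker tracks more columns than shown here):

def merge_shard_range_row(existing, incoming):
    # both rows are dicts with 'timestamp', 'lower', 'upper', 'deleted',
    # 'meta_timestamp', 'object_count' and 'bytes_used' keys
    if incoming['timestamp'] > existing['timestamp']:
        # a newer definition replaces the stored row outright
        return dict(incoming)
    merged = dict(existing)
    if incoming['meta_timestamp'] > existing['meta_timestamp']:
        # same definition, but fresher usage stats
        for key in ('meta_timestamp', 'object_count', 'bytes_used'):
            merged[key] = incoming[key]
    return merged

That is why the old put and old delete events leave the stored row untouched, while the later delete, the later update and the in-between-timestamp event each replace it.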
+ self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'lower') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'upper') + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 1) + self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 2) + + # Put new delete event + timestamp = next(ts_iter).internal + broker.merge_shard_ranges( + ShardRange('"a/{}"', timestamp, + 'lower', 'upper', meta_timestamp=meta_timestamp, + deleted=1)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 1) + + # Put new event + timestamp = next(ts_iter).internal + meta_timestamp = next(ts_iter).internal + broker.merge_shard_ranges( + ShardRange('"a/{}"', timestamp, + 'lowerer', 'upperer', 3, 4, + meta_timestamp=meta_timestamp)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) + self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'lowerer') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'upperer') + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 3) + self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 4) + + # We'll use this later + in_between_timestamp = next(ts_iter).internal + + # New update event, meta_timestamp increases + meta_timestamp = next(ts_iter).internal + broker.merge_shard_ranges( + ShardRange('"a/{}"', timestamp, + 'lowerer', 'upperer', 3, 4, + meta_timestamp=meta_timestamp)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + "SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) + self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'lowerer') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'upperer') + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 3) + self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 4) + + # Put event from after last put but before last post + timestamp = in_between_timestamp + broker.merge_shard_ranges( + ShardRange('"a/{}"', timestamp, + 'lowererer', 'uppererer', 5, 6, + meta_timestamp=meta_timestamp)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT name FROM shard_range").fetchone()[0], + '"a/{}"') + self.assertEqual(conn.execute( + 
"SELECT timestamp FROM shard_range").fetchone()[0], + timestamp) + self.assertEqual(conn.execute( + "SELECT meta_timestamp FROM shard_range").fetchone()[0], + meta_timestamp) + self.assertEqual(conn.execute( + "SELECT lower FROM shard_range").fetchone()[0], 'lowererer') + self.assertEqual(conn.execute( + "SELECT upper FROM shard_range").fetchone()[0], 'uppererer') + self.assertEqual(conn.execute( + "SELECT deleted FROM shard_range").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT object_count FROM shard_range").fetchone()[0], 5) + self.assertEqual(conn.execute( + "SELECT bytes_used FROM shard_range").fetchone()[0], 6) + + def test_merge_shard_ranges_deleted(self): + # Test ContainerBroker.merge_shard_ranges sets deleted attribute + ts_iter = make_timestamp_iter() + broker = ContainerBroker(':memory:', account='a', container='c') + broker.initialize(Timestamp('1').internal, 0) + # put shard range + broker.merge_shard_ranges(ShardRange('a/o', next(ts_iter).internal)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT count(*) FROM shard_range " + "WHERE deleted = 0").fetchone()[0], 1) + self.assertEqual(conn.execute( + "SELECT count(*) FROM shard_range " + "WHERE deleted = 1").fetchone()[0], 0) + + # delete shard range + broker.merge_shard_ranges(ShardRange('a/o', next(ts_iter).internal, + deleted=1)) + with broker.get() as conn: + self.assertEqual(conn.execute( + "SELECT count(*) FROM shard_range " + "WHERE deleted = 0").fetchone()[0], 0) + self.assertEqual(conn.execute( + "SELECT count(*) FROM shard_range " + "WHERE deleted = 1").fetchone()[0], 1) + def test_make_tuple_for_pickle(self): record = {'name': 'obj', 'created_at': '1234567890.12345', @@ -559,7 +1209,7 @@ "SELECT deleted FROM object").fetchone()[0], deleted) def _test_put_object_multiple_encoded_timestamps(self, broker): - ts = (Timestamp(t) for t in itertools.count(int(time()))) + ts = make_timestamp_iter() broker.initialize(next(ts).internal, 0) t = [next(ts) for _ in range(9)] @@ -620,6 +1270,194 @@ self._test_put_object_multiple_encoded_timestamps(broker) @with_tempdir + def test_get_db_state(self, tempdir): + acct = 'account' + cont = 'container' + hsh = hash_path(acct, cont) + db_file = "%s.db" % hsh + epoch = Timestamp.now() + fresh_db_file = "%s_%s.db" % (hsh, epoch.normal) + db_path = os.path.join(tempdir, db_file) + fresh_db_path = os.path.join(tempdir, fresh_db_file) + ts = Timestamp.now() + + # First test NOTFOUND state + broker = ContainerBroker(db_path, account=acct, container=cont) + self.assertEqual(broker.get_db_state(), 'not_found') + + # Test UNSHARDED state, that is when db_file exists and fresh_db_file + # doesn't + broker.initialize(ts.internal, 0) + self.assertEqual(broker.get_db_state(), 'unsharded') + + # Test the SHARDING state, this is the period when both the db_file and + # the fresh_db_file exist + fresh_broker = ContainerBroker(fresh_db_path, account=acct, + container=cont, force_db_file=True) + fresh_broker.initialize(ts.internal, 0) + own_shard_range = fresh_broker.get_own_shard_range() + own_shard_range.update_state(ShardRange.SHARDING) + own_shard_range.epoch = epoch + shard_range = ShardRange( + '.shards_%s/%s' % (acct, cont), Timestamp.now()) + fresh_broker.merge_shard_ranges([own_shard_range, shard_range]) + + self.assertEqual(fresh_broker.get_db_state(), 'sharding') + # old broker will also change state if we reload its db files + broker.reload_db_files() + self.assertEqual(broker.get_db_state(), 'sharding') + + # Test the SHARDED state, this is when only 
fresh_db_file exists. + os.unlink(db_path) + fresh_broker.reload_db_files() + self.assertEqual(fresh_broker.get_db_state(), 'sharded') + + # Test the COLLAPSED state, this is when only fresh_db_file exists. + shard_range.deleted = 1 + shard_range.timestamp = Timestamp.now() + fresh_broker.merge_shard_ranges([shard_range]) + self.assertEqual(fresh_broker.get_db_state(), 'collapsed') + + # back to UNSHARDED if the desired epoch changes + own_shard_range.update_state(ShardRange.SHRINKING, + state_timestamp=Timestamp.now()) + own_shard_range.epoch = Timestamp.now() + fresh_broker.merge_shard_ranges([own_shard_range]) + self.assertEqual(fresh_broker.get_db_state(), 'unsharded') + + @with_tempdir + def test_db_file(self, tempdir): + acct = 'account' + cont = 'continer' + hsh = hash_path(acct, cont) + db_file = "%s.db" % hsh + ts_epoch = Timestamp.now() + fresh_db_file = "%s_%s.db" % (hsh, ts_epoch.normal) + db_path = os.path.join(tempdir, db_file) + fresh_db_path = os.path.join(tempdir, fresh_db_file) + ts = Timestamp.now() + + # First test NOTFOUND state, this will return the db_file passed + # in the constructor + def check_unfound_db_files(broker, init_db_file): + self.assertEqual(init_db_file, broker.db_file) + self.assertEqual(broker._db_file, db_path) + self.assertFalse(os.path.exists(db_path)) + self.assertFalse(os.path.exists(fresh_db_path)) + self.assertEqual([], broker.db_files) + + broker = ContainerBroker(db_path, account=acct, container=cont) + check_unfound_db_files(broker, db_path) + broker = ContainerBroker(fresh_db_path, account=acct, container=cont) + check_unfound_db_files(broker, fresh_db_path) + + # Test UNSHARDED state, that is when db_file exists and fresh_db_file + # doesn't, so it should return the db_path + def check_unsharded_db_files(broker): + self.assertEqual(broker.db_file, db_path) + self.assertEqual(broker._db_file, db_path) + self.assertTrue(os.path.exists(db_path)) + self.assertFalse(os.path.exists(fresh_db_path)) + self.assertEqual([db_path], broker.db_files) + + broker = ContainerBroker(db_path, account=acct, container=cont) + broker.initialize(ts.internal, 0) + check_unsharded_db_files(broker) + broker = ContainerBroker(fresh_db_path, account=acct, container=cont) + check_unsharded_db_files(broker) + # while UNSHARDED db_path is still used despite giving fresh_db_path + # to init, so we cannot initialize this broker + with self.assertRaises(DatabaseAlreadyExists): + broker.initialize(ts.internal, 0) + + # Test the SHARDING state, this is the period when both the db_file and + # the fresh_db_file exist, in this case it should return the + # fresh_db_path. 
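Both test_get_db_state and test_db_file hinge on the same naming convention: the retiring database is <hash>.db and the fresh database created at a sharding epoch is <hash>_<epoch>.db. A rough sketch of how the presence of those files maps to the states asserted here (the helper name is an assumption, and the collapsed case additionally depends on the shard ranges rather than on the files alone):

import os

def rough_db_state(db_path, fresh_db_path):
    # db_path:       <hash>.db           (the retiring database)
    # fresh_db_path: <hash>_<epoch>.db   (the post-epoch database)
    has_old = os.path.exists(db_path)
    has_fresh = os.path.exists(fresh_db_path)
    if not has_old and not has_fresh:
        return 'not_found'
    if has_old and not has_fresh:
        return 'unsharded'
    if has_old and has_fresh:
        return 'sharding'
    # only the fresh db remains: 'sharded', unless all the shard ranges
    # have since been deleted, in which case the broker reports 'collapsed'
    return 'sharded'

broker.db_file follows the same precedence, pointing at the fresh database whenever one exists, while _db_file always keeps the original <hash>.db path.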
+ def check_sharding_db_files(broker): + self.assertEqual(broker.db_file, fresh_db_path) + self.assertEqual(broker._db_file, db_path) + self.assertTrue(os.path.exists(db_path)) + self.assertTrue(os.path.exists(fresh_db_path)) + self.assertEqual([db_path, fresh_db_path], broker.db_files) + + # Use force_db_file to have db_shard_path created when initializing + broker = ContainerBroker(fresh_db_path, account=acct, + container=cont, force_db_file=True) + self.assertEqual([db_path], broker.db_files) + broker.initialize(ts.internal, 0) + check_sharding_db_files(broker) + broker = ContainerBroker(db_path, account=acct, container=cont) + check_sharding_db_files(broker) + broker = ContainerBroker(fresh_db_path, account=acct, container=cont) + check_sharding_db_files(broker) + + # force_db_file can be used to open db_path specifically + forced_broker = ContainerBroker(db_path, account=acct, + container=cont, force_db_file=True) + self.assertEqual(forced_broker.db_file, db_path) + self.assertEqual(forced_broker._db_file, db_path) + + def check_sharded_db_files(broker): + self.assertEqual(broker.db_file, fresh_db_path) + self.assertEqual(broker._db_file, db_path) + self.assertFalse(os.path.exists(db_path)) + self.assertTrue(os.path.exists(fresh_db_path)) + self.assertEqual([fresh_db_path], broker.db_files) + + # Test the SHARDED state, this is when only fresh_db_file exists, so + # obviously this should return the fresh_db_path + os.unlink(db_path) + broker.reload_db_files() + check_sharded_db_files(broker) + broker = ContainerBroker(db_path, account=acct, container=cont) + check_sharded_db_files(broker) + + @with_tempdir + def test_sharding_initiated_and_required(self, tempdir): + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', '%s.db' % uuid4()) + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(Timestamp.now().internal, 0) + # no shard ranges + self.assertIs(False, broker.sharding_initiated()) + self.assertIs(False, broker.sharding_required()) + # only own shard range + own_sr = broker.get_own_shard_range() + for state in ShardRange.STATES: + own_sr.update_state(state, state_timestamp=Timestamp.now()) + broker.merge_shard_ranges(own_sr) + self.assertIs(False, broker.sharding_initiated()) + self.assertIs(False, broker.sharding_required()) + + # shard ranges, still ACTIVE + own_sr.update_state(ShardRange.ACTIVE, + state_timestamp=Timestamp.now()) + broker.merge_shard_ranges(own_sr) + broker.merge_shard_ranges(ShardRange('.shards_a/cc', Timestamp.now())) + self.assertIs(False, broker.sharding_initiated()) + self.assertIs(False, broker.sharding_required()) + + # shard ranges and SHARDING, SHRINKING or SHARDED + broker.enable_sharding(Timestamp.now()) + self.assertTrue(broker.set_sharding_state()) + self.assertIs(True, broker.sharding_initiated()) + self.assertIs(True, broker.sharding_required()) + + epoch = broker.db_epoch + own_sr.update_state(ShardRange.SHRINKING, + state_timestamp=Timestamp.now()) + own_sr.epoch = epoch + broker.merge_shard_ranges(own_sr) + self.assertIs(True, broker.sharding_initiated()) + self.assertIs(True, broker.sharding_required()) + + own_sr.update_state(ShardRange.SHARDED) + broker.merge_shard_ranges(own_sr) + self.assertTrue(broker.set_sharded_state()) + self.assertIs(True, broker.sharding_initiated()) + self.assertIs(False, broker.sharding_required()) + + @with_tempdir def test_put_object_multiple_encoded_timestamps_using_file(self, tempdir): # Test ContainerBroker.put_object with differing data, content-type # and metadata 
timestamps, using file db to ensure that the code paths @@ -629,7 +1467,7 @@ self._test_put_object_multiple_encoded_timestamps(broker) def _test_put_object_multiple_explicit_timestamps(self, broker): - ts = (Timestamp(t) for t in itertools.count(int(time()))) + ts = make_timestamp_iter() broker.initialize(next(ts).internal, 0) t = [next(ts) for _ in range(11)] @@ -733,7 +1571,7 @@ def test_last_modified_time(self): # Test container listing reports the most recent of data or metadata # timestamp as last-modified time - ts = (Timestamp(t) for t in itertools.count(int(time()))) + ts = make_timestamp_iter() broker = ContainerBroker(':memory:', account='a', container='c') broker.initialize(next(ts).internal, 0) @@ -786,18 +1624,17 @@ @patch_policies def test_put_misplaced_object_does_not_effect_container_stats(self): policy = random.choice(list(POLICIES)) - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = make_timestamp_iter() broker = ContainerBroker(':memory:', account='a', container='c') - broker.initialize(next(ts), policy.idx) + broker.initialize(next(ts).internal, policy.idx) # migration tests may not honor policy on initialize if isinstance(self, ContainerBrokerMigrationMixin): real_storage_policy_index = \ broker.get_info()['storage_policy_index'] policy = [p for p in POLICIES if p.idx == real_storage_policy_index][0] - broker.put_object('correct_o', next(ts), 123, 'text/plain', + broker.put_object('correct_o', next(ts).internal, 123, 'text/plain', '5af83e3196bf99f440f31f2e1a6c9afe', storage_policy_index=policy.idx) info = broker.get_info() @@ -805,7 +1642,7 @@ self.assertEqual(123, info['bytes_used']) other_policy = random.choice([p for p in POLICIES if p is not policy]) - broker.put_object('wrong_o', next(ts), 123, 'text/plain', + broker.put_object('wrong_o', next(ts).internal, 123, 'text/plain', '5af83e3196bf99f440f31f2e1a6c9afe', storage_policy_index=other_policy.idx) self.assertEqual(1, info['object_count']) @@ -814,23 +1651,22 @@ @patch_policies def test_has_multiple_policies(self): policy = random.choice(list(POLICIES)) - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = make_timestamp_iter() broker = ContainerBroker(':memory:', account='a', container='c') - broker.initialize(next(ts), policy.idx) + broker.initialize(next(ts).internal, policy.idx) # migration tests may not honor policy on initialize if isinstance(self, ContainerBrokerMigrationMixin): real_storage_policy_index = \ broker.get_info()['storage_policy_index'] policy = [p for p in POLICIES if p.idx == real_storage_policy_index][0] - broker.put_object('correct_o', next(ts), 123, 'text/plain', + broker.put_object('correct_o', next(ts).internal, 123, 'text/plain', '5af83e3196bf99f440f31f2e1a6c9afe', storage_policy_index=policy.idx) self.assertFalse(broker.has_multiple_policies()) other_policy = [p for p in POLICIES if p is not policy][0] - broker.put_object('wrong_o', next(ts), 123, 'text/plain', + broker.put_object('wrong_o', next(ts).internal, 123, 'text/plain', '5af83e3196bf99f440f31f2e1a6c9afe', storage_policy_index=other_policy.idx) self.assertTrue(broker.has_multiple_policies()) @@ -838,11 +1674,10 @@ @patch_policies def test_get_policy_info(self): policy = random.choice(list(POLICIES)) - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = make_timestamp_iter() broker = ContainerBroker(':memory:', account='a', container='c') - broker.initialize(next(ts), policy.idx) + broker.initialize(next(ts).internal, policy.idx) # migration tests may 
not honor policy on initialize if isinstance(self, ContainerBrokerMigrationMixin): real_storage_policy_index = \ @@ -854,7 +1689,7 @@ self.assertEqual(policy_stats, expected) # add an object - broker.put_object('correct_o', next(ts), 123, 'text/plain', + broker.put_object('correct_o', next(ts).internal, 123, 'text/plain', '5af83e3196bf99f440f31f2e1a6c9afe', storage_policy_index=policy.idx) policy_stats = broker.get_policy_stats() @@ -864,7 +1699,7 @@ # add a misplaced object other_policy = random.choice([p for p in POLICIES if p is not policy]) - broker.put_object('wrong_o', next(ts), 123, 'text/plain', + broker.put_object('wrong_o', next(ts).internal, 123, 'text/plain', '5af83e3196bf99f440f31f2e1a6c9afe', storage_policy_index=other_policy.idx) policy_stats = broker.get_policy_stats() @@ -876,15 +1711,14 @@ @patch_policies def test_policy_stat_tracking(self): - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = make_timestamp_iter() broker = ContainerBroker(':memory:', account='a', container='c') # Note: in subclasses of this TestCase that inherit the # ContainerBrokerMigrationMixin, passing POLICIES.default.idx here has # no effect and broker.get_policy_stats() returns a dict with a single # entry mapping policy index 0 to the container stats - broker.initialize(next(ts), POLICIES.default.idx) + broker.initialize(next(ts).internal, POLICIES.default.idx) stats = defaultdict(dict) def assert_empty_default_policy_stats(policy_stats): @@ -904,7 +1738,7 @@ policy_index = random.randint(0, iters * 0.1) name = 'object-%s' % random.randint(0, iters * 0.1) size = random.randint(0, iters) - broker.put_object(name, next(ts), size, 'text/plain', + broker.put_object(name, next(ts).internal, size, 'text/plain', '5af83e3196bf99f440f31f2e1a6c9afe', storage_policy_index=policy_index) # track the size of the latest timestamp put for each object @@ -973,7 +1807,8 @@ self.assertEqual(info['delete_timestamp'], '0') if self.__class__ in (TestContainerBrokerBeforeMetadata, TestContainerBrokerBeforeXSync, - TestContainerBrokerBeforeSPI): + TestContainerBrokerBeforeSPI, + TestContainerBrokerBeforeShardRanges): self.assertEqual(info['status_changed_at'], '0') else: self.assertEqual(info['status_changed_at'], @@ -1019,6 +1854,84 @@ self.assertEqual(info['x_container_sync_point1'], -1) self.assertEqual(info['x_container_sync_point2'], -1) + @with_tempdir + def test_get_info_sharding_states(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'part', 'suffix', 'hash', 'hash.db') + broker = ContainerBroker( + db_path, account='myaccount', container='mycontainer') + broker.initialize(next(ts_iter).internal, 0) + broker.put_object('o1', next(ts_iter).internal, 123, 'text/plain', + 'fake etag') + sr = ShardRange('.shards_a/c', next(ts_iter)) + broker.merge_shard_ranges(sr) + + def check_info(expected): + errors = [] + for k, v in expected.items(): + if info.get(k) != v: + errors.append((k, v, info.get(k))) + if errors: + self.fail('Mismatches: %s' % ', '.join( + ['%s should be %s but got %s' % error + for error in errors])) + + # unsharded + with mock.patch.object( + broker, 'get_shard_usage') as mock_get_shard_usage: + info = broker.get_info() + mock_get_shard_usage.assert_not_called() + check_info({'account': 'myaccount', + 'container': 'mycontainer', + 'object_count': 1, + 'bytes_used': 123, + 'db_state': 'unsharded'}) + + # sharding + epoch = next(ts_iter) + broker.enable_sharding(epoch) + self.assertTrue(broker.set_sharding_state()) + broker.put_object('o2', 
next(ts_iter).internal, 1, 'text/plain', + 'fake etag') + broker.put_object('o3', next(ts_iter).internal, 320, 'text/plain', + 'fake etag') + with mock.patch.object( + broker, 'get_shard_usage') as mock_get_shard_usage: + info = broker.get_info() + mock_get_shard_usage.assert_not_called() + check_info({'account': 'myaccount', + 'container': 'mycontainer', + 'object_count': 1, + 'bytes_used': 123, + 'db_state': 'sharding'}) + + # sharded + self.assertTrue(broker.set_sharded_state()) + shard_stats = {'object_count': 1001, 'bytes_used': 3003} + with mock.patch.object( + broker, 'get_shard_usage') as mock_get_shard_usage: + mock_get_shard_usage.return_value = shard_stats + info = broker.get_info() + mock_get_shard_usage.assert_called_once_with() + check_info({'account': 'myaccount', + 'container': 'mycontainer', + 'object_count': 1001, + 'bytes_used': 3003, + 'db_state': 'sharded'}) + + # collapsed + sr.set_deleted(next(ts_iter)) + broker.merge_shard_ranges(sr) + with mock.patch.object( + broker, 'get_shard_usage') as mock_get_shard_usage: + info = broker.get_info() + mock_get_shard_usage.assert_not_called() + check_info({'account': 'myaccount', + 'container': 'mycontainer', + 'object_count': 2, + 'bytes_used': 321, + 'db_state': 'collapsed'}) + def test_set_x_syncs(self): broker = ContainerBroker(':memory:', account='test1', container='test2') @@ -1100,6 +2013,174 @@ self.assertEqual(info['reported_object_count'], 2) self.assertEqual(info['reported_bytes_used'], 1123) + @with_tempdir + def test_remove_objects(self, tempdir): + objects = (('undeleted', Timestamp.now().internal, 0, 'text/plain', + EMPTY_ETAG, 0, 0), + ('other_policy', Timestamp.now().internal, 0, 'text/plain', + EMPTY_ETAG, 0, 1), + ('deleted', Timestamp.now().internal, 0, 'text/plain', + EMPTY_ETAG, 1, 0)) + object_names = [o[0] for o in objects] + + def get_rows(broker): + with broker.get() as conn: + cursor = conn.execute("SELECT * FROM object") + return [r[1] for r in cursor] + + def do_setup(): + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', '%s.db' % uuid4()) + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(Timestamp.now().internal, 0) + for obj in objects: + # ensure row order matches put order + broker.put_object(*obj) + broker._commit_puts() + + self.assertEqual(3, broker.get_max_row()) # sanity check + self.assertEqual(object_names, get_rows(broker)) # sanity check + return broker + + broker = do_setup() + broker.remove_objects('', '') + self.assertFalse(get_rows(broker)) + + broker = do_setup() + broker.remove_objects('deleted', '') + self.assertEqual([object_names[2]], get_rows(broker)) + + broker = do_setup() + broker.remove_objects('', 'deleted', max_row=2) + self.assertEqual(object_names, get_rows(broker)) + + broker = do_setup() + broker.remove_objects('deleted', 'un') + self.assertEqual([object_names[0], object_names[2]], get_rows(broker)) + + broker = do_setup() + broker.remove_objects('', '', max_row=-1) + self.assertEqual(object_names, get_rows(broker)) + + broker = do_setup() + broker.remove_objects('', '', max_row=0) + self.assertEqual(object_names, get_rows(broker)) + + broker = do_setup() + broker.remove_objects('', '', max_row=1) + self.assertEqual(object_names[1:], get_rows(broker)) + + broker = do_setup() + broker.remove_objects('', '', max_row=2) + self.assertEqual(object_names[2:], get_rows(broker)) + + broker = do_setup() + broker.remove_objects('', '', max_row=3) + self.assertFalse(get_rows(broker)) + + broker = do_setup() + 
broker.remove_objects('', '', max_row=99) + self.assertFalse(get_rows(broker)) + + def test_get_objects(self): + broker = ContainerBroker(':memory:', account='a', container='c') + broker.initialize(Timestamp('1').internal, 0) + ts_iter = make_timestamp_iter() + objects_0 = [{'name': 'obj_0_%d' % i, + 'created_at': next(ts_iter).normal, + 'content_type': 'text/plain', + 'etag': 'etag_%d' % i, + 'size': 1024 * i, + 'deleted': i % 2, + 'storage_policy_index': 0 + } for i in range(1, 8)] + objects_1 = [{'name': 'obj_1_%d' % i, + 'created_at': next(ts_iter).normal, + 'content_type': 'text/plain', + 'etag': 'etag_%d' % i, + 'size': 1024 * i, + 'deleted': i % 2, + 'storage_policy_index': 1 + } for i in range(1, 8)] + # merge_objects mutates items + broker.merge_items([dict(obj) for obj in objects_0 + objects_1]) + + actual = broker.get_objects() + self.assertEqual(objects_0 + objects_1, actual) + + with mock.patch('swift.container.backend.CONTAINER_LISTING_LIMIT', 2): + actual = broker.get_objects() + self.assertEqual(objects_0[:2], actual) + + with mock.patch('swift.container.backend.CONTAINER_LISTING_LIMIT', 2): + actual = broker.get_objects(limit=9) + self.assertEqual(objects_0 + objects_1[:2], actual) + + actual = broker.get_objects(marker=objects_0[2]['name']) + self.assertEqual(objects_0[3:] + objects_1, actual) + + actual = broker.get_objects(end_marker=objects_0[2]['name']) + self.assertEqual(objects_0[:2], actual) + + actual = broker.get_objects(include_deleted=True) + self.assertEqual(objects_0[::2] + objects_1[::2], actual) + + actual = broker.get_objects(include_deleted=False) + self.assertEqual(objects_0[1::2] + objects_1[1::2], actual) + + actual = broker.get_objects(include_deleted=None) + self.assertEqual(objects_0 + objects_1, actual) + + def test_get_objects_since_row(self): + ts_iter = make_timestamp_iter() + broker = ContainerBroker(':memory:', account='a', container='c') + broker.initialize(Timestamp('1').internal, 0) + obj_names = ['obj%03d' % i for i in range(20)] + timestamps = [next(ts_iter) for o in obj_names] + for name, timestamp in zip(obj_names, timestamps): + broker.put_object(name, timestamp.internal, + 0, 'text/plain', EMPTY_ETAG) + broker._commit_puts() # ensure predictable row order + timestamps = [next(ts_iter) for o in obj_names[10:]] + for name, timestamp in zip(obj_names[10:], timestamps): + broker.put_object(name, timestamp.internal, + 0, 'text/plain', EMPTY_ETAG, deleted=1) + broker._commit_puts() # ensure predictable row order + + # sanity check + self.assertEqual(30, broker.get_max_row()) + actual = broker.get_objects() + self.assertEqual(obj_names, [o['name'] for o in actual]) + + # all rows included + actual = broker.get_objects(since_row=None) + self.assertEqual(obj_names, [o['name'] for o in actual]) + + actual = broker.get_objects(since_row=-1) + self.assertEqual(obj_names, [o['name'] for o in actual]) + + # selected rows + for since_row in range(10): + actual = broker.get_objects(since_row=since_row) + with annotate_failure(since_row): + self.assertEqual(obj_names[since_row:], + [o['name'] for o in actual]) + + for since_row in range(10, 20): + actual = broker.get_objects(since_row=since_row) + with annotate_failure(since_row): + self.assertEqual(obj_names[10:], + [o['name'] for o in actual]) + + for since_row in range(20, len(obj_names) + 1): + actual = broker.get_objects(since_row=since_row) + with annotate_failure(since_row): + self.assertEqual(obj_names[since_row - 10:], + [o['name'] for o in actual]) + + 
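The since_row loops above depend on rows being renumbered when an object is rewritten: the initial twenty puts occupy rows 1-20, and the second round of merges for obj010-obj019 adds rows 21-30, so any since_row between 10 and 19 still returns everything from obj010 onward. A toy model of that bookkeeping (all names here are illustrative, not broker API):

def names_since_row(num_objects=20, rewritten_from=10, since_row=None):
    # rows are (rowid, name); rewriting a name appends a new row for it
    rows = [(i + 1, 'obj%03d' % i) for i in range(num_objects)]
    for offset, i in enumerate(range(rewritten_from, num_objects)):
        rows.append((num_objects + 1 + offset, 'obj%03d' % i))
    latest = {}
    for rowid, name in rows:
        latest[name] = rowid
    return sorted(name for name, rowid in latest.items()
                  if since_row is None or rowid > since_row)

names_since_row(since_row=15), for example, returns obj010 through obj019, matching the middle loop's expectation.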
self.assertFalse(broker.get_objects(end_marker=obj_names[5], + since_row=5)) + def test_list_objects_iter(self): # Test ContainerBroker.list_objects_iter broker = ContainerBroker(':memory:', account='a', container='c') @@ -1832,6 +2913,21 @@ self.assertEqual(['a', 'b', 'c'], sorted([rec['name'] for rec in items])) + @with_tempdir + def test_merge_items_is_green(self, tempdir): + ts = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts).internal, 1) + + broker.put_object('b', next(ts).internal, 0, 'text/plain', + EMPTY_ETAG) + + with mock.patch('swift.container.backend.tpool') as mock_tpool: + broker.get_info() + mock_tpool.execute.assert_called_once() + def test_merge_items_overwrite_unicode(self): # test DatabaseBroker.merge_items snowman = u'\N{SNOWMAN}'.encode('utf-8') @@ -1930,12 +3026,11 @@ self.assertEqual(rec['content_type'], 'text/plain') def test_set_storage_policy_index(self): - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = make_timestamp_iter() broker = ContainerBroker(':memory:', account='test_account', container='test_container') timestamp = next(ts) - broker.initialize(timestamp, 0) + broker.initialize(timestamp.internal, 0) info = broker.get_info() self.assertEqual(0, info['storage_policy_index']) # sanity check @@ -1943,42 +3038,44 @@ self.assertEqual(0, info['bytes_used']) if self.__class__ in (TestContainerBrokerBeforeMetadata, TestContainerBrokerBeforeXSync, - TestContainerBrokerBeforeSPI): + TestContainerBrokerBeforeSPI, + TestContainerBrokerBeforeShardRanges): self.assertEqual(info['status_changed_at'], '0') else: - self.assertEqual(timestamp, info['status_changed_at']) + self.assertEqual(timestamp.internal, info['status_changed_at']) expected = {0: {'object_count': 0, 'bytes_used': 0}} self.assertEqual(expected, broker.get_policy_stats()) timestamp = next(ts) - broker.set_storage_policy_index(111, timestamp) + broker.set_storage_policy_index(111, timestamp.internal) self.assertEqual(broker.storage_policy_index, 111) info = broker.get_info() self.assertEqual(111, info['storage_policy_index']) self.assertEqual(0, info['object_count']) self.assertEqual(0, info['bytes_used']) - self.assertEqual(timestamp, info['status_changed_at']) + self.assertEqual(timestamp.internal, info['status_changed_at']) expected[111] = {'object_count': 0, 'bytes_used': 0} self.assertEqual(expected, broker.get_policy_stats()) timestamp = next(ts) - broker.set_storage_policy_index(222, timestamp) + broker.set_storage_policy_index(222, timestamp.internal) self.assertEqual(broker.storage_policy_index, 222) info = broker.get_info() self.assertEqual(222, info['storage_policy_index']) self.assertEqual(0, info['object_count']) self.assertEqual(0, info['bytes_used']) - self.assertEqual(timestamp, info['status_changed_at']) + self.assertEqual(timestamp.internal, info['status_changed_at']) expected[222] = {'object_count': 0, 'bytes_used': 0} self.assertEqual(expected, broker.get_policy_stats()) old_timestamp, timestamp = timestamp, next(ts) - broker.set_storage_policy_index(222, timestamp) # it's idempotent + # setting again is idempotent + broker.set_storage_policy_index(222, timestamp.internal) info = broker.get_info() self.assertEqual(222, info['storage_policy_index']) self.assertEqual(0, info['object_count']) self.assertEqual(0, info['bytes_used']) - self.assertEqual(old_timestamp, info['status_changed_at']) + self.assertEqual(old_timestamp.internal, 
info['status_changed_at']) self.assertEqual(expected, broker.get_policy_stats()) def test_set_storage_policy_index_empty(self): @@ -2004,19 +3101,18 @@ @with_tempdir def test_legacy_pending_files(self, tempdir): - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = make_timestamp_iter() db_path = os.path.join(tempdir, 'container.db') # first init an acct DB without the policy_stat table present broker = ContainerBroker(db_path, account='a', container='c') - broker.initialize(next(ts), 1) + broker.initialize(next(ts).internal, 1) # manually make some pending entries lacking storage_policy_index with open(broker.pending_file, 'a+b') as fp: for i in range(10): name, timestamp, size, content_type, etag, deleted = ( - 'o%s' % i, next(ts), 0, 'c', 'e', 0) + 'o%s' % i, next(ts).internal, 0, 'c', 'e', 0) fp.write(':') fp.write(pickle.dumps( (name, timestamp, size, content_type, etag, deleted), @@ -2033,7 +3129,7 @@ else: size = 2 storage_policy_index = 1 - broker.put_object(name, next(ts), size, 'c', 'e', 0, + broker.put_object(name, next(ts).internal, size, 'c', 'e', 0, storage_policy_index=storage_policy_index) broker._commit_puts_stale_ok() @@ -2049,8 +3145,7 @@ @with_tempdir def test_get_info_no_stale_reads(self, tempdir): - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = make_timestamp_iter() db_path = os.path.join(tempdir, 'container.db') def mock_commit_puts(): @@ -2058,13 +3153,13 @@ broker = ContainerBroker(db_path, account='a', container='c', stale_reads_ok=False) - broker.initialize(next(ts), 1) + broker.initialize(next(ts).internal, 1) # manually make some pending entries with open(broker.pending_file, 'a+b') as fp: for i in range(10): name, timestamp, size, content_type, etag, deleted = ( - 'o%s' % i, next(ts), 0, 'c', 'e', 0) + 'o%s' % i, next(ts).internal, 0, 'c', 'e', 0) fp.write(':') fp.write(pickle.dumps( (name, timestamp, size, content_type, etag, deleted), @@ -2079,8 +3174,7 @@ @with_tempdir def test_get_info_stale_read_ok(self, tempdir): - ts = (Timestamp(t).internal for t in - itertools.count(int(time()))) + ts = make_timestamp_iter() db_path = os.path.join(tempdir, 'container.db') def mock_commit_puts(): @@ -2088,13 +3182,13 @@ broker = ContainerBroker(db_path, account='a', container='c', stale_reads_ok=True) - broker.initialize(next(ts), 1) + broker.initialize(next(ts).internal, 1) # manually make some pending entries with open(broker.pending_file, 'a+b') as fp: for i in range(10): name, timestamp, size, content_type, etag, deleted = ( - 'o%s' % i, next(ts), 0, 'c', 'e', 0) + 'o%s' % i, next(ts).internal, 0, 'c', 'e', 0) fp.write(':') fp.write(pickle.dumps( (name, timestamp, size, content_type, etag, deleted), @@ -2104,6 +3198,1231 @@ broker._commit_puts = mock_commit_puts broker.get_info() + @with_tempdir + def test_create_broker(self, tempdir): + broker = ContainerBroker.create_broker(tempdir, 0, 'a', 'c') + hsh = hash_path('a', 'c') + expected_path = os.path.join( + tempdir, 'containers', '0', hsh[-3:], hsh, hsh + '.db') + self.assertEqual(expected_path, broker.db_file) + self.assertTrue(os.path.isfile(expected_path)) + + ts = Timestamp.now() + broker = ContainerBroker.create_broker(tempdir, 0, 'a', 'c1', + put_timestamp=ts.internal) + hsh = hash_path('a', 'c1') + expected_path = os.path.join( + tempdir, 'containers', '0', hsh[-3:], hsh, hsh + '.db') + self.assertEqual(expected_path, broker.db_file) + self.assertTrue(os.path.isfile(expected_path)) + self.assertEqual(ts.internal, broker.get_info()['put_timestamp']) + 
self.assertEqual(0, broker.get_info()['storage_policy_index']) + + epoch = Timestamp.now() + broker = ContainerBroker.create_broker(tempdir, 0, 'a', 'c3', + epoch=epoch) + hsh = hash_path('a', 'c3') + expected_path = os.path.join( + tempdir, 'containers', '0', hsh[-3:], + hsh, '%s_%s.db' % (hsh, epoch.internal)) + self.assertEqual(expected_path, broker.db_file) + + @with_tempdir + def test_pending_file_name(self, tempdir): + # pending file should have same name for sharded or unsharded db + expected_pending_path = os.path.join(tempdir, 'container.db.pending') + + db_path = os.path.join(tempdir, 'container.db') + fresh_db_path = os.path.join(tempdir, 'container_epoch.db') + + def do_test(given_db_file, expected_db_file): + broker = ContainerBroker(given_db_file, account='a', container='c') + self.assertEqual(expected_pending_path, broker.pending_file) + self.assertEqual(expected_db_file, broker.db_file) + + # no files exist + do_test(db_path, db_path) + do_test(fresh_db_path, fresh_db_path) + + # only container.db exists - unsharded + with open(db_path, 'wb'): + pass + do_test(db_path, db_path) + do_test(fresh_db_path, db_path) + + # container.db and container_epoch.db exist - sharding + with open(fresh_db_path, 'wb'): + pass + do_test(db_path, fresh_db_path) + do_test(fresh_db_path, fresh_db_path) + + # only container_epoch.db exists - sharded + os.unlink(db_path) + do_test(db_path, fresh_db_path) + do_test(fresh_db_path, fresh_db_path) + + @with_tempdir + def test_sharding_sysmeta(self, tempdir): + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker( + db_path, account='myaccount', container='mycontainer') + broker.initialize(Timestamp.now().internal) + + expected = 'aaa/ccc' + with mock_timestamp_now() as now: + broker.set_sharding_sysmeta('Root', expected) + actual = broker.metadata + self.assertEqual([expected, now.internal], + actual.get('X-Container-Sysmeta-Shard-Root')) + self.assertEqual(expected, broker.get_sharding_sysmeta('Root')) + + expected = {'key': 'value'} + with mock_timestamp_now() as now: + broker.set_sharding_sysmeta('test', expected) + actual = broker.metadata + self.assertEqual([expected, now.internal], + actual.get('X-Container-Sysmeta-Shard-test')) + self.assertEqual(expected, broker.get_sharding_sysmeta('test')) + + @with_tempdir + def test_path(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker( + db_path, account='myaccount', container='mycontainer') + broker.initialize(next(ts_iter).internal, 1) + # make sure we can cope with uninitialized account and container + broker.account = broker.container = None + self.assertEqual('myaccount/mycontainer', broker.path) + + @with_tempdir + def test_root_account_container_path(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker( + db_path, account='root_a', container='root_c') + broker.initialize(next(ts_iter).internal, 1) + # make sure we can cope with uninitialized account and container + broker.account = broker.container = None + + self.assertEqual('root_a', broker.root_account) + self.assertEqual('root_c', broker.root_container) + self.assertEqual('root_a/root_c', broker.root_path) + self.assertTrue(broker.is_root_container()) + self.assertEqual('root_a', broker.account) # sanity check + self.assertEqual('root_c', broker.container) # sanity check + + # we don't expect root containers to have this sysmeta set but if it is + # the broker should still 
behave like a root container + metadata = { + 'X-Container-Sysmeta-Shard-Root': + ('root_a/root_c', next(ts_iter).internal)} + broker = ContainerBroker( + db_path, account='root_a', container='root_c') + broker.update_metadata(metadata) + broker.account = broker.container = None + self.assertEqual('root_a', broker.root_account) + self.assertEqual('root_c', broker.root_container) + self.assertEqual('root_a/root_c', broker.root_path) + self.assertTrue(broker.is_root_container()) + + # if root is marked deleted, it still considers itself to be a root + broker.delete_db(next(ts_iter).internal) + self.assertEqual('root_a', broker.root_account) + self.assertEqual('root_c', broker.root_container) + self.assertEqual('root_a/root_c', broker.root_path) + self.assertTrue(broker.is_root_container()) + # check the values are not just being cached + broker = ContainerBroker(db_path) + self.assertEqual('root_a', broker.root_account) + self.assertEqual('root_c', broker.root_container) + self.assertEqual('root_a/root_c', broker.root_path) + self.assertTrue(broker.is_root_container()) + + # check a shard container + db_path = os.path.join(tempdir, 'shard_container.db') + broker = ContainerBroker( + db_path, account='.shards_root_a', container='c_shard') + broker.initialize(next(ts_iter).internal, 1) + # now the metadata is significant... + metadata = { + 'X-Container-Sysmeta-Shard-Root': + ('root_a/root_c', next(ts_iter).internal)} + broker.update_metadata(metadata) + broker.account = broker.container = None + broker._root_account = broker._root_container = None + + self.assertEqual('root_a', broker.root_account) + self.assertEqual('root_c', broker.root_container) + self.assertEqual('root_a/root_c', broker.root_path) + self.assertFalse(broker.is_root_container()) + + # check validation + def check_validation(root_value): + metadata = { + 'X-Container-Sysmeta-Shard-Root': + (root_value, next(ts_iter).internal)} + broker.update_metadata(metadata) + broker.account = broker.container = None + broker._root_account = broker._root_container = None + with self.assertRaises(ValueError) as cm: + broker.root_account + self.assertIn('Expected X-Container-Sysmeta-Shard-Root', + str(cm.exception)) + with self.assertRaises(ValueError): + broker.root_container + + check_validation('root_a') + check_validation('/root_a') + check_validation('/root_a/root_c') + check_validation('/root_a/root_c/blah') + check_validation('/') + + def test_resolve_shard_range_states(self): + self.assertIsNone(ContainerBroker.resolve_shard_range_states(None)) + self.assertIsNone(ContainerBroker.resolve_shard_range_states([])) + + for state_num, state_name in ShardRange.STATES.items(): + self.assertEqual({state_num}, + ContainerBroker.resolve_shard_range_states( + [state_name])) + self.assertEqual({state_num}, + ContainerBroker.resolve_shard_range_states( + [state_num])) + + self.assertEqual(set(ShardRange.STATES), + ContainerBroker.resolve_shard_range_states( + ShardRange.STATES_BY_NAME)) + + self.assertEqual( + set(ShardRange.STATES), + ContainerBroker.resolve_shard_range_states(ShardRange.STATES)) + + # check aliases + self.assertEqual( + {ShardRange.CLEAVED, ShardRange.ACTIVE, ShardRange.SHARDING, + ShardRange.SHRINKING}, + ContainerBroker.resolve_shard_range_states(['listing'])) + + self.assertEqual( + {ShardRange.CLEAVED, ShardRange.ACTIVE, ShardRange.SHARDING, + ShardRange.SHRINKING}, + ContainerBroker.resolve_shard_range_states(['listing', 'active'])) + + self.assertEqual( + {ShardRange.CLEAVED, ShardRange.ACTIVE, ShardRange.SHARDING, + 
ShardRange.SHRINKING, ShardRange.CREATED}, + ContainerBroker.resolve_shard_range_states(['listing', 'created'])) + + self.assertEqual( + {ShardRange.CREATED, ShardRange.CLEAVED, ShardRange.ACTIVE, + ShardRange.SHARDING}, + ContainerBroker.resolve_shard_range_states(['updating'])) + + self.assertEqual( + {ShardRange.CREATED, ShardRange.CLEAVED, ShardRange.ACTIVE, + ShardRange.SHARDING, ShardRange.SHRINKING}, + ContainerBroker.resolve_shard_range_states( + ['updating', 'listing'])) + + def check_bad_value(value): + with self.assertRaises(ValueError) as cm: + ContainerBroker.resolve_shard_range_states(value) + self.assertIn('Invalid state', str(cm.exception)) + + check_bad_value(['bad_state', 'active']) + check_bad_value(['']) + check_bad_value('active') + + @with_tempdir + def test_get_shard_ranges(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + + # no rows + self.assertFalse(broker.get_shard_ranges()) + # check that a default own shard range is not generated + self.assertFalse(broker.get_shard_ranges(include_own=True)) + + # merge row for own shard range + own_shard_range = ShardRange(broker.path, next(ts_iter), 'l', 'u', + state=ShardRange.SHARDING) + broker.merge_shard_ranges([own_shard_range]) + self.assertFalse(broker.get_shard_ranges()) + self.assertFalse(broker.get_shard_ranges(include_own=False)) + + actual = broker.get_shard_ranges(include_own=True) + self.assertEqual([dict(sr) for sr in [own_shard_range]], + [dict(sr) for sr in actual]) + + # merge rows for other shard ranges + shard_ranges = [ + ShardRange('.a/c0', next(ts_iter), 'a', 'c'), + ShardRange('.a/c1', next(ts_iter), 'c', 'd'), + ShardRange('.a/c2', next(ts_iter), 'd', 'f', + state=ShardRange.ACTIVE), + ShardRange('.a/c3', next(ts_iter), 'e', 'f', deleted=1, + state=ShardRange.SHARDED,), + ShardRange('.a/c4', next(ts_iter), 'f', 'h', + state=ShardRange.CREATED), + ShardRange('.a/c5', next(ts_iter), 'h', 'j', deleted=1) + ] + broker.merge_shard_ranges(shard_ranges) + actual = broker.get_shard_ranges() + undeleted = shard_ranges[:3] + shard_ranges[4:5] + self.assertEqual([dict(sr) for sr in undeleted], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(include_deleted=True) + self.assertEqual([dict(sr) for sr in shard_ranges], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(reverse=True) + self.assertEqual([dict(sr) for sr in reversed(undeleted)], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(marker='c', end_marker='e') + self.assertEqual([dict(sr) for sr in shard_ranges[1:3]], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(marker='c', end_marker='e', + states=ShardRange.ACTIVE) + self.assertEqual([dict(sr) for sr in shard_ranges[2:3]], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(marker='e', end_marker='e') + self.assertFalse([dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(includes='f') + self.assertEqual([dict(sr) for sr in shard_ranges[2:3]], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(includes='i') + self.assertFalse(actual) + + actual = broker.get_shard_ranges( + states=[ShardRange.CREATED, ShardRange.ACTIVE]) + self.assertEqual( + [dict(sr) for sr in [shard_ranges[2], shard_ranges[4]]], + [dict(sr) for sr in actual]) + + # get everything + actual = broker.get_shard_ranges(include_own=True) + 
self.assertEqual([dict(sr) for sr in undeleted + [own_shard_range]], + [dict(sr) for sr in actual]) + + # get just own range + actual = broker.get_shard_ranges(include_own=True, exclude_others=True) + self.assertEqual([dict(sr) for sr in [own_shard_range]], + [dict(sr) for sr in actual]) + + # if you ask for nothing you'll get nothing + actual = broker.get_shard_ranges( + include_own=False, exclude_others=True) + self.assertFalse(actual) + + @with_tempdir + def test_get_shard_ranges_with_sharding_overlaps(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + shard_ranges = [ + ShardRange('.shards_a/c0', next(ts_iter), 'a', 'd', + state=ShardRange.ACTIVE), + ShardRange('.shards_a/c1_0', next(ts_iter), 'd', 'g', + state=ShardRange.CLEAVED), + ShardRange('.shards_a/c1_1', next(ts_iter), 'g', 'j', + state=ShardRange.CLEAVED), + ShardRange('.shards_a/c1_2', next(ts_iter), 'j', 'm', + state=ShardRange.CREATED), + ShardRange('.shards_a/c1', next(ts_iter), 'd', 'm', + state=ShardRange.SHARDING), + ShardRange('.shards_a/c2', next(ts_iter), 'm', '', + state=ShardRange.ACTIVE), + ] + broker.merge_shard_ranges( + random.sample(shard_ranges, len(shard_ranges))) + actual = broker.get_shard_ranges() + self.assertEqual([dict(sr) for sr in shard_ranges], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(states=SHARD_LISTING_STATES) + self.assertEqual( + [dict(sr) for sr in shard_ranges[:3] + shard_ranges[4:]], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(states=SHARD_UPDATE_STATES, + includes='e') + self.assertEqual([shard_ranges[1]], actual) + actual = broker.get_shard_ranges(states=SHARD_UPDATE_STATES, + includes='j') + self.assertEqual([shard_ranges[2]], actual) + actual = broker.get_shard_ranges(states=SHARD_UPDATE_STATES, + includes='k') + self.assertEqual([shard_ranges[3]], actual) + + @with_tempdir + def test_get_shard_ranges_with_shrinking_overlaps(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + shard_ranges = [ + ShardRange('.shards_a/c0', next(ts_iter), 'a', 'k', + state=ShardRange.ACTIVE), + ShardRange('.shards_a/c1', next(ts_iter), 'k', 'm', + state=ShardRange.SHRINKING), + ShardRange('.shards_a/c2', next(ts_iter), 'k', 't', + state=ShardRange.ACTIVE), + ShardRange('.shards_a/c3', next(ts_iter), 't', '', + state=ShardRange.ACTIVE), + ] + broker.merge_shard_ranges( + random.sample(shard_ranges, len(shard_ranges))) + actual = broker.get_shard_ranges() + self.assertEqual([dict(sr) for sr in shard_ranges], + [dict(sr) for sr in actual]) + + actual = broker.get_shard_ranges(states=SHARD_UPDATE_STATES, + includes='l') + self.assertEqual([shard_ranges[2]], actual) + + @with_tempdir + def test_get_own_shard_range(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker( + db_path, account='.shards_a', container='shard_c') + broker.initialize(next(ts_iter).internal, 0) + + # no row for own shard range - expect entire namespace default + now = Timestamp.now() + expected = ShardRange(broker.path, now, '', '', 0, 0, now, + state=ShardRange.ACTIVE) + with mock.patch('swift.container.backend.Timestamp.now', + return_value=now): + actual = broker.get_own_shard_range() + 
self.assertEqual(dict(expected), dict(actual)) + + actual = broker.get_own_shard_range(no_default=True) + self.assertIsNone(actual) + + # row for own shard range and others + ts_1 = next(ts_iter) + own_sr = ShardRange(broker.path, ts_1, 'l', 'u') + broker.merge_shard_ranges( + [own_sr, + ShardRange('.a/c1', next(ts_iter), 'b', 'c'), + ShardRange('.a/c2', next(ts_iter), 'c', 'd')]) + expected = ShardRange(broker.path, ts_1, 'l', 'u', 0, 0, now) + with mock.patch('swift.container.backend.Timestamp.now', + return_value=now): + actual = broker.get_own_shard_range() + self.assertEqual(dict(expected), dict(actual)) + + # check stats get updated + broker.put_object( + 'o1', next(ts_iter).internal, 100, 'text/plain', 'etag1') + broker.put_object( + 'o2', next(ts_iter).internal, 99, 'text/plain', 'etag2') + expected = ShardRange( + broker.path, ts_1, 'l', 'u', 2, 199, now) + with mock.patch('swift.container.backend.Timestamp.now', + return_value=now): + actual = broker.get_own_shard_range() + self.assertEqual(dict(expected), dict(actual)) + + # still returned when deleted + delete_ts = next(ts_iter) + own_sr.set_deleted(timestamp=delete_ts) + broker.merge_shard_ranges(own_sr) + with mock.patch('swift.container.backend.Timestamp.now', + return_value=now): + actual = broker.get_own_shard_range() + expected = ShardRange( + broker.path, delete_ts, 'l', 'u', 2, 199, now, deleted=True) + self.assertEqual(dict(expected), dict(actual)) + + # still in table after reclaim_age + broker.reclaim(next(ts_iter).internal, next(ts_iter).internal) + with mock.patch('swift.container.backend.Timestamp.now', + return_value=now): + actual = broker.get_own_shard_range() + self.assertEqual(dict(expected), dict(actual)) + + # entire namespace + ts_2 = next(ts_iter) + broker.merge_shard_ranges( + [ShardRange(broker.path, ts_2, '', '')]) + expected = ShardRange( + broker.path, ts_2, '', '', 2, 199, now) + with mock.patch('swift.container.backend.Timestamp.now', + return_value=now): + actual = broker.get_own_shard_range() + self.assertEqual(dict(expected), dict(actual)) + + @with_tempdir + def test_enable_sharding(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'container.db') + broker = ContainerBroker( + db_path, account='.shards_a', container='shard_c') + broker.initialize(next(ts_iter).internal, 0) + epoch = next(ts_iter) + broker.enable_sharding(epoch) + own_sr = broker.get_own_shard_range(no_default=True) + self.assertEqual(epoch, own_sr.epoch) + self.assertEqual(epoch, own_sr.state_timestamp) + self.assertEqual(ShardRange.SHARDING, own_sr.state) + + @with_tempdir + def test_get_shard_usage(self, tempdir): + ts_iter = make_timestamp_iter() + shard_range_by_state = dict( + (state, ShardRange('.shards_a/c_%s' % state, next(ts_iter), + str(state), str(state + 1), + 2 * state, 2 * state + 1, 2, + state=state)) + for state in ShardRange.STATES) + + def make_broker(a, c): + db_path = os.path.join(tempdir, '%s.db' % uuid4()) + broker = ContainerBroker(db_path, account=a, container=c) + broker.initialize(next(ts_iter).internal, 0) + broker.set_sharding_sysmeta('Root', 'a/c') + broker.merge_shard_ranges(shard_range_by_state.values()) + return broker + + # make broker appear to be a root container + broker = make_broker('a', 'c') + self.assertTrue(broker.is_root_container()) + included_states = (ShardRange.ACTIVE, ShardRange.SHARDING, + ShardRange.SHRINKING) + included = [shard_range_by_state[state] for state in included_states] + expected = { + 'object_count': sum([sr.object_count for sr in 
included]), + 'bytes_used': sum([sr.bytes_used for sr in included]) + } + self.assertEqual(expected, broker.get_shard_usage()) + + @with_tempdir + def _check_find_shard_ranges(self, c_lower, c_upper, tempdir): + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() + container_name = 'test_container' + + def do_test(expected_bounds, expected_last_found, shard_size, limit, + start_index=0, existing=None): + # expected_bounds is a list of tuples (lower, upper, object_count) + # build expected shard ranges + expected_shard_ranges = [ + dict(lower=lower, upper=upper, index=index, + object_count=object_count) + for index, (lower, upper, object_count) + in enumerate(expected_bounds, start_index)] + + with mock.patch('swift.common.utils.time.time', + return_value=float(ts_now.normal)): + ranges, last_found = broker.find_shard_ranges( + shard_size, limit=limit, existing_ranges=existing) + self.assertEqual(expected_shard_ranges, ranges) + self.assertEqual(expected_last_found, last_found) + + db_path = os.path.join(tempdir, 'test_container.db') + broker = ContainerBroker( + db_path, account='a', container=container_name) + # shard size > object count, no objects + broker.initialize(next(ts_iter).internal, 0) + + ts = next(ts_iter) + if c_lower or c_upper: + # testing a shard, so set its own shard range + own_shard_range = ShardRange(broker.path, ts, c_lower, c_upper) + broker.merge_shard_ranges([own_shard_range]) + + self.assertEqual(([], False), broker.find_shard_ranges(10)) + + for i in range(10): + broker.put_object( + 'obj%02d' % i, next(ts_iter).internal, 0, 'text/plain', 'etag') + + expected_bounds = [(c_lower, 'obj04', 5), ('obj04', c_upper, 5)] + do_test(expected_bounds, True, shard_size=5, limit=None) + + expected = [(c_lower, 'obj06', 7), ('obj06', c_upper, 3)] + do_test(expected, True, shard_size=7, limit=None) + expected = [(c_lower, 'obj08', 9), ('obj08', c_upper, 1)] + do_test(expected, True, shard_size=9, limit=None) + # shard size >= object count + do_test([], False, shard_size=10, limit=None) + do_test([], False, shard_size=11, limit=None) + + # check use of limit + do_test([], False, shard_size=4, limit=0) + expected = [(c_lower, 'obj03', 4)] + do_test(expected, False, shard_size=4, limit=1) + expected = [(c_lower, 'obj03', 4), ('obj03', 'obj07', 4)] + do_test(expected, False, shard_size=4, limit=2) + expected = [(c_lower, 'obj03', 4), ('obj03', 'obj07', 4), + ('obj07', c_upper, 2)] + do_test(expected, True, shard_size=4, limit=3) + do_test(expected, True, shard_size=4, limit=4) + do_test(expected, True, shard_size=4, limit=-1) + + # increase object count to 11 + broker.put_object( + 'obj10', next(ts_iter).internal, 0, 'text/plain', 'etag') + expected = [(c_lower, 'obj03', 4), ('obj03', 'obj07', 4), + ('obj07', c_upper, 3)] + do_test(expected, True, shard_size=4, limit=None) + + expected = [(c_lower, 'obj09', 10), ('obj09', c_upper, 1)] + do_test(expected, True, shard_size=10, limit=None) + do_test([], False, shard_size=11, limit=None) + + # now pass in a pre-existing shard range + existing = [ShardRange( + '.shards_a/srange-0', Timestamp.now(), '', 'obj03', + object_count=4, state=ShardRange.FOUND)] + + expected = [('obj03', 'obj07', 4), ('obj07', c_upper, 3)] + do_test(expected, True, shard_size=4, limit=None, start_index=1, + existing=existing) + expected = [('obj03', 'obj07', 4)] + do_test(expected, False, shard_size=4, limit=1, start_index=1, + existing=existing) + # using increased shard size should not distort estimation of progress + expected = [('obj03', 'obj09', 6), 
('obj09', c_upper, 1)] + do_test(expected, True, shard_size=6, limit=None, start_index=1, + existing=existing) + + # add another existing... + existing.append(ShardRange( + '.shards_a/srange-1', Timestamp.now(), '', 'obj07', + object_count=4, state=ShardRange.FOUND)) + expected = [('obj07', c_upper, 3)] + do_test(expected, True, shard_size=10, limit=None, start_index=2, + existing=existing) + # an existing shard range not in FOUND state should not distort + # estimation of progress, but may cause final range object count to + # default to shard_size + existing[-1].state = ShardRange.CREATED + existing[-1].object_count = 10 + # there's only 3 objects left to scan but progress cannot be reliably + # calculated, so final shard range has object count of 2 + expected = [('obj07', 'obj09', 2), ('obj09', c_upper, 2)] + do_test(expected, True, shard_size=2, limit=None, start_index=2, + existing=existing) + + # add last shard range so there's none left to find + existing.append(ShardRange( + '.shards_a/srange-2', Timestamp.now(), 'obj07', c_upper, + object_count=4, state=ShardRange.FOUND)) + do_test([], True, shard_size=4, limit=None, existing=existing) + + def test_find_shard_ranges(self): + self._check_find_shard_ranges('', '') + self._check_find_shard_ranges('', 'upper') + self._check_find_shard_ranges('lower', '') + self._check_find_shard_ranges('lower', 'upper') + + @with_tempdir + def test_find_shard_ranges_with_misplaced_objects(self, tempdir): + # verify that misplaced objects outside of a shard's range do not + # influence choice of shard ranges (but do distort the object counts) + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() + container_name = 'test_container' + + db_path = os.path.join(tempdir, 'test_container.db') + broker = ContainerBroker( + db_path, account='a', container=container_name) + # shard size > object count, no objects + broker.initialize(next(ts_iter).internal, 0) + + ts = next(ts_iter) + own_shard_range = ShardRange(broker.path, ts, 'l', 'u') + broker.merge_shard_ranges([own_shard_range]) + + self.assertEqual(([], False), broker.find_shard_ranges(10)) + + for name in ('a-misplaced', 'm', 'n', 'p', 'q', 'r', 'z-misplaced'): + broker.put_object( + name, next(ts_iter).internal, 0, 'text/plain', 'etag') + + expected_bounds = ( + ('l', 'n', 2), # contains m, n + ('n', 'q', 2), # contains p, q + ('q', 'u', 3) # contains r; object count distorted by 2 misplaced + ) + expected_shard_ranges = [ + dict(lower=lower, upper=upper, index=index, + object_count=object_count) + for index, (lower, upper, object_count) + in enumerate(expected_bounds)] + + with mock.patch('swift.common.utils.time.time', + return_value=float(ts_now.normal)): + actual_shard_ranges, last_found = broker.find_shard_ranges(2, -1) + self.assertEqual(expected_shard_ranges, actual_shard_ranges) + + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() + container_name = 'test_container' + + @with_tempdir + def test_find_shard_ranges_errors(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join(tempdir, 'test_container.db') + broker = ContainerBroker(db_path, account='a', container='c', + logger=FakeLogger()) + broker.initialize(next(ts_iter).internal, 0) + for i in range(2): + broker.put_object( + 'obj%d' % i, next(ts_iter).internal, 0, 'text/plain', 'etag') + + klass = 'swift.container.backend.ContainerBroker' + with mock.patch(klass + '._get_next_shard_range_upper', + side_effect=LockTimeout()): + ranges, last_found = broker.find_shard_ranges(1) + self.assertFalse(ranges) + 
self.assertFalse(last_found) + lines = broker.logger.get_lines_for_level('error') + self.assertIn('Problem finding shard upper', lines[0]) + self.assertFalse(lines[1:]) + + broker.logger.clear() + with mock.patch(klass + '._get_next_shard_range_upper', + side_effect=sqlite3.OperationalError()): + ranges, last_found = broker.find_shard_ranges(1) + self.assertFalse(last_found) + self.assertFalse(ranges) + lines = broker.logger.get_lines_for_level('error') + self.assertIn('Problem finding shard upper', lines[0]) + self.assertFalse(lines[1:]) + + @with_tempdir + def test_set_db_states(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + + # load up the broker with some objects + objects = [{'name': 'obj_%d' % i, + 'created_at': next(ts_iter).normal, + 'content_type': 'text/plain', + 'etag': 'etag_%d' % i, + 'size': 1024 * i, + 'deleted': 0, + 'storage_policy_index': 0, + } for i in range(1, 6)] + # merge_items mutates items + broker.merge_items([dict(obj) for obj in objects]) + original_info = broker.get_info() + + # Add some metadata + meta = { + 'X-Container-Meta-Color': ['Blue', next(ts_iter).normal], + 'X-Container-Meta-Cleared': ['', next(ts_iter).normal], + 'X-Container-Sysmeta-Shape': ['Circle', next(ts_iter).normal], + } + broker.update_metadata(meta) + + # Add some syncs + incoming_sync = {'remote_id': 'incoming_123', 'sync_point': 1} + outgoing_sync = {'remote_id': 'outgoing_123', 'sync_point': 2} + broker.merge_syncs([outgoing_sync], incoming=False) + broker.merge_syncs([incoming_sync], incoming=True) + + # Add some ShardRanges + shard_ranges = [ShardRange( + name='.shards_a/shard_range_%s' % i, + timestamp=next(ts_iter), lower='obj_%d' % i, + upper='obj_%d' % (i + 2), + object_count=len(objects[i:i + 2]), + bytes_used=sum(obj['size'] for obj in objects[i:i + 2]), + meta_timestamp=next(ts_iter)) for i in range(0, 6, 2)] + deleted_range = ShardRange('.shards_a/shard_range_z', next(ts_iter), + 'z', '', state=ShardRange.SHARDED, + deleted=1) + own_sr = ShardRange(name='a/c', timestamp=next(ts_iter), + state=ShardRange.ACTIVE) + broker.merge_shard_ranges([own_sr] + shard_ranges + [deleted_range]) + ts_epoch = next(ts_iter) + new_db_path = os.path.join(tempdir, 'part', 'suffix', 'hash', + 'container_%s.db' % ts_epoch.normal) + + def check_broker_properties(broker): + # these broker properties should remain unchanged as state changes + self.assertEqual(broker.get_max_row(), 5) + all_metadata = broker.metadata + original_meta = dict((k, all_metadata[k]) for k in meta) + self.assertEqual(original_meta, meta) + self.assertEqual(broker.get_syncs(True)[0], incoming_sync) + self.assertEqual(broker.get_syncs(False)[0], outgoing_sync) + self.assertEqual(shard_ranges + [own_sr, deleted_range], + broker.get_shard_ranges(include_own=True, + include_deleted=True)) + + def check_broker_info(actual_info): + for key in ('db_state', 'id', 'hash'): + actual_info.pop(key, None) + original_info.pop(key, None) + self.assertEqual(original_info, actual_info) + + def check_unsharded_state(broker): + # these are expected properties in unsharded state + self.assertEqual(len(broker.get_brokers()), 1) + self.assertEqual(broker.get_db_state(), UNSHARDED) + self.assertTrue(os.path.exists(db_path)) + self.assertFalse(os.path.exists(new_db_path)) + self.assertEqual(objects, broker.get_objects()) + + # Sanity checks + 
check_broker_properties(broker) + check_unsharded_state(broker) + check_broker_info(broker.get_info()) + + # first test that moving from UNSHARDED to SHARDED doesn't work + self.assertFalse(broker.set_sharded_state()) + # check nothing changed + check_broker_properties(broker) + check_broker_info(broker.get_info()) + check_unsharded_state(broker) + + # cannot go to SHARDING without an epoch set + self.assertFalse(broker.set_sharding_state()) + + # now set sharding epoch and make sure everything moves. + broker.enable_sharding(ts_epoch) + self.assertTrue(broker.set_sharding_state()) + check_broker_properties(broker) + check_broker_info(broker.get_info()) + + def check_sharding_state(broker): + self.assertEqual(len(broker.get_brokers()), 2) + self.assertEqual(broker.get_db_state(), SHARDING) + self.assertTrue(os.path.exists(db_path)) + self.assertTrue(os.path.exists(new_db_path)) + self.assertEqual([], broker.get_objects()) + self.assertEqual(objects, broker.get_brokers()[0].get_objects()) + check_sharding_state(broker) + + # to confirm we're definitely looking at the shard db + broker2 = ContainerBroker(new_db_path) + check_broker_properties(broker2) + check_broker_info(broker2.get_info()) + self.assertEqual([], broker2.get_objects()) + + # Try to set sharding state again + self.assertFalse(broker.set_sharding_state()) + # check nothing changed + check_broker_properties(broker) + check_broker_info(broker.get_info()) + check_sharding_state(broker) + + # Now move to the final state - update shard ranges' state + broker.merge_shard_ranges( + [dict(sr, state=ShardRange.ACTIVE, + state_timestamp=next(ts_iter).internal) + for sr in shard_ranges]) + # pretend all ranges have been cleaved + self.assertTrue(broker.set_sharded_state()) + check_broker_properties(broker) + check_broker_info(broker.get_info()) + + def check_sharded_state(broker): + self.assertEqual(broker.get_db_state(), SHARDED) + self.assertEqual(len(broker.get_brokers()), 1) + self.assertFalse(os.path.exists(db_path)) + self.assertTrue(os.path.exists(new_db_path)) + self.assertEqual([], broker.get_objects()) + check_sharded_state(broker) + + # Try to set sharded state again + self.assertFalse(broker.set_sharded_state()) + # check nothing changed + check_broker_properties(broker) + check_broker_info(broker.get_info()) + check_sharded_state(broker) + + # delete the container - sharding sysmeta gets erased + broker.delete_db(next(ts_iter).internal) + # but it is not considered deleted while shards have content + self.assertFalse(broker.is_deleted()) + check_sharded_state(broker) + # empty the shard ranges + empty_shard_ranges = [sr.copy(object_count=0, bytes_used=0, + meta_timestamp=next(ts_iter)) + for sr in shard_ranges] + broker.merge_shard_ranges(empty_shard_ranges) + # and now it is deleted + self.assertTrue(broker.is_deleted()) + check_sharded_state(broker) + + def do_revive_shard_delete(shard_ranges): + # delete all shard ranges + deleted_shard_ranges = [sr.copy(timestamp=next(ts_iter), deleted=1) + for sr in shard_ranges] + broker.merge_shard_ranges(deleted_shard_ranges) + self.assertEqual(COLLAPSED, broker.get_db_state()) + + # add new shard ranges and go to sharding state - need to force + # broker time to be after the delete time in order to write new + # sysmeta + broker.enable_sharding(next(ts_iter)) + shard_ranges = [sr.copy(timestamp=next(ts_iter)) + for sr in shard_ranges] + broker.merge_shard_ranges(shard_ranges) + with mock.patch('swift.common.db.time.time', + lambda: float(next(ts_iter))): + 
self.assertTrue(broker.set_sharding_state()) + self.assertEqual(SHARDING, broker.get_db_state()) + + # go to sharded + self.assertTrue( + broker.set_sharded_state()) + self.assertEqual(SHARDED, broker.get_db_state()) + + # delete again + broker.delete_db(next(ts_iter).internal) + self.assertTrue(broker.is_deleted()) + self.assertEqual(SHARDED, broker.get_db_state()) + + do_revive_shard_delete(shard_ranges) + do_revive_shard_delete(shard_ranges) + + @with_tempdir + def test_set_sharding_state_errors(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(db_path, account='a', container='c', + logger=FakeLogger()) + broker.initialize(next(ts_iter).internal, 0) + broker.enable_sharding(next(ts_iter)) + + orig_execute = GreenDBConnection.execute + trigger = 'INSERT into object' + + def mock_execute(conn, *args, **kwargs): + if trigger in args[0]: + raise sqlite3.OperationalError() + return orig_execute(conn, *args, **kwargs) + + with mock.patch('swift.common.db.GreenDBConnection.execute', + mock_execute): + res = broker.set_sharding_state() + self.assertFalse(res) + lines = broker.logger.get_lines_for_level('error') + self.assertIn('Failed to set the ROWID', lines[0]) + self.assertFalse(lines[1:]) + + broker.logger.clear() + trigger = 'UPDATE container_stat SET created_at' + with mock.patch('swift.common.db.GreenDBConnection.execute', + mock_execute): + res = broker.set_sharding_state() + self.assertFalse(res) + lines = broker.logger.get_lines_for_level('error') + self.assertIn('Failed to set matching', lines[0]) + self.assertFalse(lines[1:]) + + @with_tempdir + def test_set_sharded_state_errors(self, tempdir): + ts_iter = make_timestamp_iter() + retiring_db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(retiring_db_path, account='a', container='c', + logger=FakeLogger()) + broker.initialize(next(ts_iter).internal, 0) + pre_epoch = next(ts_iter) + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + # unlink fails + with mock.patch('os.unlink', side_effect=OSError(errno.EPERM)): + self.assertFalse(broker.set_sharded_state()) + lines = broker.logger.get_lines_for_level('error') + self.assertIn('Failed to unlink', lines[0]) + self.assertFalse(lines[1:]) + self.assertFalse(broker.logger.get_lines_for_level('warning')) + self.assertTrue(os.path.exists(retiring_db_path)) + self.assertTrue(os.path.exists(broker.db_file)) + + # extra files + extra_filename = make_db_file_path(broker.db_file, pre_epoch) + self.assertNotEqual(extra_filename, broker.db_file) # sanity check + with open(extra_filename, 'wb'): + pass + broker.logger.clear() + self.assertFalse(broker.set_sharded_state()) + lines = broker.logger.get_lines_for_level('warning') + self.assertIn('Still have multiple db files', lines[0]) + self.assertFalse(lines[1:]) + self.assertFalse(broker.logger.get_lines_for_level('error')) + self.assertTrue(os.path.exists(retiring_db_path)) + self.assertTrue(os.path.exists(broker.db_file)) + + # retiring file missing + broker.logger.clear() + os.unlink(retiring_db_path) + self.assertFalse(broker.set_sharded_state()) + lines = broker.logger.get_lines_for_level('warning') + self.assertIn('Refusing to delete', lines[0]) + self.assertFalse(lines[1:]) + self.assertFalse(broker.logger.get_lines_for_level('error')) + self.assertTrue(os.path.exists(broker.db_file)) + + @with_tempdir + def test_get_brokers(self, tempdir): + ts_iter 
= make_timestamp_iter() + retiring_db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(retiring_db_path, account='a', container='c', + logger=FakeLogger()) + broker.initialize(next(ts_iter).internal, 0) + brokers = broker.get_brokers() + self.assertEqual(retiring_db_path, brokers[0].db_file) + self.assertFalse(brokers[0].skip_commits) + self.assertFalse(brokers[1:]) + + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + brokers = broker.get_brokers() + self.assertEqual(retiring_db_path, brokers[0].db_file) + self.assertTrue(brokers[0].skip_commits) + self.assertEqual(broker.db_file, brokers[1].db_file) + self.assertFalse(brokers[1].skip_commits) + self.assertFalse(brokers[2:]) + + # same outcome when called on retiring db broker + brokers = brokers[0].get_brokers() + self.assertEqual(retiring_db_path, brokers[0].db_file) + self.assertTrue(brokers[0].skip_commits) + self.assertEqual(broker.db_file, brokers[1].db_file) + self.assertFalse(brokers[1].skip_commits) + self.assertFalse(brokers[2:]) + + self.assertTrue(broker.set_sharded_state()) + brokers = broker.get_brokers() + self.assertEqual(broker.db_file, brokers[0].db_file) + self.assertFalse(brokers[0].skip_commits) + self.assertFalse(brokers[1:]) + + # unexpected extra file should be ignored + with open(retiring_db_path, 'wb'): + pass + retiring_db_path = broker.db_file + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + broker.reload_db_files() + self.assertEqual(3, len(broker.db_files)) # sanity check + brokers = broker.get_brokers() + self.assertEqual(retiring_db_path, brokers[0].db_file) + self.assertTrue(brokers[0].skip_commits) + self.assertEqual(broker.db_file, brokers[1].db_file) + self.assertFalse(brokers[1].skip_commits) + self.assertFalse(brokers[2:]) + lines = broker.logger.get_lines_for_level('warning') + self.assertIn('Unexpected db files', lines[0]) + self.assertFalse(lines[1:]) + + @with_tempdir + def test_merge_shard_ranges(self, tempdir): + ts_iter = make_timestamp_iter() + ts = [next(ts_iter) for _ in range(13)] + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker( + db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + + # sanity check + self.assertFalse(broker.get_shard_ranges(include_deleted=True)) + + broker.merge_shard_ranges(None) + self.assertFalse(broker.get_shard_ranges(include_deleted=True)) + + # merge item at ts1 + # sr___ + sr_b_1_1 = ShardRange('a/c_b', ts[1], lower='a', upper='b', + object_count=2) + broker.merge_shard_ranges([sr_b_1_1]) + self._assert_shard_ranges(broker, [sr_b_1_1]) + + # merge older item - ignored + sr_b_0_0 = ShardRange('a/c_b', ts[0], lower='a', upper='b', + object_count=1) + broker.merge_shard_ranges([sr_b_0_0]) + self._assert_shard_ranges(broker, [sr_b_1_1]) + + # merge same timestamp - ignored + broker.merge_shard_ranges([dict(sr_b_1_1, lower='', upper='c')]) + self._assert_shard_ranges(broker, [sr_b_1_1]) + broker.merge_shard_ranges([dict(sr_b_1_1, object_count=99)]) + self._assert_shard_ranges(broker, [sr_b_1_1]) + + # merge list with older item *after* newer item + sr_c_2_2 = ShardRange('a/c_c', ts[2], lower='b', upper='c', + object_count=3) + sr_c_3_3 = ShardRange('a/c_c', ts[3], lower='b', upper='c', + object_count=4) + broker.merge_shard_ranges([sr_c_3_3, sr_c_2_2]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_3_3]) + + # merge newer item - updated + sr_c_5_5 
= ShardRange('a/c_c', ts[5], lower='b', upper='c', + object_count=5) + broker.merge_shard_ranges([sr_c_5_5]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_5]) + + # merge older metadata item - ignored + sr_c_5_4 = ShardRange('a/c_c', ts[5], lower='b', upper='c', + object_count=6, meta_timestamp=ts[4]) + broker.merge_shard_ranges([sr_c_5_4]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_5]) + + # merge newer metadata item - only metadata is updated + sr_c_5_6 = ShardRange('a/c_c', ts[5], lower='b', upper='c', + object_count=7, meta_timestamp=ts[6]) + broker.merge_shard_ranges([dict(sr_c_5_6, lower='', upper='d')]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_6]) + + # merge older created_at, newer metadata item - ignored + sr_c_4_7 = ShardRange('a/c_c', ts[4], lower='b', upper='c', + object_count=8, meta_timestamp=ts[7]) + broker.merge_shard_ranges([sr_c_4_7]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_6]) + + # merge list with older metadata item *after* newer metadata item + sr_c_5_11 = ShardRange('a/c_c', ts[5], lower='b', upper='c', + object_count=9, meta_timestamp=ts[11]) + broker.merge_shard_ranges([sr_c_5_11, sr_c_5_6]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_11]) + + # deleted item at *same timestamp* as existing - deleted ignored + broker.merge_shard_ranges([dict(sr_b_1_1, deleted=1, object_count=0)]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_11]) + sr_b_1_1.meta_timestamp = ts[11] + broker.merge_shard_ranges([dict(sr_b_1_1, deleted=1)]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_11]) + sr_b_1_1.state_timestamp = ts[11] + broker.merge_shard_ranges([dict(sr_b_1_1, deleted=1)]) + self._assert_shard_ranges(broker, [sr_b_1_1, sr_c_5_11]) + + # delete item at *newer timestamp* - updated + sr_b_2_2_deleted = ShardRange('a/c_b', ts[2], lower='a', upper='b', + object_count=0, deleted=1) + broker.merge_shard_ranges([sr_b_2_2_deleted]) + self._assert_shard_ranges(broker, [sr_b_2_2_deleted, sr_c_5_11]) + + # merge list with older undeleted item *after* newer deleted item + # NB deleted timestamp trumps newer meta timestamp + sr_c_9_12 = ShardRange('a/c_c', ts[9], lower='b', upper='c', + object_count=10, meta_timestamp=ts[12]) + sr_c_10_10_deleted = ShardRange('a/c_c', ts[10], lower='b', upper='c', + object_count=0, deleted=1) + broker.merge_shard_ranges([sr_c_10_10_deleted, sr_c_9_12]) + self._assert_shard_ranges( + broker, [sr_b_2_2_deleted, sr_c_10_10_deleted]) + + @with_tempdir + def test_merge_shard_ranges_state(self, tempdir): + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker(db_path, account='a', container='c') + broker.initialize(next(ts_iter).internal, 0) + expected_shard_ranges = [] + + def do_test(orig_state, orig_timestamp, test_state, test_timestamp, + expected_state, expected_timestamp): + index = len(expected_shard_ranges) + sr = ShardRange('a/%s' % index, orig_timestamp, '%03d' % index, + '%03d' % (index + 1), state=orig_state) + broker.merge_shard_ranges([sr]) + sr.state = test_state + sr.state_timestamp = test_timestamp + broker.merge_shard_ranges([sr]) + sr.state = expected_state + sr.state_timestamp = expected_timestamp + expected_shard_ranges.append(sr) + self._assert_shard_ranges(broker, expected_shard_ranges) + + # state at older state_timestamp is not merged + for orig_state in ShardRange.STATES: + for test_state in ShardRange.STATES: + ts_older = next(ts_iter) + ts = next(ts_iter) + do_test(orig_state, ts, 
test_state, ts_older, orig_state, ts) + + # more advanced state at same timestamp is merged + for orig_state in ShardRange.STATES: + for test_state in ShardRange.STATES: + ts = next(ts_iter) + do_test(orig_state, ts, test_state, ts, + test_state if test_state > orig_state else orig_state, + ts) + + # any state at newer timestamp is merged + for orig_state in ShardRange.STATES: + for test_state in ShardRange.STATES: + ts = next(ts_iter) + ts_newer = next(ts_iter) + do_test(orig_state, ts, test_state, ts_newer, test_state, + ts_newer) + + def _check_object_stats_when_sharded(self, a, c, root_a, root_c, tempdir): + # common setup and assertions for root and shard containers + ts_iter = make_timestamp_iter() + db_path = os.path.join( + tempdir, 'part', 'suffix', 'hash', 'container.db') + broker = ContainerBroker( + db_path, account=a, container=c) + broker.initialize(next(ts_iter).internal, 0) + broker.set_sharding_sysmeta('Root', '%s/%s' % (root_a, root_c)) + broker.merge_items([{'name': 'obj', 'size': 14, 'etag': 'blah', + 'content_type': 'text/plain', 'deleted': 0, + 'created_at': Timestamp.now().internal}]) + self.assertEqual(1, broker.get_info()['object_count']) + self.assertEqual(14, broker.get_info()['bytes_used']) + + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + sr_1 = ShardRange( + '%s/%s1' % (root_a, root_c), Timestamp.now(), lower='', upper='m', + object_count=99, bytes_used=999, state=ShardRange.ACTIVE) + sr_2 = ShardRange( + '%s/%s2' % (root_a, root_c), Timestamp.now(), lower='m', upper='', + object_count=21, bytes_used=1000, state=ShardRange.ACTIVE) + broker.merge_shard_ranges([sr_1, sr_2]) + self.assertEqual(1, broker.get_info()['object_count']) + self.assertEqual(14, broker.get_info()['bytes_used']) + return broker + + @with_tempdir + def test_object_stats_root_container(self, tempdir): + broker = self._check_object_stats_when_sharded( + 'a', 'c', 'a', 'c', tempdir) + self.assertTrue(broker.is_root_container()) # sanity + self.assertTrue(broker.set_sharded_state()) + self.assertEqual(120, broker.get_info()['object_count']) + self.assertEqual(1999, broker.get_info()['bytes_used']) + + @with_tempdir + def test_object_stats_shard_container(self, tempdir): + broker = self._check_object_stats_when_sharded( + '.shard_a', 'c-blah', 'a', 'c', tempdir) + self.assertFalse(broker.is_root_container()) # sanity + self.assertTrue(broker.set_sharded_state()) + self.assertEqual(0, broker.get_info()['object_count']) + self.assertEqual(0, broker.get_info()['bytes_used']) + class TestCommonContainerBroker(test_db.TestExampleBroker): @@ -2127,6 +4446,15 @@ Mixin for running ContainerBroker against databases created with older schemas. 
""" + class OverrideCreateShardRangesTable(object): + def __init__(self, func): + self.func = func + + def __get__(self, obj, obj_type): + if inspect.stack()[1][3] == '_initialize': + return lambda *a, **kw: None + return self.func.__get__(obj, obj_type) + def setUp(self): self._imported_create_object_table = \ ContainerBroker.create_object_table @@ -2140,6 +4468,13 @@ ContainerBroker.create_policy_stat_table ContainerBroker.create_policy_stat_table = lambda *args: None + self._imported_create_shard_range_table = \ + ContainerBroker.create_shard_range_table + if 'shard_range' not in self.expected_db_tables: + ContainerBroker.create_shard_range_table = \ + self.OverrideCreateShardRangesTable( + ContainerBroker.create_shard_range_table) + @classmethod @contextmanager def old_broker(cls): @@ -2156,6 +4491,8 @@ self._imported_create_container_info_table ContainerBroker.create_object_table = \ self._imported_create_object_table + ContainerBroker.create_shard_range_table = \ + self._imported_create_shard_range_table ContainerBroker.create_policy_stat_table = \ self._imported_create_policy_stat_table @@ -2209,6 +4546,8 @@ Tests for ContainerBroker against databases created before the metadata column was added. """ + expected_db_tables = {'outgoing_sync', 'incoming_sync', 'object', + 'sqlite_sequence', 'container_stat'} def setUp(self): super(TestContainerBrokerBeforeMetadata, self).setUp() @@ -2281,6 +4620,8 @@ Tests for ContainerBroker against databases created before the x_container_sync_point[12] columns were added. """ + expected_db_tables = {'outgoing_sync', 'incoming_sync', 'object', + 'sqlite_sequence', 'container_stat'} def setUp(self): super(TestContainerBrokerBeforeXSync, self).setUp() @@ -2395,6 +4736,8 @@ Tests for ContainerBroker against databases created before the storage_policy_index column was added. """ + expected_db_tables = {'outgoing_sync', 'incoming_sync', 'object', + 'sqlite_sequence', 'container_stat'} def setUp(self): super(TestContainerBrokerBeforeSPI, self).setUp() @@ -2403,14 +4746,12 @@ broker = ContainerBroker(':memory:', account='a', container='c') broker.initialize(Timestamp('1').internal, 0) - exc = None - with broker.get() as conn: - try: - conn.execute('''SELECT storage_policy_index - FROM container_stat''') - except BaseException as err: - exc = err - self.assertTrue('no such column: storage_policy_index' in str(exc)) + with self.assertRaises(sqlite3.DatabaseError) as raised, \ + broker.get() as conn: + conn.execute('''SELECT storage_policy_index + FROM container_stat''') + self.assertIn('no such column: storage_policy_index', + str(raised.exception)) def tearDown(self): super(TestContainerBrokerBeforeSPI, self).tearDown() @@ -2599,6 +4940,35 @@ self.assertEqual(info['bytes_used'], 456) +class TestContainerBrokerBeforeShardRanges(ContainerBrokerMigrationMixin, + TestContainerBroker): + """ + Tests for ContainerBroker against databases created + before the shard_ranges table was added. 
+ """ + # *grumble grumble* This should include container_info/policy_stat :-/ + expected_db_tables = {'outgoing_sync', 'incoming_sync', 'object', + 'sqlite_sequence', 'container_stat'} + + def setUp(self): + super(TestContainerBrokerBeforeShardRanges, self).setUp() + broker = ContainerBroker(':memory:', account='a', container='c') + broker.initialize(Timestamp('1').internal, 0) + with self.assertRaises(sqlite3.DatabaseError) as raised, \ + broker.get() as conn: + conn.execute('''SELECT * + FROM shard_range''') + self.assertIn('no such table: shard_range', str(raised.exception)) + + def tearDown(self): + super(TestContainerBrokerBeforeShardRanges, self).tearDown() + broker = ContainerBroker(':memory:', account='a', container='c') + broker.initialize(Timestamp('1').internal, 0) + with broker.get() as conn: + conn.execute('''SELECT * + FROM shard_range''') + + class TestUpdateNewItemFromExisting(unittest.TestCase): # TODO: add test scenarios that have swift_bytes in content_type t0 = '1234567890.00000' diff -Nru swift-2.17.0/test/unit/container/test_replicator.py swift-2.18.0/test/unit/container/test_replicator.py --- swift-2.17.0/test/unit/container/test_replicator.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/container/test_replicator.py 2018-05-30 10:17:02.000000000 +0000 @@ -23,16 +23,21 @@ import sqlite3 from swift.common import db_replicator +from swift.common.swob import HTTPServerError from swift.container import replicator, backend, server, sync_store from swift.container.reconciler import ( MISPLACED_OBJECTS_ACCOUNT, get_reconciler_container_name) -from swift.common.utils import Timestamp, encode_timestamps +from swift.common.utils import Timestamp, encode_timestamps, ShardRange, \ + get_db_files, make_db_file_path from swift.common.storage_policy import POLICIES from test.unit.common import test_db_replicator -from test.unit import patch_policies, make_timestamp_iter, mock_check_drive +from test.unit import patch_policies, make_timestamp_iter, mock_check_drive, \ + debug_logger, EMPTY_ETAG, FakeLogger from contextlib import contextmanager +from test.unit.common.test_db_replicator import attach_fake_replication_rpc + @patch_policies class TestReplicatorSync(test_db_replicator.TestReplicatorSync): @@ -42,6 +47,16 @@ replicator_daemon = replicator.ContainerReplicator replicator_rpc = replicator.ContainerReplicatorRpc + def assertShardRangesEqual(self, x, y): + # ShardRange.__eq__ only compares lower and upper; here we generate + # dict representations to compare all attributes + self.assertEqual([dict(sr) for sr in x], [dict(sr) for sr in y]) + + def assertShardRangesNotEqual(self, x, y): + # ShardRange.__eq__ only compares lower and upper; here we generate + # dict representations to compare all attributes + self.assertNotEqual([dict(sr) for sr in x], [dict(sr) for sr in y]) + def test_report_up_to_date(self): broker = self._get_broker('a', 'c', node_index=0) broker.initialize(Timestamp(1).internal, int(POLICIES.default)) @@ -1148,6 +1163,1099 @@ self.assertEqual(1, mock_remove.call_count) self.assertEqual(broker_2.db_file, mock_remove.call_args[0][0].db_file) + def test_cleanup_post_replicate(self): + broker = self._get_broker('a', 'c', node_index=0) + put_timestamp = Timestamp.now() + broker.initialize(put_timestamp.internal, POLICIES.default.idx) + orig_info = broker.get_replication_info() + daemon = replicator.ContainerReplicator({}, logger=self.logger) + + # db should not be here, replication ok, deleted + res = daemon.cleanup_post_replicate(broker, 
orig_info, [True] * 3) + self.assertTrue(res) + self.assertFalse(os.path.exists(broker.db_file)) + self.assertEqual(['Successfully deleted db %s' % broker.db_file], + daemon.logger.get_lines_for_level('debug')) + daemon.logger.clear() + + # failed replication, not deleted + broker.initialize(put_timestamp.internal, POLICIES.default.idx) + orig_info = broker.get_replication_info() + res = daemon.cleanup_post_replicate(broker, orig_info, + [False, True, True]) + self.assertTrue(res) + self.assertTrue(os.path.exists(broker.db_file)) + self.assertEqual(['Not deleting db %s (2/3 success)' % broker.db_file], + daemon.logger.get_lines_for_level('debug')) + daemon.logger.clear() + + # db has shard ranges, not deleted + broker.enable_sharding(Timestamp.now()) + broker.merge_shard_ranges( + [ShardRange('.shards_a/c', Timestamp.now(), '', 'm')]) + self.assertTrue(broker.sharding_required()) # sanity check + res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3) + self.assertTrue(res) + self.assertTrue(os.path.exists(broker.db_file)) + self.assertEqual( + ['Not deleting db %s (requires sharding, state unsharded)' % + broker.db_file], + daemon.logger.get_lines_for_level('debug')) + daemon.logger.clear() + + # db sharding, not deleted + self._goto_sharding_state(broker, Timestamp.now()) + self.assertTrue(broker.sharding_required()) # sanity check + orig_info = broker.get_replication_info() + res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3) + self.assertTrue(res) + self.assertTrue(os.path.exists(broker.db_file)) + self.assertEqual( + ['Not deleting db %s (requires sharding, state sharding)' % + broker.db_file], + daemon.logger.get_lines_for_level('debug')) + daemon.logger.clear() + + # db sharded, should not be here, failed replication, not deleted + self._goto_sharded_state(broker) + self.assertFalse(broker.sharding_required()) # sanity check + res = daemon.cleanup_post_replicate(broker, orig_info, + [True, False, True]) + self.assertTrue(res) + self.assertTrue(os.path.exists(broker.db_file)) + self.assertEqual(['Not deleting db %s (2/3 success)' % + broker.db_file], + daemon.logger.get_lines_for_level('debug')) + daemon.logger.clear() + + # db sharded, should not be here, new shard ranges (e.g. 
from reverse + # replication), deleted + broker.merge_shard_ranges( + [ShardRange('.shards_a/c', Timestamp.now(), '', 'm')]) + res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3) + self.assertTrue(res) + self.assertFalse(os.path.exists(broker.db_file)) + daemon.logger.clear() + + # db sharded, should not be here, replication ok, deleted + broker.initialize(put_timestamp.internal, POLICIES.default.idx) + self.assertTrue(os.path.exists(broker.db_file)) + orig_info = broker.get_replication_info() + res = daemon.cleanup_post_replicate(broker, orig_info, [True] * 3) + self.assertTrue(res) + self.assertFalse(os.path.exists(broker.db_file)) + self.assertEqual(['Successfully deleted db %s' % broker.db_file], + daemon.logger.get_lines_for_level('debug')) + daemon.logger.clear() + + def test_sync_shard_ranges(self): + put_timestamp = Timestamp.now().internal + # create "local" broker + broker = self._get_broker('a', 'c', node_index=0) + broker.initialize(put_timestamp, POLICIES.default.idx) + # create "remote" broker + remote_broker = self._get_broker('a', 'c', node_index=1) + remote_broker.initialize(put_timestamp, POLICIES.default.idx) + + def check_replicate(expected_shard_ranges, from_broker, to_broker): + daemon = replicator.ContainerReplicator({}) + part, node = self._get_broker_part_node(to_broker) + info = broker.get_replication_info() + success = daemon._repl_to_node(node, from_broker, part, info) + self.assertTrue(success) + self.assertEqual( + expected_shard_ranges, + to_broker.get_all_shard_range_data() + ) + self.assertEqual(1, daemon.stats['deferred']) + self.assertEqual(0, daemon.stats['rsync']) + self.assertEqual(0, daemon.stats['diff']) + local_info = self._get_broker( + 'a', 'c', node_index=0).get_info() + remote_info = self._get_broker( + 'a', 'c', node_index=1).get_info() + for k, v in local_info.items(): + if k == 'id': + continue + self.assertEqual(remote_info[k], v, + "mismatch remote %s %r != %r" % ( + k, remote_info[k], v)) + + bounds = (('', 'g'), ('g', 'r'), ('r', '')) + shard_ranges = [ + ShardRange('.shards_a/sr-%s' % upper, Timestamp.now(), lower, + upper, i + 1, 10 * (i + 1)) + for i, (lower, upper) in enumerate(bounds) + ] + # add first two shard_ranges to both brokers + for shard_range in shard_ranges[:2]: + for db in (broker, remote_broker): + db.merge_shard_ranges(shard_range) + # now add a shard range to the "local" broker only + own_sr = broker.enable_sharding(Timestamp.now()) + broker.merge_shard_ranges(shard_ranges[2]) + broker_ranges = broker.get_all_shard_range_data() + self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges) + check_replicate(broker_ranges, broker, remote_broker) + + # update one shard range + shard_ranges[1].update_meta(99, 0) + broker.merge_shard_ranges(shard_ranges[1]) + # sanity check + broker_ranges = broker.get_all_shard_range_data() + self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges) + check_replicate(broker_ranges, broker, remote_broker) + + # delete one shard range + shard_ranges[0].deleted = 1 + shard_ranges[0].timestamp = Timestamp.now() + broker.merge_shard_ranges(shard_ranges[0]) + # sanity check + broker_ranges = broker.get_all_shard_range_data() + self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges) + check_replicate(broker_ranges, broker, remote_broker) + + # put a shard range again + shard_ranges[2].timestamp = Timestamp.now() + shard_ranges[2].object_count = 0 + broker.merge_shard_ranges(shard_ranges[2]) + # sanity check + broker_ranges = 
broker.get_all_shard_range_data() + self.assertShardRangesEqual(shard_ranges + [own_sr], broker_ranges) + check_replicate(broker_ranges, broker, remote_broker) + + # update same shard range on local and remote, remote later + shard_ranges[-1].meta_timestamp = Timestamp.now() + shard_ranges[-1].bytes_used += 1000 + broker.merge_shard_ranges(shard_ranges[-1]) + remote_shard_ranges = remote_broker.get_shard_ranges( + include_deleted=True) + remote_shard_ranges[-1].meta_timestamp = Timestamp.now() + remote_shard_ranges[-1].bytes_used += 2000 + remote_broker.merge_shard_ranges(remote_shard_ranges[-1]) + # sanity check + remote_broker_ranges = remote_broker.get_all_shard_range_data() + self.assertShardRangesEqual(remote_shard_ranges + [own_sr], + remote_broker_ranges) + self.assertShardRangesNotEqual(shard_ranges, remote_shard_ranges) + check_replicate(remote_broker_ranges, broker, remote_broker) + + # undelete shard range *on the remote* + deleted_ranges = [sr for sr in remote_shard_ranges if sr.deleted] + self.assertEqual([shard_ranges[0]], deleted_ranges) + deleted_ranges[0].deleted = 0 + deleted_ranges[0].timestamp = Timestamp.now() + remote_broker.merge_shard_ranges(deleted_ranges[0]) + # sanity check + remote_broker_ranges = remote_broker.get_all_shard_range_data() + self.assertShardRangesEqual(remote_shard_ranges + [own_sr], + remote_broker_ranges) + self.assertShardRangesNotEqual(shard_ranges, remote_shard_ranges) + check_replicate(remote_broker_ranges, broker, remote_broker) + + # reverse replication direction and expect syncs to propagate + check_replicate(remote_broker_ranges, remote_broker, broker) + + def test_sync_shard_ranges_error(self): + # verify that replication is not considered successful if + # merge_shard_ranges fails + put_time = Timestamp.now().internal + broker = self._get_broker('a', 'c', node_index=0) + broker.initialize(put_time, POLICIES.default.idx) + remote_broker = self._get_broker('a', 'c', node_index=1) + remote_broker.initialize(put_time, POLICIES.default.idx) + # put an object into local broker + broker.put_object('obj', Timestamp.now().internal, 0, 'text/plain', + EMPTY_ETAG) + # get an own shard range into local broker + broker.enable_sharding(Timestamp.now()) + self.assertFalse(broker.sharding_initiated()) + + replicate_hook = mock.MagicMock() + fake_repl_connection = attach_fake_replication_rpc( + self.rpc, errors={'merge_shard_ranges': [HTTPServerError()]}, + replicate_hook=replicate_hook) + db_replicator.ReplConnection = fake_repl_connection + part, node = self._get_broker_part_node(remote_broker) + info = broker.get_replication_info() + daemon = replicator.ContainerReplicator({}) + daemon.logger = FakeLogger() + success = daemon._repl_to_node(node, broker, part, info) + self.assertFalse(success) + # broker only has its own shard range so expect objects to be sync'd + self.assertEqual( + ['sync', 'get_shard_ranges', 'merge_shard_ranges', 'merge_items', + 'merge_syncs'], + [call[0][0] for call in replicate_hook.call_args_list]) + error_lines = daemon.logger.get_lines_for_level('error') + self.assertIn('Bad response 500', error_lines[0]) + self.assertFalse(error_lines[1:]) + self.assertEqual(1, daemon.stats['diff']) + self.assertEqual(1, daemon.logger.get_increment_counts()['diffs']) + + def test_sync_shard_ranges_none_to_sync(self): + # verify that merge_shard_ranges is not sent if there are no shard + # ranges to sync + put_time = Timestamp.now().internal + broker = self._get_broker('a', 'c', node_index=0) + broker.initialize(put_time, 
POLICIES.default.idx) + remote_broker = self._get_broker('a', 'c', node_index=1) + remote_broker.initialize(put_time, POLICIES.default.idx) + # put an object into local broker + broker.put_object('obj', Timestamp.now().internal, 0, 'text/plain', + EMPTY_ETAG) + + replicate_hook = mock.MagicMock() + fake_repl_connection = attach_fake_replication_rpc( + self.rpc, replicate_hook=replicate_hook) + db_replicator.ReplConnection = fake_repl_connection + part, node = self._get_broker_part_node(remote_broker) + info = broker.get_replication_info() + daemon = replicator.ContainerReplicator({}) + success = daemon._repl_to_node(node, broker, part, info) + self.assertTrue(success) + self.assertEqual( + ['sync', 'get_shard_ranges', 'merge_items', 'merge_syncs'], + [call[0][0] for call in replicate_hook.call_args_list]) + + def test_sync_shard_ranges_with_rsync(self): + broker = self._get_broker('a', 'c', node_index=0) + put_timestamp = time.time() + broker.initialize(put_timestamp, POLICIES.default.idx) + + bounds = (('', 'g'), ('g', 'r'), ('r', '')) + shard_ranges = [ + ShardRange('.shards_a/sr-%s' % upper, Timestamp.now(), lower, + upper, i + 1, 10 * (i + 1)) + for i, (lower, upper) in enumerate(bounds) + ] + # add first shard range + own_sr = broker.enable_sharding(Timestamp.now()) + broker.merge_shard_ranges(shard_ranges[:1]) + + # "replicate" + part, node = self._get_broker_part_node(broker) + daemon = self._run_once(node) + self.assertEqual(2, daemon.stats['rsync']) + + # complete rsync to all other nodes + def check_replicate(expected_ranges): + for i in range(1, 3): + remote_broker = self._get_broker('a', 'c', node_index=i) + self.assertTrue(os.path.exists(remote_broker.db_file)) + self.assertShardRangesEqual( + expected_ranges, + remote_broker.get_shard_ranges(include_deleted=True, + include_own=True) + ) + remote_info = remote_broker.get_info() + local_info = self._get_broker( + 'a', 'c', node_index=0).get_info() + for k, v in local_info.items(): + if k == 'id': + continue + if k == 'hash': + self.assertEqual(remote_info[k], '0' * 32) + continue + if k == 'object_count': + self.assertEqual(remote_info[k], 0) + continue + self.assertEqual(remote_info[k], v, + "mismatch remote %s %r != %r" % ( + k, remote_info[k], v)) + + check_replicate([shard_ranges[0], own_sr]) + + # delete and add some more shard ranges + shard_ranges[0].deleted = 1 + shard_ranges[0].timestamp = Timestamp.now() + for shard_range in shard_ranges: + broker.merge_shard_ranges(shard_range) + daemon = self._run_once(node) + self.assertEqual(2, daemon.stats['deferred']) + check_replicate(shard_ranges + [own_sr]) + + def check_replicate(self, from_broker, remote_node_index, repl_conf=None, + expect_success=True, errors=None): + repl_conf = repl_conf or {} + repl_calls = [] + rsync_calls = [] + + def repl_hook(op, *sync_args): + repl_calls.append((op, sync_args)) + + fake_repl_connection = attach_fake_replication_rpc( + self.rpc, replicate_hook=repl_hook, errors=errors) + db_replicator.ReplConnection = fake_repl_connection + daemon = replicator.ContainerReplicator( + repl_conf, logger=debug_logger()) + self._install_fake_rsync_file(daemon, rsync_calls) + part, nodes = self._ring.get_nodes(from_broker.account, + from_broker.container) + + def find_node(node_index): + for node in nodes: + if node['index'] == node_index: + return node + else: + self.fail('Failed to find node index %s' % remote_node_index) + + remote_node = find_node(remote_node_index) + info = from_broker.get_replication_info() + success = 
daemon._repl_to_node(remote_node, from_broker, part, info) + self.assertEqual(expect_success, success) + return daemon, repl_calls, rsync_calls + + def assert_synced_shard_ranges(self, expected, synced_items): + expected.sort(key=lambda sr: (sr.lower, sr.upper)) + for item in synced_items: + item.pop('record_type', None) + self.assertEqual([dict(ex) for ex in expected], synced_items) + + def assert_info_synced(self, local, remote_node_index, mismatches=None): + mismatches = mismatches or [] + mismatches.append('id') + remote = self._get_broker(local.account, local.container, + node_index=remote_node_index) + local_info = local.get_info() + remote_info = remote.get_info() + errors = [] + for k, v in local_info.items(): + if remote_info.get(k) == v: + if k in mismatches: + errors.append( + "unexpected match remote %s %r == %r" % ( + k, remote_info[k], v)) + continue + else: + if k not in mismatches: + errors.append( + "unexpected mismatch remote %s %r != %r" % ( + k, remote_info[k], v)) + if errors: + self.fail('Found sync errors:\n' + '\n'.join(errors)) + + def assert_shard_ranges_synced(self, local_broker, remote_broker): + self.assertShardRangesEqual( + local_broker.get_shard_ranges(include_deleted=True, + include_own=True), + remote_broker.get_shard_ranges(include_deleted=True, + include_own=True) + ) + + def _setup_replication_test(self, node_index): + ts_iter = make_timestamp_iter() + policy_idx = POLICIES.default.idx + put_timestamp = Timestamp.now().internal + # create "local" broker + broker = self._get_broker('a', 'c', node_index=node_index) + broker.initialize(put_timestamp, policy_idx) + + objs = [{'name': 'blah%03d' % i, 'created_at': next(ts_iter).internal, + 'size': i, 'content_type': 'text/plain', 'etag': 'etag%s' % i, + 'deleted': 0, 'storage_policy_index': policy_idx} + for i in range(20)] + bounds = (('', 'a'), ('a', 'b'), ('b', 'c'), ('c', '')) + shard_ranges = [ + ShardRange( + '.sharded_a/sr-%s' % upper, Timestamp.now(), lower, upper) + for i, (lower, upper) in enumerate(bounds) + ] + return {'broker': broker, + 'objects': objs, + 'shard_ranges': shard_ranges} + + def _merge_object(self, broker, objects, index, **kwargs): + if not isinstance(index, slice): + index = slice(index, index + 1) + objs = [dict(obj) for obj in objects[index]] + broker.merge_items(objs) + + def _merge_shard_range(self, broker, shard_ranges, index, **kwargs): + broker.merge_shard_ranges(shard_ranges[index:index + 1]) + + def _goto_sharding_state(self, broker, epoch): + broker.enable_sharding(epoch) + self.assertTrue(broker.set_sharding_state()) + self.assertEqual(backend.SHARDING, broker.get_db_state()) + + def _goto_sharded_state(self, broker): + self.assertTrue(broker.set_sharded_state()) + self.assertEqual(backend.SHARDED, broker.get_db_state()) + + def _assert_local_sharded_in_sync(self, local_broker, local_id): + daemon, repl_calls, rsync_calls = self.check_replicate(local_broker, 1) + self.assertEqual(['sync', 'get_shard_ranges', 'merge_shard_ranges'], + [call[0] for call in repl_calls]) + self.assertEqual(1, daemon.stats['deferred']) + self.assertEqual(0, daemon.stats['rsync']) + self.assertEqual(0, daemon.stats['diff']) + self.assertFalse(rsync_calls) + # new db sync + self.assertEqual(local_id, repl_calls[0][1][2]) + # ...but we still get a merge_shard_ranges for shard ranges + self.assert_synced_shard_ranges( + local_broker.get_shard_ranges(include_own=True), + repl_calls[2][1][0]) + self.assertEqual(local_id, repl_calls[2][1][1]) + + def _check_only_shard_ranges_replicated(self, 
local_broker, + remote_node_index, + repl_conf, + expected_shard_ranges, + expect_success=True): + # expected_shard_ranges is expected final list of sync'd ranges + daemon, repl_calls, rsync_calls = self.check_replicate( + local_broker, remote_node_index, repl_conf, + expect_success=expect_success) + + # we always expect only shard ranges to end in abort + self.assertEqual(1, daemon.stats['deferred']) + self.assertEqual(0, daemon.stats['diff']) + self.assertEqual(0, daemon.stats['rsync']) + self.assertEqual(['sync', 'get_shard_ranges', 'merge_shard_ranges'], + [call[0] for call in repl_calls]) + self.assertFalse(rsync_calls) + # sync + local_id = local_broker.get_info()['id'] + self.assertEqual(local_id, repl_calls[0][1][2]) + # get_shard_ranges + self.assertEqual((), repl_calls[1][1]) + # merge_shard_ranges for sending local shard ranges + self.assertShardRangesEqual(expected_shard_ranges, repl_calls[2][1][0]) + self.assertEqual(local_id, repl_calls[2][1][1]) + remote_broker = self._get_broker( + local_broker.account, local_broker.container, node_index=1) + self.assertNotEqual(local_id, remote_broker.get_info()['id']) + self.assert_shard_ranges_synced(remote_broker, local_broker) + + def test_replication_local_unsharded_remote_missing(self): + context = self._setup_replication_test(0) + local_broker = context['broker'] + local_id = local_broker.get_info()['id'] + objs = context['objects'] + self._merge_object(index=0, **context) + + daemon, repl_calls, rsync_calls = self.check_replicate(local_broker, 1) + + self.assert_info_synced(local_broker, 1) + self.assertEqual(1, daemon.stats['rsync']) + self.assertEqual(['sync', 'complete_rsync'], + [call[0] for call in repl_calls]) + self.assertEqual(local_id, repl_calls[1][1][0]) + self.assertEqual(os.path.basename(local_broker.db_file), + repl_calls[1][1][1]) + self.assertEqual(local_broker.db_file, rsync_calls[0][0]) + self.assertEqual(local_id, os.path.basename(rsync_calls[0][1])) + self.assertFalse(rsync_calls[1:]) + remote_broker = self._get_broker('a', 'c', node_index=1) + self.assert_shard_ranges_synced(local_broker, remote_broker) + self.assertTrue(os.path.exists(remote_broker._db_file)) + self.assertNotEqual(local_id, remote_broker.get_info()['id']) + self.assertEqual(objs[:1], remote_broker.get_objects()) + + def _check_replication_local_unsharded_remote_sharded(self, repl_conf): + context = self._setup_replication_test(0) + local_broker = context['broker'] + local_id = local_broker.get_info()['id'] + self._merge_object(index=slice(0, 6), **context) + + remote_context = self._setup_replication_test(1) + self._merge_object(index=4, **remote_context) + remote_broker = remote_context['broker'] + epoch = Timestamp.now() + self._goto_sharding_state(remote_broker, epoch=epoch) + remote_context['shard_ranges'][0].object_count = 101 + remote_context['shard_ranges'][0].bytes_used = 1010 + remote_context['shard_ranges'][0].state = ShardRange.ACTIVE + self._merge_shard_range(index=0, **remote_context) + self._merge_object(index=5, **remote_context) + self._goto_sharded_state(remote_broker) + self.assertEqual(backend.SHARDED, remote_broker.get_db_state()) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + remote_broker.get_shard_ranges(include_own=True)) + + remote_broker = self._get_broker( + local_broker.account, local_broker.container, node_index=1) + self.assertEqual(backend.SHARDED, remote_broker.get_db_state()) + self.assertFalse(os.path.exists(remote_broker._db_file)) + self.assertNotEqual(local_id, 
remote_broker.get_info()['id']) + self.assertEqual(remote_context['objects'][5:6], + remote_broker.get_objects()) + + # Now that we have shard ranges, we're never considered in-sync :-/ + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + remote_broker.get_shard_ranges(include_own=True)) + + def test_replication_local_unsharded_remote_sharded(self): + self._check_replication_local_unsharded_remote_sharded({}) + + def test_replication_local_unsharded_remote_sharded_large_diff(self): + self._check_replication_local_unsharded_remote_sharded({'per_diff': 1}) + + def _check_replication_local_sharding_remote_missing(self, repl_conf): + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + self._merge_object(index=0, **local_context) + self._merge_object(index=1, **local_context) + epoch = Timestamp.now() + self._goto_sharding_state(local_broker, epoch) + self._merge_shard_range(index=0, **local_context) + self._merge_object(index=slice(2, 8), **local_context) + objs = local_context['objects'] + + daemon, repl_calls, rsync_calls = self.check_replicate( + local_broker, 1, repl_conf=repl_conf) + + self.assertEqual(['sync', 'complete_rsync'], + [call[0] for call in repl_calls]) + self.assertEqual(1, daemon.stats['rsync']) + self.assertEqual(0, daemon.stats['deferred']) + self.assertEqual(0, daemon.stats['diff']) + + # fresh db is sync'd first... + fresh_id = local_broker.get_info()['id'] + self.assertEqual(fresh_id, repl_calls[0][1][2]) + self.assertEqual(fresh_id, repl_calls[1][1][0]) + # retired db is not sync'd at all + old_broker = self.backend( + local_broker._db_file, account=local_broker.account, + container=local_broker.container, force_db_file=True) + old_id = old_broker.get_info()['id'] + bad_calls = [] + for call in repl_calls: + if old_id in call[1]: + bad_calls.append( + 'old db id %r in %r call args %r' % ( + old_id, call[0], call[1])) + if bad_calls: + self.fail('Found some bad calls:\n' + '\n'.join(bad_calls)) + # complete_rsync + self.assertEqual(os.path.basename(local_broker.db_file), + repl_calls[1][1][1]) + self.assertEqual(local_broker.db_file, rsync_calls[0][0]) + self.assertEqual(fresh_id, os.path.basename(rsync_calls[0][1])) + self.assertFalse(rsync_calls[1:]) + + # TODO: make these stats better; in sharding state local broker pulls + # stats for 2 objects from old db, whereas remote thinks it's sharded + # and has an empty shard range table + self.assert_info_synced(local_broker, 1, mismatches=[ + 'object_count', 'bytes_used', 'db_state']) + + remote_broker = self._get_broker('a', 'c', node_index=1) + remote_id = remote_broker.get_info()['id'] + self.assertNotEqual(old_id, remote_id) + self.assertNotEqual(fresh_id, remote_id) + self.assertEqual( + [remote_broker.db_file], get_db_files(remote_broker.db_file)) + self.assertEqual(os.path.basename(remote_broker.db_file), + os.path.basename(local_broker.db_file)) + self.assertEqual(epoch, remote_broker.db_epoch) + # remote db has only the misplaced objects + self.assertEqual(objs[2:8], remote_broker.get_objects()) + self.assert_shard_ranges_synced(local_broker, remote_broker) + + # replicate again, check asserts abort + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + local_broker.get_shard_ranges(include_own=True)) + + # sanity + remote_broker = self._get_broker('a', 'c', node_index=1) + self.assertEqual( + [remote_broker.db_file], get_db_files(remote_broker.db_file)) + self.assertEqual(os.path.basename(remote_broker.db_file), + 
os.path.basename(local_broker.db_file)) + self.assertEqual(objs[2:8], remote_broker.get_objects()) + self.assertEqual(epoch, remote_broker.db_epoch) + + def test_replication_local_sharding_remote_missing(self): + self._check_replication_local_sharding_remote_missing({}) + + def test_replication_local_sharding_remote_missing_large_diff(self): + # the local shard db has large diff with respect to the old db + self._check_replication_local_sharding_remote_missing({'per_diff': 1}) + + def _check_replication_local_sharding_remote_unsharded(self, repl_conf): + local_context = self._setup_replication_test(0) + self._merge_object(index=slice(0, 3), **local_context) + local_broker = local_context['broker'] + epoch = Timestamp.now() + self._goto_sharding_state(local_broker, epoch) + self._merge_shard_range(index=0, **local_context) + self._merge_object(index=slice(3, 11), **local_context) + + remote_context = self._setup_replication_test(1) + self._merge_object(index=11, **remote_context) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + local_broker.get_shard_ranges(include_own=True)) + + remote_broker = self._get_broker('a', 'c', node_index=1) + self.assertEqual( + [remote_broker._db_file], get_db_files(remote_broker.db_file)) + self.assertEqual(remote_context['objects'][11:12], + remote_broker.get_objects()) + + self.assert_info_synced( + local_broker, 1, + mismatches=['db_state', 'object_count', 'bytes_used', + 'status_changed_at', 'hash']) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + local_broker.get_shard_ranges(include_own=True)) + + def test_replication_local_sharding_remote_unsharded(self): + self._check_replication_local_sharding_remote_unsharded({}) + + def test_replication_local_sharding_remote_unsharded_large_diff(self): + self._check_replication_local_sharding_remote_unsharded( + {'per_diff': 1}) + + def _check_replication_local_sharding_remote_sharding(self, repl_conf): + local_context = self._setup_replication_test(0) + self._merge_object(index=slice(0, 5), **local_context) + local_broker = local_context['broker'] + epoch = Timestamp.now() + self._goto_sharding_state(local_broker, epoch) + self._merge_shard_range(index=0, **local_context) + self._merge_object(index=slice(5, 10), **local_context) + + remote_context = self._setup_replication_test(1) + self._merge_object(index=12, **remote_context) + # take snapshot of info now before transition to sharding... 
+ orig_remote_info = remote_context['broker'].get_info() + remote_broker = remote_context['broker'] + self._goto_sharding_state(remote_broker, epoch) + self._merge_shard_range(index=0, **remote_context) + self._merge_object(index=13, **remote_context) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + remote_broker.get_shard_ranges(include_own=True)) + + # in sharding state brokers only reports object stats from old db, and + # they are different + self.assert_info_synced( + local_broker, 1, mismatches=['object_count', 'bytes_used', + 'status_changed_at', 'hash']) + + remote_broker = self._get_broker('a', 'c', node_index=1) + shard_db = make_db_file_path(remote_broker._db_file, epoch) + self.assertEqual([remote_broker._db_file, shard_db], + get_db_files(remote_broker.db_file)) + shard_db = make_db_file_path(remote_broker._db_file, epoch) + self.assertEqual([remote_broker._db_file, shard_db], + get_db_files(remote_broker.db_file)) + # no local objects have been sync'd to remote shard db + self.assertEqual(remote_context['objects'][13:14], + remote_broker.get_objects()) + # remote *old db* is unchanged + remote_old_broker = self.backend( + remote_broker._db_file, account=remote_broker.account, + container=remote_broker.container, force_db_file=True) + self.assertEqual(remote_context['objects'][12:13], + remote_old_broker.get_objects()) + self.assertFalse(remote_old_broker.get_shard_ranges()) + remote_old_info = remote_old_broker.get_info() + orig_remote_info.pop('db_state') + remote_old_info.pop('db_state') + self.assertEqual(orig_remote_info, remote_old_info) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + local_broker.get_shard_ranges(include_own=True)) + + def test_replication_local_sharding_remote_sharding(self): + self._check_replication_local_sharding_remote_sharding({}) + + def test_replication_local_sharding_remote_sharding_large_diff(self): + self._check_replication_local_sharding_remote_sharding({'per_diff': 1}) + + def test_replication_local_sharded_remote_missing(self): + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + epoch = Timestamp.now() + self._goto_sharding_state(local_broker, epoch) + local_context['shard_ranges'][0].object_count = 99 + local_context['shard_ranges'][0].state = ShardRange.ACTIVE + self._merge_shard_range(index=0, **local_context) + self._merge_object(index=slice(0, 3), **local_context) + self._goto_sharded_state(local_broker) + objs = local_context['objects'] + + daemon, repl_calls, rsync_calls = self.check_replicate(local_broker, 1) + + self.assertEqual(['sync', 'complete_rsync'], + [call[0] for call in repl_calls]) + self.assertEqual(1, daemon.stats['rsync']) + + # sync + local_id = local_broker.get_info()['id'] + self.assertEqual(local_id, repl_calls[0][1][2]) + # complete_rsync + self.assertEqual(local_id, repl_calls[1][1][0]) + self.assertEqual( + os.path.basename(local_broker.db_file), repl_calls[1][1][1]) + self.assertEqual(local_broker.db_file, rsync_calls[0][0]) + self.assertEqual(local_id, os.path.basename(rsync_calls[0][1])) + self.assertFalse(rsync_calls[1:]) + + self.assert_info_synced(local_broker, 1) + + remote_broker = self._get_broker('a', 'c', node_index=1) + remote_id = remote_broker.get_info()['id'] + self.assertNotEqual(local_id, remote_id) + shard_db = make_db_file_path(remote_broker._db_file, epoch) + self.assertEqual([shard_db], + get_db_files(remote_broker.db_file)) + self.assertEqual(objs[:3], remote_broker.get_objects()) + 
self.assertEqual(local_broker.get_shard_ranges(), + remote_broker.get_shard_ranges()) + + # sanity check - in sync + self._assert_local_sharded_in_sync(local_broker, local_id) + + remote_broker = self._get_broker('a', 'c', node_index=1) + shard_db = make_db_file_path(remote_broker._db_file, epoch) + self.assertEqual([shard_db], + get_db_files(remote_broker.db_file)) + # the remote broker object_count comes from replicated shard range... + self.assertEqual(99, remote_broker.get_info()['object_count']) + # these are replicated misplaced objects... + self.assertEqual(objs[:3], remote_broker.get_objects()) + self.assertEqual(local_broker.get_shard_ranges(), + remote_broker.get_shard_ranges()) + + def _check_replication_local_sharded_remote_unsharded(self, repl_conf): + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + epoch = Timestamp.now() + self._goto_sharding_state(local_broker, epoch) + local_context['shard_ranges'][0].object_count = 99 + local_context['shard_ranges'][0].state = ShardRange.ACTIVE + self._merge_shard_range(index=0, **local_context) + self._merge_object(index=slice(0, 3), **local_context) + self._goto_sharded_state(local_broker) + + remote_context = self._setup_replication_test(1) + self._merge_object(index=4, **remote_context) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + local_broker.get_shard_ranges(include_own=True), + expect_success=True) + + # sharded broker takes object count from shard range whereas remote + # unsharded broker takes it from object table + self.assert_info_synced( + local_broker, 1, + mismatches=['db_state', 'object_count', 'bytes_used', + 'status_changed_at', 'hash']) + + remote_broker = self._get_broker('a', 'c', node_index=1) + self.assertEqual([remote_broker._db_file], + get_db_files(remote_broker.db_file)) + self.assertEqual(remote_context['objects'][4:5], + remote_broker.get_objects()) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + local_broker.get_shard_ranges(include_own=True), + expect_success=True) + + remote_broker = self._get_broker('a', 'c', node_index=1) + self.assertEqual([remote_broker._db_file], + get_db_files(remote_broker.db_file)) + self.assertEqual(remote_context['objects'][4:5], + remote_broker.get_objects()) + + def test_replication_local_sharded_remote_unsharded(self): + self._check_replication_local_sharded_remote_unsharded({}) + + def test_replication_local_sharded_remote_unsharded_large_diff(self): + self._check_replication_local_sharded_remote_unsharded({'per_diff': 1}) + + def _check_replication_local_sharded_remote_sharding(self, repl_conf): + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + epoch = Timestamp.now() + self._goto_sharding_state(local_broker, epoch=epoch) + local_context['shard_ranges'][0].object_count = 99 + local_context['shard_ranges'][0].bytes_used = 999 + local_context['shard_ranges'][0].state = ShardRange.ACTIVE + self._merge_shard_range(index=0, **local_context) + self._merge_object(index=slice(0, 5), **local_context) + self._goto_sharded_state(local_broker) + + remote_context = self._setup_replication_test(1) + self._merge_object(index=6, **remote_context) + remote_broker = remote_context['broker'] + remote_info_orig = remote_broker.get_info() + self._goto_sharding_state(remote_broker, epoch=epoch) + self._merge_shard_range(index=0, **remote_context) + self._merge_object(index=7, **remote_context) + + self._check_only_shard_ranges_replicated( + 
local_broker, 1, repl_conf, + # remote has newer timestamp for shard range + remote_broker.get_shard_ranges(include_own=True), + expect_success=True) + + # sharded broker takes object count from shard range whereas remote + # sharding broker takes it from object table + self.assert_info_synced( + local_broker, 1, + mismatches=['db_state', 'object_count', 'bytes_used', + 'status_changed_at', 'hash']) + + remote_broker = self._get_broker('a', 'c', node_index=1) + shard_db = make_db_file_path(remote_broker._db_file, epoch) + self.assertEqual([remote_broker._db_file, shard_db], + get_db_files(remote_broker.db_file)) + # remote fresh db objects are unchanged + self.assertEqual(remote_context['objects'][7:8], + remote_broker.get_objects()) + # remote old hash.db objects are unchanged + remote_old_broker = self.backend( + remote_broker._db_file, account=remote_broker.account, + container=remote_broker.container, force_db_file=True) + self.assertEqual( + remote_context['objects'][6:7], + remote_old_broker.get_objects()) + remote_info = remote_old_broker.get_info() + remote_info_orig.pop('db_state') + remote_info.pop('db_state') + self.assertEqual(remote_info_orig, remote_info) + self.assertEqual(local_broker.get_shard_ranges(), + remote_broker.get_shard_ranges()) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + remote_broker.get_shard_ranges(include_own=True), + expect_success=True) + + def test_replication_local_sharded_remote_sharding(self): + self._check_replication_local_sharded_remote_sharding({}) + + def test_replication_local_sharded_remote_sharding_large_diff(self): + self._check_replication_local_sharded_remote_sharding({'per_diff': 1}) + + def _check_replication_local_sharded_remote_sharded(self, repl_conf): + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + epoch = Timestamp.now() + self._goto_sharding_state(local_broker, epoch) + local_context['shard_ranges'][0].object_count = 99 + local_context['shard_ranges'][0].bytes_used = 999 + local_context['shard_ranges'][0].state = ShardRange.ACTIVE + self._merge_shard_range(index=0, **local_context) + self._merge_object(index=slice(0, 6), **local_context) + self._goto_sharded_state(local_broker) + + remote_context = self._setup_replication_test(1) + self._merge_object(index=6, **remote_context) + remote_broker = remote_context['broker'] + self._goto_sharding_state(remote_broker, epoch) + remote_context['shard_ranges'][0].object_count = 101 + remote_context['shard_ranges'][0].bytes_used = 1010 + remote_context['shard_ranges'][0].state = ShardRange.ACTIVE + self._merge_shard_range(index=0, **remote_context) + self._merge_object(index=7, **remote_context) + self._goto_sharded_state(remote_broker) + + self._check_only_shard_ranges_replicated( + local_broker, 1, repl_conf, + # remote has newer timestamp for shard range + remote_broker.get_shard_ranges(include_own=True), + expect_success=True) + + self.assert_info_synced( + local_broker, 1, + mismatches=['status_changed_at', 'hash']) + + remote_broker = self._get_broker('a', 'c', node_index=1) + shard_db = make_db_file_path(remote_broker._db_file, epoch) + self.assertEqual([shard_db], + get_db_files(remote_broker.db_file)) + self.assertEqual(remote_context['objects'][7:8], + remote_broker.get_objects()) + # remote shard range was newer than local so object count is not + # updated by sync'd shard range + self.assertEqual( + 101, remote_broker.get_shard_ranges()[0].object_count) + + self._check_only_shard_ranges_replicated( + 
local_broker, 1, repl_conf, + # remote has newer timestamp for shard range + remote_broker.get_shard_ranges(include_own=True), + expect_success=True) + + def test_replication_local_sharded_remote_sharded(self): + self._check_replication_local_sharded_remote_sharded({}) + + def test_replication_local_sharded_remote_sharded_large_diff(self): + self._check_replication_local_sharded_remote_sharded({'per_diff': 1}) + + def test_replication_rsync_then_merge_aborts_before_merge_sharding(self): + # verify that rsync_then_merge aborts if remote starts sharding during + # the rsync + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + self._merge_object(index=slice(0, 3), **local_context) + remote_context = self._setup_replication_test(1) + remote_broker = remote_context['broker'] + remote_broker.logger = debug_logger() + self._merge_object(index=5, **remote_context) + + orig_func = replicator.ContainerReplicatorRpc.rsync_then_merge + + def mock_rsync_then_merge(*args): + remote_broker.merge_shard_ranges( + ShardRange('.shards_a/cc', Timestamp.now())) + self._goto_sharding_state(remote_broker, Timestamp.now()) + return orig_func(*args) + + with mock.patch( + 'swift.container.replicator.ContainerReplicatorRpc.' + 'rsync_then_merge', + mock_rsync_then_merge): + with mock.patch( + 'swift.container.backend.ContainerBroker.' + 'get_items_since') as mock_get_items_since: + daemon, repl_calls, rsync_calls = self.check_replicate( + local_broker, 1, expect_success=False, + repl_conf={'per_diff': 1}) + + mock_get_items_since.assert_not_called() + self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'], + [call[0] for call in repl_calls]) + self.assertEqual(local_broker.db_file, rsync_calls[0][0]) + self.assertEqual(local_broker.get_info()['id'], + os.path.basename(rsync_calls[0][1])) + self.assertFalse(rsync_calls[1:]) + + def test_replication_rsync_then_merge_aborts_before_merge_sharded(self): + # verify that rsync_then_merge aborts if remote completes sharding + # during the rsync + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + self._merge_object(index=slice(0, 3), **local_context) + remote_context = self._setup_replication_test(1) + remote_broker = remote_context['broker'] + remote_broker.logger = debug_logger() + self._merge_object(index=5, **remote_context) + + orig_func = replicator.ContainerReplicatorRpc.rsync_then_merge + + def mock_rsync_then_merge(*args): + remote_broker.merge_shard_ranges( + ShardRange('.shards_a/cc', Timestamp.now())) + self._goto_sharding_state(remote_broker, Timestamp.now()) + self._goto_sharded_state(remote_broker) + return orig_func(*args) + + with mock.patch( + 'swift.container.replicator.ContainerReplicatorRpc.' + 'rsync_then_merge', + mock_rsync_then_merge): + with mock.patch( + 'swift.container.backend.ContainerBroker.' 
+ 'get_items_since') as mock_get_items_since: + daemon, repl_calls, rsync_calls = self.check_replicate( + local_broker, 1, expect_success=False, + repl_conf={'per_diff': 1}) + + mock_get_items_since.assert_not_called() + self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'], + [call[0] for call in repl_calls]) + self.assertEqual(local_broker.db_file, rsync_calls[0][0]) + self.assertEqual(local_broker.get_info()['id'], + os.path.basename(rsync_calls[0][1])) + self.assertFalse(rsync_calls[1:]) + + def test_replication_rsync_then_merge_aborts_after_merge_sharding(self): + # verify that rsync_then_merge aborts if remote starts sharding during + # the merge + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + self._merge_object(index=slice(0, 3), **local_context) + remote_context = self._setup_replication_test(1) + remote_broker = remote_context['broker'] + remote_broker.logger = debug_logger() + self._merge_object(index=5, **remote_context) + + orig_get_items_since = backend.ContainerBroker.get_items_since + calls = [] + + def fake_get_items_since(broker, *args): + # remote starts sharding while rpc call is merging + if not calls: + remote_broker.merge_shard_ranges( + ShardRange('.shards_a/cc', Timestamp.now())) + self._goto_sharding_state(remote_broker, Timestamp.now()) + calls.append(args) + return orig_get_items_since(broker, *args) + + with mock.patch( + 'swift.container.backend.ContainerBroker.get_items_since', + fake_get_items_since): + daemon, repl_calls, rsync_calls = self.check_replicate( + local_broker, 1, expect_success=False, + repl_conf={'per_diff': 1}) + + self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'], + [call[0] for call in repl_calls]) + self.assertEqual(local_broker.db_file, rsync_calls[0][0]) + self.assertEqual(local_broker.get_info()['id'], + os.path.basename(rsync_calls[0][1])) + self.assertFalse(rsync_calls[1:]) + + def test_replication_rsync_then_merge_aborts_after_merge_sharded(self): + # verify that rsync_then_merge aborts if remote completes sharding + # during the merge + local_context = self._setup_replication_test(0) + local_broker = local_context['broker'] + self._merge_object(index=slice(0, 3), **local_context) + remote_context = self._setup_replication_test(1) + remote_broker = remote_context['broker'] + remote_broker.logger = debug_logger() + self._merge_object(index=5, **remote_context) + + orig_get_items_since = backend.ContainerBroker.get_items_since + calls = [] + + def fake_get_items_since(broker, *args): + # remote starts sharding while rpc call is merging + result = orig_get_items_since(broker, *args) + if calls: + remote_broker.merge_shard_ranges( + ShardRange('.shards_a/cc', Timestamp.now())) + self._goto_sharding_state(remote_broker, Timestamp.now()) + self._goto_sharded_state(remote_broker) + calls.append(args) + return result + + with mock.patch( + 'swift.container.backend.ContainerBroker.get_items_since', + fake_get_items_since): + daemon, repl_calls, rsync_calls = self.check_replicate( + local_broker, 1, expect_success=False, + repl_conf={'per_diff': 1}) + + self.assertEqual(['sync', 'get_shard_ranges', 'rsync_then_merge'], + [call[0] for call in repl_calls]) + self.assertEqual(local_broker.db_file, rsync_calls[0][0]) + self.assertEqual(local_broker.get_info()['id'], + os.path.basename(rsync_calls[0][1])) + self.assertFalse(rsync_calls[1:]) + if __name__ == '__main__': unittest.main() diff -Nru swift-2.17.0/test/unit/container/test_server.py 
swift-2.18.0/test/unit/container/test_server.py --- swift-2.17.0/test/unit/container/test_server.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/container/test_server.py 2018-05-30 10:17:02.000000000 +0000 @@ -22,6 +22,7 @@ from contextlib import contextmanager from shutil import rmtree from tempfile import mkdtemp +from test.unit import make_timestamp_iter, mock_timestamp_now from time import gmtime from xml.dom import minidom import time @@ -40,12 +41,13 @@ from swift.container import server as container_server from swift.common import constraints from swift.common.utils import (Timestamp, mkdirs, public, replication, - storage_directory, lock_parent_directory) + storage_directory, lock_parent_directory, + ShardRange) from test.unit import fake_http_connect, debug_logger, mock_check_drive from swift.common.storage_policy import (POLICIES, StoragePolicy) from swift.common.request_helpers import get_sys_meta_prefix -from test import listen_zero +from test import listen_zero, annotate_failure from test.unit import patch_policies @@ -86,6 +88,16 @@ """ pass + def _put_shard_range(self, shard_range): + put_timestamp = shard_range.timestamp.internal + headers = {'X-Backend-Record-Type': 'shard', + 'X-Timestamp': put_timestamp} + body = json.dumps([dict(shard_range)]) + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers, + body=body) + resp = req.get_response(self.controller) + self.assertIn(resp.status_int, (201, 202)) + def _check_put_container_storage_policy(self, req, policy_index): resp = req.get_response(self.controller) self.assertEqual(201, resp.status_int) @@ -95,6 +107,11 @@ self.assertEqual(str(policy_index), resp.headers['X-Backend-Storage-Policy-Index']) + def _assert_shard_ranges_equal(self, x, y): + # ShardRange.__eq__ only compares lower and upper; here we generate + # dict representations to compare all attributes + self.assertEqual([dict(sr) for sr in x], [dict(sr) for sr in y]) + def test_creation(self): # later config should be extended to assert more config options replicator = container_server.ContainerController( @@ -424,7 +441,7 @@ elif state[0] == 'race': # Save the original db_file attribute value self._saved_db_file = self.db_file - self.db_file += '.doesnotexist' + self._db_file += '.doesnotexist' def initialize(self, *args, **kwargs): if state[0] == 'initial': @@ -433,7 +450,7 @@ elif state[0] == 'race': # Restore the original db_file attribute to get the race # behavior - self.db_file = self._saved_db_file + self._db_file = self._saved_db_file return super(InterceptedCoBr, self).initialize(*args, **kwargs) with mock.patch("swift.container.server.ContainerBroker", @@ -1372,22 +1389,101 @@ self.assertEqual(resp.status_int, 500) def test_DELETE(self): + ts_iter = make_timestamp_iter() req = Request.blank( '/sda1/p/a/c', - environ={'REQUEST_METHOD': 'PUT'}, headers={'X-Timestamp': '1'}) + environ={'REQUEST_METHOD': 'PUT'}, + headers={'X-Timestamp': next(ts_iter).internal}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 201) + + # PUT an *empty* shard range + sr = ShardRange('.shards_a/c', next(ts_iter), 'l', 'u', 0, 0, + state=ShardRange.ACTIVE) req = Request.blank( '/sda1/p/a/c', - environ={'REQUEST_METHOD': 'DELETE'}, headers={'X-Timestamp': '2'}) + environ={'REQUEST_METHOD': 'PUT'}, + headers={'X-Timestamp': next(ts_iter).internal, + 'X-Backend-Record-Type': 'shard'}, + body=json.dumps([dict(sr)])) + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 202) + + req = Request.blank( + 
'/sda1/p/a/c', + environ={'REQUEST_METHOD': 'DELETE'}, + headers={'X-Timestamp': next(ts_iter).internal}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 204) + + req = Request.blank( + '/sda1/p/a/c', + environ={'REQUEST_METHOD': 'GET'}, + headers={'X-Timestamp': next(ts_iter).internal}) + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 404) + req = Request.blank( + '/sda1/p/a/c', + environ={'REQUEST_METHOD': 'GET'}, + headers={'X-Timestamp': next(ts_iter).internal, + 'X-Backend-Record-Type': 'shard'}, + params={'format': 'json'}) + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 404) + + # the override-deleted header is ignored for object records req = Request.blank( '/sda1/p/a/c', - environ={'REQUEST_METHOD': 'GET'}, headers={'X-Timestamp': '3'}) + environ={'REQUEST_METHOD': 'GET'}, + headers={'X-Timestamp': next(ts_iter).internal, + 'X-Backend-Override-Deleted': 'true'}, + params={'format': 'json'}) resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 404) + # but override-deleted header makes shard ranges available after DELETE + req = Request.blank( + '/sda1/p/a/c', + environ={'REQUEST_METHOD': 'GET'}, + headers={'X-Timestamp': next(ts_iter).internal, + 'X-Backend-Record-Type': 'shard', + 'X-Backend-Override-Deleted': 'true'}, + params={'format': 'json'}) + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual([dict(sr, last_modified=sr.timestamp.isoformat)], + json.loads(resp.body)) + self.assertIn('X-Backend-Record-Type', resp.headers) + self.assertEqual('shard', resp.headers['X-Backend-Record-Type']) + + # ... unless the override header equates to False + req = Request.blank( + '/sda1/p/a/c', + environ={'REQUEST_METHOD': 'GET'}, + headers={'X-Timestamp': next(ts_iter).internal, + 'X-Backend-Record-Type': 'shard', + 'X-Backend-Override-Deleted': 'no'}, + params={'format': 'json'}) + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 404) + self.assertNotIn('X-Backend-Record-Type', resp.headers) + + # ...or the db file is unlinked + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + self.assertTrue(os.path.exists(broker.db_file)) + os.unlink(broker.db_file) + self.assertFalse(os.path.exists(broker.db_file)) + req = Request.blank( + '/sda1/p/a/c', + environ={'REQUEST_METHOD': 'GET'}, + headers={'X-Timestamp': next(ts_iter).internal, + 'X-Backend-Record-Type': 'shard', + 'X-Backend-Override-Deleted': 'true'}, + params={'format': 'json'}) + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 404) + self.assertNotIn('X-Backend-Record-Type', resp.headers) + def test_DELETE_PUT_recreate(self): path = '/sda1/p/a/c' req = Request.blank(path, method='PUT', @@ -1460,7 +1556,7 @@ self.assertEqual(True, db.is_deleted()) # now save a copy of this db (and remove it from the "current node") db = self.controller._get_container_broker('sda1', 'p', 'a', 'c') - db_path = db.db_file + db_path = db._db_file other_path = os.path.join(self.testdir, 'othernode.db') os.rename(db_path, other_path) # that should make it missing on this node @@ -1474,6 +1570,8 @@ def mock_exists(db_path): rv = _real_exists(db_path) + if db_path != db._db_file: + return rv if not mock_called: # be as careful as we might hope backend replication can be... 
with lock_parent_directory(db_path, timeout=1): @@ -2040,6 +2138,1140 @@ resp = req.get_response(self.controller) self.assertEqual(resp.status_int, 412) + def test_PUT_shard_range_autocreates_shard_container(self): + ts_iter = make_timestamp_iter() + shard_range = ShardRange('.shards_a/shard_c', next(ts_iter)) + put_timestamp = next(ts_iter).internal + headers = {'X-Backend-Record-Type': 'shard', + 'X-Timestamp': put_timestamp, + 'X-Container-Sysmeta-Test': 'set', + 'X-Container-Meta-Test': 'persisted'} + + # PUT shard range to non-existent container with non-autocreate prefix + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers, + body=json.dumps([dict(shard_range)])) + resp = req.get_response(self.controller) + self.assertEqual(404, resp.status_int) + + # PUT shard range to non-existent container with autocreate prefix, + # missing storage policy + headers['X-Timestamp'] = next(ts_iter).internal + req = Request.blank( + '/sda1/p/.shards_a/shard_c', method='PUT', headers=headers, + body=json.dumps([dict(shard_range)])) + resp = req.get_response(self.controller) + self.assertEqual(400, resp.status_int) + self.assertIn('X-Backend-Storage-Policy-Index header is required', + resp.body) + + # PUT shard range to non-existent container with autocreate prefix + headers['X-Timestamp'] = next(ts_iter).internal + policy_index = random.choice(POLICIES).idx + headers['X-Backend-Storage-Policy-Index'] = str(policy_index) + req = Request.blank( + '/sda1/p/.shards_a/shard_c', method='PUT', headers=headers, + body=json.dumps([dict(shard_range)])) + resp = req.get_response(self.controller) + self.assertEqual(201, resp.status_int) + + # repeat PUT of shard range to autocreated container - 204 response + headers['X-Timestamp'] = next(ts_iter).internal + headers.pop('X-Backend-Storage-Policy-Index') # no longer required + req = Request.blank( + '/sda1/p/.shards_a/shard_c', method='PUT', headers=headers, + body=json.dumps([dict(shard_range)])) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + + # regular PUT to autocreated container - 204 response + headers['X-Timestamp'] = next(ts_iter).internal + req = Request.blank( + '/sda1/p/.shards_a/shard_c', method='PUT', + headers={'X-Timestamp': next(ts_iter).internal}, + body=json.dumps([dict(shard_range)])) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + + def test_PUT_shard_range_to_deleted_container(self): + ts_iter = make_timestamp_iter() + put_time = next(ts_iter).internal + # create a container, get it to sharded state and then delete it + req = Request.blank('/sda1/p/a/c', method='PUT', + headers={'X-Timestamp': put_time}) + resp = req.get_response(self.controller) + self.assertEqual(201, resp.status_int) + + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + self.assertTrue(broker.set_sharded_state()) + + delete_time = next(ts_iter).internal + req = Request.blank('/sda1/p/a/c', method='DELETE', + headers={'X-Timestamp': delete_time}) + resp = req.get_response(self.controller) + self.assertEqual(204, resp.status_int) + self.assertTrue(broker.is_deleted()) + self.assertEqual(delete_time, broker.get_info()['delete_timestamp']) + self.assertEqual(put_time, broker.get_info()['put_timestamp']) + req = Request.blank('/sda1/p/a/c', method='GET') + resp = req.get_response(self.controller) + self.assertEqual(404, resp.status_int) + + # shard range PUT is accepted but 
container remains deleted + shard_range = ShardRange('.shards_a/shard_c', next(ts_iter), + state=ShardRange.ACTIVE) + headers = {'X-Backend-Record-Type': 'shard', + 'X-Timestamp': next(ts_iter).internal, + 'X-Container-Sysmeta-Test': 'set', + 'X-Container-Meta-Test': 'persisted'} + + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers, + body=json.dumps([dict(shard_range)])) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + self.assertTrue(broker.get_info_is_deleted()[1]) + self.assertEqual(delete_time, broker.get_info()['delete_timestamp']) + self.assertEqual(put_time, broker.get_info()['put_timestamp']) + req = Request.blank('/sda1/p/a/c', method='GET') + resp = req.get_response(self.controller) + self.assertEqual(404, resp.status_int) + + # unless shard range has non-zero stats, then container is revived + shard_range.update_meta(99, 1234, meta_timestamp=next(ts_iter)) + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers, + body=json.dumps([dict(shard_range)])) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + self.assertFalse(broker.get_info_is_deleted()[1]) + self.assertEqual(delete_time, broker.get_info()['delete_timestamp']) + self.assertEqual(put_time, broker.get_info()['put_timestamp']) + req = Request.blank('/sda1/p/a/c', method='GET') + resp = req.get_response(self.controller) + self.assertEqual(204, resp.status_int) + self.assertEqual('99', resp.headers['X-Container-Object-Count']) + + def test_PUT_shard_range_json_in_body(self): + ts_iter = make_timestamp_iter() + oldest_ts = next(ts_iter) # used for stale shard range PUT later + shard_bounds = [('', 'ham', ShardRange.ACTIVE), + ('ham', 'salami', ShardRange.ACTIVE), + ('salami', '', ShardRange.CREATED)] + shard_ranges = [ + ShardRange('.shards_a/_%s' % upper, next(ts_iter), + lower, upper, + i * 100, i * 1000, meta_timestamp=next(ts_iter), + state=state, state_timestamp=next(ts_iter)) + for i, (lower, upper, state) in enumerate(shard_bounds)] + + put_timestamp = next(ts_iter).internal + headers = {'X-Backend-Record-Type': 'shard', + 'X-Timestamp': put_timestamp, + 'X-Container-Sysmeta-Test': 'set', + 'X-Container-Meta-Test': 'persisted'} + body = json.dumps([dict(sr) for sr in shard_ranges[:2]]) + + # PUT some shard ranges to non-existent container + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers, + body=body) + resp = req.get_response(self.controller) + self.assertEqual(404, resp.status_int) + + # create the container with a regular PUT + req = Request.blank( + '/sda1/p/a/c', method='PUT', + headers={'X-Timestamp': put_timestamp}, body=body) + resp = req.get_response(self.controller) + self.assertEqual(201, resp.status_int) + + # now we can PUT shard ranges + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers, + body=body) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + + # check broker + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + # sysmeta and user meta is updated + exp_meta = {'X-Container-Sysmeta-Test': 'set', + 'X-Container-Meta-Test': 'persisted'} + self.assertEqual( + exp_meta, dict((k, v[0]) for k, v in broker.metadata.items())) + self.assertEqual(put_timestamp, broker.get_info()['put_timestamp']) + self._assert_shard_ranges_equal(shard_ranges[:2], + broker.get_shard_ranges()) + + # empty json dict + body = json.dumps({}) + headers['X-Timestamp'] = next(ts_iter).internal + req = Request.blank( + '/sda1/p/a/c', method='PUT', 
headers=headers, body=body) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + self.assertEqual( + exp_meta, dict((k, v[0]) for k, v in broker.metadata.items())) + self._assert_shard_ranges_equal(shard_ranges[:2], + broker.get_shard_ranges()) + self.assertEqual(put_timestamp, broker.get_info()['put_timestamp']) + + older_ts = next(ts_iter) # used for stale shard range PUT later + # updated and new shard ranges + shard_ranges[1].bytes_used += 100 + shard_ranges[1].meta_timestamp = next(ts_iter) + body = json.dumps([dict(sr) for sr in shard_ranges[1:]]) + headers['X-Timestamp'] = next(ts_iter).internal + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + self.assertEqual( + exp_meta, dict((k, v[0]) for k, v in broker.metadata.items())) + self._assert_shard_ranges_equal(shard_ranges, + broker.get_shard_ranges()) + self.assertEqual(put_timestamp, broker.get_info()['put_timestamp']) + + # stale shard range + stale_shard_range = shard_ranges[1].copy() + stale_shard_range.bytes_used = 0 + stale_shard_range.object_count = 0 + stale_shard_range.meta_timestamp = older_ts + stale_shard_range.state = ShardRange.CREATED + stale_shard_range.state_timestamp = oldest_ts + body = json.dumps([dict(stale_shard_range)]) + headers['X-Timestamp'] = next(ts_iter).internal + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + self.assertEqual( + exp_meta, dict((k, v[0]) for k, v in broker.metadata.items())) + self._assert_shard_ranges_equal(shard_ranges, + broker.get_shard_ranges()) + self.assertEqual(put_timestamp, broker.get_info()['put_timestamp']) + + # deleted shard range + shard_ranges[0].deleted = 1 + shard_ranges[0].timestamp = next(ts_iter) + body = json.dumps([dict(shard_ranges[0])]) + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + self.assertEqual( + exp_meta, dict((k, v[0]) for k, v in broker.metadata.items())) + self._assert_shard_ranges_equal( + shard_ranges, broker.get_shard_ranges(include_deleted=True)) + self.assertEqual(put_timestamp, broker.get_info()['put_timestamp']) + + def check_bad_body(body): + bad_put_timestamp = next(ts_iter).internal + headers['X-Timestamp'] = bad_put_timestamp + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + resp = req.get_response(self.controller) + self.assertEqual(400, resp.status_int) + self.assertIn('Invalid body', resp.body) + self.assertEqual( + exp_meta, dict((k, v[0]) for k, v in broker.metadata.items())) + self._assert_shard_ranges_equal( + shard_ranges, broker.get_shard_ranges(include_deleted=True)) + self.assertEqual(put_timestamp, broker.get_info()['put_timestamp']) + + check_bad_body('not json') + check_bad_body('') + check_bad_body('["not a shard range"]') + check_bad_body('[[]]') + bad_shard_range = dict(ShardRange('a/c', next(ts_iter))) + bad_shard_range.pop('timestamp') + check_bad_body(json.dumps([bad_shard_range])) + + def check_not_shard_record_type(headers): + # body ignored + body = json.dumps([dict(sr) for sr in shard_ranges]) + # note, regular PUT so put timestamp is updated + put_timestamp = next(ts_iter).internal + headers['X-Timestamp'] = put_timestamp + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, 
body=body) + resp = req.get_response(self.controller) + self.assertEqual(202, resp.status_int) + self._assert_shard_ranges_equal( + shard_ranges, broker.get_shard_ranges(include_deleted=True)) + self.assertEqual(put_timestamp, broker.get_info()['put_timestamp']) + + check_not_shard_record_type({'X-Backend-Record-Type': 'object', + 'X-Timestamp': next(ts_iter).internal}) + + check_not_shard_record_type({'X-Timestamp': next(ts_iter).internal}) + + def test_PUT_GET_shard_ranges(self): + # make a container + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() # used when mocking Timestamp.now() + headers = {'X-Timestamp': next(ts_iter).normal} + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers) + self.assertEqual(201, req.get_response(self.controller).status_int) + # PUT some objects + objects = [{'name': 'obj_%d' % i, + 'x-timestamp': next(ts_iter).normal, + 'x-content-type': 'text/plain', + 'x-etag': 'etag_%d' % i, + 'x-size': 1024 * i + } for i in range(2)] + for obj in objects: + req = Request.blank('/sda1/p/a/c/%s' % obj['name'], method='PUT', + headers=obj) + self._update_object_put_headers(req) + resp = req.get_response(self.controller) + self.assertEqual(201, resp.status_int) + # PUT some shard ranges + shard_bounds = [('', 'apple', ShardRange.SHRINKING), + ('apple', 'ham', ShardRange.CLEAVED), + ('ham', 'salami', ShardRange.ACTIVE), + ('salami', 'yoghurt', ShardRange.CREATED), + ('yoghurt', '', ShardRange.FOUND), + ] + shard_ranges = [ + ShardRange('.sharded_a/_%s' % upper, next(ts_iter), + lower, upper, + i * 100, i * 1000, meta_timestamp=next(ts_iter), + state=state, state_timestamp=next(ts_iter)) + for i, (lower, upper, state) in enumerate(shard_bounds)] + for shard_range in shard_ranges: + self._put_shard_range(shard_range) + + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + self.assertTrue(broker.is_root_container()) # sanity + self._assert_shard_ranges_equal(shard_ranges, + broker.get_shard_ranges()) + + # sanity check - no shard ranges when GET is only for objects + def check_object_GET(path): + req = Request.blank(path, method='GET') + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.content_type, 'application/json') + expected = [ + dict(hash=obj['x-etag'], bytes=obj['x-size'], + content_type=obj['x-content-type'], + last_modified=Timestamp(obj['x-timestamp']).isoformat, + name=obj['name']) for obj in objects] + self.assertEqual(expected, json.loads(resp.body)) + self.assertIn('X-Backend-Record-Type', resp.headers) + self.assertEqual('object', resp.headers['X-Backend-Record-Type']) + + check_object_GET('/sda1/p/a/c?format=json') + + # GET only shard ranges + def check_shard_GET(expected_shard_ranges, path, params=''): + req = Request.blank('/sda1/p/%s?format=json%s' % + (path, params), method='GET', + headers={'X-Backend-Record-Type': 'shard'}) + with mock_timestamp_now(ts_now): + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.content_type, 'application/json') + expected = [ + dict(sr, last_modified=Timestamp(sr.timestamp).isoformat) + for sr in expected_shard_ranges] + self.assertEqual(expected, json.loads(resp.body)) + self.assertIn('X-Backend-Record-Type', resp.headers) + self.assertEqual('shard', resp.headers['X-Backend-Record-Type']) + + # all shards + check_shard_GET(shard_ranges, 'a/c') + check_shard_GET(reversed(shard_ranges), 'a/c', params='&reverse=true') + # only created shards + 
check_shard_GET(shard_ranges[3:4], 'a/c', params='&states=created') + # only found shards + check_shard_GET(shard_ranges[4:5], 'a/c', params='&states=found') + # only cleaved shards + check_shard_GET(shard_ranges[1:2], 'a/c', + params='&states=cleaved') + # only active shards + check_shard_GET(shard_ranges[2:3], 'a/c', + params='&states=active&end_marker=pickle') + # only cleaved or active shards, reversed + check_shard_GET( + reversed(shard_ranges[1:3]), 'a/c', + params='&states=cleaved,active&reverse=true&marker=pickle') + # only shrinking shards + check_shard_GET(shard_ranges[:1], 'a/c', + params='&states=shrinking&end_marker=pickle') + check_shard_GET(shard_ranges[:1], 'a/c', + params='&states=shrinking&reverse=true&marker=pickle') + # only active or shrinking shards + check_shard_GET([shard_ranges[0], shard_ranges[2]], 'a/c', + params='&states=shrinking,active&end_marker=pickle') + check_shard_GET( + [shard_ranges[2], shard_ranges[0]], 'a/c', + params='&states=active,shrinking&reverse=true&marker=pickle') + # only active or shrinking shards using listing alias + check_shard_GET(shard_ranges[:3], 'a/c', + params='&states=listing&end_marker=pickle') + check_shard_GET( + reversed(shard_ranges[:3]), 'a/c', + params='&states=listing&reverse=true&marker=pickle') + # only created, cleaved, active, shrinking shards using updating alias + check_shard_GET(shard_ranges[1:4], 'a/c', + params='&states=updating&end_marker=treacle') + check_shard_GET( + reversed(shard_ranges[1:4]), 'a/c', + params='&states=updating&reverse=true&marker=treacle') + + # listing shards don't cover entire namespace so expect an extra filler + extra_shard_range = ShardRange( + 'a/c', ts_now, shard_ranges[2].upper, ShardRange.MAX, 2, 1024, + state=ShardRange.ACTIVE) + expected = shard_ranges[:3] + [extra_shard_range] + check_shard_GET(expected, 'a/c', params='&states=listing') + check_shard_GET(reversed(expected), 'a/c', + params='&states=listing&reverse=true') + expected = [shard_ranges[2], extra_shard_range] + check_shard_GET(expected, 'a/c', + params='&states=listing&marker=pickle') + check_shard_GET( + reversed(expected), 'a/c', + params='&states=listing&reverse=true&end_marker=pickle') + # updating shards don't cover entire namespace so expect a filler + extra_shard_range = ShardRange( + 'a/c', ts_now, shard_ranges[3].upper, ShardRange.MAX, 2, 1024, + state=ShardRange.ACTIVE) + expected = shard_ranges[1:4] + [extra_shard_range] + check_shard_GET(expected, 'a/c', params='&states=updating') + check_shard_GET(reversed(expected), 'a/c', + params='&states=updating&reverse=true') + # when no listing shard ranges cover the requested namespace range then + # filler is for entire requested namespace + extra_shard_range = ShardRange( + 'a/c', ts_now, 'treacle', ShardRange.MAX, 2, 1024, + state=ShardRange.ACTIVE) + check_shard_GET([extra_shard_range], 'a/c', + params='&states=listing&marker=treacle') + check_shard_GET( + [extra_shard_range], 'a/c', + params='&states=listing&reverse=true&end_marker=treacle') + extra_shard_range = ShardRange( + 'a/c', ts_now, 'treacle', 'walnut', 2, 1024, + state=ShardRange.ACTIVE) + params = '&states=listing&marker=treacle&end_marker=walnut' + check_shard_GET([extra_shard_range], 'a/c', params=params) + params = '&states=listing&reverse=true&marker=walnut' + \ + '&end_marker=treacle' + check_shard_GET([extra_shard_range], 'a/c', params=params) + # specific object + check_shard_GET(shard_ranges[1:2], 'a/c', params='&includes=cheese') + check_shard_GET(shard_ranges[1:2], 'a/c', 
params='&includes=ham') + check_shard_GET(shard_ranges[2:3], 'a/c', params='&includes=pickle') + check_shard_GET(shard_ranges[2:3], 'a/c', params='&includes=salami') + check_shard_GET(shard_ranges[3:4], 'a/c', params='&includes=walnut') + check_shard_GET(shard_ranges[3:4], 'a/c', + params='&includes=walnut&reverse=true') + # with marker + check_shard_GET(shard_ranges[1:], 'a/c', params='&marker=cheese') + check_shard_GET(reversed(shard_ranges[:2]), 'a/c', + params='&marker=cheese&reverse=true') + check_shard_GET(shard_ranges[2:], 'a/c', params='&marker=ham') + check_shard_GET(reversed(shard_ranges[:2]), 'a/c', + params='&marker=ham&reverse=true') + check_shard_GET(shard_ranges[2:], 'a/c', params='&marker=pickle') + check_shard_GET(reversed(shard_ranges[:3]), 'a/c', + params='&marker=pickle&reverse=true') + check_shard_GET(shard_ranges[3:], 'a/c', params='&marker=salami') + check_shard_GET(reversed(shard_ranges[:3]), 'a/c', + params='&marker=salami&reverse=true') + check_shard_GET(shard_ranges[3:], 'a/c', params='&marker=walnut') + check_shard_GET(reversed(shard_ranges[:4]), 'a/c', + params='&marker=walnut&reverse=true') + # with end marker + check_shard_GET(shard_ranges[:2], 'a/c', params='&end_marker=cheese') + check_shard_GET(reversed(shard_ranges[1:]), 'a/c', + params='&end_marker=cheese&reverse=true') + # everything in range 'apple' - 'ham' is <= end_marker of 'ham' so that + # range is not included because end_marker is non-inclusive + check_shard_GET(shard_ranges[:2], 'a/c', params='&end_marker=ham') + check_shard_GET(reversed(shard_ranges[2:]), 'a/c', + params='&end_marker=ham&reverse=true') + check_shard_GET(shard_ranges[:3], 'a/c', params='&end_marker=pickle') + check_shard_GET(reversed(shard_ranges[2:]), 'a/c', + params='&end_marker=pickle&reverse=true') + check_shard_GET(shard_ranges[:3], 'a/c', params='&end_marker=salami') + check_shard_GET(reversed(shard_ranges[3:]), 'a/c', + params='&end_marker=salami&reverse=true') + check_shard_GET(shard_ranges[:4], 'a/c', params='&end_marker=walnut') + check_shard_GET(reversed(shard_ranges[3:]), 'a/c', + params='&end_marker=walnut&reverse=true') + # with marker and end marker + check_shard_GET(shard_ranges[1:2], 'a/c', + params='&marker=cheese&end_marker=egg') + check_shard_GET(shard_ranges[1:2], 'a/c', + params='&end_marker=cheese&marker=egg&reverse=true') + check_shard_GET(shard_ranges[1:3], 'a/c', + params='&marker=egg&end_marker=jam') + check_shard_GET(reversed(shard_ranges[1:3]), 'a/c', + params='&end_marker=egg&marker=jam&reverse=true') + check_shard_GET(shard_ranges[1:4], 'a/c', + params='&marker=cheese&end_marker=walnut') + check_shard_GET(reversed(shard_ranges[1:4]), 'a/c', + params='&end_marker=cheese&marker=walnut&reverse=true') + check_shard_GET(shard_ranges[2:4], 'a/c', + params='&marker=jam&end_marker=walnut') + check_shard_GET(reversed(shard_ranges[2:4]), 'a/c', + params='&end_marker=jam&marker=walnut&reverse=true') + check_shard_GET(shard_ranges[3:4], 'a/c', + params='&marker=toast&end_marker=walnut') + check_shard_GET(shard_ranges[3:4], 'a/c', + params='&end_marker=toast&marker=walnut&reverse=true') + check_shard_GET([], 'a/c', + params='&marker=egg&end_marker=cheese') + check_shard_GET([], 'a/c', + params='&marker=cheese&end_marker=egg&reverse=true') + + # delete a shard range + shard_range = shard_ranges[1] + shard_range.set_deleted(timestamp=next(ts_iter)) + self._put_shard_range(shard_range) + + self._assert_shard_ranges_equal(shard_ranges[:1] + shard_ranges[2:], + broker.get_shard_ranges()) + + 
check_shard_GET(shard_ranges[:1] + shard_ranges[2:], 'a/c') + check_shard_GET(shard_ranges[2:3], 'a/c', params='&includes=jam') + # specify obj, marker or end_marker not in any shard range + check_shard_GET([], 'a/c', params='&includes=cheese') + check_shard_GET([], 'a/c', params='&includes=cheese&reverse=true') + check_shard_GET([], 'a/c', params='&includes=ham') + check_shard_GET(shard_ranges[2:], 'a/c/', params='&marker=cheese') + check_shard_GET(shard_ranges[:1], 'a/c/', + params='&marker=cheese&reverse=true') + check_shard_GET(shard_ranges[:1], 'a/c/', params='&end_marker=cheese') + check_shard_GET(reversed(shard_ranges[2:]), 'a/c/', + params='&end_marker=cheese&reverse=true') + + self.assertFalse(self.controller.logger.get_lines_for_level('warning')) + self.assertFalse(self.controller.logger.get_lines_for_level('error')) + + def test_GET_shard_ranges_using_state_aliases(self): + # make a shard container + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() # used when mocking Timestamp.now() + shard_ranges = [] + lower = '' + for state in sorted(ShardRange.STATES.keys()): + upper = str(state) + shard_ranges.append( + ShardRange('.shards_a/c_%s' % upper, next(ts_iter), + lower, upper, state * 100, state * 1000, + meta_timestamp=next(ts_iter), + state=state, state_timestamp=next(ts_iter))) + lower = upper + + def do_test(root_path, path, params, expected_states): + expected = [ + sr for sr in shard_ranges if sr.state in expected_states] + own_shard_range = ShardRange(path, next(ts_iter), '', '', + state=ShardRange.ACTIVE) + expected.append(own_shard_range.copy( + lower=expected[-1].upper, meta_timestamp=ts_now)) + expected = [dict(sr, last_modified=sr.timestamp.isoformat) + for sr in expected] + headers = {'X-Timestamp': next(ts_iter).normal} + + # create container + req = Request.blank( + '/sda1/p/%s' % path, method='PUT', headers=headers) + self.assertIn( + req.get_response(self.controller).status_int, (201, 202)) + # PUT some shard ranges + headers = {'X-Timestamp': next(ts_iter).normal, + 'X-Container-Sysmeta-Shard-Root': root_path, + 'X-Backend-Record-Type': 'shard'} + body = json.dumps( + [dict(sr) for sr in shard_ranges + [own_shard_range]]) + req = Request.blank( + '/sda1/p/%s' % path, method='PUT', headers=headers, body=body) + self.assertEqual(202, req.get_response(self.controller).status_int) + + req = Request.blank('/sda1/p/%s?format=json%s' % + (path, params), method='GET', + headers={'X-Backend-Record-Type': 'shard'}) + with mock_timestamp_now(ts_now): + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.content_type, 'application/json') + self.assertEqual(expected, json.loads(resp.body)) + self.assertIn('X-Backend-Record-Type', resp.headers) + self.assertEqual('shard', resp.headers['X-Backend-Record-Type']) + + # root's shard ranges for listing + root_path = container_path = 'a/c' + params = '&states=listing' + expected_states = [ + ShardRange.CLEAVED, ShardRange.ACTIVE, ShardRange.SHARDING, + ShardRange.SHRINKING] + do_test(root_path, container_path, params, expected_states) + + # shard's shard ranges for listing + container_path = '.shards_a/c' + params = '&states=listing' + do_test(root_path, container_path, params, expected_states) + + # root's shard ranges for updating + params = '&states=updating' + expected_states = [ + ShardRange.CREATED, ShardRange.CLEAVED, ShardRange.ACTIVE, + ShardRange.SHARDING] + container_path = root_path + do_test(root_path, container_path, params, expected_states) + + # shard's 
shard ranges for updating + container_path = '.shards_a/c' + do_test(root_path, container_path, params, expected_states) + + def test_GET_shard_ranges_include_deleted(self): + # make a shard container + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() # used when mocking Timestamp.now() + shard_ranges = [] + lower = '' + for state in sorted(ShardRange.STATES.keys()): + upper = str(state) + shard_ranges.append( + ShardRange('.shards_a/c_%s' % upper, next(ts_iter), + lower, upper, state * 100, state * 1000, + meta_timestamp=next(ts_iter), + state=state, state_timestamp=next(ts_iter))) + lower = upper + # create container + headers = {'X-Timestamp': next(ts_iter).normal} + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers) + self.assertIn( + req.get_response(self.controller).status_int, (201, 202)) + # PUT some shard ranges + headers = {'X-Timestamp': next(ts_iter).normal, + 'X-Backend-Record-Type': 'shard'} + body = json.dumps([dict(sr) for sr in shard_ranges]) + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + self.assertEqual(202, req.get_response(self.controller).status_int) + + def do_test(include_deleted, expected): + expected = [dict(sr, last_modified=sr.timestamp.isoformat) + for sr in expected] + headers = {'X-Backend-Record-Type': 'shard', + 'X-Backend-Include-Deleted': str(include_deleted)} + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers=headers) + with mock_timestamp_now(ts_now): + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.content_type, 'application/json') + self.assertEqual(expected, json.loads(resp.body)) + self.assertIn('X-Backend-Record-Type', resp.headers) + self.assertEqual('shard', resp.headers['X-Backend-Record-Type']) + + do_test(False, shard_ranges) + do_test(True, shard_ranges) + + headers = {'X-Timestamp': next(ts_iter).normal, + 'X-Backend-Record-Type': 'shard'} + for sr in shard_ranges[::2]: + sr.set_deleted(timestamp=next(ts_iter)) + body = json.dumps([dict(sr) for sr in shard_ranges]) + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + self.assertEqual(202, req.get_response(self.controller).status_int) + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + self._assert_shard_ranges_equal( + shard_ranges[1::2], broker.get_shard_ranges()) + do_test(False, shard_ranges[1::2]) + do_test(True, shard_ranges) + + headers = {'X-Timestamp': next(ts_iter).normal, + 'X-Backend-Record-Type': 'shard'} + for sr in shard_ranges[1::2]: + sr.set_deleted(timestamp=next(ts_iter)) + body = json.dumps([dict(sr) for sr in shard_ranges]) + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + self.assertEqual(202, req.get_response(self.controller).status_int) + self.assertFalse(broker.get_shard_ranges()) + do_test(False, []) + do_test(True, shard_ranges) + + def test_GET_shard_ranges_errors(self): + # verify that x-backend-record-type is not included in error responses + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() # used when mocking Timestamp.now() + shard_ranges = [] + lower = '' + for state in sorted(ShardRange.STATES.keys()): + upper = str(state) + shard_ranges.append( + ShardRange('.shards_a/c_%s' % upper, next(ts_iter), + lower, upper, state * 100, state * 1000, + meta_timestamp=next(ts_iter), + state=state, state_timestamp=next(ts_iter))) + lower = upper + # create container + headers = {'X-Timestamp': next(ts_iter).normal} + req = 
Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers) + self.assertIn( + req.get_response(self.controller).status_int, (201, 202)) + # PUT some shard ranges + headers = {'X-Timestamp': next(ts_iter).normal, + 'X-Backend-Record-Type': 'shard'} + body = json.dumps([dict(sr) for sr in shard_ranges]) + req = Request.blank( + '/sda1/p/a/c', method='PUT', headers=headers, body=body) + self.assertEqual(202, req.get_response(self.controller).status_int) + + def do_test(params): + params['format'] = 'json' + headers = {'X-Backend-Record-Type': 'shard'} + req = Request.blank('/sda1/p/a/c', method='GET', + headers=headers, params=params) + with mock_timestamp_now(ts_now): + resp = req.get_response(self.controller) + self.assertEqual(resp.content_type, 'text/html') + self.assertNotIn('X-Backend-Record-Type', resp.headers) + self.assertNotIn('X-Backend-Sharding-State', resp.headers) + self.assertNotIn('X-Container-Object-Count', resp.headers) + self.assertNotIn('X-Container-Bytes-Used', resp.headers) + self.assertNotIn('X-Timestamp', resp.headers) + self.assertNotIn('X-PUT-Timestamp', resp.headers) + return resp + + resp = do_test({'states': 'bad'}) + self.assertEqual(resp.status_int, 400) + resp = do_test({'delimiter': 'bad'}) + self.assertEqual(resp.status_int, 412) + resp = do_test({'limit': str(constraints.CONTAINER_LISTING_LIMIT + 1)}) + self.assertEqual(resp.status_int, 412) + with mock.patch('swift.container.server.check_drive', + lambda *args: False): + resp = do_test({}) + self.assertEqual(resp.status_int, 507) + + # delete the container + req = Request.blank('/sda1/p/a/c', method='DELETE', + headers={'X-Timestamp': next(ts_iter).normal}) + self.assertEqual(204, req.get_response(self.controller).status_int) + + resp = do_test({'states': 'bad'}) + self.assertEqual(resp.status_int, 404) + + def test_GET_auto_record_type(self): + # make a container + ts_iter = make_timestamp_iter() + ts_now = Timestamp.now() # used when mocking Timestamp.now() + headers = {'X-Timestamp': next(ts_iter).normal} + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers) + self.assertEqual(201, req.get_response(self.controller).status_int) + # PUT some objects + objects = [{'name': 'obj_%d' % i, + 'x-timestamp': next(ts_iter).normal, + 'x-content-type': 'text/plain', + 'x-etag': 'etag_%d' % i, + 'x-size': 1024 * i + } for i in range(2)] + for obj in objects: + req = Request.blank('/sda1/p/a/c/%s' % obj['name'], method='PUT', + headers=obj) + self._update_object_put_headers(req) + resp = req.get_response(self.controller) + self.assertEqual(201, resp.status_int) + # PUT some shard ranges + shard_bounds = [('', 'm', ShardRange.CLEAVED), + ('m', '', ShardRange.CREATED)] + shard_ranges = [ + ShardRange('.sharded_a/_%s' % upper, next(ts_iter), + lower, upper, + i * 100, i * 1000, meta_timestamp=next(ts_iter), + state=state, state_timestamp=next(ts_iter)) + for i, (lower, upper, state) in enumerate(shard_bounds)] + for shard_range in shard_ranges: + self._put_shard_range(shard_range) + + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + + def assert_GET_objects(req, expected_objects): + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.content_type, 'application/json') + expected = [ + dict(hash=obj['x-etag'], bytes=obj['x-size'], + content_type=obj['x-content-type'], + last_modified=Timestamp(obj['x-timestamp']).isoformat, + name=obj['name']) for obj in expected_objects] + self.assertEqual(expected, json.loads(resp.body)) + 
self.assertIn('X-Backend-Record-Type', resp.headers) + self.assertEqual( + 'object', resp.headers.pop('X-Backend-Record-Type')) + resp.headers.pop('Content-Length') + return resp + + def assert_GET_shard_ranges(req, expected_shard_ranges): + with mock_timestamp_now(ts_now): + resp = req.get_response(self.controller) + self.assertEqual(resp.status_int, 200) + self.assertEqual(resp.content_type, 'application/json') + expected = [ + dict(sr, last_modified=Timestamp(sr.timestamp).isoformat) + for sr in expected_shard_ranges] + self.assertEqual(expected, json.loads(resp.body)) + self.assertIn('X-Backend-Record-Type', resp.headers) + self.assertEqual( + 'shard', resp.headers.pop('X-Backend-Record-Type')) + resp.headers.pop('Content-Length') + return resp + + # unsharded + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'auto'}) + resp = assert_GET_objects(req, objects) + headers = resp.headers + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'shard'}) + resp = assert_GET_shard_ranges(req, shard_ranges) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'object'}) + resp = assert_GET_objects(req, objects) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json', method='GET') + resp = assert_GET_objects(req, objects) + self.assertEqual(headers, resp.headers) + + # move to sharding state + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'auto'}) + resp = assert_GET_shard_ranges(req, shard_ranges) + headers = resp.headers + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'shard'}) + resp = assert_GET_shard_ranges(req, shard_ranges) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'object'}) + resp = assert_GET_objects(req, objects) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json', method='GET') + resp = assert_GET_objects(req, objects) + self.assertEqual(headers, resp.headers) + + # limit is applied to objects but not shard ranges + req = Request.blank('/sda1/p/a/c?format=json&limit=1', method='GET', + headers={'X-Backend-Record-Type': 'auto'}) + resp = assert_GET_shard_ranges(req, shard_ranges) + headers = resp.headers + req = Request.blank('/sda1/p/a/c?format=json&limit=1', method='GET', + headers={'X-Backend-Record-Type': 'shard'}) + resp = assert_GET_shard_ranges(req, shard_ranges) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json&limit=1', method='GET', + headers={'X-Backend-Record-Type': 'object'}) + resp = assert_GET_objects(req, objects[:1]) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json&limit=1', method='GET') + resp = assert_GET_objects(req, objects[:1]) + self.assertEqual(headers, resp.headers) + + # move to sharded state + self.assertTrue(broker.set_sharded_state()) + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'auto'}) + resp = assert_GET_shard_ranges(req, shard_ranges) + headers = resp.headers + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'shard'}) + resp = 
assert_GET_shard_ranges(req, shard_ranges) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json', method='GET', + headers={'X-Backend-Record-Type': 'object'}) + resp = assert_GET_objects(req, []) + self.assertEqual(headers, resp.headers) + req = Request.blank('/sda1/p/a/c?format=json', method='GET') + resp = assert_GET_objects(req, []) + self.assertEqual(headers, resp.headers) + + def test_PUT_GET_to_sharding_container(self): + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + ts_iter = make_timestamp_iter() + headers = {'X-Timestamp': next(ts_iter).normal} + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers) + self.assertEqual(201, req.get_response(self.controller).status_int) + + def do_update(name, timestamp=None, headers=None): + # Make a PUT request to container controller to update an object + timestamp = timestamp or next(ts_iter) + headers = headers or {} + headers.update({'X-Timestamp': timestamp.internal, + 'X-Size': 17, + 'X-Content-Type': 'text/plain', + 'X-Etag': 'fake etag'}) + req = Request.blank( + '/sda1/p/a/c/%s' % name, method='PUT', headers=headers) + self._update_object_put_headers(req) + resp = req.get_response(self.controller) + self.assertEqual(201, resp.status_int) + + def get_api_listing(): + req = Request.blank( + '/sda1/p/a/c', method='GET', params={'format': 'json'}) + resp = req.get_response(self.controller) + self.assertEqual(200, resp.status_int) + return [obj['name'] for obj in json.loads(resp.body)] + + def assert_broker_rows(broker, expected_names, expected_max_row): + self.assertEqual(expected_max_row, broker.get_max_row()) + with broker.get() as conn: + curs = conn.execute(''' + SELECT * FROM object WHERE ROWID > -1 ORDER BY ROWID ASC + ''') + actual = [r[1] for r in curs] + + self.assertEqual(expected_names, actual) + + do_update('unsharded') + self.assertEqual(['unsharded'], get_api_listing()) + assert_broker_rows(broker, ['unsharded'], 1) + + # move container to sharding state + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + assert_broker_rows(broker.get_brokers()[0], ['unsharded'], 1) + assert_broker_rows(broker.get_brokers()[1], [], 1) + + # add another update - should not merge into the older db and therefore + # not appear in api listing + do_update('sharding') + self.assertEqual(['unsharded'], get_api_listing()) + assert_broker_rows(broker.get_brokers()[0], ['unsharded'], 1) + assert_broker_rows(broker.get_brokers()[1], ['sharding'], 2) + + orig_lister = swift.container.backend.ContainerBroker.list_objects_iter + + def mock_list_objects_iter(*args, **kwargs): + # cause an update to land in the pending file after it has been + # flushed by get_info() calls in the container PUT method, but + # before it is flushed by the call to list_objects_iter + do_update('racing_update') + return orig_lister(*args, **kwargs) + + with mock.patch( + 'swift.container.backend.ContainerBroker.list_objects_iter', + mock_list_objects_iter): + listing = get_api_listing() + + self.assertEqual(['unsharded'], listing) + assert_broker_rows(broker.get_brokers()[0], ['unsharded'], 1) + assert_broker_rows(broker.get_brokers()[1], ['sharding'], 2) + + # next listing will flush pending file + listing = get_api_listing() + self.assertEqual(['unsharded'], listing) + assert_broker_rows(broker.get_brokers()[0], ['unsharded'], 1) + assert_broker_rows(broker.get_brokers()[1], + ['sharding', 'racing_update'], 3) + + def _check_object_update_redirected_to_shard(self, 
method): + expected_status = 204 if method == 'DELETE' else 201 + broker = self.controller._get_container_broker('sda1', 'p', 'a', 'c') + ts_iter = make_timestamp_iter() + headers = {'X-Timestamp': next(ts_iter).normal} + req = Request.blank('/sda1/p/a/c', method='PUT', headers=headers) + self.assertEqual(201, req.get_response(self.controller).status_int) + + def do_update(name, timestamp=None, headers=None): + # Make a PUT request to container controller to update an object + timestamp = timestamp or next(ts_iter) + headers = headers or {} + headers.update({'X-Timestamp': timestamp.internal, + 'X-Size': 17, + 'X-Content-Type': 'text/plain', + 'X-Etag': 'fake etag'}) + req = Request.blank( + '/sda1/p/a/c/%s' % name, method=method, headers=headers) + self._update_object_put_headers(req) + return req.get_response(self.controller) + + def get_listing(broker_index): + # index -1 is always the freshest db + sub_broker = broker.get_brokers()[broker_index] + return sub_broker.get_objects() + + def assert_not_redirected(obj_name, timestamp=None, headers=None): + resp = do_update(obj_name, timestamp=timestamp, headers=headers) + self.assertEqual(expected_status, resp.status_int) + self.assertNotIn('Location', resp.headers) + self.assertNotIn('X-Backend-Redirect-Timestamp', resp.headers) + + def assert_redirected(obj_name, shard_range, headers=None): + resp = do_update(obj_name, headers=headers) + self.assertEqual(301, resp.status_int) + self.assertEqual('/%s/%s' % (shard_range.name, obj_name), + resp.headers['Location']) + self.assertEqual(shard_range.timestamp.internal, + resp.headers['X-Backend-Redirect-Timestamp']) + + # sanity check + ts_bashful_orig = next(ts_iter) + mocked_fn = 'swift.container.backend.ContainerBroker.get_shard_ranges' + with mock.patch(mocked_fn) as mock_get_shard_ranges: + assert_not_redirected('bashful', ts_bashful_orig) + mock_get_shard_ranges.assert_not_called() + + shard_ranges = { + 'dopey': ShardRange( + '.sharded_a/sr_dopey', next(ts_iter), '', 'dopey'), + 'happy': ShardRange( + '.sharded_a/sr_happy', next(ts_iter), 'dopey', 'happy'), + '': ShardRange('.sharded_a/sr_', next(ts_iter), 'happy', '') + } + # start with only the middle shard range + self._put_shard_range(shard_ranges['happy']) + + # db not yet sharding but shard ranges exist + sr_happy = shard_ranges['happy'] + redirect_states = ( + ShardRange.CREATED, ShardRange.CLEAVED, ShardRange.ACTIVE, + ShardRange.SHARDING) + headers = {'X-Backend-Accept-Redirect': 'true'} + for state in ShardRange.STATES: + self.assertTrue( + sr_happy.update_state(state, + state_timestamp=next(ts_iter))) + self._put_shard_range(sr_happy) + with annotate_failure(state): + obj_name = 'grumpy%s' % state + if state in redirect_states: + assert_redirected(obj_name, sr_happy, headers=headers) + self.assertNotIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + else: + assert_not_redirected(obj_name, headers=headers) + self.assertIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + obj_name = 'grumpy%s_no_header' % state + with mock.patch(mocked_fn) as mock_get_shard_ranges: + assert_not_redirected(obj_name) + mock_get_shard_ranges.assert_not_called() + self.assertIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + + # set broker to sharding state + broker.enable_sharding(next(ts_iter)) + self.assertTrue(broker.set_sharding_state()) + for state in ShardRange.STATES: + self.assertTrue( + sr_happy.update_state(state, + state_timestamp=next(ts_iter))) + self._put_shard_range(sr_happy) + with annotate_failure(state): + 
obj_name = 'grumpier%s' % state + if state in redirect_states: + assert_redirected(obj_name, sr_happy, headers=headers) + self.assertNotIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + else: + assert_not_redirected(obj_name, headers=headers) + # update goes to fresh db, misplaced + self.assertIn( + obj_name, [obj['name'] for obj in get_listing(-1)]) + self.assertNotIn( + obj_name, [obj['name'] for obj in get_listing(0)]) + obj_name = 'grumpier%s_no_header' % state + with mock.patch(mocked_fn) as mock_get_shard_ranges: + assert_not_redirected(obj_name) + mock_get_shard_ranges.assert_not_called() + self.assertIn( + obj_name, [obj['name'] for obj in get_listing(-1)]) + # update is misplaced, not in retiring db + self.assertNotIn( + obj_name, [obj['name'] for obj in get_listing(0)]) + + # no shard for this object yet so it is accepted by root container + # and stored in misplaced objects... + assert_not_redirected('dopey', timestamp=next(ts_iter)) + self.assertIn('dopey', [obj['name'] for obj in get_listing(-1)]) + self.assertNotIn('dopey', [obj['name'] for obj in get_listing(0)]) + + # now PUT the first shard range + sr_dopey = shard_ranges['dopey'] + sr_dopey.update_state(ShardRange.CLEAVED, + state_timestamp=next(ts_iter)) + self._put_shard_range(sr_dopey) + for state in ShardRange.STATES: + self.assertTrue( + sr_happy.update_state(state, + state_timestamp=next(ts_iter))) + self._put_shard_range(sr_happy) + with annotate_failure(state): + obj_name = 'dopey%s' % state + if state in redirect_states: + assert_redirected(obj_name, sr_happy, headers=headers) + self.assertNotIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + self.assertNotIn(obj_name, + [obj['name'] for obj in get_listing(0)]) + else: + assert_not_redirected(obj_name, headers=headers) + self.assertIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + self.assertNotIn(obj_name, + [obj['name'] for obj in get_listing(0)]) + obj_name = 'dopey%s_no_header' % state + with mock.patch(mocked_fn) as mock_get_shard_ranges: + assert_not_redirected(obj_name) + mock_get_shard_ranges.assert_not_called() + self.assertIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + self.assertNotIn(obj_name, + [obj['name'] for obj in get_listing(0)]) + + # further updates to bashful and dopey are now redirected... 
+ assert_redirected('bashful', sr_dopey, headers=headers) + assert_redirected('dopey', sr_dopey, headers=headers) + # ...and existing updates in this container are *not* updated + self.assertEqual([ts_bashful_orig.internal], + [obj['created_at'] for obj in get_listing(0) + if obj['name'] == 'bashful']) + + # set broker to sharded state + self.assertTrue(broker.set_sharded_state()) + for state in ShardRange.STATES: + self.assertTrue( + sr_happy.update_state(state, + state_timestamp=next(ts_iter))) + self._put_shard_range(sr_happy) + with annotate_failure(state): + obj_name = 'grumpiest%s' % state + if state in redirect_states: + assert_redirected(obj_name, sr_happy, headers=headers) + self.assertNotIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + else: + assert_not_redirected(obj_name, headers=headers) + self.assertIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + obj_name = 'grumpiest%s_no_header' % state + with mock.patch(mocked_fn) as mock_get_shard_ranges: + assert_not_redirected(obj_name) + mock_get_shard_ranges.assert_not_called() + self.assertIn(obj_name, + [obj['name'] for obj in get_listing(-1)]) + + def test_PUT_object_update_redirected_to_shard(self): + self._check_object_update_redirected_to_shard('PUT') + + def test_DELETE_object_update_redirected_to_shard(self): + self._check_object_update_redirected_to_shard('DELETE') + def test_GET_json(self): # make a container req = Request.blank( @@ -2389,7 +3621,7 @@ req = Request.blank( '/sda1/p/a/c', environ={'REQUEST_METHOD': 'PUT', 'HTTP_X_TIMESTAMP': '0'}) - resp = req.get_response(self.controller) + req.get_response(self.controller) # fill the container for i in range(3): req = Request.blank( @@ -2407,6 +3639,24 @@ resp = req.get_response(self.controller) result = resp.body.split() self.assertEqual(result, ['2', ]) + # test limit with end_marker + req = Request.blank('/sda1/p/a/c?limit=2&end_marker=1', + environ={'REQUEST_METHOD': 'GET'}) + resp = req.get_response(self.controller) + result = resp.body.split() + self.assertEqual(result, ['0', ]) + # test limit, reverse with end_marker + req = Request.blank('/sda1/p/a/c?limit=2&end_marker=1&reverse=True', + environ={'REQUEST_METHOD': 'GET'}) + resp = req.get_response(self.controller) + result = resp.body.split() + self.assertEqual(result, ['2', ]) + # test marker > end_marker + req = Request.blank('/sda1/p/a/c?marker=2&end_marker=1', + environ={'REQUEST_METHOD': 'GET'}) + resp = req.get_response(self.controller) + result = resp.body.split() + self.assertEqual(result, []) def test_weird_content_types(self): snowman = u'\u2603' diff -Nru swift-2.17.0/test/unit/container/test_sharder.py swift-2.18.0/test/unit/container/test_sharder.py --- swift-2.17.0/test/unit/container/test_sharder.py 1970-01-01 00:00:00.000000000 +0000 +++ swift-2.18.0/test/unit/container/test_sharder.py 2018-05-30 10:17:02.000000000 +0000 @@ -0,0 +1,4580 @@ +# Copyright (c) 2010-2017 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import hashlib +import json +import random + +import eventlet +import os +import shutil +from contextlib import contextmanager +from tempfile import mkdtemp + +import mock +import unittest + +from collections import defaultdict + +import time + +from copy import deepcopy + +from swift.common import internal_client +from swift.container import replicator +from swift.container.backend import ContainerBroker, UNSHARDED, SHARDING, \ + SHARDED, DATADIR +from swift.container.sharder import ContainerSharder, sharding_enabled, \ + CleavingContext, DEFAULT_SHARD_SHRINK_POINT, \ + DEFAULT_SHARD_CONTAINER_THRESHOLD +from swift.common.utils import ShardRange, Timestamp, hash_path, \ + encode_timestamps, parse_db_filename, quorum_size, Everything +from test import annotate_failure + +from test.unit import FakeLogger, debug_logger, FakeRing, \ + make_timestamp_iter, unlink_files, mocked_http_conn, mock_timestamp_now, \ + attach_fake_replication_rpc + + +class BaseTestSharder(unittest.TestCase): + def setUp(self): + self.tempdir = mkdtemp() + self.ts_iter = make_timestamp_iter() + + def tearDown(self): + shutil.rmtree(self.tempdir, ignore_errors=True) + + def _assert_shard_ranges_equal(self, expected, actual): + self.assertEqual([dict(sr) for sr in expected], + [dict(sr) for sr in actual]) + + def _make_broker(self, account='a', container='c', epoch=None, + device='sda', part=0, hash_=None): + hash_ = hash_ or hashlib.md5(container).hexdigest() + datadir = os.path.join( + self.tempdir, device, 'containers', str(part), hash_[-3:], hash_) + if epoch: + filename = '%s_%s.db' % (hash, epoch) + else: + filename = hash_ + '.db' + db_file = os.path.join(datadir, filename) + broker = ContainerBroker( + db_file, account=account, container=container, + logger=debug_logger()) + broker.initialize() + return broker + + def _make_sharding_broker(self, account='a', container='c', + shard_bounds=(('', 'middle'), ('middle', ''))): + broker = self._make_broker(account=account, container=container) + broker.set_sharding_sysmeta('Root', 'a/c') + old_db_id = broker.get_info()['id'] + broker.enable_sharding(next(self.ts_iter)) + shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.CLEAVED) + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + broker = ContainerBroker(broker.db_file, account='a', container='c') + self.assertNotEqual(old_db_id, broker.get_info()['id']) # sanity check + return broker + + def _make_shard_ranges(self, bounds, state=None, object_count=0): + return [ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), + lower, upper, state=state, + object_count=object_count) + for lower, upper in bounds] + + def ts_encoded(self): + # make a unique timestamp string with multiple timestamps encoded; + # use different deltas between component timestamps + timestamps = [next(self.ts_iter) for i in range(4)] + return encode_timestamps( + timestamps[0], timestamps[1], timestamps[3]) + + +class TestSharder(BaseTestSharder): + def test_init(self): + def do_test(conf, expected): + with mock.patch( + 'swift.container.sharder.internal_client.InternalClient') \ + as mock_ic: + with mock.patch('swift.common.db_replicator.ring.Ring') \ + as mock_ring: + mock_ring.return_value = mock.MagicMock() + mock_ring.return_value.replica_count = 3 + sharder = ContainerSharder(conf) + mock_ring.assert_called_once_with( + '/etc/swift', ring_name='container') + self.assertEqual( + 'container-sharder', sharder.logger.logger.name) + for k, v in expected.items(): + 
self.assertTrue(hasattr(sharder, k), 'Missing attr %s' % k) + self.assertEqual(v, getattr(sharder, k), + 'Incorrect value: expected %s=%s but got %s' % + (k, v, getattr(sharder, k))) + return mock_ic + + expected = { + 'mount_check': True, 'bind_ip': '0.0.0.0', 'port': 6201, + 'per_diff': 1000, 'max_diffs': 100, 'interval': 30, + 'cleave_row_batch_size': 10000, + 'node_timeout': 10, 'conn_timeout': 5, + 'rsync_compress': False, + 'rsync_module': '{replication_ip}::container', + 'reclaim_age': 86400 * 7, + 'shard_shrink_point': 0.25, + 'shrink_merge_point': 0.75, + 'shard_container_threshold': 1000000, + 'split_size': 500000, + 'cleave_batch_size': 2, + 'scanner_batch_size': 10, + 'rcache': '/var/cache/swift/container.recon', + 'shards_account_prefix': '.shards_', + 'auto_shard': False, + 'recon_candidates_limit': 5, + 'shard_replication_quorum': 2, + 'existing_shard_replication_quorum': 2 + } + mock_ic = do_test({}, expected) + mock_ic.assert_called_once_with( + '/etc/swift/internal-client.conf', 'Swift Container Sharder', 3, + allow_modify_pipeline=False) + + conf = { + 'mount_check': False, 'bind_ip': '10.11.12.13', 'bind_port': 62010, + 'per_diff': 2000, 'max_diffs': 200, 'interval': 60, + 'cleave_row_batch_size': 3000, + 'node_timeout': 20, 'conn_timeout': 1, + 'rsync_compress': True, + 'rsync_module': '{replication_ip}::container_sda/', + 'reclaim_age': 86400 * 14, + 'shard_shrink_point': 35, + 'shard_shrink_merge_point': 85, + 'shard_container_threshold': 20000000, + 'cleave_batch_size': 4, + 'shard_scanner_batch_size': 8, + 'request_tries': 2, + 'internal_client_conf_path': '/etc/swift/my-sharder-ic.conf', + 'recon_cache_path': '/var/cache/swift-alt', + 'auto_create_account_prefix': '...', + 'auto_shard': 'yes', + 'recon_candidates_limit': 10, + 'shard_replication_quorum': 1, + 'existing_shard_replication_quorum': 0 + } + expected = { + 'mount_check': False, 'bind_ip': '10.11.12.13', 'port': 62010, + 'per_diff': 2000, 'max_diffs': 200, 'interval': 60, + 'cleave_row_batch_size': 3000, + 'node_timeout': 20, 'conn_timeout': 1, + 'rsync_compress': True, + 'rsync_module': '{replication_ip}::container_sda', + 'reclaim_age': 86400 * 14, + 'shard_shrink_point': 0.35, + 'shrink_merge_point': 0.85, + 'shard_container_threshold': 20000000, + 'split_size': 10000000, + 'cleave_batch_size': 4, + 'scanner_batch_size': 8, + 'rcache': '/var/cache/swift-alt/container.recon', + 'shards_account_prefix': '...shards_', + 'auto_shard': True, + 'recon_candidates_limit': 10, + 'shard_replication_quorum': 1, + 'existing_shard_replication_quorum': 0 + } + mock_ic = do_test(conf, expected) + mock_ic.assert_called_once_with( + '/etc/swift/my-sharder-ic.conf', 'Swift Container Sharder', 2, + allow_modify_pipeline=False) + + expected.update({'shard_replication_quorum': 3, + 'existing_shard_replication_quorum': 3}) + conf.update({'shard_replication_quorum': 4, + 'existing_shard_replication_quorum': 4}) + do_test(conf, expected) + + with self.assertRaises(ValueError) as cm: + do_test({'shard_shrink_point': 101}, {}) + self.assertIn( + 'greater than 0, less than 100, not "101"', cm.exception.message) + self.assertIn('shard_shrink_point', cm.exception.message) + + with self.assertRaises(ValueError) as cm: + do_test({'shard_shrink_merge_point': 101}, {}) + self.assertIn( + 'greater than 0, less than 100, not "101"', cm.exception.message) + self.assertIn('shard_shrink_merge_point', cm.exception.message) + + def test_init_internal_client_conf_loading_error(self): + with mock.patch('swift.common.db_replicator.ring.Ring') 
\ + as mock_ring: + mock_ring.return_value = mock.MagicMock() + mock_ring.return_value.replica_count = 3 + with self.assertRaises(SystemExit) as cm: + ContainerSharder( + {'internal_client_conf_path': + os.path.join(self.tempdir, 'nonexistent')}) + self.assertIn('Unable to load internal client', str(cm.exception)) + + with mock.patch('swift.common.db_replicator.ring.Ring') \ + as mock_ring: + mock_ring.return_value = mock.MagicMock() + mock_ring.return_value.replica_count = 3 + with mock.patch( + 'swift.container.sharder.internal_client.InternalClient', + side_effect=Exception('kaboom')): + with self.assertRaises(Exception) as cm: + ContainerSharder({}) + self.assertIn('kaboom', str(cm.exception)) + + def _assert_stats(self, expected, sharder, category): + # assertEqual doesn't work with a defaultdict + stats = sharder.stats['sharding'][category] + for k, v in expected.items(): + actual = stats[k] + self.assertEqual( + v, actual, 'Expected %s but got %s for %s in %s' % + (v, actual, k, stats)) + return stats + + def _assert_recon_stats(self, expected, sharder, category): + with open(sharder.rcache, 'rb') as fd: + recon = json.load(fd) + stats = recon['sharding_stats']['sharding'].get(category) + self.assertEqual(expected, stats) + + def test_increment_stats(self): + with self._mock_sharder() as sharder: + sharder._increment_stat('visited', 'success') + sharder._increment_stat('visited', 'success') + sharder._increment_stat('visited', 'failure') + sharder._increment_stat('visited', 'completed') + sharder._increment_stat('cleaved', 'success') + sharder._increment_stat('scanned', 'found', step=4) + expected = {'success': 2, + 'failure': 1, + 'completed': 1} + self._assert_stats(expected, sharder, 'visited') + self._assert_stats({'success': 1}, sharder, 'cleaved') + self._assert_stats({'found': 4}, sharder, 'scanned') + + def test_increment_stats_with_statsd(self): + with self._mock_sharder() as sharder: + sharder._increment_stat('visited', 'success', statsd=True) + sharder._increment_stat('visited', 'success', statsd=True) + sharder._increment_stat('visited', 'failure', statsd=True) + sharder._increment_stat('visited', 'failure', statsd=False) + sharder._increment_stat('visited', 'completed') + expected = {'success': 2, + 'failure': 2, + 'completed': 1} + self._assert_stats(expected, sharder, 'visited') + counts = sharder.logger.get_increment_counts() + self.assertEqual(2, counts.get('visited_success')) + self.assertEqual(1, counts.get('visited_failure')) + self.assertIsNone(counts.get('visited_completed')) + + def test_run_forever(self): + conf = {'recon_cache_path': self.tempdir, + 'devices': self.tempdir} + with self._mock_sharder(conf) as sharder: + sharder._check_node = lambda *args: True + sharder.logger.clear() + brokers = [] + for container in ('c1', 'c2'): + broker = self._make_broker( + container=container, hash_=container + 'hash', + device=sharder.ring.devs[0]['device'], part=0) + broker.update_metadata({'X-Container-Sysmeta-Sharding': + ('true', next(self.ts_iter).internal)}) + brokers.append(broker) + + fake_stats = { + 'scanned': {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 2, 'min_time': 99, 'max_time': 123}, + 'created': {'attempted': 1, 'success': 1, 'failure': 1}, + 'cleaved': {'attempted': 1, 'success': 1, 'failure': 0, + 'min_time': 0.01, 'max_time': 1.3}, + 'misplaced': {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 1, 'unplaced': 0}, + 'audit_root': {'attempted': 5, 'success': 4, 'failure': 1}, + 'audit_shard': {'attempted': 2, 
'success': 2, 'failure': 0}, + } + # NB these are time increments not absolute times... + fake_periods = [1, 2, 3, 3600, 4, 15, 15, 0] + fake_periods_iter = iter(fake_periods) + recon_data = [] + fake_process_broker_calls = [] + + def mock_dump_recon_cache(data, *args): + recon_data.append(deepcopy(data)) + + with mock.patch('swift.container.sharder.time.time') as fake_time: + def fake_process_broker(broker, *args, **kwargs): + # increment time and inject some fake stats + fake_process_broker_calls.append((broker, args, kwargs)) + try: + fake_time.return_value += next(fake_periods_iter) + except StopIteration: + # bail out + fake_time.side_effect = Exception('Test over') + sharder.stats['sharding'].update(fake_stats) + + with mock.patch( + 'swift.container.sharder.time.sleep') as mock_sleep: + with mock.patch( + 'swift.container.sharder.is_sharding_candidate', + return_value=True): + with mock.patch( + 'swift.container.sharder.dump_recon_cache', + mock_dump_recon_cache): + fake_time.return_value = next(fake_periods_iter) + sharder._is_sharding_candidate = lambda x: True + sharder._process_broker = fake_process_broker + with self.assertRaises(Exception) as cm: + sharder.run_forever() + + self.assertEqual('Test over', cm.exception.message) + # four cycles are started, two brokers visited per cycle, but + # fourth never completes + self.assertEqual(8, len(fake_process_broker_calls)) + # expect initial random sleep then one sleep between first and + # second pass + self.assertEqual(2, mock_sleep.call_count) + self.assertLessEqual(mock_sleep.call_args_list[0][0][0], 30) + self.assertLessEqual(mock_sleep.call_args_list[1][0][0], + 30 - fake_periods[0]) + + lines = sharder.logger.get_lines_for_level('info') + categories = ('visited', 'scanned', 'created', 'cleaved', + 'misplaced', 'audit_root', 'audit_shard') + + def check_categories(start_time): + for category in categories: + line = lines.pop(0) + self.assertIn('Since %s' % time.ctime(start_time), line) + self.assertIn(category, line) + for k, v in fake_stats.get(category, {}).items(): + self.assertIn('%s:%s' % (k, v), line) + + def check_logs(cycle_time, start_time, + expect_periodic_stats=False): + self.assertIn('Container sharder cycle starting', lines.pop(0)) + check_categories(start_time) + if expect_periodic_stats: + check_categories(start_time) + self.assertIn('Container sharder cycle completed: %.02fs' % + cycle_time, lines.pop(0)) + + check_logs(sum(fake_periods[1:3]), fake_periods[0]) + check_logs(sum(fake_periods[3:5]), sum(fake_periods[:3]), + expect_periodic_stats=True) + check_logs(sum(fake_periods[5:7]), sum(fake_periods[:5])) + # final cycle start but then exception pops to terminate test + self.assertIn('Container sharder cycle starting', lines.pop(0)) + self.assertFalse(lines) + lines = sharder.logger.get_lines_for_level('error') + self.assertIn( + 'Unhandled exception while dumping progress', lines[0]) + self.assertIn('Test over', lines[0]) + + def check_recon(data, time, last, expected_stats): + self.assertEqual(time, data['sharding_time']) + self.assertEqual(last, data['sharding_last']) + self.assertEqual( + expected_stats, dict(data['sharding_stats']['sharding'])) + + def stats_for_candidate(broker): + return {'object_count': 0, + 'account': broker.account, + 'meta_timestamp': mock.ANY, + 'container': broker.container, + 'file_size': os.stat(broker.db_file).st_size, + 'path': broker.db_file, + 'root': broker.path, + 'node_index': 0} + + self.assertEqual(4, len(recon_data)) + # stats report at end of first cycle + 
fake_stats.update({'visited': {'attempted': 2, 'skipped': 0, + 'success': 2, 'failure': 0, + 'completed': 0}}) + fake_stats.update({ + 'sharding_candidates': { + 'found': 2, + 'top': [stats_for_candidate(call[0]) + for call in fake_process_broker_calls[:2]] + } + }) + check_recon(recon_data[0], sum(fake_periods[1:3]), + sum(fake_periods[:3]), fake_stats) + # periodic stats report after first broker has been visited during + # second cycle - one candidate identified so far this cycle + fake_stats.update({'visited': {'attempted': 1, 'skipped': 0, + 'success': 1, 'failure': 0, + 'completed': 0}}) + fake_stats.update({ + 'sharding_candidates': { + 'found': 1, + 'top': [stats_for_candidate(call[0]) + for call in fake_process_broker_calls[2:3]] + } + }) + check_recon(recon_data[1], fake_periods[3], + sum(fake_periods[:4]), fake_stats) + # stats report at end of second cycle - both candidates reported + fake_stats.update({'visited': {'attempted': 2, 'skipped': 0, + 'success': 2, 'failure': 0, + 'completed': 0}}) + fake_stats.update({ + 'sharding_candidates': { + 'found': 2, + 'top': [stats_for_candidate(call[0]) + for call in fake_process_broker_calls[2:4]] + } + }) + check_recon(recon_data[2], sum(fake_periods[3:5]), + sum(fake_periods[:5]), fake_stats) + # stats report at end of third cycle + fake_stats.update({'visited': {'attempted': 2, 'skipped': 0, + 'success': 2, 'failure': 0, + 'completed': 0}}) + fake_stats.update({ + 'sharding_candidates': { + 'found': 2, + 'top': [stats_for_candidate(call[0]) + for call in fake_process_broker_calls[4:6]] + } + }) + check_recon(recon_data[3], sum(fake_periods[5:7]), + sum(fake_periods[:7]), fake_stats) + + def test_one_shard_cycle(self): + conf = {'recon_cache_path': self.tempdir, + 'devices': self.tempdir, + 'shard_container_threshold': 9} + with self._mock_sharder(conf) as sharder: + sharder._check_node = lambda *args: True + sharder.reported = time.time() + sharder.logger = debug_logger() + brokers = [] + device_ids = set(range(3)) + for device_id in device_ids: + brokers.append(self._make_broker( + container='c%s' % device_id, hash_='c%shash' % device_id, + device=sharder.ring.devs[device_id]['device'], part=0)) + # enable a/c2 and a/c3 for sharding + for broker in brokers[1:]: + broker.update_metadata({'X-Container-Sysmeta-Sharding': + ('true', next(self.ts_iter).internal)}) + # make a/c2 a candidate for sharding + for i in range(10): + brokers[1].put_object('o%s' % i, next(self.ts_iter).internal, + 0, 'text/plain', 'etag', 0) + + # check only sharding enabled containers are processed + with mock.patch.object( + sharder, '_process_broker' + ) as mock_process_broker: + sharder._local_device_ids = {'stale_node_id'} + sharder._one_shard_cycle(Everything(), Everything()) + + self.assertEqual(device_ids, sharder._local_device_ids) + self.assertEqual(2, mock_process_broker.call_count) + processed_paths = [call[0][0].path + for call in mock_process_broker.call_args_list] + self.assertEqual({'a/c1', 'a/c2'}, set(processed_paths)) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + expected_stats = {'attempted': 2, 'success': 2, 'failure': 0, + 'skipped': 1, 'completed': 0} + self._assert_recon_stats(expected_stats, sharder, 'visited') + expected_candidate_stats = { + 'found': 1, + 'top': [{'object_count': 10, 'account': 'a', 'container': 'c1', + 'meta_timestamp': mock.ANY, + 'file_size': os.stat(brokers[1].db_file).st_size, + 'path': brokers[1].db_file, 'root': 'a/c1', + 'node_index': 1}]} + self._assert_recon_stats( + 
expected_candidate_stats, sharder, 'sharding_candidates') + self._assert_recon_stats(None, sharder, 'sharding_progress') + + # enable and progress container a/c1 by giving it shard ranges + now = next(self.ts_iter) + brokers[0].merge_shard_ranges( + [ShardRange('a/c0', now, '', '', state=ShardRange.SHARDING), + ShardRange('.s_a/1', now, '', 'b', state=ShardRange.ACTIVE), + ShardRange('.s_a/2', now, 'b', 'c', state=ShardRange.CLEAVED), + ShardRange('.s_a/3', now, 'c', 'd', state=ShardRange.CREATED), + ShardRange('.s_a/4', now, 'd', 'e', state=ShardRange.CREATED), + ShardRange('.s_a/5', now, 'e', '', state=ShardRange.FOUND)]) + brokers[1].merge_shard_ranges( + [ShardRange('a/c1', now, '', '', state=ShardRange.SHARDING), + ShardRange('.s_a/6', now, '', 'b', state=ShardRange.ACTIVE), + ShardRange('.s_a/7', now, 'b', 'c', state=ShardRange.ACTIVE), + ShardRange('.s_a/8', now, 'c', 'd', state=ShardRange.CLEAVED), + ShardRange('.s_a/9', now, 'd', 'e', state=ShardRange.CREATED), + ShardRange('.s_a/0', now, 'e', '', state=ShardRange.CREATED)]) + for i in range(11): + brokers[2].put_object('o%s' % i, next(self.ts_iter).internal, + 0, 'text/plain', 'etag', 0) + + def mock_processing(broker, node, part): + if broker.path == 'a/c1': + raise Exception('kapow!') + elif broker.path not in ('a/c0', 'a/c2'): + raise BaseException("I don't know how to handle a broker " + "for %s" % broker.path) + + # check exceptions are handled + with mock.patch.object( + sharder, '_process_broker', side_effect=mock_processing + ) as mock_process_broker: + sharder._local_device_ids = {'stale_node_id'} + sharder._one_shard_cycle(Everything(), Everything()) + + self.assertEqual(device_ids, sharder._local_device_ids) + self.assertEqual(3, mock_process_broker.call_count) + processed_paths = [call[0][0].path + for call in mock_process_broker.call_args_list] + self.assertEqual({'a/c0', 'a/c1', 'a/c2'}, set(processed_paths)) + lines = sharder.logger.get_lines_for_level('error') + self.assertIn('Unhandled exception while processing', lines[0]) + self.assertFalse(lines[1:]) + sharder.logger.clear() + expected_stats = {'attempted': 3, 'success': 2, 'failure': 1, + 'skipped': 0, 'completed': 0} + self._assert_recon_stats(expected_stats, sharder, 'visited') + expected_candidate_stats = { + 'found': 1, + 'top': [{'object_count': 11, 'account': 'a', 'container': 'c2', + 'meta_timestamp': mock.ANY, + 'file_size': os.stat(brokers[1].db_file).st_size, + 'path': brokers[2].db_file, 'root': 'a/c2', + 'node_index': 2}]} + self._assert_recon_stats( + expected_candidate_stats, sharder, 'sharding_candidates') + expected_in_progress_stats = { + 'all': [{'object_count': 0, 'account': 'a', 'container': 'c0', + 'meta_timestamp': mock.ANY, + 'file_size': os.stat(brokers[0].db_file).st_size, + 'path': brokers[0].db_file, 'root': 'a/c0', + 'node_index': 0, + 'found': 1, 'created': 2, 'cleaved': 1, 'active': 1, + 'state': 'sharding', 'db_state': 'unsharded', + 'error': None}, + {'object_count': 10, 'account': 'a', 'container': 'c1', + 'meta_timestamp': mock.ANY, + 'file_size': os.stat(brokers[1].db_file).st_size, + 'path': brokers[1].db_file, 'root': 'a/c1', + 'node_index': 1, + 'found': 0, 'created': 2, 'cleaved': 1, 'active': 2, + 'state': 'sharding', 'db_state': 'unsharded', + 'error': 'kapow!'}]} + self._assert_stats( + expected_in_progress_stats, sharder, 'sharding_in_progress') + + # check that candidates and in progress stats don't stick in recon + own_shard_range = brokers[0].get_own_shard_range() + own_shard_range.state = ShardRange.ACTIVE + 
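# For illustration only: a minimal tally, assuming nothing about the sharder's
# real implementation, of how the found/created/cleaved/active counters in the
# expected_in_progress_stats above follow from the ShardRange states merged
# into each broker; the helper name progress_counts is hypothetical.
from collections import Counter

def progress_counts(states):
    # count how many shard ranges sit in each cleaving-related state
    counts = Counter(states)
    return {key: counts.get(key, 0)
            for key in ('found', 'created', 'cleaved', 'active')}

# a/c0 above was given one FOUND, two CREATED, one CLEAVED and one ACTIVE range
assert progress_counts(['active', 'cleaved', 'created', 'created', 'found']) == \
    {'found': 1, 'created': 2, 'cleaved': 1, 'active': 1}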
brokers[0].merge_shard_ranges([own_shard_range]) + for i in range(10): + brokers[1].delete_object( + 'o%s' % i, next(self.ts_iter).internal) + with mock.patch.object( + sharder, '_process_broker' + ) as mock_process_broker: + sharder._local_device_ids = {999} + sharder._one_shard_cycle(Everything(), Everything()) + + self.assertEqual(device_ids, sharder._local_device_ids) + self.assertEqual(3, mock_process_broker.call_count) + processed_paths = [call[0][0].path + for call in mock_process_broker.call_args_list] + self.assertEqual({'a/c0', 'a/c1', 'a/c2'}, set(processed_paths)) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + expected_stats = {'attempted': 3, 'success': 3, 'failure': 0, + 'skipped': 0, 'completed': 0} + self._assert_recon_stats(expected_stats, sharder, 'visited') + self._assert_recon_stats( + expected_candidate_stats, sharder, 'sharding_candidates') + self._assert_recon_stats(None, sharder, 'sharding_progress') + + @contextmanager + def _mock_sharder(self, conf=None, replicas=3): + conf = conf or {} + conf['devices'] = self.tempdir + with mock.patch( + 'swift.container.sharder.internal_client.InternalClient'): + with mock.patch( + 'swift.common.db_replicator.ring.Ring', + lambda *args, **kwargs: FakeRing(replicas=replicas)): + sharder = ContainerSharder(conf, logger=FakeLogger()) + sharder._local_device_ids = {0, 1, 2} + sharder._replicate_object = mock.MagicMock( + return_value=(True, [True] * sharder.ring.replica_count)) + yield sharder + + def _get_raw_object_records(self, broker): + # use list_objects_iter with no-op transform_func to get back actual + # un-transformed rows with encoded timestamps + return [list(obj) for obj in broker.list_objects_iter( + 10, '', '', '', '', include_deleted=None, all_policies=True, + transform_func=lambda record: record)] + + def _check_objects(self, expected_objs, shard_db): + shard_broker = ContainerBroker(shard_db) + shard_objs = self._get_raw_object_records(shard_broker) + expected_objs = [list(obj) for obj in expected_objs] + self.assertEqual(expected_objs, shard_objs) + + def _check_shard_range(self, expected, actual): + expected_dict = dict(expected) + actual_dict = dict(actual) + self.assertGreater(actual_dict.pop('meta_timestamp'), + expected_dict.pop('meta_timestamp')) + self.assertEqual(expected_dict, actual_dict) + + def test_fetch_shard_ranges_unexpected_response(self): + broker = self._make_broker() + exc = internal_client.UnexpectedResponse( + 'Unexpected response: 404', None) + with self._mock_sharder() as sharder: + sharder.int_client.make_request.side_effect = exc + self.assertIsNone(sharder._fetch_shard_ranges(broker)) + lines = sharder.logger.get_lines_for_level('warning') + self.assertIn('Unexpected response: 404', lines[0]) + self.assertFalse(lines[1:]) + + def test_fetch_shard_ranges_bad_record_type(self): + def do_test(mock_resp_headers): + with self._mock_sharder() as sharder: + mock_make_request = mock.MagicMock( + return_value=mock.MagicMock(headers=mock_resp_headers)) + sharder.int_client.make_request = mock_make_request + self.assertIsNone(sharder._fetch_shard_ranges(broker)) + lines = sharder.logger.get_lines_for_level('error') + self.assertIn('unexpected record type', lines[0]) + self.assertFalse(lines[1:]) + + broker = self._make_broker() + do_test({}) + do_test({'x-backend-record-type': 'object'}) + do_test({'x-backend-record-type': 'disco'}) + + def test_fetch_shard_ranges_bad_data(self): + def do_test(mock_resp_body): + mock_resp_headers = {'x-backend-record-type': 'shard'} + with 
self._mock_sharder() as sharder: + mock_make_request = mock.MagicMock( + return_value=mock.MagicMock(headers=mock_resp_headers, + body=mock_resp_body)) + sharder.int_client.make_request = mock_make_request + self.assertIsNone(sharder._fetch_shard_ranges(broker)) + lines = sharder.logger.get_lines_for_level('error') + self.assertIn('invalid data', lines[0]) + self.assertFalse(lines[1:]) + + broker = self._make_broker() + do_test({}) + do_test('') + do_test(json.dumps({})) + do_test(json.dumps([{'account': 'a', 'container': 'c'}])) + + def test_fetch_shard_ranges_ok(self): + def do_test(mock_resp_body, params): + mock_resp_headers = {'x-backend-record-type': 'shard'} + with self._mock_sharder() as sharder: + mock_make_request = mock.MagicMock( + return_value=mock.MagicMock(headers=mock_resp_headers, + body=mock_resp_body)) + sharder.int_client.make_request = mock_make_request + mock_make_path = mock.MagicMock(return_value='/v1/a/c') + sharder.int_client.make_path = mock_make_path + actual = sharder._fetch_shard_ranges(broker, params=params) + sharder.int_client.make_path.assert_called_once_with('a', 'c') + self.assertFalse(sharder.logger.get_lines_for_level('error')) + return actual, mock_make_request + + expected_headers = {'X-Backend-Record-Type': 'shard', + 'X-Backend-Include-Deleted': 'False', + 'X-Backend-Override-Deleted': 'true'} + broker = self._make_broker() + shard_ranges = self._make_shard_ranges((('', 'm'), ('m', ''))) + + params = {'format': 'json'} + actual, mock_call = do_test(json.dumps([dict(shard_ranges[0])]), + params={}) + mock_call.assert_called_once_with( + 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,), + params=params) + self._assert_shard_ranges_equal([shard_ranges[0]], actual) + + params = {'format': 'json', 'includes': 'thing'} + actual, mock_call = do_test( + json.dumps([dict(sr) for sr in shard_ranges]), params=params) + self._assert_shard_ranges_equal(shard_ranges, actual) + mock_call.assert_called_once_with( + 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,), + params=params) + + params = {'format': 'json', 'end_marker': 'there', 'marker': 'here'} + actual, mock_call = do_test(json.dumps([]), params=params) + self._assert_shard_ranges_equal([], actual) + mock_call.assert_called_once_with( + 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,), + params=params) + + def _check_cleave_root(self, conf=None): + broker = self._make_broker() + objects = [ + # shard 0 + ('a', self.ts_encoded(), 10, 'text/plain', 'etag_a', 0, 0), + ('here', self.ts_encoded(), 10, 'text/plain', 'etag_here', 0, 0), + # shard 1 + ('m', self.ts_encoded(), 1, 'text/plain', 'etag_m', 0, 0), + ('n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0), + ('there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0), + # shard 2 + ('where', self.ts_encoded(), 100, 'text/plain', 'etag_where', 0, + 0), + # shard 3 + ('x', self.ts_encoded(), 0, '', '', 1, 0), # deleted + ('y', self.ts_encoded(), 1000, 'text/plain', 'etag_y', 0, 0), + # shard 4 + ('yyyy', self.ts_encoded(), 14, 'text/plain', 'etag_yyyy', 0, 0), + ] + for obj in objects: + broker.put_object(*obj) + initial_root_info = broker.get_info() + broker.enable_sharding(Timestamp.now()) + + shard_bounds = (('', 'here'), ('here', 'there'), + ('there', 'where'), ('where', 'yonder'), + ('yonder', '')) + shard_ranges = self._make_shard_ranges(shard_bounds) + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( 
+ os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + + # used to accumulate stats from sharded dbs + total_shard_stats = {'object_count': 0, 'bytes_used': 0} + # run cleave - no shard ranges, nothing happens + with self._mock_sharder(conf=conf) as sharder: + self.assertFalse(sharder._cleave(broker)) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual('', context.cursor) + self.assertEqual(9, context.cleave_to_row) + self.assertEqual(9, context.max_row) + self.assertEqual(0, context.ranges_done) + self.assertEqual(0, context.ranges_todo) + + self.assertEqual(UNSHARDED, broker.get_db_state()) + sharder._replicate_object.assert_not_called() + for db in expected_shard_dbs: + with annotate_failure(db): + self.assertFalse(os.path.exists(db)) + + # run cleave - all shard ranges in found state, nothing happens + broker.merge_shard_ranges(shard_ranges[:4]) + self.assertTrue(broker.set_sharding_state()) + + with self._mock_sharder(conf=conf) as sharder: + self.assertFalse(sharder._cleave(broker)) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual('', context.cursor) + self.assertEqual(9, context.cleave_to_row) + self.assertEqual(9, context.max_row) + self.assertEqual(0, context.ranges_done) + self.assertEqual(4, context.ranges_todo) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_not_called() + for db in expected_shard_dbs: + with annotate_failure(db): + self.assertFalse(os.path.exists(db)) + for shard_range in broker.get_shard_ranges(): + with annotate_failure(shard_range): + self.assertEqual(ShardRange.FOUND, shard_range.state) + + # move first shard range to created state, first shard range is cleaved + shard_ranges[0].update_state(ShardRange.CREATED) + broker.merge_shard_ranges(shard_ranges[:1]) + with self._mock_sharder(conf=conf) as sharder: + self.assertFalse(sharder._cleave(broker)) + + expected = {'attempted': 1, 'success': 1, 'failure': 0, + 'min_time': mock.ANY, 'max_time': mock.ANY} + stats = self._assert_stats(expected, sharder, 'cleaved') + self.assertIsInstance(stats['min_time'], float) + self.assertIsInstance(stats['max_time'], float) + self.assertLessEqual(stats['min_time'], stats['max_time']) + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[0], 0) + shard_broker = ContainerBroker(expected_shard_dbs[0]) + shard_own_sr = shard_broker.get_own_shard_range() + self.assertEqual(ShardRange.CLEAVED, shard_own_sr.state) + shard_info = shard_broker.get_info() + total_shard_stats['object_count'] += shard_info['object_count'] + total_shard_stats['bytes_used'] += shard_info['bytes_used'] + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(4, len(updated_shard_ranges)) + # update expected state and metadata, check cleaved shard range + shard_ranges[0].bytes_used = 20 + shard_ranges[0].object_count = 2 + shard_ranges[0].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[0], updated_shard_ranges[0]) + self._check_objects(objects[:2], expected_shard_dbs[0]) + # other shard ranges should be unchanged + for i in range(1, len(shard_ranges)): + with annotate_failure(i): + self.assertFalse(os.path.exists(expected_shard_dbs[i])) + for i in range(1, len(updated_shard_ranges)): + with annotate_failure(i): + 
self.assertEqual(dict(shard_ranges[i]), + dict(updated_shard_ranges[i])) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual('here', context.cursor) + self.assertEqual(9, context.cleave_to_row) + self.assertEqual(9, context.max_row) + self.assertEqual(1, context.ranges_done) + self.assertEqual(3, context.ranges_todo) + + unlink_files(expected_shard_dbs) + + # move more shard ranges to created state + for i in range(1, 4): + shard_ranges[i].update_state(ShardRange.CREATED) + broker.merge_shard_ranges(shard_ranges[1:4]) + + # replication of next shard range is not sufficiently successful + with self._mock_sharder(conf=conf) as sharder: + quorum = quorum_size(sharder.ring.replica_count) + successes = [True] * (quorum - 1) + fails = [False] * (sharder.ring.replica_count - len(successes)) + responses = successes + fails + random.shuffle(responses) + sharder._replicate_object = mock.MagicMock( + side_effect=((False, responses),)) + self.assertFalse(sharder._cleave(broker)) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[1], 0) + + # cleaving state is unchanged + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(4, len(updated_shard_ranges)) + for i in range(1, len(updated_shard_ranges)): + with annotate_failure(i): + self.assertEqual(dict(shard_ranges[i]), + dict(updated_shard_ranges[i])) + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual('here', context.cursor) + self.assertEqual(9, context.cleave_to_row) + self.assertEqual(9, context.max_row) + self.assertEqual(1, context.ranges_done) + self.assertEqual(3, context.ranges_todo) + + # try again, this time replication is sufficiently successful + with self._mock_sharder(conf=conf) as sharder: + successes = [True] * quorum + fails = [False] * (sharder.ring.replica_count - len(successes)) + responses1 = successes + fails + responses2 = fails + successes + sharder._replicate_object = mock.MagicMock( + side_effect=((False, responses1), (False, responses2))) + self.assertFalse(sharder._cleave(broker)) + + expected = {'attempted': 2, 'success': 2, 'failure': 0, + 'min_time': mock.ANY, 'max_time': mock.ANY} + stats = self._assert_stats(expected, sharder, 'cleaved') + self.assertIsInstance(stats['min_time'], float) + self.assertIsInstance(stats['max_time'], float) + self.assertLessEqual(stats['min_time'], stats['max_time']) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in expected_shard_dbs[1:3]] + ) + for db in expected_shard_dbs[1:3]: + shard_broker = ContainerBroker(db) + shard_own_sr = shard_broker.get_own_shard_range() + self.assertEqual(ShardRange.CLEAVED, shard_own_sr.state) + shard_info = shard_broker.get_info() + total_shard_stats['object_count'] += shard_info['object_count'] + total_shard_stats['bytes_used'] += shard_info['bytes_used'] + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(4, len(updated_shard_ranges)) + + # only 2 are cleaved per batch + # update expected state and metadata, check cleaved shard ranges + shard_ranges[1].bytes_used = 6 + shard_ranges[1].object_count = 3 + shard_ranges[1].state = ShardRange.CLEAVED + shard_ranges[2].bytes_used = 100 + shard_ranges[2].object_count = 1 + shard_ranges[2].state = ShardRange.CLEAVED + for i in range(0, 3): + with annotate_failure(i): + 
self._check_shard_range( + shard_ranges[i], updated_shard_ranges[i]) + self._check_objects(objects[2:5], expected_shard_dbs[1]) + self._check_objects(objects[5:6], expected_shard_dbs[2]) + # other shard ranges should be unchanged + self.assertFalse(os.path.exists(expected_shard_dbs[0])) + for i, db in enumerate(expected_shard_dbs[3:], 3): + with annotate_failure(i): + self.assertFalse(os.path.exists(db)) + for i, updated_shard_range in enumerate(updated_shard_ranges[3:], 3): + with annotate_failure(i): + self.assertEqual(dict(shard_ranges[i]), + dict(updated_shard_range)) + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual('where', context.cursor) + self.assertEqual(9, context.cleave_to_row) + self.assertEqual(9, context.max_row) + self.assertEqual(3, context.ranges_done) + self.assertEqual(1, context.ranges_todo) + + unlink_files(expected_shard_dbs) + + # run cleave again - should process the fourth range + with self._mock_sharder(conf=conf) as sharder: + sharder.logger = debug_logger() + self.assertFalse(sharder._cleave(broker)) + + expected = {'attempted': 1, 'success': 1, 'failure': 0, + 'min_time': mock.ANY, 'max_time': mock.ANY} + stats = self._assert_stats(expected, sharder, 'cleaved') + self.assertIsInstance(stats['min_time'], float) + self.assertIsInstance(stats['max_time'], float) + self.assertLessEqual(stats['min_time'], stats['max_time']) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[3], 0) + shard_broker = ContainerBroker(expected_shard_dbs[3]) + shard_own_sr = shard_broker.get_own_shard_range() + self.assertEqual(ShardRange.CLEAVED, shard_own_sr.state) + shard_info = shard_broker.get_info() + total_shard_stats['object_count'] += shard_info['object_count'] + total_shard_stats['bytes_used'] += shard_info['bytes_used'] + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(4, len(updated_shard_ranges)) + + shard_ranges[3].bytes_used = 1000 + shard_ranges[3].object_count = 1 + shard_ranges[3].state = ShardRange.CLEAVED + for i in range(0, 4): + with annotate_failure(i): + self._check_shard_range( + shard_ranges[i], updated_shard_ranges[i]) + # NB includes the deleted object + self._check_objects(objects[6:8], expected_shard_dbs[3]) + # other shard ranges should be unchanged + for i, db in enumerate(expected_shard_dbs[:3]): + with annotate_failure(i): + self.assertFalse(os.path.exists(db)) + self.assertFalse(os.path.exists(expected_shard_dbs[4])) + for i, updated_shard_range in enumerate(updated_shard_ranges[4:], 4): + with annotate_failure(i): + self.assertEqual(dict(shard_ranges[i]), + dict(updated_shard_range)) + + self.assertFalse(os.path.exists(expected_shard_dbs[4])) + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual('yonder', context.cursor) + self.assertEqual(9, context.cleave_to_row) + self.assertEqual(9, context.max_row) + self.assertEqual(4, context.ranges_done) + self.assertEqual(0, context.ranges_todo) + + unlink_files(expected_shard_dbs) + + # run cleave - should be a no-op, all existing ranges have been cleaved + with self._mock_sharder(conf=conf) as sharder: + self.assertFalse(sharder._cleave(broker)) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_not_called() + + # add final shard range - move this to ACTIVE state and update stats to + # 
simulate another replica having cleaved it and replicated its state + shard_ranges[4].update_state(ShardRange.ACTIVE) + shard_ranges[4].update_meta(2, 15) + broker.merge_shard_ranges(shard_ranges[4:]) + + with self._mock_sharder(conf=conf) as sharder: + self.assertTrue(sharder._cleave(broker)) + + expected = {'attempted': 1, 'success': 1, 'failure': 0, + 'min_time': mock.ANY, 'max_time': mock.ANY} + stats = self._assert_stats(expected, sharder, 'cleaved') + self.assertIsInstance(stats['min_time'], float) + self.assertIsInstance(stats['max_time'], float) + self.assertLessEqual(stats['min_time'], stats['max_time']) + + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[4], 0) + shard_broker = ContainerBroker(expected_shard_dbs[4]) + shard_own_sr = shard_broker.get_own_shard_range() + self.assertEqual(ShardRange.ACTIVE, shard_own_sr.state) + shard_info = shard_broker.get_info() + total_shard_stats['object_count'] += shard_info['object_count'] + total_shard_stats['bytes_used'] += shard_info['bytes_used'] + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(5, len(updated_shard_ranges)) + # NB stats of the ACTIVE shard range should not be reset by cleaving + for i in range(0, 4): + with annotate_failure(i): + self._check_shard_range( + shard_ranges[i], updated_shard_ranges[i]) + self.assertEqual(dict(shard_ranges[4]), dict(updated_shard_ranges[4])) + + # object copied to shard + self._check_objects(objects[8:], expected_shard_dbs[4]) + # other shard ranges should be unchanged + for i, db in enumerate(expected_shard_dbs[:4]): + with annotate_failure(i): + self.assertFalse(os.path.exists(db)) + + self.assertEqual(initial_root_info['object_count'], + total_shard_stats['object_count']) + self.assertEqual(initial_root_info['bytes_used'], + total_shard_stats['bytes_used']) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertTrue(context.cleaving_done) + self.assertEqual('', context.cursor) + self.assertEqual(9, context.cleave_to_row) + self.assertEqual(9, context.max_row) + self.assertEqual(5, context.ranges_done) + self.assertEqual(0, context.ranges_todo) + + with self._mock_sharder(conf=conf) as sharder: + self.assertTrue(sharder._cleave(broker)) + sharder._replicate_object.assert_not_called() + + self.assertTrue(broker.set_sharded_state()) + # run cleave - should be a no-op + with self._mock_sharder(conf=conf) as sharder: + self.assertTrue(sharder._cleave(broker)) + + sharder._replicate_object.assert_not_called() + + def test_cleave_root(self): + self._check_cleave_root() + + def test_cleave_root_listing_limit_one(self): + # force yield_objects to update its marker and call to the broker's + # get_objects() for each shard range, to check the marker moves on + self._check_cleave_root(conf={'cleave_row_batch_size': 1}) + + def test_cleave_root_ranges_change(self): + # verify that objects are not missed if shard ranges change between + # cleaving batches + broker = self._make_broker() + objects = [ + ('a', self.ts_encoded(), 10, 'text/plain', 'etag_a', 0, 0), + ('b', self.ts_encoded(), 10, 'text/plain', 'etag_b', 0, 0), + ('c', self.ts_encoded(), 1, 'text/plain', 'etag_c', 0, 0), + ('d', self.ts_encoded(), 2, 'text/plain', 'etag_d', 0, 0), + ('e', self.ts_encoded(), 3, 'text/plain', 'etag_e', 0, 0), + ('f', self.ts_encoded(), 100, 'text/plain', 'etag_f', 0, 0), + ('x', self.ts_encoded(), 0, '', '', 1, 0), # deleted + ('z', self.ts_encoded(), 1000, 'text/plain', 'etag_z', 0, 0) + ] + for obj in objects: + 
broker.put_object(*obj) + broker.enable_sharding(Timestamp.now()) + + shard_bounds = (('', 'd'), ('d', 'x'), ('x', '')) + shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.CREATED) + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + + broker.merge_shard_ranges(shard_ranges[:3]) + self.assertTrue(broker.set_sharding_state()) + + # run cleave - first batch is cleaved + with self._mock_sharder() as sharder: + self.assertFalse(sharder._cleave(broker)) + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual(str(shard_ranges[1].upper), context.cursor) + self.assertEqual(8, context.cleave_to_row) + self.assertEqual(8, context.max_row) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in expected_shard_dbs[:2]] + ) + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(3, len(updated_shard_ranges)) + + # first 2 shard ranges should have updated object count, bytes used and + # meta_timestamp + shard_ranges[0].bytes_used = 23 + shard_ranges[0].object_count = 4 + shard_ranges[0].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[0], updated_shard_ranges[0]) + shard_ranges[1].bytes_used = 103 + shard_ranges[1].object_count = 2 + shard_ranges[1].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[1], updated_shard_ranges[1]) + self._check_objects(objects[:4], expected_shard_dbs[0]) + self._check_objects(objects[4:7], expected_shard_dbs[1]) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + + # third shard range should be unchanged - not yet cleaved + self.assertEqual(dict(shard_ranges[2]), + dict(updated_shard_ranges[2])) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual(str(shard_ranges[1].upper), context.cursor) + self.assertEqual(8, context.cleave_to_row) + self.assertEqual(8, context.max_row) + + # now change the shard ranges so that third consumes second + shard_ranges[1].set_deleted() + shard_ranges[2].lower = 'd' + shard_ranges[2].timestamp = Timestamp.now() + + broker.merge_shard_ranges(shard_ranges[1:3]) + + # run cleave - should process the extended third (final) range + with self._mock_sharder() as sharder: + self.assertTrue(sharder._cleave(broker)) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[2], 0) + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(2, len(updated_shard_ranges)) + self._check_shard_range(shard_ranges[0], updated_shard_ranges[0]) + # third shard range should now have updated object count, bytes used, + # including objects previously in the second shard range + shard_ranges[2].bytes_used = 1103 + shard_ranges[2].object_count = 3 + shard_ranges[2].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[2], updated_shard_ranges[1]) + self._check_objects(objects[4:8], expected_shard_dbs[2]) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertTrue(context.cleaving_done) + self.assertEqual(str(shard_ranges[2].upper), context.cursor) + self.assertEqual(8, context.cleave_to_row) + 
self.assertEqual(8, context.max_row) + + def test_cleave_shard(self): + broker = self._make_broker(account='.shards_a', container='shard_c') + own_shard_range = ShardRange( + broker.path, Timestamp.now(), 'here', 'where', + state=ShardRange.SHARDING, epoch=Timestamp.now()) + broker.merge_shard_ranges([own_shard_range]) + broker.set_sharding_sysmeta('Root', 'a/c') + self.assertFalse(broker.is_root_container()) # sanity check + + objects = [ + ('m', self.ts_encoded(), 1, 'text/plain', 'etag_m', 0, 0), + ('n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0), + ('there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0), + ('where', self.ts_encoded(), 100, 'text/plain', 'etag_where', 0, + 0), + ] + misplaced_objects = [ + ('a', self.ts_encoded(), 1, 'text/plain', 'etag_a', 0, 0), + ('z', self.ts_encoded(), 100, 'text/plain', 'etag_z', 1, 0), + ] + for obj in objects + misplaced_objects: + broker.put_object(*obj) + + shard_bounds = (('here', 'there'), + ('there', 'where')) + shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.CREATED) + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + + misplaced_bounds = (('', 'here'), + ('where', '')) + misplaced_ranges = self._make_shard_ranges( + misplaced_bounds, state=ShardRange.ACTIVE) + misplaced_dbs = [] + for shard_range in misplaced_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + misplaced_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + + # run cleave - first range is cleaved but move of misplaced objects is + # not successful + sharder_conf = {'cleave_batch_size': 1} + with self._mock_sharder(sharder_conf) as sharder: + with mock.patch.object( + sharder, '_make_shard_range_fetcher', + return_value=lambda: iter(misplaced_ranges)): + # cause misplaced objects replication to not succeed + quorum = quorum_size(sharder.ring.replica_count) + successes = [True] * (quorum - 1) + fails = [False] * (sharder.ring.replica_count - len(successes)) + responses = successes + fails + random.shuffle(responses) + bad_result = (False, responses) + ok_result = (True, [True] * sharder.ring.replica_count) + sharder._replicate_object = mock.MagicMock( + # result for misplaced, misplaced, cleave + side_effect=(bad_result, ok_result, ok_result)) + self.assertFalse(sharder._cleave(broker)) + + context = CleavingContext.load(broker) + self.assertFalse(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual(str(shard_ranges[0].upper), context.cursor) + self.assertEqual(6, context.cleave_to_row) + self.assertEqual(6, context.max_row) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_has_calls( + [mock.call(0, misplaced_dbs[0], 0), + mock.call(0, misplaced_dbs[1], 0), + mock.call(0, expected_shard_dbs[0], 0)]) + shard_broker = ContainerBroker(expected_shard_dbs[0]) + # NB cleaving a shard, state goes to CLEAVED not ACTIVE + shard_own_sr = shard_broker.get_own_shard_range() + self.assertEqual(ShardRange.CLEAVED, shard_own_sr.state) + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(2, len(updated_shard_ranges)) + + # first shard range should have updated object count, bytes used and 
+ # meta_timestamp + shard_ranges[0].bytes_used = 6 + shard_ranges[0].object_count = 3 + shard_ranges[0].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[0], updated_shard_ranges[0]) + self._check_objects(objects[:3], expected_shard_dbs[0]) + self.assertFalse(os.path.exists(expected_shard_dbs[1])) + self._check_objects(misplaced_objects[:1], misplaced_dbs[0]) + self._check_objects(misplaced_objects[1:], misplaced_dbs[1]) + unlink_files(expected_shard_dbs) + unlink_files(misplaced_dbs) + + # run cleave - second (final) range is cleaved; move this range to + # CLEAVED state and update stats to simulate another replica having + # cleaved it and replicated its state + shard_ranges[1].update_state(ShardRange.CLEAVED) + shard_ranges[1].update_meta(2, 15) + broker.merge_shard_ranges(shard_ranges[1:2]) + with self._mock_sharder(sharder_conf) as sharder: + with mock.patch.object( + sharder, '_make_shard_range_fetcher', + return_value=lambda: iter(misplaced_ranges)): + self.assertTrue(sharder._cleave(broker)) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertTrue(context.cleaving_done) + self.assertEqual(str(shard_ranges[1].upper), context.cursor) + self.assertEqual(6, context.cleave_to_row) + self.assertEqual(6, context.max_row) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_has_calls( + [mock.call(0, misplaced_dbs[0], 0), + mock.call(0, expected_shard_dbs[1], 0)]) + shard_broker = ContainerBroker(expected_shard_dbs[1]) + shard_own_sr = shard_broker.get_own_shard_range() + self.assertEqual(ShardRange.CLEAVED, shard_own_sr.state) + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(2, len(updated_shard_ranges)) + + # second shard range should have updated object count, bytes used and + # meta_timestamp + self.assertEqual(dict(shard_ranges[1]), dict(updated_shard_ranges[1])) + self._check_objects(objects[3:], expected_shard_dbs[1]) + self.assertFalse(os.path.exists(expected_shard_dbs[0])) + self._check_objects(misplaced_objects[:1], misplaced_dbs[0]) + self.assertFalse(os.path.exists(misplaced_dbs[1])) + + def test_cleave_shard_shrinking(self): + broker = self._make_broker(account='.shards_a', container='shard_c') + own_shard_range = ShardRange( + broker.path, next(self.ts_iter), 'here', 'where', + state=ShardRange.SHRINKING, epoch=next(self.ts_iter)) + broker.merge_shard_ranges([own_shard_range]) + broker.set_sharding_sysmeta('Root', 'a/c') + self.assertFalse(broker.is_root_container()) # sanity check + + objects = [ + ('there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0), + ('where', self.ts_encoded(), 100, 'text/plain', 'etag_where', 0, + 0), + ] + for obj in objects: + broker.put_object(*obj) + acceptor_epoch = next(self.ts_iter) + acceptor = ShardRange('.shards_a/acceptor', Timestamp.now(), + 'here', 'yonder', '1000', '11111', + state=ShardRange.ACTIVE, epoch=acceptor_epoch) + db_hash = hash_path(acceptor.account, acceptor.container) + # NB expected cleave db includes acceptor epoch + expected_shard_db = os.path.join( + self.tempdir, 'sda', 'containers', '0', db_hash[-3:], db_hash, + '%s_%s.db' % (db_hash, acceptor_epoch.internal)) + + broker.merge_shard_ranges([acceptor]) + broker.set_sharding_state() + + # run cleave + with self._mock_sharder() as sharder: + self.assertTrue(sharder._cleave(broker)) + + context = CleavingContext.load(broker) + self.assertTrue(context.misplaced_done) + self.assertTrue(context.cleaving_done) + 
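# For illustration only (these names are hypothetical, not swift's
# ShardRange or CleavingContext API): the cursor assertions that follow rely
# on cleaving being treated as complete once the cleaved ranges cover the
# broker's own namespace, with '' standing for an unbounded upper bound.
# That is why cleaving to the wider acceptor ('here' to 'yonder') finishes
# this shrinking shard, which only owns 'here' to 'where'.
def namespace_covered(cursor, own_upper):
    if cursor == '':      # cursor reached the very end of the namespace
        return True
    if own_upper == '':   # own namespace is unbounded but the cursor is not
        return False
    return cursor >= own_upper

assert namespace_covered('yonder', 'where')   # shrinking shard: covered
assert not namespace_covered('yonder', '')    # a root container: not yet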
self.assertEqual(str(acceptor.upper), context.cursor) + self.assertEqual(2, context.cleave_to_row) + self.assertEqual(2, context.max_row) + + self.assertEqual(SHARDING, broker.get_db_state()) + sharder._replicate_object.assert_has_calls( + [mock.call(0, expected_shard_db, 0)]) + shard_broker = ContainerBroker(expected_shard_db) + # NB when cleaving a shard container to a larger acceptor namespace + # then expect the shard broker's own shard range to reflect that of the + # acceptor shard range rather than being set to CLEAVED. + self.assertEqual( + ShardRange.ACTIVE, shard_broker.get_own_shard_range().state) + + updated_shard_ranges = broker.get_shard_ranges() + self.assertEqual(1, len(updated_shard_ranges)) + self.assertEqual(dict(acceptor), dict(updated_shard_ranges[0])) + + # shard range should have unmodified acceptor, bytes used and + # meta_timestamp + self._check_objects(objects, expected_shard_db) + + def test_cleave_repeated(self): + # verify that if new objects are merged into retiring db after cleaving + # started then cleaving will repeat but only new objects are cleaved + # in the repeated cleaving pass + broker = self._make_broker() + objects = [ + ('obj%03d' % i, next(self.ts_iter), 1, 'text/plain', 'etag', 0, 0) + for i in range(10) + ] + new_objects = [ + (name, next(self.ts_iter), 1, 'text/plain', 'etag', 0, 0) + for name in ('alpha', 'zeta') + ] + for obj in objects: + broker.put_object(*obj) + broker._commit_puts() + broker.enable_sharding(Timestamp.now()) + shard_bounds = (('', 'obj004'), ('obj004', '')) + shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.CREATED) + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + old_broker = broker.get_brokers()[0] + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2', + 'index': 0} + + calls = [] + key = ('name', 'created_at', 'size', 'content_type', 'etag', 'deleted') + + def mock_replicate_object(part, db, node_id): + # merge new objects between cleave of first and second shard ranges + if not calls: + old_broker.merge_items( + [dict(zip(key, obj)) for obj in new_objects]) + calls.append((part, db, node_id)) + return True, [True, True, True] + + with self._mock_sharder() as sharder: + sharder._audit_container = mock.MagicMock() + sharder._replicate_object = mock_replicate_object + sharder._process_broker(broker, node, 99) + + # sanity check - the new objects merged into the old db + self.assertFalse(broker.get_objects()) + self.assertEqual(12, len(old_broker.get_objects())) + + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + self.assertEqual([(0, expected_shard_dbs[0], 0), + (0, expected_shard_dbs[1], 0)], calls) + + # check shard ranges were updated to CLEAVED + updated_shard_ranges = broker.get_shard_ranges() + # 'alpha' was not in table when first shard was cleaved + shard_ranges[0].bytes_used = 5 + shard_ranges[0].object_count = 5 + shard_ranges[0].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[0], updated_shard_ranges[0]) + self._check_objects(objects[:5], expected_shard_dbs[0]) + # 'zeta' was in table when second shard was cleaved + shard_ranges[1].bytes_used = 6 + shard_ranges[1].object_count = 6 + 
shard_ranges[1].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[1], updated_shard_ranges[1]) + self._check_objects(objects[5:] + new_objects[1:], + expected_shard_dbs[1]) + + context = CleavingContext.load(broker) + self.assertFalse(context.misplaced_done) + self.assertFalse(context.cleaving_done) + self.assertEqual('', context.cursor) + self.assertEqual(10, context.cleave_to_row) + self.assertEqual(12, context.max_row) # note that max row increased + lines = sharder.logger.get_lines_for_level('warning') + self.assertIn('Repeat cleaving required', lines[0]) + self.assertFalse(lines[1:]) + unlink_files(expected_shard_dbs) + + # repeat the cleaving - the newer objects get cleaved + with self._mock_sharder() as sharder: + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + + # this time the sharding completed + self.assertEqual(SHARDED, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDED, + broker.get_own_shard_range().state) + + sharder._replicate_object.assert_has_calls( + [mock.call(0, expected_shard_dbs[0], 0), + mock.call(0, expected_shard_dbs[1], 0)]) + + # shard ranges are now ACTIVE - stats not updated by cleaving + updated_shard_ranges = broker.get_shard_ranges() + shard_ranges[0].state = ShardRange.ACTIVE + self._check_shard_range(shard_ranges[0], updated_shard_ranges[0]) + self._check_objects(new_objects[:1], expected_shard_dbs[0]) + # both new objects are included in repeat cleaving but no older objects + shard_ranges[1].state = ShardRange.ACTIVE + self._check_shard_range(shard_ranges[1], updated_shard_ranges[1]) + self._check_objects(new_objects[1:], expected_shard_dbs[1]) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + + def test_cleave_multiple_storage_policies(self): + # verify that objects in all storage policies are cleaved + broker = self._make_broker() + # add objects in multiple policies + objects = [{'name': 'obj_%03d' % i, + 'created_at': Timestamp.now().normal, + 'content_type': 'text/plain', + 'etag': 'etag_%d' % i, + 'size': 1024 * i, + 'deleted': i % 2, + 'storage_policy_index': i % 2, + } for i in range(1, 8)] + # merge_items mutates items + broker.merge_items([dict(obj) for obj in objects]) + broker.enable_sharding(Timestamp.now()) + shard_ranges = self._make_shard_ranges( + (('', 'obj_004'), ('obj_004', '')), state=ShardRange.CREATED) + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2', + 'index': 0} + + with self._mock_sharder() as sharder: + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + + # check shard ranges were updated to ACTIVE + self.assertEqual([ShardRange.ACTIVE] * 2, + [sr.state for sr in broker.get_shard_ranges()]) + shard_broker = ContainerBroker(expected_shard_dbs[0]) + actual_objects = shard_broker.get_objects() + self.assertEqual(objects[:4], actual_objects) + + shard_broker = ContainerBroker(expected_shard_dbs[1]) + actual_objects = shard_broker.get_objects() + self.assertEqual(objects[4:], actual_objects) + + def test_cleave_insufficient_replication(self): + # verify that if replication of a cleaved shard range fails then rows + # are not merged again to the existing 
shard db + broker = self._make_broker() + retiring_db_id = broker.get_info()['id'] + objects = [ + {'name': 'obj%03d' % i, 'created_at': next(self.ts_iter), + 'size': 1, 'content_type': 'text/plain', 'etag': 'etag', + 'deleted': 0, 'storage_policy_index': 0} + for i in range(10) + ] + broker.merge_items([dict(obj) for obj in objects]) + broker._commit_puts() + broker.enable_sharding(Timestamp.now()) + shard_bounds = (('', 'obj004'), ('obj004', '')) + shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.CREATED) + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + new_object = {'name': 'alpha', 'created_at': next(self.ts_iter), + 'size': 0, 'content_type': 'text/plain', 'etag': 'etag', + 'deleted': 0, 'storage_policy_index': 0} + broker.merge_items([dict(new_object)]) + + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2', + 'index': 0} + orig_merge_items = ContainerBroker.merge_items + + def mock_merge_items(broker, items): + merge_items_calls.append((broker.path, + # merge mutates item so make a copy + [dict(item) for item in items])) + orig_merge_items(broker, items) + + # first shard range cleaved but fails to replicate + merge_items_calls = [] + with mock.patch('swift.container.backend.ContainerBroker.merge_items', + mock_merge_items): + with self._mock_sharder() as sharder: + sharder._replicate_object = mock.MagicMock( + return_value=(False, [False, False, True])) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + self._assert_shard_ranges_equal(shard_ranges, + broker.get_shard_ranges()) + # first shard range cleaved to shard broker + self.assertEqual([(shard_ranges[0].name, objects[:5])], + merge_items_calls) + # replication of first shard range fails - no more shards attempted + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[0], 0) + # shard broker has sync points + shard_broker = ContainerBroker(expected_shard_dbs[0]) + self.assertEqual( + [{'remote_id': retiring_db_id, 'sync_point': len(objects)}], + shard_broker.get_syncs()) + self.assertEqual(objects[:5], shard_broker.get_objects()) + + # first shard range replicates ok, no new merges required, second is + # cleaved but fails to replicate + merge_items_calls = [] + with mock.patch('swift.container.backend.ContainerBroker.merge_items', + mock_merge_items), self._mock_sharder() as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(False, [False, True, True]), + (False, [False, False, True])]) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + + broker_shard_ranges = broker.get_shard_ranges() + shard_ranges[0].object_count = 5 + shard_ranges[0].bytes_used = sum(obj['size'] for obj in objects[:5]) + shard_ranges[0].state = ShardRange.CLEAVED + self._check_shard_range(shard_ranges[0], broker_shard_ranges[0]) + # second shard range still in created state + self._assert_shard_ranges_equal([shard_ranges[1]], + 
[broker_shard_ranges[1]]) + # only second shard range rows were merged to shard db + self.assertEqual([(shard_ranges[1].name, objects[5:])], + merge_items_calls) + sharder._replicate_object.assert_has_calls( + [mock.call(0, expected_shard_dbs[0], 0), + mock.call(0, expected_shard_dbs[1], 0)]) + # shard broker has sync points + shard_broker = ContainerBroker(expected_shard_dbs[1]) + self.assertEqual( + [{'remote_id': retiring_db_id, 'sync_point': len(objects)}], + shard_broker.get_syncs()) + self.assertEqual(objects[5:], shard_broker.get_objects()) + + # repeat - second shard range cleaves fully because its previously + # cleaved shard db no longer exists + unlink_files(expected_shard_dbs) + merge_items_calls = [] + with mock.patch('swift.container.backend.ContainerBroker.merge_items', + mock_merge_items): + with self._mock_sharder() as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(True, [True, True, True]), # misplaced obj + (False, [False, True, True])]) + sharder._audit_container = mock.MagicMock() + sharder.logger = debug_logger() + sharder._process_broker(broker, node, 99) + + self.assertEqual(SHARDED, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDED, + broker.get_own_shard_range().state) + + broker_shard_ranges = broker.get_shard_ranges() + shard_ranges[1].object_count = 5 + shard_ranges[1].bytes_used = sum(obj['size'] for obj in objects[5:]) + shard_ranges[1].state = ShardRange.ACTIVE + self._check_shard_range(shard_ranges[1], broker_shard_ranges[1]) + # second shard range rows were merged to shard db again + self.assertEqual([(shard_ranges[0].name, [new_object]), + (shard_ranges[1].name, objects[5:])], + merge_items_calls) + sharder._replicate_object.assert_has_calls( + [mock.call(0, expected_shard_dbs[0], 0), + mock.call(0, expected_shard_dbs[1], 0)]) + # first shard broker was created by misplaced object - no sync point + shard_broker = ContainerBroker(expected_shard_dbs[0]) + self.assertFalse(shard_broker.get_syncs()) + self.assertEqual([new_object], shard_broker.get_objects()) + # second shard broker has sync points + shard_broker = ContainerBroker(expected_shard_dbs[1]) + self.assertEqual( + [{'remote_id': retiring_db_id, 'sync_point': len(objects)}], + shard_broker.get_syncs()) + self.assertEqual(objects[5:], shard_broker.get_objects()) + + def test_shard_replication_quorum_failures(self): + broker = self._make_broker() + objects = [ + {'name': 'obj%03d' % i, 'created_at': next(self.ts_iter), + 'size': 1, 'content_type': 'text/plain', 'etag': 'etag', + 'deleted': 0, 'storage_policy_index': 0} + for i in range(10) + ] + broker.merge_items([dict(obj) for obj in objects]) + broker._commit_puts() + shard_bounds = (('', 'obj002'), ('obj002', 'obj004'), + ('obj004', 'obj006'), ('obj006', '')) + shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.CREATED) + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + broker.enable_sharding(Timestamp.now()) + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2', + 'index': 0} + with self._mock_sharder({'shard_replication_quorum': 3}) as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(False, [False, True, True]), + (False, [False, False, True])]) + 
sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + # replication of first shard range fails - no more shards attempted + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[0], 0) + self.assertEqual([ShardRange.CREATED] * 4, + [sr.state for sr in broker.get_shard_ranges()]) + + # and again with a chilled out quorom, so cleaving moves onto second + # shard range which fails to reach even chilled quorum + with self._mock_sharder({'shard_replication_quorum': 1}) as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(False, [False, False, True]), + (False, [False, False, False])]) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + self.assertEqual(sharder._replicate_object.call_args_list, [ + mock.call(0, expected_shard_dbs[0], 0), + mock.call(0, expected_shard_dbs[1], 0), + ]) + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CREATED, ShardRange.CREATED, + ShardRange.CREATED], + [sr.state for sr in broker.get_shard_ranges()]) + + # now pretend another node successfully cleaved the second shard range, + # but this node still fails to replicate so still cannot move on + shard_ranges[1].update_state(ShardRange.CLEAVED) + broker.merge_shard_ranges(shard_ranges[1]) + with self._mock_sharder({'shard_replication_quorum': 1}) as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(False, [False, False, False])]) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[1], 0) + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CLEAVED, ShardRange.CREATED, + ShardRange.CREATED], + [sr.state for sr in broker.get_shard_ranges()]) + + # until a super-chilled quorum is used - but even then there must have + # been an attempt to replicate + with self._mock_sharder( + {'shard_replication_quorum': 1, + 'existing_shard_replication_quorum': 0}) as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(False, [])]) # maybe shard db was deleted + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[1], 0) + self.assertEqual( + [ShardRange.CLEAVED, ShardRange.CLEAVED, ShardRange.CREATED, + ShardRange.CREATED], + [sr.state for sr in broker.get_shard_ranges()]) + + # next pass - the second shard replication is attempted and fails, but + # that's ok because another node has cleaved it and + # existing_shard_replication_quorum is zero + with self._mock_sharder( + {'shard_replication_quorum': 1, + 'existing_shard_replication_quorum': 0}) as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(False, [False, False, False]), + (False, [False, True, False])]) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + self.assertEqual(SHARDING, broker.get_db_state()) + 
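# For illustration only, a minimal sketch of the rule these sub-cases
# exercise; it is an assumption standing in for the sharder's actual check,
# and met_quorum is a hypothetical helper. A cleaved shard counts as
# replicated when enough of the per-replica responses succeeded, and a
# configured quorum larger than the replica count is capped (see the 'crazy
# replication quorums' sub-case further down).
def met_quorum(responses, quorum, replica_count=3):
    return sum(1 for ok in responses if ok) >= min(quorum, replica_count)

assert not met_quorum([False, True, True], quorum=3)   # 2 successes < 3
assert met_quorum([False, False, True], quorum=1)      # chilled quorum of 1
assert met_quorum([], quorum=0)                        # super-chilled quorum
assert not met_quorum([False, True, True], quorum=99)  # capped to 3 replicas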
self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + self.assertEqual(sharder._replicate_object.call_args_list, [ + mock.call(0, expected_shard_dbs[1], 0), + mock.call(0, expected_shard_dbs[2], 0), + ]) + self.assertEqual([ShardRange.CLEAVED] * 3 + [ShardRange.CREATED], + [sr.state for sr in broker.get_shard_ranges()]) + self.assertEqual(1, sharder.shard_replication_quorum) + self.assertEqual(0, sharder.existing_shard_replication_quorum) + + # crazy replication quorums will be capped to replica_count + with self._mock_sharder( + {'shard_replication_quorum': 99, + 'existing_shard_replication_quorum': 99}) as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(False, [False, True, True])]) + sharder._audit_container = mock.MagicMock() + sharder.logger = debug_logger() + sharder._process_broker(broker, node, 99) + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[3], 0) + self.assertEqual([ShardRange.CLEAVED] * 3 + [ShardRange.CREATED], + [sr.state for sr in broker.get_shard_ranges()]) + self.assertEqual(3, sharder.shard_replication_quorum) + self.assertEqual(3, sharder.existing_shard_replication_quorum) + + # ...and progress is still made if replication fully succeeds + with self._mock_sharder( + {'shard_replication_quorum': 99, + 'existing_shard_replication_quorum': 99}) as sharder: + sharder._replicate_object = mock.MagicMock( + side_effect=[(True, [True, True, True])]) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + self.assertEqual(SHARDED, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDED, + broker.get_own_shard_range().state) + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[3], 0) + self.assertEqual([ShardRange.ACTIVE] * 4, + [sr.state for sr in broker.get_shard_ranges()]) + warnings = sharder.logger.get_lines_for_level('warning') + self.assertIn( + 'shard_replication_quorum of 99 exceeds replica count', + warnings[0]) + self.assertIn( + 'existing_shard_replication_quorum of 99 exceeds replica count', + warnings[1]) + self.assertEqual(3, sharder.shard_replication_quorum) + self.assertEqual(3, sharder.existing_shard_replication_quorum) + + def test_cleave_to_existing_shard_db(self): + # verify that when cleaving to an already existing shard db + def replicate(node, from_broker, part): + # short circuit replication + rpc = replicator.ContainerReplicatorRpc( + self.tempdir, DATADIR, ContainerBroker, mount_check=False) + + fake_repl_connection = attach_fake_replication_rpc(rpc) + with mock.patch('swift.common.db_replicator.ReplConnection', + fake_repl_connection): + with mock.patch('swift.common.db_replicator.ring.Ring', + lambda *args, **kwargs: FakeRing()): + daemon = replicator.ContainerReplicator({}) + info = from_broker.get_replication_info() + success = daemon._repl_to_node( + node, from_broker, part, info) + self.assertTrue(success) + + orig_merge_items = ContainerBroker.merge_items + + def mock_merge_items(broker, items): + # capture merge_items calls + merge_items_calls.append((broker.path, + # merge mutates item so make a copy + [dict(item) for item in items])) + orig_merge_items(broker, items) + + objects = [ + {'name': 'obj%03d' % i, 'created_at': next(self.ts_iter), + 'size': 1, 'content_type': 'text/plain', 'etag': 'etag', + 'deleted': 0, 'storage_policy_index': 0} + for i in range(10) + ] + # local 
db gets 4 objects + local_broker = self._make_broker() + local_broker.merge_items([dict(obj) for obj in objects[2:6]]) + local_broker._commit_puts() + local_retiring_db_id = local_broker.get_info()['id'] + + # remote db gets 5 objects + remote_broker = self._make_broker(device='sdb') + remote_broker.merge_items([dict(obj) for obj in objects[2:7]]) + remote_broker._commit_puts() + remote_retiring_db_id = remote_broker.get_info()['id'] + + local_node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda', + 'id': '2', 'index': 0, 'replication_ip': '1.2.3.4', + 'replication_port': 6040} + remote_node = {'ip': '1.2.3.5', 'port': 6040, 'device': 'sdb', + 'id': '3', 'index': 1, 'replication_ip': '1.2.3.5', + 'replication_port': 6040} + + # remote db replicates to local, bringing local db's total to 5 objects + self.assertNotEqual(local_broker.get_objects(), + remote_broker.get_objects()) + replicate(local_node, remote_broker, 0) + self.assertEqual(local_broker.get_objects(), + remote_broker.get_objects()) + + # local db gets 2 new objects, bringing its total to 7 + local_broker.merge_items([dict(obj) for obj in objects[1:2]]) + local_broker.merge_items([dict(obj) for obj in objects[7:8]]) + + # local db gets shard ranges + own_shard_range = local_broker.get_own_shard_range() + now = Timestamp.now() + own_shard_range.update_state(ShardRange.SHARDING, state_timestamp=now) + own_shard_range.epoch = now + shard_ranges = self._make_shard_ranges( + (('', 'obj004'), ('obj004', '')), state=ShardRange.CREATED) + local_broker.merge_shard_ranges([own_shard_range] + shard_ranges) + self.assertTrue(local_broker.set_sharding_state()) + + # local db shards + merge_items_calls = [] + with mock.patch('swift.container.backend.ContainerBroker.merge_items', + mock_merge_items): + with self._mock_sharder() as sharder: + sharder._replicate_object = mock.MagicMock( + return_value=(True, [True, True, True])) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(local_broker, local_node, 0) + + # all objects merged from local to shard ranges + self.assertEqual([(shard_ranges[0].name, objects[1:5]), + (shard_ranges[1].name, objects[5:8])], + merge_items_calls) + + # shard brokers have sync points + expected_shard_dbs = [] + for shard_range in shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + shard_broker = ContainerBroker(expected_shard_dbs[0]) + self.assertEqual( + [{'remote_id': local_retiring_db_id, 'sync_point': 7}, + {'remote_id': remote_retiring_db_id, 'sync_point': 5}], + shard_broker.get_syncs()) + self.assertEqual(objects[1:5], shard_broker.get_objects()) + shard_broker = ContainerBroker(expected_shard_dbs[1]) + self.assertEqual( + [{'remote_id': local_retiring_db_id, 'sync_point': 7}, + {'remote_id': remote_retiring_db_id, 'sync_point': 5}], + shard_broker.get_syncs()) + self.assertEqual(objects[5:8], shard_broker.get_objects()) + + # local db replicates to remote, so remote now has shard ranges + # note: no objects replicated because local is sharded + self.assertFalse(remote_broker.get_shard_ranges()) + replicate(remote_node, local_broker, 0) + self._assert_shard_ranges_equal(local_broker.get_shard_ranges(), + remote_broker.get_shard_ranges()) + + # remote db gets 3 new objects, bringing its total to 8 + remote_broker.merge_items([dict(obj) for obj in objects[:1]]) + remote_broker.merge_items([dict(obj) for obj in objects[8:]]) + + 
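# For illustration only: the merge pass below depends on the sync points
# recorded in each shard broker, so a source db only contributes rows newer
# than its stored sync point; rows_to_merge is a hypothetical helper, not
# ContainerBroker's API.
def rows_to_merge(rows, syncs, source_db_id):
    point = syncs.get(source_db_id, -1)
    return [name for row_id, name in rows if row_id > point]

# the shard brokers above recorded sync_point 5 for the remote db, so only
# the three rows it gained afterwards are merged on the next pass
syncs = {'remote-db-id': 5}
rows = [(5, 'obj006'), (6, 'obj000'), (7, 'obj008'), (8, 'obj009')]
assert rows_to_merge(rows, syncs, 'remote-db-id') == ['obj000', 'obj008', 'obj009']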
merge_items_calls = [] + with mock.patch('swift.container.backend.ContainerBroker.merge_items', + mock_merge_items): + with self._mock_sharder() as sharder: + sharder._replicate_object = mock.MagicMock( + return_value=(True, [True, True, True])) + sharder._audit_container = mock.MagicMock() + sharder._process_broker(remote_broker, remote_node, 0) + + # shard brokers have sync points for the remote db so only new objects + # are merged from remote broker to shard brokers + self.assertEqual([(shard_ranges[0].name, objects[:1]), + (shard_ranges[1].name, objects[8:])], + merge_items_calls) + # sync points are updated + shard_broker = ContainerBroker(expected_shard_dbs[0]) + self.assertEqual( + [{'remote_id': local_retiring_db_id, 'sync_point': 7}, + {'remote_id': remote_retiring_db_id, 'sync_point': 8}], + shard_broker.get_syncs()) + self.assertEqual(objects[:5], shard_broker.get_objects()) + shard_broker = ContainerBroker(expected_shard_dbs[1]) + self.assertEqual( + [{'remote_id': local_retiring_db_id, 'sync_point': 7}, + {'remote_id': remote_retiring_db_id, 'sync_point': 8}], + shard_broker.get_syncs()) + self.assertEqual(objects[5:], shard_broker.get_objects()) + + def _check_complete_sharding(self, account, container, shard_bounds): + broker = self._make_sharding_broker( + account=account, container=container, shard_bounds=shard_bounds) + obj = {'name': 'obj', 'created_at': next(self.ts_iter).internal, + 'size': 14, 'content_type': 'text/plain', 'etag': 'an etag', + 'deleted': 0} + broker.get_brokers()[0].merge_items([obj]) + self.assertEqual(2, len(broker.db_files)) # sanity check + + def check_not_complete(): + with self._mock_sharder() as sharder: + self.assertFalse(sharder._complete_sharding(broker)) + warning_lines = sharder.logger.get_lines_for_level('warning') + self.assertIn( + 'Repeat cleaving required for %r' % broker.db_files[0], + warning_lines[0]) + self.assertFalse(warning_lines[1:]) + sharder.logger.clear() + context = CleavingContext.load(broker) + self.assertFalse(context.cleaving_done) + self.assertFalse(context.misplaced_done) + self.assertEqual('', context.cursor) + self.assertEqual(ShardRange.SHARDING, + broker.get_own_shard_range().state) + for shard_range in broker.get_shard_ranges(): + self.assertEqual(ShardRange.CLEAVED, shard_range.state) + self.assertEqual(SHARDING, broker.get_db_state()) + + # no cleave context progress + check_not_complete() + + # cleaving_done is False + context = CleavingContext.load(broker) + self.assertEqual(1, context.max_row) + context.cleave_to_row = 1 # pretend all rows have been cleaved + context.cleaving_done = False + context.misplaced_done = True + context.store(broker) + check_not_complete() + + # misplaced_done is False + context.misplaced_done = False + context.cleaving_done = True + context.store(broker) + check_not_complete() + + # modified db max row + old_broker = broker.get_brokers()[0] + obj = {'name': 'obj', 'created_at': next(self.ts_iter).internal, + 'size': 14, 'content_type': 'text/plain', 'etag': 'an etag', + 'deleted': 1} + old_broker.merge_items([obj]) + self.assertGreater(old_broker.get_max_row(), context.max_row) + context.misplaced_done = True + context.cleaving_done = True + context.store(broker) + check_not_complete() + + # db id changes + broker.get_brokers()[0].newid('fake_remote_id') + context.cleave_to_row = 2 # pretend all rows have been cleaved, again + context.store(broker) + check_not_complete() + + # context ok + context = CleavingContext.load(broker) + context.cleave_to_row = context.max_row + 
context.misplaced_done = True + context.cleaving_done = True + context.store(broker) + with self._mock_sharder() as sharder: + self.assertTrue(sharder._complete_sharding(broker)) + self.assertEqual(SHARDED, broker.get_db_state()) + self.assertEqual(ShardRange.SHARDED, + broker.get_own_shard_range().state) + for shard_range in broker.get_shard_ranges(): + self.assertEqual(ShardRange.ACTIVE, shard_range.state) + warning_lines = sharder.logger.get_lines_for_level('warning') + self.assertFalse(warning_lines) + sharder.logger.clear() + return broker + + def test_complete_sharding_root(self): + broker = self._check_complete_sharding( + 'a', 'c', (('', 'mid'), ('mid', ''))) + self.assertEqual(0, broker.get_own_shard_range().deleted) + + def test_complete_sharding_shard(self): + broker = self._check_complete_sharding( + '.shards_', 'shard_c', (('l', 'mid'), ('mid', 'u'))) + self.assertEqual(1, broker.get_own_shard_range().deleted) + + def test_identify_sharding_candidate(self): + brokers = [self._make_broker(container='c%03d' % i) for i in range(6)] + for broker in brokers: + broker.set_sharding_sysmeta('Root', 'a/c') + node = {'index': 2} + # containers are all empty + with self._mock_sharder() as sharder: + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + expected_stats = {} + self._assert_stats(expected_stats, sharder, 'sharding_candidates') + + objects = [ + ['obj%3d' % i, next(self.ts_iter).internal, i, 'text/plain', + 'etag%s' % i, 0] for i in range(160)] + + # one container has 100 objects, which is below the sharding threshold + for obj in objects[:100]: + brokers[0].put_object(*obj) + conf = {'recon_cache_path': self.tempdir} + with self._mock_sharder(conf=conf) as sharder: + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + self.assertFalse(sharder.sharding_candidates) + expected_recon = { + 'found': 0, + 'top': []} + sharder._report_stats() + self._assert_recon_stats( + expected_recon, sharder, 'sharding_candidates') + + # reduce the sharding threshold and the container is reported + conf = {'shard_container_threshold': 100, + 'recon_cache_path': self.tempdir} + with self._mock_sharder(conf=conf) as sharder: + with mock_timestamp_now() as now: + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + stats_0 = {'path': brokers[0].db_file, + 'node_index': 2, + 'account': 'a', + 'container': 'c000', + 'root': 'a/c', + 'object_count': 100, + 'meta_timestamp': now.internal, + 'file_size': os.stat(brokers[0].db_file).st_size} + self.assertEqual([stats_0], sharder.sharding_candidates) + expected_recon = { + 'found': 1, + 'top': [stats_0]} + sharder._report_stats() + self._assert_recon_stats( + expected_recon, sharder, 'sharding_candidates') + + # repeat with handoff node and db_file error + with self._mock_sharder(conf=conf) as sharder: + with mock.patch('os.stat', side_effect=OSError('test error')): + with mock_timestamp_now(now): + for broker in brokers: + sharder._identify_sharding_candidate(broker, {}) + stats_0_b = {'path': brokers[0].db_file, + 'node_index': None, + 'account': 'a', + 'container': 'c000', + 'root': 'a/c', + 'object_count': 100, + 'meta_timestamp': now.internal, + 'file_size': None} + self.assertEqual([stats_0_b], sharder.sharding_candidates) + self._assert_stats(expected_stats, sharder, 'sharding_candidates') + expected_recon = { + 'found': 1, + 'top': [stats_0_b]} + sharder._report_stats() + self._assert_recon_stats( + expected_recon, sharder, 'sharding_candidates') + + # load up another 
container, but not to threshold for sharding, and + # verify it is never a candidate for sharding + for obj in objects[:50]: + brokers[2].put_object(*obj) + own_sr = brokers[2].get_own_shard_range() + for state in ShardRange.STATES: + own_sr.update_state(state, state_timestamp=Timestamp.now()) + brokers[2].merge_shard_ranges([own_sr]) + with self._mock_sharder(conf=conf) as sharder: + with mock_timestamp_now(now): + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + with annotate_failure(state): + self.assertEqual([stats_0], sharder.sharding_candidates) + + # reduce the threshold and the second container is included + conf = {'shard_container_threshold': 50, + 'recon_cache_path': self.tempdir} + own_sr.update_state(ShardRange.ACTIVE, state_timestamp=Timestamp.now()) + brokers[2].merge_shard_ranges([own_sr]) + with self._mock_sharder(conf=conf) as sharder: + with mock_timestamp_now(now): + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + stats_2 = {'path': brokers[2].db_file, + 'node_index': 2, + 'account': 'a', + 'container': 'c002', + 'root': 'a/c', + 'object_count': 50, + 'meta_timestamp': now.internal, + 'file_size': os.stat(brokers[2].db_file).st_size} + self.assertEqual([stats_0, stats_2], sharder.sharding_candidates) + expected_recon = { + 'found': 2, + 'top': [stats_0, stats_2]} + sharder._report_stats() + self._assert_recon_stats( + expected_recon, sharder, 'sharding_candidates') + + # a broker not in active state is not included + own_sr = brokers[0].get_own_shard_range() + for state in ShardRange.STATES: + if state == ShardRange.ACTIVE: + continue + own_sr.update_state(state, state_timestamp=Timestamp.now()) + brokers[0].merge_shard_ranges([own_sr]) + with self._mock_sharder(conf=conf) as sharder: + with mock_timestamp_now(now): + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + with annotate_failure(state): + self.assertEqual([stats_2], sharder.sharding_candidates) + + own_sr.update_state(ShardRange.ACTIVE, state_timestamp=Timestamp.now()) + brokers[0].merge_shard_ranges([own_sr]) + + # load up a third container with 150 objects + for obj in objects[:150]: + brokers[5].put_object(*obj) + with self._mock_sharder(conf=conf) as sharder: + with mock_timestamp_now(now): + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + stats_5 = {'path': brokers[5].db_file, + 'node_index': 2, + 'account': 'a', + 'container': 'c005', + 'root': 'a/c', + 'object_count': 150, + 'meta_timestamp': now.internal, + 'file_size': os.stat(brokers[5].db_file).st_size} + self.assertEqual([stats_0, stats_2, stats_5], + sharder.sharding_candidates) + # note recon top list is sorted by size + expected_recon = { + 'found': 3, + 'top': [stats_5, stats_0, stats_2]} + sharder._report_stats() + self._assert_recon_stats( + expected_recon, sharder, 'sharding_candidates') + + # restrict the number of reported candidates + conf = {'shard_container_threshold': 50, + 'recon_cache_path': self.tempdir, + 'recon_candidates_limit': 2} + with self._mock_sharder(conf=conf) as sharder: + with mock_timestamp_now(now): + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + self.assertEqual([stats_0, stats_2, stats_5], + sharder.sharding_candidates) + expected_recon = { + 'found': 3, + 'top': [stats_5, stats_0]} + sharder._report_stats() + self._assert_recon_stats( + expected_recon, sharder, 'sharding_candidates') + + # unrestrict the number of reported candidates + conf = 
{'shard_container_threshold': 50, + 'recon_cache_path': self.tempdir, + 'recon_candidates_limit': -1} + for i, broker in enumerate([brokers[1]] + brokers[3:5]): + for obj in objects[:(151 + i)]: + broker.put_object(*obj) + with self._mock_sharder(conf=conf) as sharder: + with mock_timestamp_now(now): + for broker in brokers: + sharder._identify_sharding_candidate(broker, node) + + stats_4 = {'path': brokers[4].db_file, + 'node_index': 2, + 'account': 'a', + 'container': 'c004', + 'root': 'a/c', + 'object_count': 153, + 'meta_timestamp': now.internal, + 'file_size': os.stat(brokers[4].db_file).st_size} + stats_3 = {'path': brokers[3].db_file, + 'node_index': 2, + 'account': 'a', + 'container': 'c003', + 'root': 'a/c', + 'object_count': 152, + 'meta_timestamp': now.internal, + 'file_size': os.stat(brokers[3].db_file).st_size} + stats_1 = {'path': brokers[1].db_file, + 'node_index': 2, + 'account': 'a', + 'container': 'c001', + 'root': 'a/c', + 'object_count': 151, + 'meta_timestamp': now.internal, + 'file_size': os.stat(brokers[1].db_file).st_size} + + self.assertEqual( + [stats_0, stats_1, stats_2, stats_3, stats_4, stats_5], + sharder.sharding_candidates) + self._assert_stats(expected_stats, sharder, 'sharding_candidates') + expected_recon = { + 'found': 6, + 'top': [stats_4, stats_3, stats_1, stats_5, stats_0, stats_2]} + sharder._report_stats() + self._assert_recon_stats( + expected_recon, sharder, 'sharding_candidates') + + def test_misplaced_objects_root_container(self): + broker = self._make_broker() + broker.enable_sharding(next(self.ts_iter)) + + objects = [ + # misplaced objects in second and third shard ranges + ['n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0], + ['there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 1], + ['where', self.ts_encoded(), 100, 'text/plain', 'etag_where', 0, + 0], + # deleted + ['x', self.ts_encoded(), 0, '', '', 1, 1], + ] + + shard_bounds = (('', 'here'), ('here', 'there'), + ('there', 'where'), ('where', 'yonder'), + ('yonder', '')) + initial_shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.ACTIVE) + expected_shard_dbs = [] + for shard_range in initial_shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + broker.merge_shard_ranges(initial_shard_ranges) + + # unsharded + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + sharder._replicate_object.assert_not_called() + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 0, 'placed': 0, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertFalse( + sharder.logger.get_increment_counts().get('misplaced_found')) + + # sharding - no misplaced objects + self.assertTrue(broker.set_sharding_state()) + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + sharder._replicate_object.assert_not_called() + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertFalse( + sharder.logger.get_increment_counts().get('misplaced_found')) + + # pretend we cleaved up to end of second shard range + context = CleavingContext.load(broker) + context.cursor = 'there' + context.store(broker) + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + sharder._replicate_object.assert_not_called() + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertFalse( + 
sharder.logger.get_increment_counts().get('misplaced_found')) + + # sharding - misplaced objects + for obj in objects: + broker.put_object(*obj) + # pretend we have not cleaved any ranges + context.cursor = '' + context.store(broker) + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + sharder._replicate_object.assert_not_called() + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertFalse( + sharder.logger.get_increment_counts().get('misplaced_found')) + self.assertFalse(os.path.exists(expected_shard_dbs[0])) + self.assertFalse(os.path.exists(expected_shard_dbs[1])) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + self.assertFalse(os.path.exists(expected_shard_dbs[3])) + self.assertFalse(os.path.exists(expected_shard_dbs[4])) + + # pretend we cleaved up to end of second shard range + context.cursor = 'there' + context.store(broker) + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + + sharder._replicate_object.assert_called_once_with( + 0, expected_shard_dbs[1], 0) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 2, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + # check misplaced objects were moved + self._check_objects(objects[:2], expected_shard_dbs[1]) + # ... and removed from the source db + self._check_objects(objects[2:], broker.db_file) + # ... and nothing else moved + self.assertFalse(os.path.exists(expected_shard_dbs[0])) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + self.assertFalse(os.path.exists(expected_shard_dbs[3])) + self.assertFalse(os.path.exists(expected_shard_dbs[4])) + + # pretend we cleaved up to end of fourth shard range + context.cursor = 'yonder' + context.store(broker) + # and some new misplaced updates arrived in the first shard range + new_objects = [ + ['b', self.ts_encoded(), 10, 'text/plain', 'etag_b', 0, 0], + ['c', self.ts_encoded(), 20, 'text/plain', 'etag_c', 0, 0], + ] + for obj in new_objects: + broker.put_object(*obj) + + # check that *all* misplaced objects are moved despite exceeding + # the listing limit + with self._mock_sharder(conf={'cleave_row_batch_size': 2}) as sharder: + sharder._move_misplaced_objects(broker) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 4, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in expected_shard_dbs[2:4]], + any_order=True + ) + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + + # check misplaced objects were moved + self._check_objects(new_objects, expected_shard_dbs[0]) + self._check_objects(objects[:2], expected_shard_dbs[1]) + self._check_objects(objects[2:3], expected_shard_dbs[2]) + self._check_objects(objects[3:], expected_shard_dbs[3]) + # ... 
and removed from the source db + self._check_objects([], broker.db_file) + self.assertFalse(os.path.exists(expected_shard_dbs[4])) + + # pretend we cleaved all ranges - sharded state + self.assertTrue(broker.set_sharded_state()) + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + sharder._replicate_object.assert_not_called() + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 0, 'placed': 0, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertFalse( + sharder.logger.get_increment_counts().get('misplaced_found')) + + # and then more misplaced updates arrive + newer_objects = [ + ['a', self.ts_encoded(), 51, 'text/plain', 'etag_a', 0, 0], + ['z', self.ts_encoded(), 52, 'text/plain', 'etag_z', 0, 0], + ] + for obj in newer_objects: + broker.put_object(*obj) + broker.get_info() # force updates to be committed + # sanity check the puts landed in sharded broker + self._check_objects(newer_objects, broker.db_file) + + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) + for db in (expected_shard_dbs[0], expected_shard_dbs[-1])], + any_order=True + ) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 2, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + + # check new misplaced objects were moved + self._check_objects(newer_objects[:1] + new_objects, + expected_shard_dbs[0]) + self._check_objects(newer_objects[1:], expected_shard_dbs[4]) + # ... and removed from the source db + self._check_objects([], broker.db_file) + # ... and other shard dbs were unchanged + self._check_objects(objects[:2], expected_shard_dbs[1]) + self._check_objects(objects[2:3], expected_shard_dbs[2]) + self._check_objects(objects[3:], expected_shard_dbs[3]) + + def _setup_misplaced_objects(self): + # make a broker with shard ranges, move it to sharded state and then + # put some misplaced objects in it + broker = self._make_broker() + shard_bounds = (('', 'here'), ('here', 'there'), + ('there', 'where'), ('where', 'yonder'), + ('yonder', '')) + initial_shard_ranges = [ + ShardRange('.shards_a/%s-%s' % (lower, upper), + Timestamp.now(), lower, upper, state=ShardRange.ACTIVE) + for lower, upper in shard_bounds + ] + expected_dbs = [] + for shard_range in initial_shard_ranges: + db_hash = hash_path(shard_range.account, shard_range.container) + expected_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + broker.merge_shard_ranges(initial_shard_ranges) + objects = [ + # misplaced objects in second, third and fourth shard ranges + ['n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0], + ['there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0], + ['where', self.ts_encoded(), 100, 'text/plain', 'etag_where', 0, + 0], + # deleted + ['x', self.ts_encoded(), 0, '', '', 1, 0], + ] + broker.enable_sharding(Timestamp.now()) + self.assertTrue(broker.set_sharding_state()) + self.assertTrue(broker.set_sharded_state()) + for obj in objects: + broker.put_object(*obj) + self.assertEqual(SHARDED, broker.get_db_state()) + return broker, objects, expected_dbs + + def test_misplaced_objects_newer_objects(self): + # verify that objects merged to the db after misplaced objects have + # been identified are not removed from the db + broker, objects, 
expected_dbs = self._setup_misplaced_objects() + newer_objects = [ + ['j', self.ts_encoded(), 51, 'text/plain', 'etag_j', 0, 0], + ['k', self.ts_encoded(), 52, 'text/plain', 'etag_k', 1, 0], + ] + + calls = [] + pre_removal_objects = [] + + def mock_replicate_object(part, db, node_id): + calls.append((part, db, node_id)) + if db == expected_dbs[1]: + # put some new objects in the shard range that is being + # replicated before misplaced objects are removed from that + # range in the source db + for obj in newer_objects: + broker.put_object(*obj) + # grab a snapshot of the db contents - a side effect is + # that the newer objects are now committed to the db + pre_removal_objects.extend( + broker.get_objects()) + return True, [True, True, True] + + with self._mock_sharder(replicas=3) as sharder: + sharder._replicate_object = mock_replicate_object + sharder._move_misplaced_objects(broker) + + # sanity check - the newer objects were in the db before the misplaced + # object were removed + for obj in newer_objects: + self.assertIn(obj[0], [o['name'] for o in pre_removal_objects]) + for obj in objects[:2]: + self.assertIn(obj[0], [o['name'] for o in pre_removal_objects]) + + self.assertEqual( + set([(0, db, 0) for db in (expected_dbs[1:4])]), set(calls)) + + # check misplaced objects were moved + self._check_objects(objects[:2], expected_dbs[1]) + self._check_objects(objects[2:3], expected_dbs[2]) + self._check_objects(objects[3:], expected_dbs[3]) + # ... but newer objects were not removed from the source db + self._check_objects(newer_objects, broker.db_file) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 4, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + + # they will be moved on next cycle + unlink_files(expected_dbs) + with self._mock_sharder(replicas=3) as sharder: + sharder._move_misplaced_objects(broker) + + self._check_objects(newer_objects, expected_dbs[1]) + self._check_objects([], broker.db_file) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 2, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + + def test_misplaced_objects_db_id_changed(self): + broker, objects, expected_dbs = self._setup_misplaced_objects() + + pre_info = broker.get_info() + calls = [] + expected_retained_objects = [] + expected_retained_objects_dbs = [] + + def mock_replicate_object(part, db, node_id): + calls.append((part, db, node_id)) + if len(calls) == 2: + broker.newid('fake_remote_id') + # grab snapshot of the objects in the broker when it changed id + expected_retained_objects.extend( + self._get_raw_object_records(broker)) + if len(calls) >= 2: + expected_retained_objects_dbs.append(db) + return True, [True, True, True] + + with self._mock_sharder(replicas=3) as sharder: + sharder._replicate_object = mock_replicate_object + sharder._move_misplaced_objects(broker) + + # sanity checks + self.assertNotEqual(pre_info['id'], broker.get_info()['id']) + self.assertTrue(expected_retained_objects) + + self.assertEqual( + set([(0, db, 0) for db in (expected_dbs[1:4])]), set(calls)) + + # check misplaced objects were moved + self._check_objects(objects[:2], expected_dbs[1]) + self._check_objects(objects[2:3], expected_dbs[2]) + self._check_objects(objects[3:], expected_dbs[3]) + # ... 
but objects were not removed after the source db id changed + self._check_objects(expected_retained_objects, broker.db_file) + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'found': 1, 'placed': 4, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + + lines = sharder.logger.get_lines_for_level('warning') + self.assertIn('Refused to remove misplaced objects', lines[0]) + self.assertIn('Refused to remove misplaced objects', lines[1]) + self.assertFalse(lines[2:]) + + # they will be moved again on next cycle + unlink_files(expected_dbs) + sharder.logger.clear() + with self._mock_sharder(replicas=3) as sharder: + sharder._move_misplaced_objects(broker) + + self.assertEqual(2, len(set(expected_retained_objects_dbs))) + for db in expected_retained_objects_dbs: + if db == expected_dbs[1]: + self._check_objects(objects[:2], expected_dbs[1]) + if db == expected_dbs[2]: + self._check_objects(objects[2:3], expected_dbs[2]) + if db == expected_dbs[3]: + self._check_objects(objects[3:], expected_dbs[3]) + self._check_objects([], broker.db_file) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': len(expected_retained_objects), + 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + + def test_misplaced_objects_sufficient_replication(self): + broker, objects, expected_dbs = self._setup_misplaced_objects() + + with self._mock_sharder(replicas=3) as sharder: + sharder._replicate_object.return_value = (True, [True, True, True]) + sharder._move_misplaced_objects(broker) + + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in (expected_dbs[2:4])], + any_order=True) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 4, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + # check misplaced objects were moved + self._check_objects(objects[:2], expected_dbs[1]) + self._check_objects(objects[2:3], expected_dbs[2]) + self._check_objects(objects[3:], expected_dbs[3]) + # ... and removed from the source db + self._check_objects([], broker.db_file) + # ... and nothing else moved + self.assertFalse(os.path.exists(expected_dbs[0])) + self.assertFalse(os.path.exists(expected_dbs[4])) + + def test_misplaced_objects_insufficient_replication_3_replicas(self): + broker, objects, expected_dbs = self._setup_misplaced_objects() + + returns = {expected_dbs[1]: (True, [True, True, True]), # ok + expected_dbs[2]: (False, [True, False, False]), # < quorum + expected_dbs[3]: (False, [False, True, True])} # ok + calls = [] + + def mock_replicate_object(part, db, node_id): + calls.append((part, db, node_id)) + return returns[db] + + with self._mock_sharder(replicas=3) as sharder: + sharder._replicate_object = mock_replicate_object + sharder._move_misplaced_objects(broker) + + self.assertEqual( + set([(0, db, 0) for db in (expected_dbs[1:4])]), set(calls)) + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'placed': 4, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + # check misplaced objects were moved to shard dbs + self._check_objects(objects[:2], expected_dbs[1]) + self._check_objects(objects[2:3], expected_dbs[2]) + self._check_objects(objects[3:], expected_dbs[3]) + # ... 
but only removed from the source db if sufficiently replicated + self._check_objects(objects[2:3], broker.db_file) + # ... and nothing else moved + self.assertFalse(os.path.exists(expected_dbs[0])) + self.assertFalse(os.path.exists(expected_dbs[4])) + + def test_misplaced_objects_insufficient_replication_2_replicas(self): + broker, objects, expected_dbs = self._setup_misplaced_objects() + + returns = {expected_dbs[1]: (True, [True, True]), # ok + expected_dbs[2]: (False, [True, False]), # ok + expected_dbs[3]: (False, [False, False])} # < quorum> + calls = [] + + def mock_replicate_object(part, db, node_id): + calls.append((part, db, node_id)) + return returns[db] + + with self._mock_sharder(replicas=2) as sharder: + sharder._replicate_object = mock_replicate_object + sharder._move_misplaced_objects(broker) + + self.assertEqual( + set([(0, db, 0) for db in (expected_dbs[1:4])]), set(calls)) + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'placed': 4, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + # check misplaced objects were moved to shard dbs + self._check_objects(objects[:2], expected_dbs[1]) + self._check_objects(objects[2:3], expected_dbs[2]) + self._check_objects(objects[3:], expected_dbs[3]) + # ... but only removed from the source db if sufficiently replicated + self._check_objects(objects[3:], broker.db_file) + # ... and nothing else moved + self.assertFalse(os.path.exists(expected_dbs[0])) + self.assertFalse(os.path.exists(expected_dbs[4])) + + def test_misplaced_objects_insufficient_replication_4_replicas(self): + broker, objects, expected_dbs = self._setup_misplaced_objects() + + returns = {expected_dbs[1]: (False, [True, False, False, False]), + expected_dbs[2]: (True, [True, False, False, True]), + expected_dbs[3]: (False, [False, False, False, False])} + calls = [] + + def mock_replicate_object(part, db, node_id): + calls.append((part, db, node_id)) + return returns[db] + + with self._mock_sharder(replicas=4) as sharder: + sharder._replicate_object = mock_replicate_object + sharder._move_misplaced_objects(broker) + + self.assertEqual( + set([(0, db, 0) for db in (expected_dbs[1:4])]), set(calls)) + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'placed': 4, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + # check misplaced objects were moved to shard dbs + self._check_objects(objects[:2], expected_dbs[1]) + self._check_objects(objects[2:3], expected_dbs[2]) + self._check_objects(objects[3:], expected_dbs[3]) + # ... but only removed from the source db if sufficiently replicated + self._check_objects(objects[:2] + objects[3:], broker.db_file) + # ... 
and nothing else moved + self.assertFalse(os.path.exists(expected_dbs[0])) + self.assertFalse(os.path.exists(expected_dbs[4])) + + def _check_misplaced_objects_shard_container_unsharded(self, conf=None): + broker = self._make_broker(account='.shards_a', container='.shard_c') + ts_shard = next(self.ts_iter) + own_sr = ShardRange(broker.path, ts_shard, 'here', 'where') + broker.merge_shard_ranges([own_sr]) + broker.set_sharding_sysmeta('Root', 'a/c') + self.assertEqual(own_sr, broker.get_own_shard_range()) # sanity check + self.assertEqual(UNSHARDED, broker.get_db_state()) + + objects = [ + # some of these are misplaced objects + ['b', self.ts_encoded(), 2, 'text/plain', 'etag_b', 0, 0], + ['here', self.ts_encoded(), 2, 'text/plain', 'etag_here', 0, 0], + ['n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0], + ['there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0], + ['x', self.ts_encoded(), 0, '', '', 1, 0], # deleted + ['y', self.ts_encoded(), 10, 'text/plain', 'etag_y', 0, 0], + ] + + shard_bounds = (('', 'here'), ('here', 'there'), + ('there', 'where'), ('where', '')) + root_shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.ACTIVE) + expected_shard_dbs = [] + for sr in root_shard_ranges: + db_hash = hash_path(sr.account, sr.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + + # no objects + with self._mock_sharder(conf=conf) as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_not_called() + + sharder._replicate_object.assert_not_called() + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 0, 'placed': 0, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertFalse( + sharder.logger.get_increment_counts().get('misplaced_found')) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + + # now put objects + for obj in objects: + broker.put_object(*obj) + self._check_objects(objects, broker.db_file) # sanity check + + # NB final shard range not available + with self._mock_sharder(conf=conf) as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges[:-1]) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_has_calls( + [mock.call(broker, newest=True, params={'states': 'updating', + 'marker': '', + 'end_marker': 'here\x00'}), + mock.call(broker, newest=True, params={'states': 'updating', + 'marker': 'where', + 'end_marker': ''})]) + sharder._replicate_object.assert_called_with( + 0, expected_shard_dbs[0], 0), + + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'found': 1, 'placed': 2, 'unplaced': 2} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + # some misplaced objects could not be moved... + warning_lines = sharder.logger.get_lines_for_level('warning') + self.assertIn( + 'Failed to find destination for at least 2 misplaced objects', + warning_lines[0]) + self.assertFalse(warning_lines[1:]) + sharder.logger.clear() + + # check misplaced objects were moved + self._check_objects(objects[:2], expected_shard_dbs[0]) + # ... and removed from the source db + self._check_objects(objects[2:], broker.db_file) + # ... 
and nothing else moved + self.assertFalse(os.path.exists(expected_shard_dbs[1])) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + self.assertFalse(os.path.exists(expected_shard_dbs[3])) + + # repeat with final shard range available + with self._mock_sharder(conf=conf) as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_has_calls( + [mock.call(broker, newest=True, params={'states': 'updating', + 'marker': 'where', + 'end_marker': ''})]) + + sharder._replicate_object.assert_called_with( + 0, expected_shard_dbs[-1], 0), + + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 2, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + + # check misplaced objects were moved + self._check_objects(objects[:2], expected_shard_dbs[0]) + self._check_objects(objects[4:], expected_shard_dbs[3]) + # ... and removed from the source db + self._check_objects(objects[2:4], broker.db_file) + # ... and nothing else moved + self.assertFalse(os.path.exists(expected_shard_dbs[1])) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + + # repeat - no work remaining + with self._mock_sharder(conf=conf) as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_not_called() + sharder._replicate_object.assert_not_called() + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 0, 'placed': 0, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertFalse( + sharder.logger.get_increment_counts().get('misplaced_found')) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + + # and then more misplaced updates arrive + new_objects = [ + ['a', self.ts_encoded(), 51, 'text/plain', 'etag_a', 0, 0], + ['z', self.ts_encoded(), 52, 'text/plain', 'etag_z', 0, 0], + ] + for obj in new_objects: + broker.put_object(*obj) + # sanity check the puts landed in sharded broker + self._check_objects(new_objects[:1] + objects[2:4] + new_objects[1:], + broker.db_file) + + with self._mock_sharder(conf=conf) as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_has_calls( + [mock.call(broker, newest=True, params={'states': 'updating', + 'marker': '', + 'end_marker': 'here\x00'}), + mock.call(broker, newest=True, params={'states': 'updating', + 'marker': 'where', + 'end_marker': ''})]) + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) + for db in (expected_shard_dbs[0], expected_shard_dbs[3])], + any_order=True + ) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 2, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + + # check new misplaced objects were moved + self._check_objects(new_objects[:1] + objects[:2], + expected_shard_dbs[0]) + self._check_objects(objects[4:] + new_objects[1:], + expected_shard_dbs[3]) + # ... 
and removed from the source db + self._check_objects(objects[2:4], broker.db_file) + # ... and nothing else moved + self.assertFalse(os.path.exists(expected_shard_dbs[1])) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + + def test_misplaced_objects_shard_container_unsharded(self): + self._check_misplaced_objects_shard_container_unsharded() + + def test_misplaced_objects_shard_container_unsharded_limit_two(self): + self._check_misplaced_objects_shard_container_unsharded( + conf={'cleave_row_batch_size': 2}) + + def test_misplaced_objects_shard_container_unsharded_limit_one(self): + self._check_misplaced_objects_shard_container_unsharded( + conf={'cleave_row_batch_size': 1}) + + def test_misplaced_objects_shard_container_sharding(self): + broker = self._make_broker(account='.shards_a', container='shard_c') + ts_shard = next(self.ts_iter) + # note that own_sr spans two root shard ranges + own_sr = ShardRange(broker.path, ts_shard, 'here', 'where') + own_sr.update_state(ShardRange.SHARDING) + own_sr.epoch = next(self.ts_iter) + broker.merge_shard_ranges([own_sr]) + broker.set_sharding_sysmeta('Root', 'a/c') + self.assertEqual(own_sr, broker.get_own_shard_range()) # sanity check + self.assertEqual(UNSHARDED, broker.get_db_state()) + + objects = [ + # some of these are misplaced objects + ['b', self.ts_encoded(), 2, 'text/plain', 'etag_b', 0, 0], + ['here', self.ts_encoded(), 2, 'text/plain', 'etag_here', 0, 0], + ['n', self.ts_encoded(), 2, 'text/plain', 'etag_n', 0, 0], + ['there', self.ts_encoded(), 3, 'text/plain', 'etag_there', 0, 0], + ['v', self.ts_encoded(), 10, 'text/plain', 'etag_v', 0, 0], + ['y', self.ts_encoded(), 10, 'text/plain', 'etag_y', 0, 0], + ] + + shard_bounds = (('', 'here'), ('here', 'there'), + ('there', 'where'), ('where', '')) + root_shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.ACTIVE) + expected_shard_dbs = [] + for sr in root_shard_ranges: + db_hash = hash_path(sr.account, sr.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + + # pretend broker is sharding but not yet cleaved a shard + self.assertTrue(broker.set_sharding_state()) + broker.merge_shard_ranges([dict(sr) for sr in root_shard_ranges[1:3]]) + # then some updates arrive + for obj in objects: + broker.put_object(*obj) + broker.get_info() + self._check_objects(objects, broker.db_file) # sanity check + + # first destination is not available + with self._mock_sharder() as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges[1:]) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_has_calls( + [mock.call(broker, newest=True, params={'states': 'updating', + 'marker': '', + 'end_marker': 'here\x00'}), + mock.call(broker, newest=True, params={'states': 'updating', + 'marker': 'where', + 'end_marker': ''})]) + sharder._replicate_object.assert_has_calls( + [mock.call(0, expected_shard_dbs[-1], 0)], + ) + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'found': 1, 'placed': 1, 'unplaced': 2} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + warning_lines = sharder.logger.get_lines_for_level('warning') + self.assertIn( + 'Failed to find destination for at least 2 misplaced objects', + warning_lines[0]) + self.assertFalse(warning_lines[1:]) + sharder.logger.clear() + + # check some misplaced objects were moved + 
self._check_objects(objects[5:], expected_shard_dbs[3]) + # ... and removed from the source db + self._check_objects(objects[:5], broker.db_file) + self.assertFalse(os.path.exists(expected_shard_dbs[0])) + self.assertFalse(os.path.exists(expected_shard_dbs[1])) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + + # normality resumes and all destinations are available + with self._mock_sharder() as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_has_calls( + [mock.call(broker, newest=True, params={'states': 'updating', + 'marker': '', + 'end_marker': 'here\x00'})] + ) + + sharder._replicate_object.assert_has_calls( + [mock.call(0, expected_shard_dbs[0], 0)], + ) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 2, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + + # check misplaced objects were moved + self._check_objects(objects[:2], expected_shard_dbs[0]) + self._check_objects(objects[5:], expected_shard_dbs[3]) + # ... and removed from the source db + self._check_objects(objects[2:5], broker.db_file) + self.assertFalse(os.path.exists(expected_shard_dbs[1])) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + + # pretend first shard has been cleaved + context = CleavingContext.load(broker) + context.cursor = 'there' + context.store(broker) + # and then more misplaced updates arrive + new_objects = [ + ['a', self.ts_encoded(), 51, 'text/plain', 'etag_a', 0, 0], + # this one is in the now cleaved shard range... + ['k', self.ts_encoded(), 52, 'text/plain', 'etag_k', 0, 0], + ['z', self.ts_encoded(), 53, 'text/plain', 'etag_z', 0, 0], + ] + for obj in new_objects: + broker.put_object(*obj) + broker.get_info() # force updates to be committed + # sanity check the puts landed in sharded broker + self._check_objects(sorted(new_objects + objects[2:5]), broker.db_file) + with self._mock_sharder() as sharder: + sharder._fetch_shard_ranges = mock.MagicMock( + return_value=root_shard_ranges) + sharder._move_misplaced_objects(broker) + + sharder._fetch_shard_ranges.assert_has_calls( + [mock.call(broker, newest=True, + params={'states': 'updating', 'marker': '', + 'end_marker': 'there\x00'}), + mock.call(broker, newest=True, + params={'states': 'updating', 'marker': 'where', + 'end_marker': ''})]) + + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in (expected_shard_dbs[0], + expected_shard_dbs[1], + expected_shard_dbs[-1])], + any_order=True + ) + + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 5, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + + # check *all* the misplaced objects were moved + self._check_objects(new_objects[:1] + objects[:2], + expected_shard_dbs[0]) + self._check_objects(new_objects[1:2] + objects[2:4], + expected_shard_dbs[1]) + self._check_objects(objects[5:] + new_objects[2:], + expected_shard_dbs[3]) + # ... 
and removed from the source db + self._check_objects(objects[4:5], broker.db_file) + self.assertFalse(os.path.exists(expected_shard_dbs[2])) + + def test_misplaced_objects_deleted_and_updated(self): + # setup + broker = self._make_broker() + broker.enable_sharding(next(self.ts_iter)) + + shard_bounds = (('', 'here'), ('here', '')) + root_shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.ACTIVE) + expected_shard_dbs = [] + for sr in root_shard_ranges: + db_hash = hash_path(sr.account, sr.container) + expected_shard_dbs.append( + os.path.join(self.tempdir, 'sda', 'containers', '0', + db_hash[-3:], db_hash, db_hash + '.db')) + broker.merge_shard_ranges(root_shard_ranges) + self.assertTrue(broker.set_sharding_state()) + + ts_older_internal = self.ts_encoded() # used later + # put deleted objects into source + objects = [ + ['b', self.ts_encoded(), 0, '', '', 1, 0], + ['x', self.ts_encoded(), 0, '', '', 1, 0] + ] + for obj in objects: + broker.put_object(*obj) + broker.get_info() + self._check_objects(objects, broker.db_file) # sanity check + # pretend we cleaved all ranges - sharded state + self.assertTrue(broker.set_sharded_state()) + + with self._mock_sharder() as sharder: + sharder.logger = debug_logger() + sharder._move_misplaced_objects(broker) + + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in (expected_shard_dbs[0], + expected_shard_dbs[1])], + any_order=True + ) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'placed': 2, 'unplaced': 0} + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + + # check new misplaced objects were moved + self._check_objects(objects[:1], expected_shard_dbs[0]) + self._check_objects(objects[1:], expected_shard_dbs[1]) + # ... and removed from the source db + self._check_objects([], broker.db_file) + + # update source db with older undeleted versions of same objects + old_objects = [ + ['b', ts_older_internal, 2, 'text/plain', 'etag_b', 0, 0], + ['x', ts_older_internal, 4, 'text/plain', 'etag_x', 0, 0] + ] + for obj in old_objects: + broker.put_object(*obj) + broker.get_info() + self._check_objects(old_objects, broker.db_file) # sanity check + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in (expected_shard_dbs[0], + expected_shard_dbs[1])], + any_order=True + ) + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + + # check older misplaced objects were not merged to shard brokers + self._check_objects(objects[:1], expected_shard_dbs[0]) + self._check_objects(objects[1:], expected_shard_dbs[1]) + # ... 
and removed from the source db + self._check_objects([], broker.db_file) + + # the destination shard dbs for misplaced objects may already exist so + # check they are updated correctly when overwriting objects + # update source db with newer deleted versions of same objects + new_objects = [ + ['b', self.ts_encoded(), 0, '', '', 1, 0], + ['x', self.ts_encoded(), 0, '', '', 1, 0] + ] + for obj in new_objects: + broker.put_object(*obj) + broker.get_info() + self._check_objects(new_objects, broker.db_file) # sanity check + shard_broker = ContainerBroker( + expected_shard_dbs[0], account=root_shard_ranges[0].account, + container=root_shard_ranges[0].container) + # update one shard container with even newer version of object + timestamps = [next(self.ts_iter) for i in range(7)] + ts_newer = encode_timestamps( + timestamps[1], timestamps[3], timestamps[5]) + newer_object = ('b', ts_newer, 10, 'text/plain', 'etag_b', 0, 0) + shard_broker.put_object(*newer_object) + + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + + sharder._replicate_object.assert_has_calls( + [mock.call(0, db, 0) for db in (expected_shard_dbs[0], + expected_shard_dbs[1])], + any_order=True + ) + self._assert_stats(expected_stats, sharder, 'misplaced') + self.assertEqual( + 1, sharder.logger.get_increment_counts()['misplaced_found']) + + # check only the newer misplaced object was moved + self._check_objects([newer_object], expected_shard_dbs[0]) + self._check_objects(new_objects[1:], expected_shard_dbs[1]) + # ... and removed from the source db + self._check_objects([], broker.db_file) + + # update source with a version of 'b' that has newer data + # but older content-type and metadata relative to shard object + ts_update = encode_timestamps( + timestamps[2], timestamps[3], timestamps[4]) + update_object = ('b', ts_update, 20, 'text/ignored', 'etag_newer', 0, + 0) + broker.put_object(*update_object) + + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + + ts_expected = encode_timestamps( + timestamps[2], timestamps[3], timestamps[5]) + expected = ('b', ts_expected, 20, 'text/plain', 'etag_newer', 0, 0) + self._check_objects([expected], expected_shard_dbs[0]) + self._check_objects([], broker.db_file) + + # update source with a version of 'b' that has older data + # and content-type but newer metadata relative to shard object + ts_update = encode_timestamps( + timestamps[1], timestamps[3], timestamps[6]) + update_object = ('b', ts_update, 999, 'text/ignored', 'etag_b', 0, 0) + broker.put_object(*update_object) + + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + + ts_expected = encode_timestamps( + timestamps[2], timestamps[3], timestamps[6]) + expected = ('b', ts_expected, 20, 'text/plain', 'etag_newer', 0, 0) + self._check_objects([expected], expected_shard_dbs[0]) + self._check_objects([], broker.db_file) + + # update source with a version of 'b' that has older data + # but newer content-type and metadata + ts_update = encode_timestamps( + timestamps[2], timestamps[6], timestamps[6]) + update_object = ('b', ts_update, 999, 'text/newer', 'etag_b', 0, 0) + broker.put_object(*update_object) + + with self._mock_sharder() as sharder: + sharder._move_misplaced_objects(broker) + + ts_expected = encode_timestamps( + timestamps[2], timestamps[6], timestamps[6]) + expected = ('b', ts_expected, 20, 'text/newer', 'etag_newer', 0, 0) + self._check_objects([expected], expected_shard_dbs[0]) + self._check_objects([], broker.db_file) + + def 
_setup_find_ranges(self, account, cont, lower, upper): + broker = self._make_broker(account=account, container=cont) + own_sr = ShardRange('%s/%s' % (account, cont), Timestamp.now(), + lower, upper) + broker.merge_shard_ranges([own_sr]) + broker.set_sharding_sysmeta('Root', 'a/c') + objects = [ + # some of these are misplaced objects + ['obj%3d' % i, self.ts_encoded(), i, 'text/plain', 'etag%s' % i, 0] + for i in range(100)] + for obj in objects: + broker.put_object(*obj) + return broker, objects + + def _check_find_shard_ranges_none_found(self, broker, objects): + with self._mock_sharder() as sharder: + num_found = sharder._find_shard_ranges(broker) + self.assertGreater(sharder.split_size, len(objects)) + self.assertEqual(0, num_found) + self.assertFalse(broker.get_shard_ranges()) + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'found': 0, 'min_time': mock.ANY, + 'max_time': mock.ANY} + stats = self._assert_stats(expected_stats, sharder, 'scanned') + self.assertGreaterEqual(stats['max_time'], stats['min_time']) + + with self._mock_sharder( + conf={'shard_container_threshold': 200}) as sharder: + num_found = sharder._find_shard_ranges(broker) + self.assertEqual(sharder.split_size, len(objects)) + self.assertEqual(0, num_found) + self.assertFalse(broker.get_shard_ranges()) + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1, + 'found': 0, 'min_time': mock.ANY, + 'max_time': mock.ANY} + stats = self._assert_stats(expected_stats, sharder, 'scanned') + self.assertGreaterEqual(stats['max_time'], stats['min_time']) + + def test_find_shard_ranges_none_found_root(self): + broker, objects = self._setup_find_ranges('a', 'c', '', '') + self._check_find_shard_ranges_none_found(broker, objects) + + def test_find_shard_ranges_none_found_shard(self): + broker, objects = self._setup_find_ranges( + '.shards_a', 'c', 'lower', 'upper') + self._check_find_shard_ranges_none_found(broker, objects) + + def _check_find_shard_ranges_finds_two(self, account, cont, lower, upper): + def check_ranges(): + self.assertEqual(2, len(broker.get_shard_ranges())) + expected_ranges = [ + ShardRange( + ShardRange.make_path('.int_shards_a', 'c', cont, now, 0), + now, lower, objects[98][0], 99), + ShardRange( + ShardRange.make_path('.int_shards_a', 'c', cont, now, 1), + now, objects[98][0], upper, 1), + ] + self._assert_shard_ranges_equal(expected_ranges, + broker.get_shard_ranges()) + + # first invocation finds both ranges + broker, objects = self._setup_find_ranges( + account, cont, lower, upper) + with self._mock_sharder(conf={'shard_container_threshold': 199, + 'auto_create_account_prefix': '.int_'} + ) as sharder: + with mock_timestamp_now() as now: + num_found = sharder._find_shard_ranges(broker) + self.assertEqual(99, sharder.split_size) + self.assertEqual(2, num_found) + check_ranges() + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 2, 'min_time': mock.ANY, + 'max_time': mock.ANY} + stats = self._assert_stats(expected_stats, sharder, 'scanned') + self.assertGreaterEqual(stats['max_time'], stats['min_time']) + + # second invocation finds none + with self._mock_sharder(conf={'shard_container_threshold': 199, + 'auto_create_account_prefix': '.int_'} + ) as sharder: + num_found = sharder._find_shard_ranges(broker) + self.assertEqual(0, num_found) + self.assertEqual(2, len(broker.get_shard_ranges())) + check_ranges() + expected_stats = {'attempted': 0, 'success': 0, 'failure': 0, + 'found': 0, 'min_time': mock.ANY, + 'max_time': mock.ANY} + stats = 
self._assert_stats(expected_stats, sharder, 'scanned') + self.assertGreaterEqual(stats['max_time'], stats['min_time']) + + def test_find_shard_ranges_finds_two_root(self): + self._check_find_shard_ranges_finds_two('a', 'c', '', '') + + def test_find_shard_ranges_finds_two_shard(self): + self._check_find_shard_ranges_finds_two('.shards_a', 'c_', 'l', 'u') + + def _check_find_shard_ranges_finds_three(self, account, cont, lower, + upper): + broker, objects = self._setup_find_ranges( + account, cont, lower, upper) + now = Timestamp.now() + expected_ranges = [ + ShardRange( + ShardRange.make_path('.shards_a', 'c', cont, now, 0), + now, lower, objects[44][0], 45), + ShardRange( + ShardRange.make_path('.shards_a', 'c', cont, now, 1), + now, objects[44][0], objects[89][0], 45), + ShardRange( + ShardRange.make_path('.shards_a', 'c', cont, now, 2), + now, objects[89][0], upper, 10), + ] + # first invocation finds 2 ranges + with self._mock_sharder( + conf={'shard_container_threshold': 90, + 'shard_scanner_batch_size': 2}) as sharder: + with mock_timestamp_now(now): + num_found = sharder._find_shard_ranges(broker) + self.assertEqual(45, sharder.split_size) + self.assertEqual(2, num_found) + self.assertEqual(2, len(broker.get_shard_ranges())) + self._assert_shard_ranges_equal(expected_ranges[:2], + broker.get_shard_ranges()) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 2, 'min_time': mock.ANY, + 'max_time': mock.ANY} + stats = self._assert_stats(expected_stats, sharder, 'scanned') + self.assertGreaterEqual(stats['max_time'], stats['min_time']) + + # second invocation finds third shard range + with self._mock_sharder(conf={'shard_container_threshold': 199, + 'shard_scanner_batch_size': 2} + ) as sharder: + with mock_timestamp_now(now): + num_found = sharder._find_shard_ranges(broker) + self.assertEqual(1, num_found) + self.assertEqual(3, len(broker.get_shard_ranges())) + self._assert_shard_ranges_equal(expected_ranges, + broker.get_shard_ranges()) + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0, + 'found': 1, 'min_time': mock.ANY, + 'max_time': mock.ANY} + stats = self._assert_stats(expected_stats, sharder, 'scanned') + self.assertGreaterEqual(stats['max_time'], stats['min_time']) + + # third invocation finds none + with self._mock_sharder(conf={'shard_container_threshold': 199, + 'shard_scanner_batch_size': 2} + ) as sharder: + sharder._send_shard_ranges = mock.MagicMock(return_value=True) + num_found = sharder._find_shard_ranges(broker) + self.assertEqual(0, num_found) + self.assertEqual(3, len(broker.get_shard_ranges())) + self._assert_shard_ranges_equal(expected_ranges, + broker.get_shard_ranges()) + expected_stats = {'attempted': 0, 'success': 0, 'failure': 0, + 'found': 0, 'min_time': mock.ANY, + 'max_time': mock.ANY} + stats = self._assert_stats(expected_stats, sharder, 'scanned') + self.assertGreaterEqual(stats['max_time'], stats['min_time']) + + def test_find_shard_ranges_finds_three_root(self): + self._check_find_shard_ranges_finds_three('a', 'c', '', '') + + def test_find_shard_ranges_finds_three_shard(self): + self._check_find_shard_ranges_finds_three('.shards_a', 'c_', 'l', 'u') + + def test_sharding_enabled(self): + broker = self._make_broker() + self.assertFalse(sharding_enabled(broker)) + broker.update_metadata( + {'X-Container-Sysmeta-Sharding': + ('yes', Timestamp.now().internal)}) + self.assertTrue(sharding_enabled(broker)) + # deleting broker clears sharding sysmeta + broker.delete_db(Timestamp.now().internal) + 
self.assertFalse(sharding_enabled(broker)) + # but if broker has a shard range then sharding is enabled + broker.merge_shard_ranges( + ShardRange('acc/a_shard', Timestamp.now(), 'l', 'u')) + self.assertTrue(sharding_enabled(broker)) + + def test_send_shard_ranges(self): + shard_ranges = self._make_shard_ranges((('', 'h'), ('h', ''))) + + def do_test(replicas, *resp_codes): + sent_data = defaultdict(str) + + def on_send(fake_conn, data): + sent_data[fake_conn] += data + + with self._mock_sharder(replicas=replicas) as sharder: + with mocked_http_conn(*resp_codes, give_send=on_send) as conn: + with mock_timestamp_now() as now: + res = sharder._send_shard_ranges( + 'a', 'c', shard_ranges) + + self.assertEqual(sharder.ring.replica_count, len(conn.requests)) + expected_body = json.dumps([dict(sr) for sr in shard_ranges]) + expected_headers = {'Content-Type': 'application/json', + 'Content-Length': str(len(expected_body)), + 'X-Timestamp': now.internal, + 'X-Backend-Record-Type': 'shard', + 'User-Agent': mock.ANY} + for data in sent_data.values(): + self.assertEqual(expected_body, data) + hosts = set() + for req in conn.requests: + path_parts = req['path'].split('/')[1:] + hosts.add('%s:%s/%s' % (req['ip'], req['port'], path_parts[0])) + # FakeRing only has one partition + self.assertEqual('0', path_parts[1]) + self.assertEqual('PUT', req['method']) + self.assertEqual(['a', 'c'], path_parts[-2:]) + req_headers = req['headers'] + for k, v in expected_headers.items(): + self.assertEqual(v, req_headers[k]) + self.assertTrue( + req_headers['User-Agent'].startswith('container-sharder')) + self.assertEqual(sharder.ring.replica_count, len(hosts)) + return res, sharder + + replicas = 3 + res, sharder = do_test(replicas, 202, 202, 202) + self.assertTrue(res) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, 202, 202, 404) + self.assertTrue(res) + self.assertEqual([True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, 202, 202, Exception) + self.assertTrue(res) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertEqual([True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + res, sharder = do_test(replicas, 202, 404, 404) + self.assertFalse(res) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, 500, 500, 500) + self.assertFalse(res) + self.assertEqual([True, True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, Exception, Exception, 202) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + res, sharder = do_test(replicas, Exception, eventlet.Timeout(), 202) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + + replicas = 2 + res, sharder = do_test(replicas, 202, 202) + self.assertTrue(res) + 
self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, 202, 404) + self.assertTrue(res) + self.assertEqual([True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, 202, Exception) + self.assertTrue(res) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertEqual([True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + res, sharder = do_test(replicas, 404, 404) + self.assertFalse(res) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, Exception, Exception) + self.assertFalse(res) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + res, sharder = do_test(replicas, eventlet.Timeout(), Exception) + self.assertFalse(res) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + + replicas = 4 + res, sharder = do_test(replicas, 202, 202, 202, 202) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + self.assertTrue(res) + res, sharder = do_test(replicas, 202, 202, 404, 404) + self.assertTrue(res) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, 202, 202, Exception, Exception) + self.assertTrue(res) + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + res, sharder = do_test(replicas, 202, 404, 404, 404) + self.assertFalse(res) + self.assertEqual([True, True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, 500, 500, 500, 202) + self.assertFalse(res) + self.assertEqual([True, True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('warning')]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + res, sharder = do_test(replicas, Exception, Exception, 202, 404) + self.assertFalse(res) + self.assertEqual([True], [ + all(msg in line for msg in ('Failed to put shard ranges', '404')) + for line in sharder.logger.get_lines_for_level('warning')]) + self.assertEqual([True, True], [ + 'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + res, sharder = do_test( + replicas, eventlet.Timeout(), eventlet.Timeout(), 202, 404) + self.assertFalse(res) + self.assertEqual([True], [ + all(msg in line for msg in ('Failed to put shard ranges', '404')) + for line in sharder.logger.get_lines_for_level('warning')]) + self.assertEqual([True, True], [ + 
'Failed to put shard ranges' in line for line in + sharder.logger.get_lines_for_level('error')]) + + def test_process_broker_not_sharding_no_others(self): + # verify that sharding process will not start when own shard range is + # missing or in wrong state or there are no other shard ranges + broker = self._make_broker() + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2', + 'index': 0} + # sanity check + self.assertIsNone(broker.get_own_shard_range(no_default=True)) + self.assertEqual(UNSHARDED, broker.get_db_state()) + + # no own shard range + with self._mock_sharder() as sharder: + sharder._process_broker(broker, node, 99) + self.assertIsNone(broker.get_own_shard_range(no_default=True)) + self.assertEqual(UNSHARDED, broker.get_db_state()) + self.assertFalse(broker.logger.get_lines_for_level('warning')) + self.assertFalse(broker.logger.get_lines_for_level('error')) + broker.logger.clear() + + # now add own shard range + for state in sorted(ShardRange.STATES): + own_sr = broker.get_own_shard_range() # returns the default + own_sr.update_state(state) + broker.merge_shard_ranges([own_sr]) + with mock.patch.object( + broker, 'set_sharding_state') as mock_set_sharding_state: + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + with mock.patch.object(sharder, '_audit_container'): + sharder.logger = debug_logger() + sharder._process_broker(broker, node, 99) + own_shard_range = broker.get_own_shard_range( + no_default=True) + mock_set_sharding_state.assert_not_called() + self.assertEqual(dict(own_sr, meta_timestamp=now), + dict(own_shard_range)) + self.assertEqual(UNSHARDED, broker.get_db_state()) + self.assertFalse(broker.logger.get_lines_for_level('warning')) + self.assertFalse(broker.logger.get_lines_for_level('error')) + broker.logger.clear() + + def _check_process_broker_sharding_no_others(self, state): + # verify that when existing own_shard_range has given state and there + # are other shard ranges then the sharding process will begin + broker = self._make_broker(hash_='hash%s' % state) + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2', + 'index': 0} + own_sr = broker.get_own_shard_range() + self.assertTrue(own_sr.update_state(state)) + epoch = Timestamp.now() + own_sr.epoch = epoch + shard_ranges = self._make_shard_ranges((('', 'm'), ('m', ''))) + broker.merge_shard_ranges([own_sr] + shard_ranges) + + with self._mock_sharder() as sharder: + with mock.patch.object( + sharder, '_create_shard_containers', return_value=0): + with mock_timestamp_now() as now: + sharder._audit_container = mock.MagicMock() + sharder._process_broker(broker, node, 99) + final_own_sr = broker.get_own_shard_range(no_default=True) + + self.assertEqual(dict(own_sr, meta_timestamp=now), + dict(final_own_sr)) + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(epoch.normal, parse_db_filename(broker.db_file)[1]) + self.assertFalse(broker.logger.get_lines_for_level('warning')) + self.assertFalse(broker.logger.get_lines_for_level('error')) + + def test_process_broker_sharding_with_own_shard_range_no_others(self): + self._check_process_broker_sharding_no_others(ShardRange.SHARDING) + self._check_process_broker_sharding_no_others(ShardRange.SHRINKING) + + def test_process_broker_not_sharding_others(self): + # verify that sharding process will not start when own shard range is + # missing or in wrong state even when other shard ranges are in the db + broker = self._make_broker() + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': 
'2', + 'index': 0} + # sanity check + self.assertIsNone(broker.get_own_shard_range(no_default=True)) + self.assertEqual(UNSHARDED, broker.get_db_state()) + + # add shard ranges - but not own + shard_ranges = self._make_shard_ranges((('', 'h'), ('h', ''))) + broker.merge_shard_ranges(shard_ranges) + + with self._mock_sharder() as sharder: + sharder._process_broker(broker, node, 99) + self.assertIsNone(broker.get_own_shard_range(no_default=True)) + self.assertEqual(UNSHARDED, broker.get_db_state()) + self.assertFalse(broker.logger.get_lines_for_level('warning')) + self.assertFalse(broker.logger.get_lines_for_level('error')) + broker.logger.clear() + + # now add own shard range + for state in sorted(ShardRange.STATES): + if state in (ShardRange.SHARDING, + ShardRange.SHRINKING, + ShardRange.SHARDED): + epoch = None + else: + epoch = Timestamp.now() + + own_sr = broker.get_own_shard_range() # returns the default + own_sr.update_state(state) + own_sr.epoch = epoch + broker.merge_shard_ranges([own_sr]) + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + sharder._process_broker(broker, node, 99) + own_shard_range = broker.get_own_shard_range( + no_default=True) + self.assertEqual(dict(own_sr, meta_timestamp=now), + dict(own_shard_range)) + self.assertEqual(UNSHARDED, broker.get_db_state()) + if epoch: + self.assertFalse(broker.logger.get_lines_for_level('warning')) + else: + self.assertIn('missing epoch', + broker.logger.get_lines_for_level('warning')[0]) + self.assertFalse(broker.logger.get_lines_for_level('error')) + broker.logger.clear() + + def _check_process_broker_sharding_others(self, state): + # verify states in which own_shard_range will cause sharding + # process to start when other shard ranges are in the db + broker = self._make_broker(hash_='hash%s' % state) + node = {'ip': '1.2.3.4', 'port': 6040, 'device': 'sda5', 'id': '2', + 'index': 0} + # add shard ranges - but not own + shard_ranges = self._make_shard_ranges((('', 'h'), ('h', ''))) + broker.merge_shard_ranges(shard_ranges) + # sanity check + self.assertIsNone(broker.get_own_shard_range(no_default=True)) + self.assertEqual(UNSHARDED, broker.get_db_state()) + + # now set own shard range to given state and persist it + own_sr = broker.get_own_shard_range() # returns the default + self.assertTrue(own_sr.update_state(state)) + epoch = Timestamp.now() + own_sr.epoch = epoch + broker.merge_shard_ranges([own_sr]) + with self._mock_sharder() as sharder: + + sharder.logger = debug_logger() + with mock_timestamp_now() as now: + # we're not testing rest of the process here so prevent any + # attempt to progress shard range states + sharder._create_shard_containers = lambda *args: 0 + sharder._process_broker(broker, node, 99) + own_shard_range = broker.get_own_shard_range(no_default=True) + + self.assertEqual(dict(own_sr, meta_timestamp=now), + dict(own_shard_range)) + self.assertEqual(SHARDING, broker.get_db_state()) + self.assertEqual(epoch.normal, parse_db_filename(broker.db_file)[1]) + self.assertFalse(broker.logger.get_lines_for_level('warning')) + self.assertFalse(broker.logger.get_lines_for_level('error')) + + def test_process_broker_sharding_with_own_shard_range_and_others(self): + self._check_process_broker_sharding_others(ShardRange.SHARDING) + self._check_process_broker_sharding_others(ShardRange.SHRINKING) + self._check_process_broker_sharding_others(ShardRange.SHARDED) + + def check_shard_ranges_sent(self, broker, expected_sent): + bodies = [] + + def capture_send(conn, data): + bodies.append(data) + 
+ with self._mock_sharder() as sharder: + with mocked_http_conn(204, 204, 204, + give_send=capture_send) as mock_conn: + sharder._update_root_container(broker) + + for req in mock_conn.requests: + self.assertEqual('PUT', req['method']) + self.assertEqual([expected_sent] * 3, + [json.loads(b) for b in bodies]) + + def test_update_root_container_own_range(self): + broker = self._make_broker() + + # nothing to send + with self._mock_sharder() as sharder: + with mocked_http_conn() as mock_conn: + sharder._update_root_container(broker) + self.assertFalse(mock_conn.requests) + + def check_only_own_shard_range_sent(state): + own_shard_range = broker.get_own_shard_range() + self.assertTrue(own_shard_range.update_state( + state, state_timestamp=next(self.ts_iter))) + broker.merge_shard_ranges([own_shard_range]) + # add an object, expect to see it reflected in the own shard range + # that is sent + broker.put_object(str(own_shard_range.object_count + 1), + next(self.ts_iter).internal, 1, '', '') + with mock_timestamp_now() as now: + # force own shard range meta updates to be at fixed timestamp + expected_sent = [ + dict(own_shard_range, + meta_timestamp=now.internal, + object_count=own_shard_range.object_count + 1, + bytes_used=own_shard_range.bytes_used + 1)] + self.check_shard_ranges_sent(broker, expected_sent) + + for state in ShardRange.STATES: + with annotate_failure(state): + check_only_own_shard_range_sent(state) + + def test_update_root_container_all_ranges(self): + broker = self._make_broker() + other_shard_ranges = self._make_shard_ranges((('', 'h'), ('h', ''))) + self.assertTrue(other_shard_ranges[0].set_deleted()) + broker.merge_shard_ranges(other_shard_ranges) + + # own range missing - send nothing + with self._mock_sharder() as sharder: + with mocked_http_conn() as mock_conn: + sharder._update_root_container(broker) + self.assertFalse(mock_conn.requests) + + def check_all_shard_ranges_sent(state): + own_shard_range = broker.get_own_shard_range() + self.assertTrue(own_shard_range.update_state( + state, state_timestamp=next(self.ts_iter))) + broker.merge_shard_ranges([own_shard_range]) + # add an object, expect to see it reflected in the own shard range + # that is sent + broker.put_object(str(own_shard_range.object_count + 1), + next(self.ts_iter).internal, 1, '', '') + with mock_timestamp_now() as now: + shard_ranges = broker.get_shard_ranges(include_deleted=True) + expected_sent = sorted([ + own_shard_range.copy( + meta_timestamp=now.internal, + object_count=own_shard_range.object_count + 1, + bytes_used=own_shard_range.bytes_used + 1)] + + shard_ranges, + key=lambda sr: (sr.upper, sr.state, sr.lower)) + self.check_shard_ranges_sent( + broker, [dict(sr) for sr in expected_sent]) + + for state in ShardRange.STATES.keys(): + with annotate_failure(state): + check_all_shard_ranges_sent(state) + + def test_audit_root_container(self): + broker = self._make_broker() + + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0} + with self._mock_sharder() as sharder: + with mock.patch.object( + sharder, '_audit_shard_container') as mocked: + sharder._audit_container(broker) + self._assert_stats(expected_stats, sharder, 'audit_root') + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + mocked.assert_not_called() + + def assert_overlap_warning(line, state_text): + self.assertIn( + 'Audit failed for root %s' % broker.db_file, line) + self.assertIn( + 'overlapping ranges in state %s: k-t s-z' % state_text, + line) 
+ + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1} + shard_bounds = (('a', 'j'), ('k', 't'), ('s', 'z')) + for state, state_text in ShardRange.STATES.items(): + shard_ranges = self._make_shard_ranges(shard_bounds, state) + broker.merge_shard_ranges(shard_ranges) + with self._mock_sharder() as sharder: + with mock.patch.object( + sharder, '_audit_shard_container') as mocked: + sharder._audit_container(broker) + lines = sharder.logger.get_lines_for_level('warning') + assert_overlap_warning(lines[0], state_text) + self.assertFalse(lines[1:]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + self._assert_stats(expected_stats, sharder, 'audit_root') + mocked.assert_not_called() + + def assert_missing_warning(line): + self.assertIn( + 'Audit failed for root %s' % broker.db_file, line) + self.assertIn('missing range(s): -a j-k z-', line) + + own_shard_range = broker.get_own_shard_range() + states = (ShardRange.SHARDING, ShardRange.SHARDED) + for state in states: + own_shard_range.update_state( + state, state_timestamp=next(self.ts_iter)) + broker.merge_shard_ranges([own_shard_range]) + with self._mock_sharder() as sharder: + with mock.patch.object( + sharder, '_audit_shard_container') as mocked: + sharder._audit_container(broker) + lines = sharder.logger.get_lines_for_level('warning') + assert_missing_warning(lines[0]) + assert_overlap_warning(lines[0], state_text) + self.assertFalse(lines[1:]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + self._assert_stats(expected_stats, sharder, 'audit_root') + mocked.assert_not_called() + + def test_audit_shard_container(self): + broker = self._make_broker(account='.shards_a', container='shard_c') + broker.set_sharding_sysmeta('Root', 'a/c') + # include overlaps to verify correct match for updating own shard range + shard_bounds = ( + ('a', 'j'), ('k', 't'), ('k', 's'), ('l', 's'), ('s', 'z')) + shard_ranges = self._make_shard_ranges(shard_bounds, ShardRange.ACTIVE) + shard_ranges[1].name = broker.path + expected_stats = {'attempted': 1, 'success': 0, 'failure': 1} + + def call_audit_container(exc=None): + with self._mock_sharder() as sharder: + sharder.logger = debug_logger() + with mock.patch.object(sharder, '_audit_root_container') \ + as mocked, mock.patch.object( + sharder, 'int_client') as mock_swift: + mock_response = mock.MagicMock() + mock_response.headers = {'x-backend-record-type': + 'shard'} + mock_response.body = json.dumps( + [dict(sr) for sr in shard_ranges]) + mock_swift.make_request.return_value = mock_response + mock_swift.make_request.side_effect = exc + mock_swift.make_path = (lambda a, c: + '/v1/%s/%s' % (a, c)) + sharder.reclaim_age = 0 + sharder._audit_container(broker) + mocked.assert_not_called() + return sharder, mock_swift + + # bad account name + broker.account = 'bad_account' + sharder, mock_swift = call_audit_container() + lines = sharder.logger.get_lines_for_level('warning') + self._assert_stats(expected_stats, sharder, 'audit_shard') + self.assertIn('Audit warnings for shard %s' % broker.db_file, lines[0]) + self.assertIn('account not in shards namespace', lines[0]) + self.assertNotIn('root has no matching shard range', lines[0]) + self.assertNotIn('unable to get shard ranges from root', lines[0]) + self.assertIn('Audit failed for shard %s' % broker.db_file, lines[1]) + self.assertIn('missing own shard range', lines[1]) + self.assertFalse(lines[2:]) + self.assertFalse(broker.is_deleted()) + + # missing own shard range + broker.get_info() + sharder, mock_swift = 
call_audit_container() + lines = sharder.logger.get_lines_for_level('warning') + self._assert_stats(expected_stats, sharder, 'audit_shard') + self.assertIn('Audit failed for shard %s' % broker.db_file, lines[0]) + self.assertIn('missing own shard range', lines[0]) + self.assertNotIn('unable to get shard ranges from root', lines[0]) + self.assertFalse(lines[1:]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + self.assertFalse(broker.is_deleted()) + + # create own shard range, no match in root + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0} + own_shard_range = broker.get_own_shard_range() # get the default + own_shard_range.lower = 'j' + own_shard_range.upper = 'k' + broker.merge_shard_ranges([own_shard_range]) + sharder, mock_swift = call_audit_container() + lines = sharder.logger.get_lines_for_level('warning') + self.assertIn('Audit warnings for shard %s' % broker.db_file, lines[0]) + self.assertNotIn('account not in shards namespace', lines[0]) + self.assertNotIn('missing own shard range', lines[0]) + self.assertIn('root has no matching shard range', lines[0]) + self.assertNotIn('unable to get shard ranges from root', lines[0]) + self._assert_stats(expected_stats, sharder, 'audit_shard') + self.assertFalse(lines[1:]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + self.assertFalse(broker.is_deleted()) + expected_headers = {'X-Backend-Record-Type': 'shard', + 'X-Newest': 'true', + 'X-Backend-Include-Deleted': 'True', + 'X-Backend-Override-Deleted': 'true'} + params = {'format': 'json', 'marker': 'j', 'end_marker': 'k'} + mock_swift.make_request.assert_called_once_with( + 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,), + params=params) + + # create own shard range, failed response from root + expected_stats = {'attempted': 1, 'success': 1, 'failure': 0} + own_shard_range = broker.get_own_shard_range() # get the default + own_shard_range.lower = 'j' + own_shard_range.upper = 'k' + broker.merge_shard_ranges([own_shard_range]) + sharder, mock_swift = call_audit_container( + exc=internal_client.UnexpectedResponse('bad', 'resp')) + lines = sharder.logger.get_lines_for_level('warning') + self.assertIn('Failed to get shard ranges', lines[0]) + self.assertIn('Audit warnings for shard %s' % broker.db_file, lines[1]) + self.assertNotIn('account not in shards namespace', lines[1]) + self.assertNotIn('missing own shard range', lines[1]) + self.assertNotIn('root has no matching shard range', lines[1]) + self.assertIn('unable to get shard ranges from root', lines[1]) + self._assert_stats(expected_stats, sharder, 'audit_shard') + self.assertFalse(lines[2:]) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + self.assertFalse(broker.is_deleted()) + mock_swift.make_request.assert_called_once_with( + 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,), + params=params) + + def assert_ok(): + sharder, mock_swift = call_audit_container() + self.assertFalse(sharder.logger.get_lines_for_level('warning')) + self.assertFalse(sharder.logger.get_lines_for_level('error')) + self._assert_stats(expected_stats, sharder, 'audit_shard') + params = {'format': 'json', 'marker': 'k', 'end_marker': 't'} + mock_swift.make_request.assert_called_once_with( + 'GET', '/v1/a/c', expected_headers, acceptable_statuses=(2,), + params=params) + + # make own shard range match one in root, but different state + shard_ranges[1].timestamp = Timestamp.now() + broker.merge_shard_ranges([shard_ranges[1]]) + now = Timestamp.now() + 
shard_ranges[1].update_state(ShardRange.SHARDING, state_timestamp=now) + assert_ok() + self.assertFalse(broker.is_deleted()) + # own shard range state is updated from root version + own_shard_range = broker.get_own_shard_range() + self.assertEqual(ShardRange.SHARDING, own_shard_range.state) + self.assertEqual(now, own_shard_range.state_timestamp) + + own_shard_range.update_state(ShardRange.SHARDED, + state_timestamp=Timestamp.now()) + broker.merge_shard_ranges([own_shard_range]) + assert_ok() + + own_shard_range.deleted = 1 + own_shard_range.timestamp = Timestamp.now() + broker.merge_shard_ranges([own_shard_range]) + assert_ok() + self.assertTrue(broker.is_deleted()) + + def test_find_and_enable_sharding_candidates(self): + broker = self._make_broker() + broker.enable_sharding(next(self.ts_iter)) + shard_bounds = (('', 'here'), ('here', 'there'), ('there', '')) + shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.CLEAVED) + shard_ranges[0].state = ShardRange.ACTIVE + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + self.assertTrue(broker.set_sharded_state()) + with self._mock_sharder() as sharder: + sharder._find_and_enable_sharding_candidates(broker) + + # one range just below threshold + shard_ranges[0].update_meta(sharder.shard_container_threshold - 1, 0) + broker.merge_shard_ranges(shard_ranges[0]) + with self._mock_sharder() as sharder: + sharder._find_and_enable_sharding_candidates(broker) + self._assert_shard_ranges_equal(shard_ranges, + broker.get_shard_ranges()) + + # two ranges above threshold, only one ACTIVE + shard_ranges[0].update_meta(sharder.shard_container_threshold, 0) + shard_ranges[2].update_meta(sharder.shard_container_threshold + 1, 0) + broker.merge_shard_ranges([shard_ranges[0], shard_ranges[2]]) + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + sharder._find_and_enable_sharding_candidates(broker) + expected = shard_ranges[0].copy(state=ShardRange.SHARDING, + state_timestamp=now, epoch=now) + self._assert_shard_ranges_equal([expected] + shard_ranges[1:], + broker.get_shard_ranges()) + + # check idempotency + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + sharder._find_and_enable_sharding_candidates(broker) + self._assert_shard_ranges_equal([expected] + shard_ranges[1:], + broker.get_shard_ranges()) + + # two ranges above threshold, both ACTIVE + shard_ranges[2].update_state(ShardRange.ACTIVE) + broker.merge_shard_ranges(shard_ranges[2]) + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + sharder._find_and_enable_sharding_candidates(broker) + expected_2 = shard_ranges[2].copy(state=ShardRange.SHARDING, + state_timestamp=now, epoch=now) + self._assert_shard_ranges_equal( + [expected, shard_ranges[1], expected_2], broker.get_shard_ranges()) + + # check idempotency + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + sharder._find_and_enable_sharding_candidates(broker) + self._assert_shard_ranges_equal( + [expected, shard_ranges[1], expected_2], broker.get_shard_ranges()) + + def test_find_and_enable_sharding_candidates_bootstrap(self): + broker = self._make_broker() + with self._mock_sharder( + conf={'shard_container_threshold': 1}) as sharder: + sharder._find_and_enable_sharding_candidates(broker) + self.assertEqual(ShardRange.ACTIVE, broker.get_own_shard_range().state) + broker.put_object('obj', next(self.ts_iter).internal, 1, '', '') + self.assertEqual(1, broker.get_info()['object_count']) + with 
self._mock_sharder( + conf={'shard_container_threshold': 1}) as sharder: + with mock_timestamp_now() as now: + sharder._find_and_enable_sharding_candidates( + broker, [broker.get_own_shard_range()]) + own_sr = broker.get_own_shard_range() + self.assertEqual(ShardRange.SHARDING, own_sr.state) + self.assertEqual(now, own_sr.state_timestamp) + self.assertEqual(now, own_sr.epoch) + + # check idempotency + with self._mock_sharder( + conf={'shard_container_threshold': 1}) as sharder: + with mock_timestamp_now(): + sharder._find_and_enable_sharding_candidates( + broker, [broker.get_own_shard_range()]) + own_sr = broker.get_own_shard_range() + self.assertEqual(ShardRange.SHARDING, own_sr.state) + self.assertEqual(now, own_sr.state_timestamp) + self.assertEqual(now, own_sr.epoch) + + def test_find_and_enable_shrinking_candidates(self): + broker = self._make_broker() + broker.enable_sharding(next(self.ts_iter)) + shard_bounds = (('', 'here'), ('here', 'there'), ('there', '')) + size = (DEFAULT_SHARD_SHRINK_POINT * + DEFAULT_SHARD_CONTAINER_THRESHOLD / 100) + shard_ranges = self._make_shard_ranges( + shard_bounds, state=ShardRange.ACTIVE, object_count=size) + broker.merge_shard_ranges(shard_ranges) + self.assertTrue(broker.set_sharding_state()) + self.assertTrue(broker.set_sharded_state()) + with self._mock_sharder() as sharder: + sharder._find_and_enable_shrinking_candidates(broker) + self._assert_shard_ranges_equal(shard_ranges, + broker.get_shard_ranges()) + + # one range just below threshold + shard_ranges[0].update_meta(size - 1, 0) + broker.merge_shard_ranges(shard_ranges[0]) + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + sharder._send_shard_ranges = mock.MagicMock() + sharder._find_and_enable_shrinking_candidates(broker) + acceptor = shard_ranges[1].copy(lower=shard_ranges[0].lower) + acceptor.timestamp = now + donor = shard_ranges[0].copy(state=ShardRange.SHRINKING, + state_timestamp=now, epoch=now) + self._assert_shard_ranges_equal([donor, acceptor, shard_ranges[2]], + broker.get_shard_ranges()) + sharder._send_shard_ranges.assert_has_calls( + [mock.call(acceptor.account, acceptor.container, [acceptor]), + mock.call(donor.account, donor.container, [donor, acceptor])] + ) + + # check idempotency + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + sharder._send_shard_ranges = mock.MagicMock() + sharder._find_and_enable_shrinking_candidates(broker) + self._assert_shard_ranges_equal([donor, acceptor, shard_ranges[2]], + broker.get_shard_ranges()) + sharder._send_shard_ranges.assert_has_calls( + [mock.call(acceptor.account, acceptor.container, [acceptor]), + mock.call(donor.account, donor.container, [donor, acceptor])] + ) + + # acceptor falls below threshold - not a candidate + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + acceptor.update_meta(0, 0, meta_timestamp=now) + broker.merge_shard_ranges(acceptor) + sharder._send_shard_ranges = mock.MagicMock() + sharder._find_and_enable_shrinking_candidates(broker) + self._assert_shard_ranges_equal([donor, acceptor, shard_ranges[2]], + broker.get_shard_ranges()) + sharder._send_shard_ranges.assert_has_calls( + [mock.call(acceptor.account, acceptor.container, [acceptor]), + mock.call(donor.account, donor.container, [donor, acceptor])] + ) + + # ...until donor has shrunk + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + donor.update_state(ShardRange.SHARDED, state_timestamp=now) + donor.set_deleted(timestamp=now) + 
broker.merge_shard_ranges(donor) + sharder._send_shard_ranges = mock.MagicMock() + sharder._find_and_enable_shrinking_candidates(broker) + new_acceptor = shard_ranges[2].copy(lower=acceptor.lower) + new_acceptor.timestamp = now + new_donor = acceptor.copy(state=ShardRange.SHRINKING, + state_timestamp=now, epoch=now) + self._assert_shard_ranges_equal( + [donor, new_donor, new_acceptor], + broker.get_shard_ranges(include_deleted=True)) + sharder._send_shard_ranges.assert_has_calls( + [mock.call(new_acceptor.account, new_acceptor.container, + [new_acceptor]), + mock.call(new_donor.account, new_donor.container, + [new_donor, new_acceptor])] + ) + + # ..finally last shard shrinks to root + with self._mock_sharder() as sharder: + with mock_timestamp_now() as now: + new_donor.update_state(ShardRange.SHARDED, state_timestamp=now) + new_donor.set_deleted(timestamp=now) + new_acceptor.update_meta(0, 0, meta_timestamp=now) + broker.merge_shard_ranges([new_donor, new_acceptor]) + sharder._send_shard_ranges = mock.MagicMock() + sharder._find_and_enable_shrinking_candidates(broker) + final_donor = new_acceptor.copy(state=ShardRange.SHRINKING, + state_timestamp=now, epoch=now) + self._assert_shard_ranges_equal( + [donor, new_donor, final_donor], + broker.get_shard_ranges(include_deleted=True)) + sharder._send_shard_ranges.assert_has_calls( + [mock.call(final_donor.account, final_donor.container, + [final_donor, broker.get_own_shard_range()])] + ) + + def test_partition_and_device_filters(self): + # verify partitions and devices kwargs result in filtering of processed + # containers but not of the local device ids. + ring = FakeRing() + dev_ids = set() + container_data = [] + for dev in ring.devs: + dev_ids.add(dev['id']) + part = str(dev['id']) + broker = self._make_broker( + container='c%s' % dev['id'], hash_='c%shash' % dev['id'], + device=dev['device'], part=part) + broker.update_metadata({'X-Container-Sysmeta-Sharding': + ('true', next(self.ts_iter).internal)}) + container_data.append((broker.path, dev['id'], part)) + + with self._mock_sharder() as sharder: + sharder.ring = ring + sharder._check_node = lambda *args: True + with mock.patch.object( + sharder, '_process_broker') as mock_process_broker: + sharder.run_once() + self.assertEqual(dev_ids, set(sharder._local_device_ids)) + self.assertEqual(set(container_data), + set((call[0][0].path, call[0][1]['id'], call[0][2]) + for call in mock_process_broker.call_args_list)) + + with self._mock_sharder() as sharder: + sharder.ring = ring + sharder._check_node = lambda *args: True + with mock.patch.object( + sharder, '_process_broker') as mock_process_broker: + sharder.run_once(partitions='0') + self.assertEqual(dev_ids, set(sharder._local_device_ids)) + self.assertEqual(set([container_data[0]]), + set((call[0][0].path, call[0][1]['id'], call[0][2]) + for call in mock_process_broker.call_args_list)) + + with self._mock_sharder() as sharder: + sharder.ring = ring + sharder._check_node = lambda *args: True + with mock.patch.object( + sharder, '_process_broker') as mock_process_broker: + sharder.run_once(partitions='2,0') + self.assertEqual(dev_ids, set(sharder._local_device_ids)) + self.assertEqual(set([container_data[0], container_data[2]]), + set((call[0][0].path, call[0][1]['id'], call[0][2]) + for call in mock_process_broker.call_args_list)) + + with self._mock_sharder() as sharder: + sharder.ring = ring + sharder._check_node = lambda *args: True + with mock.patch.object( + sharder, '_process_broker') as mock_process_broker: + 
sharder.run_once(partitions='2,0', devices='sdc') + self.assertEqual(dev_ids, set(sharder._local_device_ids)) + self.assertEqual(set([container_data[2]]), + set((call[0][0].path, call[0][1]['id'], call[0][2]) + for call in mock_process_broker.call_args_list)) + + with self._mock_sharder() as sharder: + sharder.ring = ring + sharder._check_node = lambda *args: True + with mock.patch.object( + sharder, '_process_broker') as mock_process_broker: + sharder.run_once(devices='sdb,sdc') + self.assertEqual(dev_ids, set(sharder._local_device_ids)) + self.assertEqual(set(container_data[1:]), + set((call[0][0].path, call[0][1]['id'], call[0][2]) + for call in mock_process_broker.call_args_list)) + + +class TestCleavingContext(BaseTestSharder): + def test_init(self): + ctx = CleavingContext(ref='test') + self.assertEqual('test', ctx.ref) + self.assertEqual('', ctx.cursor) + self.assertIsNone(ctx.max_row) + self.assertIsNone(ctx.cleave_to_row) + self.assertIsNone(ctx.last_cleave_to_row) + self.assertFalse(ctx.misplaced_done) + self.assertFalse(ctx.cleaving_done) + + def test_iter(self): + ctx = CleavingContext('test', 'curs', 12, 11, 10, False, True, 0, 4) + expected = {'ref': 'test', + 'cursor': 'curs', + 'max_row': 12, + 'cleave_to_row': 11, + 'last_cleave_to_row': 10, + 'cleaving_done': False, + 'misplaced_done': True, + 'ranges_done': 0, + 'ranges_todo': 4} + self.assertEqual(expected, dict(ctx)) + + def test_cursor(self): + broker = self._make_broker() + ref = CleavingContext._make_ref(broker) + + for curs in ('curs', u'curs\u00e4\u00fb'): + with annotate_failure('%r' % curs): + ctx = CleavingContext(ref, curs, 12, 11, 10, False, True) + self.assertEqual(curs.encode('utf8'), ctx.cursor) + ctx.store(broker) + ctx = CleavingContext.load(broker) + self.assertEqual(curs.encode('utf8'), ctx.cursor) + + def test_load(self): + broker = self._make_broker() + for i in range(6): + broker.put_object('o%s' % i, next(self.ts_iter).internal, 10, + 'text/plain', 'etag_a', 0) + + db_id = broker.get_info()['id'] + params = {'ref': db_id, + 'cursor': 'curs', + 'max_row': 2, + 'cleave_to_row': 2, + 'last_cleave_to_row': 1, + 'cleaving_done': False, + 'misplaced_done': True, + 'ranges_done': 2, + 'ranges_todo': 4} + key = 'X-Container-Sysmeta-Shard-Context-%s' % db_id + broker.update_metadata( + {key: (json.dumps(params), Timestamp.now().internal)}) + ctx = CleavingContext.load(broker) + self.assertEqual(db_id, ctx.ref) + self.assertEqual('curs', ctx.cursor) + # note max_row is dynamically updated during load + self.assertEqual(6, ctx.max_row) + self.assertEqual(2, ctx.cleave_to_row) + self.assertEqual(1, ctx.last_cleave_to_row) + self.assertTrue(ctx.misplaced_done) + self.assertFalse(ctx.cleaving_done) + self.assertEqual(2, ctx.ranges_done) + self.assertEqual(4, ctx.ranges_todo) + + def test_store(self): + broker = self._make_sharding_broker() + old_db_id = broker.get_brokers()[0].get_info()['id'] + ctx = CleavingContext(old_db_id, 'curs', 12, 11, 2, True, True, 2, 4) + ctx.store(broker) + key = 'X-Container-Sysmeta-Shard-Context-%s' % old_db_id + data = json.loads(broker.metadata[key][0]) + expected = {'ref': old_db_id, + 'cursor': 'curs', + 'max_row': 12, + 'cleave_to_row': 11, + 'last_cleave_to_row': 2, + 'cleaving_done': True, + 'misplaced_done': True, + 'ranges_done': 2, + 'ranges_todo': 4} + self.assertEqual(expected, data) + + def test_store_add_row_load(self): + # adding row to older db changes only max_row in the context + broker = self._make_sharding_broker() + old_broker = broker.get_brokers()[0] + 
old_db_id = old_broker.get_info()['id'] + old_broker.merge_items([old_broker._record_to_dict( + ('obj', next(self.ts_iter).internal, 0, 'text/plain', 'etag', 1))]) + old_max_row = old_broker.get_max_row() + self.assertEqual(1, old_max_row) # sanity check + ctx = CleavingContext(old_db_id, 'curs', 1, 1, 0, True, True) + ctx.store(broker) + + # adding a row changes max row + old_broker.merge_items([old_broker._record_to_dict( + ('obj', next(self.ts_iter).internal, 0, 'text/plain', 'etag', 1))]) + + new_ctx = CleavingContext.load(broker) + self.assertEqual(old_db_id, new_ctx.ref) + self.assertEqual('curs', new_ctx.cursor) + self.assertEqual(2, new_ctx.max_row) + self.assertEqual(1, new_ctx.cleave_to_row) + self.assertEqual(0, new_ctx.last_cleave_to_row) + self.assertTrue(new_ctx.misplaced_done) + self.assertTrue(new_ctx.cleaving_done) + + def test_store_reclaim_load(self): + # reclaiming rows from older db does not change context + broker = self._make_sharding_broker() + old_broker = broker.get_brokers()[0] + old_db_id = old_broker.get_info()['id'] + old_broker.merge_items([old_broker._record_to_dict( + ('obj', next(self.ts_iter).internal, 0, 'text/plain', 'etag', 1))]) + old_max_row = old_broker.get_max_row() + self.assertEqual(1, old_max_row) # sanity check + ctx = CleavingContext(old_db_id, 'curs', 1, 1, 0, True, True) + ctx.store(broker) + + self.assertEqual( + 1, len(old_broker.get_objects())) + now = next(self.ts_iter).internal + broker.get_brokers()[0].reclaim(now, now) + self.assertFalse(old_broker.get_objects()) + + new_ctx = CleavingContext.load(broker) + self.assertEqual(old_db_id, new_ctx.ref) + self.assertEqual('curs', new_ctx.cursor) + self.assertEqual(1, new_ctx.max_row) + self.assertEqual(1, new_ctx.cleave_to_row) + self.assertEqual(0, new_ctx.last_cleave_to_row) + self.assertTrue(new_ctx.misplaced_done) + self.assertTrue(new_ctx.cleaving_done) + + def test_store_modify_db_id_load(self): + # changing id changes ref, so results in a fresh context + broker = self._make_sharding_broker() + old_broker = broker.get_brokers()[0] + old_db_id = old_broker.get_info()['id'] + ctx = CleavingContext(old_db_id, 'curs', 12, 11, 2, True, True) + ctx.store(broker) + + old_broker.newid('fake_remote_id') + new_db_id = old_broker.get_info()['id'] + self.assertNotEqual(old_db_id, new_db_id) + + new_ctx = CleavingContext.load(broker) + self.assertEqual(new_db_id, new_ctx.ref) + self.assertEqual('', new_ctx.cursor) + # note max_row is dynamically updated during load + self.assertEqual(-1, new_ctx.max_row) + self.assertEqual(None, new_ctx.cleave_to_row) + self.assertEqual(None, new_ctx.last_cleave_to_row) + self.assertFalse(new_ctx.misplaced_done) + self.assertFalse(new_ctx.cleaving_done) + + def test_load_modify_store_load(self): + broker = self._make_sharding_broker() + old_db_id = broker.get_brokers()[0].get_info()['id'] + ctx = CleavingContext.load(broker) + self.assertEqual(old_db_id, ctx.ref) + self.assertEqual('', ctx.cursor) # sanity check + ctx.cursor = 'curs' + ctx.misplaced_done = True + ctx.store(broker) + ctx = CleavingContext.load(broker) + self.assertEqual(old_db_id, ctx.ref) + self.assertEqual('curs', ctx.cursor) + self.assertTrue(ctx.misplaced_done) + + def test_reset(self): + ctx = CleavingContext('test', 'curs', 12, 11, 2, True, True) + + def check_context(): + self.assertEqual('test', ctx.ref) + self.assertEqual('', ctx.cursor) + self.assertEqual(12, ctx.max_row) + self.assertEqual(11, ctx.cleave_to_row) + self.assertEqual(11, ctx.last_cleave_to_row) + 
self.assertFalse(ctx.misplaced_done) + self.assertFalse(ctx.cleaving_done) + self.assertEqual(0, ctx.ranges_done) + self.assertEqual(0, ctx.ranges_todo) + ctx.reset() + # check idempotency + ctx.reset() + + def test_start(self): + ctx = CleavingContext('test', 'curs', 12, 11, 2, True, True) + + def check_context(): + self.assertEqual('test', ctx.ref) + self.assertEqual('', ctx.cursor) + self.assertEqual(12, ctx.max_row) + self.assertEqual(12, ctx.cleave_to_row) + self.assertEqual(2, ctx.last_cleave_to_row) + self.assertTrue(ctx.misplaced_done) # *not* reset here + self.assertFalse(ctx.cleaving_done) + self.assertEqual(0, ctx.ranges_done) + self.assertEqual(0, ctx.ranges_todo) + ctx.start() + # check idempotency + ctx.start() diff -Nru swift-2.17.0/test/unit/container/test_updater.py swift-2.18.0/test/unit/container/test_updater.py --- swift-2.17.0/test/unit/container/test_updater.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/container/test_updater.py 2018-05-30 10:17:09.000000000 +0000 @@ -25,7 +25,7 @@ from eventlet import spawn, Timeout -from swift.common import utils +from swift.common import exceptions, utils from swift.container import updater as container_updater from swift.container.backend import ContainerBroker, DATADIR from swift.common.ring import RingData @@ -153,6 +153,30 @@ # Ensure that the container_sweep did not run self.assertFalse(mock_sweep.called) + @mock.patch('swift.container.updater.dump_recon_cache') + def test_run_once_with_get_info_timeout(self, mock_dump_recon): + cu = self._get_container_updater() + containers_dir = os.path.join(self.sda1, DATADIR) + os.mkdir(containers_dir) + subdir = os.path.join(containers_dir, 'subdir') + os.mkdir(subdir) + cb = ContainerBroker(os.path.join(subdir, 'hash.db'), account='a', + container='c') + cb.initialize(normalize_timestamp(1), 0) + + timeout = exceptions.LockTimeout(10) + timeout.cancel() + with mock.patch('swift.container.updater.ContainerBroker.get_info', + side_effect=timeout): + cu.run_once() + log_lines = self.logger.get_lines_for_level('error') + self.assertTrue(log_lines) + self.assertIn('Failed to get container info ', log_lines[0]) + self.assertIn('devices/sda1/containers/subdir/hash.db', log_lines[0]) + self.assertIn('LockTimeout (10s)', log_lines[0]) + self.assertFalse(log_lines[1:]) + self.assertEqual(1, len(mock_dump_recon.mock_calls)) + def test_run_once(self): cu = self._get_container_updater() cu.run_once() diff -Nru swift-2.17.0/test/unit/helpers.py swift-2.18.0/test/unit/helpers.py --- swift-2.17.0/test/unit/helpers.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/helpers.py 2018-05-30 10:17:02.000000000 +0000 @@ -75,7 +75,7 @@ "orig_POLICIES": storage_policy._POLICIES, "orig_SysLogHandler": utils.SysLogHandler} - utils.HASH_PATH_SUFFIX = 'endcap' + utils.HASH_PATH_SUFFIX = b'endcap' utils.SysLogHandler = mock.MagicMock() # Since we're starting up a lot here, we're going to test more than # just chunked puts; we're also going to test parts of diff -Nru swift-2.17.0/test/unit/__init__.py swift-2.18.0/test/unit/__init__.py --- swift-2.17.0/test/unit/__init__.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/__init__.py 2018-05-30 10:17:02.000000000 +0000 @@ -71,7 +71,7 @@ # try not to import this module from swift if not os.path.basename(sys.argv[0]).startswith('swift'): # never patch HASH_PATH_SUFFIX AGAIN! 
- utils.HASH_PATH_SUFFIX = 'endcap' + utils.HASH_PATH_SUFFIX = b'endcap' EC_TYPE_PREFERENCE = [ @@ -751,6 +751,8 @@ :param response_sleep: float, time to eventlet sleep during response """ # connect exception + if inspect.isclass(status) and issubclass(status, Exception): + raise status('FakeStatus Error') if isinstance(status, (Exception, eventlet.Timeout)): raise status if isinstance(status, tuple): @@ -1063,6 +1065,15 @@ for t in itertools.count(int(time.time()) + offset)) +@contextmanager +def mock_timestamp_now(now=None): + if now is None: + now = Timestamp.now() + with mocklib.patch('swift.common.utils.Timestamp.now', + classmethod(lambda c: now)): + yield now + + class Timeout(object): def __init__(self, seconds): self.seconds = seconds @@ -1080,6 +1091,15 @@ raise TimeoutException +def requires_o_tmpfile_support_in_tmp(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not utils.o_tmpfile_in_tmpdir_supported(): + raise SkipTest('Requires O_TMPFILE support in TMPDIR') + return func(*args, **kwargs) + return wrapper + + def requires_o_tmpfile_support(func): @functools.wraps(func) def wrapper(*args, **kwargs): @@ -1292,7 +1312,7 @@ # assume the worst -- xattrs aren't supported supports_xattr_cached_val = False - big_val = 'x' * (4096 + 1) # more than 4k of metadata + big_val = b'x' * (4096 + 1) # more than 4k of metadata try: fd, tmppath = mkstemp() xattr.setxattr(fd, 'user.swift.testing_key', big_val) @@ -1314,3 +1334,55 @@ if not xattr_supported_check(): raise SkipTest('Large xattrs not supported in `%s`. Skipping test' % gettempdir()) + + +def unlink_files(paths): + for path in paths: + try: + os.unlink(path) + except OSError as err: + if err.errno != errno.ENOENT: + raise + + +class FakeHTTPResponse(object): + + def __init__(self, resp): + self.resp = resp + + @property + def status(self): + return self.resp.status_int + + @property + def data(self): + return self.resp.body + + +def attach_fake_replication_rpc(rpc, replicate_hook=None, errors=None): + class FakeReplConnection(object): + + def __init__(self, node, partition, hash_, logger): + self.logger = logger + self.node = node + self.partition = partition + self.path = '/%s/%s/%s' % (node['device'], partition, hash_) + self.host = node['replication_ip'] + + def replicate(self, op, *sync_args): + print('REPLICATE: %s, %s, %r' % (self.path, op, sync_args)) + resp = None + if errors and op in errors and errors[op]: + resp = errors[op].pop(0) + if not resp: + replicate_args = self.path.lstrip('/').split('/') + args = [op] + copy.deepcopy(list(sync_args)) + with mock_check_drive(isdir=not rpc.mount_check, + ismount=rpc.mount_check): + swob_response = rpc.dispatch(replicate_args, args) + resp = FakeHTTPResponse(swob_response) + if replicate_hook: + replicate_hook(op, *sync_args) + return resp + + return FakeReplConnection diff -Nru swift-2.17.0/test/unit/obj/test_auditor.py swift-2.18.0/test/unit/obj/test_auditor.py --- swift-2.17.0/test/unit/obj/test_auditor.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/obj/test_auditor.py 2018-05-30 10:17:02.000000000 +0000 @@ -17,6 +17,8 @@ import unittest import mock import os +import sys +import signal import time import string import xattr @@ -850,7 +852,7 @@ self.auditor.run_audit(**kwargs) self.assertFalse(os.path.isdir(quarantine_path)) del(kwargs['zero_byte_fps']) - clear_auditor_status(self.devices) + clear_auditor_status(self.devices, 'objects') self.auditor.run_audit(**kwargs) self.assertTrue(os.path.isdir(quarantine_path)) @@ -1252,6 +1254,12 @@ 
self.wait_called += 1 return (self.wait_called, 0) + def mock_signal(self, sig, action): + pass + + def mock_exit(self): + pass + for i in string.ascii_letters[2:26]: mkdirs(os.path.join(self.devices, 'sd%s' % i)) @@ -1267,8 +1275,12 @@ my_auditor.run_audit = mocker.mock_run was_fork = os.fork was_wait = os.wait + was_signal = signal.signal + was_exit = sys.exit os.fork = mocker.mock_fork os.wait = mocker.mock_wait + signal.signal = mocker.mock_signal + sys.exit = mocker.mock_exit try: my_auditor._sleep = mocker.mock_sleep_stop my_auditor.run_once(zero_byte_fps=50) @@ -1280,6 +1292,12 @@ 'ERROR auditing: %s', loop_error) my_auditor.audit_loop = real_audit_loop + # sleep between ZBF scanner forks + self.assertRaises(StopForever, my_auditor.fork_child, True, True) + + mocker.fork_called = 0 + signal.signal = was_signal + sys.exit = was_exit self.assertRaises(StopForever, my_auditor.run_forever, zero_byte_fps=50) self.assertEqual(mocker.check_kwargs['zero_byte_fps'], 50) @@ -1306,11 +1324,11 @@ mocker.fork_called = 0 self.assertRaises(StopForever, my_auditor.run_forever) - # Fork is called 2 times since the zbf process is forked just - # once before self._sleep() is called and StopForever is raised - # Also wait is called just once before StopForever is raised - self.assertEqual(mocker.fork_called, 2) - self.assertEqual(mocker.wait_called, 1) + # Fork or Wait are called greater than or equal to 2 times in the + # main process: 2 times if zbf runs once and 3 times if zbf runs + # again + self.assertGreaterEqual(mocker.fork_called, 2) + self.assertGreaterEqual(mocker.wait_called, 2) my_auditor._sleep = mocker.mock_sleep_continue my_auditor.audit_loop = works_only_once(my_auditor.audit_loop, @@ -1320,13 +1338,13 @@ mocker.fork_called = 0 mocker.wait_called = 0 self.assertRaises(LetMeOut, my_auditor.run_forever) - # Fork is called no. of devices + (no. of devices)/2 + 1 times - # since zbf process is forked (no.of devices)/2 + 1 times + # Fork or Wait are called greater than or equal to + # no. of devices + (no.
of devices)/2 + 1 times in main process no_devices = len(os.listdir(self.devices)) - self.assertEqual(mocker.fork_called, no_devices + no_devices / 2 - + 1) - self.assertEqual(mocker.wait_called, no_devices + no_devices / 2 - + 1) + self.assertGreaterEqual(mocker.fork_called, no_devices + + no_devices / 2 + 1) + self.assertGreaterEqual(mocker.wait_called, no_devices + + no_devices / 2 + 1) finally: os.fork = was_fork diff -Nru swift-2.17.0/test/unit/obj/test_diskfile.py swift-2.18.0/test/unit/obj/test_diskfile.py --- swift-2.17.0/test/unit/obj/test_diskfile.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/obj/test_diskfile.py 2018-05-30 10:17:02.000000000 +0000 @@ -326,7 +326,7 @@ check_metadata() # simulate a legacy diskfile that might have persisted unicode metadata - with mock.patch.object(diskfile, '_encode_metadata', lambda x: x): + with mock.patch.object(diskfile, '_decode_metadata', lambda x: x): with open(path, 'wb') as fd: diskfile.write_metadata(fd, metadata) # sanity check, while still mocked, that we did persist unicode @@ -334,8 +334,8 @@ actual = diskfile.read_metadata(fd) for k, v in actual.items(): if k == u'X-Object-Meta-Strange': - self.assertIsInstance(k, six.text_type) - self.assertIsInstance(v, six.text_type) + self.assertIsInstance(k, str) + self.assertIsInstance(v, str) break else: self.fail('Did not find X-Object-Meta-Strange') @@ -375,17 +375,6 @@ "fcd938702024c25fef6c32fef05298eb")) os.makedirs(os.path.join(tmpdir, "sdp", "objects-1", "9970", "ca5", "4a943bc72c2e647c4675923d58cf4ca5")) - os.makedirs(os.path.join(tmpdir, "sdq", "objects-2", "9971", "8eb", - "fcd938702024c25fef6c32fef05298eb")) - os.makedirs(os.path.join(tmpdir, "sdq", "objects-99", "9972", - "8eb", - "fcd938702024c25fef6c32fef05298eb")) - # the bad - os.makedirs(os.path.join(tmpdir, "sdq", "objects-", "1135", - "6c3", - "fcd938702024c25fef6c32fef05298eb")) - os.makedirs(os.path.join(tmpdir, "sdq", "objects-fud", "foo")) - os.makedirs(os.path.join(tmpdir, "sdq", "objects-+1", "foo")) self._make_file(os.path.join(tmpdir, "sdp", "objects", "1519", "fed")) @@ -404,27 +393,19 @@ "4f9eee668b66c6f0250bfa3c7ab9e51e")) logger = debug_logger() - locations = [(loc.path, loc.device, loc.partition, loc.policy) - for loc in diskfile.object_audit_location_generator( - devices=tmpdir, mount_check=False, - logger=logger)] + loc_generators = [] + datadirs = ["objects", "objects-1"] + for datadir in datadirs: + loc_generators.append( + diskfile.object_audit_location_generator( + devices=tmpdir, datadir=datadir, mount_check=False, + logger=logger)) + + all_locs = itertools.chain(*loc_generators) + locations = [(loc.path, loc.device, loc.partition, loc.policy) for + loc in all_locs] locations.sort() - # expect some warnings about those bad dirs - warnings = logger.get_lines_for_level('warning') - self.assertEqual(set(warnings), set([ - ("Directory 'objects-' does not map to a valid policy " - "(Unknown policy, for index '')"), - ("Directory 'objects-2' does not map to a valid policy " - "(Unknown policy, for index '2')"), - ("Directory 'objects-99' does not map to a valid policy " - "(Unknown policy, for index '99')"), - ("Directory 'objects-fud' does not map to a valid policy " - "(Unknown policy, for index 'fud')"), - ("Directory 'objects-+1' does not map to a valid policy " - "(Unknown policy, for index '+1')"), - ])) - expected = \ [(os.path.join(tmpdir, "sdp", "objects-1", "9970", "ca5", "4a943bc72c2e647c4675923d58cf4ca5"), @@ -448,12 +429,19 @@ self.assertEqual(locations, expected) # Reset status 
file for next run - diskfile.clear_auditor_status(tmpdir) + for datadir in datadirs: + diskfile.clear_auditor_status(tmpdir, datadir) # now without a logger - locations = [(loc.path, loc.device, loc.partition, loc.policy) - for loc in diskfile.object_audit_location_generator( - devices=tmpdir, mount_check=False)] + for datadir in datadirs: + loc_generators.append( + diskfile.object_audit_location_generator( + devices=tmpdir, datadir=datadir, mount_check=False, + logger=logger)) + + all_locs = itertools.chain(*loc_generators) + locations = [(loc.path, loc.device, loc.partition, loc.policy) for + loc in all_locs] locations.sort() self.assertEqual(locations, expected) @@ -470,7 +458,7 @@ locations = [ (loc.path, loc.device, loc.partition, loc.policy) for loc in diskfile.object_audit_location_generator( - devices=tmpdir, mount_check=True)] + devices=tmpdir, datadir="objects", mount_check=True)] locations.sort() self.assertEqual( @@ -485,7 +473,8 @@ locations = [ (loc.path, loc.device, loc.partition, loc.policy) for loc in diskfile.object_audit_location_generator( - devices=tmpdir, mount_check=True, logger=logger)] + devices=tmpdir, datadir="objects", mount_check=True, + logger=logger)] debug_lines = logger.get_lines_for_level('debug') self.assertEqual([ 'Skipping sdq as it is not mounted', @@ -502,7 +491,7 @@ locations = [ (loc.path, loc.device, loc.partition, loc.policy) for loc in diskfile.object_audit_location_generator( - devices=tmpdir, mount_check=False)] + devices=tmpdir, datadir="objects", mount_check=False)] self.assertEqual( locations, @@ -516,30 +505,22 @@ locations = [ (loc.path, loc.device, loc.partition, loc.policy) for loc in diskfile.object_audit_location_generator( - devices=tmpdir, mount_check=False, logger=logger)] + devices=tmpdir, datadir="objects", mount_check=False, + logger=logger)] debug_lines = logger.get_lines_for_level('debug') self.assertEqual([ 'Skipping garbage as it is not a dir', ], debug_lines) logger.clear() - with mock_check_drive(isdir=True): - locations = [ - (loc.path, loc.device, loc.partition, loc.policy) - for loc in diskfile.object_audit_location_generator( - devices=tmpdir, mount_check=False, logger=logger)] - debug_lines = logger.get_lines_for_level('debug') - self.assertEqual([ - 'Skipping %s: Not a directory' % os.path.join( - tmpdir, "garbage"), - ], debug_lines) - logger.clear() + with mock_check_drive() as mocks: mocks['ismount'].side_effect = lambda path: ( False if path.endswith('garbage') else True) locations = [ (loc.path, loc.device, loc.partition, loc.policy) for loc in diskfile.object_audit_location_generator( - devices=tmpdir, mount_check=True, logger=logger)] + devices=tmpdir, datadir="objects", mount_check=True, + logger=logger)] debug_lines = logger.get_lines_for_level('debug') self.assertEqual([ 'Skipping garbage as it is not mounted', @@ -550,10 +531,10 @@ # so that errors get logged and a human can see what's going wrong; # only normal FS corruption should be skipped over silently. 
- def list_locations(dirname): + def list_locations(dirname, datadir): return [(loc.path, loc.device, loc.partition, loc.policy) for loc in diskfile.object_audit_location_generator( - devices=dirname, mount_check=False)] + devices=dirname, datadir=datadir, mount_check=False)] real_listdir = os.listdir @@ -570,30 +551,34 @@ "2607", "b54", "fe450ec990a88cc4b252b181bab04b54")) with mock.patch('os.listdir', splode_if_endswith("sdf/objects")): - self.assertRaises(OSError, list_locations, tmpdir) + self.assertRaises(OSError, list_locations, tmpdir, "objects") with mock.patch('os.listdir', splode_if_endswith("2607")): - self.assertRaises(OSError, list_locations, tmpdir) + self.assertRaises(OSError, list_locations, tmpdir, "objects") with mock.patch('os.listdir', splode_if_endswith("b54")): - self.assertRaises(OSError, list_locations, tmpdir) + self.assertRaises(OSError, list_locations, tmpdir, "objects") def test_auditor_status(self): with temptree([]) as tmpdir: os.makedirs(os.path.join(tmpdir, "sdf", "objects", "1", "a", "b")) os.makedirs(os.path.join(tmpdir, "sdf", "objects", "2", "a", "b")) + datadir = "objects" # Pretend that some time passed between each partition with mock.patch('os.stat') as mock_stat, \ mock_check_drive(isdir=True): mock_stat.return_value.st_mtime = time() - 60 # Auditor starts, there are two partitions to check - gen = diskfile.object_audit_location_generator(tmpdir, False) + gen = diskfile.object_audit_location_generator(tmpdir, + datadir, + False) gen.next() gen.next() # Auditor stopped for some reason without raising StopIterator in # the generator and restarts There is now only one remaining # partition to check - gen = diskfile.object_audit_location_generator(tmpdir, False) + gen = diskfile.object_audit_location_generator(tmpdir, datadir, + False) with mock_check_drive(isdir=True): gen.next() @@ -602,17 +587,19 @@ # There are no partitions to check if the auditor restarts another # time and the status files have not been cleared - gen = diskfile.object_audit_location_generator(tmpdir, False) + gen = diskfile.object_audit_location_generator(tmpdir, datadir, + False) with mock_check_drive(isdir=True): self.assertRaises(StopIteration, gen.next) # Reset status file - diskfile.clear_auditor_status(tmpdir) + diskfile.clear_auditor_status(tmpdir, datadir) # If the auditor restarts another time, we expect to # check two partitions again, because the remaining # partitions were empty and a new listdir was executed - gen = diskfile.object_audit_location_generator(tmpdir, False) + gen = diskfile.object_audit_location_generator(tmpdir, datadir, + False) with mock_check_drive(isdir=True): gen.next() gen.next() @@ -985,7 +972,8 @@ self.df_mgr.logger.increment.assert_called_with('async_pendings') def test_object_audit_location_generator(self): - locations = list(self.df_mgr.object_audit_location_generator()) + locations = list( + self.df_mgr.object_audit_location_generator(POLICIES[0])) self.assertEqual(locations, []) def test_replication_one_per_device_deprecation(self): diff -Nru swift-2.17.0/test/unit/obj/test_expirer.py swift-2.18.0/test/unit/obj/test_expirer.py --- swift-2.17.0/test/unit/obj/test_expirer.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/obj/test_expirer.py 2018-05-30 10:17:02.000000000 +0000 @@ -16,16 +16,17 @@ from time import time from unittest import main, TestCase from test.unit import FakeRing, mocked_http_conn, debug_logger -from copy import deepcopy from tempfile import mkdtemp from shutil import rmtree from collections import 
defaultdict +from copy import deepcopy import mock import six from six.moves import urllib from swift.common import internal_client, utils, swob +from swift.common.utils import Timestamp from swift.obj import expirer @@ -61,12 +62,15 @@ self.aco_dict.update(aco_dict) def get_account_info(self, account): - return 1, 2 + acc_dict = self.aco_dict[account] + container_count = len(acc_dict) + obj_count = sum(len(objs) for objs in acc_dict.values()) + return container_count, obj_count def iter_containers(self, account, prefix=''): acc_dict = self.aco_dict[account] - return [{'name': six.text_type(container)} for container in - acc_dict if container.startswith(prefix)] + return sorted([{'name': six.text_type(container)} for container in + acc_dict if container.startswith(prefix)]) def delete_container(*a, **kw): pass @@ -97,6 +101,41 @@ self.conf = {'recon_cache_path': self.rcache} self.logger = debug_logger('test-expirer') + self.past_time = str(int(time() - 86400)) + self.future_time = str(int(time() + 86400)) + # Dummy task queue for test + self.fake_swift = FakeInternalClient({ + '.expiring_objects': { + # this task container will be checked + self.past_time: [ + # tasks ready for execution + self.past_time + '-a0/c0/o0', + self.past_time + '-a1/c1/o1', + self.past_time + '-a2/c2/o2', + self.past_time + '-a3/c3/o3', + self.past_time + '-a4/c4/o4', + self.past_time + '-a5/c5/o5', + self.past_time + '-a6/c6/o6', + self.past_time + '-a7/c7/o7', + # task objects for unicode test + self.past_time + u'-a8/c8/o8\u2661', + self.past_time + u'-a9/c9/o9\xf8', + # this task will be skipped + self.future_time + '-a10/c10/o10'], + # this task container will be skipped + self.future_time: [ + self.future_time + '-a11/c11/o11']} + }) + self.expirer = expirer.ObjectExpirer(self.conf, logger=self.logger, + swift=self.fake_swift) + + # target object paths which should be expirerd now + self.expired_target_path_list = [ + 'a0/c0/o0', 'a1/c1/o1', 'a2/c2/o2', 'a3/c3/o3', 'a4/c4/o4', + 'a5/c5/o5', 'a6/c6/o6', 'a7/c7/o7', + 'a8/c8/o8\xe2\x99\xa1', 'a9/c9/o9\xc3\xb8', + ] + def tearDown(self): rmtree(self.rcache) internal_client.sleep = self.old_sleep @@ -210,41 +249,40 @@ super(ObjectExpirer, self).__init__(conf, swift=swift) self.processes = 3 self.deleted_objects = {} - self.obj_containers_in_order = [] - def delete_object(self, actual_obj, timestamp, container, obj): - if container not in self.deleted_objects: - self.deleted_objects[container] = set() - self.deleted_objects[container].add(obj) - self.obj_containers_in_order.append(container) + def delete_object(self, target_path, delete_timestamp, + task_account, task_container, task_object): + if task_container not in self.deleted_objects: + self.deleted_objects[task_container] = set() + self.deleted_objects[task_container].add(task_object) - aco_dict = { - '.expiring_objects': { - '0': set('1-one 2-two 3-three'.split()), - '1': set('2-two 3-three 4-four'.split()), - '2': set('5-five 6-six'.split()), - '3': set(u'7-seven\u2661'.split()), - }, - } - fake_swift = FakeInternalClient(aco_dict) - x = ObjectExpirer(self.conf, swift=fake_swift) + x = ObjectExpirer(self.conf, swift=self.fake_swift) - deleted_objects = {} + deleted_objects = defaultdict(set) for i in range(3): x.process = i + # reset progress so we know we don't double-up work among processes + x.deleted_objects = defaultdict(set) x.run_once() - self.assertNotEqual(deleted_objects, x.deleted_objects) - deleted_objects = deepcopy(x.deleted_objects) - 
self.assertEqual(aco_dict['.expiring_objects']['3'].pop(), - deleted_objects['3'].pop().decode('utf8')) - self.assertEqual(aco_dict['.expiring_objects'], deleted_objects) - self.assertEqual(len(set(x.obj_containers_in_order[:4])), 4) + for task_container, deleted in x.deleted_objects.items(): + self.assertFalse(deleted_objects[task_container] & deleted) + deleted_objects[task_container] |= deleted + + # sort for comparison + deleted_objects = { + con: sorted(o_set) for con, o_set in deleted_objects.items()} + expected = { + self.past_time: [ + self.past_time + '-' + target_path + for target_path in self.expired_target_path_list]} + self.assertEqual(deleted_objects, expected) def test_delete_object(self): x = expirer.ObjectExpirer({}, logger=self.logger) actual_obj = 'actual_obj' timestamp = int(time()) reclaim_ts = timestamp - x.reclaim_age + account = 'account' container = 'container' obj = 'obj' @@ -262,12 +300,12 @@ with mock.patch.object(x, 'delete_actual_object', side_effect=exc) as delete_actual: with mock.patch.object(x, 'pop_queue') as pop_queue: - x.delete_object(actual_obj, ts, container, obj) + x.delete_object(actual_obj, ts, account, container, obj) delete_actual.assert_called_once_with(actual_obj, ts) log_lines = x.logger.get_lines_for_level('error') if should_pop: - pop_queue.assert_called_once_with(container, obj) + pop_queue.assert_called_once_with(account, container, obj) self.assertEqual(start_reports + 1, x.report_objects) self.assertFalse(log_lines) else: @@ -277,11 +315,12 @@ if isinstance(exc, internal_client.UnexpectedResponse): self.assertEqual( log_lines[0], - 'Unexpected response while deleting object container ' - 'obj: %s' % exc.resp.status_int) + 'Unexpected response while deleting object ' + 'account container obj: %s' % exc.resp.status_int) else: self.assertTrue(log_lines[0].startswith( - 'Exception while deleting object container obj')) + 'Exception while deleting object ' + 'account container obj')) # verify pop_queue logic on exceptions for exc, ts, should_pop in [(None, timestamp, True), @@ -318,6 +357,171 @@ self.assertTrue( 'so far' in str(x.logger.get_lines_for_level('info'))) + def test_parse_task_obj(self): + x = expirer.ObjectExpirer(self.conf, logger=self.logger) + + def assert_parse_task_obj(task_obj, expected_delete_at, + expected_account, expected_container, + expected_obj): + delete_at, account, container, obj = x.parse_task_obj(task_obj) + self.assertEqual(delete_at, expected_delete_at) + self.assertEqual(account, expected_account) + self.assertEqual(container, expected_container) + self.assertEqual(obj, expected_obj) + + assert_parse_task_obj('0000-a/c/o', 0, 'a', 'c', 'o') + assert_parse_task_obj('0001-a/c/o', 1, 'a', 'c', 'o') + assert_parse_task_obj('1000-a/c/o', 1000, 'a', 'c', 'o') + assert_parse_task_obj('0000-acc/con/obj', 0, 'acc', 'con', 'obj') + + def make_task(self, delete_at, target): + return { + 'task_account': '.expiring_objects', + 'task_container': delete_at, + 'task_object': delete_at + '-' + target, + 'delete_timestamp': Timestamp(delete_at), + 'target_path': target, + } + + def test_round_robin_order(self): + x = expirer.ObjectExpirer(self.conf, logger=self.logger) + task_con_obj_list = [ + # objects in 0000 timestamp container + self.make_task('0000', 'a/c0/o0'), + self.make_task('0000', 'a/c0/o1'), + # objects in 0001 timestamp container + self.make_task('0001', 'a/c1/o0'), + self.make_task('0001', 'a/c1/o1'), + # objects in 0002 timestamp container + self.make_task('0002', 'a/c2/o0'), + self.make_task('0002', 
'a/c2/o1'), + ] + result = list(x.round_robin_order(task_con_obj_list)) + + # sorted by popping one object to delete for each target_container + expected = [ + self.make_task('0000', 'a/c0/o0'), + self.make_task('0001', 'a/c1/o0'), + self.make_task('0002', 'a/c2/o0'), + self.make_task('0000', 'a/c0/o1'), + self.make_task('0001', 'a/c1/o1'), + self.make_task('0002', 'a/c2/o1'), + ] + self.assertEqual(expected, result) + + # task containers have some task objects with invalid target paths + task_con_obj_list = [ + # objects in 0000 timestamp container + self.make_task('0000', 'invalid0'), + self.make_task('0000', 'a/c0/o0'), + self.make_task('0000', 'a/c0/o1'), + # objects in 0001 timestamp container + self.make_task('0001', 'a/c1/o0'), + self.make_task('0001', 'invalid1'), + self.make_task('0001', 'a/c1/o1'), + # objects in 0002 timestamp container + self.make_task('0002', 'a/c2/o0'), + self.make_task('0002', 'a/c2/o1'), + self.make_task('0002', 'invalid2'), + ] + result = list(x.round_robin_order(task_con_obj_list)) + + # the invalid task objects are ignored + expected = [ + self.make_task('0000', 'a/c0/o0'), + self.make_task('0001', 'a/c1/o0'), + self.make_task('0002', 'a/c2/o0'), + self.make_task('0000', 'a/c0/o1'), + self.make_task('0001', 'a/c1/o1'), + self.make_task('0002', 'a/c2/o1'), + ] + self.assertEqual(expected, result) + + # for a given target container, tasks won't necessarily all go in + # the same timestamp container + task_con_obj_list = [ + # objects in 0000 timestamp container + self.make_task('0000', 'a/c0/o0'), + self.make_task('0000', 'a/c0/o1'), + self.make_task('0000', 'a/c2/o2'), + self.make_task('0000', 'a/c2/o3'), + # objects in 0001 timestamp container + self.make_task('0001', 'a/c0/o2'), + self.make_task('0001', 'a/c0/o3'), + self.make_task('0001', 'a/c1/o0'), + self.make_task('0001', 'a/c1/o1'), + # objects in 0002 timestamp container + self.make_task('0002', 'a/c2/o0'), + self.make_task('0002', 'a/c2/o1'), + ] + result = list(x.round_robin_order(task_con_obj_list)) + + # so we go around popping by *target* container, not *task* container + expected = [ + self.make_task('0000', 'a/c0/o0'), + self.make_task('0001', 'a/c1/o0'), + self.make_task('0000', 'a/c2/o2'), + self.make_task('0000', 'a/c0/o1'), + self.make_task('0001', 'a/c1/o1'), + self.make_task('0000', 'a/c2/o3'), + self.make_task('0001', 'a/c0/o2'), + self.make_task('0002', 'a/c2/o0'), + self.make_task('0001', 'a/c0/o3'), + self.make_task('0002', 'a/c2/o1'), + ] + self.assertEqual(expected, result) + + # all of the work to be done could be for different target containers + task_con_obj_list = [ + # objects in 0000 timestamp container + self.make_task('0000', 'a/c0/o'), + self.make_task('0000', 'a/c1/o'), + self.make_task('0000', 'a/c2/o'), + self.make_task('0000', 'a/c3/o'), + # objects in 0001 timestamp container + self.make_task('0001', 'a/c4/o'), + self.make_task('0001', 'a/c5/o'), + self.make_task('0001', 'a/c6/o'), + self.make_task('0001', 'a/c7/o'), + # objects in 0002 timestamp container + self.make_task('0002', 'a/c8/o'), + self.make_task('0002', 'a/c9/o'), + ] + result = list(x.round_robin_order(task_con_obj_list)) + + # in which case, we kind of hammer the task containers + self.assertEqual(task_con_obj_list, result) + + def test_hash_mod(self): + x = expirer.ObjectExpirer(self.conf, logger=self.logger) + mod_count = [0, 0, 0] + for i in range(1000): + name = 'obj%d' % i + mod = x.hash_mod(name, 3) + mod_count[mod] += 1 + + # 1000 names are well shuffled + self.assertGreater(mod_count[0], 300) 
+ self.assertGreater(mod_count[1], 300) + self.assertGreater(mod_count[2], 300) + + def test_iter_task_accounts_to_expire(self): + x = expirer.ObjectExpirer(self.conf, logger=self.logger) + results = [_ for _ in x.iter_task_accounts_to_expire()] + self.assertEqual(results, [('.expiring_objects', 0, 1)]) + + self.conf['processes'] = '2' + self.conf['process'] = '1' + x = expirer.ObjectExpirer(self.conf, logger=self.logger) + results = [_ for _ in x.iter_task_accounts_to_expire()] + self.assertEqual(results, [('.expiring_objects', 1, 2)]) + + def test_delete_at_time_of_task_container(self): + x = expirer.ObjectExpirer(self.conf, logger=self.logger) + self.assertEqual(x.delete_at_time_of_task_container('0000'), 0) + self.assertEqual(x.delete_at_time_of_task_container('0001'), 1) + self.assertEqual(x.delete_at_time_of_task_container('1000'), 1000) + def test_run_once_nothing_to_do(self): x = expirer.ObjectExpirer(self.conf, logger=self.logger) x.swift = 'throw error because a string does not have needed methods' @@ -329,165 +533,128 @@ "'str' object has no attribute 'get_account_info'") def test_run_once_calls_report(self): - fake_swift = FakeInternalClient({}) + with mock.patch.object(self.expirer, 'pop_queue', + lambda a, c, o: None): + self.expirer.run_once() + self.assertEqual( + self.expirer.logger.get_lines_for_level('info'), [ + 'Pass beginning for task account .expiring_objects; ' + '2 possible containers; 12 possible objects', + 'Pass completed in 0s; 10 objects expired', + ]) + + def test_skip_task_account_without_task_container(self): + fake_swift = FakeInternalClient({ + # task account has no containers + '.expiring_objects': dict() + }) x = expirer.ObjectExpirer(self.conf, logger=self.logger, swift=fake_swift) x.run_once() self.assertEqual( x.logger.get_lines_for_level('info'), [ - 'Pass beginning; 1 possible containers; 2 possible objects', 'Pass completed in 0s; 0 objects expired', ]) - def test_run_once_unicode_problem(self): - fake_swift = FakeInternalClient({ - '.expiring_objects': {u'1234': [u'1234-troms\xf8']} - }) + def test_iter_task_to_expire(self): + # In this test, all tasks are assigned to the tested expirer + my_index = 0 + divisor = 1 + + task_account_container_list = [('.expiring_objects', self.past_time)] + + expected = [ + self.make_task(self.past_time, target_path) + for target_path in self.expired_target_path_list] + + self.assertEqual( + list(self.expirer.iter_task_to_expire( + task_account_container_list, my_index, divisor)), + expected) + + # the task queue has invalid task object + invalid_aco_dict = deepcopy(self.fake_swift.aco_dict) + invalid_aco_dict['.expiring_objects'][self.past_time].insert( + 0, self.past_time + '-invalid0') + invalid_aco_dict['.expiring_objects'][self.past_time].insert( + 5, self.past_time + '-invalid1') + invalid_fake_swift = FakeInternalClient(invalid_aco_dict) x = expirer.ObjectExpirer(self.conf, logger=self.logger, - swift=fake_swift) + swift=invalid_fake_swift) + # but the invalid tasks are skipped + self.assertEqual( + list(x.iter_task_to_expire( + task_account_container_list, my_index, divisor)), + expected) + + def test_run_once_unicode_problem(self): requests = [] def capture_requests(ipaddr, port, method, path, *args, **kwargs): requests.append((method, path)) - with mocked_http_conn(200, 200, 200, give_connect=capture_requests): - x.run_once() - self.assertEqual(len(requests), 3) + # 3 DELETE requests for each 10 executed task objects to pop_queue + code_list = [200] * 3 * 10 + with mocked_http_conn(*code_list, 
give_connect=capture_requests): + self.expirer.run_once() + self.assertEqual(len(requests), 30) def test_container_timestamp_break(self): - def fail_to_iter_objects(*a, **kw): - raise Exception('This should not have been called') + with mock.patch.object(self.fake_swift, 'iter_objects') as mock_method: + self.expirer.run_once() - fake_swift = FakeInternalClient({ - '.expiring_objects': {str(int(time() + 86400)): []} - }) - x = expirer.ObjectExpirer(self.conf, logger=self.logger, - swift=fake_swift) - with mock.patch.object(fake_swift, 'iter_objects', - fail_to_iter_objects): - x.run_once() - logs = x.logger.all_log_lines() - self.assertEqual(logs['info'], [ - 'Pass beginning; 1 possible containers; 2 possible objects', - 'Pass completed in 0s; 0 objects expired', - ]) - self.assertNotIn('error', logs) - - # Reverse test to be sure it still would blow up the way expected. - fake_swift = FakeInternalClient({ - '.expiring_objects': {str(int(time() - 86400)): []} - }) - x = expirer.ObjectExpirer(self.conf, logger=self.logger, - swift=fake_swift) - with mock.patch.object(fake_swift, 'iter_objects', - fail_to_iter_objects): - x.run_once() - self.assertEqual( - x.logger.get_lines_for_level('error'), ['Unhandled exception: ']) - log_args, log_kwargs = x.logger.log_dict['error'][-1] - self.assertEqual(str(log_kwargs['exc_info'][1]), - 'This should not have been called') + # iter_objects is called only for past_time, not future_time + self.assertEqual(mock_method.call_args_list, + [mock.call('.expiring_objects', self.past_time)]) def test_object_timestamp_break(self): - def should_not_be_called(*a, **kw): - raise Exception('This should not have been called') + with mock.patch.object(self.expirer, 'delete_actual_object') \ + as mock_method, \ + mock.patch.object(self.expirer, 'pop_queue'): + self.expirer.run_once() - fake_swift = FakeInternalClient({ - '.expiring_objects': { - str(int(time() - 86400)): [ - '%d-actual-obj' % int(time() + 86400)], - }, - }) - x = expirer.ObjectExpirer(self.conf, logger=self.logger, - swift=fake_swift) - x.run_once() - self.assertNotIn('error', x.logger.all_log_lines()) - self.assertEqual(x.logger.get_lines_for_level('info'), [ - 'Pass beginning; 1 possible containers; 2 possible objects', - 'Pass completed in 0s; 0 objects expired', - ]) - # Reverse test to be sure it still would blow up the way expected. 
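The fixture used throughout these tests names each task object '<delete-at>-<account>/<container>/<object>' inside a task container named for its delete-at time; a hypothetical parser matching the expectations asserted in test_parse_task_obj above (not the expirer's actual implementation):

def parse_task_obj_sketch(task_obj):
    # '0000-a/c/o' -> (0, 'a', 'c', 'o'), as assert_parse_task_obj expects
    delete_at, target = task_obj.split('-', 1)
    account, container, obj = target.split('/', 2)
    return int(delete_at), account, container, obj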
- ts = int(time() - 86400) - fake_swift = FakeInternalClient({ - '.expiring_objects': { - str(int(time() - 86400)): ['%d-actual-obj' % ts], - }, - }) - x = expirer.ObjectExpirer(self.conf, logger=self.logger, - swift=fake_swift) - x.delete_actual_object = should_not_be_called - x.run_once() + # executed tasks are with past time self.assertEqual( - x.logger.get_lines_for_level('error'), - ['Exception while deleting object %d %d-actual-obj ' - 'This should not have been called: ' % (ts, ts)]) + mock_method.call_args_list, + [mock.call(target_path, self.past_time) + for target_path in self.expired_target_path_list]) def test_failed_delete_keeps_entry(self): def deliberately_blow_up(actual_obj, timestamp): raise Exception('failed to delete actual object') - def should_not_get_called(container, obj): - raise Exception('This should not have been called') + # any tasks are not done + with mock.patch.object(self.expirer, 'delete_actual_object', + deliberately_blow_up), \ + mock.patch.object(self.expirer, 'pop_queue') as mock_method: + self.expirer.run_once() + + # no tasks are popped from the queue + self.assertEqual(mock_method.call_args_list, []) + + # all tasks are done + with mock.patch.object(self.expirer, 'delete_actual_object', + lambda o, t: None), \ + mock.patch.object(self.expirer, 'pop_queue') as mock_method: + self.expirer.run_once() - ts = int(time() - 86400) - fake_swift = FakeInternalClient({ - '.expiring_objects': { - str(int(time() - 86400)): ['%d-actual-obj' % ts], - }, - }) - x = expirer.ObjectExpirer(self.conf, logger=self.logger, - swift=fake_swift) - x.delete_actual_object = deliberately_blow_up - x.pop_queue = should_not_get_called - x.run_once() - self.assertEqual( - x.logger.get_lines_for_level('error'), - ['Exception while deleting object %d %d-actual-obj ' - 'failed to delete actual object: ' % (ts, ts)]) + # all tasks are popped from the queue self.assertEqual( - x.logger.get_lines_for_level('info'), [ - 'Pass beginning; 1 possible containers; 2 possible objects', - 'Pass completed in 0s; 0 objects expired', - ]) - - # Reverse test to be sure it still would blow up the way expected. 
- ts = int(time() - 86400) - fake_swift = FakeInternalClient({ - '.expiring_objects': { - str(int(time() - 86400)): ['%d-actual-obj' % ts], - }, - }) - self.logger._clear() - x = expirer.ObjectExpirer(self.conf, logger=self.logger, - swift=fake_swift) - x.delete_actual_object = lambda o, t: None - x.pop_queue = should_not_get_called - x.run_once() - self.assertEqual( - self.logger.get_lines_for_level('error'), - ['Exception while deleting object %d %d-actual-obj This should ' - 'not have been called: ' % (ts, ts)]) + mock_method.call_args_list, + [mock.call('.expiring_objects', self.past_time, + self.past_time + '-' + target_path) + for target_path in self.expired_target_path_list]) def test_success_gets_counted(self): - fake_swift = FakeInternalClient({ - '.expiring_objects': { - str(int(time() - 86400)): [ - '%d-acc/c/actual-obj' % int(time() - 86400)], - }, - }) - x = expirer.ObjectExpirer(self.conf, logger=self.logger, - swift=fake_swift) - x.delete_actual_object = lambda o, t: None - x.pop_queue = lambda c, o: None - self.assertEqual(x.report_objects, 0) - with mock.patch('swift.obj.expirer.MAX_OBJECTS_TO_CACHE', 0): - x.run_once() - self.assertEqual(x.report_objects, 1) - self.assertEqual( - x.logger.get_lines_for_level('info'), - ['Pass beginning; 1 possible containers; 2 possible objects', - 'Pass completed in 0s; 1 objects expired']) + self.assertEqual(self.expirer.report_objects, 0) + with mock.patch('swift.obj.expirer.MAX_OBJECTS_TO_CACHE', 0), \ + mock.patch.object(self.expirer, 'delete_actual_object', + lambda o, t: None), \ + mock.patch.object(self.expirer, 'pop_queue', + lambda a, c, o: None): + self.expirer.run_once() + self.assertEqual(self.expirer.report_objects, 10) def test_delete_actual_object_does_not_get_unicode(self): got_unicode = [False] @@ -496,24 +663,15 @@ if isinstance(actual_obj, six.text_type): got_unicode[0] = True - fake_swift = FakeInternalClient({ - '.expiring_objects': { - str(int(time() - 86400)): [ - '%d-actual-obj' % int(time() - 86400)], - }, - }) - x = expirer.ObjectExpirer(self.conf, logger=self.logger, - swift=fake_swift) - x.delete_actual_object = delete_actual_object_test_for_unicode - x.pop_queue = lambda c, o: None - self.assertEqual(x.report_objects, 0) - x.run_once() - self.assertEqual(x.report_objects, 1) - self.assertEqual( - x.logger.get_lines_for_level('info'), [ - 'Pass beginning; 1 possible containers; 2 possible objects', - 'Pass completed in 0s; 1 objects expired', - ]) + self.assertEqual(self.expirer.report_objects, 0) + + with mock.patch.object(self.expirer, 'delete_actual_object', + delete_actual_object_test_for_unicode), \ + mock.patch.object(self.expirer, 'pop_queue', + lambda a, c, o: None): + self.expirer.run_once() + + self.assertEqual(self.expirer.report_objects, 10) self.assertFalse(got_unicode[0]) def test_failed_delete_continues_on(self): @@ -523,37 +681,26 @@ def fail_delete_actual_object(actual_obj, timestamp): raise Exception('failed to delete actual object') - cts = int(time() - 86400) - ots = int(time() - 86400) - - fake_swift = FakeInternalClient({ - '.expiring_objects': { - str(cts): ['%d-actual-obj' % ots, '%d-next-obj' % ots], - str(cts + 1): ['%d-actual-obj' % ots, '%d-next-obj' % ots], - }, - }) - x = expirer.ObjectExpirer(self.conf, logger=self.logger, - swift=fake_swift) - x.delete_actual_object = fail_delete_actual_object - with mock.patch.object(fake_swift, 'delete_container', - fail_delete_container): - x.run_once() - error_lines = x.logger.get_lines_for_level('error') - 
self.assertEqual(sorted(error_lines), sorted([ - 'Exception while deleting object %d %d-actual-obj failed to ' - 'delete actual object: ' % (cts, ots), - 'Exception while deleting object %d %d-next-obj failed to ' - 'delete actual object: ' % (cts, ots), - 'Exception while deleting object %d %d-actual-obj failed to ' - 'delete actual object: ' % (cts + 1, ots), - 'Exception while deleting object %d %d-next-obj failed to ' - 'delete actual object: ' % (cts + 1, ots), - 'Exception while deleting container %d failed to delete ' - 'container: ' % (cts,), - 'Exception while deleting container %d failed to delete ' - 'container: ' % (cts + 1,)])) - self.assertEqual(x.logger.get_lines_for_level('info'), [ - 'Pass beginning; 1 possible containers; 2 possible objects', + with mock.patch.object(self.fake_swift, 'delete_container', + fail_delete_container), \ + mock.patch.object(self.expirer, 'delete_actual_object', + fail_delete_actual_object): + self.expirer.run_once() + + error_lines = self.expirer.logger.get_lines_for_level('error') + + self.assertEqual(error_lines, [ + 'Exception while deleting object %s %s %s ' + 'failed to delete actual object: ' % ( + '.expiring_objects', self.past_time, + self.past_time + '-' + target_path) + for target_path in self.expired_target_path_list] + [ + 'Exception while deleting container %s %s ' + 'failed to delete container: ' % ( + '.expiring_objects', self.past_time)]) + self.assertEqual(self.expirer.logger.get_lines_for_level('info'), [ + 'Pass beginning for task account .expiring_objects; ' + '2 possible containers; 12 possible objects', 'Pass completed in 0s; 0 objects expired', ]) @@ -618,7 +765,7 @@ internal_client.loadapp = lambda *a, **kw: fake_app x = expirer.ObjectExpirer({}) - ts = '1234' + ts = Timestamp('1234') x.delete_actual_object('/path/to/object', ts) self.assertEqual(got_env[0]['HTTP_X_IF_DELETE_AT'], ts) self.assertEqual(got_env[0]['HTTP_X_TIMESTAMP'], @@ -637,7 +784,7 @@ internal_client.loadapp = lambda *a, **kw: fake_app x = expirer.ObjectExpirer({}) - ts = '1234' + ts = Timestamp('1234') x.delete_actual_object('/path/to/object name', ts) self.assertEqual(got_env[0]['HTTP_X_IF_DELETE_AT'], ts) self.assertEqual(got_env[0]['HTTP_X_TIMESTAMP'], @@ -656,11 +803,12 @@ internal_client.loadapp = lambda *a, **kw: fake_app x = expirer.ObjectExpirer({}) + ts = Timestamp('1234') if should_raise: with self.assertRaises(internal_client.UnexpectedResponse): - x.delete_actual_object('/path/to/object', '1234') + x.delete_actual_object('/path/to/object', ts) else: - x.delete_actual_object('/path/to/object', '1234') + x.delete_actual_object('/path/to/object', ts) self.assertEqual(calls[0], 1) # object was deleted and tombstone reaped @@ -685,7 +833,7 @@ x = expirer.ObjectExpirer({}) exc = None try: - x.delete_actual_object('/path/to/object', '1234') + x.delete_actual_object('/path/to/object', Timestamp('1234')) except Exception as err: exc = err finally: @@ -694,7 +842,7 @@ def test_delete_actual_object_quotes(self): name = 'this name should get quoted' - timestamp = '1366063156.863045' + timestamp = Timestamp('1366063156.863045') x = expirer.ObjectExpirer({}) x.swift.make_request = mock.Mock() x.swift.make_request.return_value.status_int = 204 @@ -705,7 +853,7 @@ def test_delete_actual_object_queue_cleaning(self): name = 'something' - timestamp = '1515544858.80602' + timestamp = Timestamp('1515544858.80602') x = expirer.ObjectExpirer({}) x.swift.make_request = mock.MagicMock() x.delete_actual_object(name, timestamp) @@ -724,13 +872,13 @@ 
requests.append((method, path)) with mocked_http_conn( 200, 200, 200, give_connect=capture_requests) as fake_conn: - x.pop_queue('c', 'o') + x.pop_queue('a', 'c', 'o') self.assertRaises(StopIteration, fake_conn.code_iter.next) for method, path in requests: self.assertEqual(method, 'DELETE') device, part, account, container, obj = utils.split_path( path, 5, 5, True) - self.assertEqual(account, '.expiring_objects') + self.assertEqual(account, 'a') self.assertEqual(container, 'c') self.assertEqual(obj, 'o') diff -Nru swift-2.17.0/test/unit/obj/test_reconstructor.py swift-2.18.0/test/unit/obj/test_reconstructor.py --- swift-2.17.0/test/unit/obj/test_reconstructor.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/obj/test_reconstructor.py 2018-05-30 10:17:02.000000000 +0000 @@ -1474,13 +1474,15 @@ self.assertEqual(2, reconstructor.reconstructor_workers) worker_args = list(reconstructor.get_worker_args( once=True, devices='sdb,sdd,sdf', partitions='99,333')) - self.assertEqual(1, len(worker_args)) - # 5 devices in total, 2 workers -> up to 3 devices per worker so a - # single worker should handle the requested override devices - self.assertEqual([ - {'override_partitions': [99, 333], 'override_devices': [ - 'sdb', 'sdd', 'sdf']}, - ], worker_args) + # 3 devices to operate on, 2 workers -> one worker gets two devices + # and the other worker just gets one + self.assertEqual([{ + 'override_partitions': [99, 333], + 'override_devices': ['sdb', 'sdf'], + }, { + 'override_partitions': [99, 333], + 'override_devices': ['sdd'], + }], worker_args) # with 4 override devices, expect 2 per worker worker_args = list(reconstructor.get_worker_args( @@ -1524,26 +1526,41 @@ {}, logger=self.logger) reconstructor.get_local_devices = lambda: [ 'd%s' % (i + 1) for i in range(21)] - # ... with many devices per worker, worker count is pretty granular - for i in range(1, 8): - reconstructor.reconstructor_workers = i - self.assertEqual(i, len(list(reconstructor.get_worker_args()))) - # ... then it gets sorta stair step - for i in range(9, 10): - reconstructor.reconstructor_workers = i - self.assertEqual(7, len(list(reconstructor.get_worker_args()))) - # 2-3 devices per worker - for args in reconstructor.get_worker_args(): - self.assertIn(len(args['override_devices']), (2, 3)) - for i in range(11, 20): - reconstructor.reconstructor_workers = i - self.assertEqual(11, len(list(reconstructor.get_worker_args()))) - # 1, 2 devices per worker - for args in reconstructor.get_worker_args(): - self.assertIn(len(args['override_devices']), (1, 2)) - # this is debatable, but maybe I'll argue if you're going to have - # *some* workers with > 1 device, it's better to have fewer workers - # with devices spread out evenly than a couple outliers? + + # With more devices than workers, the work is spread out as evenly + # as we can manage. When number-of-devices is a multiple of + # number-of-workers, every worker has the same number of devices to + # operate on. + reconstructor.reconstructor_workers = 7 + worker_args = list(reconstructor.get_worker_args()) + self.assertEqual([len(a['override_devices']) for a in worker_args], + [3] * 7) + + # When number-of-devices is not a multiple of number-of-workers, + # device counts differ by at most 1. + reconstructor.reconstructor_workers = 5 + worker_args = list(reconstructor.get_worker_args()) + self.assertEqual( + sorted([len(a['override_devices']) for a in worker_args]), + [4, 4, 4, 4, 5]) + + # With more workers than devices, we don't create useless workers. 
+ # We'll only make one per device. + reconstructor.reconstructor_workers = 22 + worker_args = list(reconstructor.get_worker_args()) + self.assertEqual( + [len(a['override_devices']) for a in worker_args], + [1] * 21) + + # This is true even if we have far more workers than devices. + reconstructor.reconstructor_workers = 2 ** 16 + worker_args = list(reconstructor.get_worker_args()) + self.assertEqual( + [len(a['override_devices']) for a in worker_args], + [1] * 21) + + # Spot check one full result for sanity's sake + reconstructor.reconstructor_workers = 11 self.assertEqual([ {'override_partitions': [], 'override_devices': ['d1', 'd12']}, {'override_partitions': [], 'override_devices': ['d2', 'd13']}, @@ -1557,12 +1574,6 @@ {'override_partitions': [], 'override_devices': ['d10', 'd21']}, {'override_partitions': [], 'override_devices': ['d11']}, ], list(reconstructor.get_worker_args())) - # you can't get < than 1 device per worker - for i in range(21, 52): - reconstructor.reconstructor_workers = i - self.assertEqual(21, len(list(reconstructor.get_worker_args()))) - for args in reconstructor.get_worker_args(): - self.assertEqual(1, len(args['override_devices'])) def test_next_rcache_update_configured_with_stats_interval(self): now = time.time() diff -Nru swift-2.17.0/test/unit/obj/test_replicator.py swift-2.18.0/test/unit/obj/test_replicator.py --- swift-2.17.0/test/unit/obj/test_replicator.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/obj/test_replicator.py 2018-05-30 10:17:02.000000000 +0000 @@ -27,7 +27,7 @@ from errno import ENOENT, ENOTEMPTY, ENOTDIR from eventlet.green import subprocess -from eventlet import Timeout +from eventlet import Timeout, sleep from test.unit import (debug_logger, patch_policies, make_timestamp_iter, mocked_http_conn, mock_check_drive, skip_if_no_xattrs) @@ -132,6 +132,40 @@ object_replicator.subprocess.Popen = orig_process +class MockHungProcess(object): + def __init__(self, waits_needed=1, *args, **kwargs): + class MockStdout(object): + def read(self): + pass + self.stdout = MockStdout() + self._state = 'running' + self._calls = [] + self._waits = 0 + self._waits_needed = waits_needed + + def wait(self, timeout=None): + self._calls.append(('wait', self._state)) + if self._state == 'running': + # Sleep so we trip the rsync timeout + sleep(1) + raise BaseException('You need to mock out some timeouts') + elif self._state == 'killed': + self._waits += 1 + if self._waits >= self._waits_needed: + return + else: + raise subprocess.TimeoutExpired('some cmd', timeout) + + def terminate(self): + self._calls.append(('terminate', self._state)) + if self._state == 'running': + self._state = 'terminating' + + def kill(self): + self._calls.append(('kill', self._state)) + self._state = 'killed' + + def _create_test_rings(path, devs=None, next_part_power=None): testgz = os.path.join(path, 'object.ring.gz') intended_replica2part2dev_id = [ @@ -200,7 +234,8 @@ self.conf = dict( bind_ip=_ips()[0], bind_port=6200, swift_dir=self.testdir, devices=self.devices, mount_check='false', - timeout='300', stats_interval='1', sync_method='rsync') + timeout='300', stats_interval='1', sync_method='rsync', + recon_cache_path=self.recon_cache) self._create_replicator() self.ts = make_timestamp_iter() @@ -336,7 +371,6 @@ self.assertEqual((start + 1 + cycle) % 10, replicator.replication_cycle) - self.assertEqual(0, replicator.stats['start']) recon_fname = os.path.join(self.recon_cache, "object.recon") with open(recon_fname) as cachefile: recon = json.loads(cachefile.read()) 
@@ -493,7 +527,7 @@ _create_test_rings(self.testdir, devs) self.replicator.collect_jobs() - self.assertEqual(self.replicator.stats['failure'], 0) + self.assertEqual(self.replicator.total_stats.failure, 0) @mock.patch('swift.obj.replicator.random.shuffle', side_effect=lambda l: l) def test_collect_jobs_multi_disk(self, mock_shuffle): @@ -791,7 +825,7 @@ self.assertEqual('1', jobs[0]['partition']) def test_handoffs_first_mode_will_process_all_jobs_after_handoffs(self): - # make a object in the handoff & primary partition + # make an object in the handoff & primary partition expected_suffix_paths = [] for policy in POLICIES: # primary @@ -842,7 +876,7 @@ self.replicator.replicate() # all jobs processed! self.assertEqual(self.replicator.job_count, - self.replicator.replication_count) + self.replicator.total_stats.attempted) self.assertFalse(self.replicator.handoffs_remaining) # sanity, all the handoffs suffixes we filled in were rsync'd @@ -908,7 +942,7 @@ # jobs may have been spawned into the pool before the failed # update_deleted job incremented handoffs_remaining and caused the # handoffs_first check to abort the current pass - self.assertLessEqual(self.replicator.replication_count, + self.assertLessEqual(self.replicator.total_stats.attempted, 2 + self.replicator.concurrency) # sanity, all the handoffs suffixes we filled in were rsync'd @@ -945,10 +979,11 @@ side_effect=_ips), \ mocked_http_conn(*[200] * 14, body=stub_body) as conn_log: self.replicator.handoff_delete = 2 + self.replicator._zero_stats() self.replicator.replicate() # all jobs processed! self.assertEqual(self.replicator.job_count, - self.replicator.replication_count) + self.replicator.total_stats.attempted) self.assertFalse(self.replicator.handoffs_remaining) # sanity, all parts got replicated found_replicate_calls = defaultdict(int) @@ -1384,6 +1419,11 @@ self.assertFalse(os.access(pol1_part_path, os.F_OK)) self.assertTrue(os.access(pol0_part_path, os.F_OK)) + # since we weren't operating on everything, but only a subset of + # storage policies, we didn't dump any recon stats. 
+ self.assertFalse(os.path.exists( + os.path.join(self.recon_cache, 'object.recon'))) + def test_delete_partition_ssync(self): with mock.patch('swift.obj.replicator.http_connect', mock_http_connect(200)): @@ -1701,7 +1741,7 @@ with mock.patch.object(replicator, 'sync', fake_sync): replicator.run_once() - log_lines = replicator.logger.get_lines_for_level('error') + log_lines = replicator.logger.logger.get_lines_for_level('error') self.assertIn("Error syncing with node:", log_lines[0]) self.assertFalse(log_lines[1:]) # setup creates 4 partitions; partition 1 does not map to local dev id @@ -1736,7 +1776,7 @@ # attempt to 16 times but succeeded only 15 times due to Timeout suffix_hashes = sum( count for (metric, count), _junk in - replicator.logger.log_dict['update_stats'] + replicator.logger.logger.log_dict['update_stats'] if metric == 'suffix.hashes') self.assertEqual(15, suffix_hashes) @@ -1767,7 +1807,8 @@ self.replicator.suffix_count = 0 self.replicator.suffix_sync = 0 self.replicator.suffix_hash = 0 - self.replicator.replication_count = 0 + self.replicator.last_replication_count = 0 + self.replicator._zero_stats() self.replicator.partition_times = [] self.headers = {'Content-Length': '0', @@ -1892,10 +1933,11 @@ reqs.append(mock.call(node, local_job, ['a83'])) fake_func.assert_has_calls(reqs, any_order=True) self.assertEqual(fake_func.call_count, 2) - self.assertEqual(self.replicator.replication_count, 1) - self.assertEqual(self.replicator.suffix_sync, 2) - self.assertEqual(self.replicator.suffix_hash, 1) - self.assertEqual(self.replicator.suffix_count, 1) + stats = self.replicator.total_stats + self.assertEqual(stats.attempted, 1) + self.assertEqual(stats.suffix_sync, 2) + self.assertEqual(stats.suffix_hash, 1) + self.assertEqual(stats.suffix_count, 1) # Efficient Replication Case set_default(self) @@ -1911,10 +1953,11 @@ # belong to another region self.replicator.update(job) self.assertEqual(fake_func.call_count, 1) - self.assertEqual(self.replicator.replication_count, 1) - self.assertEqual(self.replicator.suffix_sync, 1) - self.assertEqual(self.replicator.suffix_hash, 1) - self.assertEqual(self.replicator.suffix_count, 1) + stats = self.replicator.total_stats + self.assertEqual(stats.attempted, 1) + self.assertEqual(stats.suffix_sync, 1) + self.assertEqual(stats.suffix_hash, 1) + self.assertEqual(stats.suffix_count, 1) mock_http.reset_mock() self.logger.clear() @@ -2004,11 +2047,641 @@ _create_test_rings(self.testdir, next_part_power=4) self.replicator.replicate() self.assertEqual(0, self.replicator.job_count) - self.assertEqual(0, self.replicator.replication_count) + self.assertEqual(0, self.replicator.total_stats.attempted) warnings = self.logger.get_lines_for_level('warning') self.assertIn( "next_part_power set in policy 'one'. 
Skipping", warnings) + def test_replicate_rsync_timeout(self): + cur_part = '0' + df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o', + policy=POLICIES[0]) + mkdirs(df._datadir) + f = open(os.path.join(df._datadir, + normalize_timestamp(time.time()) + '.data'), + 'wb') + f.write('1234567890') + f.close() + + mock_procs = [] + + def new_mock(*a, **kw): + proc = MockHungProcess() + mock_procs.append(proc) + return proc + + with mock.patch('swift.obj.replicator.http_connect', + mock_http_connect(200)), \ + mock.patch.object(self.replicator, 'rsync_timeout', 0.01), \ + mock.patch('eventlet.green.subprocess.Popen', new_mock): + self.replicator.rsync_error_log_line_length = 20 + self.replicator.run_once() + for proc in mock_procs: + self.assertEqual(proc._calls, [ + ('wait', 'running'), + ('kill', 'running'), + ('wait', 'killed'), + ]) + self.assertEqual(len(mock_procs), 2) + error_lines = self.replicator.logger.get_lines_for_level('error') + # verify logs are truncated to rsync_error_log_line_length + self.assertEqual('Killing long-running', error_lines[0]) + self.assertEqual('Killing long-running', error_lines[1]) + + def test_replicate_rsync_timeout_wedged(self): + cur_part = '0' + df = self.df_mgr.get_diskfile('sda', cur_part, 'a', 'c', 'o', + policy=POLICIES[0]) + mkdirs(df._datadir) + f = open(os.path.join(df._datadir, + normalize_timestamp(time.time()) + '.data'), + 'wb') + f.write('1234567890') + f.close() + + mock_procs = [] + + def new_mock(*a, **kw): + proc = MockHungProcess(waits_needed=2) + mock_procs.append(proc) + return proc + + with mock.patch('swift.obj.replicator.http_connect', + mock_http_connect(200)), \ + mock.patch.object(self.replicator, 'rsync_timeout', 0.01), \ + mock.patch('eventlet.green.subprocess.Popen', new_mock): + self.replicator.run_once() + for proc in mock_procs: + self.assertEqual(proc._calls, [ + ('wait', 'running'), + ('kill', 'running'), + ('wait', 'killed'), + ('wait', 'killed'), + ]) + self.assertEqual(len(mock_procs), 2) + + def test_limit_rsync_log(self): + def do_test(length_limit, log_line, expected): + self.replicator.rsync_error_log_line_length = length_limit + result = self.replicator._limit_rsync_log(log_line) + self.assertEqual(result, expected) + + tests = [{'length_limit': 20, + 'log_line': 'a' * 20, + 'expected': 'a' * 20}, + {'length_limit': 20, + 'log_line': 'a' * 19, + 'expected': 'a' * 19}, + {'length_limit': 20, + 'log_line': 'a' * 21, + 'expected': 'a' * 20}, + {'length_limit': None, + 'log_line': 'a' * 50, + 'expected': 'a' * 50}, + {'length_limit': 0, + 'log_line': 'a' * 50, + 'expected': 'a' * 50}] + + for params in tests: + do_test(**params) + + +@patch_policies([StoragePolicy(0, 'zero', False), + StoragePolicy(1, 'one', True)]) +class TestMultiProcessReplicator(unittest.TestCase): + def setUp(self): + # recon cache path + self.recon_cache = tempfile.mkdtemp() + rmtree(self.recon_cache, ignore_errors=1) + os.mkdir(self.recon_cache) + self.recon_file = os.path.join(self.recon_cache, 'object.recon') + + bind_port = 6200 + + # Set up some rings + self.testdir = tempfile.mkdtemp() + _create_test_rings(self.testdir, devs=[ + {'id': 0, 'device': 'sda', 'zone': 0, + 'region': 1, 'ip': '127.0.0.1', 'port': bind_port}, + {'id': 1, 'device': 'sdb', 'zone': 0, + 'region': 1, 'ip': '127.0.0.1', 'port': bind_port}, + {'id': 2, 'device': 'sdc', 'zone': 0, + 'region': 1, 'ip': '127.0.0.1', 'port': bind_port}, + {'id': 3, 'device': 'sdd', 'zone': 0, + 'region': 1, 'ip': '127.0.0.1', 'port': bind_port}, + {'id': 4, 'device': 'sde', 'zone': 
0, + 'region': 1, 'ip': '127.0.0.1', 'port': bind_port}, + {'id': 100, 'device': 'notme0', 'zone': 0, + 'region': 1, 'ip': '127.99.99.99', 'port': bind_port}]) + + self.logger = debug_logger('test-replicator') + self.conf = dict( + bind_ip='127.0.0.1', bind_port=bind_port, + swift_dir=self.testdir, + mount_check='false', recon_cache_path=self.recon_cache, + timeout='300', stats_interval='1', sync_method='rsync') + + self.replicator = object_replicator.ObjectReplicator( + self.conf, logger=self.logger) + + def tearDown(self): + self.assertFalse(process_errors) + rmtree(self.testdir, ignore_errors=1) + rmtree(self.recon_cache, ignore_errors=1) + + def fake_replicate(self, override_devices, **kw): + # Faked-out replicate() method. Just updates the stats, but doesn't + # do any work. + for device in override_devices: + stats = self.replicator.stats_for_dev[device] + if device == 'sda': + stats.attempted = 1 + stats.success = 10 + stats.failure = 100 + stats.hashmatch = 1000 + stats.rsync = 10000 + stats.remove = 100000 + stats.suffix_count = 1000000 + stats.suffix_hash = 10000000 + stats.suffix_sync = 100000000 + stats.failure_nodes = { + '10.1.1.1': {'d11': 1}} + elif device == 'sdb': + stats.attempted = 2 + stats.success = 20 + stats.failure = 200 + stats.hashmatch = 2000 + stats.rsync = 20000 + stats.remove = 200000 + stats.suffix_count = 2000000 + stats.suffix_hash = 20000000 + stats.suffix_sync = 200000000 + stats.failure_nodes = { + '10.2.2.2': {'d22': 2}} + elif device == 'sdc': + stats.attempted = 3 + stats.success = 30 + stats.failure = 300 + stats.hashmatch = 3000 + stats.rsync = 30000 + stats.remove = 300000 + stats.suffix_count = 3000000 + stats.suffix_hash = 30000000 + stats.suffix_sync = 300000000 + stats.failure_nodes = { + '10.3.3.3': {'d33': 3}} + elif device == 'sdd': + stats.attempted = 4 + stats.success = 40 + stats.failure = 400 + stats.hashmatch = 4000 + stats.rsync = 40000 + stats.remove = 400000 + stats.suffix_count = 4000000 + stats.suffix_hash = 40000000 + stats.suffix_sync = 400000000 + stats.failure_nodes = { + '10.4.4.4': {'d44': 4}} + elif device == 'sde': + stats.attempted = 5 + stats.success = 50 + stats.failure = 500 + stats.hashmatch = 5000 + stats.rsync = 50000 + stats.remove = 500000 + stats.suffix_count = 5000000 + stats.suffix_hash = 50000000 + stats.suffix_sync = 500000000 + stats.failure_nodes = { + '10.5.5.5': {'d55': 5}} + else: + raise Exception("mock can't handle %r" % device) + + def test_no_multiprocessing(self): + self.replicator.replicator_workers = 0 + self.assertEqual(self.replicator.get_worker_args(), []) + + def test_device_distribution(self): + self.replicator.replicator_workers = 2 + self.assertEqual(self.replicator.get_worker_args(), [{ + 'override_devices': ['sda', 'sdc', 'sde'], + 'override_partitions': [], + 'override_policies': [], + 'have_overrides': False, + 'multiprocess_worker_index': 0, + }, { + 'override_devices': ['sdb', 'sdd'], + 'override_partitions': [], + 'override_policies': [], + 'have_overrides': False, + 'multiprocess_worker_index': 1, + }]) + + def test_override_policies(self): + self.replicator.replicator_workers = 2 + args = self.replicator.get_worker_args(policies="3,5,7", once=True) + self.assertEqual(args, [{ + 'override_devices': ['sda', 'sdc', 'sde'], + 'override_partitions': [], + 'override_policies': [3, 5, 7], + 'have_overrides': True, + 'multiprocess_worker_index': 0, + }, { + 'override_devices': ['sdb', 'sdd'], + 'override_partitions': [], + 'override_policies': [3, 5, 7], + 'have_overrides': True, + 
'multiprocess_worker_index': 1, + }]) + + # override policies don't apply in run-forever mode + args = self.replicator.get_worker_args(policies="3,5,7", once=False) + self.assertEqual(args, [{ + 'override_devices': ['sda', 'sdc', 'sde'], + 'override_partitions': [], + 'override_policies': [], + 'have_overrides': False, + 'multiprocess_worker_index': 0, + }, { + 'override_devices': ['sdb', 'sdd'], + 'override_partitions': [], + 'override_policies': [], + 'have_overrides': False, + 'multiprocess_worker_index': 1, + }]) + + def test_more_workers_than_disks(self): + self.replicator.replicator_workers = 999 + self.assertEqual(self.replicator.get_worker_args(), [{ + 'override_devices': ['sda'], + 'override_partitions': [], + 'override_policies': [], + 'have_overrides': False, + 'multiprocess_worker_index': 0, + }, { + 'override_devices': ['sdb'], + 'override_partitions': [], + 'override_policies': [], + 'have_overrides': False, + 'multiprocess_worker_index': 1, + }, { + 'override_devices': ['sdc'], + 'override_partitions': [], + 'override_policies': [], + 'have_overrides': False, + 'multiprocess_worker_index': 2, + }, { + 'override_devices': ['sdd'], + 'override_partitions': [], + 'override_policies': [], + 'have_overrides': False, + 'multiprocess_worker_index': 3, + }, { + 'override_devices': ['sde'], + 'override_partitions': [], + 'override_policies': [], + 'have_overrides': False, + 'multiprocess_worker_index': 4, + }]) + + # Remember how many workers we actually have so that the log-line + # prefixes are reasonable. Otherwise, we'd have five workers, each + # logging lines starting with things like "[worker X/999 pid=P]" + # despite there being only five. + self.assertEqual(self.replicator.replicator_workers, 5) + + def test_command_line_overrides(self): + self.replicator.replicator_workers = 2 + + args = self.replicator.get_worker_args( + devices="sda,sdc,sdd", partitions="12,34,56", once=True) + self.assertEqual(args, [{ + 'override_devices': ['sda', 'sdd'], + 'override_partitions': [12, 34, 56], + 'override_policies': [], + 'have_overrides': True, + 'multiprocess_worker_index': 0, + }, { + 'override_devices': ['sdc'], + 'override_partitions': [12, 34, 56], + 'override_policies': [], + 'have_overrides': True, + 'multiprocess_worker_index': 1, + }]) + + args = self.replicator.get_worker_args( + devices="sda,sdc,sdd", once=True) + self.assertEqual(args, [{ + 'override_devices': ['sda', 'sdd'], + 'override_partitions': [], + 'override_policies': [], + 'have_overrides': True, + 'multiprocess_worker_index': 0, + }, { + 'override_devices': ['sdc'], + 'override_partitions': [], + 'override_policies': [], + 'have_overrides': True, + 'multiprocess_worker_index': 1, + }]) + + # no overrides apply in run-forever mode + args = self.replicator.get_worker_args( + devices="sda,sdc,sdd", partitions="12,34,56", once=False) + self.assertEqual(args, [{ + 'override_devices': ['sda', 'sdc', 'sde'], + 'override_partitions': [], + 'override_policies': [], + 'have_overrides': False, + 'multiprocess_worker_index': 0, + }, { + 'override_devices': ['sdb', 'sdd'], + 'override_partitions': [], + 'override_policies': [], + 'have_overrides': False, + 'multiprocess_worker_index': 1, + }]) + + def test_worker_logging(self): + self.replicator.replicator_workers = 3 + + def log_some_stuff(*a, **kw): + self.replicator.logger.debug("debug message") + self.replicator.logger.info("info message") + self.replicator.logger.warning("warning message") + self.replicator.logger.error("error message") + + with 
mock.patch.object(self.replicator, 'replicate', log_some_stuff), \ + mock.patch("os.getpid", lambda: 8804): + self.replicator.get_worker_args() + self.replicator.run_once(multiprocess_worker_index=0, + override_devices=['sda', 'sdb']) + + prefix = "[worker 1/3 pid=8804] " + for level, lines in self.logger.logger.all_log_lines().items(): + for line in lines: + self.assertTrue( + line.startswith(prefix), + "%r doesn't start with %r (level %s)" % ( + line, prefix, level)) + + def test_recon_run_once(self): + self.replicator.replicator_workers = 3 + + the_time = [1521680000] + + def mock_time(): + rv = the_time[0] + the_time[0] += 120 + return rv + + # Simulate a couple child processes + with mock.patch.object(self.replicator, 'replicate', + self.fake_replicate), \ + mock.patch('time.time', mock_time): + self.replicator.get_worker_args() + self.replicator.run_once(multiprocess_worker_index=0, + override_devices=['sda', 'sdb']) + self.replicator.run_once(multiprocess_worker_index=1, + override_devices=['sdc']) + self.replicator.run_once(multiprocess_worker_index=2, + override_devices=['sdd', 'sde']) + + with open(self.recon_file) as fh: + recon_data = json.load(fh) + self.assertIn('object_replication_per_disk', recon_data) + self.assertIn('sda', recon_data['object_replication_per_disk']) + self.assertIn('sdb', recon_data['object_replication_per_disk']) + self.assertIn('sdc', recon_data['object_replication_per_disk']) + self.assertIn('sdd', recon_data['object_replication_per_disk']) + self.assertIn('sde', recon_data['object_replication_per_disk']) + sda = recon_data['object_replication_per_disk']['sda'] + + # Spot-check a couple of fields + self.assertEqual(sda['replication_stats']['attempted'], 1) + self.assertEqual(sda['replication_stats']['success'], 10) + self.assertEqual(sda['object_replication_time'], 2) # minutes + self.assertEqual(sda['object_replication_last'], 1521680120) + + # Aggregate the workers' recon updates + self.replicator.post_multiprocess_run() + with open(self.recon_file) as fh: + recon_data = json.load(fh) + self.assertEqual(recon_data['replication_stats']['attempted'], 15) + self.assertEqual(recon_data['replication_stats']['failure'], 1500) + self.assertEqual(recon_data['replication_stats']['hashmatch'], 15000) + self.assertEqual(recon_data['replication_stats']['remove'], 1500000) + self.assertEqual(recon_data['replication_stats']['rsync'], 150000) + self.assertEqual(recon_data['replication_stats']['success'], 150) + self.assertEqual(recon_data['replication_stats']['suffix_count'], + 15000000) + self.assertEqual(recon_data['replication_stats']['suffix_hash'], + 150000000) + self.assertEqual(recon_data['replication_stats']['suffix_sync'], + 1500000000) + self.assertEqual(recon_data['replication_stats']['failure_nodes'], { + '10.1.1.1': {'d11': 1}, + '10.2.2.2': {'d22': 2}, + '10.3.3.3': {'d33': 3}, + '10.4.4.4': {'d44': 4}, + '10.5.5.5': {'d55': 5}, + }) + self.assertEqual(recon_data['object_replication_time'], 2) # minutes + self.assertEqual(recon_data['object_replication_last'], 1521680120) + + def test_recon_skipped_with_overrides(self): + self.replicator.replicator_workers = 3 + + the_time = [1521680000] + + def mock_time(): + rv = the_time[0] + the_time[0] += 120 + return rv + + with mock.patch.object(self.replicator, 'replicate', + self.fake_replicate), \ + mock.patch('time.time', mock_time): + self.replicator.get_worker_args() + self.replicator.run_once(multiprocess_worker_index=0, + have_overrides=True, + override_devices=['sda', 'sdb']) + 
self.assertFalse(os.path.exists(self.recon_file)) + + # have_overrides=False makes us get recon stats + with mock.patch.object(self.replicator, 'replicate', + self.fake_replicate), \ + mock.patch('time.time', mock_time): + self.replicator.get_worker_args() + self.replicator.run_once(multiprocess_worker_index=0, + have_overrides=False, + override_devices=['sda', 'sdb']) + with open(self.recon_file) as fh: + recon_data = json.load(fh) + self.assertIn('sda', recon_data['object_replication_per_disk']) + + def test_recon_run_forever(self): + the_time = [1521521521.52152] + + def mock_time(): + rv = the_time[0] + the_time[0] += 120 + return rv + + self.replicator.replicator_workers = 2 + self.replicator._next_rcache_update = the_time[0] + + # One worker has finished a pass, the other hasn't. + with mock.patch.object(self.replicator, 'replicate', + self.fake_replicate), \ + mock.patch('time.time', mock_time): + self.replicator.get_worker_args() + # Yes, this says run_once, but this is only to populate + # object.recon with some stats. The real test is for the + # aggregation. + self.replicator.run_once(multiprocess_worker_index=0, + override_devices=['sda', 'sdb', 'sdc']) + + # This will not produce aggregate stats since not every device has + # finished a pass. + the_time[0] += self.replicator.stats_interval + with mock.patch('time.time', mock_time): + rv = self.replicator.is_healthy() + self.assertTrue(rv) + with open(self.recon_file) as fh: + recon_data = json.load(fh) + self.assertNotIn('replication_stats', recon_data) + + # Now all the local devices have completed a replication pass, so we + # will produce aggregate stats. + with mock.patch.object(self.replicator, 'replicate', + self.fake_replicate), \ + mock.patch('time.time', mock_time): + self.replicator.get_worker_args() + self.replicator.run_once(multiprocess_worker_index=1, + override_devices=['sdd', 'sde']) + the_time[0] += self.replicator.stats_interval + with mock.patch('time.time', mock_time): + rv = self.replicator.is_healthy() + self.assertTrue(rv) + with open(self.recon_file) as fh: + recon_data = json.load(fh) + self.assertIn('replication_stats', recon_data) + + # no need to exhaustively check every sum + self.assertEqual(recon_data['replication_stats']['attempted'], 15) + self.assertEqual(recon_data['replication_stats']['success'], 150) + + self.assertEqual( + recon_data['replication_last'], + min(pd['replication_last'] + for pd in recon_data['object_replication_per_disk'].values())) + + +class TestReplicatorStats(unittest.TestCase): + def test_to_recon(self): + st = object_replicator.Stats( + attempted=1, failure=2, hashmatch=3, remove=4, + rsync=5, success=7, + suffix_count=8, suffix_hash=9, suffix_sync=10, + failure_nodes={'10.1.2.3': {'sda': 100, 'sdb': 200}}) + # This is what appears in the recon dump + self.assertEqual(st.to_recon(), { + 'attempted': 1, + 'failure': 2, + 'hashmatch': 3, + 'remove': 4, + 'rsync': 5, + 'success': 7, + 'suffix_count': 8, + 'suffix_hash': 9, + 'suffix_sync': 10, + 'failure_nodes': {'10.1.2.3': {'sda': 100, 'sdb': 200}}, + }) + + def test_recon_roundtrip(self): + before = object_replicator.Stats( + attempted=1, failure=2, hashmatch=3, remove=4, + rsync=5, success=7, + suffix_count=8, suffix_hash=9, suffix_sync=10, + failure_nodes={'10.1.2.3': {'sda': 100, 'sdb': 200}}) + after = object_replicator.Stats.from_recon(before.to_recon()) + self.assertEqual(after.attempted, before.attempted) + self.assertEqual(after.failure, before.failure) + self.assertEqual(after.hashmatch, before.hashmatch) + 
self.assertEqual(after.remove, before.remove) + self.assertEqual(after.rsync, before.rsync) + self.assertEqual(after.success, before.success) + self.assertEqual(after.suffix_count, before.suffix_count) + self.assertEqual(after.suffix_hash, before.suffix_hash) + self.assertEqual(after.suffix_sync, before.suffix_sync) + self.assertEqual(after.failure_nodes, before.failure_nodes) + + def test_from_recon_skips_extra_fields(self): + # If another attribute ever sneaks its way in, we should ignore it. + # This will make aborted upgrades a little less painful for + # operators. + recon_dict = {'attempted': 1, 'failure': 2, 'hashmatch': 3, + 'spices': 5, 'treasures': 8} + stats = object_replicator.Stats.from_recon(recon_dict) + self.assertEqual(stats.attempted, 1) + self.assertEqual(stats.failure, 2) + self.assertEqual(stats.hashmatch, 3) + # We don't gain attributes just because they're in object.recon. + self.assertFalse(hasattr(stats, 'spices')) + self.assertFalse(hasattr(stats, 'treasures')) + + def test_add_failure_stats(self): + st = object_replicator.Stats() + st.add_failure_stats([('10.1.1.1', 'd10'), ('10.1.1.1', 'd11')]) + st.add_failure_stats([('10.1.1.1', 'd10')]) + st.add_failure_stats([('10.1.1.1', 'd12'), ('10.2.2.2', 'd20'), + ('10.2.2.2', 'd21'), ('10.2.2.2', 'd21'), + ('10.2.2.2', 'd21')]) + self.assertEqual(st.failure, 8) + + as_dict = st.to_recon() + self.assertEqual(as_dict['failure_nodes'], { + '10.1.1.1': { + 'd10': 2, + 'd11': 1, + 'd12': 1, + }, + '10.2.2.2': { + 'd20': 1, + 'd21': 3, + }, + }) + + def test_add(self): + st1 = object_replicator.Stats( + attempted=1, failure=2, hashmatch=3, remove=4, rsync=5, + success=6, suffix_count=7, suffix_hash=8, suffix_sync=9, + failure_nodes={ + '10.1.1.1': {'sda': 10, 'sdb': 20}, + '10.1.1.2': {'sda': 10, 'sdb': 20}}) + st2 = object_replicator.Stats( + attempted=2, failure=4, hashmatch=6, remove=8, rsync=10, + success=12, suffix_count=14, suffix_hash=16, suffix_sync=18, + failure_nodes={ + '10.1.1.2': {'sda': 10, 'sdb': 20}, + '10.1.1.3': {'sda': 10, 'sdb': 20}}) + total = st1 + st2 + self.assertEqual(total.attempted, 3) + self.assertEqual(total.failure, 6) + self.assertEqual(total.hashmatch, 9) + self.assertEqual(total.remove, 12) + self.assertEqual(total.rsync, 15) + self.assertEqual(total.success, 18) + self.assertEqual(total.suffix_count, 21) + self.assertEqual(total.suffix_hash, 24) + self.assertEqual(total.suffix_sync, 27) + self.assertEqual(total.failure_nodes, { + '10.1.1.1': {'sda': 10, 'sdb': 20}, + '10.1.1.2': {'sda': 20, 'sdb': 40}, + '10.1.1.3': {'sda': 10, 'sdb': 20}, + }) + if __name__ == '__main__': unittest.main() diff -Nru swift-2.17.0/test/unit/obj/test_server.py swift-2.18.0/test/unit/obj/test_server.py --- swift-2.17.0/test/unit/obj/test_server.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/obj/test_server.py 2018-05-30 10:17:02.000000000 +0000 @@ -1053,7 +1053,7 @@ mock_ring = mock.MagicMock() mock_ring.get_nodes.return_value = (99, [node]) object_updater.container_ring = mock_ring - mock_update.return_value = ((True, 1)) + mock_update.return_value = ((True, 1, None)) object_updater.run_once() self.assertEqual(1, mock_update.call_count) self.assertEqual((node, 99, 'PUT', '/a/c/o'), @@ -1061,6 +1061,7 @@ actual_headers = mock_update.call_args_list[0][0][4] # User-Agent is updated. 
expected_post_headers['User-Agent'] = 'object-updater %s' % os.getpid() + expected_post_headers['X-Backend-Accept-Redirect'] = 'true' self.assertDictEqual(expected_post_headers, actual_headers) self.assertFalse( os.listdir(os.path.join( @@ -1073,6 +1074,104 @@ self._test_PUT_then_POST_async_pendings( POLICIES[1], update_etag='override_etag') + def _check_PUT_redirected_async_pending(self, container_path=None): + # When container update is redirected verify that the redirect location + # is persisted in the async pending file. + policy = POLICIES[0] + device_dir = os.path.join(self.testdir, 'sda1') + t_put = next(self.ts) + update_etag = '098f6bcd4621d373cade4e832627b4f6' + + put_headers = { + 'X-Trans-Id': 'put_trans_id', + 'X-Timestamp': t_put.internal, + 'Content-Type': 'application/octet-stream;swift_bytes=123456789', + 'Content-Length': '4', + 'X-Backend-Storage-Policy-Index': int(policy), + 'X-Container-Host': 'chost:3200', + 'X-Container-Partition': '99', + 'X-Container-Device': 'cdevice'} + + if container_path: + # the proxy may include this header + put_headers['X-Backend-Container-Path'] = container_path + expected_update_path = '/cdevice/99/%s/o' % container_path + else: + expected_update_path = '/cdevice/99/a/c/o' + + if policy.policy_type == EC_POLICY: + put_headers.update({ + 'X-Object-Sysmeta-Ec-Frag-Index': '2', + 'X-Backend-Container-Update-Override-Etag': update_etag, + 'X-Object-Sysmeta-Ec-Etag': update_etag}) + + req = Request.blank('/sda1/p/a/c/o', + environ={'REQUEST_METHOD': 'PUT'}, + headers=put_headers, body='test') + resp_headers = {'Location': '/.sharded_a/c_shard_1/o', + 'X-Backend-Redirect-Timestamp': next(self.ts).internal} + + with mocked_http_conn(301, headers=[resp_headers]) as conn, \ + mock.patch('swift.common.utils.HASH_PATH_PREFIX', ''),\ + fake_spawn(): + resp = req.get_response(self.object_controller) + + self.assertEqual(resp.status_int, 201) + self.assertEqual(1, len(conn.requests)) + + self.assertEqual(expected_update_path, conn.requests[0]['path']) + + # whether or not an X-Backend-Container-Path was received from the + # proxy, the async pending file should now have the container_path + # equal to the Location header received in the update response. + async_pending_file_put = os.path.join( + device_dir, diskfile.get_async_dir(policy), 'a83', + '06fbf0b514e5199dfc4e00f42eb5ea83-%s' % t_put.internal) + self.assertTrue(os.path.isfile(async_pending_file_put), + 'Expected %s to be a file but it is not.' 
+ % async_pending_file_put) + expected_put_headers = { + 'Referer': 'PUT http://localhost/sda1/p/a/c/o', + 'X-Trans-Id': 'put_trans_id', + 'X-Timestamp': t_put.internal, + 'X-Content-Type': 'application/octet-stream;swift_bytes=123456789', + 'X-Size': '4', + 'X-Etag': '098f6bcd4621d373cade4e832627b4f6', + 'User-Agent': 'object-server %s' % os.getpid(), + 'X-Backend-Storage-Policy-Index': '%d' % int(policy)} + if policy.policy_type == EC_POLICY: + expected_put_headers['X-Etag'] = update_etag + self.assertEqual( + {'headers': expected_put_headers, + 'account': 'a', 'container': 'c', 'obj': 'o', 'op': 'PUT', + 'container_path': '.sharded_a/c_shard_1'}, + pickle.load(open(async_pending_file_put))) + + # when updater is run its first request will be to the redirect + # location that is persisted in the async pending file + with mocked_http_conn(201) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache', + lambda *args: None): + object_updater = updater.ObjectUpdater( + {'devices': self.testdir, + 'mount_check': 'false'}, logger=debug_logger()) + node = {'id': 1, 'ip': 'chost', 'port': 3200, + 'device': 'cdevice'} + mock_ring = mock.MagicMock() + mock_ring.get_nodes.return_value = (99, [node]) + object_updater.container_ring = mock_ring + object_updater.run_once() + + self.assertEqual(1, len(conn.requests)) + self.assertEqual('/cdevice/99/.sharded_a/c_shard_1/o', + conn.requests[0]['path']) + + def test_PUT_redirected_async_pending(self): + self._check_PUT_redirected_async_pending() + + def test_PUT_redirected_async_pending_with_container_path(self): + self._check_PUT_redirected_async_pending(container_path='.another/c') + def test_POST_quarantine_zbyte(self): timestamp = normalize_timestamp(time()) req = Request.blank('/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'}, @@ -2918,6 +3017,22 @@ self.assertEqual(resp.headers['X-Backend-Timestamp'], utils.Timestamp(timestamp).internal) + def test_GET_range_zero_byte_object(self): + timestamp = normalize_timestamp(time()) + req = Request.blank('/sda1/p/a/c/zero-byte', + environ={'REQUEST_METHOD': 'PUT'}, + headers={'X-Timestamp': timestamp, + 'Content-Type': 'application/x-test'}) + req.body = '' + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, 201) + + req = Request.blank('/sda1/p/a/c/zero-byte', + environ={'REQUEST_METHOD': 'GET'}, + headers={'Range': 'bytes=-10'}) + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, 200) + def test_GET_if_match(self): req = Request.blank('/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'}, headers={ @@ -5247,6 +5362,95 @@ 'X-Backend-Container-Update-Override-Content-Type': 'ignored', 'X-Backend-Container-Update-Override-Foo': 'ignored'}) + def test_PUT_container_update_to_shard(self): + # verify that alternate container update path is respected when + # included in request headers + def do_test(container_path, expected_path, expected_container_path): + policy = random.choice(list(POLICIES)) + container_updates = [] + + def capture_updates( + ip, port, method, path, headers, *args, **kwargs): + container_updates.append((ip, port, method, path, headers)) + + pickle_async_update_args = [] + + def fake_pickle_async_update(*args): + pickle_async_update_args.append(args) + + diskfile_mgr = self.object_controller._diskfile_router[policy] + diskfile_mgr.pickle_async_update = fake_pickle_async_update + + ts_put = next(self.ts) + headers = { + 'X-Timestamp': ts_put.internal, + 'X-Trans-Id': '123', + 'X-Container-Host': 'chost:cport', + 
'X-Container-Partition': 'cpartition', + 'X-Container-Device': 'cdevice', + 'Content-Type': 'text/plain', + 'X-Object-Sysmeta-Ec-Frag-Index': 0, + 'X-Backend-Storage-Policy-Index': int(policy), + } + if container_path is not None: + headers['X-Backend-Container-Path'] = container_path + + req = Request.blank('/sda1/0/a/c/o', method='PUT', + headers=headers, body='') + with mocked_http_conn( + 500, give_connect=capture_updates) as fake_conn: + with fake_spawn(): + resp = req.get_response(self.object_controller) + self.assertRaises(StopIteration, fake_conn.code_iter.next) + self.assertEqual(resp.status_int, 201) + self.assertEqual(len(container_updates), 1) + # verify expected path used in update request + ip, port, method, path, headers = container_updates[0] + self.assertEqual(ip, 'chost') + self.assertEqual(port, 'cport') + self.assertEqual(method, 'PUT') + self.assertEqual(path, '/cdevice/cpartition/%s/o' % expected_path) + + # verify that the picked update *always* has root container + self.assertEqual(1, len(pickle_async_update_args)) + (objdevice, account, container, obj, data, timestamp, + policy) = pickle_async_update_args[0] + self.assertEqual(objdevice, 'sda1') + self.assertEqual(account, 'a') # NB user account + self.assertEqual(container, 'c') # NB root container + self.assertEqual(obj, 'o') + self.assertEqual(timestamp, ts_put.internal) + self.assertEqual(policy, policy) + expected_data = { + 'headers': HeaderKeyDict({ + 'X-Size': '0', + 'User-Agent': 'object-server %s' % os.getpid(), + 'X-Content-Type': 'text/plain', + 'X-Timestamp': ts_put.internal, + 'X-Trans-Id': '123', + 'Referer': 'PUT http://localhost/sda1/0/a/c/o', + 'X-Backend-Storage-Policy-Index': int(policy), + 'X-Etag': 'd41d8cd98f00b204e9800998ecf8427e'}), + 'obj': 'o', + 'account': 'a', + 'container': 'c', + 'op': 'PUT'} + if expected_container_path: + expected_data['container_path'] = expected_container_path + self.assertEqual(expected_data, data) + + do_test('a_shard/c_shard', 'a_shard/c_shard', 'a_shard/c_shard') + do_test('', 'a/c', None) + do_test(None, 'a/c', None) + # TODO: should these cases trigger a 400 response rather than + # defaulting to root path? 
+ do_test('garbage', 'a/c', None) + do_test('/', 'a/c', None) + do_test('/no-acct', 'a/c', None) + do_test('no-cont/', 'a/c', None) + do_test('too/many/parts', 'a/c', None) + do_test('/leading/slash', 'a/c', None) + def test_container_update_async(self): policy = random.choice(list(POLICIES)) req = Request.blank( @@ -5319,23 +5523,21 @@ 'X-Container-Partition': '20', 'X-Container-Host': '1.2.3.4:5', 'X-Container-Device': 'sdb1'}) - with mock.patch.object(object_server, 'spawn', - local_fake_spawn): - with mock.patch.object(self.object_controller, - 'async_update', - local_fake_async_update): - resp = req.get_response(self.object_controller) - # check the response is completed and successful - self.assertEqual(resp.status_int, 201) - # check that async_update hasn't been called - self.assertFalse(len(called_async_update_args)) - # now do the work in greenthreads - for func, a, kw in saved_spawn_calls: - gt = spawn(func, *a, **kw) - greenthreads.append(gt) - # wait for the greenthreads to finish - for gt in greenthreads: - gt.wait() + with mock.patch.object(object_server, 'spawn', local_fake_spawn), \ + mock.patch.object(self.object_controller, 'async_update', + local_fake_async_update): + resp = req.get_response(self.object_controller) + # check the response is completed and successful + self.assertEqual(resp.status_int, 201) + # check that async_update hasn't been called + self.assertFalse(len(called_async_update_args)) + # now do the work in greenthreads + for func, a, kw in saved_spawn_calls: + gt = spawn(func, *a, **kw) + greenthreads.append(gt) + # wait for the greenthreads to finish + for gt in greenthreads: + gt.wait() # check that the calls to async_update have happened headers_out = {'X-Size': '0', 'X-Content-Type': 'application/burrito', @@ -5346,7 +5548,8 @@ 'X-Etag': 'd41d8cd98f00b204e9800998ecf8427e'} expected = [('PUT', 'a', 'c', 'o', '1.2.3.4:5', '20', 'sdb1', headers_out, 'sda1', POLICIES[0]), - {'logger_thread_locals': (None, None)}] + {'logger_thread_locals': (None, None), + 'container_path': None}] self.assertEqual(called_async_update_args, [expected]) def test_container_update_as_greenthread_with_timeout(self): @@ -6048,6 +6251,68 @@ except OSError as err: self.assertEqual(err.errno, errno.ENOENT) + def test_x_if_delete_at_formats(self): + policy = POLICIES.get_by_index(0) + test_time = time() + put_time = test_time + delete_time = test_time + 1 + delete_at_timestamp = int(test_time + 10000) + delete_at_container = str( + delete_at_timestamp / + self.object_controller.expiring_objects_container_divisor * + self.object_controller.expiring_objects_container_divisor) + + def do_test(if_delete_at, expected_status): + req = Request.blank( + '/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'PUT'}, + headers={'X-Timestamp': normalize_timestamp(put_time), + 'X-Delete-At': str(delete_at_timestamp), + 'X-Delete-At-Container': delete_at_container, + 'Content-Length': '4', + 'Content-Type': 'application/octet-stream'}) + req.body = 'TEST' + + # Mock out async_update so we don't get any async_pending files. 
+ with mock.patch.object(self.object_controller, 'async_update'): + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, 201) + + req = Request.blank( + '/sda1/p/a/c/o', environ={'REQUEST_METHOD': 'DELETE'}, + headers={'X-Timestamp': normalize_timestamp(delete_time), + 'X-Backend-Clean-Expiring-Object-Queue': 'false', + 'X-If-Delete-At': if_delete_at}) + # Again, we don't care about async_pending files (for this test) + with mock.patch.object(self.object_controller, 'async_update'): + resp = req.get_response(self.object_controller) + self.assertEqual(resp.status_int, expected_status) + + # Clean up the tombstone + objfile = self.df_mgr.get_diskfile('sda1', 'p', 'a', 'c', 'o', + policy=policy) + files = os.listdir(objfile._datadir) + self.assertEqual(len(files), 1, + 'Expected to find one file, got %r' % files) + if expected_status == 204: + self.assertTrue(files[0].endswith('.ts'), + 'Expected a tombstone, found %r' % files[0]) + else: + self.assertTrue(files[0].endswith('.data'), + 'Expected a data file, found %r' % files[0]) + os.unlink(os.path.join(objfile._datadir, files[0])) + + # More as a reminder than anything else + self.assertIsInstance(delete_at_timestamp, int) + + do_test(str(delete_at_timestamp), 204) + do_test(str(delete_at_timestamp) + ':', 400) + do_test(Timestamp(delete_at_timestamp).isoformat, 400) + do_test(Timestamp(delete_at_timestamp).normal, 204) + do_test(Timestamp(delete_at_timestamp, delta=1).normal, 412) + do_test(Timestamp(delete_at_timestamp, delta=-1).normal, 412) + do_test(Timestamp(delete_at_timestamp, offset=1).internal, 412) + do_test(Timestamp(delete_at_timestamp, offset=15).internal, 412) + def test_DELETE_but_expired(self): test_time = time() + 10000 delete_at_timestamp = int(test_time + 100) diff -Nru swift-2.17.0/test/unit/obj/test_ssync_sender.py swift-2.18.0/test/unit/obj/test_ssync_sender.py --- swift-2.17.0/test/unit/obj/test_ssync_sender.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/obj/test_ssync_sender.py 2018-05-30 10:17:02.000000000 +0000 @@ -87,8 +87,9 @@ def setUp(self): skip_if_no_xattrs() super(TestSender, self).setUp() + self.daemon_logger = debug_logger('test-ssync-sender') self.daemon = ObjectReplicator(self.daemon_conf, - debug_logger('test-ssync-sender')) + self.daemon_logger) job = {'policy': POLICIES.legacy} # sufficient for Sender.__init__ self.sender = ssync_sender.Sender(self.daemon, None, job, None) @@ -109,7 +110,7 @@ success, candidates = self.sender() self.assertFalse(success) self.assertEqual(candidates, {}) - error_lines = self.daemon.logger.get_lines_for_level('error') + error_lines = self.daemon_logger.get_lines_for_level('error') self.assertEqual(1, len(error_lines)) self.assertEqual('1.2.3.4:5678/sda1/9 1 second: test connect', error_lines[0]) @@ -128,7 +129,7 @@ success, candidates = self.sender() self.assertFalse(success) self.assertEqual(candidates, {}) - error_lines = self.daemon.logger.get_lines_for_level('error') + error_lines = self.daemon_logger.get_lines_for_level('error') self.assertEqual(1, len(error_lines)) self.assertEqual('1.2.3.4:5678/sda1/9 test connect', error_lines[0]) @@ -143,10 +144,10 @@ success, candidates = self.sender() self.assertFalse(success) self.assertEqual(candidates, {}) - error_lines = self.daemon.logger.get_lines_for_level('error') + error_lines = self.daemon_logger.get_lines_for_level('error') for line in error_lines: self.assertTrue(line.startswith( - '1.2.3.4:5678/sda1/9 EXCEPTION in ssync.Sender:')) + '1.2.3.4:5678/sda1/9 EXCEPTION 
in ssync.Sender: ')) def test_call_catches_exception_handling_exception(self): self.sender.node = None # Will cause inside exception handler to fail @@ -155,7 +156,7 @@ success, candidates = self.sender() self.assertFalse(success) self.assertEqual(candidates, {}) - error_lines = self.daemon.logger.get_lines_for_level('error') + error_lines = self.daemon_logger.get_lines_for_level('error') for line in error_lines: self.assertTrue(line.startswith( 'EXCEPTION in ssync.Sender')) @@ -598,7 +599,7 @@ success, candidates = self.sender() self.assertFalse(success) self.assertEqual(candidates, {}) - error_lines = self.daemon.logger.get_lines_for_level('error') + error_lines = self.daemon_logger.get_lines_for_level('error') for line in error_lines: self.assertTrue(line.startswith( '1.2.3.4:5678/sda1/9 0.01 seconds: connect send')) @@ -622,7 +623,7 @@ success, candidates = self.sender() self.assertFalse(success) self.assertEqual(candidates, {}) - error_lines = self.daemon.logger.get_lines_for_level('error') + error_lines = self.daemon_logger.get_lines_for_level('error') for line in error_lines: self.assertTrue(line.startswith( '1.2.3.4:5678/sda1/9 0.02 seconds: connect receive')) @@ -650,7 +651,7 @@ success, candidates = self.sender() self.assertFalse(success) self.assertEqual(candidates, {}) - error_lines = self.daemon.logger.get_lines_for_level('error') + error_lines = self.daemon_logger.get_lines_for_level('error') for line in error_lines: self.assertTrue(line.startswith( '1.2.3.4:5678/sda1/9 Expected status 200; got 503')) diff -Nru swift-2.17.0/test/unit/obj/test_updater.py swift-2.18.0/test/unit/obj/test_updater.py --- swift-2.17.0/test/unit/obj/test_updater.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/obj/test_updater.py 2018-05-30 10:17:02.000000000 +0000 @@ -65,7 +65,9 @@ {'id': 1, 'ip': '127.0.0.1', 'port': 1, 'device': 'sda1', 'zone': 2}, {'id': 2, 'ip': '127.0.0.1', 'port': 1, - 'device': 'sda1', 'zone': 4}], 30), + 'device': 'sda1', 'zone': 4}, + {'id': 3, 'ip': '127.0.0.1', 'port': 1, + 'device': 'sda1', 'zone': 6}], 30), f) self.devices_dir = os.path.join(self.testdir, 'devices') os.mkdir(self.devices_dir) @@ -74,6 +76,7 @@ for policy in POLICIES: os.mkdir(os.path.join(self.sda1, get_tmp_dir(policy))) self.logger = debug_logger() + self.ts_iter = make_timestamp_iter() def tearDown(self): rmtree(self.testdir, ignore_errors=1) @@ -299,19 +302,22 @@ self.assertIn("sweep progress", info_lines[1]) # the space ensures it's a positive number self.assertIn( - "2 successes, 0 failures, 0 quarantines, 2 unlinks, 0 error", + "2 successes, 0 failures, 0 quarantines, 2 unlinks, 0 errors, " + "0 redirects", info_lines[1]) self.assertIn(self.sda1, info_lines[1]) self.assertIn("sweep progress", info_lines[2]) self.assertIn( - "4 successes, 0 failures, 0 quarantines, 4 unlinks, 0 error", + "4 successes, 0 failures, 0 quarantines, 4 unlinks, 0 errors, " + "0 redirects", info_lines[2]) self.assertIn(self.sda1, info_lines[2]) self.assertIn("sweep complete", info_lines[3]) self.assertIn( - "5 successes, 0 failures, 0 quarantines, 5 unlinks, 0 error", + "5 successes, 0 failures, 0 quarantines, 5 unlinks, 0 errors, " + "0 redirects", info_lines[3]) self.assertIn(self.sda1, info_lines[3]) @@ -547,6 +553,26 @@ {'successes': 1, 'unlinks': 1, 'async_pendings': 1}) + def _write_async_update(self, dfmanager, timestamp, policy, + headers=None, container_path=None): + # write an async + account, container, obj = 'a', 'c', 'o' + op = 'PUT' + headers_out = headers or { + 'x-size': 0, + 
'x-content-type': 'text/plain', + 'x-etag': 'd41d8cd98f00b204e9800998ecf8427e', + 'x-timestamp': timestamp.internal, + 'X-Backend-Storage-Policy-Index': int(policy), + 'User-Agent': 'object-server %s' % os.getpid() + } + data = {'op': op, 'account': account, 'container': container, + 'obj': obj, 'headers': headers_out} + if container_path: + data['container_path'] = container_path + dfmanager.pickle_async_update(self.sda1, account, container, obj, + data, timestamp, policy) + def test_obj_put_async_updates(self): ts_iter = make_timestamp_iter() policies = list(POLICIES) @@ -562,16 +588,12 @@ async_dir = os.path.join(self.sda1, get_async_dir(policies[0])) os.mkdir(async_dir) - def do_test(headers_out, expected): + def do_test(headers_out, expected, container_path=None): # write an async dfmanager = DiskFileManager(conf, daemon.logger) - account, container, obj = 'a', 'c', 'o' - op = 'PUT' - data = {'op': op, 'account': account, 'container': container, - 'obj': obj, 'headers': headers_out} - dfmanager.pickle_async_update(self.sda1, account, container, obj, - data, next(ts_iter), policies[0]) - + self._write_async_update(dfmanager, next(ts_iter), policies[0], + headers=headers_out, + container_path=container_path) request_log = [] def capture(*args, **kwargs): @@ -613,11 +635,21 @@ 'X-Etag': 'd41d8cd98f00b204e9800998ecf8427e', 'X-Timestamp': ts.normal, 'X-Backend-Storage-Policy-Index': str(int(policies[0])), - 'User-Agent': 'object-updater %s' % os.getpid() + 'User-Agent': 'object-updater %s' % os.getpid(), + 'X-Backend-Accept-Redirect': 'true', } + # always expect X-Backend-Accept-Redirect to be true + do_test(headers_out, expected, container_path='.shards_a/shard_c') do_test(headers_out, expected) + # ...unless X-Backend-Accept-Redirect is already set + expected['X-Backend-Accept-Redirect'] = 'false' + headers_out_2 = dict(headers_out) + headers_out_2['X-Backend-Accept-Redirect'] = 'false' + do_test(headers_out_2, expected) + # updater should add policy header if missing + expected['X-Backend-Accept-Redirect'] = 'true' headers_out['X-Backend-Storage-Policy-Index'] = None do_test(headers_out, expected) @@ -632,6 +664,414 @@ 'X-Backend-Storage-Policy-Index') do_test(headers_out, expected) + def _check_update_requests(self, requests, timestamp, policy): + # do some sanity checks on update request + expected_headers = { + 'X-Size': '0', + 'X-Content-Type': 'text/plain', + 'X-Etag': 'd41d8cd98f00b204e9800998ecf8427e', + 'X-Timestamp': timestamp.internal, + 'X-Backend-Storage-Policy-Index': str(int(policy)), + 'User-Agent': 'object-updater %s' % os.getpid(), + 'X-Backend-Accept-Redirect': 'true'} + for request in requests: + self.assertEqual('PUT', request['method']) + self.assertDictEqual(expected_headers, request['headers']) + + def test_obj_put_async_root_update_redirected(self): + policies = list(POLICIES) + random.shuffle(policies) + # setup updater + conf = { + 'devices': self.devices_dir, + 'mount_check': 'false', + 'swift_dir': self.testdir, + } + daemon = object_updater.ObjectUpdater(conf, logger=self.logger) + async_dir = os.path.join(self.sda1, get_async_dir(policies[0])) + os.mkdir(async_dir) + dfmanager = DiskFileManager(conf, daemon.logger) + + ts_obj = next(self.ts_iter) + self._write_async_update(dfmanager, ts_obj, policies[0]) + + # run once + ts_redirect_1 = next(self.ts_iter) + ts_redirect_2 = next(self.ts_iter) + fake_responses = [ + # first round of update attempts, newest redirect should be chosen + (200, {}), + (301, {'Location': '/.shards_a/c_shard_new/o', + 
'X-Backend-Redirect-Timestamp': ts_redirect_2.internal}), + (301, {'Location': '/.shards_a/c_shard_old/o', + 'X-Backend-Redirect-Timestamp': ts_redirect_1.internal}), + # second round of update attempts + (200, {}), + (200, {}), + (200, {}), + ] + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + + self._check_update_requests(conn.requests[:3], ts_obj, policies[0]) + self._check_update_requests(conn.requests[3:], ts_obj, policies[0]) + self.assertEqual(['/sda1/0/a/c/o'] * 3 + + ['/sda1/0/.shards_a/c_shard_new/o'] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 1, 'successes': 1, + 'unlinks': 1, 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + self.assertFalse(os.listdir(async_dir)) # no async file + + def test_obj_put_async_root_update_redirected_previous_success(self): + policies = list(POLICIES) + random.shuffle(policies) + # setup updater + conf = { + 'devices': self.devices_dir, + 'mount_check': 'false', + 'swift_dir': self.testdir, + } + daemon = object_updater.ObjectUpdater(conf, logger=self.logger) + async_dir = os.path.join(self.sda1, get_async_dir(policies[0])) + os.mkdir(async_dir) + dfmanager = DiskFileManager(conf, daemon.logger) + + ts_obj = next(self.ts_iter) + self._write_async_update(dfmanager, ts_obj, policies[0]) + orig_async_path, orig_async_data = self._check_async_file(async_dir) + + # run once + with mocked_http_conn( + 507, 200, 507) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + + self._check_update_requests(conn.requests, ts_obj, policies[0]) + self.assertEqual(['/sda1/0/a/c/o'] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'failures': 1, 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + async_path, async_data = self._check_async_file(async_dir) + self.assertEqual(dict(orig_async_data, successes=[1]), async_data) + + # run again - expect 3 redirected updates despite previous success + ts_redirect = next(self.ts_iter) + resp_headers_1 = {'Location': '/.shards_a/c_shard_1/o', + 'X-Backend-Redirect-Timestamp': ts_redirect.internal} + fake_responses = ( + # 1st round of redirects, 2nd round of redirects + [(301, resp_headers_1)] * 2 + [(200, {})] * 3) + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + + self._check_update_requests(conn.requests[:2], ts_obj, policies[0]) + self._check_update_requests(conn.requests[2:], ts_obj, policies[0]) + root_part = daemon.container_ring.get_part('a/c') + shard_1_part = daemon.container_ring.get_part('.shards_a/c_shard_1') + self.assertEqual( + ['/sda1/%s/a/c/o' % root_part] * 2 + + ['/sda1/%s/.shards_a/c_shard_1/o' % shard_1_part] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 1, 'successes': 1, 'failures': 1, 'unlinks': 1, + 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + self.assertFalse(os.listdir(async_dir)) # no async file + + def _check_async_file(self, async_dir): + async_subdirs = os.listdir(async_dir) + self.assertEqual([mock.ANY], async_subdirs) + async_files = os.listdir(os.path.join(async_dir, async_subdirs[0])) + self.assertEqual([mock.ANY], async_files) + async_path = os.path.join( + async_dir, async_subdirs[0], 
async_files[0]) + with open(async_path) as fd: + async_data = pickle.load(fd) + return async_path, async_data + + def _check_obj_put_async_update_bad_redirect_headers(self, headers): + policies = list(POLICIES) + random.shuffle(policies) + # setup updater + conf = { + 'devices': self.devices_dir, + 'mount_check': 'false', + 'swift_dir': self.testdir, + } + daemon = object_updater.ObjectUpdater(conf, logger=self.logger) + async_dir = os.path.join(self.sda1, get_async_dir(policies[0])) + os.mkdir(async_dir) + dfmanager = DiskFileManager(conf, daemon.logger) + + ts_obj = next(self.ts_iter) + self._write_async_update(dfmanager, ts_obj, policies[0]) + orig_async_path, orig_async_data = self._check_async_file(async_dir) + + fake_responses = [ + (301, headers), + (301, headers), + (301, headers), + ] + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + + self._check_update_requests(conn.requests, ts_obj, policies[0]) + self.assertEqual(['/sda1/0/a/c/o'] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'failures': 1, 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + # async file still intact + async_path, async_data = self._check_async_file(async_dir) + self.assertEqual(orig_async_path, async_path) + self.assertEqual(orig_async_data, async_data) + return daemon + + def test_obj_put_async_root_update_missing_location_header(self): + headers = { + 'X-Backend-Redirect-Timestamp': next(self.ts_iter).internal} + self._check_obj_put_async_update_bad_redirect_headers(headers) + + def test_obj_put_async_root_update_bad_location_header(self): + headers = { + 'Location': 'bad bad bad', + 'X-Backend-Redirect-Timestamp': next(self.ts_iter).internal} + daemon = self._check_obj_put_async_update_bad_redirect_headers(headers) + error_lines = daemon.logger.get_lines_for_level('error') + self.assertIn('Container update failed', error_lines[0]) + self.assertIn('Invalid path: bad%20bad%20bad', error_lines[0]) + + def test_obj_put_async_shard_update_redirected_twice(self): + policies = list(POLICIES) + random.shuffle(policies) + # setup updater + conf = { + 'devices': self.devices_dir, + 'mount_check': 'false', + 'swift_dir': self.testdir, + } + daemon = object_updater.ObjectUpdater(conf, logger=self.logger) + async_dir = os.path.join(self.sda1, get_async_dir(policies[0])) + os.mkdir(async_dir) + dfmanager = DiskFileManager(conf, daemon.logger) + + ts_obj = next(self.ts_iter) + self._write_async_update(dfmanager, ts_obj, policies[0], + container_path='.shards_a/c_shard_older') + orig_async_path, orig_async_data = self._check_async_file(async_dir) + + # run once + ts_redirect_1 = next(self.ts_iter) + ts_redirect_2 = next(self.ts_iter) + ts_redirect_3 = next(self.ts_iter) + fake_responses = [ + # 1st round of redirects, newest redirect should be chosen + (301, {'Location': '/.shards_a/c_shard_old/o', + 'X-Backend-Redirect-Timestamp': ts_redirect_1.internal}), + (301, {'Location': '/.shards_a/c_shard_new/o', + 'X-Backend-Redirect-Timestamp': ts_redirect_2.internal}), + (301, {'Location': '/.shards_a/c_shard_old/o', + 'X-Backend-Redirect-Timestamp': ts_redirect_1.internal}), + # 2nd round of redirects + (301, {'Location': '/.shards_a/c_shard_newer/o', + 'X-Backend-Redirect-Timestamp': ts_redirect_3.internal}), + (301, {'Location': '/.shards_a/c_shard_newer/o', + 'X-Backend-Redirect-Timestamp': ts_redirect_3.internal}), + (301, 
{'Location': '/.shards_a/c_shard_newer/o', + 'X-Backend-Redirect-Timestamp': ts_redirect_3.internal}), + ] + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + + self._check_update_requests(conn.requests, ts_obj, policies[0]) + # only *one* set of redirected requests is attempted per cycle + older_part = daemon.container_ring.get_part('.shards_a/c_shard_older') + new_part = daemon.container_ring.get_part('.shards_a/c_shard_new') + newer_part = daemon.container_ring.get_part('.shards_a/c_shard_newer') + self.assertEqual( + ['/sda1/%s/.shards_a/c_shard_older/o' % older_part] * 3 + + ['/sda1/%s/.shards_a/c_shard_new/o' % new_part] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 2, 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + # update failed, we still have pending file with most recent redirect + # response Location header value added to data + async_path, async_data = self._check_async_file(async_dir) + self.assertEqual(orig_async_path, async_path) + self.assertEqual( + dict(orig_async_data, container_path='.shards_a/c_shard_newer', + redirect_history=['.shards_a/c_shard_new', + '.shards_a/c_shard_newer']), + async_data) + + # next cycle, should get latest redirect from pickled async update + fake_responses = [(200, {})] * 3 + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + + self._check_update_requests(conn.requests, ts_obj, policies[0]) + self.assertEqual( + ['/sda1/%s/.shards_a/c_shard_newer/o' % newer_part] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 2, 'successes': 1, 'unlinks': 1, + 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + self.assertFalse(os.listdir(async_dir)) # no async file + + def test_obj_put_async_update_redirection_loop(self): + policies = list(POLICIES) + random.shuffle(policies) + # setup updater + conf = { + 'devices': self.devices_dir, + 'mount_check': 'false', + 'swift_dir': self.testdir, + } + daemon = object_updater.ObjectUpdater(conf, logger=self.logger) + async_dir = os.path.join(self.sda1, get_async_dir(policies[0])) + os.mkdir(async_dir) + dfmanager = DiskFileManager(conf, daemon.logger) + + ts_obj = next(self.ts_iter) + self._write_async_update(dfmanager, ts_obj, policies[0]) + orig_async_path, orig_async_data = self._check_async_file(async_dir) + + # run once + ts_redirect = next(self.ts_iter) + + resp_headers_1 = {'Location': '/.shards_a/c_shard_1/o', + 'X-Backend-Redirect-Timestamp': ts_redirect.internal} + resp_headers_2 = {'Location': '/.shards_a/c_shard_2/o', + 'X-Backend-Redirect-Timestamp': ts_redirect.internal} + fake_responses = ( + # 1st round of redirects, 2nd round of redirects + [(301, resp_headers_1)] * 3 + [(301, resp_headers_2)] * 3) + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + self._check_update_requests(conn.requests[:3], ts_obj, policies[0]) + self._check_update_requests(conn.requests[3:], ts_obj, policies[0]) + # only *one* set of redirected requests is attempted per cycle + root_part = daemon.container_ring.get_part('a/c') + shard_1_part = 
daemon.container_ring.get_part('.shards_a/c_shard_1') + shard_2_part = daemon.container_ring.get_part('.shards_a/c_shard_2') + shard_3_part = daemon.container_ring.get_part('.shards_a/c_shard_3') + self.assertEqual(['/sda1/%s/a/c/o' % root_part] * 3 + + ['/sda1/%s/.shards_a/c_shard_1/o' % shard_1_part] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 2, 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + # update failed, we still have pending file with most recent redirect + # response Location header value added to data + async_path, async_data = self._check_async_file(async_dir) + self.assertEqual(orig_async_path, async_path) + self.assertEqual( + dict(orig_async_data, container_path='.shards_a/c_shard_2', + redirect_history=['.shards_a/c_shard_1', + '.shards_a/c_shard_2']), + async_data) + + # next cycle, more redirects! first is to previously visited location + resp_headers_3 = {'Location': '/.shards_a/c_shard_3/o', + 'X-Backend-Redirect-Timestamp': ts_redirect.internal} + fake_responses = ( + # 1st round of redirects, 2nd round of redirects + [(301, resp_headers_1)] * 3 + [(301, resp_headers_3)] * 3) + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + self._check_update_requests(conn.requests[:3], ts_obj, policies[0]) + self._check_update_requests(conn.requests[3:], ts_obj, policies[0]) + # first try the previously persisted container path, response to that + # creates a loop so ignore and send to root + self.assertEqual( + ['/sda1/%s/.shards_a/c_shard_2/o' % shard_2_part] * 3 + + ['/sda1/%s/a/c/o' % root_part] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 4, 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + # update failed, we still have pending file with most recent redirect + # response Location header value from root added to persisted data + async_path, async_data = self._check_async_file(async_dir) + self.assertEqual(orig_async_path, async_path) + # note: redirect_history was reset when falling back to root + self.assertEqual( + dict(orig_async_data, container_path='.shards_a/c_shard_3', + redirect_history=['.shards_a/c_shard_3']), + async_data) + + # next cycle, more redirects! 
first is to a location visited previously + # but not since last fall back to root, so that location IS tried; + # second is to a location visited since last fall back to root so that + # location is NOT tried + fake_responses = ( + # 1st round of redirects, 2nd round of redirects + [(301, resp_headers_1)] * 3 + [(301, resp_headers_3)] * 3) + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + self._check_update_requests(conn.requests, ts_obj, policies[0]) + self.assertEqual( + ['/sda1/%s/.shards_a/c_shard_3/o' % shard_3_part] * 3 + + ['/sda1/%s/.shards_a/c_shard_1/o' % shard_1_part] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 6, 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + # update failed, we still have pending file, but container_path is None + # because most recent redirect location was a repeat + async_path, async_data = self._check_async_file(async_dir) + self.assertEqual(orig_async_path, async_path) + self.assertEqual( + dict(orig_async_data, container_path=None, + redirect_history=[]), + async_data) + + # next cycle, persisted container path is None so update should go to + # root, this time it succeeds + fake_responses = [(200, {})] * 3 + fake_status_codes, fake_headers = zip(*fake_responses) + with mocked_http_conn( + *fake_status_codes, headers=fake_headers) as conn: + with mock.patch('swift.obj.updater.dump_recon_cache'): + daemon.run_once() + self._check_update_requests(conn.requests, ts_obj, policies[0]) + self.assertEqual(['/sda1/%s/a/c/o' % root_part] * 3, + [req['path'] for req in conn.requests]) + self.assertEqual( + {'redirects': 6, 'successes': 1, 'unlinks': 1, + 'async_pendings': 1}, + daemon.logger.get_increment_counts()) + self.assertFalse(os.listdir(async_dir)) # no async file + if __name__ == '__main__': unittest.main() diff -Nru swift-2.17.0/test/unit/proxy/controllers/test_base.py swift-2.18.0/test/unit/proxy/controllers/test_base.py --- swift-2.17.0/test/unit/proxy/controllers/test_base.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/proxy/controllers/test_base.py 2018-05-30 10:17:02.000000000 +0000 @@ -14,6 +14,7 @@ # limitations under the License. 
import itertools +import json from collections import defaultdict import unittest import mock @@ -23,11 +24,14 @@ Controller, GetOrHeadHandler, bytes_to_skip from swift.common.swob import Request, HTTPException, RESPONSE_REASONS from swift.common import exceptions -from swift.common.utils import split_path +from swift.common.utils import split_path, ShardRange, Timestamp from swift.common.header_key_dict import HeaderKeyDict from swift.common.http import is_success from swift.common.storage_policy import StoragePolicy, StoragePolicyCollection -from test.unit import fake_http_connect, FakeRing, FakeMemcache, PatchPolicies +from test.unit import ( + fake_http_connect, FakeRing, FakeMemcache, PatchPolicies, FakeLogger, + make_timestamp_iter, + mocked_http_conn) from swift.proxy import server as proxy_server from swift.common.request_helpers import ( get_sys_meta_prefix, get_object_transient_sysmeta @@ -172,7 +176,8 @@ def setUp(self): self.app = proxy_server.Application(None, FakeMemcache(), account_ring=FakeRing(), - container_ring=FakeRing()) + container_ring=FakeRing(), + logger=FakeLogger()) def test_get_info_zero_recheck(self): mock_cache = mock.Mock() @@ -1030,3 +1035,146 @@ # prime numbers self.assertEqual(bytes_to_skip(11, 7), 4) self.assertEqual(bytes_to_skip(97, 7873823), 55) + + def test_get_shard_ranges_for_container_get(self): + ts_iter = make_timestamp_iter() + shard_ranges = [dict(ShardRange( + '.sharded_a/sr%d' % i, next(ts_iter), '%d_lower' % i, + '%d_upper' % i, object_count=i, bytes_used=1024 * i, + meta_timestamp=next(ts_iter))) + for i in range(3)] + base = Controller(self.app) + req = Request.blank('/v1/a/c', method='GET') + resp_headers = {'X-Backend-Record-Type': 'shard'} + with mocked_http_conn( + 200, 200, body_iter=iter(['', json.dumps(shard_ranges)]), + headers=resp_headers + ) as fake_conn: + actual = base._get_shard_ranges(req, 'a', 'c') + + # account info + captured = fake_conn.requests + self.assertEqual('HEAD', captured[0]['method']) + self.assertEqual('a', captured[0]['path'][7:]) + # container GET + self.assertEqual('GET', captured[1]['method']) + self.assertEqual('a/c', captured[1]['path'][7:]) + self.assertEqual('format=json', captured[1]['qs']) + self.assertEqual( + 'shard', captured[1]['headers'].get('X-Backend-Record-Type')) + self.assertEqual(shard_ranges, [dict(pr) for pr in actual]) + self.assertFalse(self.app.logger.get_lines_for_level('error')) + + def test_get_shard_ranges_for_object_put(self): + ts_iter = make_timestamp_iter() + shard_ranges = [dict(ShardRange( + '.sharded_a/sr%d' % i, next(ts_iter), '%d_lower' % i, + '%d_upper' % i, object_count=i, bytes_used=1024 * i, + meta_timestamp=next(ts_iter))) + for i in range(3)] + base = Controller(self.app) + req = Request.blank('/v1/a/c/o', method='PUT') + resp_headers = {'X-Backend-Record-Type': 'shard'} + with mocked_http_conn( + 200, 200, body_iter=iter(['', json.dumps(shard_ranges[1:2])]), + headers=resp_headers + ) as fake_conn: + actual = base._get_shard_ranges(req, 'a', 'c', '1_test') + + # account info + captured = fake_conn.requests + self.assertEqual('HEAD', captured[0]['method']) + self.assertEqual('a', captured[0]['path'][7:]) + # container GET + self.assertEqual('GET', captured[1]['method']) + self.assertEqual('a/c', captured[1]['path'][7:]) + params = sorted(captured[1]['qs'].split('&')) + self.assertEqual( + ['format=json', 'includes=1_test'], params) + self.assertEqual( + 'shard', captured[1]['headers'].get('X-Backend-Record-Type')) + self.assertEqual(shard_ranges[1:2], [dict(pr) for 
pr in actual]) + self.assertFalse(self.app.logger.get_lines_for_level('error')) + + def _check_get_shard_ranges_bad_data(self, body): + base = Controller(self.app) + req = Request.blank('/v1/a/c/o', method='PUT') + # empty response + headers = {'X-Backend-Record-Type': 'shard'} + with mocked_http_conn(200, 200, body_iter=iter(['', body]), + headers=headers): + actual = base._get_shard_ranges(req, 'a', 'c', '1_test') + self.assertIsNone(actual) + lines = self.app.logger.get_lines_for_level('error') + return lines + + def test_get_shard_ranges_empty_body(self): + error_lines = self._check_get_shard_ranges_bad_data('') + self.assertIn('Problem with listing response', error_lines[0]) + self.assertIn('No JSON', error_lines[0]) + self.assertFalse(error_lines[1:]) + + def test_get_shard_ranges_not_a_list(self): + error_lines = self._check_get_shard_ranges_bad_data(json.dumps({})) + self.assertIn('Problem with listing response', error_lines[0]) + self.assertIn('not a list', error_lines[0]) + self.assertFalse(error_lines[1:]) + + def test_get_shard_ranges_key_missing(self): + error_lines = self._check_get_shard_ranges_bad_data(json.dumps([{}])) + self.assertIn('Failed to get shard ranges', error_lines[0]) + self.assertIn('KeyError', error_lines[0]) + self.assertFalse(error_lines[1:]) + + def test_get_shard_ranges_invalid_shard_range(self): + sr = ShardRange('a/c', Timestamp.now()) + bad_sr_data = dict(sr, name='bad_name') + error_lines = self._check_get_shard_ranges_bad_data( + json.dumps([bad_sr_data])) + self.assertIn('Failed to get shard ranges', error_lines[0]) + self.assertIn('ValueError', error_lines[0]) + self.assertFalse(error_lines[1:]) + + def test_get_shard_ranges_missing_record_type(self): + base = Controller(self.app) + req = Request.blank('/v1/a/c/o', method='PUT') + sr = ShardRange('a/c', Timestamp.now()) + body = json.dumps([dict(sr)]) + with mocked_http_conn( + 200, 200, body_iter=iter(['', body])): + actual = base._get_shard_ranges(req, 'a', 'c', '1_test') + self.assertIsNone(actual) + error_lines = self.app.logger.get_lines_for_level('error') + self.assertIn('Failed to get shard ranges', error_lines[0]) + self.assertIn('unexpected record type', error_lines[0]) + self.assertIn('/a/c', error_lines[0]) + self.assertFalse(error_lines[1:]) + + def test_get_shard_ranges_wrong_record_type(self): + base = Controller(self.app) + req = Request.blank('/v1/a/c/o', method='PUT') + sr = ShardRange('a/c', Timestamp.now()) + body = json.dumps([dict(sr)]) + headers = {'X-Backend-Record-Type': 'object'} + with mocked_http_conn( + 200, 200, body_iter=iter(['', body]), + headers=headers): + actual = base._get_shard_ranges(req, 'a', 'c', '1_test') + self.assertIsNone(actual) + error_lines = self.app.logger.get_lines_for_level('error') + self.assertIn('Failed to get shard ranges', error_lines[0]) + self.assertIn('unexpected record type', error_lines[0]) + self.assertIn('/a/c', error_lines[0]) + self.assertFalse(error_lines[1:]) + + def test_get_shard_ranges_request_failed(self): + base = Controller(self.app) + req = Request.blank('/v1/a/c/o', method='PUT') + with mocked_http_conn(200, 404, 404, 404): + actual = base._get_shard_ranges(req, 'a', 'c', '1_test') + self.assertIsNone(actual) + self.assertFalse(self.app.logger.get_lines_for_level('error')) + warning_lines = self.app.logger.get_lines_for_level('warning') + self.assertIn('Failed to get container listing', warning_lines[0]) + self.assertIn('/a/c', warning_lines[0]) + self.assertFalse(warning_lines[1:]) diff -Nru 
swift-2.17.0/test/unit/proxy/controllers/test_container.py swift-2.18.0/test/unit/proxy/controllers/test_container.py --- swift-2.17.0/test/unit/proxy/controllers/test_container.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/proxy/controllers/test_container.py 2018-05-30 10:17:02.000000000 +0000 @@ -12,17 +12,24 @@ # implied. # See the License for the specific language governing permissions and # limitations under the License. +import json import mock import socket import unittest from eventlet import Timeout +from six.moves import urllib +from swift.common.constraints import CONTAINER_LISTING_LIMIT from swift.common.swob import Request +from swift.common.utils import ShardRange, Timestamp from swift.proxy import server as proxy_server -from swift.proxy.controllers.base import headers_to_container_info, Controller -from test.unit import fake_http_connect, FakeRing, FakeMemcache +from swift.proxy.controllers.base import headers_to_container_info, Controller, \ + get_container_info +from test import annotate_failure +from test.unit import fake_http_connect, FakeRing, FakeMemcache, \ + make_timestamp_iter from swift.common.storage_policy import StoragePolicy from swift.common.request_helpers import get_sys_meta_prefix @@ -72,6 +79,7 @@ new=FakeAccountInfoContainerController): return _orig_get_controller(*args, **kwargs) self.app.get_controller = wrapped_get_controller + self.ts_iter = make_timestamp_iter() def _make_callback_func(self, context): def callback(ipaddr, port, device, partition, method, path, @@ -151,6 +159,91 @@ for key in owner_headers: self.assertIn(key, resp.headers) + def test_reseller_admin(self): + reseller_internal_headers = { + get_sys_meta_prefix('container') + 'sharding': 'True'} + reseller_external_headers = {'x-container-sharding': 'on'} + controller = proxy_server.ContainerController(self.app, 'a', 'c') + + # Normal users, even swift owners, can't set it + req = Request.blank('/v1/a/c', method='PUT', + headers=reseller_external_headers, + environ={'swift_owner': True}) + with mocked_http_conn(*[201] * self.CONTAINER_REPLICAS) as mock_conn: + resp = req.get_response(self.app) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_internal_headers: + for captured in mock_conn.requests: + self.assertNotIn(key.title(), captured['headers']) + + req = Request.blank('/v1/a/c', method='POST', + headers=reseller_external_headers, + environ={'swift_owner': True}) + with mocked_http_conn(*[204] * self.CONTAINER_REPLICAS) as mock_conn: + resp = req.get_response(self.app) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_internal_headers: + for captured in mock_conn.requests: + self.assertNotIn(key.title(), captured['headers']) + + req = Request.blank('/v1/a/c', environ={'swift_owner': True}) + # Heck, they don't even get to know + with mock.patch('swift.proxy.controllers.base.http_connect', + fake_http_connect(200, 200, + headers=reseller_internal_headers)): + resp = controller.HEAD(req) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_external_headers: + self.assertNotIn(key, resp.headers) + + with mock.patch('swift.proxy.controllers.base.http_connect', + fake_http_connect(200, 200, + headers=reseller_internal_headers)): + resp = controller.GET(req) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_external_headers: + self.assertNotIn(key, resp.headers) + + # But reseller admins can set it + req = Request.blank('/v1/a/c', method='PUT', + headers=reseller_external_headers, + 
environ={'reseller_request': True}) + with mocked_http_conn(*[201] * self.CONTAINER_REPLICAS) as mock_conn: + resp = req.get_response(self.app) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_internal_headers: + for captured in mock_conn.requests: + self.assertIn(key.title(), captured['headers']) + + req = Request.blank('/v1/a/c', method='POST', + headers=reseller_external_headers, + environ={'reseller_request': True}) + with mocked_http_conn(*[204] * self.CONTAINER_REPLICAS) as mock_conn: + resp = req.get_response(self.app) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_internal_headers: + for captured in mock_conn.requests: + self.assertIn(key.title(), captured['headers']) + + # And see that they have + req = Request.blank('/v1/a/c', environ={'reseller_request': True}) + with mock.patch('swift.proxy.controllers.base.http_connect', + fake_http_connect(200, 200, + headers=reseller_internal_headers)): + resp = controller.HEAD(req) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_external_headers: + self.assertIn(key, resp.headers) + self.assertEqual(resp.headers[key], 'True') + + with mock.patch('swift.proxy.controllers.base.http_connect', + fake_http_connect(200, 200, + headers=reseller_internal_headers)): + resp = controller.GET(req) + self.assertEqual(2, resp.status_int // 100) + for key in reseller_external_headers: + self.assertEqual(resp.headers[key], 'True') + def test_sys_meta_headers_PUT(self): # check that headers in sys meta namespace make it through # the container controller @@ -329,6 +422,933 @@ ] self._assert_responses('POST', POST_TEST_CASES) + def _make_shard_objects(self, shard_range): + lower = ord(shard_range.lower[0]) if shard_range.lower else ord('@') + upper = ord(shard_range.upper[0]) if shard_range.upper else ord('z') + + objects = [{'name': chr(i), 'bytes': i, 'hash': 'hash%s' % chr(i), + 'content_type': 'text/plain', 'deleted': 0, + 'last_modified': next(self.ts_iter).isoformat} + for i in range(lower + 1, upper + 1)] + return objects + + def _check_GET_shard_listing(self, mock_responses, expected_objects, + expected_requests, query_string='', + reverse=False): + # mock_responses is a list of tuples (status, json body, headers) + # expected objects is a list of dicts + # expected_requests is a list of tuples (path, hdrs dict, params dict) + + # sanity check that expected objects is name ordered with no repeats + def name(obj): + return obj.get('name', obj.get('subdir')) + + for (prev, next_) in zip(expected_objects, expected_objects[1:]): + if reverse: + self.assertGreater(name(prev), name(next_)) + else: + self.assertLess(name(prev), name(next_)) + container_path = '/v1/a/c' + query_string + codes = (resp[0] for resp in mock_responses) + bodies = iter([json.dumps(resp[1]) for resp in mock_responses]) + exp_headers = [resp[2] for resp in mock_responses] + request = Request.blank(container_path) + with mocked_http_conn( + *codes, body_iter=bodies, headers=exp_headers) as fake_conn: + resp = request.get_response(self.app) + for backend_req in fake_conn.requests: + self.assertEqual(request.headers['X-Trans-Id'], + backend_req['headers']['X-Trans-Id']) + self.assertTrue(backend_req['headers']['User-Agent'].startswith( + 'proxy-server')) + self.assertEqual(200, resp.status_int) + actual_objects = json.loads(resp.body) + self.assertEqual(len(expected_objects), len(actual_objects)) + self.assertEqual(expected_objects, actual_objects) + self.assertEqual(len(expected_requests), len(fake_conn.requests)) + for i, 
((exp_path, exp_headers, exp_params), req) in enumerate( + zip(expected_requests, fake_conn.requests)): + with annotate_failure('Request check at index %d.' % i): + # strip off /sdx/0/ from path + self.assertEqual(exp_path, req['path'][7:]) + self.assertEqual( + dict(exp_params, format='json'), + dict(urllib.parse.parse_qsl(req['qs'], True))) + for k, v in exp_headers.items(): + self.assertIn(k, req['headers']) + self.assertEqual(v, req['headers'][k]) + self.assertNotIn('X-Backend-Override-Delete', req['headers']) + return resp + + def check_response(self, resp, root_resp_hdrs, expected_objects=None): + info_hdrs = dict(root_resp_hdrs) + if expected_objects is None: + # default is to expect whatever the root container sent + expected_obj_count = root_resp_hdrs['X-Container-Object-Count'] + expected_bytes_used = root_resp_hdrs['X-Container-Bytes-Used'] + else: + expected_bytes_used = sum([o['bytes'] for o in expected_objects]) + expected_obj_count = len(expected_objects) + info_hdrs['X-Container-Bytes-Used'] = expected_bytes_used + info_hdrs['X-Container-Object-Count'] = expected_obj_count + self.assertEqual(expected_bytes_used, + int(resp.headers['X-Container-Bytes-Used'])) + self.assertEqual(expected_obj_count, + int(resp.headers['X-Container-Object-Count'])) + self.assertEqual('sharded', resp.headers['X-Backend-Sharding-State']) + for k, v in root_resp_hdrs.items(): + if k.lower().startswith('x-container-meta'): + self.assertEqual(v, resp.headers[k]) + # check that info cache is correct for root container + info = get_container_info(resp.request.environ, self.app) + self.assertEqual(headers_to_container_info(info_hdrs), info) + + def test_GET_sharded_container(self): + shard_bounds = (('', 'ham'), ('ham', 'pie'), ('pie', '')) + shard_ranges = [ + ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper) + for lower, upper in shard_bounds] + sr_dicts = [dict(sr) for sr in shard_ranges] + sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges] + shard_resp_hdrs = [ + {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': len(sr_objs[i]), + 'X-Container-Bytes-Used': + sum([obj['bytes'] for obj in sr_objs[i]]), + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} + for i in range(3)] + + all_objects = [] + for objects in sr_objs: + all_objects.extend(objects) + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + num_all_objects = len(all_objects) + limit = CONTAINER_LISTING_LIMIT + expected_objects = all_objects + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + # pretend root object stats are not yet updated + 'X-Container-Object-Count': num_all_objects - 1, + 'X-Container-Bytes-Used': size_all_objects - 1, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + # GET all objects + # include some failed responses + mock_responses = [ + # status, body, headers + (404, '', {}), + (200, sr_dicts, root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (200, sr_objs[1], shard_resp_hdrs[1]), + (200, sr_objs[2], shard_resp_hdrs[2]) + ] + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 404 + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', limit=str(limit), + 
states='listing')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='p', end_marker='', states='listing', + limit=str(limit - len(sr_objs[0] + sr_objs[1])))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs, + expected_objects=expected_objects) + + # GET all objects - sharding, final shard range points back to root + root_range = ShardRange('a/c', Timestamp.now(), 'pie', '') + mock_responses = [ + # status, body, headers + (200, sr_dicts[:2] + [dict(root_range)], root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (200, sr_objs[1], shard_resp_hdrs[1]), + (200, sr_objs[2], root_resp_hdrs) + ] + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', limit=str(limit), + states='listing')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), # 200 + (root_range.name, {'X-Backend-Record-Type': 'object'}, + dict(marker='p', end_marker='', + limit=str(limit - len(sr_objs[0] + sr_objs[1])))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs, + expected_objects=expected_objects) + + # GET all objects in reverse + mock_responses = [ + # status, body, headers + (200, list(reversed(sr_dicts)), root_shard_resp_hdrs), + (200, list(reversed(sr_objs[2])), shard_resp_hdrs[2]), + (200, list(reversed(sr_objs[1])), shard_resp_hdrs[1]), + (200, list(reversed(sr_objs[0])), shard_resp_hdrs[0]), + ] + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing', reverse='true')), + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='pie', reverse='true', + limit=str(limit), states='listing')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='q', end_marker='ham', states='listing', + reverse='true', limit=str(limit - len(sr_objs[2])))), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='i', end_marker='', states='listing', reverse='true', + limit=str(limit - len(sr_objs[2] + sr_objs[1])))), # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, list(reversed(expected_objects)), + expected_requests, query_string='?reverse=true', reverse=True) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs, + expected_objects=expected_objects) + + # GET with limit param + limit = len(sr_objs[0]) + len(sr_objs[1]) + 1 + expected_objects = all_objects[:limit] + mock_responses = [ + (404, '', {}), + (200, sr_dicts, root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (200, sr_objs[1], shard_resp_hdrs[1]), + (200, sr_objs[2][:1], shard_resp_hdrs[2]) + ] + expected_requests = [ + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(limit=str(limit), states='listing')), # 404 + ('a/c', 
{'X-Backend-Record-Type': 'auto'}, + dict(limit=str(limit), states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit))), + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker='p', end_marker='', states='listing', + limit=str(limit - len(sr_objs[0] + sr_objs[1])))) + ] + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests, + query_string='?limit=%s' % limit) + self.check_response(resp, root_resp_hdrs) + + # GET with marker + marker = sr_objs[1][2]['name'] + first_included = len(sr_objs[0]) + 2 + limit = CONTAINER_LISTING_LIMIT + expected_objects = all_objects[first_included:] + mock_responses = [ + (404, '', {}), + (200, sr_dicts[1:], root_shard_resp_hdrs), + (404, '', {}), + (200, sr_objs[1][2:], shard_resp_hdrs[1]), + (200, sr_objs[2], shard_resp_hdrs[2]) + ] + expected_requests = [ + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(marker=marker, states='listing')), # 404 + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(marker=marker, states='listing')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 404 + dict(marker=marker, end_marker='pie\x00', states='listing', + limit=str(limit))), + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker=marker, end_marker='pie\x00', states='listing', + limit=str(limit))), + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker='p', end_marker='', states='listing', + limit=str(limit - len(sr_objs[1][2:])))), + ] + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests, + query_string='?marker=%s' % marker) + self.check_response(resp, root_resp_hdrs) + + # GET with end marker + end_marker = sr_objs[1][6]['name'] + first_excluded = len(sr_objs[0]) + 6 + expected_objects = all_objects[:first_excluded] + mock_responses = [ + (404, '', {}), + (200, sr_dicts[:2], root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (404, '', {}), + (200, sr_objs[1][:6], shard_resp_hdrs[1]) + ] + expected_requests = [ + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(end_marker=end_marker, states='listing')), # 404 + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(end_marker=end_marker, states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit))), + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 404 + dict(marker='h', end_marker=end_marker, states='listing', + limit=str(limit - len(sr_objs[0])))), + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker='h', end_marker=end_marker, states='listing', + limit=str(limit - len(sr_objs[0])))), + ] + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests, + query_string='?end_marker=%s' % end_marker) + self.check_response(resp, root_resp_hdrs) + + # marker and end_marker and limit + limit = 2 + expected_objects = all_objects[first_included:first_excluded] + mock_responses = [ + (200, sr_dicts[1:2], root_shard_resp_hdrs), + (200, sr_objs[1][2:6], shard_resp_hdrs[1]) + ] + expected_requests = [ + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing', limit=str(limit), + 
marker=marker, end_marker=end_marker)), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker=marker, end_marker=end_marker, states='listing', + limit=str(limit))), + ] + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests, + query_string='?marker=%s&end_marker=%s&limit=%s' + % (marker, end_marker, limit)) + self.check_response(resp, root_resp_hdrs) + + # reverse with marker, end_marker + expected_objects.reverse() + mock_responses = [ + (200, sr_dicts[1:2], root_shard_resp_hdrs), + (200, list(reversed(sr_objs[1][2:6])), shard_resp_hdrs[1]) + ] + expected_requests = [ + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(marker=end_marker, reverse='true', end_marker=marker, + limit=str(limit), states='listing',)), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, # 200 + dict(marker=end_marker, end_marker=marker, states='listing', + limit=str(limit), reverse='true')), + ] + self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests, + query_string='?marker=%s&end_marker=%s&limit=%s&reverse=true' + % (end_marker, marker, limit), reverse=True) + self.check_response(resp, root_resp_hdrs) + + def test_GET_sharded_container_with_delimiter(self): + shard_bounds = (('', 'ham'), ('ham', 'pie'), ('pie', '')) + shard_ranges = [ + ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper) + for lower, upper in shard_bounds] + sr_dicts = [dict(sr) for sr in shard_ranges] + shard_resp_hdrs = {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': 2, + 'X-Container-Bytes-Used': 4, + 'X-Backend-Storage-Policy-Index': 0} + + limit = CONTAINER_LISTING_LIMIT + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + # pretend root object stats are not yet updated + 'X-Container-Object-Count': 6, + 'X-Container-Bytes-Used': 12, + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + sr_0_obj = {'name': 'apple', + 'bytes': 1, + 'hash': 'hash', + 'content_type': 'text/plain', + 'deleted': 0, + 'last_modified': next(self.ts_iter).isoformat} + sr_2_obj = {'name': 'pumpkin', + 'bytes': 1, + 'hash': 'hash', + 'content_type': 'text/plain', + 'deleted': 0, + 'last_modified': next(self.ts_iter).isoformat} + subdir = {'subdir': 'ha/'} + mock_responses = [ + # status, body, headers + (200, sr_dicts, root_shard_resp_hdrs), + (200, [sr_0_obj, subdir], shard_resp_hdrs), + (200, [], shard_resp_hdrs), + (200, [sr_2_obj], shard_resp_hdrs) + ] + expected_requests = [ + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing', delimiter='/')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', limit=str(limit), + states='listing', delimiter='/')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='ha/', end_marker='pie\x00', states='listing', + limit=str(limit - 2), delimiter='/')), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='ha/', end_marker='', states='listing', + limit=str(limit - 2), delimiter='/')) # 200 + ] + + expected_objects = [sr_0_obj, subdir, sr_2_obj] + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests, + query_string='?delimiter=/') + self.check_response(resp, root_resp_hdrs) + + def test_GET_sharded_container_overlapping_shards(self): + # verify ordered listing even if unexpected overlapping shard ranges + 
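# A minimal sketch of the marker/limit bookkeeping that the assertions in
# this test encode (the helper name is illustrative, not the proxy's own
# code): after each shard response the marker advances to the last name
# already listed and the remaining limit shrinks accordingly, so any names
# repeated by an overlapping shard range fall at or below the marker and
# are skipped.
def _next_listing_params(listing_so_far, limit=CONTAINER_LISTING_LIMIT):
    marker = listing_so_far[-1]['name'] if listing_so_far else ''
    return dict(marker=marker, limit=str(limit - len(listing_so_far)))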
shard_bounds = (('', 'ham', ShardRange.CLEAVED), + ('', 'pie', ShardRange.ACTIVE), + ('lemon', '', ShardRange.ACTIVE)) + shard_ranges = [ + ShardRange('.shards_a/c_' + upper, Timestamp.now(), lower, upper, + state=state) + for lower, upper, state in shard_bounds] + sr_dicts = [dict(sr) for sr in shard_ranges] + sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges] + shard_resp_hdrs = [ + {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': len(sr_objs[i]), + 'X-Container-Bytes-Used': + sum([obj['bytes'] for obj in sr_objs[i]]), + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} + for i in range(3)] + + all_objects = [] + for objects in sr_objs: + all_objects.extend(objects) + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + num_all_objects = len(all_objects) + limit = CONTAINER_LISTING_LIMIT + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + # pretend root object stats are not yet updated + 'X-Container-Object-Count': num_all_objects - 1, + 'X-Container-Bytes-Used': size_all_objects - 1, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + # forwards listing + + # expect subset of second shard range + objs_1 = [o for o in sr_objs[1] if o['name'] > sr_objs[0][-1]['name']] + # expect subset of third shard range + objs_2 = [o for o in sr_objs[2] if o['name'] > sr_objs[1][-1]['name']] + mock_responses = [ + # status, body, headers + (200, sr_dicts, root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (200, objs_1, shard_resp_hdrs[1]), + (200, objs_2, shard_resp_hdrs[2]) + ] + # NB marker always advances to last object name + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit))), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='p', end_marker='', states='listing', + limit=str(limit - len(sr_objs[0] + objs_1)))) # 200 + ] + + expected_objects = sr_objs[0] + objs_1 + objs_2 + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs, + expected_objects=expected_objects) + + # reverse listing + + # expect subset of third shard range + objs_0 = [o for o in sr_objs[0] if o['name'] < sr_objs[1][0]['name']] + # expect subset of second shard range + objs_1 = [o for o in sr_objs[1] if o['name'] < sr_objs[2][0]['name']] + mock_responses = [ + # status, body, headers + (200, list(reversed(sr_dicts)), root_shard_resp_hdrs), + (200, list(reversed(sr_objs[2])), shard_resp_hdrs[2]), + (200, list(reversed(objs_1)), shard_resp_hdrs[1]), + (200, list(reversed(objs_0)), shard_resp_hdrs[0]), + ] + # NB marker always advances to last object name + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing', reverse='true')), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='lemon', states='listing', + limit=str(limit), + reverse='true')), # 
200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='m', end_marker='', reverse='true', states='listing', + limit=str(limit - len(sr_objs[2])))), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='A', end_marker='', reverse='true', states='listing', + limit=str(limit - len(sr_objs[2] + objs_1)))) # 200 + ] + + expected_objects = list(reversed(objs_0 + objs_1 + sr_objs[2])) + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests, + query_string='?reverse=true', reverse=True) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs, + expected_objects=expected_objects) + + def test_GET_sharded_container_gap_in_shards(self): + # verify ordered listing even if unexpected gap between shard ranges + shard_bounds = (('', 'ham'), ('onion', 'pie'), ('rhubarb', '')) + shard_ranges = [ + ShardRange('.shards_a/c_' + upper, Timestamp.now(), lower, upper) + for lower, upper in shard_bounds] + sr_dicts = [dict(sr) for sr in shard_ranges] + sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges] + shard_resp_hdrs = [ + {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': len(sr_objs[i]), + 'X-Container-Bytes-Used': + sum([obj['bytes'] for obj in sr_objs[i]]), + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} + for i in range(3)] + + all_objects = [] + for objects in sr_objs: + all_objects.extend(objects) + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + num_all_objects = len(all_objects) + limit = CONTAINER_LISTING_LIMIT + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + 'X-Container-Object-Count': num_all_objects, + 'X-Container-Bytes-Used': size_all_objects, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + mock_responses = [ + # status, body, headers + (200, sr_dicts, root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (200, sr_objs[1], shard_resp_hdrs[1]), + (200, sr_objs[2], shard_resp_hdrs[2]) + ] + # NB marker always advances to last object name + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit))), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='p', end_marker='', states='listing', + limit=str(limit - len(sr_objs[0] + sr_objs[1])))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, all_objects, expected_requests) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs) + + def test_GET_sharded_container_empty_shard(self): + # verify ordered listing when a shard is empty + shard_bounds = (('', 'ham'), ('ham', 'pie'), ('pie', '')) + shard_ranges = [ + ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper) + for lower, upper in shard_bounds] + sr_dicts = [dict(sr) for sr in shard_ranges] + sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges] + shard_resp_hdrs = [ + {'X-Backend-Sharding-State': 'unsharded', + 
'X-Container-Object-Count': len(sr_objs[i]), + 'X-Container-Bytes-Used': + sum([obj['bytes'] for obj in sr_objs[i]]), + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} + for i in range(3)] + empty_shard_resp_hdrs = { + 'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': 0, + 'X-Container-Bytes-Used': 0, + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} + + # empty first shard range + all_objects = sr_objs[1] + sr_objs[2] + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + 'X-Container-Object-Count': len(all_objects), + 'X-Container-Bytes-Used': size_all_objects, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + mock_responses = [ + # status, body, headers + (200, sr_dicts, root_shard_resp_hdrs), + (200, [], empty_shard_resp_hdrs), + (200, sr_objs[1], shard_resp_hdrs[1]), + (200, sr_objs[2], shard_resp_hdrs[2]) + ] + # NB marker does not advance until an object is in the listing + limit = CONTAINER_LISTING_LIMIT + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit))), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='pie\x00', states='listing', + limit=str(limit))), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='p', end_marker='', states='listing', + limit=str(limit - len(sr_objs[1])))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, sr_objs[1] + sr_objs[2], expected_requests) + self.check_response(resp, root_resp_hdrs) + + # empty last shard range, reverse + all_objects = sr_objs[0] + sr_objs[1] + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + 'X-Container-Object-Count': len(all_objects), + 'X-Container-Bytes-Used': size_all_objects, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + mock_responses = [ + # status, body, headers + (200, list(reversed(sr_dicts)), root_shard_resp_hdrs), + (200, [], empty_shard_resp_hdrs), + (200, list(reversed(sr_objs[1])), shard_resp_hdrs[1]), + (200, list(reversed(sr_objs[0])), shard_resp_hdrs[0]), + ] + limit = CONTAINER_LISTING_LIMIT + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing', reverse='true')), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='pie', states='listing', + limit=str(limit), reverse='true')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham', states='listing', + limit=str(limit), reverse='true')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker=sr_objs[1][0]['name'], end_marker='', + states='listing', reverse='true', + limit=str(limit - len(sr_objs[1])))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, list(reversed(sr_objs[0] + sr_objs[1])), + expected_requests, query_string='?reverse=true', reverse=True) + 
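# Reverse listings visit the shard ranges uppermost-first and read each one
# back down to its own lower bound, which becomes the end_marker of that
# request; a minimal illustrative sketch of the pairing asserted above and
# below (not the proxy's real code):
def _reverse_end_markers(shard_bounds):
    return [lower for lower, upper in reversed(shard_bounds)]

# e.g. (('', 'ham'), ('ham', 'pie'), ('pie', '')) -> ['pie', 'ham', '']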
self.check_response(resp, root_resp_hdrs) + + # empty second shard range + all_objects = sr_objs[0] + sr_objs[2] + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + 'X-Container-Object-Count': len(all_objects), + 'X-Container-Bytes-Used': size_all_objects, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + mock_responses = [ + # status, body, headers + (200, sr_dicts, root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (200, [], empty_shard_resp_hdrs), + (200, sr_objs[2], shard_resp_hdrs[2]) + ] + # NB marker always advances to last object name + limit = CONTAINER_LISTING_LIMIT + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit))), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='', states='listing', + limit=str(limit - len(sr_objs[0])))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, sr_objs[0] + sr_objs[2], expected_requests) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs) + + # marker in empty second range + mock_responses = [ + # status, body, headers + (200, sr_dicts[1:], root_shard_resp_hdrs), + (200, [], empty_shard_resp_hdrs), + (200, sr_objs[2], shard_resp_hdrs[2]) + ] + # NB marker unchanged when getting from third range + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing', marker='koolaid')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='koolaid', end_marker='pie\x00', states='listing', + limit=str(limit))), # 200 + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='koolaid', end_marker='', states='listing', + limit=str(limit))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, sr_objs[2], expected_requests, + query_string='?marker=koolaid') + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs) + + # marker in empty second range, reverse + mock_responses = [ + # status, body, headers + (200, list(reversed(sr_dicts[:2])), root_shard_resp_hdrs), + (200, [], empty_shard_resp_hdrs), + (200, list(reversed(sr_objs[0])), shard_resp_hdrs[2]) + ] + # NB marker unchanged when getting from first range + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing', marker='koolaid', reverse='true')), # 200 + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='koolaid', end_marker='ham', reverse='true', + states='listing', limit=str(limit))), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='koolaid', end_marker='', reverse='true', + states='listing', limit=str(limit))) # 200 + ] + + resp = self._check_GET_shard_listing( + mock_responses, list(reversed(sr_objs[0])), expected_requests, + query_string='?marker=koolaid&reverse=true', reverse=True) + # root object count will 
overridden by actual length of listing + self.check_response(resp, root_resp_hdrs) + + def _check_GET_sharded_container_shard_error(self, error): + # verify ordered listing when a shard is empty + shard_bounds = (('', 'ham'), ('ham', 'pie'), ('lemon', '')) + shard_ranges = [ + ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper) + for lower, upper in shard_bounds] + sr_dicts = [dict(sr) for sr in shard_ranges] + sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges] + # empty second shard range + sr_objs[1] = [] + shard_resp_hdrs = [ + {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': len(sr_objs[i]), + 'X-Container-Bytes-Used': + sum([obj['bytes'] for obj in sr_objs[i]]), + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} + for i in range(3)] + + all_objects = [] + for objects in sr_objs: + all_objects.extend(objects) + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + num_all_objects = len(all_objects) + limit = CONTAINER_LISTING_LIMIT + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + 'X-Container-Object-Count': num_all_objects, + 'X-Container-Bytes-Used': size_all_objects, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + mock_responses = [ + # status, body, headers + (200, sr_dicts, root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0])] + \ + [(error, [], {})] * 2 * self.CONTAINER_REPLICAS + \ + [(200, sr_objs[2], shard_resp_hdrs[2])] + + # NB marker always advances to last object name + expected_requests = [ + # path, headers, params + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit)))] \ + + [(shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))) + ] * 2 * self.CONTAINER_REPLICAS \ + + [(shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='', states='listing', + limit=str(limit - len(sr_objs[0] + sr_objs[1]))))] + + resp = self._check_GET_shard_listing( + mock_responses, all_objects, expected_requests) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs) + + def test_GET_sharded_container_shard_errors(self): + self._check_GET_sharded_container_shard_error(404) + self._check_GET_sharded_container_shard_error(500) + + def test_GET_sharded_container_sharding_shard(self): + # one shard is in process of sharding + shard_bounds = (('', 'ham'), ('ham', 'pie'), ('pie', '')) + shard_ranges = [ + ShardRange('.shards_a/c_' + upper, Timestamp.now(), lower, upper) + for lower, upper in shard_bounds] + sr_dicts = [dict(sr) for sr in shard_ranges] + sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges] + shard_resp_hdrs = [ + {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': len(sr_objs[i]), + 'X-Container-Bytes-Used': + sum([obj['bytes'] for obj in sr_objs[i]]), + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} + for i in range(3)] + shard_1_shard_resp_hdrs = dict(shard_resp_hdrs[1]) + shard_1_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + # second shard is sharding and has cleaved two out of three sub shards + 
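# A sharding shard answers the record-type 'auto' GET with the sub-shard
# ranges it has already cleaved plus (in this test) its own range covering
# the uncleaved tail; the proxy descends into each sub-shard and then
# re-fetches the tail from the shard itself as plain object records. A
# purely illustrative sketch of that descent for the second shard, written
# as (container, X-Backend-Record-Type) pairs:
sharding_shard_descent = [
    ('.shards_a/c_pie', 'auto'),    # returns cleaved sub-shards + own range
    ('a/c_sub_juice', 'auto'),      # first cleaved sub-shard's objects
    ('a/c_sub_lemon', 'auto'),      # second cleaved sub-shard's objects
    ('.shards_a/c_pie', 'object'),  # uncleaved remainder as object rows
]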
shard_resp_hdrs[1]['X-Backend-Sharding-State'] = 'sharding' + sub_shard_bounds = (('ham', 'juice'), ('juice', 'lemon')) + sub_shard_ranges = [ + ShardRange('a/c_sub_' + upper, Timestamp.now(), lower, upper) + for lower, upper in sub_shard_bounds] + sub_sr_dicts = [dict(sr) for sr in sub_shard_ranges] + sub_sr_objs = [self._make_shard_objects(sr) for sr in sub_shard_ranges] + sub_shard_resp_hdrs = [ + {'X-Backend-Sharding-State': 'unsharded', + 'X-Container-Object-Count': len(sub_sr_objs[i]), + 'X-Container-Bytes-Used': + sum([obj['bytes'] for obj in sub_sr_objs[i]]), + 'X-Container-Meta-Flavour': 'flavour%d' % i, + 'X-Backend-Storage-Policy-Index': 0} + for i in range(2)] + + all_objects = [] + for objects in sr_objs: + all_objects.extend(objects) + size_all_objects = sum([obj['bytes'] for obj in all_objects]) + num_all_objects = len(all_objects) + limit = CONTAINER_LISTING_LIMIT + root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded', + 'X-Container-Object-Count': num_all_objects, + 'X-Container-Bytes-Used': size_all_objects, + 'X-Container-Meta-Flavour': 'peach', + 'X-Backend-Storage-Policy-Index': 0} + root_shard_resp_hdrs = dict(root_resp_hdrs) + root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard' + + mock_responses = [ + # status, body, headers + (200, sr_dicts, root_shard_resp_hdrs), + (200, sr_objs[0], shard_resp_hdrs[0]), + (200, sub_sr_dicts + [sr_dicts[1]], shard_1_shard_resp_hdrs), + (200, sub_sr_objs[0], sub_shard_resp_hdrs[0]), + (200, sub_sr_objs[1], sub_shard_resp_hdrs[1]), + (200, sr_objs[1][len(sub_sr_objs[0] + sub_sr_objs[1]):], + shard_resp_hdrs[1]), + (200, sr_objs[2], shard_resp_hdrs[2]) + ] + # NB marker always advances to last object name + expected_requests = [ + # get root shard ranges + ('a/c', {'X-Backend-Record-Type': 'auto'}, + dict(states='listing')), # 200 + # get first shard objects + (shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='', end_marker='ham\x00', states='listing', + limit=str(limit))), # 200 + # get second shard sub-shard ranges + (shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='pie\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), + # get first sub-shard objects + (sub_shard_ranges[0].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='h', end_marker='juice\x00', states='listing', + limit=str(limit - len(sr_objs[0])))), + # get second sub-shard objects + (sub_shard_ranges[1].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='j', end_marker='lemon\x00', states='listing', + limit=str(limit - len(sr_objs[0] + sub_sr_objs[0])))), + # get remainder of first shard objects + (shard_ranges[1].name, {'X-Backend-Record-Type': 'object'}, + dict(marker='l', end_marker='pie\x00', + limit=str(limit - len(sr_objs[0] + sub_sr_objs[0] + + sub_sr_objs[1])))), # 200 + # get third shard objects + (shard_ranges[2].name, {'X-Backend-Record-Type': 'auto'}, + dict(marker='p', end_marker='', states='listing', + limit=str(limit - len(sr_objs[0] + sr_objs[1])))) # 200 + ] + expected_objects = ( + sr_objs[0] + sub_sr_objs[0] + sub_sr_objs[1] + + sr_objs[1][len(sub_sr_objs[0] + sub_sr_objs[1]):] + sr_objs[2]) + resp = self._check_GET_shard_listing( + mock_responses, expected_objects, expected_requests) + # root object count will overridden by actual length of listing + self.check_response(resp, root_resp_hdrs) + @patch_policies( [StoragePolicy(0, 'zero', True, object_ring=FakeRing(replicas=4))]) diff -Nru swift-2.17.0/test/unit/proxy/test_server.py 
swift-2.18.0/test/unit/proxy/test_server.py --- swift-2.17.0/test/unit/proxy/test_server.py 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test/unit/proxy/test_server.py 2018-05-30 10:17:02.000000000 +0000 @@ -47,7 +47,7 @@ from six import BytesIO from six import StringIO from six.moves import range -from six.moves.urllib.parse import quote +from six.moves.urllib.parse import quote, parse_qsl from test import listen_zero from test.unit import ( @@ -3222,95 +3222,197 @@ # reset the router post patch_policies self.app.obj_controller_router = proxy_server.ObjectControllerRouter() self.app.sort_nodes = lambda nodes, *args, **kwargs: nodes - backend_requests = [] - def capture_requests(ip, port, method, path, headers, *args, - **kwargs): - backend_requests.append((method, path, headers)) - - req = Request.blank('/v1/a/c/o', {}, method='POST', - headers={'X-Object-Meta-Color': 'Blue', - 'Content-Type': 'text/plain'}) + def do_test(resp_headers): + self.app.memcache.store = {} + backend_requests = [] - # we want the container_info response to says a policy index of 1 - resp_headers = {'X-Backend-Storage-Policy-Index': 1} - with mocked_http_conn( - 200, 200, 202, 202, 202, - headers=resp_headers, give_connect=capture_requests - ) as fake_conn: - resp = req.get_response(self.app) - self.assertRaises(StopIteration, fake_conn.code_iter.next) + def capture_requests(ip, port, method, path, headers, *args, + **kwargs): + backend_requests.append((method, path, headers)) - self.assertEqual(resp.status_int, 202) - self.assertEqual(len(backend_requests), 5) + req = Request.blank('/v1/a/c/o', {}, method='POST', + headers={'X-Object-Meta-Color': 'Blue', + 'Content-Type': 'text/plain'}) - def check_request(req, method, path, headers=None): - req_method, req_path, req_headers = req - self.assertEqual(method, req_method) - # caller can ignore leading path parts - self.assertTrue(req_path.endswith(path), - 'expected path to end with %s, it was %s' % ( - path, req_path)) - headers = headers or {} - # caller can ignore some headers - for k, v in headers.items(): - self.assertEqual(req_headers[k], v) - account_request = backend_requests.pop(0) - check_request(account_request, method='HEAD', path='/sda/0/a') - container_request = backend_requests.pop(0) - check_request(container_request, method='HEAD', path='/sda/0/a/c') - # make sure backend requests included expected container headers - container_headers = {} - for request in backend_requests: - req_headers = request[2] - device = req_headers['x-container-device'] - host = req_headers['x-container-host'] - container_headers[device] = host - expectations = { - 'method': 'POST', - 'path': '/0/a/c/o', - 'headers': { - 'X-Container-Partition': '0', - 'Connection': 'close', - 'User-Agent': 'proxy-server %s' % os.getpid(), - 'Host': 'localhost:80', - 'Referer': 'POST http://localhost/v1/a/c/o', - 'X-Object-Meta-Color': 'Blue', - 'X-Backend-Storage-Policy-Index': '1' - }, - } - check_request(request, **expectations) + # we want the container_info response to says a policy index of 1 + with mocked_http_conn( + 200, 200, 202, 202, 202, + headers=resp_headers, give_connect=capture_requests + ) as fake_conn: + resp = req.get_response(self.app) + self.assertRaises(StopIteration, fake_conn.code_iter.next) + + self.assertEqual(resp.status_int, 202) + self.assertEqual(len(backend_requests), 5) + + def check_request(req, method, path, headers=None): + req_method, req_path, req_headers = req + self.assertEqual(method, req_method) + # caller can ignore leading path parts + 
self.assertTrue(req_path.endswith(path), + 'expected path to end with %s, it was %s' % ( + path, req_path)) + headers = headers or {} + # caller can ignore some headers + for k, v in headers.items(): + self.assertEqual(req_headers[k], v) + self.assertNotIn('X-Backend-Container-Path', req_headers) + + account_request = backend_requests.pop(0) + check_request(account_request, method='HEAD', path='/sda/0/a') + container_request = backend_requests.pop(0) + check_request(container_request, method='HEAD', path='/sda/0/a/c') + # make sure backend requests included expected container headers + container_headers = {} + for request in backend_requests: + req_headers = request[2] + device = req_headers['x-container-device'] + host = req_headers['x-container-host'] + container_headers[device] = host + expectations = { + 'method': 'POST', + 'path': '/0/a/c/o', + 'headers': { + 'X-Container-Partition': '0', + 'Connection': 'close', + 'User-Agent': 'proxy-server %s' % os.getpid(), + 'Host': 'localhost:80', + 'Referer': 'POST http://localhost/v1/a/c/o', + 'X-Object-Meta-Color': 'Blue', + 'X-Backend-Storage-Policy-Index': '1' + }, + } + check_request(request, **expectations) - expected = {} - for i, device in enumerate(['sda', 'sdb', 'sdc']): - expected[device] = '10.0.0.%d:100%d' % (i, i) - self.assertEqual(container_headers, expected) + expected = {} + for i, device in enumerate(['sda', 'sdb', 'sdc']): + expected[device] = '10.0.0.%d:100%d' % (i, i) + self.assertEqual(container_headers, expected) - # and again with policy override - self.app.memcache.store = {} - backend_requests = [] - req = Request.blank('/v1/a/c/o', {}, method='POST', - headers={'X-Object-Meta-Color': 'Blue', - 'Content-Type': 'text/plain', - 'X-Backend-Storage-Policy-Index': 0}) - with mocked_http_conn( - 200, 200, 202, 202, 202, - headers=resp_headers, give_connect=capture_requests - ) as fake_conn: - resp = req.get_response(self.app) - self.assertRaises(StopIteration, fake_conn.code_iter.next) - self.assertEqual(resp.status_int, 202) - self.assertEqual(len(backend_requests), 5) - for request in backend_requests[2:]: - expectations = { - 'method': 'POST', - 'path': '/0/a/c/o', # ignore device bit - 'headers': { - 'X-Object-Meta-Color': 'Blue', - 'X-Backend-Storage-Policy-Index': '0', + # and again with policy override + self.app.memcache.store = {} + backend_requests = [] + req = Request.blank('/v1/a/c/o', {}, method='POST', + headers={'X-Object-Meta-Color': 'Blue', + 'Content-Type': 'text/plain', + 'X-Backend-Storage-Policy-Index': 0}) + with mocked_http_conn( + 200, 200, 202, 202, 202, + headers=resp_headers, give_connect=capture_requests + ) as fake_conn: + resp = req.get_response(self.app) + self.assertRaises(StopIteration, fake_conn.code_iter.next) + self.assertEqual(resp.status_int, 202) + self.assertEqual(len(backend_requests), 5) + for request in backend_requests[2:]: + expectations = { + 'method': 'POST', + 'path': '/0/a/c/o', # ignore device bit + 'headers': { + 'X-Object-Meta-Color': 'Blue', + 'X-Backend-Storage-Policy-Index': '0', + } } - } - check_request(request, **expectations) + check_request(request, **expectations) + + resp_headers = {'X-Backend-Storage-Policy-Index': 1} + do_test(resp_headers) + resp_headers['X-Backend-Sharding-State'] = 'unsharded' + do_test(resp_headers) + + @patch_policies([ + StoragePolicy(0, 'zero', is_default=True, object_ring=FakeRing()), + StoragePolicy(1, 'one', object_ring=FakeRing()), + ]) + def test_backend_headers_update_shard_container(self): + # verify that when container is sharded 
the backend container update is + # directed to the shard container + # reset the router post patch_policies + self.app.obj_controller_router = proxy_server.ObjectControllerRouter() + self.app.sort_nodes = lambda nodes, *args, **kwargs: nodes + + def do_test(method, sharding_state): + self.app.memcache.store = {} + req = Request.blank('/v1/a/c/o', {}, method=method, body='', + headers={'Content-Type': 'text/plain'}) + + # we want the container_info response to say policy index of 1 and + # sharding state + # acc HEAD, cont HEAD, cont shard GET, obj POSTs + status_codes = (200, 200, 200, 202, 202, 202) + resp_headers = {'X-Backend-Storage-Policy-Index': 1, + 'x-backend-sharding-state': sharding_state, + 'X-Backend-Record-Type': 'shard'} + shard_range = utils.ShardRange( + '.shards_a/c_shard', utils.Timestamp.now(), 'l', 'u') + body = json.dumps([dict(shard_range)]) + with mocked_http_conn(*status_codes, headers=resp_headers, + body=body) as fake_conn: + resp = req.get_response(self.app) + + self.assertEqual(resp.status_int, 202) + backend_requests = fake_conn.requests + + def check_request(req, method, path, headers=None, params=None): + self.assertEqual(method, req['method']) + # caller can ignore leading path parts + self.assertTrue(req['path'].endswith(path), + 'expected path to end with %s, it was %s' % ( + path, req['path'])) + headers = headers or {} + # caller can ignore some headers + for k, v in headers.items(): + self.assertEqual(req['headers'][k], v, + 'Expected %s but got %s for key %s' % + (v, req['headers'][k], k)) + params = params or {} + req_params = dict(parse_qsl(req['qs'])) if req['qs'] else {} + for k, v in params.items(): + self.assertEqual(req_params[k], v, + 'Expected %s but got %s for key %s' % + (v, req_params[k], k)) + + account_request = backend_requests[0] + check_request(account_request, method='HEAD', path='/sda/0/a') + container_request = backend_requests[1] + check_request(container_request, method='HEAD', path='/sda/0/a/c') + container_request_shard = backend_requests[2] + check_request( + container_request_shard, method='GET', path='/sda/0/a/c', + params={'includes': 'o'}) + + # make sure backend requests included expected container headers + container_headers = {} + + for request in backend_requests[3:]: + req_headers = request['headers'] + device = req_headers['x-container-device'] + container_headers[device] = req_headers['x-container-host'] + expectations = { + 'method': method, + 'path': '/0/a/c/o', + 'headers': { + 'X-Container-Partition': '0', + 'Host': 'localhost:80', + 'Referer': '%s http://localhost/v1/a/c/o' % method, + 'X-Backend-Storage-Policy-Index': '1', + 'X-Backend-Container-Path': shard_range.name + }, + } + check_request(request, **expectations) + + expected = {} + for i, device in enumerate(['sda', 'sdb', 'sdc']): + expected[device] = '10.0.0.%d:100%d' % (i, i) + self.assertEqual(container_headers, expected) + + do_test('POST', 'sharding') + do_test('POST', 'sharded') + do_test('DELETE', 'sharding') + do_test('DELETE', 'sharded') + do_test('PUT', 'sharding') + do_test('PUT', 'sharded') def test_DELETE(self): with save_globals(): @@ -8356,6 +8458,29 @@ self.assertEqual(res.content_length, 0) self.assertNotIn('transfer-encoding', res.headers) + def test_GET_account_non_existent(self): + with save_globals(): + set_http_connect(404, 404, 404) + controller = proxy_server.ContainerController(self.app, 'a', 'c') + req = Request.blank('/v1/a/c') + self.app.update_request(req) + res = controller.GET(req) + self.assertEqual(res.status_int, 404) + 
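# This test and its auto-create companion below differ only in the account
# name: accounts whose name starts with the auto-create prefix (by default
# '.') are treated as existing even when every account HEAD returns 404, so
# the '.a' variant can still serve and cache a container response. A
# minimal sketch of that check, assuming the default prefix rather than a
# value read from the proxy config:
def _is_autocreate_account(account, prefix='.'):
    return account.startswith(prefix)

# _is_autocreate_account('.a') -> True; _is_autocreate_account('a') -> False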
self.assertNotIn('container/a/c', res.environ['swift.infocache']) + + def test_GET_auto_create_prefix_account_non_existent(self): + with save_globals(): + set_http_connect(404, 404, 404, 204, 204, 204) + controller = proxy_server.ContainerController(self.app, '.a', 'c') + req = Request.blank('/v1/a/c') + self.app.update_request(req) + res = controller.GET(req) + self.assertEqual(res.status_int, 204) + ic = res.environ['swift.infocache'] + self.assertEqual(ic['container/.a/c']['status'], 204) + self.assertEqual(res.content_length, 0) + self.assertNotIn('transfer-encoding', res.headers) + def test_GET_calls_authorize(self): called = [False] diff -Nru swift-2.17.0/test-requirements.txt swift-2.18.0/test-requirements.txt --- swift-2.17.0/test-requirements.txt 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/test-requirements.txt 2018-05-30 10:17:02.000000000 +0000 @@ -8,14 +8,16 @@ nose # LGPL nosexcover # BSD nosehtmloutput>=0.0.3 # Apache-2.0 -sphinx>=1.6.2 # BSD -openstackdocstheme>=1.11.0 # Apache-2.0 -os-api-ref>=1.0.0 # Apache-2.0 os-testr>=0.8.0 # Apache-2.0 mock>=2.0 # BSD python-swiftclient python-keystoneclient!=2.1.0,>=2.0.0 # Apache-2.0 reno>=1.8.0 # Apache-2.0 +python-openstackclient +boto +requests-mock>=1.2.0 # Apache-2.0 +fixtures>=3.0.0 # Apache-2.0/BSD +keystonemiddleware>=4.17.0 # Apache-2.0 # Security checks bandit>=1.1.0 # Apache-2.0 diff -Nru swift-2.17.0/tools/test-setup.sh swift-2.18.0/tools/test-setup.sh --- swift-2.17.0/tools/test-setup.sh 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/tools/test-setup.sh 2018-05-30 10:17:02.000000000 +0000 @@ -25,6 +25,6 @@ if is_rhel7; then # Install CentOS OpenStack repos so that we have access to some extra # packages. - sudo yum install -y centos-release-openstack-pike + sudo yum install -y centos-release-openstack-queens sudo yum install -y liberasurecode-devel fi diff -Nru swift-2.17.0/tox.ini swift-2.18.0/tox.ini --- swift-2.17.0/tox.ini 2018-02-05 14:00:48.000000000 +0000 +++ swift-2.18.0/tox.ini 2018-05-30 10:17:02.000000000 +0000 @@ -26,16 +26,28 @@ NOSE_COVER_HTML=1 NOSE_COVER_HTML_DIR={toxinidir}/cover -[testenv:py34] +[testenv:py35] commands = - nosetests \ + nosetests {posargs:\ + test/unit/cli/test_dispersion_report.py \ + test/unit/cli/test_form_signature.py \ + test/unit/cli/test_info.py \ + test/unit/cli/test_recon.py \ + test/unit/cli/test_relinker.py \ + test/unit/cli/test_ring_builder_analyzer.py \ + test/unit/cli/test_ringbuilder.py \ + test/unit/common/middleware/test_gatekeeper.py \ + test/unit/common/ring \ + test/unit/common/test_daemon.py \ test/unit/common/test_exceptions.py \ test/unit/common/test_header_key_dict.py \ + test/unit/common/test_linkat.py \ + test/unit/common/test_memcached.py \ test/unit/common/test_manager.py \ - test/unit/common/test_splice.py - -[testenv:py35] -commands = {[testenv:py34]commands} + test/unit/common/test_splice.py \ + test/unit/common/test_storage_policy.py \ + test/unit/common/test_utils.py \ + test/unit/common/test_wsgi.py} [testenv:pep8] basepython = python2.7 @@ -64,25 +76,34 @@ setenv = SWIFT_TEST_IN_PROCESS=1 SWIFT_TEST_IN_PROCESS_CONF_LOADER=encryption +[testenv:func-domain-remap-staticweb] +commands = ./.functests {posargs} +setenv = SWIFT_TEST_IN_PROCESS=1 + SWIFT_TEST_IN_PROCESS_CONF_LOADER=domain_remap_staticweb + [testenv:func-ec] commands = ./.functests {posargs} setenv = SWIFT_TEST_IN_PROCESS=1 SWIFT_TEST_IN_PROCESS_CONF_LOADER=ec +[testenv:func-s3api] +commands = ./.functests {posargs} +setenv = SWIFT_TEST_IN_PROCESS=1 + 
SWIFT_TEST_IN_PROCESS_CONF_LOADER=s3api + [testenv:venv] commands = {posargs} [testenv:docs] basepython = python2.7 -commands = python setup.py build_sphinx +deps = -r{toxinidir}/doc/requirements.txt +commands = sphinx-build -W -b html doc/source doc/build/html [testenv:api-ref] # This environment is called from CI scripts to test and publish # the API Ref to developer.openstack.org. -# we do not use -W here because we are doing some slightly tricky -# things to build a single page document, and as such, we are ok -# ignoring the duplicate stanzas warning. basepython = python2.7 +deps = -r{toxinidir}/doc/requirements.txt commands = rm -rf api-ref/build sphinx-build -W -b html -d api-ref/build/doctrees api-ref/source api-ref/build/html @@ -114,4 +135,5 @@ commands = bindep test [testenv:releasenotes] +deps = -r{toxinidir}/doc/requirements.txt commands = sphinx-build -a -W -E -d releasenotes/build/doctrees -b html releasenotes/source releasenotes/build/html diff -Nru swift-2.17.0/.zuul.yaml swift-2.18.0/.zuul.yaml --- swift-2.17.0/.zuul.yaml 2018-02-05 14:00:47.000000000 +0000 +++ swift-2.18.0/.zuul.yaml 2018-05-30 10:17:02.000000000 +0000 @@ -9,7 +9,7 @@ timeout: 2400 vars: tox_environment: - TMPDIR: "{{ ansible_env.HOME }}/xfstmp" + TMPDIR: '{{ ansible_env.HOME }}/xfstmp' - job: name: swift-tox-py27 @@ -93,22 +93,65 @@ parent: swift-tox-func-ec nodeset: centos-7 +- job: + name: swift-tox-func-domain-remap-staticweb + parent: swift-tox-base + description: | + Run functional tests for swift under cPython version 2.7. + + Uses tox with the ``func-domain-remap-staticweb`` environment. + It sets TMPDIR to an XFS mount point created via + tools/test-setup.sh. + vars: + tox_envlist: func-domain-remap-staticweb + +- job: + name: swift-tox-func-s3api + parent: swift-tox-base + description: | + Run functional tests for swift under cPython version 2.7. + + Uses tox with the ``func-s3api`` environment. + It sets TMPDIR to an XFS mount point created via + tools/test-setup.sh. + vars: + tox_envlist: func-s3api + +- job: + name: swift-probetests-centos-7 + parent: unittests + nodeset: centos-7 + voting: false + description: | + Setup a SAIO dev environment and run Swift's probe tests + timeout: 3600 + pre-run: + - playbooks/saio_single_node_setup/install_dependencies.yaml + - playbooks/saio_single_node_setup/setup_saio.yaml + - playbooks/saio_single_node_setup/make_rings.yaml + run: playbooks/probetests/run.yaml + post-run: playbooks/probetests/post.yaml + - project: - name: openstack/swift check: jobs: - swift-tox-py27 - swift-tox-py35 - swift-tox-func - swift-tox-func-encryption + - swift-tox-func-domain-remap-staticweb - swift-tox-func-ec + - swift-tox-func-s3api + - swift-probetests-centos-7 gate: jobs: - swift-tox-py27 - swift-tox-py35 - swift-tox-func - swift-tox-func-encryption + - swift-tox-func-domain-remap-staticweb - swift-tox-func-ec + - swift-tox-func-s3api experimental: jobs: - swift-tox-py27-centos-7
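For local verification the new functional variants follow the same pattern as the existing tox environments, so `tox -e func-s3api` or `tox -e func-domain-remap-staticweb` should exercise them in-process (both set SWIFT_TEST_IN_PROCESS=1), and `tox -e py35` runs the expanded Python 3 unit test list; the probe tests added to the CI pipelines above run against a SAIO built by the listed playbooks rather than through tox. These invocations are inferred from the environment names in the diff rather than quoted from project documentation.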