diff -Nru ceph-10.2.7/AUTHORS ceph-10.2.9/AUTHORS
--- ceph-10.2.7/AUTHORS	2017-04-10 11:45:56.000000000 +0000
+++ ceph-10.2.9/AUTHORS	2017-07-13 13:07:11.000000000 +0000
@@ -119,6 +119,7 @@
 Clement Lebrun
 Colin Mattson
 Colin P. McCabe
+craigchi
 cy.l@inwinstack.com
 Dan Chai
 Dan Horák
@@ -169,6 +170,7 @@
 Etienne Menguy
 Evan Felix
 Evgeniy Firsov
+Fabian Grünbichler
 Fabio Alessandro Locati
 fangdong
 Federico Gimenez
@@ -215,11 +217,13 @@
 Harry Harrington
 Hazem Amara
 Hector Martin
+Henrik Korkuc
 Henry C Chang
 Henry Chang
 Herb Shiu
 Hervé Rousseau
 Holger Macht
+hrchu
 Huamin Chen
 Huang Jun
 Ian Holsman
@@ -341,6 +345,7 @@
 Liam Monahan
 Li Peng
 Li Tianqing
+liuchang0812
 Liu Peiyan
 LiuYang
 Li Wang
@@ -354,6 +359,7 @@
 Lukasz Jagiello
 Luo Kexue
 Luo Runbing
+lu.shasha
 Lu Shi
 Ma Jianpeng
 Marcel Lauhoff
@@ -383,6 +389,7 @@
 Michael Riederer
 Michael Rodriguez
 Michal Jarzabek
+Michal Koutný
 Michel Normand
 Mike Kelly
 Mike Lundy
@@ -513,6 +520,7 @@
 Simone Gotti
 Simon Leinen
 SirishaGuduru
+snakeAngel2015
 Somnath Roy
 Sondra.Menthers
 Song Baisen
@@ -622,6 +630,7 @@
 Yannick Atchy Dalama
 Yan, Zheng
 Yan, Zheng
+yaoning
 Yazen Ghannam
 Yehua Chen
 Yehuda Sadeh
@@ -635,6 +644,7 @@
 You Ji
 Yuan Zhou
 Yunchuan Wen
+YunfeiGuan
 Yuri Weinstein
 Yuri Weinstein
 Yuri Weinstein
@@ -646,6 +656,7 @@
 Zengran Zhang
 Zeqiang Zhuang
 Zhang Huan
+Zhang Shaowen
 zhangweibing
 Zhao Chao
 Zhao Junwang
diff -Nru ceph-10.2.7/ceph.spec ceph-10.2.9/ceph.spec
--- ceph-10.2.7/ceph.spec	2017-04-10 11:45:43.000000000 +0000
+++ ceph-10.2.9/ceph.spec	2017-07-13 13:06:57.000000000 +0000
@@ -14,7 +14,7 @@
 #
 # Please submit bugfixes or comments via http://tracker.ceph.com/
 #
-%bcond_with ocf
+%bcond_without ocf
 %bcond_without cephfs_java
 %bcond_with tests
 %bcond_with xio
@@ -56,7 +56,7 @@
 # common
 #################################################################################
 Name: ceph
-Version: 10.2.7
+Version: 10.2.9
 Release: 0%{?dist}
 Epoch: 1
 Summary: User space components of the Ceph file system
@@ -214,6 +214,7 @@
 Requires: hdparm
 Requires: cryptsetup
 Requires: findutils
+Requires: psmisc
 Requires: which
 %if 0%{?suse_version}
 Recommends: ntp-daemon
@@ -667,11 +668,13 @@
 	--without-lttng \
 	--without-babeltrace \
 %endif
-	$CEPH_EXTRA_CONFIGURE_ARGS \
-	%{?_with_ocf} \
+%if 0%{with ocf}
+	--with-ocf \
+%endif
 %if %{without tcmalloc}
 	--without-tcmalloc \
 %endif
+	$CEPH_EXTRA_CONFIGURE_ARGS \
 	CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS"
 %if %{with lowmem_builder}
diff -Nru ceph-10.2.7/ceph.spec.in ceph-10.2.9/ceph.spec.in
--- ceph-10.2.7/ceph.spec.in	2017-04-10 11:44:25.000000000 +0000
+++ ceph-10.2.9/ceph.spec.in	2017-07-13 13:05:36.000000000 +0000
@@ -14,7 +14,7 @@
 #
 # Please submit bugfixes or comments via http://tracker.ceph.com/
 #
-%bcond_with ocf
+%bcond_without ocf
 %bcond_without cephfs_java
 %bcond_with tests
 %bcond_with xio
@@ -214,6 +214,7 @@
 Requires: hdparm
 Requires: cryptsetup
 Requires: findutils
+Requires: psmisc
 Requires: which
 %if 0%{?suse_version}
 Recommends: ntp-daemon
@@ -667,11 +668,13 @@
 	--without-lttng \
 	--without-babeltrace \
 %endif
-	$CEPH_EXTRA_CONFIGURE_ARGS \
-	%{?_with_ocf} \
+%if 0%{with ocf}
+	--with-ocf \
+%endif
 %if %{without tcmalloc}
 	--without-tcmalloc \
 %endif
+	$CEPH_EXTRA_CONFIGURE_ARGS \
 	CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS"
 %if %{with lowmem_builder}
diff -Nru ceph-10.2.7/ChangeLog ceph-10.2.9/ChangeLog
--- ceph-10.2.7/ChangeLog	2017-04-10 11:45:57.000000000 +0000
+++ ceph-10.2.9/ChangeLog	2017-07-13 13:07:12.000000000 +0000
@@ -1,5 +1,181 @@
-50e863e (HEAD, tag: v10.2.7, origin/jewel) 10.2.7
+2ee413f (HEAD, tag: v10.2.9, origin/jewel) 10.2.9
+fef1c87 Revert "osdc/Journaler: make header write_pos align to boundary of flushed entry"
+3f89971 Revert "osdc/Journaler: avoid executing on_safe contexts prematurely"
+6b479c2 doc: zero PendingReleaseNotes in preparation for v10.2.9
+55de93f doc: clarify status of jewel PendingReleaseNotes
+f5b1f1f (tag: v10.2.8) 10.2.8
+a372b4e Fixed upgrade sequence to 10.2.0 -> 10.2.7 -> latest -x (10.2.8)
+55eeaad tests: run upgrade/client-upgrade on latest CentOS 7.3
+9d3110c tests: upgrade/hammer-x/v0-94-6-mon-overload: tweak packages list
+6a64f89 tests: upgrade/hammer-x: new v0-94-6-mon-overload subsuite
+d2d4b72 qa: enable quotas for pre-luminous quota tests
+682b4d7 tests: drop upgrade/hammer-jewel-x
+4028774 tests: swift.py: tweak imports
+8e0e4a0 tests: upgrade/hammer-x/stress-split: tweak packages list
+a86ce72 tests: swift.py: clone the ceph-jewel branch
+7b58ac9 tests: move swift.py task to qa/tasks
+1c0c909 qa/suites/upgrade/hammer-x: set "sortbitwise" for jewel clusters
+de76fdb qa/workunits/rados/test-upgrade-*: whitelist tests the right way
+d43e19d jewel: osd: pg_pool_t::encode(): be compatible with Hammer <= 0.94.6
+1af6781 qa/workunits/rados/test-upgrade-*: whitelist tests for master
+a744340 mds: issue new caps when sending reply to client
+d33b30c ceph-disk: do not setup_statedir on trigger
+f46ccf2 tests: rados: sleep before ceph tell osd.0 flush_pg_stats after restart
+ab78cd0 rgw: fix crash when listing objects via swift
+cda721b client: update the 'approaching max_size' code
+439f391 mds: limit client writable range increment
+06cf9f3 osdc/Journaler: avoid executing on_safe contexts prematurely
+2e299b5 osdc/Journaler: make header write_pos align to boundary of flushed entry
+8ac0e5c pybind: fix cephfs.OSError initialization
+09b9410 pybind: fix open flags calculation
+aa0cd46 osd: Move scrub sleep timer to osdservice
+c47bd05 osd: Implement asynchronous scrub sleep
+e6daee8 Client.cc: adjust Client::_getattr calls
+a2c7a22 qa/cephfs: use getfattr/setfattr helpers
+12aa35a tasks/cephfs: fix race while mounting
+a7b6992 qa: add test for reading quotas from different clients
+8b8ee39 client: _getattr on quota_root before using in statfs
+dd7d59a client: getattr before read on ceph.* xattrs
+8dd93ca osd: Object level shard errors are tracked and used if no auth available
+59bd671 rgw:fix memory leaks
+b429fa1 cls/rgw: list_plain_entries() stops before bi_log entries
+1f895c2 qa/workunits/rados/test-upgrade-*: whitelist tests for master
+aa99558 rgw: rest conn functions cleanup, only append zonegroup if not empty
+5d90798 rgw: rest and http client code to use param vectors
+81e35b9 qa/suites/rados/singleton-nomsgr/*: set sortbitwise after upgrade
+f2814e4 mon/OSDMonitor: fixup sortbitwise flag warning
+0261718 mon: remove config option to disable no sortbitwise warning
+bb79663 fs: normalize file open flags internally used by cephfs
+4ceaa7c msg/simple/Pipe: manual backport of fix in PR#14795
+99c65bb rgw: new rest api to retrieve object layout
+33745a3 rgw: rest api to read zone config params
+acf608a filestore, tools: Fix logging of DBObjectMap check() repairs
+fecc523 osd: Simplify DBObjectMap by no longer creating complete tables
+6902c31 ceph-osdomap-tool: Fix seg fault with large amount of check error output
+4a3e4bc osd: Add automatic repair for DBObjectMap bug
+d4f0ac0 ceph-osdomap-tool: Fix tool exit status
+5f36c31 DBObjectMap: rewrite rm_keys and merge_new_complete
+1fe4b85 DBObjectMap: strengthen in_complete_region post condition
+85f2151 DBObjectMap: fix next_parent()
+484ccda test_object_map: add tests to trigger some bugs related to 18533
+cdeb690 test: Add ceph_test_object_map to make check tests
+cf5d588 ceph-osdomap-tool: Add --debug and only show internal logging if enabled
+4c4a06f osd: DBOjectMap::check: Dump complete mapping when inconsistency found
+6c128ff test_object_map: Use ASSERT_EQ() for check() so failure doesn't stop testing
+aa769a9 tools: Check for overlaps in internal "complete" table for DBObjectMap
+761ee7c tools: Add dump-headers command to ceph-osdomap-tool
+117db1c tools: Add --oid option to ceph-osdomap-tool
+4d8120d osd: Remove unnecessary assert and assignment in DBObjectMap
+86980a0 rgw: add suport for creating S3 type subuser of admin rest api
+be9e832 civetweb: pullup chunked encoding by Marcus
+608785a os: make zero values noops for set_alloc_hint() in FileStore
+f32b5c6 test/librados/tmap_migrate: g_ceph_context->put() upon return
+905c4ac Fix reveresed promote throttle default parameters.
+fb3ee2e rgw: swift: disable revocation thread if sleep == 0 || cache_size == 0
+e552d91 rgw: add bucket size limit check to radosgw-admin
+6c48266 test/ceph_crypto: do not read ceph.conf in global_init()
+aca2659 tests: fix erasure-code premature deallocation of cct
+8bddd42 rbd-nbd: no need create asok file for unmap and list-mapped commands.
+328bfbd rbd-nbd: restart parent process logger after forking
+192e7bc crushtool: do not release g_ceph_context at exit
+8a2f27c common,test: g_ceph_context->put() upon return
+040ff01 crushtool: s/exit(EXIT_FAILURE)/return EXIT_FAILURE/
+8e993e6 global/signal_handler: reset injected segv after test
+f1c0042 test_cors.cc: fix the mem leak
+c49b114 rgw: fix failed to create bucket if a non-master zonegroup has a single zone
+43327f8 ceph-disk: dmcrypt activate must use the same cluster as prepare
+3ec1a9b ceph_test_librados_api_misc: fix stupid LibRadosMiscConnectFailure.ConnectFailure test
+66c3db7 rgw: data sync skips slo data when syncing the manifest object
+303a62f rgw: RGWGetObj applies skip_manifest flag to SLO
+f3d99ae rgw: allow system users to read SLO parts
+db053da mds: validate prealloc_inos on sessions after load
+2b5eb8f mds: operator<< for Session
+5b56214 client/Client.cc: add feature to reconnect client after MDS reset
+8f21038 doc: cephfs: fix the unexpected indent warning
+f9a1954 doc: additional edits in FUSE client config
+018649f doc: Dirty data are not the same as corrupted data
+1d8a5b6 doc: minor changes in fuse client config reference
+1ae46b2 doc: add client config ref
+b8fd297 use sudo to check check health
+1b91ffc Add reboot case for systemd test
+3d8d1da Fix distro's, point to latest version
+f34489d cephfs: fix mount point break off problem after mds switch occured
+c2efeb4 osd: Give requested scrub work a higher priority
+db86a24 client: wait for lastest osdmap when handling set file/dir layout
+7b9283b cephfs: fix write_buf's _len overflow problem
+b52c508 mds: shut down finisher before objecter
+96e801f mds: ignore ENOENT on writing backtrace
+7468689 test: rbd master/slave notify test should test active features
+7347f11 mds: make C_MDSInternalNoop::complete() delete 'this'
+824b19a tools/cephfs: set dir_layout when injecting inodes
+eab56da mon: fix hiding mdsmonitor informative strings
+d57437e mds: reset heartbeat in export_remaining_imported_caps
+6adf190 mds: heartbeat_reset in dispatch
+63f41d5 test/libcephfs: avoid buffer overflow when testing ceph_getdents()
+7146816 mds/StrayManager: aviod reusing deleted inode in StrayManager::_purge_stray_logged
+d8b139b tasks/cephfs: switch open vs. write in test_open_inode
+e8ae80f qa: fix race in Mount.open_background
+36c86f7 mds: don't purge strays when mds is in clientreplay state
+8b01cf3 mds: skip fragment space check for replayed request
+82ea097 client: fix the cross-quota rename boundary check conditions
+dbe90c7 librbd: fix rbd_metadata_list and rbd_metadata_get
+5096fc9 test/librados_test_stub: fixed cls_cxx_map_get_keys/vals return value
+216156b rbd: prevent adding multiple mirror peers to a single pool
+c05bd1c radosgw-admin: use zone id when creating a zone
+ba81cbb qa: rgw task uses period instead of region-map
+a755c952 rgw-admin: remove deprecated regionmap commands
+482bd1a doc: rgw: correct the quota section
+21a83e1 rgw_file: remove unused rgw_key variable
+ebad040 rgw_file: fix readdir after dirent-change
+dd9833c rgw_file: don't expire directories being read
+30a5e85 rgw_file: rgw_readdir: return dot-dirs only when *offset is 0
+fe836bf rgw_file: chunked readdir
+16eeb8c rgw_file: fix missing unlock in unlink
+8c7cb82 rgw_file: implement reliable has-children check (unlink dir)
+18f14dd rgw_file: introduce rgw_lookup type hints
+eac0e27 Revert "osdc/Objecter: If osd full, it should pause read op which w/ rwordered flag."
+0efe16d Revert "osdc/Objecter: resend RWORDERED ops on full"
+33af18e doc: mention --show-mappings in crushtool manpage
+97cd21a tests: Thrasher: handle "OSD has the store locked" gracefully
+faeb808 rgw: fix for null version_id in fetch_remote_obj()
+6180fcb rgw: version id doesn't work in fetch_remote_obj
+d079b91 rgw: don't return skew time in pre-signed url
+9b77b16 ceph-disk: enable directory backed OSD at boot time
+68fcb01 pglog: require users set a config option before ignoring divergent_priors
+b947730 osd: pglog: clean up divergent_priors off disk when running; don't assert on startup
+a84dc8f PendingReleaseNotes: discuss snap trim improvements
+360a9d9 PrimaryLogPG: reimplement osd_snap_trim_sleep within the state machine
+18dbf6a rados: check that pool is done trimming before removing it
+7f78450 osd/ReplicatedPG: limit the number of concurrently trimming pgs
+527911f mon/MonClient: make get_mon_log_message() atomic
+b698d1f librbd: is_exclusive_lock_owner API should ping OSD
+d311eea msg/simple/Pipe: support IPv6 QoS.
+332b517 pybind: fix incorrect exception format strings
+25e43ac tests: fix oversight in yaml comment
+50e863e (tag: v10.2.7) 10.2.7
+f247404 osdc/Objecter: resend RWORDERED ops on full
+2d68822 osdc/Objecter: If osd full, it should pause read op which w/ rwordered flag.
+2271cd8 ceph-disk: Populate mount options when running "list"
+6546535 debian: replace SysV rbdmap with systemd service
+6d47615 qa/tasks/workunit.py: use "overrides" as the default settings of workunit
+fdc71e7 tasks/workunit.py: specify the branch name when cloning a branch
+6c14a80 tasks/workunit.py: when cloning, use --depth=1
+f8aa6be build/ops: rpm: move $CEPH_EXTRA_CONFIGURE_ARGS to right place
+bb3eb42 build/ops: rpm: explicitly provide --with-ocf to configure
+30c9527 rgw: use separate http_manager for read_sync_status
+87cb847 rgw: pass cr registry to managers
+1a6d7c0 rgw: use separate cr manager for read_sync_status
+c466ade rgw: change read_sync_status interface
+36921a3 rgw: don't ignore ENOENT in RGWRemoteDataLog::read_sync_status()
+b249fd5 PendingReleaseNotes: warning about 'osd rm ...' and #19119
+335258f osdc/Objecter: respect epoch barrier in _op_submit()
+a20d2b8 ceph-disk: Adding retry loop in get_partition_dev()
+2d5d0ae ceph-disk: Reporting /sys directory in get_partition_dev()
+bcd3c90 osd: don't share osdmap with objecter when preboot
+dd25a8f osd: Calculate degraded and misplaced more accurately
+b5b441a common: fix segfault in public IPv6 addr picking
 fb85c68 rgw_file: use fh_hook::is_linked() to check residence
+7fdf4d4 mon: remove bad rocksdb option
 fcc3ada jewel: librbd: possible race in ExclusiveLock handle_peer_notification
 eedb9f7 rgw_file: RGWFileHandle dtor must also cond-unlink from FHCache
 6c2a40a rgw_file: split last argv on ws, if provided
@@ -17,6 +193,7 @@
 7e4e290 rgw_file: fix reversed return value of getattr
 3c02ee4 rgw_file: fix non-negative return code for open operation
 e9a4903 rgw_file: fix double unref on rgw_fh for rename
+3860ccf osd: bypass readonly ops when osd full.
 9910eac rgw: multipart part copy, fix regression
 a3fdf0e rgw: minor optimization
 2161376 rgw: rgw_obj_key use adjustment in multipart copy part
@@ -27,19 +204,57 @@
 bd118b5 rgw: doc: add multipart uploads copy part feature as supported
 b56b719 rgw: multipart uploads copy part support
 d44263f rgw: make sending Content-Length in 204 and 304 controllable
+b856773 qa/workunits/rbd: resolve potential rbd-mirror race conditions
+1cc8d0d librbd: Include WorkQueue.h since we use it
+ee06517 librbd: avoid possible recursive lock when racing acquire lock
+f6489d0 rbd: destination pool should be source pool if it is not specified
+9e123e6 rgw: use rgw_zone_root_pool for region_map like is done in hammer
 f3face6 rgw: clear old zone short ids on period update
+e2ee70a rgw: skip conversion of zones without any zoneparams
+c7d292b rgw: better debug information for upgrade
+11f5c84 rgw/rgw_rados.cc: prefer ++operator for non-primitive iterators
+819af9e41 tools/rados: Check return value of connect
+72e2476 brag: count the number of mds in fsmap not in mdsmap
+3cb1927 brag: Assume there are 0 MDS instead of crashing when data is missing
+8bed107 doc: update description of rbdmap unmap[-all] behaviour
+da4e0b5 doc: add verbiage to rbdmap manpage
+167d4fd rbdmap: unmap RBDMAPFILE images unless called with unmap-all
+39aab76 Revert "dummy: reduce run time, run user.yaml playbook"
+2e50fe1 rgw: fix break inside of yield in RGWFetchAllMetaCR
+dc4e7a1 rgw: delete_system_obj() fails on empty object name
+e9a577c rgw: if user.email is empty, dont try to delete
+90de64b jewel: osd/PGLog: reindex properly on pg log split
+ae498e8 os/filestore: fix clang static check warn use-after-free
 0be4e89 rgw: hold a reference on data_sync_cr over run()
 18ffdb7 rgw: clear data_sync_cr if RGWDataSyncControlCR fails
 d0a0d2f tests: fix merge error in rgw/singleton/all/radosgw-admin.yaml
+335b5fa rbd-nbd: check /sys/block/nbdX/size to ensure kernel mapped correctly
+ced799f rgw: Use decoded URI when verifying TempURL
+043d704 osd/OSDMap: don't set weight to IN when OSD is destroyed
+6b5322c osd/ReplicatedPG: try with pool's use-gmt setting if hitset archive not found
+d30c4d5 doc: rgw: make a note abt system users vs normal users
+5ee8fea rgw: Let the object stat command be shown in the usage
 1985662 rgw: RGWMetaSyncShardControlCR retries with backoff on all error codes
+754b4a4 rgw: Correct the return codes for the health check feature Fixes: http://tracker.ceph.com/issues/19025 Signed-off-by: Pavan Rallabhandi
+9cd7dd8 rgw: Fixes typo in rgw_admin.cc
 dfaaec0 rgw_file: return of RGWFileHandle::FLAG_EXACT_MATCH
 9b3784d rgw_file: invalid use of RGWFileHandle::FLAG_EXACT_MATCH
 7761376 rgw_file: rgw_lookup: don't ref for "/" or ".."
 97fed01 rgw_file: avoid stranding invalid-name bucket handles in fhcache
 d333add rgw: set dumpable flag after setuid post ff0e521
+85fbb00 rgw: don't init rgw_obj from rgw_obj_key when it's incorrect to do so
+ec0668c rgw: fix for broken yields in RGWMetaSyncShardCR
+6afe3ef rgw: kill a compile warning for rgw_sync
+06916a8 rgw: change log level to 20 for 'System already converted' message
+4c1f302 rgw: the swift container acl should support field .ref
+714eb86 server: negative error code when responding to client
 656b5b6 (tag: v10.2.6) 10.2.6
 f7ce5df rgw: fix swift cannot disable object versioning with empty X-Versions-Location
+1a4e1e0 librbd: remove image header lock assertions
+cebba01 os/filestore/HashIndex: be loud about splits
+1d054c3 build/ops: add psmisc dependency to ceph-base (deb and rpm)
 2f20328 osd: Increase priority for inactive PGs backfill
+6add2a4 rgw: metadata sync info should be shown at master zone of slave zonegroup
 e8041f6 rgw_file: fix RGWLibFS::setattr for directory objects
 88f4895 qa/suites/upgrade/hammer-x: Add some volumes
 0c242d1 qa/suites/ceph-deploy: Drop OpenStack volume count
@@ -48,6 +263,8 @@
 2cbec5b Removed dumplin test 13234.yaml as not needed anymore
 771e1d9 qa/suites/rest: Openstack volumes
 a18640f qa/suites/ceph-ansible: Openstack volumes
+0e11a93 systemd: Start OSDs after MONs
+3bdd439 osd: preserve allocation hint attribute during recovery
 841688b qa/suites/fs: Add openstack volume configuration
 9778743 qa/suites/samba: Openstack volume configuration
 cd1e8ef qa/suites/hadoop: Openstack volume configuration
@@ -55,14 +272,27 @@
 ba35859 qa/suites/kcephfs: Openstack volume configuration
 aced718 qa/suites/krbd: Add openstack volume configuration
 94d5888 qa/suites/rgw: Add openstack volume configuration
+8d0140a librbd: improve debug logging for lock / watch state machines
+62ce346 test: use librados API to retrieve config params
+01d04e2 tests: Thrasher: eliminate a race between kill_osd and __init__
+08a6678 rpm: build ceph-resource-agents by default
 78c8be7 rgw/openssl fix: xenial autoconf logic problem: gcc/ld got too smart...
 db928d6 rbd-mirror: retry object copy after -ENOENT error
 a643fa8 rbd-mirror: object copy should always reference valid snapshots
 37bbc95 rbd-mirror: replace complex object op tuple with struct
+d22beca msg/simple: cleanups
+a18a2dd msg/simple: set close on exec on server sockets
+91a968b msg/async: set close on exec on server sockets
 8941881 rgw_file: interned RGWFileHandle objects need parent refs
+547e867 librbd: possible deadlock with flush if refresh in-progress
+07501de mon/OSDMonitor: make 'osd crush move ...' work on osds
+7c6c3c7 auth: 'ceph auth import -i' overwrites caps, if caps are not specified in given keyring file, should alert user and should not allow this import. Because in 'ceph auth list' we keep all the keyrings with caps and importing 'client.admin' user keyring without caps locks the cluster with error[1] because admin keyring caps are missing in 'ceph auth'.
+8c7a1df2 osd/PG: restrict want_acting to up+acting on recovery completion
 cb7bb0c tests: reduce stress-split-erasure-code-x86_64 dependency on stress-split
 8ae1886 tests: reduce stress-split-erasure-code dependency on stress-split
 4d4b38e qa: drop ubuntu trusty support
+cfa37d6 ceph-disk: Fix getting wrong group name when --setgroup in bluestore
+2d17092 ceph-osd: --flush-journal: sporadic segfaults on exit
 ebb2f73 mds: fix incorrect assertion in Server::_dir_is_nonempty()
 256b850 tests: remove extra indentation in exec block
 898702d tests: add require_jewel_osds to upgrade/hammer-x/tiering
@@ -130,10 +360,13 @@
 a27f6a7 rgw: we need to reinit the zonegroup after assignment to avoid invalid cct and store
 006140a rgw: fix init_zg_from_period when default zone is not set as default
 78d296b osd: do not send ENXIO on misdirected op by default
+d012c38 radosstriper : protect aio_write API from calls with 0 bytes
+915dbac osdc: cache should ignore error bhs during trim
 30fb615 tests: ignore bogus ceph-objectstore-tool error in ceph_manager
 3eff1ac librbd: allow to open an image without opening parent image
 bee1d2c rbd-mirror: hold owner lock when testing if lock owner
 463e88e OSDMonitor: clear jewel+ feature bits when talking to Hammer OSD
+419c992 OSD: allow client throttler to be adjusted on-fly, without restart
 b1d6c2e rgw: RGWCloneMetaLogCoroutine uses RGWMetadataLogInfoCompletion
 7ca400b rgw: expose completion for RGWMetadataLog::get_info_async()
 335a732 rgw: RGWMetaSyncShardCR drops stack refs on destruction
@@ -323,6 +556,7 @@
 64c0cae librbd/diff_iterator: use proper snap to query parent overlap
 d584f9e rgw: log name instead of id for SystemMetaObj on failure
 1a0becf rgw: drop unnecessary spacing in rgw zg init log
+957c19b swift: added "--cluster" to rgw-admin command for multisite support
 f15c8da ceph_disk: fix a jewel checkin test break
 8e0cffd automake: convert to tar-pax
 a0ae9a8 client: drop setuid/setgid bits on ownership change
@@ -4480,7 +4714,7 @@
 be3e272 rgw: use internal wait instead of special coroutine
 07feb13 rgw: coroutine stack wait util
 9ca266a rgw: move more code around
-f2eb0d7 rgw: move code around
+f2eb0d75 rgw: move code around
 e6548ec rgw: rearrange full metadata sync order
 6adf75c rgw: trivial fixes
 5cc80b0 rgw: fix shutdown
@@ -9850,7 +10084,7 @@
 4520b3d osd/SnapMapper: use ghobject_t instead of hobject_t
 76aef7e osd: use explicit ghobject_t() ctor for omap operations
 93aac48 os/{Flat,LFN}Index: use explicit ghobject_t ctor
-6033591 os/FileStore: munge cid -> temp cid on read ops too
+6033591d os/FileStore: munge cid -> temp cid on read ops too
 b4ce84b osd: clear temp objects in the OSD, not FileStore
 cc4c608 osd/PGBackend: set correct shard in objects_list_partial
 5a7a52e osd: kill META_COLL constant; use named ctor
@@ -14118,6 +14352,7 @@
 7dcc850 doc: Updates man page for ceph-disk utility.
 e983230 remove unneeded include file
 a1ba385 doc: rgw document s3 bucket location features
+cd72cf2 swift: set full access to subusers creation
 0c33930 mon: fix MDS health status from peons
 a450cab doc: Adds man page for ceph-disk utility.
 4b35ae0 rgw: check for timestamp for s3 keystone auth
@@ -16237,6 +16472,7 @@
 4c8e322 mon: remove *_kb perf counters
 3179bb1 mon/PGMonitor: add _bytes perf counters
 adb2791 mon/PGMonitor: add _bytes fields for all usage dumps
+fce2ed6 Remove most ceph-specific tasks.  They are in ceph-qa-suite now.
 895318c README.md: word wrap
 500b95e README: symlink from README.md
 f9aa9c1 ignore errors if 'ps aux' fails
@@ -18407,6 +18643,7 @@
 b6ad5c6 rgw: idle timeout config moves to the external server line
 fd6056b schedule_suite: ugly hack to set priority when scheduling
 501e31d logrotate: do not rotate empty logs (2nd logrotate file)
+fac452a Revert "Lines formerly of the form '(remote,) = ctx.cluster.only(role).remotes.keys()'"
 158f9ba Revert "Lines formerly of the form '(remote,) = ctx.cluster.only(role).remotes.keys()'"
 91176f1 erasure-code: test encode/decode of SSE optimized jerasure plugins
 b76ad97 erasure-code: test jerasure SSE optimized plugins selection
@@ -18417,6 +18654,7 @@
 cc0cc15 erasure-code: gf-complete / jerasure modules updates
 12d4f38 erasure-code: allow loading a plugin from factory()
 b454bd6 rgw: add erasure coded data pool variant
+e98b107 Lines formerly of the form '(remote,) = ctx.cluster.only(role).remotes.keys()' and '(remote,) = ctx.cluster.only(role).remotes.iterkeys()' would fail with ValueError and no message if there were less than 0 or more than 1 key. Now a new function, get_single_remote_value() is called which prints out more understandable messages.
 d693b3f Lines formerly of the form '(remote,) = ctx.cluster.only(role).remotes.keys()' and '(remote,) = ctx.cluster.only(role).remotes.iterkeys()' would fail with ValueError and no message if there were less than 0 or more than 1 key. Now a new function, get_single_remote_value() is called which prints out more understandable messages.
 506d2bb logrotate improvement: do not rotate empty logs
 dc3ce58 osd: do not make pg_pool_t incompat when hit_sets are enabled
@@ -19256,7 +19494,7 @@
 3ed68eb rgw: return error if accessing object in non-existent bucket
 b0dcc79 radosgw-admin: gc list --include-all
 b9e1341 rados/thrash: add ec-radosbench workload
-386650d rados/thrash: add ec workloads
+386650d9 rados/thrash: add ec workloads
 609f4c5 Throw a Timeout exception on timeout.
 2718dbc radosbench: simplify pool creation and add ec_pool support
 e371565 task/: add ec_pool and append to rados.py
@@ -19490,6 +19728,7 @@
 eedbf50 osdmaptool: fix --pool option for --test-map-object mode
 6810610 Add docstrings to internal.py
 e44122f test: fix signed/unsigned warnings in TestCrushWrapper.cc
+f4284b5 Add doc strings to Swift tests
 5c36a5c Add doc strings to Swift tests
 567961b Readjust the indentation of mon_clock_skew_check.py and mon_thrash.py. Added docstrings.
 12b7904 Review: Rename calamari_testdir/ to calamari/
@@ -19546,6 +19785,7 @@
 840e918 tests: fix packaging for s/filestore/objectstore/
 b64f1e3 tests: fix objectstore tests
 f34eb1b mds: force update backtraces for previously created FS
+7b63876 Add docstrings to s3 related tasks.
 cee713f Add docstrings to s3 related tasks.
 b5c10bf Fix bad dealloctor
 a4b3b78 correct one command line at building packages section
@@ -21926,6 +22166,7 @@
 99c10bc doc: Updated diagrams, refactored text.
 1c12eef osd/ReplicatedPG: fix leak of RepGather on watch timeout
 1d67e15 osd/ReplicatedPG: fix leak of RepGather on large-object COPY_FROM
+f8bf53c Fix namespace collision
 218776b Fix namespace collision
 f1aac66 ceph.conf: fix typo
 3955666 Add config option 'verify_host_keys'
@@ -21993,6 +22234,7 @@
 59ee51a osd/ReplicatedPG: handle COPY_FROM self
 5cb7b29 mon: fix wrong arg to "instructed to" status message
 9e98620 rgw: destroy get_obj handle in copy_obj()
+66555a4 Don't hardcode the git://ceph.com/git/ mirror
 0ce6278 Don't hardcode the git://ceph.com/git/ mirror
 e6040f7 Don't hardcode ceph.com mirror for linux-firmware
 b79343c move the sitepackages to testenv section
@@ -22242,6 +22484,7 @@
 82cfa84 Get rid of chdir-coredump.
 a60e84d ceph: fix daemon-helper typo
 2214fe1 misc: valgrind --num-callers=50
+6e8a380 Helper scripts live in /usr/local/bin now!
 53b8e27 Helper scripts live in /usr/local/bin now!
 edc5ef8 Move helper scripts to /usr/local/bin
 1a05f9d queue: fix stderr redirect
@@ -22411,6 +22654,7 @@
 b91c1c5 add CEPH_FEATURE_OSD_CACHEPOOL
 53cea02 Add apology for non-public links
 25defd4 Indent wrapped exceptions.
+2346f1d s3tests: extend for multi-region tests
 ffac4ad s3tests: extend for multi-region tests
 db51888 rgw: persist the region info
 2877e27 radosgw-agent: store radosgw_agent server/port
@@ -22692,6 +22936,7 @@
 3afc7d9 If get_testdir_base() exists, might as well use it...
 f41436a Tweak regex to work for non-FQDN hostnames
 54ed1d1 Note that target hostnames must be resolvable.
+09b01b2 Fix some instances where print is being used instead of log
 5746efb Fix some instances where print is being used instead of log
 ab2d2fa Default to log level INFO.
 24ec320 Ceph-qa: change the fsx.sh to support hole punching test
@@ -22902,6 +23147,7 @@
 8c8e9a1 rgw.py: change --secret-key to --secret
 1cff5bd added rgw task before swift
 b4ed4e2 qa/workunits/cephtool/test_daemon.sh: sudo
+9b2c4fa4 s3/swift tests: call radosgw-admin as the right client
 1f7127b s3/swift tests: call radosgw-admin as the right client
 2f2108b rgw: fix dir creation and keyring
 d651658 osdc: Add asserts that client_lock is held
@@ -22941,6 +23187,7 @@
 e703942 ceph_manager: allow-experimental-feature now causes an EINVAL
 a6cd9fe osd: get initial full map after a map gap
 e24b502 osd: fix off-by-one in map gap logic
+3b3816d s3tests: clone correct branch
 9dac3fe s3tests: clone correct branch
 17fa544 fix double requirements issue
 251a6a4 Use dh_installinit to install upstart job files
@@ -23546,6 +23793,7 @@
 78bae33 ceph.spec.in: add some ceph_test_cls_* files
 dc99a23 Makefile.am: fix build, use $(CRYPTO_LIBS)
 42b9ea9 misc: move system type checking to a generic location
+343a42c s3tests: fix client configurations that aren't dictionaries
 7845848 s3tests: fix client configurations that aren't dictionaries
 1c22bdb restart rgw after upgrade for rgw tests
 00ae543 mon: do not scrub if scrub is in progress
@@ -23882,6 +24130,7 @@
 ad64067 rgw: init member variable
 b09d799 rgw: encode bucket info only after setting a flag
 cd98eb0 mon/AuthMonitor: make initial auth include rotating keys
+253cc98 enable-coredump -> adjust-ulimits
 13dbe9d enable-coredump -> adjust-ulimits
 9b2dfb7 mon: do not leak no_reply messages
 ad12b0d mon: fix leak of MOSDFailure messages
@@ -25196,6 +25445,7 @@
 6f8c1e9 doc/release-notes: add/link complete changelogs
 4fa2c49 doc/release-notes: v0.56.5
 72fc6eb doc: Fixed typos.
+a9f3eb6 s3tests: add force-branch with higher precdence than 'branch'
 e3b0e1e s3tests: add force-branch with higher precdence than 'branch'
 5cdd731 Revert "mon: fix Monitor::pick_random_mon()"
 b4e73cc doc/install/upgrading...: note that argonaut->bobtail->cuttlefish must be v0.56.5
@@ -25241,10 +25491,14 @@
 bd6ea8d doc: Reordered header levels for visual clarity.
 bb93eba doc: Fixed a few typos.
 14ce0ad doc: Updated the upgrade guide for Aronaut and Bobtail to Cuttlefish.
+5a7267f fix some errors found by pyflakes
 52742fb fix some errors found by pyflakes
+f866037 s3tests: revert useless portion of 1c50db6a4630d07e72144dafd985c397f8a42dc5
 7df72f2 s3tests: revert useless portion of 1c50db6a4630d07e72144dafd985c397f8a42dc5
 809814b rgw: restart radosgw too
+2dcce57 rgw tests: remove users after each test
 5a6e560 rgw tests: remove users after each test
+3c60425 rgw tests: clean up immediately after the test
 6aba6d2 rgw tests: clean up immediately after the test
 7de29dd doc/release-notes: update cuttlefish release notes to include bobtail
 935e868 ceph: allow restarting radosgw
@@ -25261,6 +25515,7 @@
 6ae9bbb elector: trigger a mon reset whenever we bump the epoch
 0acede3 mon: change leveldb block size to 64K
 4f70c89 misc: default base_test_dir to /home/ubuntu/cephtest
+022bd4a swift, s3readwrite: add missing yield
 57404b6 swift, s3readwrite: add missing yield
 6f2a7df doc: Fix typo.
 35a9823 doc: Added reference to transition from mkcephfs to ceph-deploy.
@@ -25292,6 +25547,7 @@
 ffc8557 doc: update rbd man page for new options
 8b2a147 gitignore: add ceph_monstore_tool
 29831f9 Makefile: fix java build warning
+820c72b s3tests, s3readwrite, swift: cleanup explicitly
 c8ec76e s3tests, s3readwrite, swift: cleanup explicitly
 a2fe013 mon: remap creating pgs on startup
 278186d mon: only map/send pg creations if osdmap is defined
@@ -27013,6 +27269,7 @@
 27fec23 ceph: simpilfy package removal
 28116db nuke: remove librados, librbd
 a529bb7 ceph: install ceph-mds, ceph-common
+fa1f894 Install ceph debs and use installed debs
 5235fc1 ceph: fix purge
 c525e10 Install ceph debs and use installed debs
 d790eeb nuke: testrados -> ceph_test_rados
@@ -27487,6 +27744,7 @@
 8f72045 Assign devices to osds using the device wwn
 5811159 Support power cycling osds/nodes through ipmi
 87b9849 add --name option to teuthology
+3eb19c8 Replace /tmp/cephtest/ with configurable path
 ace4cb0 Replace /tmp/cephtest/ with configurable path
 1473027 Fixes for syntax errors found by pyflakes.
 3390cc3 Scripts to use pyflakes to check python syntax.
@@ -28102,6 +28360,7 @@
 ae044e6 osd: allow transition from Clean -> WaitLocalRecoveryReserved for repair
 670afc6 PG: in sched_scrub() set PG_STATE_DEEP_SCRUB not scrubber.deep
 19e44bf osd: clear scrub state if queued scrub doesn't start
+2f82987 task/swift: change upstream repository url
 c02d34d task/swift: change upstream repository url
 feb0aad doc: Moved path to individual OSD entires.
 e765dcb osd: only dec_scrubs_active if we were active
@@ -28527,6 +28786,7 @@
 deabdc8 auth: cephx: increase log levels when logging secrets
 d6cf77d crush: CrushWrapper: don't add item to a bucket with != type than wanted
 95e1fe8 mon: PGMonitor: check if pg exists when handling 'pg map '
+b8e6ce4 s3tests: fix typo
 b2f8035 s3tests: fix typo
 1c715a1 mds: child directory inherits SGID bit
 55081c2 crush: prevent loops from insert_item
@@ -28550,6 +28810,7 @@
 328d72d rgw: signal shuts down fcgi socket
 ca08626 xfstests: run in parallel on multiple machines
 4eb50e6 crypto: fix nss related leak
+26df886 rgw-logsocket: a task to verify opslog socket works
 1c50db6 rgw-logsocket: a task to verify opslog socket works
 436baa0 java: add Java exception for ENOTDIR
 700b5c0 qa/run_xfstests.sh: drop tests 174 and 181
@@ -29936,6 +30197,7 @@
 f8c365e rgw: add missing ret code check
 d6c2ded radosgw-admin: update task for new usage reporting
 be426d1 schedule_suite.sh: try to use same branch for s3-tests.git
+617534e s3tests: run against arbitrary branch/sha1 of s3-tests.git
 3473c2e s3tests: run against arbitrary branch/sha1 of s3-tests.git
 0cfac6d librbd: bump version
 db8037d debian ntp servers
@@ -31227,6 +31489,7 @@
 9313cde teuthology-lock: add --summary and --brief options
 0f6d90c make everyone use our assert #include and macro
 a30601a assert: detect when /usr/include/assert.h clobbers us
+7d5c7ee pull s3-tests.git using git, not http
 9ec2843 pull s3-tests.git using git, not http
 165fbd1 keyserver: also authenticate against mon keyring
 5d520f1 keyring: implement get_caps()
@@ -31617,6 +31880,7 @@
 ae0ca7b keyring: catch key decode errors
 6812309 debian: depend on uuid-runtime
 3509b03 safe_io: int -> ssize_t
+ce951cf ceph.newdream.net -> ceph.com
 396d1fe ceph.newdream.net -> ceph.com
 203a7d6 objectcacher: wait directly from writex()
 991c93e mon: fix call to get_uuid() on non-existant osd
@@ -32565,6 +32829,7 @@
 ffa5955 msgr: Remove SimpleMessenger::register_entity
 3bd1d2a msgr: add start() and wait() stubs to the Messenger interface
 70360f8 github.com/NewDreamNetwork -> github.com/ceph
+1970713 github.com/NewDreamNetwork -> github.com/ceph
 dc1abab github.com/NewDreamNetwork -> github.com/ceph
 cacf0fd filestore: fix rollback safety check
 9fa8781 (tag: v0.43) v0.43
@@ -32710,6 +32975,7 @@
 995dc1f Add a task for testing stuck pg visibility.
 2a1c74c Move duration calculation to an internal task
 e67c0ff osd: make object_info_t::dump using hobject_t and object_locator_t dumpers
+1ac4bb1 Add necessary imports for s3 tasks, and keep them alphabetical.
 eb434a5 Add necessary imports for s3 tasks, and keep them alphabetical.
 55a6065 osdmap: dump embedded crush map in Incremental::dump()
 2365c77 rgw: maintain separate policies for object and bucket
@@ -32719,6 +32985,7 @@
 7cafa25 osdmap: dump fullmap from dump()
 11073e5 s3roundtrip, s3readwrite: access key uses url safe chars
 0e4367a rgw: accepted access key chars should be url safe
+92110e5 rgw: access key uses url safe chars
 6e1b3a5 rgw: access key uses url safe chars
 df5f573 add valgrind collection to regression suite
 17d3870 rgw: don't invalidate cache when adding xattrs
@@ -33271,6 +33538,7 @@
 40fb86f ceph: take single arg or list for valgrind args
 c88ec57 combined mon, osd, mds starter functions
 f8ec23e rbd: default to all:
+709d944 use local mirrors for (most) github urls
 72057a9 use local mirrors for (most) github urls
 fbfa94b teuthology-ls: show pid, last line of output for running jobs
 f70b158 show host -> roles mapping on startup
@@ -33748,6 +34016,7 @@
 86c34ba vstart.sh: .ceph_keyring -> keyring
 1e3da7e filejournal: remove bogus check in read_entry
 dbd7a3b Rename "testrados" task to not begin with "test".
+9598e47 Rename "testrados" and "testswift" tasks to not begin with "test".
 e80c32c Rename "testrados" and "testswift" tasks to not begin with "test".
 0dd4d69 Fix unit tests for SSH keep-alive setting.
 dc167ba filejournal: set last_committed_seq based on fs, not journal
@@ -33855,6 +34124,7 @@
 42cecb5 suite: put common config before facets
 044a88c suite: schedule a list of collections for running instead of a single suite directory
 6ae0f81 rgw: if swift url is not set up, just use whatever client used
+6236e7d testswift: fix config
 23aae67 testswift: fix config
 d8fc151 Clean up C++isms.
 c545094 Add a task for easily running chef-solo on all the nodes.
@@ -33875,6 +34145,7 @@
 f4d527e thrashosds: timeout for every clean check, not just the last one
 9d12b72 ceph_manager: add a default timeout of 5 minutes for mon quorum
 cb9ac08 ceph_manager: log mon quorum status so the logs show progress (or lack thereof)
+1dd607c rgw: add swift task
 f3c569e rgw: add swift task
 fa4b0fb osd: add pending_ops assert
 17fa1e0 mon: renamed get_latest* -> get_stashed*
@@ -34509,6 +34780,7 @@
 19228d4 rgw: revert part of commit:30b814e2677659ddda4109a1421f33a47d83a05b
 30b814e rgw: don't remove bucket object twice
 b441ecf rgw: fix bucket-id assignment
+cb425c1 s3-tests: use radosgw-admin instead of radosgw_admin
 9b44469 s3-tests: use radosgw-admin instead of radosgw_admin
 48558bb rgw: separate bucket vs object owner tracking
 cb2d366 radosgw-admin: fix acl vs content-length check
@@ -34768,6 +35040,7 @@
 89c06e7 doc: Add section about changing config values
 0d5dbfa workunit: Fetch source from github.
 7d3aa0e osd: use target obj locator for source object if empty
+37d7d51 s3tests: Clone repository from github.
 5583fac s3tests: Clone repository from github.
 4d92c35 coverage: Fetch source from github.
 4a0f8fe ceph.py: remove unused variables mds_daemons and mon_daemons
@@ -34789,6 +35062,7 @@
 b68eaf1 doc: Say "radosgw" not "rgw".
 ecd368c doc: Shrinking of MDSes is not supported yet.
 ee6126b Makefile.am: fix test_librbd
+29a242d Move orchestra to teuthology.orchestra so there's just one top-level package.
 a2372fc Move orchestra to teuthology.orchestra so there's just one top-level package.
 e86f3bd osd/OSD.cc: don't crash on incorrect injectargs
 6c81960 PG: fix typo in PgPriorSet constructor
@@ -34825,6 +35099,7 @@
 b37b61e workunit: use sudo when first reading /sys... too
 efaf91b workunit: and delete root-owned file
 d021b22 workunit: you need sudo to look at /sys/kernel/debug
+ec49a5f Callers of task s3tests.create_users don't need to provide dummy "fixtures" dict.
 cc72fe6 Callers of task s3tests.create_users don't need to provide dummy "fixtures" dict.
 9713666 rgw: rgw_admin, init gen_secret, gen_key
 1970bad thrashosds: fix timeout when no options are specified
@@ -35480,6 +35755,7 @@
 41f484d objecter: allow requesting specific maps in maybe_request_map
 4e2ec6f Add interactive-on-error, to pause and explore on error.
 5cb2bd2 testlibrbd: remove useless print statements
+d7d995e allow s3tests.create_users defaults be overridden
 eee1d9a allow s3tests.create_users defaults be overridden
 52b90b7 testlibrbd: NUL-terminate orig_data
 60b4588 testlibrbd(pp): accept standard command line arguments
@@ -35817,6 +36093,7 @@
 629187f Set content-type with PUT.
 019955a PGMonitor.cc: add force_create_pg command to monitor
 02d0efa schedule: make default owner different from that of a normal run
+0086109 Make targets a dictionary mapping hosts to ssh host keys.
 99afde1 Update example targets in readme.
 731fe41 Remove print that clutters the worker logs.
 271e066 Connect without using any known_hosts files.
@@ -36034,7 +36311,9 @@
 03b6c40 qa: mds rename: account for not being in the ceph root dir
 b5e4a31 move mds rename tests to workunits directory
 e483bb6 qa: mds rename: Rework so it will function in teuthology as a workunit:
+1b2c964 Skip s3-tests marked fails_on_rgw, they will fail anyway.
 9655042 Skip s3-tests marked fails_on_rgw, they will fail anyway.
+06fb9b9 The shell exits after the command, hence there is no need for pushd/popd.
 3665f75 The shell exits after the command, hence there is no need for pushd/popd.
 088013b mds: cleanup: use enum for lock states
 8f9eaf0 rgw: when listing objects, set locator key only when needed
@@ -36063,6 +36342,7 @@
 3dd95f6 .gitignore: ignore emacs backups
 d4fdaba a few basic kclient workunits, reorg
 e1db8e9 new workunits
+cd524a6 Add s3tests task.
 ae87abf Add s3tests task.
 03a08eb Add rgw task with hardcoded apache config.
 924a322 obsync: improve formatting a little bit
@@ -42699,7 +42979,7 @@
 31ab233 byteorder: more typos
 1d95c8f qa: start new qa dir, add some client tests
 119a73e comm: monitor dispatch should check for closed connection
-419e1b7 byteorder: braces!
+419e1b71 byteorder: braces!
 a5a7c55 osd: check is_dup earlier; do not set osdop version if it's a noop
 8d146b5 auth: monitors authenticate themselves on outgoing connections
 4eebe54 osd: use sized int32 in osdmap incremental new_pg_temp
@@ -44084,7 +44364,7 @@
 cb6e700 mon: don't dup entires in log.err
 916b199 osd: fix backlog ordering part deux
 24dd3ee osd: repair out of order logs in read_log
-f1ff934 osd: fix order of prior_version backlog entries
+f1ff934f osd: fix order of prior_version backlog entries
 61995ee osd: cleanup
 e16ed9b mon: handle failures during CLIENTREPLAY state
 7685e7f radostool: 'ls' fixes
diff -Nru ceph-10.2.7/CMakeLists.txt ceph-10.2.9/CMakeLists.txt
--- ceph-10.2.7/CMakeLists.txt	2017-04-10 11:44:25.000000000 +0000
+++ ceph-10.2.9/CMakeLists.txt	2017-07-13 13:05:36.000000000 +0000
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 2.8.11)
 
 project(Ceph)
-set(VERSION 10.2.7)
+set(VERSION 10.2.9)
 
 if (NOT (CMAKE_MAJOR_VERSION LESS 3))
   # Tweak policies (this one disables "missing" dependency warning)
diff -Nru ceph-10.2.7/configure ceph-10.2.9/configure
--- ceph-10.2.7/configure	2017-04-10 11:45:17.000000000 +0000
+++ ceph-10.2.9/configure	2017-07-13 13:06:30.000000000 +0000
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for ceph 10.2.7.
+# Generated by GNU Autoconf 2.69 for ceph 10.2.9.
 #
 # Report bugs to <ceph-devel@vger.kernel.org>.
 #
@@ -590,8 +590,8 @@
 # Identity of this package.
 PACKAGE_NAME='ceph'
 PACKAGE_TARNAME='ceph'
-PACKAGE_VERSION='10.2.7'
-PACKAGE_STRING='ceph 10.2.7'
+PACKAGE_VERSION='10.2.9'
+PACKAGE_STRING='ceph 10.2.9'
 PACKAGE_BUGREPORT='ceph-devel@vger.kernel.org'
 PACKAGE_URL=''
 
@@ -1600,7 +1600,7 @@
 # Omit some internal or obsolete options to make the list less imposing.
 # This message is too long to be a string in the A/UX 3.1 sh.
 cat <<_ACEOF
-\`configure' configures ceph 10.2.7 to adapt to many kinds of systems.
+\`configure' configures ceph 10.2.9 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1672,7 +1672,7 @@
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of ceph 10.2.7:";;
+     short | recursive ) echo "Configuration of ceph 10.2.9:";;
    esac
   cat <<\_ACEOF
 
@@ -1862,7 +1862,7 @@
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-ceph configure 10.2.7
+ceph configure 10.2.9
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2938,7 +2938,7 @@
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by ceph $as_me 10.2.7, which was
+It was created by ceph $as_me 10.2.9, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -17051,7 +17051,7 @@
 
 # Define the identity of the package.
  PACKAGE='ceph'
- VERSION='10.2.7'
+ VERSION='10.2.9'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -26914,7 +26914,7 @@
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by ceph $as_me 10.2.7, which was
+This file was extended by ceph $as_me 10.2.9, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -26980,7 +26980,7 @@
 
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-ceph config.status 10.2.7
+ceph config.status 10.2.9
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
diff -Nru ceph-10.2.7/configure.ac ceph-10.2.9/configure.ac
--- ceph-10.2.7/configure.ac	2017-04-10 11:44:25.000000000 +0000
+++ ceph-10.2.9/configure.ac	2017-07-13 13:05:36.000000000 +0000
@@ -8,7 +8,7 @@
 
 # VERSION define is not used by the code. It gets a version string
 # from 'git describe'; see src/ceph_ver.[ch]
 
-AC_INIT([ceph], [10.2.7], [ceph-devel@vger.kernel.org])
+AC_INIT([ceph], [10.2.9], [ceph-devel@vger.kernel.org])
 
 AX_CXX_COMPILE_STDCXX_11(, mandatory)
diff -Nru ceph-10.2.7/debian/changelog ceph-10.2.9/debian/changelog
--- ceph-10.2.7/debian/changelog	2017-04-21 08:21:10.000000000 +0000
+++ ceph-10.2.9/debian/changelog	2017-09-26 06:39:00.000000000 +0000
@@ -1,3 +1,10 @@
+ceph (10.2.9-0ubuntu0.16.04.1) xenial; urgency=medium
+
+  * New upstream point release (LP: #1706566):
+    - d/p/sleep-recover.patch: Drop, superseded by upstream fix.
+
+ -- James Page  Tue, 26 Sep 2017 07:39:00 +0100
+
 ceph (10.2.7-0ubuntu0.16.04.1) xenial; urgency=medium
 
   [ Billy Olsen ]
diff -Nru ceph-10.2.7/debian/patches/series ceph-10.2.9/debian/patches/series
--- ceph-10.2.7/debian/patches/series	2017-04-21 08:21:08.000000000 +0000
+++ ceph-10.2.9/debian/patches/series	2017-09-26 06:38:52.000000000 +0000
@@ -1,6 +1,3 @@
-## Backported / Upstream
-sleep-recover.patch
-
 ## Debian
 #rbdmap3-lazyumount.patch
 arch.patch
diff -Nru ceph-10.2.7/debian/patches/sleep-recover.patch ceph-10.2.9/debian/patches/sleep-recover.patch
--- ceph-10.2.7/debian/patches/sleep-recover.patch	2017-04-20 09:25:49.000000000 +0000
+++ ceph-10.2.9/debian/patches/sleep-recover.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,16 +0,0 @@
-Last-Update: 2014-05-12
-Forwarded: not-needed
-Bug-Ceph: http://tracker.ceph.com/issues/8291
-Author: Yan, Zheng
-Description: fix fuse-client hang after wake-up from suspend.
-
---- a/src/client/Client.cc
-+++ b/src/client/Client.cc
-@@ -12061,6 +12061,7 @@ void Client::ms_handle_remote_reset(Conn
-     case MetaSession::STATE_OPEN:
-       ldout(cct, 1) << "reset from mds we were open; mark session as stale" << dendl;
-       s->state = MetaSession::STATE_STALE;
-+      _closed_mds_session(s);
-       break;
-
-     case MetaSession::STATE_NEW:
diff -Nru ceph-10.2.7/doc/cephfs/client-config-ref.rst ceph-10.2.9/doc/cephfs/client-config-ref.rst
--- ceph-10.2.7/doc/cephfs/client-config-ref.rst	1970-01-01 00:00:00.000000000 +0000
+++ ceph-10.2.9/doc/cephfs/client-config-ref.rst	2017-07-13 13:05:36.000000000 +0000
@@ -0,0 +1,214 @@
+========================
+ Client Config Reference
+========================
+
+``client acl type``
+
+:Description: Set the ACL type. Currently, the only possible value is ``"posix_acl"``, which enables POSIX ACL; an empty string disables ACL enforcement. This option takes effect only when ``fuse_default_permissions`` is set to ``false``.
+:Type: String
+:Default: ``""`` (no ACL enforcement)
+
+``client cache mid``
+
+:Description: Set the client cache midpoint. The midpoint splits the least recently used lists into a hot and warm list.
+:Type: Float
+:Default: ``0.75``
+
+``client_cache_size``
+
+:Description: Set the number of inodes that the client keeps in the metadata cache.
+:Type: Integer
+:Default: ``16384``
+
+``client_caps_release_delay``
+
+:Description: Set the delay between capability releases in seconds. The delay sets how many seconds a client waits to release capabilities that it no longer needs, in case the capabilities are needed for another user space operation.
+:Type: Integer
+:Default: ``5`` (seconds)
+
+``client_debug_force_sync_read``
+
+:Description: If set to ``true``, clients read data directly from OSDs instead of using a local page cache.
+:Type: Boolean
+:Default: ``false``
+
+``client_dirsize_rbytes``
+
+:Description: If set to ``true``, use the recursive size of a directory (that is, the total size of all descendants).
+:Type: Boolean
+:Default: ``true``
+
+``client_max_inline_size``
+
+:Description: Set the maximum size of inlined data stored in a file inode rather than in a separate data object in RADOS. This setting only applies if the ``inline_data`` flag is set on the MDS map.
+:Type: Integer
+:Default: ``4096``
+
+``client_metadata``
+
+:Description: Comma-delimited strings for client metadata sent to each MDS, in addition to the automatically generated version, host name, and other metadata.
+:Type: String
+:Default: ``""`` (no additional metadata)
+
+``client_mount_gid``
+
+:Description: Set the group ID of a CephFS mount.
+:Type: Integer
+:Default: ``-1``
+
+``client_mount_timeout``
+
+:Description: Set the timeout for a CephFS mount in seconds.
+:Type: Float
+:Default: ``300.0``
+
+``client_mount_uid``
+
+:Description: Set the user ID of a CephFS mount.
+:Type: Integer
+:Default: ``-1``
+
+``client_mountpoint``
+
+:Description: Directory to mount on the CephFS file system. An alternative to the ``-r`` option of the ``ceph-fuse`` command.
+:Type: String
+:Default: ``"/"``
+
+``client_oc``
+
+:Description: Enable object caching.
+:Type: Boolean
+:Default: ``true``
+
+``client_oc_max_dirty``
+
+:Description: Set the maximum number of dirty bytes in the object cache.
+:Type: Integer
+:Default: ``104857600`` (100MB)
+
+``client_oc_max_dirty_age``
+
+:Description: Set the maximum age in seconds of dirty data in the object cache before writeback.
+:Type: Float
+:Default: ``5.0`` (seconds)
+
+``client_oc_max_objects``
+
+:Description: Set the maximum number of objects in the object cache.
+:Type: Integer
+:Default: ``1000``
+
+``client_oc_size``
+
+:Description: Set how many bytes of data the client will cache.
+:Type: Integer
+:Default: ``209715200`` (200 MB)
+
+``client_oc_target_dirty``
+
+:Description: Set the target size of dirty data. We recommend keeping this number low.
+:Type: Integer
+:Default: ``8388608`` (8MB)
+
+``client_permissions``
+
+:Description: Check client permissions on all I/O operations.
+:Type: Boolean
+:Default: ``true``
+
+``client_quota``
+
+:Description: Enable client quota checking if set to ``true``.
+:Type: Boolean
+:Default: ``true``
+
+``client_quota_df``
+
+:Description: Report the root directory quota for the ``statfs`` operation.
+:Type: Boolean
+:Default: ``true``
+
+``client_readahead_max_bytes``
+
+:Description: Set the maximum number of bytes that the kernel reads ahead for future read operations. Overridden by the ``client_readahead_max_periods`` setting.
+:Type: Integer
+:Default: ``0`` (unlimited)
+
+``client_readahead_max_periods``
+
+:Description: Set the number of file layout periods (object size * number of stripes) that the kernel reads ahead. Overrides the ``client_readahead_max_bytes`` setting.
+:Type: Integer
+:Default: ``4``
+
+``client_readahead_min``
+
+:Description: Set the minimum number of bytes that the kernel reads ahead.
+:Type: Integer
+:Default: ``131072`` (128KB)
+
+``client_reconnect_stale``
+
+:Description: Automatically reconnect stale sessions.
+:Type: Boolean
+:Default: ``false``
+
+``client_snapdir``
+
+:Description: Set the snapshot directory name.
+:Type: String
+:Default: ``".snap"``
+
+``client_tick_interval``
+
+:Description: Set the interval in seconds between capability renewal and other upkeep.
+:Type: Float
+:Default: ``1.0`` (seconds)
+
+``client_use_random_mds``
+
+:Description: Choose a random MDS for each request.
+:Type: Boolean
+:Default: ``false``
+
+``fuse_default_permissions``
+
+:Description: When set to ``false``, the ``ceph-fuse`` utility does its own permission checking instead of relying on the permission enforcement in FUSE. Set to ``false`` together with the ``client acl type=posix_acl`` option to enable POSIX ACL.
+:Type: Boolean
+:Default: ``true``
+
+Developer Options
+#################
+
+.. important:: These options are internal. They are listed here only to complete the list of options.
+
+``client_debug_getattr_caps``
+
+:Description: Check if the reply from the MDS contains the required capabilities.
+:Type: Boolean
+:Default: ``false``
+
+``client_debug_inject_tick_delay``
+
+:Description: Add an artificial delay between client ticks.
+:Type: Integer
+:Default: ``0``
+
+``client_inject_fixed_oldest_tid``
+
+:Description:
+:Type: Boolean
+:Default: ``false``
+
+``client_inject_release_failure``
+
+:Description:
+:Type: Boolean
+:Default: ``false``
+
+``client_trace``
+
+:Description: The path to the trace file for all file operations. The output is designed to be used by the Ceph `synthetic client <../man/8/ceph-syn>`.
+:Type: String
+:Default: ``""`` (disabled)
+
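As a quick illustration of how the options above are consumed, a ``[client]``
section in ``ceph.conf`` might look like the following sketch. All values here
are hypothetical examples chosen for illustration, not tuning recommendations::

    [client]
    # keep twice the default number of inodes in the metadata cache
    client cache size = 32768
    # allow roughly 400 MB of cached object data (default is 200 MB)
    client oc size = 419430400
    # expose snapshots under ".snapshot" instead of the default ".snap"
    client snapdir = .snapshot
    # let ceph-fuse perform its own permission checks and enforce POSIX ACLs
    fuse default permissions = false
    client acl type = posix_acl

Ceph treats spaces and underscores in option names interchangeably, so the
spellings above match the reference entries.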
diff -Nru ceph-10.2.7/doc/cephfs/index.rst ceph-10.2.9/doc/cephfs/index.rst
--- ceph-10.2.7/doc/cephfs/index.rst	2017-04-10 11:44:25.000000000 +0000
+++ ceph-10.2.9/doc/cephfs/index.rst	2017-07-13 13:05:36.000000000 +0000
@@ -51,6 +51,7 @@
     Add/Remove MDS <../../rados/deployment/ceph-deploy-mds>
     MDS failover and standby configuration
     MDS Configuration Settings
+    Client Configuration Settings <client-config-ref>
     Journaler Configuration
     Manpage ceph-mds <../../man/8/ceph-mds>
diff -Nru ceph-10.2.7/doc/Makefile ceph-10.2.9/doc/Makefile
--- ceph-10.2.7/doc/Makefile	2017-04-10 11:45:43.000000000 +0000
+++ ceph-10.2.9/doc/Makefile	2017-07-13 13:06:57.000000000 +0000
@@ -243,10 +243,10 @@
 PACKAGE = ceph
 PACKAGE_BUGREPORT = ceph-devel@vger.kernel.org
 PACKAGE_NAME = ceph
-PACKAGE_STRING = ceph 10.2.7
+PACKAGE_STRING = ceph 10.2.9
 PACKAGE_TARNAME = ceph
 PACKAGE_URL =
-PACKAGE_VERSION = 10.2.7
+PACKAGE_VERSION = 10.2.9
 PATH_SEPARATOR = :
 PKG_CONFIG = /usr/bin/pkg-config
 PKG_CONFIG_LIBDIR =
@@ -272,7 +272,7 @@
 SONAME_DEFINES = -DSSL_LIB=\"libssl.so.1.0.0\" -DCRYPTO_LIB=\"libcrypto.so.1.0.0\"
 SPHINX_BUILD = sphinx-build
 STRIP = strip
-VERSION = 10.2.7
+VERSION = 10.2.9
 WARN_ERROR_FORMAT_SECURITY = -Werror=format-security
 WARN_IGNORED_QUALIFIERS = -Wignored-qualifiers
 WARN_TYPE_LIMITS = -Wtype-limits
diff -Nru ceph-10.2.7/doc/man/8/crushtool.rst ceph-10.2.9/doc/man/8/crushtool.rst
--- ceph-10.2.7/doc/man/8/crushtool.rst	2017-04-10 11:44:25.000000000 +0000
+++ ceph-10.2.9/doc/man/8/crushtool.rst	2017-07-13 13:05:36.000000000 +0000
@@ -17,15 +17,15 @@
 ===========
 
 **crushtool** is a utility that lets you create, compile, decompile
- and test CRUSH map files.
+and test CRUSH map files.
 
 CRUSH is a pseudo-random data distribution algorithm that efficiently
-maps input values (typically data objects) across a heterogeneous,
-hierarchically structured device map. The algorithm was originally
-described in detail in the following paper (although it has evolved
-some since then):
+maps input values (which, in the context of Ceph, correspond to Placement
+Groups) across a heterogeneous, hierarchically structured device map.
+The algorithm was originally described in detail in the following paper
+(although it has evolved some since then)::
 
-  http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf
+	http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf
 
 The tool has four modes of operation.
@@ -46,7 +46,9 @@
 .. option:: --test
 
    will perform a dry run of a CRUSH mapping for a range of input
-   object names. See below for a detailed explanation.
+   values ``[--min-x,--max-x]`` (default ``[0,1023]``) which can be
+   thought of as simulated Placement Groups. See below for a more
+   detailed explanation.
 
 Unlike other Ceph tools, **crushtool** does not accept generic options
 such as **--debug-crush** from the command line. They can, however, be
@@ -60,28 +62,30 @@
 =========================
 
 The test mode will use the input crush map ( as specified with **-i
-map** ) and perform a dry run of CRUSH mapping or random placement (
-if **--simulate** is set ). On completion, two kinds of reports can be
+map** ) and perform a dry run of CRUSH mapping or random placement
+(if **--simulate** is set ). On completion, two kinds of reports can be
 created.
 1) The **--show-...** option outputs human readable information
    on stderr.
 2) The **--output-csv** option creates CSV files that are
    documented by the **--help-output** option.
 
-.. option:: --show-statistics
+Note: Each Placement Group (PG) has an integer ID which can be obtained
+from ``ceph pg dump`` (for example PG 2.2f means pool id 2, PG id 0x2f).
+The pool and PG IDs are combined by a function to get a value which is
+given to CRUSH to map it to OSDs. crushtool does not know about PGs or
+pools; it only runs simulations by mapping values in the range
+``[--min-x,--max-x]``.
 
-  For each rule, displays the mapping of each object. For instance::
+.. option:: --show-statistics
 
-    CRUSH rule 1 x 24 [11,6]
-
-  shows that object **24** is mapped to devices **[11,6]** by rule
-  **1**. At the end of the mapping details, a summary of the
-  distribution is displayed. For instance::
+  Displays a summary of the distribution. For instance::
 
     rule 1 (metadata) num_rep 5 result size == 5:	1024/1024
 
   shows that rule **1** which is named **metadata** successfully
-  mapped **1024** objects to **result size == 5** devices when trying
+  mapped **1024** values to **result size == 5** devices when trying
   to map them to **num_rep 5** replicas. When it fails to provide the
   required mapping, presumably because the number of **tries** must
   be increased, a breakdown of the failures is displayed. For instance::
@@ -91,12 +95,22 @@
     rule 1 (metadata) num_rep 10 result size == 10:	927/1024
 
   shows that although **num_rep 10** replicas were required, **4**
-  out of **1024** objects ( **4/1024** ) were mapped to **result size
+  out of **1024** values ( **4/1024** ) were mapped to **result size
  == 8** devices only.
 
+.. option:: --show-mappings
+
+  Displays the mapping of each value in the range ``[--min-x,--max-x]``.
+  For instance::
+
+      CRUSH rule 1 x 24 [11,6]
+
+  shows that value **24** is mapped to devices **[11,6]** by rule
+  **1**.
+
option:: --show-bad-mappings - Displays which object failed to be mapped to the required number of + Displays which value failed to be mapped to the required number of devices. For instance:: bad mapping rule 1 x 781 num_rep 7 result [8,10,2,11,6,9] @@ -113,7 +127,7 @@ device 1: stored : 963 expected : 853.333 ... - shows that device **0** stored **951** objects and was expected to store **853**. + shows that device **0** stored **951** values and was expected to store **853**. Implies **--show-statistics**. .. option:: --show-utilization-all @@ -244,6 +258,11 @@ # recompile crushtool -c map.txt -o crushmap +Example output from --test +========================== + +See https://github.com/ceph/ceph/blob/master/src/test/cli/crushtool/set-choose.t +for sample ``crushtool --test`` commands and the output they produce. Availability ============ diff -Nru ceph-10.2.7/doc/man/8/rbdmap.rst ceph-10.2.9/doc/man/8/rbdmap.rst --- ceph-10.2.7/doc/man/8/rbdmap.rst 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/doc/man/8/rbdmap.rst 2017-07-13 13:05:36.000000000 +0000 @@ -16,16 +16,91 @@ Description =========== -**rbdmap** is a shell script that can be run manually by the system -administrator at any time, or automatically at boot time by the init system -(sysvinit, upstart, systemd). The script looks for an environment variable -``RBDMAPFILE``, which defaults to ``/etc/ceph/rbdmap``. This file is -expected to contain a list of RBD images and, possibly, parameters to be -passed to the underlying ``rbd`` command. The syntax of -``/etc/ceph/rbdmap`` is described in the comments at the top of that file. +**rbdmap** is a shell script that automates ``rbd map`` and ``rbd unmap`` +operations on one or more RBD (RADOS Block Device) images. While the script can be +run manually by the system administrator at any time, the principal use case is +automatic mapping/mounting of RBD images at boot time (and unmounting/unmapping +at shutdown), as triggered by the init system (a systemd unit file, +``rbdmap.service``, is included with the ceph-common package for this purpose). -The script mounts devices after mapping, and unmounts them before -unmapping. +The script takes a single argument, which can be either "map" or "unmap". +In either case, the script parses a configuration file (defaults to ``/etc/ceph/rbdmap``, +but can be overridden via the environment variable ``RBDMAPFILE``). Each line +of the configuration file corresponds to an RBD image which is to be mapped or +unmapped. + +The configuration file format is:: + + IMAGESPEC RBDOPTS + +where ``IMAGESPEC`` should be specified as ``POOLNAME/IMAGENAME`` (the pool +name, a forward slash, and the image name), or merely ``IMAGENAME``, in which +case the ``POOLNAME`` defaults to "rbd". ``RBDOPTS`` is an optional list of +parameters to be passed to the underlying ``rbd map`` command. These parameters +and their values should be specified as a comma-separated string:: + + PARAM1=VAL1,PARAM2=VAL2,...,PARAMN=VALN + +This will cause the script to issue an ``rbd map`` command like the following:: + + rbd map POOLNAME/IMAGENAME --PARAM1 VAL1 --PARAM2 VAL2 + +(See the ``rbd`` manpage for a full list of possible options.) + +When run as ``rbdmap map``, the script parses the configuration file, and for +each RBD image specified attempts to first map the image (using the ``rbd map`` +command) and, second, to mount the image. + +When run as ``rbdmap unmap``, images listed in the configuration file will +be unmounted and unmapped.
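For a quick manual check of the configuration file handling described above, the script can be pointed at an alternate file through ``RBDMAPFILE`` (a sketch; the staging path is hypothetical)::

    # map and mount everything listed in a staging config, then undo it
    RBDMAPFILE=/etc/ceph/rbdmap.staging rbdmap map
    RBDMAPFILE=/etc/ceph/rbdmap.staging rbdmap unmap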
+ +``rbdmap unmap-all`` attempts to unmount and subsequently unmap all currently +mapped RBD images, regardless of whether or not they're listed in the +configuration file. + +If successful, the ``rbd map`` operation maps the image to a ``/dev/rbdX`` +device, at which point a udev rule is triggered to create a friendly device +name symlink, ``/dev/rbd/POOLNAME/IMAGENAME``, pointing to the real mapped +device. + +In order for mounting/unmounting to succeed, the friendly device name must +have a corresponding entry in ``/etc/fstab``. + +When writing ``/etc/fstab`` entries for RBD images, it's a good idea to specify +the "noauto" (or "nofail") mount option. This prevents the init system from +trying to mount the device too early - before the device in question even +exists. (Since ``rbdmap.service`` +executes a shell script, it is typically triggered quite late in the boot +sequence.) + + +Examples +======== + +Example ``/etc/ceph/rbdmap`` for two RBD images called "bar1" and "bar2", both +in pool "foopool":: + + foopool/bar1 id=admin,keyring=/etc/ceph/ceph.client.admin.keyring + foopool/bar2 id=admin,keyring=/etc/ceph/ceph.client.admin.keyring + +Each line in the file contains two strings: the image spec and the options to +be passed to ``rbd map``. These two lines get transformed into the following +commands:: + + rbd map foopool/bar1 --id admin --keyring /etc/ceph/ceph.client.admin.keyring + rbd map foopool/bar2 --id admin --keyring /etc/ceph/ceph.client.admin.keyring + +If the images had XFS filesystems on them, the corresponding ``/etc/fstab`` +entries might look like this:: + + /dev/rbd/foopool/bar1 /mnt/bar1 xfs noauto 0 0 + /dev/rbd/foopool/bar2 /mnt/bar2 xfs noauto 0 0 + +After creating the images and populating the ``/etc/ceph/rbdmap`` file, having +the images automatically mapped and mounted at boot is just a matter of +enabling that unit:: + + systemctl enable rbdmap.service Options diff -Nru ceph-10.2.7/doc/radosgw/adminops.rst ceph-10.2.9/doc/radosgw/adminops.rst --- ceph-10.2.7/doc/radosgw/adminops.rst 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/doc/radosgw/adminops.rst 2017-07-13 13:05:36.000000000 +0000 @@ -1834,10 +1834,11 @@ - **Maximum Size:** The ``max-size`` option allows you to specify a quota for the maximum number of bytes. A negative value disables this setting. -- **Quota Scope:** The ``quota-scope`` option sets the scope for the quota. +- **Quota Type:** The ``quota-type`` option sets the scope for the quota. The options are ``bucket`` and ``user``. - +- **Enable/Disable Quota:** The ``enabled`` option specifies whether the + quota should be enabled. The value should be either 'True' or 'False'. Get User Quota ~~~~~~~~~~~~~~ diff -Nru ceph-10.2.7/doc/radosgw/multisite.rst ceph-10.2.9/doc/radosgw/multisite.rst --- ceph-10.2.7/doc/radosgw/multisite.rst 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/doc/radosgw/multisite.rst 2017-07-13 13:05:36.000000000 +0000 @@ -281,6 +281,11 @@ } +.. note:: System users have super user privileges over the + entire zone, and will not behave like normal users for REST operations + such as creating buckets or objects, because the output will contain + additional JSON fields for maintaining metadata. + Update the period ----------------- diff -Nru ceph-10.2.7/man/ceph.8 ceph-10.2.9/man/ceph.8 --- ceph-10.2.7/man/ceph.8 2017-04-10 11:45:56.000000000 +0000 +++ ceph-10.2.9/man/ceph.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. .
-.TH "CEPH" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph \- ceph administration tool . diff -Nru ceph-10.2.7/man/ceph-authtool.8 ceph-10.2.9/man/ceph-authtool.8 --- ceph-10.2.7/man/ceph-authtool.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/ceph-authtool.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-AUTHTOOL" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-AUTHTOOL" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-authtool \- ceph keyring manipulation tool . diff -Nru ceph-10.2.7/man/ceph-clsinfo.8 ceph-10.2.9/man/ceph-clsinfo.8 --- ceph-10.2.7/man/ceph-clsinfo.8 2017-04-10 11:45:56.000000000 +0000 +++ ceph-10.2.9/man/ceph-clsinfo.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-CLSINFO" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-CLSINFO" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-clsinfo \- show class object information . diff -Nru ceph-10.2.7/man/ceph-conf.8 ceph-10.2.9/man/ceph-conf.8 --- ceph-10.2.7/man/ceph-conf.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/ceph-conf.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-CONF" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-CONF" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-conf \- ceph conf file tool . diff -Nru ceph-10.2.7/man/ceph-create-keys.8 ceph-10.2.9/man/ceph-create-keys.8 --- ceph-10.2.7/man/ceph-create-keys.8 2017-04-10 11:45:56.000000000 +0000 +++ ceph-10.2.9/man/ceph-create-keys.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-CREATE-KEYS" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-CREATE-KEYS" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-create-keys \- ceph keyring generate tool . diff -Nru ceph-10.2.7/man/ceph-debugpack.8 ceph-10.2.9/man/ceph-debugpack.8 --- ceph-10.2.7/man/ceph-debugpack.8 2017-04-10 11:45:56.000000000 +0000 +++ ceph-10.2.9/man/ceph-debugpack.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-DEBUGPACK" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-DEBUGPACK" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-debugpack \- ceph debug packer utility . diff -Nru ceph-10.2.7/man/ceph-dencoder.8 ceph-10.2.9/man/ceph-dencoder.8 --- ceph-10.2.7/man/ceph-dencoder.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/ceph-dencoder.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-DENCODER" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-DENCODER" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-dencoder \- ceph encoder/decoder utility . diff -Nru ceph-10.2.7/man/ceph-deploy.8 ceph-10.2.9/man/ceph-deploy.8 --- ceph-10.2.7/man/ceph-deploy.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/ceph-deploy.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-DEPLOY" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-DEPLOY" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-deploy \- Ceph deployment tool . diff -Nru ceph-10.2.7/man/ceph-detect-init.8 ceph-10.2.9/man/ceph-detect-init.8 --- ceph-10.2.7/man/ceph-detect-init.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/ceph-detect-init.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . 
-.TH "CEPH-DETECT-INIT" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-DETECT-INIT" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-detect-init \- display the init system Ceph should use . diff -Nru ceph-10.2.7/man/ceph-disk.8 ceph-10.2.9/man/ceph-disk.8 --- ceph-10.2.7/man/ceph-disk.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/ceph-disk.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-DISK" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-DISK" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-disk \- Ceph disk utility for OSD . diff -Nru ceph-10.2.7/man/cephfs.8 ceph-10.2.9/man/cephfs.8 --- ceph-10.2.7/man/cephfs.8 2017-04-10 11:45:56.000000000 +0000 +++ ceph-10.2.9/man/cephfs.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPHFS" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPHFS" "8" "July 13, 2017" "dev" "Ceph" .SH NAME cephfs \- ceph file system options utility . diff -Nru ceph-10.2.7/man/ceph-fuse.8 ceph-10.2.9/man/ceph-fuse.8 --- ceph-10.2.7/man/ceph-fuse.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/ceph-fuse.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-FUSE" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-FUSE" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-fuse \- FUSE-based client for ceph . diff -Nru ceph-10.2.7/man/ceph-mds.8 ceph-10.2.9/man/ceph-mds.8 --- ceph-10.2.7/man/ceph-mds.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/ceph-mds.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-MDS" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-MDS" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-mds \- ceph metadata server daemon . diff -Nru ceph-10.2.7/man/ceph-mon.8 ceph-10.2.9/man/ceph-mon.8 --- ceph-10.2.7/man/ceph-mon.8 2017-04-10 11:45:56.000000000 +0000 +++ ceph-10.2.9/man/ceph-mon.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-MON" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-MON" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-mon \- ceph monitor daemon . diff -Nru ceph-10.2.7/man/ceph-osd.8 ceph-10.2.9/man/ceph-osd.8 --- ceph-10.2.7/man/ceph-osd.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/ceph-osd.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-OSD" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-OSD" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-osd \- ceph object storage daemon . diff -Nru ceph-10.2.7/man/ceph-post-file.8 ceph-10.2.9/man/ceph-post-file.8 --- ceph-10.2.7/man/ceph-post-file.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/ceph-post-file.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-POST-FILE" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-POST-FILE" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-post-file \- post files for ceph developers . diff -Nru ceph-10.2.7/man/ceph-rbdnamer.8 ceph-10.2.9/man/ceph-rbdnamer.8 --- ceph-10.2.7/man/ceph-rbdnamer.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/ceph-rbdnamer.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . 
-.TH "CEPH-RBDNAMER" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-RBDNAMER" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-rbdnamer \- udev helper to name RBD devices . diff -Nru ceph-10.2.7/man/ceph-rest-api.8 ceph-10.2.9/man/ceph-rest-api.8 --- ceph-10.2.7/man/ceph-rest-api.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/ceph-rest-api.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-REST-API" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-REST-API" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-rest-api \- ceph RESTlike administration server . diff -Nru ceph-10.2.7/man/ceph-run.8 ceph-10.2.9/man/ceph-run.8 --- ceph-10.2.7/man/ceph-run.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/ceph-run.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-RUN" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-RUN" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-run \- restart daemon on core dump . diff -Nru ceph-10.2.7/man/ceph-syn.8 ceph-10.2.9/man/ceph-syn.8 --- ceph-10.2.7/man/ceph-syn.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/ceph-syn.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CEPH-SYN" "8" "April 10, 2017" "dev" "Ceph" +.TH "CEPH-SYN" "8" "July 13, 2017" "dev" "Ceph" .SH NAME ceph-syn \- ceph synthetic workload generator . diff -Nru ceph-10.2.7/man/crushtool.8 ceph-10.2.9/man/crushtool.8 --- ceph-10.2.7/man/crushtool.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/crushtool.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "CRUSHTOOL" "8" "April 10, 2017" "dev" "Ceph" +.TH "CRUSHTOOL" "8" "July 13, 2017" "dev" "Ceph" .SH NAME crushtool \- CRUSH map manipulation tool . @@ -37,20 +37,23 @@ .fi .sp .SH DESCRIPTION -.INDENT 0.0 -.TP -.B \fBcrushtool\fP is a utility that lets you create, compile, decompile +.sp +\fBcrushtool\fP is a utility that lets you create, compile, decompile and test CRUSH map files. -.UNINDENT .sp CRUSH is a pseudo\-random data distribution algorithm that efficiently -maps input values (typically data objects) across a heterogeneous, -hierarchically structured device map. The algorithm was originally -described in detail in the following paper (although it has evolved -some since then): +maps input values (which, in the context of Ceph, correspond to Placement +Groups) across a heterogeneous, hierarchically structured device map. +The algorithm was originally described in detail in the following paper +(although it has evolved some since then): .INDENT 0.0 .INDENT 3.5 -\fI\%http://www.ssrc.ucsc.edu/Papers/weil\-sc06.pdf\fP +.sp +.nf +.ft C +http://www.ssrc.ucsc.edu/Papers/weil\-sc06.pdf +.ft P +.fi .UNINDENT .UNINDENT .sp @@ -76,7 +79,9 @@ .TP .B \-\-test will perform a dry run of a CRUSH mapping for a range of input -object names. See below for a detailed explanation. +values \fB[\-\-min\-x,\-\-max\-x]\fP (default \fB[0,1023]\fP) which can be +thought of as simulated Placement Groups. See below for a more +detailed explanation. .UNINDENT .sp Unlike other Ceph tools, \fBcrushtool\fP does not accept generic options @@ -96,31 +101,24 @@ .SH RUNNING TESTS WITH --TEST .sp The test mode will use the input crush map ( as specified with \fB\-i -map\fP ) and perform a dry run of CRUSH mapping or random placement ( -if \fB\-\-simulate\fP is set ). 
On completion, two kinds of reports can be +map\fP ) and perform a dry run of CRUSH mapping or random placement +(if \fB\-\-simulate\fP is set ). On completion, two kinds of reports can be created. 1) The \fB\-\-show\-...\fP option outputs human readable information on stderr. 2) The \fB\-\-output\-csv\fP option creates CSV files that are documented by the \fB\-\-help\-output\fP option. +.sp +Note: Each Placement Group (PG) has an integer ID which can be obtained +from \fBceph pg dump\fP (for example PG 2.2f means pool id 2, PG id 0x2f). +The pool and PG IDs are combined by a function to get a value which is +given to CRUSH to map it to OSDs. crushtool does not know about PGs or +pools; it only runs simulations by mapping values in the range +\fB[\-\-min\-x,\-\-max\-x]\fP\&. .INDENT 0.0 .TP .B \-\-show\-statistics -For each rule, displays the mapping of each object. For instance: -.INDENT 7.0 -.INDENT 3.5 -.sp -.nf -.ft C -CRUSH rule 1 x 24 [11,6] -.ft P -.fi -.UNINDENT -.UNINDENT -.sp -shows that object \fB24\fP is mapped to devices \fB[11,6]\fP by rule -\fB1\fP\&. At the end of the mapping details, a summary of the -distribution is displayed. For instance: +Displays a summary of the distribution. For instance: .INDENT 7.0 .INDENT 3.5 .sp @@ -133,7 +131,7 @@ .UNINDENT .sp shows that rule \fB1\fP which is named \fBmetadata\fP successfully -mapped \fB1024\fP objects to \fBresult size == 5\fP devices when trying +mapped \fB1024\fP values to \fBresult size == 5\fP devices when trying to map them to \fBnum_rep 5\fP replicas. When it fails to provide the required mapping, presumably because the number of \fBtries\fP must be increased, a breakdown of the failures is displayed. For instance: @@ -151,13 +149,32 @@ .UNINDENT .sp shows that although \fBnum_rep 10\fP replicas were required, \fB4\fP -out of \fB1024\fP objects ( \fB4/1024\fP ) were mapped to \fBresult size +out of \fB1024\fP values ( \fB4/1024\fP ) were mapped to \fBresult size == 8\fP devices only. .UNINDENT .INDENT 0.0 .TP +.B \-\-show\-mappings +Displays the mapping of each value in the range \fB[\-\-min\-x,\-\-max\-x]\fP\&. +For instance: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +CRUSH rule 1 x 24 [11,6] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +shows that value \fB24\fP is mapped to devices \fB[11,6]\fP by rule +\fB1\fP\&. +.UNINDENT +.INDENT 0.0 +.TP .B \-\-show\-bad\-mappings -Displays which object failed to be mapped to the required number of +Displays which value failed to be mapped to the required number of devices. For instance: .INDENT 7.0 .INDENT 3.5 @@ -191,7 +208,7 @@ .UNINDENT .UNINDENT .sp -shows that device \fB0\fP stored \fB951\fP objects and was expected to store \fB853\fP\&. +shows that device \fB0\fP stored \fB951\fP values and was expected to store \fB853\fP\&. Implies \fB\-\-show\-statistics\fP\&. .UNINDENT .INDENT 0.0 @@ -390,6 +407,10 @@ .fi .UNINDENT .UNINDENT +.SH EXAMPLE OUTPUT FROM --TEST +.sp +See \fI\%https://github.com/ceph/ceph/blob/master/src/test/cli/crushtool/set\-choose.t\fP +for sample \fBcrushtool \-\-test\fP commands and the output they produce. .SH AVAILABILITY .sp \fBcrushtool\fP is part of Ceph, a massively scalable, open\-source, distributed storage system. Please diff -Nru ceph-10.2.7/man/librados-config.8 ceph-10.2.9/man/librados-config.8 --- ceph-10.2.7/man/librados-config.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/librados-config.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. .
-.TH "LIBRADOS-CONFIG" "8" "April 10, 2017" "dev" "Ceph" +.TH "LIBRADOS-CONFIG" "8" "July 13, 2017" "dev" "Ceph" .SH NAME librados-config \- display information about librados . diff -Nru ceph-10.2.7/man/monmaptool.8 ceph-10.2.9/man/monmaptool.8 --- ceph-10.2.7/man/monmaptool.8 2017-04-10 11:45:56.000000000 +0000 +++ ceph-10.2.9/man/monmaptool.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "MONMAPTOOL" "8" "April 10, 2017" "dev" "Ceph" +.TH "MONMAPTOOL" "8" "July 13, 2017" "dev" "Ceph" .SH NAME monmaptool \- ceph monitor cluster map manipulation tool . diff -Nru ceph-10.2.7/man/mount.ceph.8 ceph-10.2.9/man/mount.ceph.8 --- ceph-10.2.7/man/mount.ceph.8 2017-04-10 11:45:56.000000000 +0000 +++ ceph-10.2.9/man/mount.ceph.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "MOUNT.CEPH" "8" "April 10, 2017" "dev" "Ceph" +.TH "MOUNT.CEPH" "8" "July 13, 2017" "dev" "Ceph" .SH NAME mount.ceph \- mount a ceph file system . diff -Nru ceph-10.2.7/man/osdmaptool.8 ceph-10.2.9/man/osdmaptool.8 --- ceph-10.2.7/man/osdmaptool.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/osdmaptool.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "OSDMAPTOOL" "8" "April 10, 2017" "dev" "Ceph" +.TH "OSDMAPTOOL" "8" "July 13, 2017" "dev" "Ceph" .SH NAME osdmaptool \- ceph osd cluster map manipulation tool . diff -Nru ceph-10.2.7/man/rados.8 ceph-10.2.9/man/rados.8 --- ceph-10.2.7/man/rados.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/rados.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "RADOS" "8" "April 10, 2017" "dev" "Ceph" +.TH "RADOS" "8" "July 13, 2017" "dev" "Ceph" .SH NAME rados \- rados object storage utility . diff -Nru ceph-10.2.7/man/radosgw.8 ceph-10.2.9/man/radosgw.8 --- ceph-10.2.7/man/radosgw.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/radosgw.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "RADOSGW" "8" "April 10, 2017" "dev" "Ceph" +.TH "RADOSGW" "8" "July 13, 2017" "dev" "Ceph" .SH NAME radosgw \- rados REST gateway . diff -Nru ceph-10.2.7/man/radosgw-admin.8 ceph-10.2.9/man/radosgw-admin.8 --- ceph-10.2.7/man/radosgw-admin.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/radosgw-admin.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "RADOSGW-ADMIN" "8" "April 10, 2017" "dev" "Ceph" +.TH "RADOSGW-ADMIN" "8" "July 13, 2017" "dev" "Ceph" .SH NAME radosgw-admin \- rados REST gateway user administration utility . diff -Nru ceph-10.2.7/man/rbd.8 ceph-10.2.9/man/rbd.8 --- ceph-10.2.7/man/rbd.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/rbd.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "RBD" "8" "April 10, 2017" "dev" "Ceph" +.TH "RBD" "8" "July 13, 2017" "dev" "Ceph" .SH NAME rbd \- manage rados block device (RBD) images . diff -Nru ceph-10.2.7/man/rbd-fuse.8 ceph-10.2.9/man/rbd-fuse.8 --- ceph-10.2.7/man/rbd-fuse.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/rbd-fuse.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "RBD-FUSE" "8" "April 10, 2017" "dev" "Ceph" +.TH "RBD-FUSE" "8" "July 13, 2017" "dev" "Ceph" .SH NAME rbd-fuse \- expose rbd images as files . 
diff -Nru ceph-10.2.7/man/rbdmap.8 ceph-10.2.9/man/rbdmap.8 --- ceph-10.2.7/man/rbdmap.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/rbdmap.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "RBDMAP" "8" "April 10, 2017" "dev" "Ceph" +.TH "RBDMAP" "8" "July 13, 2017" "dev" "Ceph" .SH NAME rbdmap \- map RBD devices at boot time . @@ -38,16 +38,144 @@ .sp .SH DESCRIPTION .sp -\fBrbdmap\fP is a shell script that can be run manually by the system -administrator at any time, or automatically at boot time by the init system -(sysvinit, upstart, systemd). The script looks for an environment variable -\fBRBDMAPFILE\fP, which defaults to \fB/etc/ceph/rbdmap\fP\&. This file is -expected to contain a list of RBD images and, possibly, parameters to be -passed to the underlying \fBrbd\fP command. The syntax of -\fB/etc/ceph/rbdmap\fP is described in the comments at the top of that file. +\fBrbdmap\fP is a shell script that automates \fBrbd map\fP and \fBrbd unmap\fP +operations on one or more RBD (RADOS Block Device) images. While the script can be +run manually by the system administrator at any time, the principal use case is +automatic mapping/mounting of RBD images at boot time (and unmounting/unmapping +at shutdown), as triggered by the init system (a systemd unit file, +\fBrbdmap.service\fP, is included with the ceph\-common package for this purpose). .sp -The script mounts devices after mapping, and unmounts them before -unmapping. +The script takes a single argument, which can be either "map" or "unmap". +In either case, the script parses a configuration file (defaults to \fB/etc/ceph/rbdmap\fP, +but can be overridden via the environment variable \fBRBDMAPFILE\fP). Each line +of the configuration file corresponds to an RBD image which is to be mapped or +unmapped. +.sp +The configuration file format is: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +IMAGESPEC RBDOPTS +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +where \fBIMAGESPEC\fP should be specified as \fBPOOLNAME/IMAGENAME\fP (the pool +name, a forward slash, and the image name), or merely \fBIMAGENAME\fP, in which +case the \fBPOOLNAME\fP defaults to "rbd". \fBRBDOPTS\fP is an optional list of +parameters to be passed to the underlying \fBrbd map\fP command. These parameters +and their values should be specified as a comma\-separated string: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +PARAM1=VAL1,PARAM2=VAL2,...,PARAMN=VALN +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +This will cause the script to issue an \fBrbd map\fP command like the following: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +rbd map POOLNAME/IMAGENAME \-\-PARAM1 VAL1 \-\-PARAM2 VAL2 +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +(See the \fBrbd\fP manpage for a full list of possible options.) +.sp +When run as \fBrbdmap map\fP, the script parses the configuration file, and for +each RBD image specified attempts to first map the image (using the \fBrbd map\fP +command) and, second, to mount the image. +.sp +When run as \fBrbdmap unmap\fP, images listed in the configuration file will +be unmounted and unmapped. +.sp +\fBrbdmap unmap\-all\fP attempts to unmount and subsequently unmap all currently +mapped RBD images, regardless of whether or not they\(aqre listed in the +configuration file.
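Before running ``rbdmap unmap-all``, the set of images it would touch can be inspected by listing the current mappings (a sketch; ``rbd showmapped`` prints the pool, image, snapshot, and device of each mapping)::

    rbd showmapped
    ls -l /dev/rbd/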
+.sp +If successful, the \fBrbd map\fP operation maps the image to a \fB/dev/rbdX\fP +device, at which point a udev rule is triggered to create a friendly device +name symlink, \fB/dev/rbd/POOLNAME/IMAGENAME\fP, pointing to the real mapped +device. +.sp +In order for mounting/unmounting to succeed, the friendly device name must +have a corresponding entry in \fB/etc/fstab\fP\&. +.sp +When writing \fB/etc/fstab\fP entries for RBD images, it\(aqs a good idea to specify +the "noauto" (or "nofail") mount option. This prevents the init system from +trying to mount the device too early \- before the device in question even +exists. (Since \fBrbdmap.service\fP +executes a shell script, it is typically triggered quite late in the boot +sequence.) +.SH EXAMPLES +.sp +Example \fB/etc/ceph/rbdmap\fP for two RBD images called "bar1" and "bar2", both +in pool "foopool": +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +foopool/bar1 id=admin,keyring=/etc/ceph/ceph.client.admin.keyring +foopool/bar2 id=admin,keyring=/etc/ceph/ceph.client.admin.keyring +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Each line in the file contains two strings: the image spec and the options to +be passed to \fBrbd map\fP\&. These two lines get transformed into the following +commands: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +rbd map foopool/bar1 \-\-id admin \-\-keyring /etc/ceph/ceph.client.admin.keyring +rbd map foopool/bar2 \-\-id admin \-\-keyring /etc/ceph/ceph.client.admin.keyring +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +If the images had XFS filesystems on them, the corresponding \fB/etc/fstab\fP +entries might look like this: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +/dev/rbd/foopool/bar1 /mnt/bar1 xfs noauto 0 0 +/dev/rbd/foopool/bar2 /mnt/bar2 xfs noauto 0 0 +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +After creating the images and populating the \fB/etc/ceph/rbdmap\fP file, having +the images automatically mapped and mounted at boot is just a matter of +enabling that unit: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +systemctl enable rbdmap.service +.ft P +.fi +.UNINDENT +.UNINDENT .SH OPTIONS .sp None diff -Nru ceph-10.2.7/man/rbd-mirror.8 ceph-10.2.9/man/rbd-mirror.8 --- ceph-10.2.7/man/rbd-mirror.8 2017-04-10 11:45:56.000000000 +0000 +++ ceph-10.2.9/man/rbd-mirror.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "RBD-MIRROR" "8" "April 10, 2017" "dev" "Ceph" +.TH "RBD-MIRROR" "8" "July 13, 2017" "dev" "Ceph" .SH NAME rbd-mirror \- Ceph daemon for mirroring RBD images . diff -Nru ceph-10.2.7/man/rbd-nbd.8 ceph-10.2.9/man/rbd-nbd.8 --- ceph-10.2.7/man/rbd-nbd.8 2017-04-10 11:45:56.000000000 +0000 +++ ceph-10.2.9/man/rbd-nbd.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "RBD-NBD" "8" "April 10, 2017" "dev" "Ceph" +.TH "RBD-NBD" "8" "July 13, 2017" "dev" "Ceph" .SH NAME rbd-nbd \- map rbd images to nbd device . diff -Nru ceph-10.2.7/man/rbd-replay.8 ceph-10.2.9/man/rbd-replay.8 --- ceph-10.2.7/man/rbd-replay.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/rbd-replay.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "RBD-REPLAY" "8" "April 10, 2017" "dev" "Ceph" +.TH "RBD-REPLAY" "8" "July 13, 2017" "dev" "Ceph" .SH NAME rbd-replay \- replay rados block device (RBD) workloads .
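To complement the ``systemctl enable rbdmap.service`` step shown in the rbdmap examples above, the unit can be checked and exercised without waiting for a reboot (a sketch; the ``/mnt/bar1`` mount point comes from the hypothetical fstab entries above)::

    systemctl is-enabled rbdmap.service
    systemctl start rbdmap.service
    mount | grep /mnt/bar1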
diff -Nru ceph-10.2.7/man/rbd-replay-many.8 ceph-10.2.9/man/rbd-replay-many.8 --- ceph-10.2.7/man/rbd-replay-many.8 2017-04-10 11:45:56.000000000 +0000 +++ ceph-10.2.9/man/rbd-replay-many.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "RBD-REPLAY-MANY" "8" "April 10, 2017" "dev" "Ceph" +.TH "RBD-REPLAY-MANY" "8" "July 13, 2017" "dev" "Ceph" .SH NAME rbd-replay-many \- replay a rados block device (RBD) workload on several clients . diff -Nru ceph-10.2.7/man/rbd-replay-prep.8 ceph-10.2.9/man/rbd-replay-prep.8 --- ceph-10.2.7/man/rbd-replay-prep.8 2017-04-10 11:45:55.000000000 +0000 +++ ceph-10.2.9/man/rbd-replay-prep.8 2017-07-13 13:07:10.000000000 +0000 @@ -1,6 +1,6 @@ .\" Man page generated from reStructuredText. . -.TH "RBD-REPLAY-PREP" "8" "April 10, 2017" "dev" "Ceph" +.TH "RBD-REPLAY-PREP" "8" "July 13, 2017" "dev" "Ceph" .SH NAME rbd-replay-prep \- prepare captured rados block device (RBD) workloads for replay . diff -Nru ceph-10.2.7/qa/suites/dummy/all/nop.yaml ceph-10.2.9/qa/suites/dummy/all/nop.yaml --- ceph-10.2.7/qa/suites/dummy/all/nop.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/dummy/all/nop.yaml 2017-07-13 13:05:36.000000000 +0000 @@ -1,6 +1,3 @@ -overrides: - ansible.cephlab: - playbook: users.yml roles: - [mon.a, mds.a, osd.0, osd.1, client.0] diff -Nru ceph-10.2.7/qa/suites/fs/recovery/tasks/quota.yaml ceph-10.2.9/qa/suites/fs/recovery/tasks/quota.yaml --- ceph-10.2.7/qa/suites/fs/recovery/tasks/quota.yaml 1970-01-01 00:00:00.000000000 +0000 +++ ceph-10.2.9/qa/suites/fs/recovery/tasks/quota.yaml 2017-07-13 13:05:36.000000000 +0000 @@ -0,0 +1,10 @@ + +tasks: + - cephfs_test_runner: + modules: + - tasks.cephfs.test_quota +overrides: + ceph: + conf: + client: + client quota: true diff -Nru ceph-10.2.7/qa/suites/rados/singleton/all/ec-lost-unfound-upgrade.yaml ceph-10.2.9/qa/suites/rados/singleton/all/ec-lost-unfound-upgrade.yaml --- ceph-10.2.7/qa/suites/rados/singleton/all/ec-lost-unfound-upgrade.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/rados/singleton/all/ec-lost-unfound-upgrade.yaml 2017-07-13 13:05:36.000000000 +0000 @@ -26,5 +26,7 @@ - print: "upgraded mon.a and friends" - ceph.restart: daemons: [mon.a, mon.b, mon.c, osd.0, osd.1, osd.2] +- sleep: + duration: 20 # http://tracker.ceph.com/issues/16239 - ec_lost_unfound: parallel_bench: false diff -Nru ceph-10.2.7/qa/suites/rados/singleton-nomsgr/all/11429.yaml ceph-10.2.9/qa/suites/rados/singleton-nomsgr/all/11429.yaml --- ceph-10.2.7/qa/suites/rados/singleton-nomsgr/all/11429.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/rados/singleton-nomsgr/all/11429.yaml 2017-07-13 13:05:36.000000000 +0000 @@ -119,6 +119,7 @@ osd.0: - sleep 300 # http://tracker.ceph.com/issues/17808 - ceph osd set require_jewel_osds + - ceph osd set sortbitwise - ceph.healthy: - ceph_manager.wait_for_clean: null - ceph.restart: [osd.0, osd.1, osd.2] diff -Nru ceph-10.2.7/qa/suites/rados/singleton-nomsgr/all/16113.yaml ceph-10.2.9/qa/suites/rados/singleton-nomsgr/all/16113.yaml --- ceph-10.2.7/qa/suites/rados/singleton-nomsgr/all/16113.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/rados/singleton-nomsgr/all/16113.yaml 2017-07-13 13:05:36.000000000 +0000 @@ -68,6 +68,7 @@ osd.0: - sleep 300 # http://tracker.ceph.com/issues/17808 - ceph osd set require_jewel_osds + - ceph osd set sortbitwise - ceph.healthy: - sleep: duration: 10 diff -Nru 
ceph-10.2.7/qa/suites/rados/singleton-nomsgr/all/lfn-upgrade-hammer.yaml ceph-10.2.9/qa/suites/rados/singleton-nomsgr/all/lfn-upgrade-hammer.yaml --- ceph-10.2.7/qa/suites/rados/singleton-nomsgr/all/lfn-upgrade-hammer.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/rados/singleton-nomsgr/all/lfn-upgrade-hammer.yaml 2017-07-13 13:05:36.000000000 +0000 @@ -91,6 +91,7 @@ - exec: mon.a: - sleep 60 + - ceph osd set sortbitwise - ceph osd set require_jewel_osds - ceph_manager.wait_for_clean: null - ceph_manager.do_pg_scrub: diff -Nru ceph-10.2.7/qa/suites/rados/thrash/thrashers/default.yaml ceph-10.2.9/qa/suites/rados/thrash/thrashers/default.yaml --- ceph-10.2.7/qa/suites/rados/thrash/thrashers/default.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/rados/thrash/thrashers/default.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -10,6 +10,8 @@ osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 - thrashosds: timeout: 1200 chance_pgnum_grow: 1 diff -Nru ceph-10.2.7/qa/suites/rados/thrash/thrashers/pggrow.yaml ceph-10.2.9/qa/suites/rados/thrash/thrashers/pggrow.yaml --- ceph-10.2.7/qa/suites/rados/thrash/thrashers/pggrow.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/rados/thrash/thrashers/pggrow.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -9,6 +9,8 @@ osd scrub min interval: 60 osd scrub max interval: 120 filestore odsync write: true + osd max backfills: 2 + osd snap trim sleep: .5 - thrashosds: timeout: 1200 chance_pgnum_grow: 2 diff -Nru ceph-10.2.7/qa/suites/rados/thrash-erasure-code-isa/thrashers/default.yaml ceph-10.2.9/qa/suites/rados/thrash-erasure-code-isa/thrashers/default.yaml --- ceph-10.2.7/qa/suites/rados/thrash-erasure-code-isa/thrashers/default.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/rados/thrash-erasure-code-isa/thrashers/default.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -10,6 +10,8 @@ osd max backfills: 1 osd scrub min interval: 60 osd scrub max interval: 120 + osd max backfills: 3 + osd snap trim sleep: 2 - thrashosds: timeout: 1200 chance_pgnum_grow: 1 diff -Nru ceph-10.2.7/qa/suites/rados/thrash-erasure-code-isa/thrashers/pggrow.yaml ceph-10.2.9/qa/suites/rados/thrash-erasure-code-isa/thrashers/pggrow.yaml --- ceph-10.2.7/qa/suites/rados/thrash-erasure-code-isa/thrashers/pggrow.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/rados/thrash-erasure-code-isa/thrashers/pggrow.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -9,6 +9,8 @@ osd scrub min interval: 60 osd scrub max interval: 120 filestore odsync write: true + osd max backfills: 2 + osd snap trim sleep: .5 - thrashosds: timeout: 1200 chance_pgnum_grow: 2 diff -Nru ceph-10.2.7/qa/suites/smoke/systemd/distro/centos.yaml ceph-10.2.9/qa/suites/smoke/systemd/distro/centos.yaml --- ceph-10.2.7/qa/suites/smoke/systemd/distro/centos.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/smoke/systemd/distro/centos.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -os_type: centos -os_version: "7.3" diff -Nru ceph-10.2.7/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/distros/centos_7.2.yaml ceph-10.2.9/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/distros/centos_7.2.yaml --- ceph-10.2.7/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/distros/centos_7.2.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/distros/centos_7.2.yaml 1970-01-01 00:00:00.000000000 
+0000 @@ -1,2 +0,0 @@ -os_type: centos -os_version: "7.2" diff -Nru ceph-10.2.7/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/distros/centos.yaml ceph-10.2.9/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/distros/centos.yaml --- ceph-10.2.7/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/distros/centos.yaml 1970-01-01 00:00:00.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/client-upgrade/firefly-client-x/basic/distros/centos.yaml 2017-07-13 13:05:36.000000000 +0000 @@ -0,0 +1,2 @@ +os_type: centos +os_version: "7.3" diff -Nru ceph-10.2.7/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/distros/centos_7.2.yaml ceph-10.2.9/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/distros/centos_7.2.yaml --- ceph-10.2.7/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/distros/centos_7.2.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/distros/centos_7.2.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -os_type: centos -os_version: "7.2" diff -Nru ceph-10.2.7/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/distros/centos.yaml ceph-10.2.9/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/distros/centos.yaml --- ceph-10.2.7/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/distros/centos.yaml 1970-01-01 00:00:00.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/client-upgrade/hammer-client-x/basic/distros/centos.yaml 2017-07-13 13:05:36.000000000 +0000 @@ -0,0 +1,2 @@ +os_type: centos +os_version: "7.3" diff -Nru ceph-10.2.7/qa/suites/upgrade/client-upgrade/infernalis-client-x/basic/2-workload/rbd_api_tests.yaml ceph-10.2.9/qa/suites/upgrade/client-upgrade/infernalis-client-x/basic/2-workload/rbd_api_tests.yaml --- ceph-10.2.7/qa/suites/upgrade/client-upgrade/infernalis-client-x/basic/2-workload/rbd_api_tests.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/client-upgrade/infernalis-client-x/basic/2-workload/rbd_api_tests.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -10,7 +10,7 @@ client.0: - "cp --force $TESTDIR/ceph_test_librbd_api $(which ceph_test_librbd_api)" - "rm -rf $TESTDIR/ceph_test_librbd_api" -- print: "**** done reverting to hammer ceph_test_librbd_api" +- print: "**** done reverting to infernalis ceph_test_librbd_api" - workunit: branch: infernalis clients: diff -Nru ceph-10.2.7/qa/suites/upgrade/client-upgrade/infernalis-client-x/basic/distros/centos_7.2.yaml ceph-10.2.9/qa/suites/upgrade/client-upgrade/infernalis-client-x/basic/distros/centos_7.2.yaml --- ceph-10.2.7/qa/suites/upgrade/client-upgrade/infernalis-client-x/basic/distros/centos_7.2.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/client-upgrade/infernalis-client-x/basic/distros/centos_7.2.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -os_type: centos -os_version: "7.2" diff -Nru ceph-10.2.7/qa/suites/upgrade/client-upgrade/infernalis-client-x/basic/distros/centos.yaml ceph-10.2.9/qa/suites/upgrade/client-upgrade/infernalis-client-x/basic/distros/centos.yaml --- ceph-10.2.7/qa/suites/upgrade/client-upgrade/infernalis-client-x/basic/distros/centos.yaml 1970-01-01 00:00:00.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/client-upgrade/infernalis-client-x/basic/distros/centos.yaml 2017-07-13 13:05:36.000000000 +0000 @@ -0,0 +1,2 @@ +os_type: centos +os_version: "7.3" diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-jewel-x/parallel/distros/centos.yaml ceph-10.2.9/qa/suites/upgrade/hammer-jewel-x/parallel/distros/centos.yaml --- 
ceph-10.2.7/qa/suites/upgrade/hammer-jewel-x/parallel/distros/centos.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/hammer-jewel-x/parallel/distros/centos.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -os_type: centos -os_version: "7.3" diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-jewel-x/stress-split/distros/centos.yaml ceph-10.2.9/qa/suites/upgrade/hammer-jewel-x/stress-split/distros/centos.yaml --- ceph-10.2.7/qa/suites/upgrade/hammer-jewel-x/stress-split/distros/centos.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/hammer-jewel-x/stress-split/distros/centos.yaml 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -os_type: centos -os_version: "7.3" diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-x/parallel/3-upgrade-sequence/upgrade-all.yaml ceph-10.2.9/qa/suites/upgrade/hammer-x/parallel/3-upgrade-sequence/upgrade-all.yaml --- ceph-10.2.7/qa/suites/upgrade/hammer-x/parallel/3-upgrade-sequence/upgrade-all.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/hammer-x/parallel/3-upgrade-sequence/upgrade-all.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -12,6 +12,7 @@ - exec: mon.a: - sleep 300 # http://tracker.ceph.com/issues/17808 + - ceph osd set sortbitwise - ceph osd set require_jewel_osds - ceph.healthy: - print: "**** done ceph.healthy" diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-x/parallel/3-upgrade-sequence/upgrade-osd-mds-mon.yaml ceph-10.2.9/qa/suites/upgrade/hammer-x/parallel/3-upgrade-sequence/upgrade-osd-mds-mon.yaml --- ceph-10.2.7/qa/suites/upgrade/hammer-x/parallel/3-upgrade-sequence/upgrade-osd-mds-mon.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/hammer-x/parallel/3-upgrade-sequence/upgrade-osd-mds-mon.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -32,6 +32,7 @@ - exec: osd.0: - sleep 300 # http://tracker.ceph.com/issues/17808 + - ceph osd set sortbitwise - ceph osd set require_jewel_osds - ceph.healthy: - sleep: diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-x/stress-split/1-hammer-install/hammer.yaml ceph-10.2.9/qa/suites/upgrade/hammer-x/stress-split/1-hammer-install/hammer.yaml --- ceph-10.2.7/qa/suites/upgrade/hammer-x/stress-split/1-hammer-install/hammer.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/hammer-x/stress-split/1-hammer-install/hammer.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -1,6 +1,8 @@ tasks: - install: branch: hammer + exclude_packages: [ 'libcephfs-java', 'libcephfs-jni' ] + extra_packages: [ 'libradosstriper1', 'python-rados', 'python-cephfs', 'python-rbd' ] - print: "**** done install hammer" - ceph: fs: xfs diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-x/stress-split/8-finish-upgrade/last-osds-and-monc.yaml ceph-10.2.9/qa/suites/upgrade/hammer-x/stress-split/8-finish-upgrade/last-osds-and-monc.yaml --- ceph-10.2.7/qa/suites/upgrade/hammer-x/stress-split/8-finish-upgrade/last-osds-and-monc.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/hammer-x/stress-split/8-finish-upgrade/last-osds-and-monc.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -16,5 +16,6 @@ - exec: osd.0: - sleep 300 # http://tracker.ceph.com/issues/17808 + - ceph osd set sortbitwise - ceph osd set require_jewel_osds - print: "**** done wait_for_mon_quorum 8-next-mon" diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-x/stress-split-erasure-code/1-hammer-install/hammer.yaml ceph-10.2.9/qa/suites/upgrade/hammer-x/stress-split-erasure-code/1-hammer-install/hammer.yaml --- 
ceph-10.2.7/qa/suites/upgrade/hammer-x/stress-split-erasure-code/1-hammer-install/hammer.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/hammer-x/stress-split-erasure-code/1-hammer-install/hammer.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -1,6 +1,8 @@ tasks: - install: branch: hammer + exclude_packages: [ 'libcephfs-java', 'libcephfs-jni' ] + extra_packages: [ 'libradosstriper1', 'python-rados', 'python-cephfs', 'python-rbd' ] - print: "**** done install hammer" - ceph: fs: xfs diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-x/stress-split-erasure-code/8-finish-upgrade/last-osds-and-monc.yaml ceph-10.2.9/qa/suites/upgrade/hammer-x/stress-split-erasure-code/8-finish-upgrade/last-osds-and-monc.yaml --- ceph-10.2.7/qa/suites/upgrade/hammer-x/stress-split-erasure-code/8-finish-upgrade/last-osds-and-monc.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/hammer-x/stress-split-erasure-code/8-finish-upgrade/last-osds-and-monc.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -16,5 +16,6 @@ - exec: osd.0: - sleep 300 # http://tracker.ceph.com/issues/17808 + - ceph osd set sortbitwise - ceph osd set require_jewel_osds - print: "**** done wait_for_mon_quorum 8-next-mon" diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-x/stress-split-erasure-code-x86_64/1-hammer-install/hammer.yaml ceph-10.2.9/qa/suites/upgrade/hammer-x/stress-split-erasure-code-x86_64/1-hammer-install/hammer.yaml --- ceph-10.2.7/qa/suites/upgrade/hammer-x/stress-split-erasure-code-x86_64/1-hammer-install/hammer.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/hammer-x/stress-split-erasure-code-x86_64/1-hammer-install/hammer.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -1,6 +1,8 @@ tasks: - install: branch: hammer + exclude_packages: [ 'libcephfs-java', 'libcephfs-jni' ] + extra_packages: [ 'libradosstriper1', 'python-rados', 'python-cephfs', 'python-rbd' ] - print: "**** done install hammer" - ceph: fs: xfs diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-x/stress-split-erasure-code-x86_64/8-finish-upgrade/last-osds-and-monc.yaml ceph-10.2.9/qa/suites/upgrade/hammer-x/stress-split-erasure-code-x86_64/8-finish-upgrade/last-osds-and-monc.yaml --- ceph-10.2.7/qa/suites/upgrade/hammer-x/stress-split-erasure-code-x86_64/8-finish-upgrade/last-osds-and-monc.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/hammer-x/stress-split-erasure-code-x86_64/8-finish-upgrade/last-osds-and-monc.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -16,5 +16,6 @@ - exec: osd.0: - sleep 300 # http://tracker.ceph.com/issues/17808 + - ceph osd set sortbitwise - ceph osd set require_jewel_osds - print: "**** done wait_for_mon_quorum 8-next-mon" diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-x/tiering/3-upgrade/upgrade.yaml ceph-10.2.9/qa/suites/upgrade/hammer-x/tiering/3-upgrade/upgrade.yaml --- ceph-10.2.7/qa/suites/upgrade/hammer-x/tiering/3-upgrade/upgrade.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/hammer-x/tiering/3-upgrade/upgrade.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -77,6 +77,7 @@ duration: 60 - exec: mon.a: + - ceph osd set sortbitwise - ceph osd set require_jewel_osds - ceph.healthy: - print: "**** HEALTH_OK reached after upgrading last OSD to jewel" diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-x/v0-94-4-stop/v0-94-4-stop.yaml ceph-10.2.9/qa/suites/upgrade/hammer-x/v0-94-4-stop/v0-94-4-stop.yaml --- ceph-10.2.7/qa/suites/upgrade/hammer-x/v0-94-4-stop/v0-94-4-stop.yaml 2017-04-10 11:44:25.000000000 +0000 +++ 
ceph-10.2.9/qa/suites/upgrade/hammer-x/v0-94-4-stop/v0-94-4-stop.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -106,5 +106,6 @@ - exec: mon.a: - sleep 300 # http://tracker.ceph.com/issues/17808 + - ceph osd set sortbitwise - ceph osd set require_jewel_osds - ceph.healthy: diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/distros/centos_7.3.yaml ceph-10.2.9/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/distros/centos_7.3.yaml --- ceph-10.2.7/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/distros/centos_7.3.yaml 1970-01-01 00:00:00.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/distros/centos_7.3.yaml 2017-07-13 13:05:36.000000000 +0000 @@ -0,0 +1,2 @@ +os_type: centos +os_version: "7.3" diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/distros/ubuntu_14.04.yaml ceph-10.2.9/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/distros/ubuntu_14.04.yaml --- ceph-10.2.7/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/distros/ubuntu_14.04.yaml 1970-01-01 00:00:00.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/distros/ubuntu_14.04.yaml 2017-07-13 13:05:36.000000000 +0000 @@ -0,0 +1,2 @@ +os_type: ubuntu +os_version: "14.04" diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/ignore.yaml ceph-10.2.9/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/ignore.yaml --- ceph-10.2.7/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/ignore.yaml 1970-01-01 00:00:00.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/ignore.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -0,0 +1,8 @@ +overrides: + ceph: + log-whitelist: + - scrub mismatch + - ScrubResult + conf: + mon: + mon warn on legacy crush tunables: false diff -Nru ceph-10.2.7/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/v0-94-6-mon-overload.yaml ceph-10.2.9/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/v0-94-6-mon-overload.yaml --- ceph-10.2.7/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/v0-94-6-mon-overload.yaml 1970-01-01 00:00:00.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/hammer-x/v0-94-6-mon-overload/v0-94-6-mon-overload.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -0,0 +1,65 @@ +# +# Test the expected behavior of the code that ensures proper encoding +# when +# +# CEPH_FEATURE_OSD_HITSET_GMT +# +# feature is not active (prevents mon overload on upgrade to jewel) +# +# for details, see http://tracker.ceph.com/issues/19508 +# +roles: +- - mon.a + - osd.0 + - osd.1 +- - osd.2 +openstack: +- volumes: # attached to each instance + count: 2 + size: 10 # GB +tasks: +- install: + tag: v0.94.6 + exclude_packages: [ 'libcephfs-java', 'libcephfs-jni' ] + extra_packages: [ 'libradosstriper1', 'python-rados', 'python-cephfs', 'python-rbd' ] +- ceph: + fs: xfs +- install.upgrade: + osd.0: +- ceph.restart: + daemons: [osd.0] + wait-for-osds-up: false + wait-for-healthy: true +- exec: + osd.0: + - |- + set -ex + ceph daemon osd.0 log flush + ! grep -E "failed to encode map e[0-9]+ with expected crc" /var/log/ceph/ceph-osd.0.log + +- install.upgrade: + osd.2: +- ceph.restart: + daemons: [mon.a, osd.1, osd.2] + wait-for-healthy: false +- exec: + mon.a: + - sleep 300 # http://tracker.ceph.com/issues/17808 + - ceph osd set sortbitwise + - ceph osd set require_jewel_osds +- ceph.healthy: +- exec: + mon.a: + - |- + set -ex + ceph daemon mon.a log flush + ceph daemon osd.0 log flush + ceph daemon osd.1 log flush + ! 
grep -E "failed to encode map e[0-9]+ with expected crc" /var/log/ceph/ceph*.log + + osd.2: + - |- + set -ex + ceph daemon osd.2 log flush + ! grep -E "failed to encode map e[0-9]+ with expected crc" /var/log/ceph/ceph*.log + diff -Nru ceph-10.2.7/qa/suites/upgrade/jewel-x/point-to-point-x/point-to-point-upgrade.yaml ceph-10.2.9/qa/suites/upgrade/jewel-x/point-to-point-x/point-to-point-upgrade.yaml --- ceph-10.2.7/qa/suites/upgrade/jewel-x/point-to-point-x/point-to-point-upgrade.yaml 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/suites/upgrade/jewel-x/point-to-point-x/point-to-point-upgrade.yaml 2017-07-13 13:05:37.000000000 +0000 @@ -5,9 +5,9 @@ Use xfs beneath the osds. install ceph/jewel v10.2.0 point version run workload and upgrade-sequence in parallel - install ceph/jewel latest version + install ceph/jewel v10.0.7 version run workload and upgrade-sequence in parallel - install ceph/-x version (jewel) + install ceph/-x latest version (jewel) run workload and upgrade-sequence in parallel overrides: ceph: @@ -53,10 +53,12 @@ - print: "**** done workload v10.2.0" - install.upgrade: mon.a: - branch: jewel + # this has to change to v10.2.8 for the next point release + tag: v10.2.7 exclude_packages: ['ceph-mgr'] mon.b: - branch: jewel + # this has to change to v10.2.8 for the next point release + tag: v10.2.7 exclude_packages: ['ceph-mgr'] # Note that client.a IS NOT upgraded at this point #client.1: @@ -64,12 +66,13 @@ - parallel: - workload_jewel - upgrade-sequence_jewel -- print: "**** done parallel jewel branch" +- print: "**** done parallel jewel v10.2.0 + v10.2.7 branch" - install.upgrade: client.1: - branch: jewel + # this has to change to v10.2.8 for the next point release + tag: v10.2.7 exclude_packages: ['ceph-mgr'] -- print: "**** done branch: jewel install.upgrade on client.1" +- print: "**** done branch: jewel v10.2.7 install.upgrade on client.1" - install.upgrade: mon.a: mon.b: @@ -147,6 +150,15 @@ - ceph.restart: [mon.c] - sleep: duration: 60 + - ceph.restart: + daemons: [mon.c] + wait-for-healthy: false + wait-for-up-osds: true + - exec: + mon.a: + - ceph osd set require_jewel_osds + - sleep: + duration: 60 - print: "**** done ceph.restart all jewel branch mds/osd/mon" workload_x: sequential: @@ -202,13 +214,7 @@ - ceph.restart: [osd.4] - sleep: duration: 30 - - ceph.restart: - daemons: [osd.5] - wait-for-healthy: false - wait-for-up-osds: true - - exec: - mon.a: - - ceph osd set require_jewel_osds + - ceph.restart: [osd.5] - sleep: - duration: 60 + duration: 30 - print: "**** done ceph.restart all -x branch mds/osd/mon" diff -Nru ceph-10.2.7/qa/tasks/cephfs/mount.py ceph-10.2.9/qa/tasks/cephfs/mount.py --- ceph-10.2.7/qa/tasks/cephfs/mount.py 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/tasks/cephfs/mount.py 2017-07-13 13:05:37.000000000 +0000 @@ -157,7 +157,10 @@ def open_background(self, basename="background_file"): """ Open a file for writing, then block such that the client - will hold a capability + will hold a capability. + + Don't return until the remote process has got as far as opening + the file, then return the RemoteProcess instance. """ assert(self.is_mounted()) @@ -176,6 +179,12 @@ rproc = self._run_python(pyscript) self.background_procs.append(rproc) + + # This wait would not be sufficient if the file had already + # existed, but it's simple and in practice users of open_background + # are not using it on existing files. 
+ self.wait_for_visible(basename) + return rproc def wait_for_visible(self, basename="background_file", timeout=30): @@ -559,13 +568,33 @@ # gives you [''] instead of [] return [] + def setfattr(self, path, key, val): + """ + Wrap setfattr. + + :param path: relative to mount point + :param key: xattr name + :param val: xattr value + :return: None + """ + self.run_shell(["setfattr", "-n", key, "-v", val, path]) + def getfattr(self, path, attr): """ - Wrap getfattr: return the values of a named xattr on one file. + Wrap getfattr: return the values of a named xattr on one file, or + None if the attribute is not found. :return: a string """ - p = self.run_shell(["getfattr", "--only-values", "-n", attr, path]) + p = self.run_shell(["getfattr", "--only-values", "-n", attr, path], wait=False) + try: + p.wait() + except CommandFailedError as e: + if e.exitstatus == 1 and "No such attribute" in p.stderr.getvalue(): + return None + else: + raise + return p.stdout.getvalue() def df(self): diff -Nru ceph-10.2.7/qa/tasks/cephfs/test_backtrace.py ceph-10.2.9/qa/tasks/cephfs/test_backtrace.py --- ceph-10.2.7/qa/tasks/cephfs/test_backtrace.py 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/tasks/cephfs/test_backtrace.py 2017-07-13 13:05:37.000000000 +0000 @@ -50,7 +50,8 @@ new_pool_id = get_pool_id(new_pool_name) # That an object which has switched pools gets its backtrace updated - self.mount_a.run_shell(["setfattr", "-n", "ceph.file.layout.pool", "-v", new_pool_name, "./parent_b/alpha"]) + self.mount_a.setfattr("./parent_b/alpha", + "ceph.file.layout.pool", new_pool_name) self.fs.mds_asok(["flush", "journal"]) backtrace_old_pool = self.fs.read_backtrace(file_ino, pool=old_data_pool_name) self.assertEqual(backtrace_old_pool['pool'], new_pool_id) @@ -70,7 +71,8 @@ self.assertEqual(['alpha', 'parent_c'], [a['dname'] for a in backtrace_new_pool['ancestors']]) # That layout is written to new pool after change to other field in layout - self.mount_a.run_shell(["setfattr", "-n", "ceph.file.layout.object_size", "-v", "8388608", "./parent_c/alpha"]) + self.mount_a.setfattr("./parent_c/alpha", + "ceph.file.layout.object_size", "8388608") self.fs.mds_asok(["flush", "journal"]) new_pool_layout = self.fs.read_layout(file_ino, pool=new_pool_name) diff -Nru ceph-10.2.7/qa/tasks/cephfs/test_data_scan.py ceph-10.2.9/qa/tasks/cephfs/test_data_scan.py --- ceph-10.2.7/qa/tasks/cephfs/test_data_scan.py 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/tasks/cephfs/test_data_scan.py 2017-07-13 13:05:37.000000000 +0000 @@ -157,13 +157,11 @@ # Create a dir with a striped layout set on it self._mount.run_shell(["mkdir", "stripey"]) - self._mount.run_shell([ - "setfattr", "-n", "ceph.dir.layout", "-v", - "stripe_unit={ss} stripe_count={sc} object_size={os} pool={pool}".format( - ss=self.ss, os=self.os, sc=self.sc, - pool=self._filesystem.get_data_pool_name() - ), - "./stripey"]) + self._mount.setfattr("./stripey", "ceph.dir.layout", + "stripe_unit={ss} stripe_count={sc} object_size={os} pool={pool}".format( + ss=self.ss, os=self.os, sc=self.sc, + pool=self._filesystem.get_data_pool_name() + )) # Write files, then flush metadata so that its layout gets written into an xattr for i, n_bytes in enumerate(self.interesting_sizes): @@ -288,15 +286,14 @@ """ def write(self): self._mount.run_shell(["touch", "datafile"]) - self._mount.run_shell(["setfattr", "-n", "ceph.file.layout.object_size", "-v", "8388608", "./datafile"]) + self._mount.setfattr("./datafile", "ceph.file.layout.object_size", "8388608") 
self._mount.run_shell(["dd", "if=/dev/urandom", "of=./datafile", "bs=1M", "count=32"]) self._initial_state = self._mount.stat("datafile") def validate(self): - p = self._mount.run_shell(["getfattr", "--only-values", "-n", "ceph.file.layout.object_size", "./datafile"]) - # Check we got the layout reconstructed properly - object_size = int(p.stdout.getvalue().strip()) + object_size = int(self._mount.getfattr( + "./datafile", "ceph.file.layout.object_size")) self.assert_equal(object_size, 8388608) # Check we got the file size reconstructed properly diff -Nru ceph-10.2.7/qa/tasks/cephfs/test_pool_perm.py ceph-10.2.9/qa/tasks/cephfs/test_pool_perm.py --- ceph-10.2.7/qa/tasks/cephfs/test_pool_perm.py 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/tasks/cephfs/test_pool_perm.py 2017-07-13 13:05:37.000000000 +0000 @@ -80,15 +80,14 @@ self.mount_a.umount_wait() self.mount_a.mount() + self.mount_a.wait_until_mounted() with self.assertRaises(CommandFailedError): - self.mount_a.run_shell(["setfattr", - "-n", "ceph.file.layout.pool", - "-v", new_pool_name, "layoutfile"]) + self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool", + new_pool_name) with self.assertRaises(CommandFailedError): - self.mount_a.run_shell(["setfattr", - "-n", "ceph.dir.layout.pool", - "-v", new_pool_name, "layoutdir"]) + self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool", + new_pool_name) self.mount_a.umount_wait() # Set MDS 'rwp' perms: should now be able to set layouts @@ -100,12 +99,11 @@ self.fs.get_data_pool_names()[1], )) self.mount_a.mount() - self.mount_a.run_shell(["setfattr", - "-n", "ceph.file.layout.pool", - "-v", new_pool_name, "layoutfile"]) - self.mount_a.run_shell(["setfattr", - "-n", "ceph.dir.layout.pool", - "-v", new_pool_name, "layoutdir"]) + self.mount_a.wait_until_mounted() + self.mount_a.setfattr("layoutfile", "ceph.file.layout.pool", + new_pool_name) + self.mount_a.setfattr("layoutdir", "ceph.dir.layout.pool", + new_pool_name) self.mount_a.umount_wait() def tearDown(self): diff -Nru ceph-10.2.7/qa/tasks/cephfs/test_quota.py ceph-10.2.9/qa/tasks/cephfs/test_quota.py --- ceph-10.2.7/qa/tasks/cephfs/test_quota.py 1970-01-01 00:00:00.000000000 +0000 +++ ceph-10.2.9/qa/tasks/cephfs/test_quota.py 2017-07-13 13:05:37.000000000 +0000 @@ -0,0 +1,106 @@ + +from cephfs_test_case import CephFSTestCase + +from teuthology.exceptions import CommandFailedError + +class TestQuota(CephFSTestCase): + CLIENTS_REQUIRED = 2 + MDSS_REQUIRED = 1 + + def test_remote_update_getfattr(self): + """ + That quota changes made from one client are visible to another + client looking at ceph.quota xattrs + """ + self.mount_a.run_shell(["mkdir", "subdir"]) + + self.assertEqual( + self.mount_a.getfattr("./subdir", "ceph.quota.max_files"), + None) + self.assertEqual( + self.mount_b.getfattr("./subdir", "ceph.quota.max_files"), + None) + + self.mount_a.setfattr("./subdir", "ceph.quota.max_files", "10") + self.assertEqual( + self.mount_a.getfattr("./subdir", "ceph.quota.max_files"), + "10") + + # Should be visible as soon as setxattr operation completes on + # mds (we get here sooner because setfattr gets an early reply) + self.wait_until_equal( + lambda: self.mount_b.getfattr("./subdir", "ceph.quota.max_files"), + "10", timeout=10) + + def test_remote_update_df(self): + """ + That when a client modifies the quota on a directory used + as another client's root, the other client sees the change + reflected in their statfs output. 
+ """ + + self.mount_b.umount_wait() + + self.mount_a.run_shell(["mkdir", "subdir"]) + + size_before = 1024 * 1024 * 128 + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", + "%s" % size_before) + + self.mount_b.mount(mount_path="/subdir") + + self.assertDictEqual( + self.mount_b.df(), + { + "total": size_before, + "used": 0, + "available": size_before + }) + + size_after = 1024 * 1024 * 256 + self.mount_a.setfattr("./subdir", "ceph.quota.max_bytes", + "%s" % size_after) + + # Should be visible as soon as setxattr operation completes on + # mds (we get here sooner because setfattr gets an early reply) + self.wait_until_equal( + lambda: self.mount_b.df(), + { + "total": size_after, + "used": 0, + "available": size_after + }, + timeout=10 + ) + + def test_remote_update_write(self): + """ + That when a client modifies the quota on a directory used + as another client's root, the other client sees the effect + of the change when writing data. + """ + + self.mount_a.run_shell(["mkdir", "subdir_files"]) + self.mount_a.run_shell(["mkdir", "subdir_data"]) + + # Set some nice high quotas that mount_b's initial operations + # will be well within + self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "100") + self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "104857600") + + # Do some writes within my quota + self.mount_b.create_n_files("subdir_files/file", 20) + self.mount_b.write_n_mb("subdir_data/file", 20) + + # Set quotas lower than what mount_b already wrote, it should + # refuse to write more once it's seen them + self.mount_a.setfattr("./subdir_files", "ceph.quota.max_files", "10") + self.mount_a.setfattr("./subdir_data", "ceph.quota.max_bytes", "1048576") + + # Do some writes that would have been okay within the old quota, + # but are forbidden under the new quota + with self.assertRaises(CommandFailedError): + self.mount_b.create_n_files("subdir_files/file", 40) + with self.assertRaises(CommandFailedError): + self.mount_b.write_n_mb("subdir_data/file", 40) + diff -Nru ceph-10.2.7/qa/tasks/cephfs/test_strays.py ceph-10.2.9/qa/tasks/cephfs/test_strays.py --- ceph-10.2.7/qa/tasks/cephfs/test_strays.py 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/tasks/cephfs/test_strays.py 2017-07-13 13:05:37.000000000 +0000 @@ -235,11 +235,11 @@ # Write some bytes to a file size_mb = 8 - self.mount_a.write_n_mb("open_file", size_mb) - open_file_ino = self.mount_a.path_to_ino("open_file") # Hold the file open p = self.mount_a.open_background("open_file") + self.mount_a.write_n_mb("open_file", size_mb) + open_file_ino = self.mount_a.path_to_ino("open_file") self.assertEqual(self.get_session(mount_a_client_id)['num_caps'], 2) @@ -671,7 +671,7 @@ self.mount_a.run_shell(["touch", file_name]) file_layout = "stripe_unit=1048576 stripe_count=4 object_size=8388608" - self.mount_a.run_shell(["setfattr", "-n", "ceph.file.layout", "-v", file_layout, file_name]) + self.mount_a.setfattr(file_name, "ceph.file.layout", file_layout) # 35MB requires 7 objects size_mb = 35 diff -Nru ceph-10.2.7/qa/tasks/ceph_manager.py ceph-10.2.9/qa/tasks/ceph_manager.py --- ceph-10.2.7/qa/tasks/ceph_manager.py 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/tasks/ceph_manager.py 2017-07-13 13:05:37.000000000 +0000 @@ -136,9 +136,6 @@ except Exception: manager.raw_cluster_cmd('--', 'mon', 'tell', '*', 'injectargs', '--mon-osd-down-out-interval 0') - self.thread = gevent.spawn(self.do_thrash) - if self.sighup_delay: - self.sighup_thread = gevent.spawn(self.do_sighup) if 
self.config.get('powercycle') or not self.cmd_exists_on_osds("ceph-objectstore-tool"): self.ceph_objectstore_tool = False self.test_rm_past_intervals = False @@ -153,6 +150,10 @@ self.config.get('ceph_objectstore_tool', True) self.test_rm_past_intervals = \ self.config.get('test_rm_past_intervals', True) + # spawn do_thrash + self.thread = gevent.spawn(self.do_thrash) + if self.sighup_delay: + self.sighup_thread = gevent.spawn(self.do_sighup) def cmd_exists_on_osds(self, cmd): allremotes = self.ceph_manager.ctx.cluster.only(\ @@ -222,12 +223,22 @@ break log.debug("ceph-objectstore-tool binary not present, trying again") - proc = exp_remote.run(args=cmd, wait=True, - check_status=False, stdout=StringIO()) - if proc.exitstatus: - raise Exception("ceph-objectstore-tool: " - "exp list-pgs failure with status {ret}". - format(ret=proc.exitstatus)) + # ceph-objectstore-tool might bogusly fail with "OSD has the store locked" + # see http://tracker.ceph.com/issues/19556 + with safe_while(sleep=15, tries=40, action="ceph-objectstore-tool --op list-pgs") as proceed: + while proceed(): + proc = exp_remote.run(args=cmd, wait=True, + check_status=False, + stdout=StringIO(), stderr=StringIO()) + if proc.exitstatus == 0: + break + elif proc.exitstatus == 1 and proc.stderr.getvalue().strip() == "OSD has the store locked": + continue + else: + raise Exception("ceph-objectstore-tool: " + "exp list-pgs failure with status {ret}". + format(ret=proc.exitstatus)) + pgs = proc.stdout.getvalue().split('\n')[:-1] if len(pgs) == 0: self.log("No PGs found for osd.{osd}".format(osd=exp_osd)) @@ -1458,6 +1469,33 @@ self.raw_cluster_cmd('pg', stype, self.get_pgid(pool, pgnum)) time.sleep(10) + def wait_snap_trimming_complete(self, pool): + """ + Wait for snap trimming on pool to end + """ + POLL_PERIOD = 10 + FATAL_TIMEOUT = 600 + start = time.time() + poolnum = self.get_pool_num(pool) + poolnumstr = "%s." % (poolnum,) + while (True): + now = time.time() + if (now - start) > FATAL_TIMEOUT: + assert (now - start) < FATAL_TIMEOUT, \ 'failed to complete snap trimming before timeout' + all_stats = self.get_pg_stats() + trimming = False + for pg in all_stats: + if (poolnumstr in pg['pgid']) and ('snaptrim' in pg['state']): + self.log("pg {pg} in trimming, state: {state}".format( + pg=pg['pgid'], + state=pg['state'])) + trimming = True + if not trimming: + break + self.log("{pool} still trimming, waiting".format(pool=pool)) + time.sleep(POLL_PERIOD) + def get_single_pg_stats(self, pgid): """ Return pg for the pgid specified.
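[Editor's note: the retry added above works around a transient "OSD has the store locked" failure from ceph-objectstore-tool. A condensed sketch of the same idiom, assuming teuthology's safe_while context manager; run_list_pgs is a hypothetical wrapper around the exp_remote.run call.]

    from teuthology.contextutil import safe_while

    with safe_while(sleep=15, tries=40, action="list-pgs") as proceed:
        while proceed():
            proc = run_list_pgs()  # hypothetical wrapper around exp_remote.run
            if proc.exitstatus == 0:
                break
            if proc.exitstatus == 1 and \
                    "OSD has the store locked" in proc.stderr.getvalue():
                continue  # transient: the stopping OSD still holds the store
            raise Exception("list-pgs failed with status %d" % proc.exitstatus)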
diff -Nru ceph-10.2.7/qa/tasks/rados.py ceph-10.2.9/qa/tasks/rados.py --- ceph-10.2.7/qa/tasks/rados.py 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/tasks/rados.py 2017-07-13 13:05:37.000000000 +0000 @@ -239,6 +239,7 @@ run.wait(tests.itervalues()) for pool in created_pools: + manager.wait_snap_trimming_complete(pool); manager.remove_pool(pool) running = gevent.spawn(thread) diff -Nru ceph-10.2.7/qa/tasks/swift.py ceph-10.2.9/qa/tasks/swift.py --- ceph-10.2.7/qa/tasks/swift.py 1970-01-01 00:00:00.000000000 +0000 +++ ceph-10.2.9/qa/tasks/swift.py 2017-07-13 13:05:37.000000000 +0000 @@ -0,0 +1,266 @@ +""" +Test Swift API +""" +from cStringIO import StringIO +from configobj import ConfigObj +import base64 +import contextlib +import logging +import os + +from teuthology import misc as teuthology +from teuthology import contextutil +from teuthology.config import config as teuth_config +from teuthology.orchestra import run +from teuthology.orchestra.connection import split_user + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def download(ctx, config): + """ + Download the Swift API. + """ + testdir = teuthology.get_testdir(ctx) + assert isinstance(config, list) + log.info('Downloading swift...') + for client in config: + ctx.cluster.only(client).run( + args=[ + 'git', + 'clone', + '--branch', + 'ceph-jewel', + teuth_config.ceph_git_base_url + 'swift.git', + '{tdir}/swift'.format(tdir=testdir), + ], + ) + try: + yield + finally: + log.info('Removing swift...') + testdir = teuthology.get_testdir(ctx) + for client in config: + ctx.cluster.only(client).run( + args=[ + 'rm', + '-rf', + '{tdir}/swift'.format(tdir=testdir), + ], + ) + +def _config_user(testswift_conf, account, user, suffix): + """ + Configure a swift user + + :param account: Swift account + :param user: User name + :param suffix: user name and email suffixes. + """ + testswift_conf['func_test'].setdefault('account{s}'.format(s=suffix), account) + testswift_conf['func_test'].setdefault('username{s}'.format(s=suffix), user) + testswift_conf['func_test'].setdefault('email{s}'.format(s=suffix), '{account}+test@test.test'.format(account=account)) + testswift_conf['func_test'].setdefault('display_name{s}'.format(s=suffix), 'Mr. {account} {user}'.format(account=account, user=user)) + testswift_conf['func_test'].setdefault('password{s}'.format(s=suffix), base64.b64encode(os.urandom(40))) + +@contextlib.contextmanager +def create_users(ctx, config): + """ + Create rgw users to interact with the swift interface. 
+ """ + assert isinstance(config, dict) + log.info('Creating rgw users...') + testdir = teuthology.get_testdir(ctx) + users = {'': 'foo', '2': 'bar'} + for client in config['clients']: + cluster_name, daemon_type, client_id = teuthology.split_role(client) + testswift_conf = config['testswift_conf'][client] + for suffix, user in users.iteritems(): + _config_user(testswift_conf, '{user}.{client}'.format(user=user, client=client), user, suffix) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client, + '--cluster', cluster_name, + 'user', 'create', + '--subuser', '{account}:{user}'.format(account=testswift_conf['func_test']['account{s}'.format(s=suffix)],user=user), + '--display-name', testswift_conf['func_test']['display_name{s}'.format(s=suffix)], + '--secret', testswift_conf['func_test']['password{s}'.format(s=suffix)], + '--email', testswift_conf['func_test']['email{s}'.format(s=suffix)], + '--key-type', 'swift', + '--access', 'full', + ], + ) + try: + yield + finally: + for client in config['clients']: + for user in users.itervalues(): + uid = '{user}.{client}'.format(user=user, client=client) + cluster_name, daemon_type, client_id = teuthology.split_role(client) + ctx.cluster.only(client).run( + args=[ + 'adjust-ulimits', + 'ceph-coverage', + '{tdir}/archive/coverage'.format(tdir=testdir), + 'radosgw-admin', + '-n', client, + '--cluster', cluster_name, + 'user', 'rm', + '--uid', uid, + '--purge-data', + ], + ) + +@contextlib.contextmanager +def configure(ctx, config): + """ + Configure rgw and Swift + """ + assert isinstance(config, dict) + log.info('Configuring testswift...') + testdir = teuthology.get_testdir(ctx) + for client, properties in config['clients'].iteritems(): + log.info('client={c}'.format(c=client)) + log.info('config={c}'.format(c=config)) + testswift_conf = config['testswift_conf'][client] + if properties is not None and 'rgw_server' in properties: + host = None + for target, roles in zip(ctx.config['targets'].iterkeys(), ctx.config['roles']): + log.info('roles: ' + str(roles)) + log.info('target: ' + str(target)) + if properties['rgw_server'] in roles: + _, host = split_user(target) + assert host is not None, "Invalid client specified as the rgw_server" + testswift_conf['func_test']['auth_host'] = host + else: + testswift_conf['func_test']['auth_host'] = 'localhost' + + log.info(client) + (remote,) = ctx.cluster.only(client).remotes.keys() + remote.run( + args=[ + 'cd', + '{tdir}/swift'.format(tdir=testdir), + run.Raw('&&'), + './bootstrap', + ], + ) + conf_fp = StringIO() + testswift_conf.write(conf_fp) + teuthology.write_file( + remote=remote, + path='{tdir}/archive/testswift.{client}.conf'.format(tdir=testdir, client=client), + data=conf_fp.getvalue(), + ) + yield + + +@contextlib.contextmanager +def run_tests(ctx, config): + """ + Run an individual Swift test. 
+ """ + assert isinstance(config, dict) + testdir = teuthology.get_testdir(ctx) + for client, client_config in config.iteritems(): + args = [ + 'SWIFT_TEST_CONFIG_FILE={tdir}/archive/testswift.{client}.conf'.format(tdir=testdir, client=client), + '{tdir}/swift/virtualenv/bin/nosetests'.format(tdir=testdir), + '-w', + '{tdir}/swift/test/functional'.format(tdir=testdir), + '-v', + '-a', '!fails_on_rgw', + ] + if client_config is not None and 'extra_args' in client_config: + args.extend(client_config['extra_args']) + + ctx.cluster.only(client).run( + args=args, + ) + yield + +@contextlib.contextmanager +def task(ctx, config): + """ + Run the testswift suite against rgw. + + To run all tests on all clients:: + + tasks: + - ceph: + - rgw: + - testswift: + + To restrict testing to particular clients:: + + tasks: + - ceph: + - rgw: [client.0] + - testswift: [client.0] + + To run against a server on client.1:: + + tasks: + - ceph: + - rgw: [client.1] + - testswift: + client.0: + rgw_server: client.1 + + To pass extra arguments to nose (e.g. to run a certain test):: + + tasks: + - ceph: + - rgw: [client.0] + - testswift: + client.0: + extra_args: ['test.functional.tests:TestFileUTF8', '-m', 'testCopy'] + client.1: + extra_args: ['--exclude', 'TestFile'] + """ + assert config is None or isinstance(config, list) \ + or isinstance(config, dict), \ + "task testswift only supports a list or dictionary for configuration" + all_clients = ['client.{id}'.format(id=id_) + for id_ in teuthology.all_roles_of_type(ctx.cluster, 'client')] + if config is None: + config = all_clients + if isinstance(config, list): + config = dict.fromkeys(config) + clients = config.keys() + + log.info('clients={c}'.format(c=clients)) + + testswift_conf = {} + for client in clients: + testswift_conf[client] = ConfigObj( + indent_type='', + infile={ + 'func_test': + { + 'auth_port' : 7280, + 'auth_ssl' : 'no', + 'auth_prefix' : '/auth/', + }, + } + ) + + with contextutil.nested( + lambda: download(ctx=ctx, config=clients), + lambda: create_users(ctx=ctx, config=dict( + clients=clients, + testswift_conf=testswift_conf, + )), + lambda: configure(ctx=ctx, config=dict( + clients=config, + testswift_conf=testswift_conf, + )), + lambda: run_tests(ctx=ctx, config=config), + ): + pass + yield diff -Nru ceph-10.2.7/qa/tasks/systemd.py ceph-10.2.9/qa/tasks/systemd.py --- ceph-10.2.7/qa/tasks/systemd.py 1970-01-01 00:00:00.000000000 +0000 +++ ceph-10.2.9/qa/tasks/systemd.py 2017-07-13 13:05:37.000000000 +0000 @@ -0,0 +1,142 @@ +""" +Systemd test +""" +import contextlib +import logging +import re +import time + +from cStringIO import StringIO +from teuthology.orchestra import run +from teuthology.misc import reconnect, get_first_mon, wait_until_healthy + +log = logging.getLogger(__name__) + + +@contextlib.contextmanager +def task(ctx, config): + """ + - tasks: + ceph-deploy: + systemd: + + Test ceph systemd services can start, stop and restart and + check for any failed services and report back errors + """ + for remote, roles in ctx.cluster.remotes.iteritems(): + remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'), + 'grep', 'ceph']) + r = remote.run(args=['sudo', 'systemctl', 'list-units', run.Raw('|'), + 'grep', 'ceph'], stdout=StringIO(), + check_status=False) + log.info(r.stdout.getvalue()) + if r.stdout.getvalue().find('failed'): + log.info("Ceph services in failed state") + + # test overall service stop and start using ceph.target + # ceph.target tests are meant for ceph systemd tests + # and not actual process testing using 'ps' + 
log.info("Stopping all Ceph services") + remote.run(args=['sudo', 'systemctl', 'stop', 'ceph.target']) + r = remote.run(args=['sudo', 'systemctl', 'status', 'ceph.target'], + stdout=StringIO(), check_status=False) + log.info(r.stdout.getvalue()) + log.info("Checking process status") + r = remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'), + 'grep', 'ceph'], stdout=StringIO()) + if r.stdout.getvalue().find('Active: inactive'): + log.info("Sucessfully stopped all ceph services") + else: + log.info("Failed to stop ceph services") + + log.info("Starting all Ceph services") + remote.run(args=['sudo', 'systemctl', 'start', 'ceph.target']) + r = remote.run(args=['sudo', 'systemctl', 'status', 'ceph.target'], + stdout=StringIO()) + log.info(r.stdout.getvalue()) + if r.stdout.getvalue().find('Active: active'): + log.info("Sucessfully started all Ceph services") + else: + log.info("info", "Failed to start Ceph services") + r = remote.run(args=['sudo', 'ps', '-eaf', run.Raw('|'), + 'grep', 'ceph'], stdout=StringIO()) + log.info(r.stdout.getvalue()) + time.sleep(4) + + # test individual services start stop + name = remote.shortname + mon_name = 'ceph-mon@' + name + '.service' + mds_name = 'ceph-mds@' + name + '.service' + mgr_name = 'ceph-mgr@' + name + '.service' + mon_role_name = 'mon.' + name + mds_role_name = 'mds.' + name + mgr_role_name = 'mgr.' + name + m_osd = re.search('--id (\d+) --setuser ceph', r.stdout.getvalue()) + if m_osd: + osd_service = 'ceph-osd@{m}.service'.format(m=m_osd.group(1)) + remote.run(args=['sudo', 'systemctl', 'status', + osd_service]) + remote.run(args=['sudo', 'systemctl', 'stop', + osd_service]) + time.sleep(4) # immediate check will result in deactivating state + r = remote.run(args=['sudo', 'systemctl', 'status', osd_service], + stdout=StringIO(), check_status=False) + log.info(r.stdout.getvalue()) + if r.stdout.getvalue().find('Active: inactive'): + log.info("Sucessfully stopped single osd ceph service") + else: + log.info("Failed to stop ceph osd services") + remote.run(args=['sudo', 'systemctl', 'start', + osd_service]) + time.sleep(4) + if mon_role_name in roles: + remote.run(args=['sudo', 'systemctl', 'status', mon_name]) + remote.run(args=['sudo', 'systemctl', 'stop', mon_name]) + time.sleep(4) # immediate check will result in deactivating state + r = remote.run(args=['sudo', 'systemctl', 'status', mon_name], + stdout=StringIO(), check_status=False) + if r.stdout.getvalue().find('Active: inactive'): + log.info("Sucessfully stopped single mon ceph service") + else: + log.info("Failed to stop ceph mon service") + remote.run(args=['sudo', 'systemctl', 'start', mon_name]) + time.sleep(4) + if mgr_role_name in roles: + remote.run(args=['sudo', 'systemctl', 'status', mgr_name]) + remote.run(args=['sudo', 'systemctl', 'stop', mgr_name]) + time.sleep(4) # immediate check will result in deactivating state + r = remote.run(args=['sudo', 'systemctl', 'status', mgr_name], + stdout=StringIO(), check_status=False) + if r.stdout.getvalue().find('Active: inactive'): + log.info("Sucessfully stopped single ceph mgr service") + else: + log.info("Failed to stop ceph mgr service") + remote.run(args=['sudo', 'systemctl', 'start', mgr_name]) + time.sleep(4) + if mds_role_name in roles: + remote.run(args=['sudo', 'systemctl', 'status', mds_name]) + remote.run(args=['sudo', 'systemctl', 'stop', mds_name]) + time.sleep(4) # immediate check will result in deactivating state + r = remote.run(args=['sudo', 'systemctl', 'status', mds_name], + stdout=StringIO(), check_status=False) + if 
r.stdout.getvalue().find('Active: inactive'): + log.info("Successfully stopped single ceph mds service") + else: + log.info("Failed to stop ceph mds service") + remote.run(args=['sudo', 'systemctl', 'start', mds_name]) + time.sleep(4) + + # reboot all nodes and verify the systemd units restart + # workunit that runs would fail if any of the systemd units doesn't start + ctx.cluster.run(args='sudo reboot', wait=False, check_status=False) + # avoid immediate reconnect + time.sleep(120) + reconnect(ctx, 480) # reconnect all nodes + # for debug info + ctx.cluster.run(args=['sudo', 'ps', '-eaf', run.Raw('|'), + 'grep', 'ceph']) + # wait for HEALTH_OK + mon = get_first_mon(ctx, config) + (mon_remote,) = ctx.cluster.only(mon).remotes.iterkeys() + wait_until_healthy(ctx, mon_remote, use_sudo=True) + yield diff -Nru ceph-10.2.7/qa/tasks/util/rgw.py ceph-10.2.9/qa/tasks/util/rgw.py --- ceph-10.2.7/qa/tasks/util/rgw.py 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/tasks/util/rgw.py 2017-07-13 13:05:37.000000000 +0000 @@ -70,11 +70,12 @@ return get_user_summary(out, user)['total']['successful_ops'] def get_zone_host_and_port(ctx, client, zone): - _, region_map = rgwadmin(ctx, client, check_status=True, - cmd=['-n', client, 'region-map', 'get']) - regions = region_map['zonegroups'] - for region in regions: - for zone_info in region['val']['zones']: + _, period = rgwadmin(ctx, client, check_status=True, + cmd=['-n', client, 'period', 'get']) + period_map = period['period_map'] + zonegroups = period_map['zonegroups'] + for zonegroup in zonegroups: + for zone_info in zonegroup['zones']: if zone_info['name'] == zone: endpoint = urlparse(zone_info['endpoints'][0]) host, port = endpoint.hostname, endpoint.port @@ -84,17 +85,18 @@ assert False, 'no endpoint for zone {zone} found'.format(zone=zone) def get_master_zone(ctx, client): - _, region_map = rgwadmin(ctx, client, check_status=True, - cmd=['-n', client, 'region-map', 'get']) - regions = region_map['zonegroups'] - for region in regions: - is_master = (region['val']['is_master'] == "true") - log.info('region={r} is_master={ism}'.format(r=region, ism=is_master)) + _, period = rgwadmin(ctx, client, check_status=True, + cmd=['-n', client, 'period', 'get']) + period_map = period['period_map'] + zonegroups = period_map['zonegroups'] + for zonegroup in zonegroups: + is_master = (zonegroup['is_master'] == "true") + log.info('zonegroup={z} is_master={ism}'.format(z=zonegroup, ism=is_master)) if not is_master: continue - master_zone = region['val']['master_zone'] + master_zone = zonegroup['master_zone'] log.info('master_zone=%s' % master_zone) - for zone_info in region['val']['zones']: + for zone_info in zonegroup['zones']: if zone_info['name'] == master_zone: return master_zone log.info('couldn\'t find master zone') diff -Nru ceph-10.2.7/qa/tasks/workunit.py ceph-10.2.9/qa/tasks/workunit.py --- ceph-10.2.7/qa/tasks/workunit.py 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/tasks/workunit.py 2017-07-13 13:05:37.000000000 +0000 @@ -5,6 +5,7 @@ import pipes import os +from copy import deepcopy from util import get_remote_for_role from teuthology import misc @@ -16,6 +17,58 @@ log = logging.getLogger(__name__) +class Refspec: + def __init__(self, refspec): + self.refspec = refspec + + def __str__(self): + return self.refspec + + def _clone(self, git_url, clonedir, opts=None): + if opts is None: + opts = [] + return (['rm', '-rf', clonedir] + + [run.Raw('&&')] + + ['git', 'clone'] + opts + + [git_url, clonedir]) + + def _cd(self, clonedir): + return 
['cd', clonedir] + + def _checkout(self): + return ['git', 'checkout', self.refspec] + + def clone(self, git_url, clonedir): + return (self._clone(git_url, clonedir) + + [run.Raw('&&')] + + self._cd(clonedir) + + [run.Raw('&&')] + + self._checkout()) + + +class Branch(Refspec): + def __init__(self, tag): + Refspec.__init__(self, tag) + + def clone(self, git_url, clonedir): + opts = ['--depth', '1', + '--branch', self.refspec] + return (self._clone(git_url, clonedir, opts) + + [run.Raw('&&')] + + self._cd(clonedir)) + + +class Head(Refspec): + def __init__(self): + Refspec.__init__(self, 'HEAD') + + def clone(self, git_url, clonedir): + opts = ['--depth', '1'] + return (self._clone(git_url, clonedir, opts) + + [run.Raw('&&')] + + self._cd(clonedir)) + + def task(ctx, config): """ Run ceph on all workunits found under the specified path. @@ -75,16 +128,24 @@ assert isinstance(config.get('clients'), dict), \ 'configuration must contain a dictionary of clients' - overrides = ctx.config.get('overrides', {}) - misc.deep_merge(config, overrides.get('workunit', {})) - - refspec = config.get('branch') + # mimic the behavior of the "install" task, where the "overrides" are + # actually the defaults of that task. in other words, if none of "sha1", + # "tag", or "branch" is specified by a "workunit" tasks, we will update + # it with the information in the "workunit" sub-task nested in "overrides". + overrides = deepcopy(ctx.config.get('overrides', {}).get('workunit', {})) + refspecs = {'branch': Branch, 'tag': Refspec, 'sha1': Refspec} + if any(map(lambda i: i in config, refspecs.iterkeys())): + for i in refspecs.iterkeys(): + overrides.pop(i, None) + misc.deep_merge(config, overrides) + + for spec, cls in refspecs.iteritems(): + refspec = config.get(spec) + if refspec: + refspec = cls(refspec) + break if refspec is None: - refspec = config.get('tag') - if refspec is None: - refspec = config.get('sha1') - if refspec is None: - refspec = 'HEAD' + refspec = Head() timeout = config.get('timeout', '3h') @@ -276,6 +337,7 @@ for role, _ in client_remotes.items(): _delete_dir(ctx, role, created_mountpoint[role]) + def _run_tests(ctx, refspec, role, tests, env, subdir=None, timeout=None): """ Run the individual test. 
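[Editor's note: usage sketch for the Refspec hierarchy introduced above. Branch and Head shallow-clone directly, while the base class does a full clone followed by a checkout; git_url and clonedir are placeholders. Each call returns a command list (joined with run.Raw('&&') separators) for remote.run; nothing executes locally here.]

    git_url = 'https://github.com/ceph/ceph.git'   # placeholder
    clonedir = '/tmp/clonedir'                     # placeholder

    Branch('jewel').clone(git_url, clonedir)     # git clone --depth 1 --branch jewel ...
    Head().clone(git_url, clonedir)              # git clone --depth 1 ... (default HEAD)
    Refspec('v10.2.9').clone(git_url, clonedir)  # full clone && git checkout v10.2.9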
Create a scratch directory and then extract the @@ -310,23 +372,8 @@ git_url = teuth_config.get_ceph_git_url() try: - remote.run( - logger=log.getChild(role), - args=[ - 'rm', - '-rf', - clonedir, - run.Raw('&&'), - 'git', - 'clone', - git_url, - clonedir, - run.Raw('&&'), - 'cd', '--', clonedir, - run.Raw('&&'), - 'git', 'checkout', refspec, - ], - ) + remote.run(logger=log.getChild(role), + args=refspec.clone(git_url, clonedir)) except CommandFailedError: alt_git_url = git_url.replace('ceph-ci', 'ceph') log.info( @@ -335,24 +382,8 @@ git_url, alt_git_url, ) - remote.run( - logger=log.getChild(role), - args=[ - 'rm', - '-rf', - clonedir, - run.Raw('&&'), - 'git', - 'clone', - alt_git_url, - clonedir, - run.Raw('&&'), - 'cd', '--', clonedir, - run.Raw('&&'), - 'git', 'checkout', refspec, - ], - ) - + remote.run(logger=log.getChild(role), + args=refspec.clone(alt_git_url, clonedir)) remote.run( logger=log.getChild(role), args=[ diff -Nru ceph-10.2.7/qa/workunits/cephtool/test.sh ceph-10.2.9/qa/workunits/cephtool/test.sh --- ceph-10.2.7/qa/workunits/cephtool/test.sh 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/workunits/cephtool/test.sh 2017-07-13 13:05:37.000000000 +0000 @@ -504,6 +504,9 @@ # local auid=444 ceph-authtool --create-keyring --name client.TEST --gen-key --set-uid $auid TEST-keyring + expect_false ceph auth import --in-file TEST-keyring + rm TEST-keyring + ceph-authtool --create-keyring --name client.TEST --gen-key --cap mon "allow r" --set-uid $auid TEST-keyring ceph auth import --in-file TEST-keyring rm TEST-keyring ceph auth get client.TEST > $TMPFILE diff -Nru ceph-10.2.7/qa/workunits/mon/crush_ops.sh ceph-10.2.9/qa/workunits/mon/crush_ops.sh --- ceph-10.2.7/qa/workunits/mon/crush_ops.sh 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/workunits/mon/crush_ops.sh 2017-07-13 13:05:37.000000000 +0000 @@ -74,11 +74,16 @@ ceph osd crush rm osd.$o2 host2 ceph osd crush rm host2 +ceph osd crush add-bucket foo host +ceph osd crush move foo root=default rack=localrack + +ceph osd crush create-or-move osd.$o1 1.0 root=default +ceph osd crush move osd.$o1 host=foo +ceph osd find $o1 | grep host | grep foo + ceph osd crush rm osd.$o1 ceph osd crush rm osd.$o2 -ceph osd crush add-bucket foo host -ceph osd crush move foo root=default rack=localrack ceph osd crush rm foo # test reweight diff -Nru ceph-10.2.7/qa/workunits/rados/test-upgrade-v11.0.0.sh ceph-10.2.9/qa/workunits/rados/test-upgrade-v11.0.0.sh --- ceph-10.2.7/qa/workunits/rados/test-upgrade-v11.0.0.sh 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/workunits/rados/test-upgrade-v11.0.0.sh 2017-07-13 13:05:37.000000000 +0000 @@ -13,11 +13,14 @@ pids="" for f in \ - 'api_aio --gtest_filter=-LibRadosAio.RacingRemovePP' \ + 'api_aio --gtest_filter=-LibRadosAio.RacingRemovePP:*WriteSame*:*CmpExt*' \ 'api_list --gtest_filter=-LibRadosList*.EnumerateObjects*' \ - api_io api_lock api_misc \ - api_tier api_pool api_snapshots api_stat api_watch_notify api_cmd \ - api_c_write_operations \ + 'api_io --gtest_filter=-*Checksum*' \ + api_lock \ + 'api_misc --gtest_filter=-*WriteSame*:*CmpExt*:*Checksum*:*CloneRange*' \ + 'api_watch_notify --gtest_filter=-*WatchNotify3*' \ + api_tier api_pool api_snapshots api_stat api_cmd \ + 'api_c_write_operations --gtest_filter=-*WriteSame*' \ api_c_read_operations \ list_parallel \ open_pools_parallel \ diff -Nru ceph-10.2.7/qa/workunits/rbd/copy.sh ceph-10.2.9/qa/workunits/rbd/copy.sh --- ceph-10.2.7/qa/workunits/rbd/copy.sh 2017-04-10 11:44:25.000000000 +0000 +++ 
ceph-10.2.9/qa/workunits/rbd/copy.sh 2017-07-13 13:05:37.000000000 +0000 @@ -95,7 +95,8 @@ rbd create -p rbd2 -s 1 foo rbd rename rbd2/foo rbd2/bar rbd -p rbd2 ls | grep bar - ! rbd rename rbd2/bar foo + rbd rename rbd2/bar foo + rbd rename --pool rbd2 foo bar ! rbd rename rbd2/bar --dest-pool rbd foo rbd rename --pool rbd2 bar --dest-pool rbd2 foo rbd -p rbd2 ls | grep foo diff -Nru ceph-10.2.7/qa/workunits/rbd/rbd_mirror.sh ceph-10.2.9/qa/workunits/rbd/rbd_mirror.sh --- ceph-10.2.7/qa/workunits/rbd/rbd_mirror.sh 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/workunits/rbd/rbd_mirror.sh 2017-07-13 13:05:37.000000000 +0000 @@ -38,6 +38,7 @@ testlog "TEST: test the first image is replaying after restart" write_image ${CLUSTER2} ${POOL} ${image} 100 +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position' compare_images ${POOL} ${image} @@ -247,7 +248,9 @@ stop_mirror ${CLUSTER2} set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image' disable_mirror ${CLUSTER2} ${POOL} ${image} -test_image_present ${CLUSTER1} ${POOL} ${image} 'present' +if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then + test_image_present ${CLUSTER1} ${POOL} ${image} 'present' +fi start_mirror ${CLUSTER1} wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' set_pool_mirror_mode ${CLUSTER2} ${POOL} 'pool' @@ -276,6 +279,7 @@ stop_mirror ${CLUSTER1} request_resync_image ${CLUSTER1} ${POOL} ${image} image_id start_mirror ${CLUSTER1} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' ${image_id} wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position' compare_images ${POOL} ${image} diff -Nru ceph-10.2.7/qa/workunits/rbd/rbd-nbd.sh ceph-10.2.9/qa/workunits/rbd/rbd-nbd.sh --- ceph-10.2.7/qa/workunits/rbd/rbd-nbd.sh 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/qa/workunits/rbd/rbd-nbd.sh 2017-07-13 13:05:37.000000000 +0000 @@ -4,7 +4,6 @@ POOL=rbd IMAGE=testrbdnbd$$ -TOO_LARGE_IMAGE=${IMAGE}_large SUDO=sudo SIZE=64 DATA= @@ -17,7 +16,6 @@ DATA=${TEMPDIR}/data dd if=/dev/urandom of=${DATA} bs=1M count=${SIZE} rbd --dest-pool ${POOL} --no-progress import ${DATA} ${IMAGE} - rbd -p ${POOL} create ${TOO_LARGE_IMAGE} --size 3T if [ `id -u` = 0 ] then @@ -40,7 +38,6 @@ done rbd -p ${POOL} remove ${IMAGE} fi - rbd -p ${POOL} remove ${TOO_LARGE_IMAGE} } function expect_false() @@ -63,7 +60,6 @@ fi expect_false ${SUDO} rbd-nbd map INVALIDIMAGE expect_false ${SUDO} rbd-nbd --device INVALIDDEV map ${IMAGE} -expect_false ${SUDO} rbd-nbd map ${TOO_LARGE_IMAGE} # map test using the first unused device DEV=`${SUDO} rbd-nbd map ${POOL}/${IMAGE}` diff -Nru ceph-10.2.7/src/brag/client/ceph-brag ceph-10.2.9/src/brag/client/ceph-brag --- ceph-10.2.7/src/brag/client/ceph-brag 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/brag/client/ceph-brag 2017-07-13 13:05:37.000000000 +0000 @@ -245,7 +245,10 @@ oj = json.loads(o) num_mons = len(oj['monmap']['mons']) num_osds = int(oj['osdmap']['osdmap']['num_in_osds']) - num_mdss = oj['mdsmap']['in'] + try: + num_mdss = oj['fsmap']['in'] + except KeyError: + num_mdss = 0 pgmap = oj['pgmap'] num_pgs = pgmap['num_pgs'] diff -Nru ceph-10.2.7/src/ceph-disk/ceph_disk/main.py ceph-10.2.9/src/ceph-disk/ceph_disk/main.py --- ceph-10.2.7/src/ceph-disk/ceph_disk/main.py 2017-04-10 11:44:25.000000000 +0000 +++ 
ceph-10.2.9/src/ceph-disk/ceph_disk/main.py 2017-07-13 13:05:37.000000000 +0000 @@ -617,22 +617,36 @@ sda 1 -> sda1 cciss/c0d1 1 -> cciss!c0d1p1 """ - partname = None - if is_mpath(dev): - partname = get_partition_mpath(dev, pnum) - else: - name = get_dev_name(os.path.realpath(dev)) - for f in os.listdir(os.path.join('/sys/block', name)): - if f.startswith(name) and f.endswith(str(pnum)): - # we want the shortest name that starts with the base name - # and ends with the partition number - if not partname or len(f) < len(partname): - partname = f - if partname: - return get_dev_path(partname) - else: - raise Error('partition %d for %s does not appear to exist' % - (pnum, dev)) + max_retry = 10 + for retry in range(0, max_retry + 1): + partname = None + error_msg = "" + if is_mpath(dev): + partname = get_partition_mpath(dev, pnum) + else: + name = get_dev_name(os.path.realpath(dev)) + sys_entry = os.path.join('/sys/block', name) + error_msg = " in %s" % sys_entry + for f in os.listdir(sys_entry): + if f.startswith(name) and f.endswith(str(pnum)): + # we want the shortest name that starts with the base name + # and ends with the partition number + if not partname or len(f) < len(partname): + partname = f + if partname: + if retry: + LOG.info('Found partition %d for %s after %d tries' % + (pnum, dev, retry)) + return get_dev_path(partname) + else: + if retry < max_retry: + LOG.info('Try %d/%d : partition %d for %s does not exist%s' % + (retry + 1, max_retry, pnum, dev, error_msg)) + time.sleep(.2) + continue + else: + raise Error('partition %d for %s does not appear to exist%s' % + (pnum, dev, error_msg)) def list_all_partitions(): @@ -1136,10 +1150,19 @@ if os.path.exists(path): mode = get_oneliner(path, 'key-management-mode') osd_uuid = get_oneliner(path, 'osd-uuid') + ceph_fsid = read_one_line(path, 'ceph_fsid') + if ceph_fsid is None: + raise Error('No cluster uuid assigned.') + cluster = find_cluster_by_uuid(ceph_fsid) + if cluster is None: + raise Error('No cluster conf found in ' + SYSCONFDIR + + ' with fsid %s' % ceph_fsid) + if mode == KEY_MANAGEMENT_MODE_V1: key, stderr, ret = command( [ 'ceph', + '--cluster', cluster, '--name', 'client.osd-lockbox.' 
+ osd_uuid, '--keyring', @@ -1515,6 +1538,26 @@ raise Error('unable to create symlink %s -> %s' % (path, target)) +def get_mount_options(cluster, fs_type): + mount_options = get_conf( + cluster, + variable='osd_mount_options_{fstype}'.format( + fstype=fs_type, + ), + ) + if mount_options is None: + mount_options = get_conf( + cluster, + variable='osd_fs_mount_options_{fstype}'.format( + fstype=fs_type, + ), + ) + else: + # remove whitespaces + mount_options = "".join(mount_options.split()) + return mount_options + + class Device(object): def __init__(self, path, args): @@ -2302,6 +2345,7 @@ command_check_call( [ 'ceph', + '--cluster', cluster, '--name', 'client.bootstrap-osd', '--keyring', bootstrap, 'config-key', @@ -2313,6 +2357,7 @@ keyring, stderr, ret = command( [ 'ceph', + '--cluster', cluster, '--name', 'client.bootstrap-osd', '--keyring', bootstrap, 'auth', @@ -2350,6 +2395,9 @@ LOG.debug('Mounting lockbox ' + str(" ".join(args))) command_check_call(args) write_one_line(path, 'osd-uuid', self.args.osd_uuid) + if self.args.cluster_uuid is None: + self.args.cluster_uuid = get_fsid(cluster=self.args.cluster) + write_one_line(path, 'ceph_fsid', self.args.cluster_uuid) self.create_key() self.symlink_spaces(path) write_one_line(path, 'magic', CEPH_LOCKBOX_ONDISK_MAGIC) @@ -2546,22 +2594,8 @@ ), ) - self.mount_options = get_conf( - cluster=self.args.cluster, - variable='osd_mount_options_{fstype}'.format( - fstype=self.args.fs_type, - ), - ) - if self.mount_options is None: - self.mount_options = get_conf( - cluster=self.args.cluster, - variable='osd_fs_mount_options_{fstype}'.format( - fstype=self.args.fs_type, - ), - ) - else: - # remove whitespaces - self.mount_options = "".join(self.mount_options.split()) + self.mount_options = get_mount_options(cluster=self.args.cluster, + fs_type=self.args.fs_type) if self.args.osd_uuid is None: self.args.osd_uuid = str(uuid.uuid4()) @@ -2753,7 +2787,7 @@ '--osd-uuid', fsid, '--keyring', os.path.join(path, 'keyring'), '--setuser', get_ceph_user(), - '--setgroup', get_ceph_user(), + '--setgroup', get_ceph_group(), ], ) else: @@ -2865,6 +2899,67 @@ ) +# +# For upgrade purposes, to make sure there are no competing units, +# both --runtime unit and the default should be disabled. There can be +# two units at the same time: one with --runtime and another without +# it. If, for any reason (manual or ceph-disk) the two units co-exist +# they will compete with each other. 
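[Editor's note: usage sketch for the get_mount_options() helper factored out above; the cluster name, filesystem type, and device are illustrative.]

    opts = get_mount_options(cluster='ceph', fs_type='xfs')
    # tries osd_mount_options_xfs first, then osd_fs_mount_options_xfs;
    # whitespace is stripped from the primary value when it is set
    path = mount(dev='/dev/sdb1', fstype='xfs', options=opts)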
+# +def systemd_disable( + path, + osd_id, +): + # ensure there is no duplicate ceph-osd@.service + for style in ([], ['--runtime']): + command_check_call( + [ + 'systemctl', + 'disable', + 'ceph-osd@{osd_id}'.format(osd_id=osd_id), + ] + style, + ) + + +def systemd_start( + path, + osd_id, +): + systemd_disable(path, osd_id) + if is_mounted(path): + style = ['--runtime'] + else: + style = [] + command_check_call( + [ + 'systemctl', + 'enable', + 'ceph-osd@{osd_id}'.format(osd_id=osd_id), + ] + style, + ) + command_check_call( + [ + 'systemctl', + 'start', + 'ceph-osd@{osd_id}'.format(osd_id=osd_id), + ], + ) + + +def systemd_stop( + path, + osd_id, +): + systemd_disable(path, osd_id) + command_check_call( + [ + 'systemctl', + 'stop', + 'ceph-osd@{osd_id}'.format(osd_id=osd_id), + ], + ) + + def start_daemon( cluster, osd_id, @@ -2908,29 +3003,7 @@ ], ) elif os.path.exists(os.path.join(path, 'systemd')): - # ensure there is no duplicate ceph-osd@.service - command_check_call( - [ - 'systemctl', - 'disable', - 'ceph-osd@{osd_id}'.format(osd_id=osd_id), - ], - ) - command_check_call( - [ - 'systemctl', - 'enable', - '--runtime', - 'ceph-osd@{osd_id}'.format(osd_id=osd_id), - ], - ) - command_check_call( - [ - 'systemctl', - 'start', - 'ceph-osd@{osd_id}'.format(osd_id=osd_id), - ], - ) + systemd_start(path, osd_id) else: raise Error('{cluster} osd.{osd_id} is not tagged ' 'with an init system'.format( @@ -2974,21 +3047,7 @@ ], ) elif os.path.exists(os.path.join(path, 'systemd')): - command_check_call( - [ - 'systemctl', - 'disable', - '--runtime', - 'ceph-osd@{osd_id}'.format(osd_id=osd_id), - ], - ) - command_check_call( - [ - 'systemctl', - 'stop', - 'ceph-osd@{osd_id}'.format(osd_id=osd_id), - ], - ) + systemd_stop(path, osd_id) else: raise Error('{cluster} osd.{osd_id} is not tagged with an init ' ' system'.format(cluster=cluster, osd_id=osd_id)) @@ -3069,24 +3128,7 @@ # TODO always using mount options from cluster=ceph for # now; see http://tracker.newdream.net/issues/3253 - mount_options = get_conf( - cluster='ceph', - variable='osd_mount_options_{fstype}'.format( - fstype=fstype, - ), - ) - - if mount_options is None: - mount_options = get_conf( - cluster='ceph', - variable='osd_fs_mount_options_{fstype}'.format( - fstype=fstype, - ), - ) - - # remove whitespaces from mount_options - if mount_options is not None: - mount_options = "".join(mount_options.split()) + mount_options = get_mount_options(cluster='ceph', fs_type=fstype) path = mount(dev=dev, fstype=fstype, options=mount_options) @@ -3591,15 +3633,17 @@ ]) -def _remove_lockbox(uuid): +def _remove_lockbox(uuid, cluster): command([ 'ceph', + '--cluster', cluster, 'auth', 'del', 'client.osd-lockbox.' + uuid, ]) command([ 'ceph', + '--cluster', cluster, 'config-key', 'del', 'dm-crypt/osd/' + uuid + '/luks', @@ -3697,7 +3741,7 @@ for name in Space.NAMES: if target_dev.get(name + '_uuid'): dmcrypt_unmap(target_dev[name + '_uuid']) - _remove_lockbox(target_dev['uuid']) + _remove_lockbox(target_dev['uuid'], args.cluster) # Check zap flag. If we found zap flag, we need to find device for # destroy this osd data. 
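[Editor's note: the systemd helpers consolidated above always disable both unit flavours before enabling exactly one, so a --runtime unit and a persistent unit can never coexist and compete. A condensed sketch of that disable-both idiom, with subprocess standing in for command_check_call:]

    import subprocess

    def systemd_disable_sketch(osd_id):
        # disable the persistent unit and the --runtime unit alike,
        # mirroring systemd_disable() above
        for style in ([], ['--runtime']):
            subprocess.check_call(
                ['systemctl', 'disable',
                 'ceph-osd@{osd_id}'.format(osd_id=osd_id)] + style)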
@@ -4171,9 +4215,11 @@ fs_type = get_dev_fs(dev_to_mount) if fs_type is not None: + mount_options = get_mount_options(cluster='ceph', + fs_type=fs_type) try: tpath = mount(dev=dev_to_mount, - fstype=fs_type, options='') + fstype=fs_type, options=mount_options) try: for name in Space.NAMES: space_uuid = get_oneliner(tpath, @@ -4986,7 +5032,9 @@ path = os.environ.get('PATH', os.defpath) os.environ['PATH'] = args.prepend_to_path + ":" + path - setup_statedir(args.statedir) + if args.func.__name__ != 'main_trigger': + # trigger may run when statedir is unavailable and does not use it + setup_statedir(args.statedir) setup_sysconfdir(args.sysconfdir) global CEPH_PREF_USER diff -Nru ceph-10.2.7/src/ceph_fuse.cc ceph-10.2.9/src/ceph_fuse.cc --- ceph-10.2.7/src/ceph_fuse.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/ceph_fuse.cc 2017-07-13 13:05:37.000000000 +0000 @@ -78,8 +78,9 @@ } env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_DAEMON, - CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); for (std::vector::iterator i = args.begin(); i != args.end(); ) { if (ceph_argparse_double_dash(args, i)) { break; @@ -289,7 +290,6 @@ } delete messenger; - g_ceph_context->put(); free(newargv); delete mc; diff -Nru ceph-10.2.7/src/ceph_mds.cc ceph-10.2.9/src/ceph_mds.cc --- ceph-10.2.7/src/ceph_mds.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/ceph_mds.cc 2017-07-13 13:05:37.000000000 +0000 @@ -93,8 +93,9 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_MDS, CODE_ENVIRONMENT_DAEMON, - 0, "mds_data"); + auto cct = global_init(NULL, args, + CEPH_ENTITY_TYPE_MDS, CODE_ENVIRONMENT_DAEMON, + 0, "mds_data"); ceph_heap_profiler_init(); std::string val, action; @@ -230,8 +231,6 @@ delete msgr; } - g_ceph_context->put(); - // cd on exit, so that gmon.out (if any) goes into a separate directory for each node. char s[20]; snprintf(s, sizeof(s), "gmon/%d", getpid()); diff -Nru ceph-10.2.7/src/ceph_mon.cc ceph-10.2.9/src/ceph_mon.cc --- ceph-10.2.7/src/ceph_mon.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/ceph_mon.cc 2017-07-13 13:05:37.000000000 +0000 @@ -257,8 +257,9 @@ } } - global_init(&def_args, args, - CEPH_ENTITY_TYPE_MON, CODE_ENVIRONMENT_DAEMON, flags, "mon_data"); + auto cct = global_init(&def_args, args, + CEPH_ENTITY_TYPE_MON, CODE_ENVIRONMENT_DAEMON, + flags, "mon_data"); ceph_heap_profiler_init(); uuid_d fsid; @@ -781,7 +782,6 @@ delete msgr; delete client_throttler; delete daemon_throttler; - g_ceph_context->put(); // cd on exit, so that gmon.out (if any) goes into a separate directory for each node. char s[20]; diff -Nru ceph-10.2.7/src/ceph_osd.cc ceph-10.2.9/src/ceph_osd.cc --- ceph-10.2.7/src/ceph_osd.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/ceph_osd.cc 2017-07-13 13:05:37.000000000 +0000 @@ -118,8 +118,9 @@ // option, therefore we will pass it as a default argument to global_init(). 
def_args.push_back("--leveldb-log="); - global_init(&def_args, args, CEPH_ENTITY_TYPE_OSD, CODE_ENVIRONMENT_DAEMON, - 0, "osd_data"); + auto cct = global_init(&def_args, args, CEPH_ENTITY_TYPE_OSD, + CODE_ENVIRONMENT_DAEMON, + 0, "osd_data"); ceph_heap_profiler_init(); // osd specific args @@ -360,13 +361,16 @@ derr << TEXT_RED << " ** ERROR: error flushing journal " << g_conf->osd_journal << " for object store " << g_conf->osd_data << ": " << cpp_strerror(-err) << TEXT_NORMAL << dendl; - exit(1); + goto flushjournal_out; } store->umount(); derr << "flushed journal " << g_conf->osd_journal << " for object store " << g_conf->osd_data << dendl; - exit(0); +flushjournal_out: + delete store; + g_ceph_context->put(); + exit(err < 0 ? 1 : 0); } if (dump_journal) { common_init_finish(g_ceph_context); @@ -643,7 +647,6 @@ client_byte_throttler.reset(); client_msg_throttler.reset(); - g_ceph_context->put(); // cd on exit, so that gmon.out (if any) goes into a separate directory for each node. char s[20]; diff -Nru ceph-10.2.7/src/ceph_syn.cc ceph-10.2.9/src/ceph_syn.cc --- ceph-10.2.7/src/ceph_syn.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/ceph_syn.cc 2017-07-13 13:05:37.000000000 +0000 @@ -42,7 +42,8 @@ vector args; argv_to_vec(argc, argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); parse_syn_options(args); // for SyntheticClient diff -Nru ceph-10.2.7/src/civetweb/src/civetweb.c ceph-10.2.9/src/civetweb/src/civetweb.c --- ceph-10.2.7/src/civetweb/src/civetweb.c 2017-04-10 11:45:02.000000000 +0000 +++ ceph-10.2.9/src/civetweb/src/civetweb.c 2017-07-13 13:06:15.000000000 +0000 @@ -843,7 +843,7 @@ #if defined(USE_LUA) && defined(USE_WEBSOCKET) void * lua_websocket_state; /* Lua_State for a websocket connection */ #endif - int is_chunked; /* transfer-encoding is chunked */ + int is_chunked; /* transfer-encoding is chunked; 2=and consumed */ }; static pthread_key_t sTlsKey; /* Thread local storage index */ @@ -2272,6 +2272,12 @@ char buf[MG_BUF_LEN]; int to_read, nread; + if (conn->is_chunked) { + while (conn->is_chunked == 1 && + mg_read(conn, buf, sizeof buf) > 0) + ; + return; + } while (conn->consumed_content < conn->content_len) { to_read = sizeof(buf); if ((int64_t) to_read > conn->content_len - conn->consumed_content) { @@ -2334,14 +2340,17 @@ } int mg_read(struct mg_connection *conn, void *buf, size_t len) { - if ( conn->is_chunked ) { + switch ( conn->is_chunked ) { + case 2: + return -1; + case 1: if (conn->content_len <= 0 ) conn->content_len = 0; if (conn->consumed_content < conn->content_len) return mg_read_inner(conn,buf,len); int i = 0; char str[64]; while (1) { int c = mg_getc(conn); - if (c == EOF) return EOF; + if (c == EOF) return 0; if ( ! 
( c == '\n' || c == '\r' ) ) { str[i++] = c; break; @@ -2356,7 +2365,10 @@ char *end = 0; long chunkSize = strtol(str,&end,16); if ( end != str+(i-1) ) return -1; - if ( chunkSize == 0 ) return 0; + if ( chunkSize == 0 ) { + conn->is_chunked = 2; + return 0; + } conn->content_len += chunkSize; } return mg_read_inner(conn,buf,len); diff -Nru ceph-10.2.7/src/client/Client.cc ceph-10.2.9/src/client/Client.cc --- ceph-10.2.7/src/client/Client.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/client/Client.cc 2017-07-13 13:05:37.000000000 +0000 @@ -2102,6 +2102,19 @@ m->put(); } +bool Client::_any_stale_sessions() const +{ + assert(client_lock.is_locked_by_me()); + + for (const auto &i : mds_sessions) { + if (i.second->state == MetaSession::STATE_STALE) { + return true; + } + } + + return false; +} + void Client::_kick_stale_sessions() { ldout(cct, 1) << "kick_stale_sessions" << dendl; @@ -3258,6 +3271,19 @@ session->con->send_message(m); } +static bool is_max_size_approaching(Inode *in) +{ + /* mds will adjust max size according to the reported size */ + if (in->flushing_caps & CEPH_CAP_FILE_WR) + return false; + if (in->size >= in->max_size) + return true; + /* half of previous max_size increment has been used */ + if (in->max_size > in->reported_size && + (in->size << 1) >= in->max_size + in->reported_size) + return true; + return false; +} void Client::check_caps(Inode *in, bool is_delayed) { @@ -3339,11 +3365,10 @@ /* approaching file_max? */ if ((cap->issued & CEPH_CAP_FILE_WR) && - (in->size << 1) >= in->max_size && - (in->reported_size << 1) < in->max_size && - cap == in->auth_cap) { + cap == in->auth_cap && + is_max_size_approaching(in)) { ldout(cct, 10) << "size " << in->size << " approaching max_size " << in->max_size - << ", reported " << in->reported_size << dendl; + << ", reported " << in->reported_size << dendl; goto ack; } @@ -7543,11 +7568,11 @@ int Client::open(const char *relpath, int flags, mode_t mode, int stripe_unit, int stripe_count, int object_size, const char *data_pool) { - ldout(cct, 3) << "open enter(" << relpath << ", " << flags << "," << mode << ") = " << dendl; + ldout(cct, 3) << "open enter(" << relpath << ", " << ceph_flags_sys2wire(flags) << "," << mode << ") = " << dendl; Mutex::Locker lock(client_lock); tout(cct) << "open" << std::endl; tout(cct) << relpath << std::endl; - tout(cct) << flags << std::endl; + tout(cct) << ceph_flags_sys2wire(flags) << std::endl; uid_t uid = get_uid(); gid_t gid = get_gid(); @@ -7619,7 +7644,7 @@ out: tout(cct) << r << std::endl; - ldout(cct, 3) << "open exit(" << path << ", " << flags << ") = " << r << dendl; + ldout(cct, 3) << "open exit(" << path << ", " << ceph_flags_sys2wire(flags) << ") = " << r << dendl; return r; } @@ -7820,7 +7845,8 @@ int Client::_open(Inode *in, int flags, mode_t mode, Fh **fhp, int uid, int gid) { - int cmode = ceph_flags_to_mode(flags); + // use normalized flags to generate cmode + int cmode = ceph_flags_to_mode(ceph_flags_sys2wire(flags)); if (cmode < 0) return -EINVAL; int want = ceph_caps_for_mode(cmode); @@ -7841,8 +7867,8 @@ MetaRequest *req = new MetaRequest(CEPH_MDS_OP_OPEN); filepath path; in->make_nosnap_relative_path(path); - req->set_filepath(path); - req->head.args.open.flags = flags & ~O_CREAT; + req->set_filepath(path); + req->head.args.open.flags = ceph_flags_sys2wire(flags & ~O_CREAT); req->head.args.open.mode = mode; req->head.args.open.pool = -1; if (cct->_conf->client_debug_getattr_caps) @@ -8620,10 +8646,8 @@ if (is_quota_bytes_approaching(in)) { check_caps(in, true); - 
} else { - if ((in->size << 1) >= in->max_size && - (in->reported_size << 1) < in->max_size) - check_caps(in, false); + } else if (is_max_size_approaching(in)) { + check_caps(in, false); } ldout(cct, 7) << "wrote to " << totalwritten+offset << ", extending file size" << dendl; @@ -8916,6 +8940,21 @@ assert(cct->_conf->client_quota == false || quota_root != nullptr); if (quota_root && cct->_conf->client_quota_df && quota_root->quota.max_bytes) { + + // Skip the getattr if any sessions are stale, as we don't want to + // block `df` if this client has e.g. been evicted, or if the MDS cluster + // is unhealthy. + if (!_any_stale_sessions()) { + int r = _getattr(quota_root, 0, -1, -1, true); + if (r != 0) { + // Ignore return value: error getting latest inode metadata is not a good + // reason to break "df". + lderr(cct) << "Error in getattr on quota root 0x" + << std::hex << quota_root->ino << std::dec + << " statfs result may be outdated" << dendl; + } + } + // Special case: if there is a size quota set on the Inode acting // as the root for this client mount, then report the quota status // as the filesystem statistics. @@ -9791,6 +9830,8 @@ int Client::setxattr(const char *path, const char *name, const void *value, size_t size, int flags) { + _setxattr_maybe_wait_for_osdmap(name, value, size); + Mutex::Locker lock(client_lock); InodeRef in; int r = Client::path_walk(path, &in, true); @@ -9801,6 +9842,8 @@ int Client::lsetxattr(const char *path, const char *name, const void *value, size_t size, int flags) { + _setxattr_maybe_wait_for_osdmap(name, value, size); + Mutex::Locker lock(client_lock); InodeRef in; int r = Client::path_walk(path, &in, false); @@ -9811,6 +9854,8 @@ int Client::fsetxattr(int fd, const char *name, const void *value, size_t size, int flags) { + _setxattr_maybe_wait_for_osdmap(name, value, size); + Mutex::Locker lock(client_lock); Fh *f = get_filehandle(fd); if (!f) @@ -9827,10 +9872,21 @@ if (vxattr) { r = -ENODATA; - char buf[256]; + // Do a force getattr to get the latest quota before returning + // a value to userspace. + r = _getattr(in, 0, uid, gid, true); + if (r != 0) { + // Error from getattr! + return r; + } + // call pointer-to-member function - if (!(vxattr->exists_cb && !(this->*(vxattr->exists_cb))(in))) + char buf[256]; + if (!(vxattr->exists_cb && !(this->*(vxattr->exists_cb))(in))) { r = (this->*(vxattr->getxattr_cb))(in, buf, sizeof(buf)); + } else { + r = -ENODATA; + } if (size != 0) { if (r > (int)size) { @@ -10055,7 +10111,7 @@ return _setxattr(in.get(), name, value, size, flags); } -int Client::check_data_pool_exist(string name, string value, const OSDMap *osdmap) +int Client::_setxattr_check_data_pool(string& name, string& value, const OSDMap *osdmap) { string tmp; if (name == "layout") { @@ -10095,18 +10151,17 @@ return 0; } -int Client::ll_setxattr(Inode *in, const char *name, const void *value, - size_t size, int flags, int uid, int gid) +void Client::_setxattr_maybe_wait_for_osdmap(const char *name, const void *value, size_t size) { // For setting pool of layout, MetaRequest need osdmap epoch. // There is a race which create a new data pool but client and mds both don't have. // Make client got the latest osdmap which make mds quickly judge whether get newer osdmap. 
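[Editor's note: is_max_size_approaching(), added earlier in this patch, decides when the client should ask the MDS for more writable range. Its arithmetic restated in Python for clarity; this is pure illustration, not a Ceph API.]

    def is_max_size_approaching(size, max_size, reported_size, flushing_wr):
        if flushing_wr:
            # mds will adjust max_size according to the size being flushed
            return False
        if size >= max_size:
            return True
        # fire once more than half of the last granted increment is used:
        # size - reported_size >= (max_size - reported_size) / 2
        return max_size > reported_size and 2 * size >= max_size + reported_size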
- if (strcmp(name, "ceph.file.layout.pool") == 0 || strcmp(name, "ceph.dir.layout.pool") == 0 || + if (strcmp(name, "ceph.file.layout.pool") == 0 || strcmp(name, "ceph.dir.layout.pool") == 0 || strcmp(name, "ceph.file.layout") == 0 || strcmp(name, "ceph.dir.layout") == 0) { string rest(strstr(name, "layout")); - string v((const char*)value); + string v((const char*)value, size); int r = objecter->with_osdmap([&](const OSDMap& o) { - return check_data_pool_exist(rest, v, &o); + return _setxattr_check_data_pool(rest, v, &o); }); if (r == -ENOENT) { @@ -10115,6 +10170,12 @@ ctx.wait(); } } +} + +int Client::ll_setxattr(Inode *in, const char *name, const void *value, + size_t size, int flags, int uid, int gid) +{ + _setxattr_maybe_wait_for_osdmap(name, value, size); Mutex::Locker lock(client_lock); @@ -10540,7 +10601,8 @@ return -EDQUOT; } - int cmode = ceph_flags_to_mode(flags); + // use normalized flags to generate cmode + int cmode = ceph_flags_to_mode(ceph_flags_sys2wire(flags)); if (cmode < 0) return -EINVAL; @@ -10561,7 +10623,7 @@ path.push_dentry(name); req->set_filepath(path); req->set_inode(dir); - req->head.args.open.flags = flags | O_CREAT; + req->head.args.open.flags = ceph_flags_sys2wire(flags | O_CREAT); req->head.args.open.stripe_unit = stripe_unit; req->head.args.open.stripe_count = stripe_count; @@ -10929,11 +10991,14 @@ return -EROFS; } if (cct->_conf->client_quota && - fromdir != todir && - (fromdir->quota.is_enable() || - todir->quota.is_enable() || - get_quota_root(fromdir) != get_quota_root(todir))) { - return -EXDEV; + fromdir != todir) { + Inode *fromdir_root = + fromdir->quota.is_enable() ? fromdir : get_quota_root(fromdir); + Inode *todir_root = + todir->quota.is_enable() ? todir : get_quota_root(todir); + if (fromdir_root != todir_root) { + return -EXDEV; + } } InodeRef target; @@ -11262,10 +11327,10 @@ vinodeno_t vino = _get_vino(in); - ldout(cct, 3) << "ll_open " << vino << " " << flags << dendl; + ldout(cct, 3) << "ll_open " << vino << " " << ceph_flags_sys2wire(flags) << dendl; tout(cct) << "ll_open" << std::endl; tout(cct) << vino.ino.val << std::endl; - tout(cct) << flags << std::endl; + tout(cct) << ceph_flags_sys2wire(flags) << std::endl; int r; if (uid < 0) { @@ -11286,8 +11351,8 @@ ll_unclosed_fh_set.insert(fhptr); } tout(cct) << (unsigned long)fhptr << std::endl; - ldout(cct, 3) << "ll_open " << vino << " " << flags << " = " << r << " (" << - fhptr << ")" << dendl; + ldout(cct, 3) << "ll_open " << vino << " " << ceph_flags_sys2wire(flags) << + " = " << r << " (" << fhptr << ")" << dendl; return r; } @@ -11300,12 +11365,12 @@ vinodeno_t vparent = _get_vino(parent); ldout(cct, 3) << "ll_create " << vparent << " " << name << " 0" << oct << - mode << dec << " " << flags << ", uid " << uid << ", gid " << gid << dendl; + mode << dec << " " << ceph_flags_sys2wire(flags)<< ", uid " << uid << ", gid " << gid << dendl; tout(cct) << "ll_create" << std::endl; tout(cct) << vparent.ino.val << std::endl; tout(cct) << name << std::endl; tout(cct) << mode << std::endl; - tout(cct) << flags << std::endl; + tout(cct) << ceph_flags_sys2wire(flags) << std::endl; bool created = false; InodeRef in; @@ -11362,7 +11427,7 @@ tout(cct) << (unsigned long)fhptr << std::endl; tout(cct) << attr->st_ino << std::endl; ldout(cct, 3) << "ll_create " << parent << " " << name << " 0" << oct << - mode << dec << " " << flags << " = " << r << " (" << fhptr << " " << + mode << dec << " " << ceph_flags_sys2wire(flags) << " = " << r << " (" << fhptr << " " << hex << attr->st_ino << dec << ")" << 
dendl; // passing an Inode in outp requires an additional ref @@ -11681,10 +11746,8 @@ if (is_quota_bytes_approaching(in)) { check_caps(in, true); - } else { - if ((in->size << 1) >= in->max_size && - (in->reported_size << 1) < in->max_size) - check_caps(in, false); + } else if (is_max_size_approaching(in)) { + check_caps(in, false); } } } @@ -12059,8 +12122,16 @@ break; case MetaSession::STATE_OPEN: - ldout(cct, 1) << "reset from mds we were open; mark session as stale" << dendl; - s->state = MetaSession::STATE_STALE; + { + const md_config_t *conf = cct->_conf; + if (conf->client_reconnect_stale) { + ldout(cct, 1) << "reset from mds we were open; close mds session for reconnect" << dendl; + _closed_mds_session(s); + } else { + ldout(cct, 1) << "reset from mds we were open; mark session as stale" << dendl; + s->state = MetaSession::STATE_STALE; + } + } break; case MetaSession::STATE_NEW: diff -Nru ceph-10.2.7/src/client/Client.h ceph-10.2.9/src/client/Client.h --- ceph-10.2.7/src/client/Client.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/client/Client.h 2017-07-13 13:05:37.000000000 +0000 @@ -345,6 +345,7 @@ MetaSession *_open_mds_session(mds_rank_t mds); void _close_mds_session(MetaSession *s); void _closed_mds_session(MetaSession *s); + bool _any_stale_sessions() const; void _kick_stale_sessions(); void handle_client_session(MClientSession *m); void send_reconnect(MetaSession *s); @@ -777,6 +778,8 @@ int _do_setxattr(Inode *in, const char *name, const void *value, size_t len, int flags, int uid, int gid); int _setxattr(Inode *in, const char *name, const void *value, size_t len, int flags, int uid=-1, int gid=-1); int _setxattr(InodeRef &in, const char *name, const void *value, size_t len, int flags); + int _setxattr_check_data_pool(string& name, string& value, const OSDMap *osdmap); + void _setxattr_maybe_wait_for_osdmap(const char *name, const void *value, size_t len); int _removexattr(Inode *in, const char *nm, int uid=-1, int gid=-1); int _removexattr(InodeRef &in, const char *nm); int _open(Inode *in, int flags, mode_t mode, Fh **fhp, int uid, int gid); @@ -843,8 +846,6 @@ int _getattr_for_perm(Inode *in, int uid, int gid); int _getgrouplist(gid_t **sgids, int uid, int gid); - int check_data_pool_exist(string name, string value, const OSDMap *osdmap); - vinodeno_t _get_vino(Inode *in); inodeno_t _get_inodeno(Inode *in); diff -Nru ceph-10.2.7/src/cls/rgw/cls_rgw.cc ceph-10.2.9/src/cls/rgw/cls_rgw.cc --- ceph-10.2.7/src/cls/rgw/cls_rgw.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/cls/rgw/cls_rgw.cc 2017-07-13 13:05:37.000000000 +0000 @@ -124,11 +124,15 @@ *key = buf; } -static void bi_log_index_key(cls_method_context_t hctx, string& key, string& id, uint64_t index_ver) +static void bi_log_prefix(string& key) { key = BI_PREFIX_CHAR; key.append(bucket_index_prefixes[BI_BUCKET_LOG_INDEX]); +} +static void bi_log_index_key(cls_method_context_t hctx, string& key, string& id, uint64_t index_ver) +{ + bi_log_prefix(key); get_index_ver_key(hctx, index_ver, &id); key.append(id); } @@ -2286,9 +2290,8 @@ string filter = name; string start_key = marker; - string first_instance_idx; - encode_obj_versioned_data_key(string(), &first_instance_idx); - string end_key = first_instance_idx; + string end_key; // stop listing at bi_log_prefix + bi_log_prefix(end_key); int count = 0; map keys; @@ -2536,7 +2539,7 @@ ret = list_olh_entries(hctx, op.name, op.marker, max - count, &op_ret.entries); if (ret < 0) { - CLS_LOG(0, "ERROR: %s(): list_instance_entries retured ret=%d", 
__func__, ret); + CLS_LOG(0, "ERROR: %s(): list_olh_entries returned ret=%d", __func__, ret); return ret; } diff -Nru ceph-10.2.7/src/common/ceph_fs.cc ceph-10.2.9/src/common/ceph_fs.cc --- ceph-10.2.7/src/common/ceph_fs.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/common/ceph_fs.cc 2017-07-13 13:05:37.000000000 +0000 @@ -41,18 +41,18 @@ int mode = -1; #ifdef O_DIRECTORY /* fixme */ - if ((flags & O_DIRECTORY) == O_DIRECTORY) + if ((flags & CEPH_O_DIRECTORY) == CEPH_O_DIRECTORY) return CEPH_FILE_MODE_PIN; #endif switch (flags & O_ACCMODE) { - case O_WRONLY: + case CEPH_O_WRONLY: mode = CEPH_FILE_MODE_WR; break; - case O_RDONLY: + case CEPH_O_RDONLY: mode = CEPH_FILE_MODE_RD; break; - case O_RDWR: + case CEPH_O_RDWR: case O_ACCMODE: /* this is what the VFS does */ mode = CEPH_FILE_MODE_RDWR; break; @@ -78,3 +78,33 @@ return caps; } + +int ceph_flags_sys2wire(int flags) +{ + int wire_flags = 0; + + switch (flags & O_ACCMODE) { + case O_RDONLY: + wire_flags |= CEPH_O_RDONLY; + break; + case O_WRONLY: + wire_flags |= CEPH_O_WRONLY; + break; + case O_RDWR: + wire_flags |= CEPH_O_RDWR; + break; + } + flags &= ~O_ACCMODE; + +#define ceph_sys2wire(a) if (flags & a) { wire_flags |= CEPH_##a; flags &= ~a; } + + ceph_sys2wire(O_CREAT); + ceph_sys2wire(O_EXCL); + ceph_sys2wire(O_TRUNC); + ceph_sys2wire(O_DIRECTORY); + ceph_sys2wire(O_NOFOLLOW); + +#undef ceph_sys2wire + + return wire_flags; +} diff -Nru ceph-10.2.7/src/common/config_opts.h ceph-10.2.9/src/common/config_opts.h --- ceph-10.2.7/src/common/config_opts.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/common/config_opts.h 2017-07-13 13:05:37.000000000 +0000 @@ -239,6 +239,7 @@ OPTION(mon_osd_prime_pg_temp, OPT_BOOL, true) // prime osdmap with pg mapping changes OPTION(mon_osd_prime_pg_temp_max_time, OPT_FLOAT, .5) // max time to spend priming OPTION(mon_osd_pool_ec_fast_read, OPT_BOOL, false) // whether turn on fast read on the pool or not +OPTION(osd_ignore_stale_divergent_priors, OPT_BOOL, false) // do not assert on divergent_prior entries which aren't in the log and whose on-disk objects are newer OPTION(mon_stat_smooth_intervals, OPT_INT, 2) // smooth stats over last N PGMap maps OPTION(mon_election_timeout, OPT_FLOAT, 5) // on election proposer, max waiting time for all ACKs OPTION(mon_lease, OPT_FLOAT, 5) // lease interval @@ -272,7 +273,6 @@ OPTION(mon_warn_on_crush_straw_calc_version_zero, OPT_BOOL, true) // warn if crush straw_calc_version==0 OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL, true) // warn if 'mon_osd_down_out_interval == 0' OPTION(mon_warn_on_cache_pools_without_hit_sets, OPT_BOOL, true) -OPTION(mon_warn_on_no_sortbitwise, OPT_BOOL, true) // warn when sortbitwise not set OPTION(mon_min_osdmap_epochs, OPT_INT, 500) OPTION(mon_max_pgmap_epochs, OPT_INT, 500) OPTION(mon_max_log_epochs, OPT_INT, 500) @@ -374,6 +374,7 @@ OPTION(client_readahead_min, OPT_LONGLONG, 128*1024) // readahead at _least_ this much. OPTION(client_readahead_max_bytes, OPT_LONGLONG, 0) // default unlimited OPTION(client_readahead_max_periods, OPT_LONGLONG, 4) // as multiple of file layout period (object size * num stripes) +OPTION(client_reconnect_stale, OPT_BOOL, false) // automatically reconnect stale session OPTION(client_snapdir, OPT_STR, ".snap") OPTION(client_mountpoint, OPT_STR, "/") OPTION(client_mount_uid, OPT_INT, -1) @@ -438,6 +439,8 @@ OPTION(journaler_batch_max, OPT_U64, 0) // max bytes we'll delay flushing; disable, for now....
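The ceph_flags_sys2wire() helper added to common/ceph_fs.cc above exists because host O_* flag constants are platform-specific: the CEPH_O_* wire values match Linux, but other platforms encode e.g. O_CREAT and O_TRUNC differently, so open flags must be normalized before being sent to the MDS. A minimal sanity sketch, assuming a Linux host (where the two encodings coincide) and a build inside the ceph source tree:

    #include <fcntl.h>
    #include <cassert>
    #include "include/ceph_fs.h"  // CEPH_O_* constants and ceph_flags_sys2wire()

    int main()
    {
      // write + create + truncate, as a client would pass them to open(2)
      int wire = ceph_flags_sys2wire(O_WRONLY | O_CREAT | O_TRUNC);
      assert(wire == (CEPH_O_WRONLY | CEPH_O_CREAT | CEPH_O_TRUNC));
      // the access mode is remapped explicitly; flag bits the wire format
      // does not know about are simply dropped
      assert(ceph_flags_sys2wire(O_RDONLY) == CEPH_O_RDONLY);
      return 0;
    }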
OPTION(mds_data, OPT_STR, "/var/lib/ceph/mds/$cluster-$id") OPTION(mds_max_file_size, OPT_U64, 1ULL << 40) // Used when creating new CephFS. Change with 'ceph mds set max_file_size ' afterwards +// max xattr kv pairs size for each dir/file +OPTION(mds_max_xattr_pairs_size, OPT_U32, 64 << 10) OPTION(mds_cache_size, OPT_INT, 100000) OPTION(mds_cache_mid, OPT_FLOAT, .7) OPTION(mds_max_file_recover, OPT_U32, 32) @@ -559,6 +562,9 @@ // Maximum number of damaged frags/dentries before whole MDS rank goes damaged OPTION(mds_damage_table_max_entries, OPT_INT, 10000) +// Maximum increment for client writable range, counted by number of objects +OPTION(mds_client_writeable_range_max_inc_objs, OPT_U32, 1024) + // verify backend can support configured max object name length OPTION(osd_check_max_object_name_len_on_startup, OPT_BOOL, true) @@ -661,8 +667,8 @@ OPTION(osd_hit_set_namespace, OPT_STR, ".ceph-internal") // rados namespace for hit_set tracking // conservative default throttling values -OPTION(osd_tier_promote_max_objects_sec, OPT_U64, 5 * 1024*1024) -OPTION(osd_tier_promote_max_bytes_sec, OPT_U64, 25) +OPTION(osd_tier_promote_max_objects_sec, OPT_U64, 25) +OPTION(osd_tier_promote_max_bytes_sec, OPT_U64, 5 * 1024*1024) OPTION(osd_tier_default_cache_mode, OPT_STR, "writeback") OPTION(osd_tier_default_cache_hit_set_count, OPT_INT, 4) @@ -714,7 +720,7 @@ OPTION(osd_recovery_thread_timeout, OPT_INT, 30) OPTION(osd_recovery_thread_suicide_timeout, OPT_INT, 300) OPTION(osd_recovery_sleep, OPT_FLOAT, 0) // seconds to sleep between recovery ops -OPTION(osd_snap_trim_sleep, OPT_FLOAT, 0) +OPTION(osd_snap_trim_sleep, OPT_DOUBLE, 0) OPTION(osd_scrub_invalid_stats, OPT_BOOL, true) OPTION(osd_remove_thread_timeout, OPT_INT, 60*60) OPTION(osd_remove_thread_suicide_timeout, OPT_INT, 10*60*60) @@ -728,6 +734,8 @@ // max number of parallel snap trims/pg OPTION(osd_pg_max_concurrent_snap_trims, OPT_U64, 2) +// max number of trimming pgs +OPTION(osd_max_trimming_pgs, OPT_U64, 2) // minimum number of peers that must be reachable to mark ourselves // back up after being wrongly marked down. 
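One note on the osd_tier_promote_* change above: the two defaults were transposed (5*1024*1024 objects/sec paired with 25 bytes/sec), and the patch swaps them back to the intended conservative throttle. Restated, with the implied per-object budget:

    // corrected cache-tier promotion throttles, exactly as set above
    const uint64_t promote_max_objects_sec = 25;               // objects per second
    const uint64_t promote_max_bytes_sec   = 5 * 1024 * 1024;  // 5 MiB per second
    // together these allow, on average, 5242880 / 25 = 209715 bytes
    // (~205 KiB) per promoted object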
@@ -849,7 +857,7 @@ // rocksdb options that will be used for omap(if omap_backend is rocksdb) OPTION(filestore_rocksdb_options, OPT_STR, "") // rocksdb options that will be used in monstore -OPTION(mon_rocksdb_options, OPT_STR, "cache_size=536870912,write_buffer_size=33554432,block_size=65536,compression=kNoCompression") +OPTION(mon_rocksdb_options, OPT_STR, "write_buffer_size=33554432,compression=kNoCompression") /** * osd_*_priority adjust the relative priority of client io, recovery io, @@ -868,6 +876,9 @@ OPTION(osd_scrub_priority, OPT_U32, 5) // set default cost equal to 50MB io OPTION(osd_scrub_cost, OPT_U32, 50<<20) +// set requested scrub priority higher than scrub priority to make the +// requested scrubs jump the queue of scheduled scrubs +OPTION(osd_requested_scrub_priority, OPT_U32, 120) /** * osd_recovery_op_warn_multiple scales the normal warning threshhold, @@ -1444,6 +1455,10 @@ OPTION(rgw_period_push_interval, OPT_DOUBLE, 2) // seconds to wait before retrying "period push" OPTION(rgw_period_push_interval_max, OPT_DOUBLE, 30) // maximum interval after exponential backoff +OPTION(rgw_safe_max_objects_per_shard, OPT_INT, 100*1024) // safe max loading +OPTION(rgw_shard_warning_threshold, OPT_DOUBLE, 90) // pct of safe max + // at which to warn + OPTION(rgw_swift_versioning_enabled, OPT_BOOL, false) // whether swift object versioning feature is enabled OPTION(mutex_perf_counter, OPT_BOOL, false) // enable/disable mutex perf counter diff -Nru ceph-10.2.7/src/common/ipaddr.cc ceph-10.2.9/src/common/ipaddr.cc --- ceph-10.2.7/src/common/ipaddr.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/common/ipaddr.cc 2017-07-13 13:05:37.000000000 +0000 @@ -110,7 +110,7 @@ } -bool parse_network(const char *s, struct sockaddr *network, unsigned int *prefix_len) { +bool parse_network(const char *s, struct sockaddr_storage *network, unsigned int *prefix_len) { char *slash = strchr((char*)s, '/'); if (!slash) { // no slash @@ -144,14 +144,14 @@ int ok; ok = inet_pton(AF_INET, addr, &((struct sockaddr_in*)network)->sin_addr); if (ok) { - network->sa_family = AF_INET; + network->ss_family = AF_INET; return true; } // try parsing as ipv6 ok = inet_pton(AF_INET6, addr, &((struct sockaddr_in6*)network)->sin6_addr); if (ok) { - network->sa_family = AF_INET6; + network->ss_family = AF_INET6; return true; } diff -Nru ceph-10.2.7/src/common/LogClient.cc ceph-10.2.9/src/common/LogClient.cc --- ceph-10.2.7/src/common/LogClient.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/common/LogClient.cc 2017-07-13 13:05:37.000000000 +0000 @@ -265,15 +265,13 @@ } } -void LogClient::reset_session() -{ - Mutex::Locker l(log_lock); - last_log_sent = last_log - log_queue.size(); -} - -Message *LogClient::get_mon_log_message() +Message *LogClient::get_mon_log_message(bool flush) { Mutex::Locker l(log_lock); + if (flush) { + // reset session + last_log_sent = last_log - log_queue.size(); + } return _get_mon_log_message(); } diff -Nru ceph-10.2.7/src/common/LogClient.h ceph-10.2.9/src/common/LogClient.h --- ceph-10.2.7/src/common/LogClient.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/common/LogClient.h 2017-07-13 13:05:37.000000000 +0000 @@ -205,8 +205,7 @@ } bool handle_log_ack(MLogAck *m); - void reset_session(); - Message *get_mon_log_message(); + Message *get_mon_log_message(bool flush); bool are_pending(); LogChannelRef create_channel() { diff -Nru ceph-10.2.7/src/common/pick_address.cc ceph-10.2.9/src/common/pick_address.cc --- ceph-10.2.7/src/common/pick_address.cc 2017-04-10 
11:44:25.000000000 +0000 +++ ceph-10.2.9/src/common/pick_address.cc 2017-07-13 13:05:37.000000000 +0000 @@ -32,7 +32,7 @@ get_str_list(networks, nets); for(std::list::iterator s = nets.begin(); s != nets.end(); ++s) { - struct sockaddr net; + struct sockaddr_storage net; unsigned int prefix_len; if (!parse_network(s->c_str(), &net, &prefix_len)) { @@ -40,7 +40,7 @@ exit(1); } - const struct sockaddr *found = find_ip_in_subnet(ifa, &net, prefix_len); + const struct sockaddr *found = find_ip_in_subnet(ifa, (struct sockaddr *) &net, prefix_len); if (found) return found; } diff -Nru ceph-10.2.7/src/.git_version ceph-10.2.9/src/.git_version --- ceph-10.2.7/src/.git_version 2017-04-10 11:45:50.000000000 +0000 +++ ceph-10.2.9/src/.git_version 2017-07-13 13:07:05.000000000 +0000 @@ -1,2 +1,2 @@ -50e863e0f4bc8f4b9e31156de690d765af245185 -v10.2.7 +2ee413f77150c0f375ff6f10edd6c8f9c7d060d0 +v10.2.9 diff -Nru ceph-10.2.7/src/global/global_init.cc ceph-10.2.9/src/global/global_init.cc --- ceph-10.2.7/src/global/global_init.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/global/global_init.cc 2017-07-13 13:05:37.000000000 +0000 @@ -134,11 +134,12 @@ g_conf->complain_about_parse_errors(g_ceph_context); } -void global_init(std::vector < const char * > *alt_def_args, - std::vector < const char* >& args, - uint32_t module_type, code_environment_t code_env, - int flags, - const char *data_dir_option, bool run_pre_init) +boost::intrusive_ptr +global_init(std::vector < const char * > *alt_def_args, + std::vector < const char* >& args, + uint32_t module_type, code_environment_t code_env, + int flags, + const char *data_dir_option, bool run_pre_init) { // Ensure we're not calling the global init functions multiple times. static bool first_run = true; @@ -331,6 +332,18 @@ if (code_env == CODE_ENVIRONMENT_DAEMON && !(flags & CINIT_FLAG_NO_DAEMON_ACTIONS)) output_ceph_version(); + + return boost::intrusive_ptr{g_ceph_context, false}; +} + +void intrusive_ptr_add_ref(CephContext* cct) +{ + cct->get(); +} + +void intrusive_ptr_release(CephContext* cct) +{ + cct->put(); } void global_print_banner(void) diff -Nru ceph-10.2.7/src/global/global_init.h ceph-10.2.9/src/global/global_init.h --- ceph-10.2.7/src/global/global_init.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/global/global_init.h 2017-07-13 13:05:37.000000000 +0000 @@ -19,7 +19,8 @@ #include #include #include - +#include +#include "include/assert.h" #include "common/code_environment.h" #include "common/common_init.h" @@ -30,7 +31,8 @@ * daemons and utility programs need to call. It takes care of a lot of * initialization, including setting up g_ceph_context. */ -void global_init(std::vector < const char * > *alt_def_args, +boost::intrusive_ptr + global_init(std::vector < const char * > *alt_def_args, std::vector < const char* >& args, uint32_t module_type, code_environment_t code_env, @@ -38,6 +40,9 @@ const char *data_dir_option = 0, bool run_pre_init = true); +void intrusive_ptr_add_ref(CephContext* cct); +void intrusive_ptr_release(CephContext* cct); + // just the first half; enough to get config parsed but doesn't start up the // cct or log. 
void global_pre_init(std::vector < const char * > *alt_def_args, diff -Nru ceph-10.2.7/src/include/ceph_fs.h ceph-10.2.9/src/include/ceph_fs.h --- ceph-10.2.7/src/include/ceph_fs.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/include/ceph_fs.h 2017-07-13 13:05:37.000000000 +0000 @@ -380,6 +380,20 @@ #define CEPH_SETATTR_ATIME_NOW (1 << 8) /* + * open request flags + */ +#define CEPH_O_RDONLY 00000000 +#define CEPH_O_WRONLY 00000001 +#define CEPH_O_RDWR 00000002 +#define CEPH_O_CREAT 00000100 +#define CEPH_O_EXCL 00000200 +#define CEPH_O_TRUNC 00001000 +#define CEPH_O_DIRECTORY 00200000 +#define CEPH_O_NOFOLLOW 00400000 + +int ceph_flags_sys2wire(int flags); + +/* * Ceph setxattr request flags. */ #define CEPH_XATTR_CREATE (1 << 0) diff -Nru ceph-10.2.7/src/include/ipaddr.h ceph-10.2.9/src/include/ipaddr.h --- ceph-10.2.7/src/include/ipaddr.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/include/ipaddr.h 2017-07-13 13:05:37.000000000 +0000 @@ -16,6 +16,6 @@ unsigned int prefix_len); -bool parse_network(const char *s, struct sockaddr *network, unsigned int *prefix_len); +bool parse_network(const char *s, struct sockaddr_storage *network, unsigned int *prefix_len); #endif diff -Nru ceph-10.2.7/src/include/rados/rgw_file.h ceph-10.2.9/src/include/rados/rgw_file.h --- ceph-10.2.7/src/include/rados/rgw_file.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/include/rados/rgw_file.h 2017-07-13 13:05:37.000000000 +0000 @@ -26,7 +26,7 @@ #define LIBRGW_FILE_VER_MAJOR 1 #define LIBRGW_FILE_VER_MINOR 1 -#define LIBRGW_FILE_VER_EXTRA 2 +#define LIBRGW_FILE_VER_EXTRA 3 #define LIBRGW_FILE_VERSION(maj, min, extra) ((maj << 16) + (min << 8) + extra) #define LIBRGW_FILE_VERSION_CODE LIBRGW_FILE_VERSION(LIBRGW_FILE_VER_MAJOR, LIBRGW_FILE_VER_MINOR, LIBRGW_FILE_VER_EXTRA) @@ -35,7 +35,8 @@ * object types */ enum rgw_fh_type { - RGW_FS_TYPE_FILE = 0, + RGW_FS_TYPE_NIL = 0, + RGW_FS_TYPE_FILE, RGW_FS_TYPE_DIRECTORY, }; @@ -88,6 +89,11 @@ #define RGW_LOOKUP_FLAG_NONE 0x0000 #define RGW_LOOKUP_FLAG_CREATE 0x0001 #define RGW_LOOKUP_FLAG_RCB 0x0002 /* readdir callback hint */ +#define RGW_LOOKUP_FLAG_DIR 0x0004 +#define RGW_LOOKUP_FLAG_FILE 0x0008 + +#define RGW_LOOKUP_TYPE_FLAGS \ + (RGW_LOOKUP_FLAG_DIR|RGW_LOOKUP_FLAG_FILE) int rgw_lookup(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh, const char *path, @@ -196,7 +202,8 @@ /* read directory content */ -typedef bool (*rgw_readdir_cb)(const char *name, void *arg, uint64_t offset); +typedef bool (*rgw_readdir_cb)(const char *name, void *arg, uint64_t offset, + uint32_t flags); #define RGW_READDIR_FLAG_NONE 0x0000 #define RGW_READDIR_FLAG_DOTDOT 0x0001 /* send dot names */ diff -Nru ceph-10.2.7/src/librados-config.cc ceph-10.2.9/src/librados-config.cc --- ceph-10.2.7/src/librados-config.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librados-config.cc 2017-07-13 13:05:37.000000000 +0000 @@ -42,8 +42,9 @@ bool opt_version = false; bool opt_vernum = false; - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, - CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); common_init_finish(g_ceph_context); for (std::vector::iterator i = args.begin(); i != args.end(); ) { diff -Nru ceph-10.2.7/src/libradosstriper/RadosStriperImpl.cc ceph-10.2.9/src/libradosstriper/RadosStriperImpl.cc --- ceph-10.2.7/src/libradosstriper/RadosStriperImpl.cc 2017-04-10 11:44:25.000000000 +0000 +++ 
ceph-10.2.9/src/libradosstriper/RadosStriperImpl.cc 2017-07-13 13:05:37.000000000 +0000 @@ -770,33 +770,37 @@ uint64_t off, const ceph_file_layout& layout) { - // get list of extents to be written to - vector extents; - std::string format = soid + RADOS_OBJECT_EXTENSION_FORMAT; - file_layout_t l; - l.from_legacy(layout); - Striper::file_to_extents(cct(), format.c_str(), &l, off, len, 0, extents); - // go through the extents int r = 0; - for (vector::iterator p = extents.begin(); p != extents.end(); ++p) { - // assemble pieces of a given object into a single buffer list - bufferlist oid_bl; - for (vector >::iterator q = p->buffer_extents.begin(); - q != p->buffer_extents.end(); - ++q) { - bufferlist buffer_bl; - buffer_bl.substr_of(bl, q->first, q->second); - oid_bl.append(buffer_bl); - } - // and write the object - c->add_request(); - librados::AioCompletion *rados_completion = - m_radosCluster.aio_create_completion(c, rados_req_write_complete, rados_req_write_safe); - r = m_ioCtx.aio_write(p->oid.name, rados_completion, oid_bl, p->length, p->offset); - rados_completion->release(); - if (r < 0) - break; - } + // Do not try anything if we are called with empty buffer, + // file_to_extents would raise an exception + if (len > 0) { + // get list of extents to be written to + vector extents; + std::string format = soid + RADOS_OBJECT_EXTENSION_FORMAT; + file_layout_t l; + l.from_legacy(layout); + Striper::file_to_extents(cct(), format.c_str(), &l, off, len, 0, extents); + // go through the extents + for (vector::iterator p = extents.begin(); p != extents.end(); ++p) { + // assemble pieces of a given object into a single buffer list + bufferlist oid_bl; + for (vector >::iterator q = p->buffer_extents.begin(); + q != p->buffer_extents.end(); + ++q) { + bufferlist buffer_bl; + buffer_bl.substr_of(bl, q->first, q->second); + oid_bl.append(buffer_bl); + } + // and write the object + c->add_request(); + librados::AioCompletion *rados_completion = + m_radosCluster.aio_create_completion(c, rados_req_write_complete, rados_req_write_safe); + r = m_ioCtx.aio_write(p->oid.name, rados_completion, oid_bl, p->length, p->offset); + rados_completion->release(); + if (r < 0) + break; + } + } c->finish_adding_requests(); return r; } diff -Nru ceph-10.2.7/src/librbd/exclusive_lock/AcquireRequest.cc ceph-10.2.9/src/librbd/exclusive_lock/AcquireRequest.cc --- ceph-10.2.7/src/librbd/exclusive_lock/AcquireRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/exclusive_lock/AcquireRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -22,7 +22,8 @@ #define dout_subsys ceph_subsys_rbd #undef dout_prefix -#define dout_prefix *_dout << "librbd::exclusive_lock::AcquireRequest: " +#define dout_prefix *_dout << "librbd::exclusive_lock::AcquireRequest: " \ + << this << " " << __func__ << ": " namespace librbd { namespace exclusive_lock { @@ -64,7 +65,7 @@ template void AcquireRequest::send_prepare_lock() { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; // acquire the lock if the image is not busy performing other actions Context *ctx = create_context_callback< @@ -75,7 +76,7 @@ template Context *AcquireRequest::handle_prepare_lock(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; send_flush_notifies(); return nullptr; @@ -84,7 +85,7 @@ template void AcquireRequest::send_flush_notifies() { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << 
__func__ << dendl; + ldout(cct, 10) << dendl; using klass = AcquireRequest; Context *ctx = create_context_callback( @@ -95,7 +96,7 @@ template Context *AcquireRequest::handle_flush_notifies(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; assert(*ret_val == 0); send_get_locker(); @@ -105,7 +106,7 @@ template void AcquireRequest::send_get_locker() { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; Context *ctx = create_context_callback< AcquireRequest, &AcquireRequest::handle_get_locker>(this); @@ -116,7 +117,7 @@ template Context *AcquireRequest::handle_get_locker(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; if (*ret_val == -ENOENT) { ldout(cct, 20) << "no lockers detected" << dendl; @@ -138,7 +139,7 @@ template void AcquireRequest::send_lock() { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << "cookie=" << m_cookie << dendl; librados::ObjectWriteOperation op; rados::cls::lock::lock(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, m_cookie, @@ -156,7 +157,7 @@ template Context *AcquireRequest::handle_lock(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; if (*ret_val == 0) { return send_refresh(); @@ -180,7 +181,7 @@ } CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; using klass = AcquireRequest; Context *ctx = create_async_context_callback( @@ -197,11 +198,11 @@ template Context *AcquireRequest::handle_refresh(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; if (*ret_val == -ERESTART) { // next issued IO or op will (re)-refresh the image and shut down lock - ldout(cct, 5) << ": exclusive lock dynamically disabled" << dendl; + ldout(cct, 5) << "exclusive lock dynamically disabled" << dendl; *ret_val = 0; } else if (*ret_val < 0) { lderr(cct) << "failed to refresh image: " << cpp_strerror(*ret_val) @@ -233,7 +234,7 @@ } CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; using klass = AcquireRequest; Context *ctx = create_context_callback( @@ -250,7 +251,7 @@ template Context *AcquireRequest::handle_open_journal(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; if (*ret_val < 0) { lderr(cct) << "failed to open journal: " << cpp_strerror(*ret_val) << dendl; @@ -266,7 +267,7 @@ template void AcquireRequest::send_allocate_journal_tag() { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; RWLock::RLocker snap_locker(m_image_ctx.snap_lock); using klass = AcquireRequest; @@ -278,7 +279,7 @@ template Context *AcquireRequest::handle_allocate_journal_tag(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; if (*ret_val < 0) { lderr(cct) << "failed to allocate journal tag: " << cpp_strerror(*ret_val) @@ -293,7 +294,7 @@ template void AcquireRequest::send_close_journal() { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + 
ldout(cct, 10) << dendl; using klass = AcquireRequest; Context *ctx = create_context_callback( @@ -304,7 +305,7 @@ template Context *AcquireRequest::handle_close_journal(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; if (*ret_val < 0) { lderr(cct) << "failed to close journal: " << cpp_strerror(*ret_val) @@ -322,7 +323,7 @@ } CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; using klass = AcquireRequest; Context *ctx = create_context_callback( @@ -336,7 +337,7 @@ template Context *AcquireRequest::handle_open_object_map(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; if (*ret_val < 0) { lderr(cct) << "failed to open object map: " << cpp_strerror(*ret_val) @@ -358,7 +359,7 @@ } CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; using klass = AcquireRequest; Context *ctx = create_context_callback< @@ -369,7 +370,7 @@ template Context *AcquireRequest::handle_close_object_map(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; // object map should never result in an error assert(*ret_val == 0); @@ -380,7 +381,7 @@ template void AcquireRequest::send_unlock() { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; librados::ObjectWriteOperation op; rados::cls::lock::unlock(&op, RBD_LOCK_NAME, m_cookie); @@ -397,7 +398,7 @@ template Context *AcquireRequest::handle_unlock(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; if (*ret_val < 0) { lderr(cct) << "failed to unlock image: " << cpp_strerror(*ret_val) << dendl; @@ -410,7 +411,7 @@ template void AcquireRequest::send_break_lock() { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; Context *ctx = create_context_callback< AcquireRequest, &AcquireRequest::handle_break_lock>(this); @@ -422,7 +423,7 @@ template Context *AcquireRequest::handle_break_lock(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; if (*ret_val == -EAGAIN) { ldout(cct, 5) << "lock owner is still alive" << dendl; diff -Nru ceph-10.2.7/src/librbd/exclusive_lock/ReleaseRequest.cc ceph-10.2.9/src/librbd/exclusive_lock/ReleaseRequest.cc --- ceph-10.2.7/src/librbd/exclusive_lock/ReleaseRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/exclusive_lock/ReleaseRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -16,7 +16,8 @@ #define dout_subsys ceph_subsys_rbd #undef dout_prefix -#define dout_prefix *_dout << "librbd::exclusive_lock::ReleaseRequest: " +#define dout_prefix *_dout << "librbd::exclusive_lock::ReleaseRequest: " \ + << this << " " << __func__ << ": " namespace librbd { namespace exclusive_lock { @@ -65,7 +66,7 @@ } CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; // release the lock if the image is not busy performing other actions Context *ctx = create_context_callback< @@ -76,7 +77,7 @@ template Context *ReleaseRequest::handle_prepare_lock(int *ret_val) { 
CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; send_cancel_op_requests(); return nullptr; @@ -85,7 +86,7 @@ template void ReleaseRequest::send_cancel_op_requests() { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; using klass = ReleaseRequest; Context *ctx = create_context_callback< @@ -96,7 +97,7 @@ template Context *ReleaseRequest::handle_cancel_op_requests(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; assert(*ret_val == 0); @@ -107,7 +108,7 @@ template void ReleaseRequest::send_block_writes() { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; using klass = ReleaseRequest; Context *ctx = create_context_callback< @@ -125,7 +126,7 @@ template Context *ReleaseRequest::handle_block_writes(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; if (*ret_val == -EBLACKLISTED) { // allow clean shut down if blacklisted @@ -149,7 +150,7 @@ } CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": purge_on_error=" << purge_on_error << dendl; + ldout(cct, 10) << "purge_on_error=" << purge_on_error << dendl; RWLock::RLocker owner_lock(m_image_ctx.owner_lock); Context *ctx = create_async_context_callback( @@ -162,7 +163,7 @@ template Context *ReleaseRequest::handle_invalidate_cache(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; if (*ret_val == -EBLACKLISTED) { lderr(cct) << "failed to invalidate cache because client is blacklisted" @@ -186,7 +187,7 @@ template void ReleaseRequest::send_flush_notifies() { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; using klass = ReleaseRequest; Context *ctx = create_context_callback< @@ -197,7 +198,7 @@ template Context *ReleaseRequest::handle_flush_notifies(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; assert(*ret_val == 0); send_close_journal(); @@ -217,7 +218,7 @@ } CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; using klass = ReleaseRequest; Context *ctx = create_context_callback( @@ -228,7 +229,7 @@ template Context *ReleaseRequest::handle_close_journal(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; if (*ret_val < 0) { // error implies some journal events were not flushed -- continue @@ -255,7 +256,7 @@ } CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << dendl; using klass = ReleaseRequest; Context *ctx = create_context_callback< @@ -266,7 +267,7 @@ template Context *ReleaseRequest::handle_close_object_map(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; // object map shouldn't return errors assert(*ret_val == 0); @@ -279,7 +280,7 @@ template void ReleaseRequest::send_unlock() { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << dendl; + ldout(cct, 10) << 
"cookie=" << m_cookie << dendl; if (m_on_releasing != nullptr) { // alert caller that we no longer own the exclusive lock @@ -302,7 +303,7 @@ template Context *ReleaseRequest::handle_unlock(int *ret_val) { CephContext *cct = m_image_ctx.cct; - ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl; + ldout(cct, 10) << "r=" << *ret_val << dendl; if (*ret_val < 0 && *ret_val != -ENOENT) { lderr(cct) << "failed to unlock: " << cpp_strerror(*ret_val) << dendl; diff -Nru ceph-10.2.7/src/librbd/ExclusiveLock.cc ceph-10.2.9/src/librbd/ExclusiveLock.cc --- ceph-10.2.7/src/librbd/ExclusiveLock.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/ExclusiveLock.cc 2017-07-13 13:05:37.000000000 +0000 @@ -251,10 +251,41 @@ } template -void ExclusiveLock::assert_header_locked(librados::ObjectWriteOperation *op) { - Mutex::Locker locker(m_lock); - rados::cls::lock::assert_locked(op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, - m_cookie, WATCHER_LOCK_TAG); +int ExclusiveLock::assert_header_locked() { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 10) << this << " " << __func__ << dendl; + + librados::ObjectReadOperation op; + { + Mutex::Locker locker(m_lock); + rados::cls::lock::assert_locked(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, + m_cookie, WATCHER_LOCK_TAG); + } + + int r = m_image_ctx.md_ctx.operate(m_image_ctx.header_oid, &op, nullptr); + if (r < 0) { + if (r == -EBLACKLISTED) { + ldout(cct, 5) << this << " " << __func__ << ": " + << "client is not lock owner -- client blacklisted" + << dendl; + } else if (r == -ENOENT) { + ldout(cct, 5) << this << " " << __func__ << ": " + << "client is not lock owner -- no lock detected" + << dendl; + } else if (r == -EBUSY) { + ldout(cct, 5) << this << " " << __func__ << ": " + << "client is not lock owner -- owned by different client" + << dendl; + } else { + lderr(cct) << this << " " << __func__ << ": " + << "failed to verify lock ownership: " << cpp_strerror(r) + << dendl; + } + + return r; + } + + return 0; } template diff -Nru ceph-10.2.7/src/librbd/ExclusiveLock.h ceph-10.2.9/src/librbd/ExclusiveLock.h --- ceph-10.2.7/src/librbd/ExclusiveLock.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/ExclusiveLock.h 2017-07-13 13:05:37.000000000 +0000 @@ -43,9 +43,9 @@ void reacquire_lock(Context *on_reacquired = nullptr); - void handle_peer_notification(int r); + int assert_header_locked(); - void assert_header_locked(librados::ObjectWriteOperation *op); + void handle_peer_notification(int r); static bool decode_lock_cookie(const std::string &cookie, uint64_t *handle); diff -Nru ceph-10.2.7/src/librbd/ImageState.cc ceph-10.2.9/src/librbd/ImageState.cc --- ceph-10.2.7/src/librbd/ImageState.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/ImageState.cc 2017-07-13 13:05:37.000000000 +0000 @@ -306,7 +306,7 @@ template bool ImageState::is_refresh_required() const { Mutex::Locker locker(m_lock); - return (m_last_refresh != m_refresh_seq); + return (m_last_refresh != m_refresh_seq || find_pending_refresh() != nullptr); } template @@ -338,7 +338,14 @@ C_SaferCond ctx; { m_lock.Lock(); - if (m_last_refresh == m_refresh_seq) { + Action action(ACTION_TYPE_REFRESH); + action.refresh_seq = m_refresh_seq; + + auto refresh_action = find_pending_refresh(); + if (refresh_action != nullptr) { + // if a refresh is in-flight, delay until it is finished + action = *refresh_action; + } else if (m_last_refresh == m_refresh_seq) { m_lock.Unlock(); return 0; } else if (is_closed()) { @@ -346,8 +353,6 @@ return -ESHUTDOWN; } - Action 
action(ACTION_TYPE_REFRESH); - action.refresh_seq = m_refresh_seq; execute_action_unlock(action, &ctx); } @@ -355,6 +360,22 @@ } template +const typename ImageState::Action * +ImageState::find_pending_refresh() const { + assert(m_lock.is_locked()); + + auto it = std::find_if(m_actions_contexts.rbegin(), + m_actions_contexts.rend(), + [](const ActionContexts& action_contexts) { + return (action_contexts.first == ACTION_TYPE_REFRESH); + }); + if (it != m_actions_contexts.rend()) { + return &it->first; + } + return nullptr; +} + +template void ImageState::snap_set(const std::string &snap_name, Context *on_finish) { CephContext *cct = m_image_ctx->cct; ldout(cct, 20) << __func__ << ": snap_name=" << snap_name << dendl; diff -Nru ceph-10.2.7/src/librbd/ImageState.h ceph-10.2.9/src/librbd/ImageState.h --- ceph-10.2.7/src/librbd/ImageState.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/ImageState.h 2017-07-13 13:05:37.000000000 +0000 @@ -114,6 +114,8 @@ bool is_transition_state() const; bool is_closed() const; + const Action *find_pending_refresh() const; + void append_context(const Action &action, Context *context); void execute_next_action_unlock(); void execute_action_unlock(const Action &action, Context *context); diff -Nru ceph-10.2.7/src/librbd/image_watcher/RewatchRequest.cc ceph-10.2.9/src/librbd/image_watcher/RewatchRequest.cc --- ceph-10.2.7/src/librbd/image_watcher/RewatchRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/image_watcher/RewatchRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -10,7 +10,7 @@ #define dout_subsys ceph_subsys_rbd #undef dout_prefix #define dout_prefix *_dout << "librbd::image_watcher::RewatchRequest: " \ - << this << ": " << __func__ + << this << " " << __func__ << " " namespace librbd { namespace image_watcher { diff -Nru ceph-10.2.7/src/librbd/internal.cc ceph-10.2.9/src/librbd/internal.cc --- ceph-10.2.7/src/librbd/internal.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/internal.cc 2017-07-13 13:05:37.000000000 +0000 @@ -2099,9 +2099,22 @@ int is_exclusive_lock_owner(ImageCtx *ictx, bool *is_owner) { - RWLock::RLocker l(ictx->owner_lock); - *is_owner = (ictx->exclusive_lock != nullptr && - ictx->exclusive_lock->is_lock_owner()); + *is_owner = false; + + RWLock::RLocker owner_locker(ictx->owner_lock); + if (ictx->exclusive_lock == nullptr || + !ictx->exclusive_lock->is_lock_owner()) { + return 0; + } + + // might have been blacklisted by peer -- ensure we still own + // the lock by pinging the OSD + int r = ictx->exclusive_lock->assert_header_locked(); + if (r < 0) { + return r; + } + + *is_owner = true; return 0; } @@ -2967,10 +2980,13 @@ } ictx->user_flushed(); + C_SaferCond ctx; { RWLock::RLocker owner_locker(ictx->owner_lock); - r = ictx->flush(); + ictx->flush(&ctx); } + r = ctx.wait(); + ictx->perfcounter->inc(l_librbd_flush); return r; } diff -Nru ceph-10.2.7/src/librbd/Journal.h ceph-10.2.9/src/librbd/Journal.h --- ceph-10.2.7/src/librbd/Journal.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/Journal.h 2017-07-13 13:05:37.000000000 +0000 @@ -11,6 +11,7 @@ #include "common/Cond.h" #include "common/Mutex.h" #include "common/Cond.h" +#include "common/WorkQueue.h" #include "journal/Future.h" #include "journal/JournalMetadataListener.h" #include "journal/ReplayEntry.h" @@ -23,7 +24,6 @@ #include #include -class ContextWQ; class SafeTimer; namespace journal { class Journaler; diff -Nru ceph-10.2.7/src/librbd/librbd.cc ceph-10.2.9/src/librbd/librbd.cc --- 
ceph-10.2.7/src/librbd/librbd.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/librbd.cc 2017-07-13 13:05:37.000000000 +0000 @@ -2802,12 +2802,12 @@ tracepoint(librbd, metadata_get_exit, r, key, NULL); return r; } - if (*vallen < val_s.size()) { + if (*vallen < val_s.size() + 1) { r = -ERANGE; - *vallen = val_s.size(); + *vallen = val_s.size() + 1; tracepoint(librbd, metadata_get_exit, r, key, NULL); } else { - strncpy(value, val_s.c_str(), val_s.size()); + strncpy(value, val_s.c_str(), val_s.size() + 1); tracepoint(librbd, metadata_get_exit, r, key, value); } return r; @@ -2845,7 +2845,7 @@ key_total_len += it->first.size() + 1; val_total_len += it->second.length() + 1; } - if (*key_len < key_total_len || *val_len < key_total_len) + if (*key_len < key_total_len || *val_len < val_total_len) too_short = true; *key_len = key_total_len; *val_len = val_total_len; @@ -2858,10 +2858,12 @@ for (map::iterator it = pairs.begin(); it != pairs.end(); ++it) { - strncpy(key_p, it->first.c_str(), it->first.size()); + strncpy(key_p, it->first.c_str(), it->first.size() + 1); key_p += it->first.size() + 1; strncpy(value_p, it->second.c_str(), it->second.length()); - value_p += it->second.length() + 1; + value_p += it->second.length(); + *value_p = '\0'; + value_p++; tracepoint(librbd, metadata_list_entry, it->first.c_str(), it->second.c_str()); } tracepoint(librbd, metadata_list_exit, r); diff -Nru ceph-10.2.7/src/librbd/object_map/InvalidateRequest.cc ceph-10.2.9/src/librbd/object_map/InvalidateRequest.cc --- ceph-10.2.7/src/librbd/object_map/InvalidateRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/object_map/InvalidateRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -59,10 +59,6 @@ lderr(cct) << this << " invalidating object map on-disk" << dendl; librados::ObjectWriteOperation op; - if (image_ctx.exclusive_lock != nullptr && - m_snap_id == CEPH_NOSNAP && !m_force) { - image_ctx.exclusive_lock->assert_header_locked(&op); - } cls_client::set_flags(&op, m_snap_id, flags, flags); librados::AioCompletion *rados_completion = diff -Nru ceph-10.2.7/src/librbd/operation/FlattenRequest.cc ceph-10.2.9/src/librbd/operation/FlattenRequest.cc --- ceph-10.2.7/src/librbd/operation/FlattenRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/operation/FlattenRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -140,9 +140,6 @@ // remove parent from this (base) image librados::ObjectWriteOperation op; - if (image_ctx.exclusive_lock != nullptr) { - image_ctx.exclusive_lock->assert_header_locked(&op); - } cls_client::remove_parent(&op); librados::AioCompletion *rados_completion = this->create_callback_completion(); diff -Nru ceph-10.2.7/src/librbd/operation/RebuildObjectMapRequest.cc ceph-10.2.9/src/librbd/operation/RebuildObjectMapRequest.cc --- ceph-10.2.7/src/librbd/operation/RebuildObjectMapRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/operation/RebuildObjectMapRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -352,9 +352,6 @@ m_state = STATE_UPDATE_HEADER; librados::ObjectWriteOperation op; - if (m_image_ctx.exclusive_lock != nullptr) { - m_image_ctx.exclusive_lock->assert_header_locked(&op); - } uint64_t flags = RBD_FLAG_OBJECT_MAP_INVALID | RBD_FLAG_FAST_DIFF_INVALID; cls_client::set_flags(&op, m_image_ctx.snap_id, 0, flags); diff -Nru ceph-10.2.7/src/librbd/operation/ResizeRequest.cc ceph-10.2.9/src/librbd/operation/ResizeRequest.cc --- ceph-10.2.7/src/librbd/operation/ResizeRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ 
ceph-10.2.9/src/librbd/operation/ResizeRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -381,9 +381,6 @@ bl.append(reinterpret_cast(&m_new_size), sizeof(m_new_size)); op.write(offsetof(rbd_obj_header_ondisk, image_size), bl); } else { - if (image_ctx.exclusive_lock != nullptr) { - image_ctx.exclusive_lock->assert_header_locked(&op); - } cls_client::set_size(&op, m_new_size); } diff -Nru ceph-10.2.7/src/librbd/operation/SnapshotCreateRequest.cc ceph-10.2.9/src/librbd/operation/SnapshotCreateRequest.cc --- ceph-10.2.7/src/librbd/operation/SnapshotCreateRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/operation/SnapshotCreateRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -207,9 +207,6 @@ if (image_ctx.old_format) { cls_client::old_snapshot_add(&op, m_snap_id, m_snap_name); } else { - if (image_ctx.exclusive_lock != nullptr) { - image_ctx.exclusive_lock->assert_header_locked(&op); - } cls_client::snapshot_add(&op, m_snap_id, m_snap_name); } diff -Nru ceph-10.2.7/src/librbd/operation/SnapshotRemoveRequest.cc ceph-10.2.9/src/librbd/operation/SnapshotRemoveRequest.cc --- ceph-10.2.7/src/librbd/operation/SnapshotRemoveRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/operation/SnapshotRemoveRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -180,10 +180,6 @@ if (image_ctx.old_format) { cls_client::old_snapshot_remove(&op, m_snap_name); } else { - if (image_ctx.exclusive_lock != nullptr && - image_ctx.exclusive_lock->is_lock_owner()) { - image_ctx.exclusive_lock->assert_header_locked(&op); - } cls_client::snapshot_remove(&op, m_snap_id); } diff -Nru ceph-10.2.7/src/librbd/operation/SnapshotRenameRequest.cc ceph-10.2.9/src/librbd/operation/SnapshotRenameRequest.cc --- ceph-10.2.7/src/librbd/operation/SnapshotRenameRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/operation/SnapshotRenameRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -89,10 +89,6 @@ if (image_ctx.old_format) { cls_client::old_snapshot_rename(&op, m_snap_id, m_snap_name); } else { - if (image_ctx.exclusive_lock != nullptr && - image_ctx.exclusive_lock->is_lock_owner()) { - image_ctx.exclusive_lock->assert_header_locked(&op); - } cls_client::snapshot_rename(&op, m_snap_id, m_snap_name); } diff -Nru ceph-10.2.7/src/librbd/Operations.cc ceph-10.2.9/src/librbd/Operations.cc --- ceph-10.2.7/src/librbd/Operations.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/librbd/Operations.cc 2017-07-13 13:05:37.000000000 +0000 @@ -193,10 +193,10 @@ CephContext *cct = image_ctx.cct; ldout(cct, 20) << __func__ << dendl; - Context *ctx = util::create_context_callback< - C_InvokeAsyncRequest, - &C_InvokeAsyncRequest::handle_acquire_exclusive_lock>( - this); + Context *ctx = util::create_async_context_callback( + image_ctx, util::create_context_callback< + C_InvokeAsyncRequest, + &C_InvokeAsyncRequest::handle_acquire_exclusive_lock>(this)); if (request_lock) { // current lock owner doesn't support op -- try to perform diff -Nru ceph-10.2.7/src/log/Log.cc ceph-10.2.9/src/log/Log.cc --- ceph-10.2.7/src/log/Log.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/log/Log.cc 2017-07-13 13:05:37.000000000 +0000 @@ -474,5 +474,10 @@ m_inject_segv = true; } +void Log::reset_segv() +{ + m_inject_segv = false; +} + } // ceph::log:: } // ceph:: diff -Nru ceph-10.2.7/src/log/Log.h ceph-10.2.9/src/log/Log.h --- ceph-10.2.7/src/log/Log.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/log/Log.h 2017-07-13 13:05:37.000000000 +0000 @@ -96,6 +96,7 @@ /// induce a segv 
on the next log event void inject_segv(); + void reset_segv(); }; } diff -Nru ceph-10.2.7/src/mds/CInode.cc ceph-10.2.9/src/mds/CInode.cc --- ceph-10.2.7/src/mds/CInode.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mds/CInode.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1198,6 +1198,23 @@ void CInode::_stored_backtrace(int r, version_t v, Context *fin) { + if (r == -ENOENT) { + const int64_t pool = get_backtrace_pool(); + bool exists = mdcache->mds->objecter->with_osdmap( + [pool](const OSDMap &osd_map) { + return osd_map.have_pg_pool(pool); + }); + + // This ENOENT is because the pool doesn't exist (the user deleted it + // out from under us), so the backtrace can never be written, so pretend + // to succeed so that the user can proceed to e.g. delete the file. + if (!exists) { + dout(4) << "store_backtrace got ENOENT: a data pool was deleted " + "beneath us!" << dendl; + r = 0; + } + } + if (r < 0) { dout(1) << "store backtrace error " << r << " v " << v << dendl; mdcache->mds->clog->error() << "failed to store backtrace on ino " diff -Nru ceph-10.2.7/src/mds/InoTable.cc ceph-10.2.9/src/mds/InoTable.cc --- ceph-10.2.7/src/mds/InoTable.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mds/InoTable.cc 2017-07-13 13:05:37.000000000 +0000 @@ -190,3 +190,15 @@ { ls.push_back(new InoTable()); } + +bool InoTable::intersects_free( + const interval_set &other, + interval_set *intersection) +{ + interval_set i; + i.intersection_of(free, other); + if (intersection != nullptr) { + *intersection = i; + } + return !(i.empty()); +} diff -Nru ceph-10.2.7/src/mds/InoTable.h ceph-10.2.9/src/mds/InoTable.h --- ceph-10.2.7/src/mds/InoTable.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mds/InoTable.h 2017-07-13 13:05:37.000000000 +0000 @@ -41,6 +41,9 @@ void replay_alloc_ids(interval_set& inos); void replay_release_ids(interval_set& inos); void replay_reset(); + bool intersects_free( + const interval_set &other, + interval_set *intersection); void reset_state(); void encode_state(bufferlist& bl) const { diff -Nru ceph-10.2.7/src/mds/Locker.cc ceph-10.2.9/src/mds/Locker.cc --- ceph-10.2.7/src/mds/Locker.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mds/Locker.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1905,10 +1905,12 @@ << " wanted " << ccap_string(wanted) << dendl; - // skip if suppress, and not revocation - if (cap->is_suppress() && !(pending & ~allowed)) { - dout(20) << " suppressed and !revoke, skipping client." << it->first << dendl; - continue; + if (!(pending & ~allowed)) { + // skip if suppress or new, and not revocation + if (cap->is_new() || cap->is_suppress()) { + dout(20) << " !revoke and new|suppressed, skipping client." << it->first << dendl; + continue; + } } // notify clients about deleted inode, to make sure they release caps ASAP. 
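The reworked skip test in the issue_caps hunk above reduces to a single predicate; a compact restatement, with pending, allowed and cap exactly as in the hunk (the behavioral change is that newly issued caps are now skipped alongside suppressed ones, while a revocation is never skipped):

    bool revoking    = (pending & ~allowed) != 0;   // some caps are being taken back
    bool skip_client = !revoking &&
                       (cap->is_new() || cap->is_suppress());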
@@ -2153,13 +2155,23 @@ } }; +uint64_t Locker::calc_new_max_size(inode_t *pi, uint64_t size) +{ + uint64_t new_max = (size + 1) << 1; + uint64_t max_inc = g_conf->mds_client_writeable_range_max_inc_objs; + if (max_inc > 0) { + max_inc *= pi->get_layout_size_increment(); + new_max = MIN(new_max, size + max_inc); + } + return ROUND_UP_TO(new_max, pi->get_layout_size_increment()); +} void Locker::calc_new_client_ranges(CInode *in, uint64_t size, map& new_ranges) { inode_t *latest = in->get_projected_inode(); uint64_t ms; if(latest->has_layout()) { - ms = ROUND_UP_TO((size+1)<<1, latest->get_layout_size_increment()); + ms = calc_new_max_size(latest, size); } else { // Layout-less directories like ~mds0/, have zero size ms = 0; @@ -2876,17 +2888,6 @@ } } -static uint64_t calc_bounding(uint64_t t) -{ - t |= t >> 1; - t |= t >> 2; - t |= t >> 4; - t |= t >> 8; - t |= t >> 16; - t |= t >> 32; - return t + 1; -} - /** * m and ack might be NULL, so don't dereference them unless dirty != 0 */ @@ -3089,11 +3090,9 @@ << " > max " << old_max << dendl; change_max = true; forced_change_max = true; - new_max = ROUND_UP_TO((m->get_max_size()+1) << 1, latest->get_layout_size_increment()); + new_max = calc_new_max_size(latest, m->get_max_size()); } else { - new_max = calc_bounding(size * 2); - if (new_max < latest->get_layout_size_increment()) - new_max = latest->get_layout_size_increment(); + new_max = calc_new_max_size(latest, size); if (new_max > old_max) change_max = true; diff -Nru ceph-10.2.7/src/mds/Locker.h ceph-10.2.9/src/mds/Locker.h --- ceph-10.2.7/src/mds/Locker.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mds/Locker.h 2017-07-13 13:05:37.000000000 +0000 @@ -250,6 +250,8 @@ void file_update_finish(CInode *in, MutationRef& mut, bool share, client_t client, Capability *cap, MClientCaps *ack); +private: + uint64_t calc_new_max_size(inode_t *pi, uint64_t size); public: void calc_new_client_ranges(CInode *in, uint64_t size, map& new_ranges); bool check_inode_max_size(CInode *in, bool force_wrlock=false, diff -Nru ceph-10.2.7/src/mds/MDCache.cc ceph-10.2.9/src/mds/MDCache.cc --- ceph-10.2.7/src/mds/MDCache.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mds/MDCache.cc 2017-07-13 13:05:37.000000000 +0000 @@ -5600,6 +5600,8 @@ mds->send_message_client_counted(stale, q->first); } } + + mds->heartbeat_reset(); } for (map >::iterator p = cap_reconnect_waiters.begin(); diff -Nru ceph-10.2.7/src/mds/MDCache.h ceph-10.2.9/src/mds/MDCache.h --- ceph-10.2.7/src/mds/MDCache.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mds/MDCache.h 2017-07-13 13:05:37.000000000 +0000 @@ -147,6 +147,10 @@ stray_index = (stray_index+1)%NUM_STRAY; } + void activate_stray_manager() { + stray_manager.activate(); + } + /** * Call this when you know that a CDentry is ready to be passed * on to StrayManager (i.e. 
this is a stray you've just created) diff -Nru ceph-10.2.7/src/mds/MDSContext.h ceph-10.2.9/src/mds/MDSContext.h --- ceph-10.2.7/src/mds/MDSContext.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mds/MDSContext.h 2017-07-13 13:05:37.000000000 +0000 @@ -114,12 +114,12 @@ /** * No-op for callers expecting MDSInternalContextBase */ -class C_MDSInternalNoop : public MDSInternalContextBase +class C_MDSInternalNoop final : public MDSInternalContextBase { virtual MDSRank* get_mds() {assert(0);} public: void finish(int r) {} - void complete(int r) {} + void complete(int r) { delete this; } }; diff -Nru ceph-10.2.7/src/mds/MDSRank.cc ceph-10.2.9/src/mds/MDSRank.cc --- ceph-10.2.7/src/mds/MDSRank.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mds/MDSRank.cc 2017-07-13 13:05:37.000000000 +0000 @@ -228,6 +228,10 @@ // shut down cache mdcache->shutdown(); + mds_lock.Unlock(); + finisher->stop(); // no flushing + mds_lock.Lock(); + if (objecter->initialized.read()) objecter->shutdown(); @@ -241,7 +245,6 @@ // MDSDaemon::ms_handle_reset called from Messenger). mds_lock.Unlock(); - finisher->stop(); // no flushing messenger->shutdown(); mds_lock.Lock(); @@ -427,6 +430,8 @@ m->put(); return false; } + + heartbeat_reset(); } if (dispatch_depth > 1) @@ -975,11 +980,45 @@ break; case MDS_BOOT_REPLAY_DONE: assert(is_any_replay()); + + // The session table and inotable should be in sync after replay; validate + // that they are consistent. + validate_sessions(); + replay_done(); break; } } +void MDSRank::validate_sessions() +{ + assert(mds_lock.is_locked_by_me()); + std::vector<Session*> victims; + + // Identify any sessions whose state is inconsistent with the inotable + // after they have been loaded from RADOS during startup. + // Mitigate bugs like: http://tracker.ceph.com/issues/16842 + const auto &sessions = sessionmap.get_sessions(); + for (const auto &i : sessions) { + Session *session = i.second; + interval_set<inodeno_t> badones; + if (inotable->intersects_free(session->info.prealloc_inos, &badones)) { + clog->error() << "Client session loaded with invalid preallocated " "inodes, evicting session " << *session; + + // Make the session consistent with the inotable so that it can + // be cleanly torn down + session->info.prealloc_inos.subtract(badones); + + victims.push_back(session); + } + } + + for (const auto &session: victims) { + server->kill_session(session, nullptr); + } +} + void MDSRank::starting_done() { dout(3) << "starting_done" << dendl; @@ -1268,6 +1307,7 @@ mdcache->start_files_to_recover(); mdcache->reissue_all_caps(); + mdcache->activate_stray_manager(); finish_contexts(g_ceph_context, waiting_for_active); // kick waiters } diff -Nru ceph-10.2.7/src/mds/MDSRank.h ceph-10.2.9/src/mds/MDSRank.h --- ceph-10.2.7/src/mds/MDSRank.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mds/MDSRank.h 2017-07-13 13:05:37.000000000 +0000 @@ -225,7 +225,6 @@ bool _dispatch(Message *m, bool new_msg); ceph::heartbeat_handle_d *hb; // Heartbeat for threads using mds_lock - void heartbeat_reset(); bool is_stale_message(Message *m); @@ -292,6 +291,12 @@ // <<< /** + * Call this periodically if inside a potentially long-running piece + * of code while holding the mds_lock + */ + void heartbeat_reset(); + + /** * Report state DAMAGED to the mon, and then pass on to respawn(). Call * this when an unrecoverable error is encountered while attempting * to load an MDS rank's data structures.
This is *not* for use with @@ -447,6 +452,8 @@ void active_start(); void stopping_start(); void stopping_done(); + + void validate_sessions(); // <<< // >>> diff -Nru ceph-10.2.7/src/mds/Server.cc ceph-10.2.9/src/mds/Server.cc --- ceph-10.2.7/src/mds/Server.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mds/Server.cc 2017-07-13 13:05:37.000000000 +0000 @@ -58,7 +58,6 @@ #include "osd/OSDMap.h" #include -#include #include #include @@ -769,6 +768,7 @@ // notify client of success with an OPEN m->get_connection()->send_message(new MClientSession(CEPH_SESSION_OPEN)); + session->last_cap_renew = ceph_clock_now(g_ceph_context); mds->clog->debug() << "reconnect by " << session->info.inst << " after " << delay << "\n"; // snaprealms @@ -2262,7 +2262,8 @@ CDir *straydir = mdcache->get_stray_dir(in); - if (!check_fragment_space(mdr, straydir)) + if (!mdr->client_request->is_replay() && + !check_fragment_space(mdr, straydir)) return NULL; straydn = mdcache->get_or_create_stray_dentry(in); @@ -2886,7 +2887,7 @@ int flags = req->head.args.open.flags; int cmode = ceph_flags_to_mode(flags); - bool need_auth = !file_mode_is_readonly(cmode) || (flags & O_TRUNC); + bool need_auth = !file_mode_is_readonly(cmode) || (flags & CEPH_O_TRUNC); dout(7) << "open on " << req->get_filepath() << dendl; @@ -2922,8 +2923,8 @@ // can only open non-regular inode with mode FILE_MODE_PIN, at least for now. cmode = CEPH_FILE_MODE_PIN; // the inode is symlink and client wants to follow it, ignore the O_TRUNC flag. - if (cur->inode.is_symlink() && !(flags & O_NOFOLLOW)) - flags &= ~O_TRUNC; + if (cur->inode.is_symlink() && !(flags & CEPH_O_NOFOLLOW)) + flags &= ~CEPH_O_TRUNC; } dout(10) << "open flags = " << flags @@ -2937,16 +2938,16 @@ respond_to_request(mdr, -ENXIO); // FIXME what error do we want? return; }*/ - if ((flags & O_DIRECTORY) && !cur->inode.is_dir() && !cur->inode.is_symlink()) { + if ((flags & CEPH_O_DIRECTORY) && !cur->inode.is_dir() && !cur->inode.is_symlink()) { dout(7) << "specified O_DIRECTORY on non-directory " << *cur << dendl; respond_to_request(mdr, -EINVAL); return; } - if ((flags & O_TRUNC) && !cur->inode.is_file()) { + if ((flags & CEPH_O_TRUNC) && !cur->inode.is_file()) { dout(7) << "specified O_TRUNC on !(file|symlink) " << *cur << dendl; // we should return -EISDIR for directory, return -EINVAL for other non-regular - respond_to_request(mdr, cur->inode.is_dir() ? EISDIR : -EINVAL); + respond_to_request(mdr, cur->inode.is_dir() ? -EISDIR : -EINVAL); return; } @@ -2981,7 +2982,7 @@ } // O_TRUNC - if ((flags & O_TRUNC) && !mdr->has_completed) { + if ((flags & CEPH_O_TRUNC) && !mdr->has_completed) { assert(cur->is_auth()); xlocks.insert(&cur->filelock); @@ -3121,7 +3122,7 @@ return; } - if (!(req->head.args.open.flags & O_EXCL)) { + if (!(req->head.args.open.flags & CEPH_O_EXCL)) { int r = mdcache->path_traverse(mdr, NULL, NULL, req->get_filepath(), &mdr->dn[0], NULL, MDS_TRAVERSE_FORWARD); if (r > 0) return; @@ -3144,7 +3145,7 @@ // r == -ENOENT } - bool excl = (req->head.args.open.flags & O_EXCL); + bool excl = (req->head.args.open.flags & CEPH_O_EXCL); set rdlocks, wrlocks, xlocks; file_layout_t *dir_layout = NULL; CDentry *dn = rdlock_path_xlock_dentry(mdr, 0, rdlocks, wrlocks, xlocks, @@ -3219,7 +3220,7 @@ if (!dnl->is_null()) { // it existed. 
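// Reading of this branch: a create request found an existing dentry, which
// is only expected for O_EXCL opens, and those fail with -EEXIST as POSIX
// requires. The assert below moves to the wire encoding (CEPH_O_EXCL)
// because req->head.args.open.flags now carries CEPH_O_* values produced by
// ceph_flags_sys2wire() on the client side.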
- assert(req->head.args.open.flags & O_EXCL); + assert(req->head.args.open.flags & CEPH_O_EXCL); dout(10) << "O_EXCL, target exists, failing with -EEXIST" << dendl; mdr->tracei = dnl->get_inode(); mdr->tracedn = dn; @@ -3378,7 +3379,8 @@ max = dir->get_num_any(); // whatever, something big. unsigned max_bytes = req->head.args.readdir.max_bytes; if (!max_bytes) - max_bytes = 512 << 10; // 512 KB? + // make sure at least one item can be encoded + max_bytes = (512 << 10) + g_conf->mds_max_xattr_pairs_size; // start final blob bufferlist dirbl; @@ -4457,6 +4459,25 @@ return; map<string,bufferptr> *pxattrs = cur->get_projected_xattrs(); + size_t len = req->get_data().length(); + size_t inc = len + name.length(); + + // check xattrs kv pairs size + size_t cur_xattrs_size = 0; + for (const auto& p : *pxattrs) { + if ((flags & CEPH_XATTR_REPLACE) && (name.compare(p.first) == 0)) { + continue; + } + cur_xattrs_size += p.first.length() + p.second.length(); + } + + if (((cur_xattrs_size + inc) > g_conf->mds_max_xattr_pairs_size)) { + dout(10) << "xattr kv pairs size too big. cur_xattrs_size " + << cur_xattrs_size << ", inc " << inc << dendl; + respond_to_request(mdr, -ENOSPC); + return; + } + if ((flags & CEPH_XATTR_CREATE) && pxattrs->count(name)) { dout(10) << "setxattr '" << name << "' XATTR_CREATE and EEXIST on " << *cur << dendl; respond_to_request(mdr, -EEXIST); @@ -4468,7 +4489,6 @@ return; } - int len = req->get_data().length(); dout(10) << "setxattr '" << name << "' len " << len << " on " << *cur << dendl; // project update @@ -4674,9 +4694,6 @@ newi->filelock.set_state(LOCK_EXCL); newi->authlock.set_state(LOCK_EXCL); newi->xattrlock.set_state(LOCK_EXCL); - cap->issue_norevoke(CEPH_CAP_AUTH_EXCL|CEPH_CAP_AUTH_SHARED| - CEPH_CAP_XATTR_EXCL|CEPH_CAP_XATTR_SHARED| - CEPH_CAP_ANY_FILE_WR); } } @@ -4772,8 +4789,6 @@ newi->filelock.set_state(LOCK_EXCL); newi->authlock.set_state(LOCK_EXCL); newi->xattrlock.set_state(LOCK_EXCL); - cap->issue_norevoke(CEPH_CAP_AUTH_EXCL|CEPH_CAP_AUTH_SHARED| - CEPH_CAP_XATTR_EXCL|CEPH_CAP_XATTR_SHARED); } // make sure this inode gets into the journal @@ -7947,7 +7962,8 @@ max_entries = infomap.size(); int max_bytes = req->head.args.readdir.max_bytes; if (!max_bytes) - max_bytes = 512 << 10; + // make sure at least one item can be encoded + max_bytes = (512 << 10) + g_conf->mds_max_xattr_pairs_size; __u64 last_snapid = 0; string offset_str = req->get_path2(); diff -Nru ceph-10.2.7/src/mds/SessionMap.cc ceph-10.2.9/src/mds/SessionMap.cc --- ceph-10.2.7/src/mds/SessionMap.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mds/SessionMap.cc 2017-07-13 13:05:37.000000000 +0000 @@ -999,3 +999,13 @@ return true; } +std::ostream& operator<<(std::ostream &out, const Session &s) +{ + if (s.get_human_name() == stringify(s.info.inst.name.num())) { + out << s.get_human_name(); + } else { + out << s.get_human_name() << " (" << std::dec << s.info.inst.name.num() << ")"; + } + return out; +} + diff -Nru ceph-10.2.7/src/mds/SessionMap.h ceph-10.2.9/src/mds/SessionMap.h --- ceph-10.2.7/src/mds/SessionMap.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mds/SessionMap.h 2017-07-13 13:05:37.000000000 +0000 @@ -636,5 +636,7 @@ MDSGatherBuilder *gather_bld); }; +std::ostream& operator<<(std::ostream &out, const Session &s); + #endif diff -Nru ceph-10.2.7/src/mds/StrayManager.cc ceph-10.2.9/src/mds/StrayManager.cc --- ceph-10.2.7/src/mds/StrayManager.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mds/StrayManager.cc 2017-07-13 13:05:37.000000000 +0000 @@ -290,11 +290,6 @@
dn->state_clear(CDentry::STATE_PURGING); dn->put(CDentry::PIN_PURGING); - // drop inode - if (in->is_dirty()) - in->mark_clean(); - in->mdcache->remove_inode(in); - // drop dentry? if (dn->is_new()) { dout(20) << " dn is new, removing" << dendl; @@ -303,6 +298,11 @@ } else { in->mdcache->touch_dentry_bottom(dn); // drop dn as quickly as possible. } + + // drop inode + if (in->is_dirty()) + in->mark_clean(); + in->mdcache->remove_inode(in); } void StrayManager::enqueue(CDentry *dn, bool trunc) @@ -384,6 +384,11 @@ { const int files_avail = g_conf->mds_max_purge_files - files_purging; + if (!started) { + dout(20) << __func__ << ": haven't started purging yet" << dendl; + return false; + } + if (files_avail <= 0) { dout(20) << __func__ << ": throttling on max files" << dendl; return false; } @@ -651,6 +656,13 @@ } } +void StrayManager::activate() +{ + dout(10) << __func__ << dendl; + started = true; + _advance(); +} + bool StrayManager::eval_stray(CDentry *dn, bool delay) { // avoid nested eval_stray @@ -756,7 +768,7 @@ StrayManager::StrayManager(MDSRank *mds) : delayed_eval_stray(member_offset(CDentry, item_stray)), - mds(mds), logger(NULL), + mds(mds), logger(NULL), started(false), ops_in_flight(0), files_purging(0), max_purge_ops(0), num_strays(0), num_strays_purging(0), num_strays_delayed(0), diff -Nru ceph-10.2.7/src/mds/StrayManager.h ceph-10.2.9/src/mds/StrayManager.h --- ceph-10.2.7/src/mds/StrayManager.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mds/StrayManager.h 2017-07-13 13:05:37.000000000 +0000 @@ -45,6 +45,8 @@ MDSRank *mds; PerfCounters *logger; + bool started; + // Throttled allowances uint64_t ops_in_flight; uint64_t files_purging; @@ -152,6 +154,7 @@ public: explicit StrayManager(MDSRank *mds); void set_logger(PerfCounters *l) {logger = l;} + void activate(); bool eval_stray(CDentry *dn, bool delay=false); diff -Nru ceph-10.2.7/src/mon/AuthMonitor.cc ceph-10.2.9/src/mon/AuthMonitor.cc --- ceph-10.2.7/src/mon/AuthMonitor.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mon/AuthMonitor.cc 2017-07-13 13:05:37.000000000 +0000 @@ -652,11 +652,15 @@ mon->key_server.export_keyring(keyring); } -void AuthMonitor::import_keyring(KeyRing& keyring) +int AuthMonitor::import_keyring(KeyRing& keyring) { for (map<EntityName, EntityAuth>::iterator p = keyring.get_keys().begin(); p != keyring.get_keys().end(); ++p) { + if (p->second.caps.empty()) { + dout(0) << "import: no caps supplied" << dendl; + return -EINVAL; + } KeyServerData::Incremental auth_inc; auth_inc.name = p->first; auth_inc.auth = p->second; @@ -665,6 +669,7 @@ dout(30) << " " << auth_inc.auth << dendl; push_cephx_inc(auth_inc); } + return 0; } bool AuthMonitor::prepare_command(MonOpRequestRef op) @@ -731,7 +736,13 @@ rs = err; goto done; } - import_keyring(keyring); + err = import_keyring(keyring); + if (err < 0) { + ss << "auth import: no caps supplied"; + getline(ss, rs); + mon->reply_command(op, -EINVAL, rs, get_last_committed()); + return true; + } ss << "imported keyring"; getline(ss, rs); err = 0; diff -Nru ceph-10.2.7/src/mon/AuthMonitor.h ceph-10.2.9/src/mon/AuthMonitor.h --- ceph-10.2.7/src/mon/AuthMonitor.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mon/AuthMonitor.h 2017-07-13 13:05:37.000000000 +0000 @@ -114,7 +114,7 @@ void upgrade_format(); void export_keyring(KeyRing& keyring); - void import_keyring(KeyRing& keyring); + int import_keyring(KeyRing& keyring); void push_cephx_inc(KeyServerData::Incremental& auth_inc) { Incremental inc; diff -Nru ceph-10.2.7/src/mon/MDSMonitor.cc
ceph-10.2.9/src/mon/MDSMonitor.cc --- ceph-10.2.7/src/mon/MDSMonitor.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mon/MDSMonitor.cc 2017-07-13 13:05:37.000000000 +0000 @@ -2198,7 +2198,6 @@ pending_fsmap.modify_daemon(gid, [state](MDSMap::mds_info_t *info) { info->state = state; }); - stringstream ss; ss << "set mds gid " << gid << " to state " << state << " " << ceph_mds_state_name(state); return 0; @@ -2229,7 +2228,6 @@ return -EBUSY; } else { pending_fsmap.erase(gid, {}); - stringstream ss; ss << "removed mds gid " << gid; return 0; } @@ -2259,7 +2257,6 @@ fs->mds_map.failed.erase(role.rank); }); - stringstream ss; ss << "removed failed mds." << role; return 0; } else if (prefix == "mds compat rm_compat") { diff -Nru ceph-10.2.7/src/mon/MonClient.cc ceph-10.2.9/src/mon/MonClient.cc --- ceph-10.2.7/src/mon/MonClient.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mon/MonClient.cc 2017-07-13 13:05:37.000000000 +0000 @@ -310,10 +310,10 @@ return true; } -void MonClient::send_log() +void MonClient::send_log(bool flush) { if (log_client) { - Message *lm = log_client->get_mon_log_message(); + Message *lm = log_client->get_mon_log_message(flush); if (lm) _send_mon_message(lm); more_log_pending = log_client->are_pending(); @@ -539,13 +539,9 @@ _send_mon_message(waiting_for_session.front()); waiting_for_session.pop_front(); } - _resend_mon_commands(); - if (log_client) { - log_client->reset_session(); - send_log(); - } + send_log(true); if (session_established_context) { cb = session_established_context; session_established_context = NULL; diff -Nru ceph-10.2.7/src/mon/MonClient.h ceph-10.2.9/src/mon/MonClient.h --- ceph-10.2.7/src/mon/MonClient.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mon/MonClient.h 2017-07-13 13:05:37.000000000 +0000 @@ -138,7 +138,7 @@ LogClient *log_client; bool more_log_pending; - void send_log(); + void send_log(bool flush = false); AuthMethodList *auth_supported; diff -Nru ceph-10.2.7/src/mon/OSDMonitor.cc ceph-10.2.9/src/mon/OSDMonitor.cc --- ceph-10.2.7/src/mon/OSDMonitor.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/mon/OSDMonitor.cc 2017-07-13 13:05:37.000000000 +0000 @@ -3044,9 +3044,8 @@ } // Not using 'sortbitwise' and should be? 
- if (g_conf->mon_warn_on_no_sortbitwise && - !osdmap.test_flag(CEPH_OSDMAP_SORTBITWISE) && - (osdmap.get_features(CEPH_ENTITY_TYPE_OSD, NULL) & + if (!osdmap.test_flag(CEPH_OSDMAP_SORTBITWISE) && + (osdmap.get_up_osd_features() & CEPH_FEATURE_OSD_BITWISE_HOBJ_SORT)) { ostringstream ss; ss << "no legacy OSD present but 'sortbitwise' flag is not set"; @@ -5743,7 +5742,11 @@ int id = newcrush.get_item_id(name); if (!newcrush.check_item_loc(g_ceph_context, id, loc, (int *)NULL)) { - err = newcrush.move_bucket(g_ceph_context, id, loc); + if (id >= 0) { + err = newcrush.create_or_move_item(g_ceph_context, id, 0, name, loc); + } else { + err = newcrush.move_bucket(g_ceph_context, id, loc); + } if (err >= 0) { ss << "moved item id " << id << " name '" << name << "' to location " << loc << " in crush map"; pending_inc.crush.clear(); diff -Nru ceph-10.2.7/src/msg/async/AsyncMessenger.cc ceph-10.2.9/src/msg/async/AsyncMessenger.cc --- ceph-10.2.7/src/msg/async/AsyncMessenger.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/msg/async/AsyncMessenger.cc 2017-07-13 13:05:37.000000000 +0000 @@ -87,7 +87,7 @@ listen_sd = -1; return r; } - + net.set_close_on_exec(listen_sd); net.set_socket_options(listen_sd); // use whatever user specified (if anything) @@ -239,6 +239,7 @@ int sd = ::accept(listen_sd, (sockaddr*)&addr.ss_addr(), &slen); if (sd >= 0) { errors = 0; + net.set_close_on_exec(sd); ldout(msgr->cct, 10) << __func__ << " accepted incoming on sd " << sd << dendl; msgr->add_accept(sd); diff -Nru ceph-10.2.7/src/msg/async/net_handler.cc ceph-10.2.9/src/msg/async/net_handler.cc --- ceph-10.2.7/src/msg/async/net_handler.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/msg/async/net_handler.cc 2017-07-13 13:05:37.000000000 +0000 @@ -71,6 +71,22 @@ return 0; } +void NetHandler::set_close_on_exec(int sd) +{ + int flags = fcntl(sd, F_GETFD, 0); + if (flags < 0) { + int r = errno; + lderr(cct) << __func__ << " fcntl(F_GETFD): " + << cpp_strerror(r) << dendl; + return; + } + if (fcntl(sd, F_SETFD, flags | FD_CLOEXEC)) { + int r = errno; + lderr(cct) << __func__ << " fcntl(F_SETFD): " + << cpp_strerror(r) << dendl; + } +} + void NetHandler::set_socket_options(int sd) { // disable Nagle algorithm? 
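The AsyncMessenger/NetHandler hunks above and the simple-messenger Accepter hunks below apply the same fix from two code paths: every listening and accepted socket is now marked close-on-exec, presumably so that sockets do not leak into child processes forked by the daemons. Both use the standard fcntl(2) read-modify-write idiom on the descriptor flags. A minimal standalone sketch of that idiom (plain POSIX C++; the helper name mark_cloexec is illustrative and not part of the patch):

    #include <cerrno>
    #include <fcntl.h>

    // Set FD_CLOEXEC on fd: read the current descriptor flags, OR in
    // FD_CLOEXEC, and write them back, mirroring the error-code-returning
    // style of the Accepter helper. Returns 0 on success, errno on failure.
    static int mark_cloexec(int fd)
    {
      int flags = fcntl(fd, F_GETFD, 0);
      if (flags < 0)
        return errno;   // could not read descriptor flags
      if (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) < 0)
        return errno;   // could not write them back
      return 0;
    }

One design note: setting the flag after accept(2) returns leaves a small window in which another thread could fork and exec, inheriting the new socket; accept4(2) with SOCK_CLOEXEC would close that window, but the fcntl approach used here is more portable across the platforms jewel targets.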
diff -Nru ceph-10.2.7/src/msg/async/net_handler.h ceph-10.2.9/src/msg/async/net_handler.h --- ceph-10.2.7/src/msg/async/net_handler.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/msg/async/net_handler.h 2017-07-13 13:05:37.000000000 +0000 @@ -28,6 +28,7 @@ public: explicit NetHandler(CephContext *c): cct(c) {} int set_nonblock(int sd); + void set_close_on_exec(int sd); void set_socket_options(int sd); int connect(const entity_addr_t &addr); diff -Nru ceph-10.2.7/src/msg/simple/Accepter.cc ceph-10.2.9/src/msg/simple/Accepter.cc --- ceph-10.2.7/src/msg/simple/Accepter.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/msg/simple/Accepter.cc 2017-07-13 13:05:37.000000000 +0000 @@ -37,6 +37,18 @@ * Accepter */ +static int set_close_on_exec(int fd) +{ + int flags = fcntl(fd, F_GETFD, 0); + if (flags < 0) { + return errno; + } + if (fcntl(fd, F_SETFD, flags | FD_CLOEXEC)) { + return errno; + } + return 0; +} + int Accepter::bind(const entity_addr_t &bind_addr, const set& avoid_ports) { const md_config_t *conf = msgr->cct->_conf; @@ -63,6 +75,11 @@ return -errno; } + if (set_close_on_exec(listen_sd)) { + lderr(msgr->cct) << "accepter.bind unable to set_close_exec(): " + << cpp_strerror(errno) << dendl; + } + // use whatever user specified (if anything) entity_addr_t listen_addr = bind_addr; listen_addr.set_family(family); @@ -240,6 +257,11 @@ socklen_t slen = sizeof(addr.ss_addr()); int sd = ::accept(listen_sd, (sockaddr*)&addr.ss_addr(), &slen); if (sd >= 0) { + int r = set_close_on_exec(sd); + if (r) { + ldout(msgr->cct,0) << "accepter set_close_on_exec() failed " + << cpp_strerror(r) << dendl; + } errors = 0; ldout(msgr->cct,10) << "accepted incoming on sd " << sd << dendl; diff -Nru ceph-10.2.7/src/msg/simple/Pipe.cc ceph-10.2.9/src/msg/simple/Pipe.cc --- ceph-10.2.7/src/msg/simple/Pipe.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/msg/simple/Pipe.cc 2017-07-13 13:05:37.000000000 +0000 @@ -827,7 +827,8 @@ int r = ::setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char*)&flag, sizeof(flag)); if (r < 0) { r = -errno; - ldout(msgr->cct,0) << "couldn't set TCP_NODELAY: " << cpp_strerror(r) << dendl; + ldout(msgr->cct,0) << "couldn't set TCP_NODELAY: " + << cpp_strerror(r) << dendl; } } if (msgr->cct->_conf->ms_tcp_rcvbuf) { @@ -835,7 +836,8 @@ int r = ::setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (void*)&size, sizeof(size)); if (r < 0) { r = -errno; - ldout(msgr->cct,0) << "couldn't set SO_RCVBUF to " << size << ": " << cpp_strerror(r) << dendl; + ldout(msgr->cct,0) << "couldn't set SO_RCVBUF to " << size + << ": " << cpp_strerror(r) << dendl; } } @@ -845,7 +847,8 @@ int r = ::setsockopt(sd, SOL_SOCKET, SO_NOSIGPIPE, (void*)&val, sizeof(val)); if (r) { r = -errno; - ldout(msgr->cct,0) << "couldn't set SO_NOSIGPIPE: " << cpp_strerror(r) << dendl; + ldout(msgr->cct,0) << "couldn't set SO_NOSIGPIPE: " + << cpp_strerror(r) << dendl; } #endif @@ -854,10 +857,24 @@ int r = -1; #ifdef IPTOS_CLASS_CS6 int iptos = IPTOS_CLASS_CS6; - r = ::setsockopt(sd, IPPROTO_IP, IP_TOS, &iptos, sizeof(iptos)); - if (r < 0) { - ldout(msgr->cct,0) << "couldn't set IP_TOS to " << iptos - << ": " << cpp_strerror(errno) << dendl; + + if (peer_addr.get_family() == AF_INET) { + r = ::setsockopt(sd, IPPROTO_IP, IP_TOS, &iptos, sizeof(iptos)); + if (r < 0) { + r = -errno; + ldout(msgr->cct,0) << "couldn't set IP_TOS to " << iptos + << ": " << cpp_strerror(r) << dendl; + } + } else if (peer_addr.get_family() == AF_INET6) { + r = ::setsockopt(sd, IPPROTO_IPV6, IPV6_TCLASS, &iptos, sizeof(iptos)); + if (r < 0) { 
+ r = -errno; + ldout(msgr->cct,0) << "couldn't set IPV6_TCLASS to " << iptos + << ": " << cpp_strerror(r) << dendl; + } + } else { + ldout(msgr->cct,0) << "couldn't set ToS of unknown family to " << iptos + << dendl; } #endif #if defined(SO_PRIORITY) @@ -868,8 +885,9 @@ r = ::setsockopt(sd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)); #endif if (r < 0) { + r = -errno; ldout(msgr->cct,0) << "couldn't set SO_PRIORITY to " << prio - << ": " << cpp_strerror(errno) << dendl; + << ": " << cpp_strerror(r) << dendl; } #endif } diff -Nru ceph-10.2.7/src/os/bluestore/bluefs_tool.cc ceph-10.2.9/src/os/bluestore/bluefs_tool.cc --- ceph-10.2.7/src/os/bluestore/bluefs_tool.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/os/bluestore/bluefs_tool.cc 2017-07-13 13:05:37.000000000 +0000 @@ -25,7 +25,8 @@ argv_to_vec(argc, (const char **)argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); g_ceph_context->_conf->set_val( "enable_experimental_unrecoverable_data_corrupting_features", diff -Nru ceph-10.2.7/src/os/filestore/DBObjectMap.cc ceph-10.2.9/src/os/filestore/DBObjectMap.cc --- ceph-10.2.7/src/os/filestore/DBObjectMap.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/os/filestore/DBObjectMap.cc 2017-07-13 13:05:37.000000000 +0000 @@ -54,22 +54,52 @@ } } -bool DBObjectMap::check(std::ostream &out) +int DBObjectMap::check(std::ostream &out, bool repair) { - bool retval = true; + int errors = 0; + bool repaired = false; map<uint64_t, uint64_t> parent_to_num_children; map<uint64_t, uint64_t> parent_to_actual_num_children; KeyValueDB::Iterator iter = db->get_iterator(HOBJECT_TO_SEQ); for (iter->seek_to_first(); iter->valid(); iter->next()) { _Header header; assert(header.num_children == 1); header.num_children = 0; // Hack for leaf node bufferlist bl = iter->value(); while (true) { bufferlist::iterator bliter = bl.begin(); header.decode(bliter); if (header.seq != 0) parent_to_actual_num_children[header.seq] = header.num_children; + + // Check complete table + bool complete_error = false; + boost::optional<string> prev; + KeyValueDB::Iterator complete_iter = db->get_iterator(USER_PREFIX + header_key(header.seq) + COMPLETE_PREFIX); + for (complete_iter->seek_to_first(); complete_iter->valid(); + complete_iter->next()) { + if (prev && prev >= complete_iter->key()) { + out << "Bad complete for " << header.oid << std::endl; + complete_error = true; + break; + } + prev = string(complete_iter->value().c_str(), complete_iter->value().length() - 1); + } + if (complete_error) { + out << "Complete mapping for " << header.seq << " :" << std::endl; + for (complete_iter->seek_to_first(); complete_iter->valid(); + complete_iter->next()) { + out << complete_iter->key() << " -> " << string(complete_iter->value().c_str(), complete_iter->value().length() - 1) << std::endl; + } + if (repair) { + repaired = true; + KeyValueDB::Transaction t = db->get_transaction(); + t->rmkeys_by_prefix(USER_PREFIX + header_key(header.seq) + COMPLETE_PREFIX); + db->submit_transaction(t); + out << "Cleared complete mapping to repair" << std::endl; + } else { + errors++; // Only count when not repaired + } + } + if (header.parent == 0) break; @@ -85,7 +115,7 @@ db->get(sys_parent_prefix(header), to_get, &got); if (got.empty()) { out << "Missing: seq " << header.parent << std::endl; - retval = false; + errors++; break; } else { bl = got.begin()->second; @@ -102,11 +132,13 @@ out << 
"Invalid: seq " << i->first << " recorded children: " << parent_to_actual_num_children[i->first] << " found: " << i->second << std::endl; - retval = false; + errors++; } parent_to_actual_num_children.erase(i->first); } - return retval; + if (errors == 0 && repaired) + return -1; + return errors; } string DBObjectMap::ghobject_key(const ghobject_t &oid) @@ -314,6 +346,17 @@ return adjust(); } +int DBObjectMap::DBObjectMapIteratorImpl::lower_bound_parent(const string &to) +{ + int r = lower_bound(to); + if (r < 0) + return r; + if (valid() && !on_parent()) + return next_parent(); + else + return r; +} + int DBObjectMap::DBObjectMapIteratorImpl::upper_bound(const string &after) { init(); @@ -354,39 +397,57 @@ int DBObjectMap::DBObjectMapIteratorImpl::next_parent() { - if (!parent_iter || !parent_iter->valid()) { - invalid = true; - return 0; - } r = next(); if (r < 0) return r; - if (!valid() || on_parent() || !parent_iter->valid()) - return 0; + while (parent_iter && parent_iter->valid() && !on_parent()) { + assert(valid()); + r = lower_bound(parent_iter->key()); + if (r < 0) + return r; + } - return lower_bound(parent_iter->key()); + if (!parent_iter || !parent_iter->valid()) { + invalid = true; + } + return 0; } int DBObjectMap::DBObjectMapIteratorImpl::in_complete_region(const string &to_test, string *begin, string *end) { + /* This is clumsy because one cannot call prev() on end(), nor can one + * test for == begin(). + */ complete_iter->upper_bound(to_test); - if (complete_iter->valid()) + if (complete_iter->valid()) { complete_iter->prev(); - else + if (!complete_iter->valid()) { + complete_iter->upper_bound(to_test); + return false; + } + } else { complete_iter->seek_to_last(); + if (!complete_iter->valid()) + return false; + } - if (!complete_iter->valid()) + assert(complete_iter->key() <= to_test); + assert(complete_iter->value().length() >= 1); + string _end(complete_iter->value().c_str(), + complete_iter->value().length() - 1); + if (_end.empty() || _end > to_test) { + if (begin) + *begin = complete_iter->key(); + if (end) + *end = _end; + return true; + } else { + complete_iter->next(); + assert(!complete_iter->valid() || complete_iter->key() > to_test); return false; - - string _end; - if (begin) - *begin = complete_iter->key(); - _end = string(complete_iter->value().c_str()); - if (end) - *end = _end; - return (to_test >= complete_iter->key()) && (!_end.size() || _end > to_test); + } } /** @@ -554,58 +615,6 @@ return 0; } -int DBObjectMap::merge_new_complete(Header header, - const map &new_complete, - DBObjectMapIterator iter, - KeyValueDB::Transaction t) -{ - KeyValueDB::Iterator complete_iter = db->get_iterator( - complete_prefix(header) - ); - map::const_iterator i = new_complete.begin(); - set to_remove; - map to_add; - - string begin, end; - while (i != new_complete.end()) { - string new_begin = i->first; - string new_end = i->second; - int r = iter->in_complete_region(new_begin, &begin, &end); - if (r < 0) - return r; - if (r) { - to_remove.insert(begin); - new_begin = begin; - } - ++i; - while (i != new_complete.end()) { - if (!new_end.size() || i->first <= new_end) { - if (!new_end.size() && i->second > new_end) { - new_end = i->second; - } - ++i; - continue; - } - - r = iter->in_complete_region(new_end, &begin, &end); - if (r < 0) - return r; - if (r) { - to_remove.insert(begin); - new_end = end; - continue; - } - break; - } - bufferlist bl; - bl.append(bufferptr(new_end.c_str(), new_end.size() + 1)); - to_add.insert(make_pair(new_begin, bl)); - } - 
t->rmkeys(complete_prefix(header), to_remove); - t->set(complete_prefix(header), to_add); - return 0; -} - int DBObjectMap::copy_up_header(Header header, KeyValueDB::Transaction t) { @@ -618,22 +627,6 @@ return 0; } -int DBObjectMap::need_parent(DBObjectMapIterator iter) -{ - int r = iter->seek_to_first(); - if (r < 0) - return r; - - if (!iter->valid()) - return 0; - - string begin, end; - if (iter->in_complete_region(iter->key(), &begin, &end) && end == "") { - return 0; - } - return 1; -} - int DBObjectMap::rm_keys(const ghobject_t &oid, const set &to_clear, const SequencerPosition *spos) @@ -650,62 +643,33 @@ return db->submit_transaction(t); } - // Copy up keys from parent around to_clear - int keep_parent; + assert(state.v < 3); + { - DBObjectMapIterator iter = _get_iterator(header); - iter->seek_to_first(); - map new_complete; + // We only get here for legacy (v2) stores + // Copy up all keys from parent excluding to_clear + // and remove parent + // This eliminates a v2 format use of complete for this oid only map to_write; - for(set::const_iterator i = to_clear.begin(); - i != to_clear.end(); - ) { - unsigned copied = 0; - iter->lower_bound(*i); - ++i; - if (!iter->valid()) - break; - string begin = iter->key(); - if (!iter->on_parent()) - iter->next_parent(); - if (new_complete.size() && new_complete.rbegin()->second == begin) { - begin = new_complete.rbegin()->first; - } - while (iter->valid() && copied < 20) { - if (!to_clear.count(iter->key())) - to_write[iter->key()].append(iter->value()); - if (i != to_clear.end() && *i <= iter->key()) { - ++i; - copied = 0; - } - - iter->next_parent(); - copied++; - } - if (iter->valid()) { - new_complete[begin] = iter->key(); - } else { - new_complete[begin] = ""; - break; - } + ObjectMapIterator iter = _get_iterator(header); + for (iter->seek_to_first() ; iter->valid() ; iter->next()) { + if (iter->status()) + return iter->status(); + if (!to_clear.count(iter->key())) + to_write[iter->key()] = iter->value(); } t->set(user_prefix(header), to_write); - merge_new_complete(header, new_complete, iter, t); - keep_parent = need_parent(iter); - if (keep_parent < 0) - return keep_parent; - } - if (!keep_parent) { - copy_up_header(header, t); - Header parent = lookup_parent(header); - if (!parent) - return -EINVAL; - parent->num_children--; - _clear(parent, t); - header->parent = 0; - set_map_header(hl, oid, *header, t); - t->rmkeys_by_prefix(complete_prefix(header)); - } + } // destruct iter which has parent in_use + + copy_up_header(header, t); + Header parent = lookup_parent(header); + if (!parent) + return -EINVAL; + parent->num_children--; + _clear(parent, t); + header->parent = 0; + set_map_header(hl, oid, *header, t); + t->rmkeys_by_prefix(complete_prefix(header)); return db->submit_transaction(t); } @@ -880,10 +844,14 @@ return db->submit_transaction(t); } -int DBObjectMap::clone(const ghobject_t &oid, +// ONLY USED FOR TESTING +// Set version to 2 to avoid asserts +int DBObjectMap::legacy_clone(const ghobject_t &oid, const ghobject_t &target, const SequencerPosition *spos) { + state.v = 2; + if (oid == target) return 0; @@ -936,6 +904,72 @@ return db->submit_transaction(t); } +int DBObjectMap::clone(const ghobject_t &oid, + const ghobject_t &target, + const SequencerPosition *spos) +{ + if (oid == target) + return 0; + + MapHeaderLock _l1(this, MIN_GHOBJ(oid, target, true)); + MapHeaderLock _l2(this, MAX_GHOBJ(oid, target, true)); + MapHeaderLock *lsource, *ltarget; + if (cmp_bitwise(oid, target) > 0) { + lsource = &_l2; + ltarget= &_l1; + 
} else { + lsource = &_l1; + ltarget= &_l2; + } + + KeyValueDB::Transaction t = db->get_transaction(); + { + Header destination = lookup_map_header(*ltarget, target); + if (destination) { + if (check_spos(target, destination, spos)) + return 0; + destination->num_children--; + remove_map_header(*ltarget, target, destination, t); + _clear(destination, t); + } + } + + Header source = lookup_map_header(*lsource, oid); + if (!source) + return db->submit_transaction(t); + + Header destination = generate_new_header(target, Header()); + if (spos) + destination->spos = *spos; + + set_map_header(*ltarget, target, *destination, t); + + bufferlist bl; + int r = _get_header(source, &bl); + if (r < 0) + return r; + _set_header(destination, bl, t); + + map<string, bufferlist> to_set; + KeyValueDB::Iterator xattr_iter = db->get_iterator(xattr_prefix(source)); + for (xattr_iter->seek_to_first(); + xattr_iter->valid(); + xattr_iter->next()) + to_set.insert(make_pair(xattr_iter->key(), xattr_iter->value())); + t->set(xattr_prefix(destination), to_set); + + map<string, bufferlist> to_write; + ObjectMapIterator iter = _get_iterator(source); + for (iter->seek_to_first() ; iter->valid() ; iter->next()) { + if (iter->status()) + return iter->status(); + to_write[iter->key()] = iter->value(); + } + t->set(user_prefix(destination), to_write); + + return db->submit_transaction(t); +} + int DBObjectMap::upgrade_to_v2() { dout(1) << __func__ << " start" << dendl; @@ -1023,9 +1057,17 @@ } } else { // New store - state.v = 2; + // Version 3 means that complete regions never used + state.v = 3; state.seq = 1; } + ostringstream ss; + int errors = check(ss, true); + if (errors) { + derr << ss.str() << dendl; + if (errors > 0) + return -EINVAL; + } dout(20) << "(init)dbobjectmap: seq is " << state.seq << dendl; return 0; } @@ -1150,9 +1192,9 @@ } Header header = Header(new _Header(), RemoveOnDelete(this)); - header->seq = input->parent; bufferlist::iterator iter = out.begin()->second.begin(); header->decode(iter); + assert(header->seq == input->parent); dout(20) << "lookup_parent: parent seq is " << header->seq << " with parent " << header->parent << dendl; in_use.insert(header->seq); @@ -1178,7 +1220,8 @@ dout(20) << "clear_header: clearing seq " << header->seq << dendl; t->rmkeys_by_prefix(user_prefix(header)); t->rmkeys_by_prefix(sys_prefix(header)); - t->rmkeys_by_prefix(complete_prefix(header)); + if (state.v < 3) + t->rmkeys_by_prefix(complete_prefix(header)); // Needed when header.parent != 0 t->rmkeys_by_prefix(xattr_prefix(header)); set<string> keys; keys.insert(header_key(header->seq)); @@ -1262,3 +1305,82 @@ } return 0; } + +int DBObjectMap::list_object_headers(vector<_Header> *out) +{ + int error = 0; + KeyValueDB::Iterator iter = db->get_iterator(HOBJECT_TO_SEQ); + for (iter->seek_to_first(); iter->valid(); iter->next()) { + bufferlist bl = iter->value(); + bufferlist::iterator bliter = bl.begin(); + _Header header; + header.decode(bliter); + out->push_back(header); + while (header.parent) { + set<string> to_get; + map<string, bufferlist> got; + to_get.insert(HEADER_KEY); + db->get(sys_parent_prefix(header), to_get, &got); + if (got.empty()) { + dout(0) << "Missing: seq " << header.parent << dendl; + error = -ENOENT; + break; + } else { + bl = got.begin()->second; + bufferlist::iterator bliter = bl.begin(); + header.decode(bliter); + out->push_back(header); + } + } + } + return error; +} + +ostream& operator<<(ostream& out, const DBObjectMap::_Header& h) +{ + out << "seq=" << h.seq << " parent=" << h.parent + << " num_children=" << h.num_children + << " ghobject=" << h.oid; + return 
out; +} + +int DBObjectMap::rename(const ghobject_t &from, + const ghobject_t &to, + const SequencerPosition *spos) +{ + if (from == to) + return 0; + + MapHeaderLock _l1(this, MIN_GHOBJ(from, to, true)); + MapHeaderLock _l2(this, MAX_GHOBJ(from, to, true)); + MapHeaderLock *lsource, *ltarget; + if (cmp_bitwise(from, to) > 0) { + lsource = &_l2; + ltarget= &_l1; + } else { + lsource = &_l1; + ltarget= &_l2; + } + + KeyValueDB::Transaction t = db->get_transaction(); + { + Header destination = lookup_map_header(*ltarget, to); + if (destination) { + if (check_spos(to, destination, spos)) + return 0; + destination->num_children--; + remove_map_header(*ltarget, to, destination, t); + _clear(destination, t); + } + } + + Header hdr = lookup_map_header(*lsource, from); + if (!hdr) + return db->submit_transaction(t); + + remove_map_header(*lsource, from, hdr, t); + hdr->oid = to; + set_map_header(*ltarget, to, *hdr, t); + + return db->submit_transaction(t); +} diff -Nru ceph-10.2.7/src/os/filestore/DBObjectMap.h ceph-10.2.9/src/os/filestore/DBObjectMap.h --- ceph-10.2.7/src/os/filestore/DBObjectMap.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/os/filestore/DBObjectMap.h 2017-07-13 13:05:37.000000000 +0000 @@ -30,7 +30,7 @@ * @see user_prefix * @see sys_prefix * - * - GHOBJECT_TO_SEQ: Contains leaf mapping from ghobject_t->hobj.seq and + * - HOBJECT_TO_SEQ: Contains leaf mapping from ghobject_t->header.seq and * corresponding omap header * - SYS_PREFIX: GLOBAL_STATE_KEY - contains next seq number * @see State @@ -205,6 +205,18 @@ const SequencerPosition *spos=0 ); + int rename( + const ghobject_t &from, + const ghobject_t &to, + const SequencerPosition *spos=0 + ); + + int legacy_clone( + const ghobject_t &oid, + const ghobject_t &target, + const SequencerPosition *spos=0 + ); + /// Read initial state from backing store int init(bool upgrade = false); @@ -212,15 +224,20 @@ int upgrade_to_v2(); /// Consistency check, debug, there must be no parallel writes - bool check(std::ostream &out); + int check(std::ostream &out, bool repair = false); /// Ensure that all previous operations are durable int sync(const ghobject_t *oid=0, const SequencerPosition *spos=0); - /// Util, list all objects, there must be no other concurrent access + /// Util, get all objects, there must be no other concurrent access int list_objects(vector *objs ///< [out] objects ); + struct _Header; + // Util, get all object headers, there must be no other concurrent access + int list_object_headers(vector<_Header> *out ///< [out] headers + ); + ObjectMapIterator get_iterator(const ghobject_t &oid); static const string USER_PREFIX; @@ -275,28 +292,29 @@ uint64_t parent; uint64_t num_children; - coll_t c; ghobject_t oid; SequencerPosition spos; void encode(bufferlist &bl) const { + coll_t unused; ENCODE_START(2, 1, bl); ::encode(seq, bl); ::encode(parent, bl); ::encode(num_children, bl); - ::encode(c, bl); + ::encode(unused, bl); ::encode(oid, bl); ::encode(spos, bl); ENCODE_FINISH(bl); } void decode(bufferlist::iterator &bl) { + coll_t unused; DECODE_START(2, bl); ::decode(seq, bl); ::decode(parent, bl); ::decode(num_children, bl); - ::decode(c, bl); + ::decode(unused, bl); ::decode(oid, bl); if (struct_v >= 2) ::decode(spos, bl); @@ -307,7 +325,6 @@ f->dump_unsigned("seq", seq); f->dump_unsigned("parent", parent); f->dump_unsigned("num_children", num_children); - f->dump_stream("coll") << c; f->dump_stream("oid") << oid; } @@ -398,8 +415,15 @@ /// skips to next valid parent entry int next_parent(); - - /// Tests whether 
to_test is in complete region + + /// first parent() >= to + int lower_bound_parent(const string &to); + + /** + * Tests whether to_test is in complete region + * + * postcondition: complete_iter will be max s.t. complete_iter->value > to_test + */ int in_complete_region(const string &to_test, ///< [in] key to test string *begin, ///< [out] beginning of region string *end ///< [out] end of region @@ -486,19 +510,19 @@ /// Remove header and all related prefixes int _clear(Header header, KeyValueDB::Transaction t); - /// Adds to t operations necessary to add new_complete to the complete set - int merge_new_complete(Header header, - const map &new_complete, - DBObjectMapIterator iter, - KeyValueDB::Transaction t); + + /* Scan complete region bumping *begin to the beginning of any + * containing region and adding all complete region keys between + * the updated begin and end to the complete_keys_to_remove set */ + int merge_new_complete(DBObjectMapIterator &iter, + string *begin, + const string &end, + set *complete_keys_to_remove); /// Writes out State (mainly next_seq) int write_state(KeyValueDB::Transaction _t = KeyValueDB::Transaction()); - /// 0 if the complete set now contains all of key space, < 0 on error, 1 else - int need_parent(DBObjectMapIterator iter); - /// Copies header entry from parent @see rm_keys int copy_up_header(Header header, KeyValueDB::Transaction t); @@ -531,4 +555,6 @@ WRITE_CLASS_ENCODER(DBObjectMap::_Header) WRITE_CLASS_ENCODER(DBObjectMap::State) +ostream& operator<<(ostream& out, const DBObjectMap::_Header& h); + #endif diff -Nru ceph-10.2.7/src/os/filestore/FileStore.cc ceph-10.2.9/src/os/filestore/FileStore.cc --- ceph-10.2.7/src/os/filestore/FileStore.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/os/filestore/FileStore.cc 2017-07-13 13:05:37.000000000 +0000 @@ -3103,13 +3103,16 @@ last = extent++; } const bool is_last = last->fe_flags & FIEMAP_EXTENT_LAST; - free(fiemap); if (!is_last) { uint64_t xoffset = last->fe_logical + last->fe_length - offset; offset = last->fe_logical + last->fe_length; len -= xoffset; + free(fiemap); /* fix clang warn: use-after-free */ goto more; } + else { + free(fiemap); + } return r; } @@ -5254,7 +5257,7 @@ if (r == 0) { // the name changed; link the omap content - r = object_map->clone(oldoid, o, &spos); + r = object_map->rename(oldoid, o, &spos); if (r == -ENOENT) r = 0; } @@ -5520,7 +5523,10 @@ dout(15) << "set_alloc_hint " << cid << "/" << oid << " object_size " << expected_object_size << " write_size " << expected_write_size << dendl; FDRef fd; - int ret; + int ret = 0; + + if (expected_object_size == 0 || expected_write_size == 0) + goto out; ret = lfn_open(cid, oid, false, &fd); if (ret < 0) diff -Nru ceph-10.2.7/src/os/filestore/HashIndex.cc ceph-10.2.9/src/os/filestore/HashIndex.cc --- ceph-10.2.7/src/os/filestore/HashIndex.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/os/filestore/HashIndex.cc 2017-07-13 13:05:37.000000000 +0000 @@ -315,6 +315,8 @@ } if (must_split(info)) { + dout(1) << __func__ << " " << path << " has " << info.objs + << " objects, starting split." << dendl; r = initiate_split(path, info); if (r < 0) { dout(10) << "error initiating split on " << path << ": " @@ -323,6 +325,8 @@ } r = complete_split(path, info); + dout(1) << __func__ << " " << path << " split completed." 
+ << dendl; if (r < 0) { dout(10) << "error completing split on " << path << ": " << cpp_strerror(r) << dendl; @@ -378,10 +382,15 @@ return r; if (must_split(info)) { + dout(1) << __func__ << " " << path << " has " << info.objs + << " objects, starting split." << dendl; int r = initiate_split(path, info); if (r < 0) return r; - return complete_split(path, info); + r = complete_split(path, info); + dout(1) << __func__ << " " << path << " split completed." + << dendl; + return r; } else { return 0; } diff -Nru ceph-10.2.7/src/os/ObjectMap.h ceph-10.2.9/src/os/ObjectMap.h --- ceph-10.2.7/src/os/ObjectMap.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/os/ObjectMap.h 2017-07-13 13:05:37.000000000 +0000 @@ -124,20 +124,34 @@ ) = 0; - /// Clone keys efficiently from oid map to target map + /// Clone keys from oid map to target map virtual int clone( const ghobject_t &oid, ///< [in] object containing map const ghobject_t &target, ///< [in] target of clone const SequencerPosition *spos=0 ///< [in] sequencer position ) { return 0; } + /// Rename map because of name change + virtual int rename( + const ghobject_t &from, ///< [in] object containing map + const ghobject_t &to, ///< [in] new name + const SequencerPosition *spos=0 ///< [in] sequencer position + ) { return 0; } + + /// For testing clone keys from oid map to target map using faster but more complex method + virtual int legacy_clone( + const ghobject_t &oid, ///< [in] object containing map + const ghobject_t &target, ///< [in] target of clone + const SequencerPosition *spos=0 ///< [in] sequencer position + ) { return 0; } + /// Ensure all previous writes are durable virtual int sync( const ghobject_t *oid=0, ///< [in] object const SequencerPosition *spos=0 ///< [in] Sequencer ) { return 0; } - virtual bool check(std::ostream &out) { return true; } + virtual int check(std::ostream &out, bool repair = false) { return 0; } typedef KeyValueDB::GenericIteratorImpl ObjectMapIteratorImpl; typedef ceph::shared_ptr ObjectMapIterator; diff -Nru ceph-10.2.7/src/os/ObjectStore.h ceph-10.2.9/src/os/ObjectStore.h --- ceph-10.2.7/src/os/ObjectStore.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/os/ObjectStore.h 2017-07-13 13:05:37.000000000 +0000 @@ -1718,6 +1718,8 @@ data.ops++; } + /// Set allocation hint for an object + /// make 0 values(expected_object_size, expected_write_size) noops for all implementations void set_alloc_hint( coll_t cid, const ghobject_t &oid, diff -Nru ceph-10.2.7/src/osd/OSD.cc ceph-10.2.9/src/osd/OSD.cc --- ceph-10.2.7/src/osd/OSD.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osd/OSD.cc 2017-07-13 13:05:37.000000000 +0000 @@ -254,6 +254,14 @@ remote_reserver(&reserver_finisher, cct->_conf->osd_max_backfills, cct->_conf->osd_min_recovery_priority), pg_temp_lock("OSDService::pg_temp_lock"), + snap_sleep_lock("OSDService::snap_sleep_lock"), + snap_sleep_timer( + osd->client_messenger->cct, snap_sleep_lock, false /* relax locking */), + scrub_sleep_lock("OSDService::scrub_sleep_lock"), + scrub_sleep_timer( + osd->client_messenger->cct, scrub_sleep_lock, false /* relax locking */), + snap_reserver(&reserver_finisher, + cct->_conf->osd_max_trimming_pgs), map_cache_lock("OSDService::map_lock"), map_cache(cct, cct->_conf->osd_map_cache_size), map_bl_cache(cct->_conf->osd_map_cache_size), @@ -482,6 +490,17 @@ Mutex::Locker l(backfill_request_lock); backfill_request_timer.shutdown(); } + + { + Mutex::Locker l(snap_sleep_lock); + snap_sleep_timer.shutdown(); + } + + { + Mutex::Locker l(scrub_sleep_lock); 
+ scrub_sleep_timer.shutdown(); + } + osdmap = OSDMapRef(); next_osdmap = OSDMapRef(); } @@ -493,6 +512,8 @@ objecter->set_client_incarnation(0); watch_timer.init(); agent_timer.init(); + snap_sleep_timer.init(); + scrub_sleep_timer.init(); agent_thread.create("osd_srv_agent"); } @@ -3104,6 +3125,11 @@ return pg; } +PG *OSD::lookup_lock_pg(spg_t pgid) +{ + return _lookup_lock_pg(pgid); +} + PG *OSD::_lookup_pg(spg_t pgid) { @@ -6658,7 +6684,8 @@ session->put(); // share with the objecter - service.objecter->handle_osd_map(m); + if (!is_preboot()) + service.objecter->handle_osd_map(m); epoch_t first = m->get_first(); epoch_t last = m->get_last(); @@ -8990,6 +9017,8 @@ "clog_to_graylog_port", "host", "fsid", + "osd_client_message_size_cap", + "osd_client_message_cap", NULL }; return KEYS; @@ -9006,6 +9035,9 @@ service.local_reserver.set_min_priority(cct->_conf->osd_min_recovery_priority); service.remote_reserver.set_min_priority(cct->_conf->osd_min_recovery_priority); } + if (changed.count("osd_max_trimming_pgs")) { + service.snap_reserver.set_max(cct->_conf->osd_max_trimming_pgs); + } if (changed.count("osd_op_complaint_time") || changed.count("osd_op_log_threshold")) { op_tracker.set_complaint_and_threshold(cct->_conf->osd_op_complaint_time, @@ -9046,6 +9078,22 @@ } } #endif + + if (changed.count("osd_client_message_cap")) { + uint64_t newval = cct->_conf->osd_client_message_cap; + Messenger::Policy pol = client_messenger->get_policy(entity_name_t::TYPE_CLIENT); + if (pol.throttler_messages && newval > 0) { + pol.throttler_messages->reset_max(newval); + } + } + if (changed.count("osd_client_message_size_cap")) { + uint64_t newval = cct->_conf->osd_client_message_size_cap; + Messenger::Policy pol = client_messenger->get_policy(entity_name_t::TYPE_CLIENT); + if (pol.throttler_bytes && newval > 0) { + pol.throttler_bytes->reset_max(newval); + } + } + check_config(); } diff -Nru ceph-10.2.7/src/osd/OSD.h ceph-10.2.9/src/osd/OSD.h --- ceph-10.2.7/src/osd/OSD.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osd/OSD.h 2017-07-13 13:05:37.000000000 +0000 @@ -861,6 +861,15 @@ ceph_clock_now(cct), entity_inst_t()))); } + + Mutex snap_sleep_lock; + SafeTimer snap_sleep_timer; + + Mutex scrub_sleep_lock; + SafeTimer scrub_sleep_timer; + + AsyncReserver snap_reserver; + void queue_for_scrub(PG *pg) { op_wq.queue( make_pair( @@ -868,7 +877,7 @@ PGQueueable( PGScrub(pg->get_osdmap()->get_epoch()), cct->_conf->osd_scrub_cost, - pg->get_scrub_priority(), + pg->scrubber.priority, ceph_clock_now(cct), entity_inst_t()))); } @@ -1949,6 +1958,10 @@ bool _have_pg(spg_t pgid); PG *_lookup_lock_pg_with_map_lock_held(spg_t pgid); PG *_lookup_lock_pg(spg_t pgid); +public: + PG *lookup_lock_pg(spg_t pgid); + +protected: PG *_lookup_pg(spg_t pgid); PG *_open_lock_pg(OSDMapRef createmap, spg_t pg, bool no_lockdep_check=false); diff -Nru ceph-10.2.7/src/osd/OSDMap.cc ceph-10.2.9/src/osd/OSDMap.cc --- ceph-10.2.7/src/osd/OSDMap.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osd/OSDMap.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1356,7 +1356,6 @@ (*osd_uuid)[i->first] = uuid_d(); osd_info[i->first] = osd_info_t(); osd_xinfo[i->first] = osd_xinfo_t(); - osd_weight[i->first] = CEPH_OSD_IN; set_primary_affinity(i->first, CEPH_OSD_DEFAULT_PRIMARY_AFFINITY); osd_addrs->client_addr[i->first].reset(new entity_addr_t()); osd_addrs->cluster_addr[i->first].reset(new entity_addr_t()); diff -Nru ceph-10.2.7/src/osd/osd_types.cc ceph-10.2.9/src/osd/osd_types.cc --- ceph-10.2.7/src/osd/osd_types.cc 2017-04-10 
11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osd/osd_types.cc 2017-07-13 13:05:37.000000000 +0000 @@ -835,6 +835,10 @@ oss << "incomplete+"; if (state & PG_STATE_PEERED) oss << "peered+"; + if (state & PG_STATE_SNAPTRIM) + oss << "snaptrim+"; + if (state & PG_STATE_SNAPTRIM_WAIT) + oss << "snaptrim_wait+"; string ret(oss.str()); if (ret.length() > 0) ret.resize(ret.length() - 1); @@ -892,6 +896,10 @@ type = PG_STATE_ACTIVATING; else if (state == "peered") type = PG_STATE_PEERED; + else if (state == "snaptrim") + type = PG_STATE_SNAPTRIM; + else if (state == "snaptrim_wait") + type = PG_STATE_SNAPTRIM_WAIT; else type = -1; return type; @@ -1498,6 +1506,12 @@ // this was the first post-hammer thing we added; if it's missing, encode // like hammer. v = 21; + if (!(features & CEPH_FEATURE_OSD_HITSET_GMT)) { + // CEPH_FEATURE_OSD_HITSET_GMT requires pg_pool_t v21 which has + // use_gmt_hitset, and two fields added before v21. + // See http://tracker.ceph.com/issues/19508 + v = 17; + } } ENCODE_START(v, 5, bl); @@ -4724,7 +4738,7 @@ ++i) { old_watchers.insert(make_pair(i->first.second, i->second)); } - ENCODE_START(15, 8, bl); + ENCODE_START(16, 8, bl); ::encode(soid, bl); ::encode(myoloc, bl); //Retained for compatibility ::encode((__u32)0, bl); // was category, no longer used @@ -4752,13 +4766,16 @@ ::encode(local_mtime, bl); ::encode(data_digest, bl); ::encode(omap_digest, bl); + ::encode(expected_object_size, bl); + ::encode(expected_write_size, bl); + ::encode(alloc_hint_flags, bl); ENCODE_FINISH(bl); } void object_info_t::decode(bufferlist::iterator& bl) { object_locator_t myoloc; - DECODE_START_LEGACY_COMPAT_LEN(15, 8, 8, bl); + DECODE_START_LEGACY_COMPAT_LEN(16, 8, 8, bl); map<entity_name_t, watch_info_t> old_watchers; ::decode(soid, bl); ::decode(myoloc, bl); @@ -4831,6 +4848,15 @@ clear_flag(FLAG_DATA_DIGEST); clear_flag(FLAG_OMAP_DIGEST); } + if (struct_v >= 16) { + ::decode(expected_object_size, bl); + ::decode(expected_write_size, bl); + ::decode(alloc_hint_flags, bl); + } else { + expected_object_size = 0; + expected_write_size = 0; + alloc_hint_flags = 0; + } DECODE_FINISH(bl); } @@ -4856,6 +4882,8 @@ f->dump_unsigned("truncate_size", truncate_size); f->dump_unsigned("data_digest", data_digest); f->dump_unsigned("omap_digest", omap_digest); + f->dump_unsigned("expected_object_size", expected_object_size); + f->dump_unsigned("expected_write_size", expected_write_size); f->open_object_section("watchers"); for (map<pair<uint64_t, entity_name_t>,watch_info_t>::const_iterator p = watchers.begin(); p != watchers.end(); ++p) { @@ -4890,6 +4918,8 @@ out << " dd " << std::hex << oi.data_digest << std::dec; if (oi.is_omap_digest()) out << " od " << std::hex << oi.omap_digest << std::dec; + out << " alloc_hint [" << oi.expected_object_size + << " " << oi.expected_write_size << "]"; out << ")"; return out; diff -Nru ceph-10.2.7/src/osd/osd_types.h ceph-10.2.9/src/osd/osd_types.h --- ceph-10.2.7/src/osd/osd_types.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osd/osd_types.h 2017-07-13 13:05:37.000000000 +0000 @@ -929,6 +929,8 @@ #define PG_STATE_UNDERSIZED (1<<23) // pg acting < pool size #define PG_STATE_ACTIVATING (1<<24) // pg is peered but not yet active #define PG_STATE_PEERED (1<<25) // peered, cannot go active, can recover +#define PG_STATE_SNAPTRIM (1<<26) // trimming snaps +#define PG_STATE_SNAPTRIM_WAIT (1<<27) // queued to trim snaps std::string pg_state_string(int state); std::string pg_vector_string(const vector<int> &a); @@ -3325,6 +3327,10 @@ // opportunistic checksums; may or may not be present __u32 data_digest; ///< data 
crc32c __u32 omap_digest; ///< omap crc32c + + // alloc hint attribute + uint64_t expected_object_size, expected_write_size; + uint32_t alloc_hint_flags; void copy_user_bits(const object_info_t& other); @@ -3395,14 +3401,18 @@ explicit object_info_t() : user_version(0), size(0), flags((flag_t)0), truncate_seq(0), truncate_size(0), - data_digest(-1), omap_digest(-1) + data_digest(-1), omap_digest(-1), + expected_object_size(0), expected_write_size(0), + alloc_hint_flags(0) {} explicit object_info_t(const hobject_t& s) : soid(s), user_version(0), size(0), flags((flag_t)0), truncate_seq(0), truncate_size(0), - data_digest(-1), omap_digest(-1) + data_digest(-1), omap_digest(-1), + expected_object_size(0), expected_write_size(0), + alloc_hint_flags(0) {} explicit object_info_t(bufferlist& bl) { diff -Nru ceph-10.2.7/src/osd/PGBackend.cc ceph-10.2.9/src/osd/PGBackend.cc --- ceph-10.2.7/src/osd/PGBackend.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osd/PGBackend.cc 2017-07-13 13:05:37.000000000 +0000 @@ -666,6 +666,7 @@ be_select_auth_object(*k, maps, &auth_oi, shard_map, object_error); list<pg_shard_t> auth_list; + set<pg_shard_t> object_errors; if (auth == maps.end()) { object_error.set_version(0); object_error.set_auth_missing(*k, maps, shard_map, shallow_errors, deep_errors); @@ -709,6 +710,10 @@ if (found) errorstream << pgid << " shard " << j->first << ": soid " << *k << " " << ss.str() << "\n"; + } else if (found) { + // Track possible shard to use as authoritative, if needed + // There are errors, without identifying the shard + object_errors.insert(j->first); } else { // XXX: The auth shard might get here that we don't know // that it has the "correct" data. @@ -726,10 +731,25 @@ } if (auth_list.empty()) { - errorstream << pgid.pgid << " soid " << *k + if (object_errors.empty()) { + errorstream << pgid.pgid << " soid " << *k << ": failed to pick suitable auth object\n"; - goto out; + goto out; + } + // Object errors exist and we haven't found an authoritative shard + // Prefer the primary shard otherwise take first from list. + pg_shard_t auth_shard; + if (object_errors.count(get_parent()->whoami_shard())) { + auth_shard = get_parent()->whoami_shard(); + } else { + auth_shard = *(object_errors.begin()); + } + auth_list.push_back(auth_shard); + object_errors.erase(auth_shard); } + // At this point auth_list is populated, so we add the object errors shards + // as inconsistent. 
+ cur_inconsistent.insert(object_errors.begin(), object_errors.end()); if (!cur_missing.empty()) { missing[*k] = cur_missing; } diff -Nru ceph-10.2.7/src/osd/PG.cc ceph-10.2.9/src/osd/PG.cc --- ceph-10.2.7/src/osd/PG.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osd/PG.cc 2017-07-13 13:05:37.000000000 +0000 @@ -991,7 +991,9 @@ * 3) Prefer current primary */ map::const_iterator PG::find_best_info( - const map &infos, bool *history_les_bound) const + const map &infos, + bool restrict_to_up_acting, + bool *history_les_bound) const { assert(history_les_bound); /* See doc/dev/osd_internals/last_epoch_started.rst before attempting @@ -1031,6 +1033,9 @@ for (map::const_iterator p = infos.begin(); p != infos.end(); ++p) { + if (restrict_to_up_acting && !is_up(p->first) && + !is_acting(p->first)) + continue; // Only consider peers with last_update >= min_last_update_acceptable if (p->second.last_update < min_last_update_acceptable) continue; @@ -1089,17 +1094,19 @@ pg_shard_t up_primary, const map &all_info, bool compat_mode, + bool restrict_to_up_acting, vector *_want, set *backfill, set *acting_backfill, pg_shard_t *want_primary, - ostream &ss) { + ostream &ss) +{ vector want(size, CRUSH_ITEM_NONE); map > all_info_by_shard; unsigned usable = 0; - for(map::const_iterator i = all_info.begin(); - i != all_info.end(); - ++i) { + for (map::const_iterator i = all_info.begin(); + i != all_info.end(); + ++i) { all_info_by_shard[i->first.shard].insert(i->first); } for (uint8_t i = 0; i < want.size(); ++i) { @@ -1126,7 +1133,7 @@ ss << " selecting acting[i]: " << pg_shard_t(acting[i], shard_id_t(i)) << std::endl; want[i] = acting[i]; ++usable; - } else { + } else if (!restrict_to_up_acting) { for (set::iterator j = all_info_by_shard[shard_id_t(i)].begin(); j != all_info_by_shard[shard_id_t(i)].end(); ++j) { @@ -1175,6 +1182,7 @@ pg_shard_t up_primary, const map &all_info, bool compat_mode, + bool restrict_to_up_acting, vector *want, set *backfill, set *acting_backfill, @@ -1182,7 +1190,8 @@ ostream &ss) { ss << "calc_acting newest update on osd." << auth_log_shard->first - << " with " << auth_log_shard->second << std::endl; + << " with " << auth_log_shard->second + << (restrict_to_up_acting ? " restrict_to_up_acting" : "") << std::endl; pg_shard_t auth_log_shard_id = auth_log_shard->first; // select primary @@ -1273,6 +1282,9 @@ } } + if (restrict_to_up_acting) { + return; + } for (map::const_iterator i = all_info.begin(); i != all_info.end(); ++i) { @@ -1309,8 +1321,19 @@ * * calculate the desired acting, and request a change with the monitor * if it differs from the current acting. + * + * if restrict_to_up_acting=true, we filter out anything that's not in + * up/acting. in order to lift this restriction, we need to + * 1) check whether it's worth switching the acting set any time we get + * a new pg info (not just here, when recovery finishes) + * 2) check whether anything in want_acting went down on each new map + * (and, if so, calculate a new want_acting) + * 3) remove the assertion in PG::RecoveryState::Active::react(const AdvMap) + * TODO! 
*/ -bool PG::choose_acting(pg_shard_t &auth_log_shard_id, bool *history_les_bound) +bool PG::choose_acting(pg_shard_t &auth_log_shard_id, + bool restrict_to_up_acting, + bool *history_les_bound) { map all_info(peer_info.begin(), peer_info.end()); all_info[pg_whoami] = info; @@ -1322,7 +1345,7 @@ } map::const_iterator auth_log_shard = - find_best_info(all_info, history_les_bound); + find_best_info(all_info, restrict_to_up_acting, history_les_bound); if (auth_log_shard == all_info.end()) { if (up != acting) { @@ -1376,6 +1399,7 @@ up_primary, all_info, compat_mode, + restrict_to_up_acting, &want, &want_backfill, &want_acting_backfill, @@ -1391,6 +1415,7 @@ up_primary, all_info, compat_mode, + restrict_to_up_acting, &want, &want_backfill, &want_acting_backfill, @@ -2069,6 +2094,8 @@ if (is_scrubbing()) { return false; } + scrubber.priority = scrubber.must_scrub ? + cct->_conf->osd_requested_scrub_priority : get_scrub_priority(); scrubber.must_scrub = false; state_set(PG_STATE_SCRUBBING); if (scrubber.must_deep_scrub) { @@ -2512,28 +2539,36 @@ info.stats.log_start = pg_log.get_tail(); info.stats.ondisk_log_start = pg_log.get_tail(); - // calc copies, degraded + // If actingset is larger then upset we will have misplaced, + // so we will report based on actingset size. + + // If upset is larger then we will have degraded, + // so we will report based on upset size. + + // If target is the largest of them all, it will contribute to + // the degraded count because num_object_copies is + // computed using target and eventual used to get degraded total. + unsigned target = get_osdmap()->get_pg_size(info.pgid.pgid); - info.stats.stats.calc_copies(MAX(target, actingbackfill.size())); + unsigned nrep = MAX(actingset.size(), upset.size()); + // calc num_object_copies + info.stats.stats.calc_copies(MAX(target, nrep)); info.stats.stats.sum.num_objects_degraded = 0; + info.stats.stats.sum.num_objects_unfound = 0; info.stats.stats.sum.num_objects_misplaced = 0; if ((is_degraded() || is_undersized() || !is_clean()) && is_peered()) { - // NOTE: we only generate copies, degraded, unfound values for - // the summation, not individual stat categories. + // NOTE: we only generate copies, degraded, misplaced and unfound + // values for the summation, not individual stat categories. 
uint64_t num_objects = info.stats.stats.sum.num_objects; - // a degraded objects has fewer replicas or EC shards than the - // pool specifies - int64_t degraded = 0; - - // if acting is smaller than desired, add in those missing replicas - if (actingset.size() < target) - degraded += (target - actingset.size()) * num_objects; - - // missing on primary - info.stats.stats.sum.num_objects_missing_on_primary = - pg_log.get_missing().num_missing(); - degraded += pg_log.get_missing().num_missing(); + // Total sum of all missing + int64_t missing = 0; + // Objects that have arrived backfilled to up OSDs (not in acting) + int64_t backfilled = 0; + // A misplaced object is not stored on the correct OSD + int64_t misplaced = 0; + // Total of object copies/shards found + int64_t object_copies = 0; // num_objects_missing on each peer for (map::iterator pi = @@ -2548,53 +2583,57 @@ } } - assert(!acting.empty()); - for (set::iterator i = actingset.begin(); - i != actingset.end(); + assert(!actingbackfill.empty()); + for (set::iterator i = actingbackfill.begin(); + i != actingbackfill.end(); ++i) { - if (*i == pg_whoami) continue; - assert(peer_missing.count(*i)); + const pg_shard_t &p = *i; - // in missing set - degraded += peer_missing[*i].num_missing(); + bool in_up = (upset.find(p) != upset.end()); + bool in_acting = (actingset.find(p) != actingset.end()); + assert(in_up || in_acting); + + // in acting Compute total objects excluding num_missing + // in acting and not in up Compute misplaced objects excluding num_missing + // in up and not in acting Compute total objects already backfilled + if (in_acting) { + int osd_missing; + // primary handling + if (p == pg_whoami) { + osd_missing = pg_log.get_missing().num_missing(); + info.stats.stats.sum.num_objects_missing_on_primary = + osd_missing; + object_copies += num_objects; // My local (primary) count + } else { + assert(peer_missing.count(p)); + osd_missing = peer_missing[p].num_missing(); + object_copies += peer_info[p].stats.stats.sum.num_objects; + } + missing += osd_missing; + // Count non-missing objects not in up as misplaced + if (!in_up) + misplaced += MAX(0, num_objects - osd_missing); + } else { + assert(in_up && !in_acting); - // not yet backfilled - int64_t diff = num_objects - peer_info[*i].stats.stats.sum.num_objects; - if (diff > 0) - degraded += diff; + // If this peer has more objects then it should, ignore them + backfilled += MIN(num_objects, peer_info[p].stats.stats.sum.num_objects); + } } + + // Any objects that have been backfilled to up OSDs can deducted from misplaced + misplaced = MAX(0, misplaced - backfilled); + + // Deduct computed total missing on acting nodes + object_copies -= missing; + // Include computed backfilled objects on up nodes + object_copies += backfilled; + // a degraded objects has fewer replicas or EC shards than the + // pool specifies. num_object_copies will never be smaller than target * num_copies. 
+ int64_t degraded = MAX(0, info.stats.stats.sum.num_object_copies - object_copies); + info.stats.stats.sum.num_objects_degraded = degraded; info.stats.stats.sum.num_objects_unfound = get_num_unfound(); - - // a misplaced object is not stored on the correct OSD - uint64_t misplaced = 0; - unsigned in_place = 0; - for (set::const_iterator p = upset.begin(); - p != upset.end(); - ++p) { - const pg_shard_t &s = *p; - if (actingset.count(s)) { - ++in_place; - } else { - // not where it should be - misplaced += num_objects; - if (actingbackfill.count(s)) { - // ...but partially backfilled - misplaced -= peer_info[s].stats.stats.sum.num_objects; - dout(20) << __func__ << " osd." << *p << " misplaced " - << num_objects << " but partially backfilled " - << peer_info[s].stats.stats.sum.num_objects - << dendl; - } else { - dout(20) << __func__ << " osd." << *p << " misplaced " - << num_objects << " but partially backfilled " - << dendl; - } - } - } - // count extra replicas in acting but not in up as misplaced - if (in_place < actingset.size()) - misplaced += (actingset.size() - in_place) * num_objects; info.stats.stats.sum.num_objects_misplaced = misplaced; } } @@ -3178,7 +3217,7 @@ coll, info_struct_v < 8 ? coll_t::meta() : coll, ghobject_t(info_struct_v < 8 ? OSD::make_pg_log_oid(pg_id) : pgmeta_oid), - info, oss); + info, oss, cct->_conf->osd_ignore_stale_divergent_priors); if (oss.tellp()) osd->clog->error() << oss.rdbuf(); @@ -3990,22 +4029,49 @@ { if (g_conf->osd_scrub_sleep > 0 && (scrubber.state == PG::Scrubber::NEW_CHUNK || - scrubber.state == PG::Scrubber::INACTIVE)) { + scrubber.state == PG::Scrubber::INACTIVE) && + scrubber.needs_sleep) { + ceph_assert(!scrubber.sleeping); dout(20) << __func__ << " state is INACTIVE|NEW_CHUNK, sleeping" << dendl; - unlock(); - utime_t t; - t.set_from_double(g_conf->osd_scrub_sleep); - handle.suspend_tp_timeout(); - t.sleep(); - handle.reset_tp_timeout(); - lock(); - dout(20) << __func__ << " slept for " << t << dendl; + + // Do an async sleep so we don't block the op queue + OSDService *osds = osd; + spg_t pgid = get_pgid(); + int state = scrubber.state; + auto scrub_requeue_callback = + new FunctionContext([osds, pgid, state](int r) { + PG *pg = osds->osd->lookup_lock_pg(pgid); + if (pg == nullptr) { + lgeneric_dout(osds->osd->cct, 20) + << "scrub_requeue_callback: Could not find " + << "PG " << pgid << " can't complete scrub requeue after sleep" + << dendl; + return; + } + pg->scrubber.sleeping = false; + pg->scrubber.needs_sleep = false; + lgeneric_dout(pg->cct, 20) + << "scrub_requeue_callback: slept for " + << ceph_clock_now(pg->cct) - pg->scrubber.sleep_start + << ", re-queuing scrub with state " << state << dendl; + pg->scrub_queued = false; + pg->requeue_scrub(); + pg->scrubber.sleep_start = utime_t(); + pg->unlock(); + }); + Mutex::Locker l(osd->scrub_sleep_lock); + osd->scrub_sleep_timer.add_event_after(cct->_conf->osd_scrub_sleep, + scrub_requeue_callback); + scrubber.sleeping = true; + scrubber.sleep_start = ceph_clock_now(cct); + return; } if (pg_has_reset_since(queued)) { return; } assert(scrub_queued); scrub_queued = false; + scrubber.needs_sleep = true; if (!is_primary() || !is_active() || !is_clean() || !is_scrubbing()) { dout(10) << "scrub -- not primary or active or not clean" << dendl; @@ -6662,7 +6728,7 @@ // adjust acting set? (e.g. because backfill completed...) 
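The scrub change above replaces the old in-line sleep, which parked an op-queue thread for osd_scrub_sleep seconds, with a timer event that re-queues the scrub once the interval elapses. Reduced to the bare pattern (a sketch assuming the SafeTimer/FunctionContext semantics used in the hunk; the helper name is illustrative):

    // Sketch: requeue-after-sleep instead of blocking the worker thread.
    void requeue_scrub_after(OSDService *osds, spg_t pgid, double secs)
    {
      Context *cb = new FunctionContext([osds, pgid](int) {
        PG *pg = osds->osd->lookup_lock_pg(pgid);  // returns the PG locked
        if (!pg)
          return;                                  // PG is gone; nothing to do
        pg->scrubber.sleeping = false;
        pg->scrubber.needs_sleep = false;          // skip the sleep on requeue
        pg->scrub_queued = false;
        pg->requeue_scrub();                       // back onto the scrub queue
        pg->unlock();
      });
      Mutex::Locker l(osds->scrub_sleep_lock);     // timer is lock-protected
      osds->scrub_sleep_timer.add_event_after(secs, cb);
    }

The needs_sleep flag is what keeps the re-queued scrub from sleeping again immediately; it is re-armed once the chunk actually runs.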
bool history_les_bound = false; if (pg->acting != pg->up && !pg->choose_acting(auth_log_shard, - &history_les_bound)) + true, &history_les_bound)) assert(pg->want_acting.size()); if (context< Active >().all_replicas_activated) @@ -7451,8 +7517,8 @@ PG *pg = context< RecoveryMachine >().pg; // adjust acting? - if (!pg->choose_acting(auth_log_shard, - &context< Peering >().history_les_bound)) { + if (!pg->choose_acting(auth_log_shard, false, + &context< Peering >().history_les_bound)) { if (!pg->want_acting.empty()) { post_event(NeedActingChange()); } else { diff -Nru ceph-10.2.7/src/osd/PG.h ceph-10.2.9/src/osd/PG.h --- ceph-10.2.7/src/osd/PG.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osd/PG.h 2017-07-13 13:05:37.000000000 +0000 @@ -35,6 +35,7 @@ #include "include/xlist.h" #include "include/atomic.h" #include "SnapMapper.h" +#include "common/Timer.h" #include "PGLog.h" #include "OpRequest.h" @@ -872,22 +873,20 @@ public: void clear_primary_state(); - public: bool is_actingbackfill(pg_shard_t osd) const { return actingbackfill.count(osd); } bool is_acting(pg_shard_t osd) const { - if (pool.info.ec_pool()) { - return acting.size() > (unsigned)osd.shard && acting[osd.shard] == osd.osd; - } else { - return std::find(acting.begin(), acting.end(), osd.osd) != acting.end(); - } + return has_shard(pool.info.ec_pool(), acting, osd); } bool is_up(pg_shard_t osd) const { - if (pool.info.ec_pool()) { - return up.size() > (unsigned)osd.shard && up[osd.shard] == osd.osd; + return has_shard(pool.info.ec_pool(), up, osd); + } + static bool has_shard(bool ec, const vector& v, pg_shard_t osd) { + if (ec) { + return v.size() > (unsigned)osd.shard && v[osd.shard] == osd.osd; } else { - return std::find(up.begin(), up.end(), osd.osd) != up.end(); + return std::find(v.begin(), v.end(), osd.osd) != v.end(); } } @@ -1049,6 +1048,7 @@ map::const_iterator find_best_info( const map &infos, + bool restrict_to_up_acting, bool *history_les_bound) const; static void calc_ec_acting( map::const_iterator auth_log_shard, @@ -1059,6 +1059,7 @@ pg_shard_t up_primary, const map &all_info, bool compat_mode, + bool restrict_to_up_acting, vector *want, set *backfill, set *acting_backfill, @@ -1073,12 +1074,14 @@ pg_shard_t up_primary, const map &all_info, bool compat_mode, + bool restrict_to_up_acting, vector *want, set *backfill, set *acting_backfill, pg_shard_t *want_primary, ostream &ss); bool choose_acting(pg_shard_t &auth_log_shard, + bool restrict_to_up_acting, bool *history_les_bound); void build_might_have_unfound(); void replay_queued_ops(); @@ -1156,9 +1159,17 @@ OpRequestRef active_rep_scrub; utime_t scrub_reg_stamp; // stamp we registered for + // For async sleep + bool sleeping = false; + bool needs_sleep = true; + utime_t sleep_start; + // flags to indicate explicitly requested scrubs (by admin) bool must_scrub, must_deep_scrub, must_repair; + // Priority to use for scrub scheduling + unsigned priority; + // this flag indicates whether we would like to do auto-repair of the PG or not bool auto_repair; @@ -1288,6 +1299,9 @@ authoritative.clear(); num_digest_updates_pending = 0; cleaned_meta_map = ScrubMap(); + sleeping = false; + needs_sleep = true; + sleep_start = utime_t(); } void create_results(const hobject_t& obj); diff -Nru ceph-10.2.7/src/osd/PGLog.cc ceph-10.2.9/src/osd/PGLog.cc --- ceph-10.2.7/src/osd/PGLog.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osd/PGLog.cc 2017-07-13 13:05:37.000000000 +0000 @@ -67,6 +67,7 @@ PGLog::IndexedLog *olog) { list oldlog; + unindex(); 
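The single unindex() added above is easy to miss but is the point of the hunk: the indexed log keeps raw pointers into its entry list, so the index has to be dropped before entries are swapped out and rebuilt afterwards, otherwise lookups would chase entries that now belong to, and eventually die with, oldlog. Schematically (a sketch of the hazard, not the PGLog types):

    #include <list>
    #include <map>
    #include <utility>

    struct Entry { int version; };

    struct IndexedLogSketch {
      std::list<Entry> log;
      std::map<int, Entry*> index;   // raw pointers into `log`

      void unindex() { index.clear(); }

      void replace(std::list<Entry>&& fresh) {
        std::list<Entry> oldlog;
        unindex();                   // drop pointers into current entries first
        oldlog.swap(log);            // old entries now die with oldlog
        log = std::move(fresh);
        for (auto& e : log)          // re-index over the new contents
          index[e.version] = &e;
      }
    };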
oldlog.swap(log); eversion_t old_tail; @@ -177,6 +178,9 @@ /* If we are trimming, we must be complete up to trim_to, time * to throw out any divergent_priors */ + if (!divergent_priors.empty()) { + dirty_divergent_priors = true; + } divergent_priors.clear(); // We shouldn't be trimming the log past last_complete assert(trim_to <= info.last_complete); @@ -929,8 +933,10 @@ IndexedLog &log, pg_missing_t &missing, ostringstream &oss, + bool tolerate_divergent_missing_log, const DoutPrefixProvider *dpp, set *log_keys_debug) + { ldpp_dout(dpp, 20) << "read_log coll " << pg_coll << " log_oid " << log_oid << dendl; @@ -1044,7 +1050,20 @@ * version would not have been recovered, and a newer version * would show up in the log above. */ - assert(oi.version == i->first); + /** + * Unfortunately the assessment above is incorrect because of + * http://tracker.ceph.com/issues/17916 (we were incorrectly + * not removing the divergent_priors set from disk state!), + * so let's check that if the user asked us to. + */ + if (oi.version > i->first && tolerate_divergent_missing_log) { + ldpp_dout(dpp, 0) << "read_log divergent_priors entry (" << *i + << ") inconsistent with disk state (" << oi + << "), assuming it is tracker.ceph.com/issues/17916" + << dendl; + } else { + assert(oi.version == i->first); + } } else { ldpp_dout(dpp, 15) << "read_log missing " << *i << dendl; missing.add(i->second, i->first, eversion_t()); diff -Nru ceph-10.2.7/src/osd/PGLog.h ceph-10.2.9/src/osd/PGLog.h --- ceph-10.2.7/src/osd/PGLog.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osd/PGLog.h 2017-07-13 13:05:37.000000000 +0000 @@ -833,10 +833,11 @@ void read_log(ObjectStore *store, coll_t pg_coll, coll_t log_coll, ghobject_t log_oid, - const pg_info_t &info, ostringstream &oss) { + const pg_info_t &info, ostringstream &oss, + bool tolerate_divergent_missing_log) { return read_log( store, pg_coll, log_coll, log_oid, info, divergent_priors, - log, missing, oss, + log, missing, oss, tolerate_divergent_missing_log, this, (pg_log_debug ? &log_keys_debug : 0)); } @@ -846,6 +847,7 @@ const pg_info_t &info, map &divergent_priors, IndexedLog &log, pg_missing_t &missing, ostringstream &oss, + bool tolerate_divergent_missing_log, const DoutPrefixProvider *dpp = NULL, set *log_keys_debug = 0 ); diff -Nru ceph-10.2.7/src/osd/ReplicatedBackend.cc ceph-10.2.9/src/osd/ReplicatedBackend.cc --- ceph-10.2.7/src/osd/ReplicatedBackend.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osd/ReplicatedBackend.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1676,6 +1676,12 @@ t->touch(coll, ghobject_t(target_oid)); t->truncate(coll, ghobject_t(target_oid), recovery_info.size); t->omap_setheader(coll, ghobject_t(target_oid), omap_header); + + bufferlist bv = attrs[OI_ATTR]; + object_info_t oi(bv); + t->set_alloc_hint(coll, ghobject_t(target_oid), + oi.expected_object_size, + oi.expected_write_size); } uint64_t off = 0; uint32_t fadvise_flags = CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL; diff -Nru ceph-10.2.7/src/osd/ReplicatedPG.cc ceph-10.2.9/src/osd/ReplicatedPG.cc --- ceph-10.2.7/src/osd/ReplicatedPG.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osd/ReplicatedPG.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1675,20 +1675,27 @@ return; } + // order this op as a write? + bool write_ordered = + op->may_write() || + op->may_cache() || + m->has_flag(CEPH_OSD_FLAG_RWORDERED); + // discard due to cluster full transition? 
(we discard any op that // originates before the cluster or pool is marked full; the client // will resend after the full flag is removed or if they expect the // op to succeed despite being full). The except is FULL_FORCE ops, // which there is no reason to discard because they bypass all full // checks anyway. + // If this op isn't write or read-ordered, we skip // FIXME: we exclude mds writes for now. - if (!(m->get_source().is_mds() || m->has_flag(CEPH_OSD_FLAG_FULL_FORCE)) && + if (write_ordered && !( m->get_source().is_mds() || m->has_flag(CEPH_OSD_FLAG_FULL_FORCE)) && info.history.last_epoch_marked_full > m->get_map_epoch()) { dout(10) << __func__ << " discarding op sent before full " << m << " " << *m << dendl; return; } - if (!m->get_source().is_mds() && osd->check_failsafe_full()) { + if (!(m->get_source().is_mds()) && osd->check_failsafe_full() && write_ordered) { dout(10) << __func__ << " fail-safe full check failed, dropping request" << dendl; return; @@ -1719,12 +1726,6 @@ } } - // order this op as a write? - bool write_ordered = - op->may_write() || - op->may_cache() || - m->has_flag(CEPH_OSD_FLAG_RWORDERED); - dout(10) << "do_op " << *m << (op->may_write() ? " may_write" : "") << (op->may_read() ? " may_read" : "") @@ -3617,14 +3618,6 @@ void ReplicatedPG::snap_trimmer(epoch_t queued) { - if (g_conf->osd_snap_trim_sleep > 0) { - unlock(); - utime_t t; - t.set_from_double(g_conf->osd_snap_trim_sleep); - t.sleep(); - lock(); - dout(20) << __func__ << " slept for " << t << dendl; - } if (deleting || pg_has_reset_since(queued)) { return; } @@ -4936,6 +4929,8 @@ ctx->mod_desc.create(); t->touch(soid); } + oi.expected_object_size = op.alloc_hint.expected_object_size; + oi.expected_write_size = op.alloc_hint.expected_write_size; t->set_alloc_hint(soid, op.alloc_hint.expected_object_size, op.alloc_hint.expected_write_size); ctx->delta_stats.num_wr++; @@ -11508,6 +11503,25 @@ hobject_t oid = get_hit_set_archive_object(p->begin, p->end, p->using_gmt); assert(!is_degraded_or_backfilling_object(oid)); ObjectContextRef obc = get_object_context(oid, false); + if (!obc) { + dout(1) << __func__ << " " << oid << " not found" << dendl; + if (pool.info.use_gmt_hitset != p->using_gmt) { + dout(1) << __func__ << " trying with pool's setting: " + << "use_gmt_hitset = " << pool.info.use_gmt_hitset << dendl; + // redo the check + for (const auto& hitset : info.hit_set.history) { + auto oid = get_hit_set_archive_object(hitset.begin, hitset.end, + pool.info.use_gmt_hitset); + if (is_degraded_or_backfilling_object(oid)) + return; + if (scrubber.write_blocked_by_scrub(oid, get_sort_bitwise())) + return; + } + auto oid = get_hit_set_archive_object(p->begin, p->end, + pool.info.use_gmt_hitset); + obc = get_object_context(oid, false); + } + } assert(obc); OpContextUPtr ctx = simple_opc_create(obc); @@ -11749,7 +11763,17 @@ list::iterator p = updated_hit_set_hist.history.begin(); assert(p != updated_hit_set_hist.history.end()); hobject_t oid = get_hit_set_archive_object(p->begin, p->end, p->using_gmt); - + ObjectContextRef obc = get_object_context(oid, false); + if (!obc) { + dout(1) << __func__ << " " << oid << " not found" << dendl; + if (pool.info.use_gmt_hitset != p->using_gmt) { + dout(1) << __func__ << " trying with pool's setting: " + << "use_gmt_hitset = " << pool.info.use_gmt_hitset << dendl; + oid = get_hit_set_archive_object(p->begin, p->end, pool.info.use_gmt_hitset); + obc = get_object_context(oid, false); + } + } + assert(obc); assert(!is_degraded_or_backfilling_object(oid)); dout(20) << 
__func__ << " removing " << oid << dendl; @@ -11775,8 +11799,6 @@ } updated_hit_set_hist.history.pop_front(); - ObjectContextRef obc = get_object_context(oid, false); - assert(obc); --ctx->delta_stats.num_objects; --ctx->delta_stats.num_objects_hit_set_archive; ctx->delta_stats.num_bytes -= obc->obs.oi.size; @@ -13059,21 +13081,55 @@ << pg->snap_trimq.range_start() << dendl; post_event(SnapTrim()); - return transit(); + return transit(); } } +boost::statechart::result ReplicatedPG::WaitReservation::react(const SnapTrimReserved&) +{ + ReplicatedPG *pg = context< SnapTrimmer >().pg; + ldout(pg->cct, 10) << "WaitReservation react SnapTrimReserved" << dendl; + + pending = nullptr; + if (!pg->is_primary() || !pg->is_active() || !pg->is_clean() || + pg->scrubber.active || pg->snap_trimq.empty()) { + post_event(SnapTrim()); + return transit< NotTrimming >(); + } + + context().snap_to_trim = pg->snap_trimq.range_start(); + ldout(pg->cct, 10) << "NotTrimming: trimming " + << pg->snap_trimq.range_start() + << dendl; + pg->queue_snap_trim(); + return transit< TrimmingObjects >(); +} + /* TrimmingObjects */ ReplicatedPG::TrimmingObjects::TrimmingObjects(my_context ctx) : my_base(ctx), NamedState(context< SnapTrimmer >().pg->cct, "Trimming/TrimmingObjects") { + auto *pg = context< SnapTrimmer >().pg; + context< SnapTrimmer >().log_enter(state_name); + pg->state_set(PG_STATE_SNAPTRIM); + pg->publish_stats_to_osd(); +} + +ReplicatedPG::Trimming::Trimming(my_context ctx) + : my_base(ctx), + NamedState(context< SnapTrimmer >().pg->cct, "Trimming") +{ context< SnapTrimmer >().log_enter(state_name); } -void ReplicatedPG::TrimmingObjects::exit() +void ReplicatedPG::Trimming::exit() { context< SnapTrimmer >().log_exit(state_name, enter_time); + auto *pg = context< SnapTrimmer >().pg; + pg->osd->snap_reserver.cancel_reservation(pg->get_pgid()); + pg->state_clear(PG_STATE_SNAPTRIM); + pg->publish_stats_to_osd(); context().in_flight.clear(); } @@ -13121,7 +13177,7 @@ in_flight.insert(pos); pg->simple_opc_submit(std::move(ctx)); } - return discard_event(); + return transit< WaitTrimTimer >(); } /* WaitingOnReplicasObjects */ diff -Nru ceph-10.2.7/src/osd/ReplicatedPG.h ceph-10.2.9/src/osd/ReplicatedPG.h --- ceph-10.2.7/src/osd/ReplicatedPG.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osd/ReplicatedPG.h 2017-07-13 13:05:37.000000000 +0000 @@ -1555,12 +1555,20 @@ } private: struct NotTrimming; + struct WaitReservation; struct SnapTrim : boost::statechart::event< SnapTrim > { SnapTrim() : boost::statechart::event < SnapTrim >() {} }; struct Reset : boost::statechart::event< Reset > { Reset() : boost::statechart::event< Reset >() {} }; + struct SnapTrimReserved : boost::statechart::event< SnapTrimReserved > { + SnapTrimReserved() : boost::statechart::event< SnapTrimReserved >() {} + }; + struct SnapTrimTimerReady : boost::statechart::event< SnapTrimTimerReady > { + SnapTrimTimerReady() : boost::statechart::event< SnapTrimTimerReady >() {} + }; + struct SnapTrimmer : public boost::statechart::state_machine< SnapTrimmer, NotTrimming > { ReplicatedPG *pg; set in_flight; @@ -1573,18 +1581,136 @@ } snap_trimmer_machine; /* SnapTrimmerStates */ - struct TrimmingObjects : boost::statechart::state< TrimmingObjects, SnapTrimmer >, NamedState { + struct Trimming : boost::statechart::state< Trimming, + SnapTrimmer, + WaitReservation >, + NamedState { + typedef boost::mpl::list < + boost::statechart::custom_reaction< SnapTrim >, + boost::statechart::transition< Reset, NotTrimming > + > reactions; + explicit 
Trimming(my_context ctx); + void exit(); + boost::statechart::result react(const SnapTrim&) { return discard_event(); } + }; + + struct TrimmingObjects : boost::statechart::state, NamedState { typedef boost::mpl::list < boost::statechart::custom_reaction< SnapTrim >, boost::statechart::transition< Reset, NotTrimming > > reactions; hobject_t pos; explicit TrimmingObjects(my_context ctx); - void exit(); + void exit() { context< SnapTrimmer >().log_exit(state_name, enter_time); } boost::statechart::result react(const SnapTrim&); }; - struct WaitingOnReplicas : boost::statechart::state< WaitingOnReplicas, SnapTrimmer >, NamedState { + struct WaitReservation : boost::statechart::state< WaitReservation, Trimming >, NamedState { + /* WaitReservation is a sub-state of trimming simply so that exiting Trimming + * always cancels the reservation */ + typedef boost::mpl::list < + boost::statechart::custom_reaction< SnapTrimReserved > + > reactions; + struct ReservationCB : public Context { + ReplicatedPGRef pg; + bool canceled; + ReservationCB(ReplicatedPG *pg) : pg(pg), canceled(false) {} + void finish(int) override { + pg->lock(); + if (!canceled) + pg->snap_trimmer_machine.process_event(SnapTrimReserved()); + pg->unlock(); + } + void cancel() { + assert(pg->is_locked()); + assert(!canceled); + canceled = true; + } + }; + ReservationCB *pending = nullptr; + + explicit WaitReservation(my_context ctx) + : my_base(ctx), + NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitReservation") { + context< SnapTrimmer >().log_enter(state_name); + auto *pg = context< SnapTrimmer >().pg; + pending = new ReservationCB(pg); + pg->osd->snap_reserver.request_reservation(pg->get_pgid(), pending, 0); + pg->state_set(PG_STATE_SNAPTRIM_WAIT); + pg->publish_stats_to_osd(); + } + boost::statechart::result react(const SnapTrimReserved&); + void exit() { + context< SnapTrimmer >().log_exit(state_name, enter_time); + if (pending) + pending->cancel(); + pending = nullptr; + auto *pg = context< SnapTrimmer >().pg; + pg->state_clear(PG_STATE_SNAPTRIM_WAIT); + pg->publish_stats_to_osd(); + } + boost::statechart::result react(const SnapTrim&) { + return discard_event(); + } + }; + + struct WaitTrimTimer : boost::statechart::state< WaitTrimTimer, Trimming >, NamedState { + typedef boost::mpl::list < + boost::statechart::custom_reaction< SnapTrimTimerReady > + > reactions; + Context *wakeup = nullptr; + bool slept = false; + explicit WaitTrimTimer(my_context ctx) + : my_base(ctx), + NamedState(context< SnapTrimmer >().pg->cct, "Trimming/WaitTrimTimer") { + context< SnapTrimmer >().log_enter(state_name); + struct OnTimer : Context { + ReplicatedPGRef pg; + epoch_t epoch; + OnTimer(ReplicatedPGRef pg, epoch_t epoch) : pg(pg), epoch(epoch) {} + void finish(int) override { + pg->lock(); + if (!pg->pg_has_reset_since(epoch)) + pg->snap_trimmer_machine.process_event(SnapTrimTimerReady()); + pg->unlock(); + } + }; + auto *pg = context< SnapTrimmer >().pg; + if (pg->cct->_conf->osd_snap_trim_sleep > 0) { + wakeup = new OnTimer{pg, pg->get_osdmap()->get_epoch()}; + Mutex::Locker l(pg->osd->snap_sleep_lock); + pg->osd->snap_sleep_timer.add_event_after( + pg->cct->_conf->osd_snap_trim_sleep, wakeup); + slept = true; + } else { + post_event(SnapTrimTimerReady()); + } + } + void exit() { + context< SnapTrimmer >().log_exit(state_name, enter_time); + auto *pg = context< SnapTrimmer >().pg; + if (wakeup) { + Mutex::Locker l(pg->osd->snap_sleep_lock); + pg->osd->snap_sleep_timer.cancel_event(wakeup); + wakeup = nullptr; + } + } + 
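WaitReservation and WaitTrimTimer, both substates of Trimming, bracket the actual trim work: a PG first has to win a slot from the OSD's snap_reserver, which bounds concurrent trims, and then waits out osd_snap_trim_sleep on a timer instead of sleeping inline as the removed snap_trimmer() code did. The handshake in outline (illustrative; the transitions are as declared in the surrounding statechart):

    // NotTrimming --SnapTrim--> Trimming (initial substate WaitReservation)
    //   request_reservation(pgid, cb, 0);          // cb posts SnapTrimReserved
    // WaitReservation --SnapTrimReserved--> TrimmingObjects
    // TrimmingObjects --SnapTrim--> WaitTrimTimer  // after submitting a batch
    //   add_event_after(osd_snap_trim_sleep, cb);  // cb posts SnapTrimTimerReady
    // WaitTrimTimer --SnapTrimTimerReady--> TrimmingObjects (or NotTrimming)
    //
    // Exiting Trimming by any path releases the slot:
    void on_exit_trimming(ReplicatedPG *pg)
    {
      pg->osd->snap_reserver.cancel_reservation(pg->get_pgid());
      pg->state_clear(PG_STATE_SNAPTRIM);
      pg->publish_stats_to_osd();
    }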
boost::statechart::result react(const SnapTrimTimerReady &) { + wakeup = nullptr; + auto *pg = context< SnapTrimmer >().pg; + if (!pg->is_primary() || !pg->is_active() || !pg->is_clean() || + pg->scrubber.active) { + post_event(SnapTrim()); + return transit< NotTrimming >(); + } else { + if (slept) + post_event(SnapTrim()); + return transit< TrimmingObjects >(); + } + } + }; + + struct WaitingOnReplicas : boost::statechart::state< WaitingOnReplicas, Trimming >, NamedState { typedef boost::mpl::list < boost::statechart::custom_reaction< SnapTrim >, boost::statechart::transition< Reset, NotTrimming > diff -Nru ceph-10.2.7/src/osdc/Journaler.cc ceph-10.2.9/src/osdc/Journaler.cc --- ceph-10.2.7/src/osdc/Journaler.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osdc/Journaler.cc 2017-07-13 13:05:37.000000000 +0000 @@ -528,7 +528,7 @@ uint64_t Journaler::append_entry(bufferlist& bl) { - lock_guard l(lock); + unique_lock l(lock); assert(!readonly); uint32_t s = bl.length(); @@ -547,6 +547,15 @@ bufferptr bp(write_pos - owp); bp.zero(); assert(bp.length() >= 4); + if (!write_buf_throttle.get_or_fail(bp.length())) { + l.unlock(); + ldout(cct, 10) << "write_buf_throttle wait, bp len " + << bp.length() << dendl; + write_buf_throttle.get(bp.length()); + l.lock(); + } + ldout(cct, 20) << "write_buf_throttle get, bp len " + << bp.length() << dendl; write_buf.push_back(bp); // now flush. @@ -560,6 +569,15 @@ // append + size_t delta = bl.length() + journal_stream.get_envelope_size(); + // write_buf space is nearly full + if (!write_buf_throttle.get_or_fail(delta)) { + l.unlock(); + ldout(cct, 10) << "write_buf_throttle wait, delta " << delta << dendl; + write_buf_throttle.get(delta); + l.lock(); + } + ldout(cct, 20) << "write_buf_throttle get, delta " << delta << dendl; size_t wrote = journal_stream.write(bl, &write_buf, write_pos); ldout(cct, 10) << "append_entry len " << s << " to " << write_pos << "~" << wrote << dendl; @@ -589,7 +607,7 @@ assert(!readonly); // flush - unsigned len = write_pos - flush_pos; + uint64_t len = write_pos - flush_pos; assert(len == write_buf.length()); if (amount && amount < len) len = amount; @@ -645,7 +663,9 @@ flush_pos += len; assert(write_buf.length() == write_pos - flush_pos); - + write_buf_throttle.put(len); + ldout(cct, 20) << "write_buf_throttle put, len " << len << dendl; + ldout(cct, 10) << "_do_flush (prezeroing/prezero)/write/flush/safe pointers now at " << "(" << prezeroing_pos << "/" << prezero_pos << ")/" << write_pos diff -Nru ceph-10.2.7/src/osdc/Journaler.h ceph-10.2.9/src/osdc/Journaler.h --- ceph-10.2.7/src/osdc/Journaler.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osdc/Journaler.h 2017-07-13 13:05:37.000000000 +0000 @@ -64,7 +64,7 @@ #include "Filer.h" #include "common/Timer.h" - +#include "common/Throttle.h" class CephContext; class Context; @@ -113,6 +113,13 @@ bool readable(bufferlist &bl, uint64_t *need) const; size_t read(bufferlist &from, bufferlist *to, uint64_t *start_ptr); size_t write(bufferlist &entry, bufferlist *to, uint64_t const &start_ptr); + size_t get_envelope_size() const { + if (format >= JOURNAL_FORMAT_RESILIENT) { + return JOURNAL_ENVELOPE_RESILIENT; + } else { + return JOURNAL_ENVELOPE_LEGACY; + } + } // A magic number for the start of journal entries, so that we can // identify them in damaged journals. @@ -305,6 +312,9 @@ bufferlist write_buf; ///< write buffer. flush_pos + /// write_buf.length() == write_pos. 
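The append_entry() changes above charge write_buf growth against a Throttle so that appenders stall once roughly seven eighths of the 32-bit bufferlist length space is committed (the budget set in Journaler.h below), with _do_flush() returning budget via put(). Note the drop-and-relock dance: the blocking get() must never run while the Journaler lock is held. The essential pattern (a sketch assuming ceph's Throttle::get_or_fail()/get() semantics):

    // Sketch: charge `delta` against the throttle without sleeping
    // while holding the mutex.
    void charge(Throttle& throttle, std::unique_lock<std::mutex>& l,
                uint64_t delta)
    {
      if (!throttle.get_or_fail(delta)) {  // fast path: budget available?
        l.unlock();                        // let the flusher make progress
        throttle.get(delta);               // blocks until put() returns budget
        l.lock();
      }
    }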
+ // protect write_buf from bufferlist _len overflow + Throttle write_buf_throttle; + bool waiting_for_zero; interval_set pending_zero; // non-contig bits we've zeroed std::set pending_safe; @@ -400,6 +410,7 @@ timer(tim), delay_flush_event(0), state(STATE_UNDEF), error(0), prezeroing_pos(0), prezero_pos(0), write_pos(0), flush_pos(0), safe_pos(0), + write_buf_throttle(cct, "write_buf_throttle", UINT_MAX - (UINT_MAX >> 3)), waiting_for_zero(false), read_pos(0), requested_pos(0), received_pos(0), fetch_len(0), temp_fetch_len(0), diff -Nru ceph-10.2.7/src/osdc/ObjectCacher.cc ceph-10.2.9/src/osdc/ObjectCacher.cc --- ceph-10.2.7/src/osdc/ObjectCacher.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osdc/ObjectCacher.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1151,7 +1151,7 @@ break; ldout(cct, 10) << "trim trimming " << *bh << dendl; - assert(bh->is_clean() || bh->is_zero()); + assert(bh->is_clean() || bh->is_zero() || bh->is_error()); Object *ob = bh->ob; bh_remove(ob, bh); diff -Nru ceph-10.2.7/src/osdc/Objecter.cc ceph-10.2.9/src/osdc/Objecter.cc --- ceph-10.2.7/src/osdc/Objecter.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/osdc/Objecter.cc 2017-07-13 13:05:37.000000000 +0000 @@ -2289,8 +2289,13 @@ bool need_send = false; - if ((op->target.flags & CEPH_OSD_FLAG_WRITE) && - osdmap->test_flag(CEPH_OSDMAP_PAUSEWR)) { + if (osdmap->get_epoch() < epoch_barrier) { + ldout(cct, 10) << " barrier, paused " << op << " tid " << op->tid + << dendl; + op->target.paused = true; + _maybe_request_map(); + } else if ((op->target.flags & CEPH_OSD_FLAG_WRITE) && + osdmap->test_flag(CEPH_OSDMAP_PAUSEWR)) { ldout(cct, 10) << " paused modify " << op << " tid " << op->tid << dendl; op->target.paused = true; diff -Nru ceph-10.2.7/src/pybind/cephfs/cephfs.pyx ceph-10.2.9/src/pybind/cephfs/cephfs.pyx --- ceph-10.2.7/src/pybind/cephfs/cephfs.pyx 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/pybind/cephfs/cephfs.pyx 2017-07-13 13:05:37.000000000 +0000 @@ -322,7 +322,8 @@ self.state = "uninitialized" if rados_inst is not None: if auth_id is not None or conffile is not None or conf is not None: - raise InvalidValue("May not pass RADOS instance as well as other configuration") + raise make_ex(errno.EINVAL, + "May not pass RADOS instance as well as other configuration") self.create_with_rados(rados_inst) else: @@ -618,16 +619,26 @@ if flags == '': cephfs_flags = os.O_RDONLY else: + access_flags = 0; for c in flags: if c == 'r': - cephfs_flags |= os.O_RDONLY + access_flags = 1; elif c == 'w': - cephfs_flags |= os.O_WRONLY | os.O_TRUNC | os.O_CREAT - elif c == '+': - cephfs_flags |= os.O_RDWR + access_flags = 2; + cephfs_flags |= os.O_TRUNC | os.O_CREAT + elif access_flags > 0 and c == '+': + access_flags = 3; else: - raise OperationNotSupported( - "open flags doesn't support %s" % c) + raise make_ex(errno.EOPNOTSUPP, + "open flags doesn't support %s" % c) + + if access_flags == 1: + cephfs_flags |= os.O_RDONLY; + elif access_flags == 2: + cephfs_flags |= os.O_WRONLY; + else: + cephfs_flags |= os.O_RDWR; + elif isinstance(flags, int): cephfs_flags = flags else: diff -Nru ceph-10.2.7/src/pybind/rbd/rbd.pyx ceph-10.2.9/src/pybind/rbd/rbd.pyx --- ceph-10.2.7/src/pybind/rbd/rbd.pyx 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/pybind/rbd/rbd.pyx 2017-07-13 13:05:37.000000000 +0000 @@ -1236,7 +1236,7 @@ with nogil: ret = rbd_get_old_format(self.image, &old) if ret != 0: - raise make_ex(ret, 'error getting old_format for image' % (self.name)) + raise make_ex(ret, 'error getting 
old_format for image %s' % (self.name)) return old != 0 def size(self): @@ -1250,7 +1250,7 @@ with nogil: ret = rbd_get_size(self.image, &image_size) if ret != 0: - raise make_ex(ret, 'error getting size for image' % (self.name)) + raise make_ex(ret, 'error getting size for image %s' % (self.name)) return image_size def features(self): @@ -1263,7 +1263,7 @@ with nogil: ret = rbd_get_features(self.image, &features) if ret != 0: - raise make_ex(ret, 'error getting features for image' % (self.name)) + raise make_ex(ret, 'error getting features for image %s' % (self.name)) return features def update_features(self, features, enabled): @@ -1300,7 +1300,7 @@ with nogil: ret = rbd_get_overlap(self.image, &overlap) if ret != 0: - raise make_ex(ret, 'error getting overlap for image' % (self.name)) + raise make_ex(ret, 'error getting overlap for image %s' % (self.name)) return overlap def flags(self): @@ -1313,7 +1313,7 @@ with nogil: ret = rbd_get_flags(self.image, &flags) if ret != 0: - raise make_ex(ret, 'error getting flags for image' % (self.name)) + raise make_ex(ret, 'error getting flags for image %s' % (self.name)) return flags def is_exclusive_lock_owner(self): @@ -1326,7 +1326,7 @@ with nogil: ret = rbd_is_exclusive_lock_owner(self.image, &owner) if ret != 0: - raise make_ex(ret, 'error getting lock status for image' % (self.name)) + raise make_ex(ret, 'error getting lock status for image %s' % (self.name)) return owner == 1 def copy(self, dest_ioctx, dest_name, features=None, order=None, @@ -1696,7 +1696,7 @@ with nogil: ret = rbd_get_stripe_unit(self.image, &stripe_unit) if ret != 0: - raise make_ex(ret, 'error getting stripe unit for image' % (self.name)) + raise make_ex(ret, 'error getting stripe unit for image %s' % (self.name)) return stripe_unit def stripe_count(self): @@ -1707,7 +1707,7 @@ with nogil: ret = rbd_get_stripe_count(self.image, &stripe_count) if ret != 0: - raise make_ex(ret, 'error getting stripe count for image' % (self.name)) + raise make_ex(ret, 'error getting stripe count for image %s' % (self.name)) return stripe_count def flatten(self): diff -Nru ceph-10.2.7/src/rbdmap ceph-10.2.9/src/rbdmap --- ceph-10.2.7/src/rbdmap 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rbdmap 2017-07-13 13:05:37.000000000 +0000 @@ -1,15 +1,6 @@ #!/bin/bash do_map() { - - # default to reasonable value if RBDMAPFILE not set in environment - printenv RBDMAPFILE >/dev/null || local RBDMAPFILE=/etc/ceph/rbdmap - - if [ ! -f "$RBDMAPFILE" ]; then - logger -p "daemon.warning" -t rbdmap "No $RBDMAPFILE found." - exit 0 - fi - # Read /etc/rbdtab to create non-existant mapping RET=0 while read DEV PARAMS; do @@ -65,7 +56,33 @@ } -do_unmap() { +unmount_unmap() { + local rbd_dev=$1 + local mnt=$(findmnt --mtab --source ${rbd_dev} --noheadings \ + | awk '{print $1'}) + + logger -p "daemon.debug" -t rbdmap "Unmapping '${rbd_dev}'" + if [ -n "${mnt}" ]; then + logger -p "daemon.debug" -t rbdmap "Unmounting '${mnt}'" + umount "${mnt}" >>/dev/null 2>&1 + fi + if mountpoint -q "${mnt}"; then + ## Un-mounting failed. + logger -p "daemon.warning" -t rbdmap "Failed to unmount '${mnt}'" + return 1 + fi + ## Un-mapping. + rbd unmap $rbd_dev >>/dev/null 2>&1 + if [ $? 
-ne 0 ]; then + logger -p "daemon.warning" -t rbdmap "Failed to unmap '${mnt}'" + return 1 + fi + logger -p "daemon.debug" -t rbdmap "Unmapped '${rbd_dev}'" + + return 0 +} + +do_unmap_all() { RET=0 ## Unmount and unmap all rbd devices if ls /dev/rbd[0-9]* >/dev/null 2>&1; then @@ -81,31 +98,57 @@ fi done - logger -p "daemon.debug" -t rbdmap "Unmapping '${DEV}'" - MNT=$(findmnt --mtab --source ${DEV} --noheadings | awk '{print $1'}) - if [ -n "${MNT}" ]; then - logger -p "daemon.debug" -t rbdmap "Unmounting '${MNT}'" - umount "${MNT}" >>/dev/null 2>&1 - fi - if mountpoint -q "${MNT}"; then - ## Un-mounting failed. - logger -p "daemon.warning" -t rbdmap "Failed to unmount '${MNT}'" - RET=$((${RET}+1)) - continue - fi - ## Un-mapping. - rbd unmap $DEV >>/dev/null 2>&1 - if [ $? -ne 0 ]; then - logger -p "daemon.warning" -t rbdmap "Failed to unmap '${MNT}'" - RET=$((${RET}+$?)) - continue - fi - logger -p "daemon.debug" -t rbdmap "Unmapped '${DEV}'" + unmount_unmap "$DEV" || RET=$((${RET}+$?)) + done fi exit ${RET} } +do_unmap() { + RET=0 + ## skip if nothing is mapped + ls /dev/rbd[0-9]* >/dev/null 2>&1 || exit ${RET} + + # Read /etc/rbdtab to create non-existent mapping + while read DEV PARAMS; do + case "$DEV" in + ""|\#*) + continue + ;; + */*) + ;; + *) + DEV=rbd/$DEV + ;; + esac + + MAP_RV="$(readlink -f /dev/rbd/$DEV)" + if [ ! -b $MAP_RV ]; then + logger -p "daemon.debug" -t rbdmap "$DEV not mapped, skipping unmap" + continue + fi + + ## pre-unmapping + if [ -x "/etc/ceph/rbd.d/${DEV}" ]; then + logger -p "daemon.debug" -t rbdmap "Running pre-unmap hook '/etc/ceph/rbd.d/${DEV}'" + /etc/ceph/rbd.d/${DEV} unmap "/dev/rbd/${DEV}" + fi + + unmount_unmap "$MAP_RV" || RET=$((${RET}+$?)) + + done < $RBDMAPFILE + exit ${RET} +} + +# default to reasonable value if RBDMAPFILE not set in environment +RBDMAPFILE="${RBDMAPFILE:-/etc/ceph/rbdmap}" + +if [ ! -f "$RBDMAPFILE" ]; then + logger -p "daemon.warning" -t rbdmap "No $RBDMAPFILE found."
+ exit 0 +fi + case "$1" in map) do_map @@ -114,6 +157,11 @@ unmap) do_unmap ;; + + unmap-all) + do_unmap_all + ;; + *) - echo "Usage: rbdmap map | unmap" + echo "Usage: rbdmap map | unmap | unmap-all" esac diff -Nru ceph-10.2.7/src/rbd_replay/rbd-replay.cc ceph-10.2.9/src/rbd_replay/rbd-replay.cc --- ceph-10.2.7/src/rbd_replay/rbd-replay.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rbd_replay/rbd-replay.cc 2017-07-13 13:05:37.000000000 +0000 @@ -60,7 +60,8 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); std::vector::iterator i; string pool_name = "rbd"; diff -Nru ceph-10.2.7/src/rgw/librgw.cc ceph-10.2.9/src/rgw/librgw.cc --- ceph-10.2.7/src/rgw/librgw.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/librgw.cc 2017-07-13 13:05:37.000000000 +0000 @@ -440,10 +440,10 @@ def_args.push_back("--keyring=$rgw_data/keyring"); def_args.push_back("--log-file=/var/log/radosgw/$cluster-$name.log"); - global_init(&def_args, args, - CEPH_ENTITY_TYPE_CLIENT, - CODE_ENVIRONMENT_DAEMON, - CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + cct = global_init(&def_args, args, + CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); Mutex mutex("main"); SafeTimer init_timer(g_ceph_context, mutex); @@ -550,7 +550,7 @@ rgw_perf_stop(g_ceph_context); dout(1) << "final shutdown" << dendl; - g_ceph_context->put(); + cct.reset(); ceph::crypto::shutdown(); diff -Nru ceph-10.2.7/src/rgw/rgw_acl_swift.cc ceph-10.2.9/src/rgw/rgw_acl_swift.cc --- ceph-10.2.7/src/rgw/rgw_acl_swift.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_acl_swift.cc 2017-07-13 13:05:37.000000000 +0000 @@ -54,6 +54,7 @@ return false; return sub.compare(".r") == 0 || + sub.compare(".ref") == 0 || sub.compare(".referer") == 0 || sub.compare(".referrer") == 0; } diff -Nru ceph-10.2.7/src/rgw/rgw_admin.cc ceph-10.2.9/src/rgw/rgw_admin.cc --- ceph-10.2.7/src/rgw/rgw_admin.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_admin.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1,3 +1,4 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab #include @@ -66,6 +67,7 @@ cout << " key create create access key\n"; cout << " key rm remove access key\n"; cout << " bucket list list buckets\n"; + cout << " bucket limit check show bucket sharding stats\n"; cout << " bucket link link bucket to specified user\n"; cout << " bucket unlink unlink bucket from specified user\n"; cout << " bucket stats returns bucket statistics\n"; @@ -76,6 +78,7 @@ cout << " bi put store bucket index object entries\n"; cout << " bi list list raw bucket index entries\n"; cout << " object rm remove object\n"; + cout << " object stat stat an object for its metadata\n"; cout << " object unlink unlink object from bucket index\n"; cout << " objects expire run expired objects cleanup\n"; cout << " period delete delete a period\n"; @@ -115,8 +118,6 @@ cout << " zonegroup placement modify modify a placement target of a specific zonegroup\n"; cout << " zonegroup placement rm remove a placement target from a zonegroup\n"; cout << " zonegroup placement default set a zonegroup's default placement target\n"; - cout << " zonegroup-map get show zonegroup-map\n"; - cout << " zonegroup-map set set zonegroup-map (requires infile)\n"; cout << " zone create create a new zone\n"; cout << " zone 
delete delete a zone\n"; cout << " zone get show zone cluster params\n"; @@ -249,7 +250,9 @@ cout << " --categories= comma separated list of categories, used in usage show\n"; cout << " --caps= list of caps (e.g., \"usage=read, write; user=read\"\n"; cout << " --yes-i-really-mean-it required for certain operations\n"; - cout << " --reset-regions reset regionmap when regionmap update\n"; + cout << " --warnings-only when specified with bucket limit check, list\n"; + cout << " only buckets nearing or over the current max\n"; + cout << " objects per shard value\n"; cout << " --bypass-gc when specified with bucket deletion, triggers\n"; cout << " object deletions by not involving GC\n"; cout << " --inconsistent-index when specified with bucket deletion and bypass-gc set to true,\n"; @@ -293,6 +296,7 @@ OPT_KEY_CREATE, OPT_KEY_RM, OPT_BUCKETS_LIST, + OPT_BUCKETS_LIMIT_CHECK, OPT_BUCKET_LINK, OPT_BUCKET_UNLINK, OPT_BUCKET_STATS, @@ -345,10 +349,7 @@ OPT_ZONEGROUP_PLACEMENT_RM, OPT_ZONEGROUP_PLACEMENT_LIST, OPT_ZONEGROUP_PLACEMENT_DEFAULT, - OPT_ZONEGROUPMAP_GET, - OPT_ZONEGROUPMAP_SET, - OPT_ZONEGROUPMAP_UPDATE, - OPT_ZONE_CREATE, + OPT_ZONE_CREATE, OPT_ZONE_DELETE, OPT_ZONE_GET, OPT_ZONE_MODIFY, @@ -440,9 +441,6 @@ strcmp(cmd, "pools") == 0 || strcmp(cmd, "quota") == 0 || strcmp(cmd, "realm") == 0 || - strcmp(cmd, "region") == 0 || - strcmp(cmd, "region-map") == 0 || - strcmp(cmd, "regionmap") == 0 || strcmp(cmd, "replicalog") == 0 || strcmp(cmd, "subuser") == 0 || strcmp(cmd, "sync") == 0 || @@ -450,9 +448,7 @@ strcmp(cmd, "user") == 0 || strcmp(cmd, "zone") == 0 || strcmp(cmd, "zonegroup") == 0 || - strcmp(cmd, "zonegroups") == 0 || - strcmp(cmd, "zonegroup-map") == 0 || - strcmp(cmd, "zonegroupmap") == 0 ) + strcmp(cmd, "zonegroups") == 0) { *need_more = true; return 0; @@ -496,6 +492,10 @@ } else if (strcmp(prev_cmd, "buckets") == 0) { if (strcmp(cmd, "list") == 0) return OPT_BUCKETS_LIST; + if (strcmp(cmd, "limit") == 0) { + *need_more = true; + return 0; + } } else if (strcmp(prev_cmd, "bucket") == 0) { if (strcmp(cmd, "list") == 0) return OPT_BUCKETS_LIST; @@ -517,14 +517,18 @@ *need_more = true; return 0; } - } else if ((prev_prev_cmd && strcmp(prev_prev_cmd, "bucket") == 0) && - (strcmp(prev_cmd, "sync") == 0)) { - if (strcmp(cmd, "status") == 0) - return OPT_BUCKET_SYNC_STATUS; - if (strcmp(cmd, "init") == 0) - return OPT_BUCKET_SYNC_INIT; - if (strcmp(cmd, "run") == 0) - return OPT_BUCKET_SYNC_RUN; + } else if (prev_prev_cmd && strcmp(prev_prev_cmd, "bucket") == 0) { + if (strcmp(prev_cmd, "sync") == 0) { + if (strcmp(cmd, "status") == 0) + return OPT_BUCKET_SYNC_STATUS; + if (strcmp(cmd, "init") == 0) + return OPT_BUCKET_SYNC_INIT; + if (strcmp(cmd, "run") == 0) + return OPT_BUCKET_SYNC_RUN; + } else if ((strcmp(prev_cmd, "limit") == 0) && + (strcmp(cmd, "check") == 0)) { + return OPT_BUCKETS_LIMIT_CHECK; + } } else if (strcmp(prev_cmd, "log") == 0) { if (strcmp(cmd, "list") == 0) return OPT_LOG_LIST; @@ -630,8 +634,7 @@ return OPT_ZONEGROUP_PLACEMENT_LIST; if (strcmp(cmd, "default") == 0) return OPT_ZONEGROUP_PLACEMENT_DEFAULT; - } else if (strcmp(prev_cmd, "zonegroup") == 0 || - strcmp(prev_cmd, "region") == 0) { + } else if (strcmp(prev_cmd, "zonegroup") == 0) { if (strcmp(cmd, "add") == 0) return OPT_ZONEGROUP_ADD; if (strcmp(cmd, "create")== 0) @@ -659,20 +662,9 @@ return OPT_QUOTA_ENABLE; if (strcmp(cmd, "disable") == 0) return OPT_QUOTA_DISABLE; - } else if (strcmp(prev_cmd, "zonegroups") == 0 || - strcmp(prev_cmd, "regions") == 0) { + } else if (strcmp(prev_cmd, "zonegroups") 
== 0) { if (strcmp(cmd, "list") == 0) return OPT_ZONEGROUP_LIST; - } else if (strcmp(prev_cmd, "zonegroup-map") == 0 || - strcmp(prev_cmd, "zonegroupmap") == 0 || - strcmp(prev_cmd, "region-map") == 0 || - strcmp(prev_cmd, "regionmap") == 0) { - if (strcmp(cmd, "get") == 0) - return OPT_ZONEGROUPMAP_GET; - if (strcmp(cmd, "set") == 0) - return OPT_ZONEGROUPMAP_SET; - if (strcmp(cmd, "update") == 0) - return OPT_ZONEGROUPMAP_UPDATE; } else if ((prev_prev_cmd && strcmp(prev_prev_cmd, "zone") == 0) && (strcmp(prev_cmd, "placement") == 0)) { if (strcmp(cmd, "add") == 0) @@ -1420,7 +1412,7 @@ key.id = access; key.key = secret; - list > params; + param_vec_t params; RGWRESTSimpleRequest req(g_ceph_context, url, NULL, ¶ms); bufferlist response; @@ -1831,14 +1823,13 @@ return; } - ret = sync.read_sync_status(); - if (ret < 0) { + rgw_data_sync_status sync_status; + ret = sync.read_sync_status(&sync_status); + if (ret < 0 && ret != -ENOENT) { push_ss(ss, status, tab) << string("failed read sync status: ") + cpp_strerror(-ret); return; } - const rgw_data_sync_status& sync_status = sync.get_sync_status(); - string status_str; switch (sync_status.sync_info.state) { case rgw_data_sync_info::StateInit: @@ -1977,7 +1968,7 @@ list md_status; - if (zone.id == zonegroup.master_zone) { + if (store->is_meta_master()) { md_status.push_back("no sync (zone is master)"); } else { get_md_sync_status(md_status); @@ -2192,7 +2183,8 @@ argv_to_vec(argc, (const char **)argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); rgw_user user_id; @@ -2284,8 +2276,8 @@ int include_all = false; int sync_stats = false; - int reset_regions = false; int bypass_gc = false; + int warnings_only = false; int inconsistent_index = false; int verbose = false; @@ -2508,12 +2500,12 @@ // do nothing } else if (ceph_argparse_binary_flag(args, i, &include_all, NULL, "--include-all", (char*)NULL)) { // do nothing - } else if (ceph_argparse_binary_flag(args, i, &reset_regions, NULL, "--reset-regions", (char*)NULL)) { - // do nothing } else if (ceph_argparse_binary_flag(args, i, &extra_info, NULL, "--extra-info", (char*)NULL)) { // do nothing } else if (ceph_argparse_binary_flag(args, i, &bypass_gc, NULL, "--bypass-gc", (char*)NULL)) { // do nothing + } else if (ceph_argparse_binary_flag(args, i, &warnings_only, NULL, "--warnings-only", (char*)NULL)) { + // do nothing } else if (ceph_argparse_binary_flag(args, i, &inconsistent_index, NULL, "--inconsistent-index", (char*)NULL)) { // do nothing } else if (ceph_argparse_witharg(args, i, &val, "--caps", (char*)NULL)) { @@ -2711,8 +2703,6 @@ OPT_ZONEGROUP_PLACEMENT_ADD, OPT_ZONEGROUP_PLACEMENT_RM, OPT_ZONEGROUP_PLACEMENT_MODIFY, OPT_ZONEGROUP_PLACEMENT_LIST, OPT_ZONEGROUP_PLACEMENT_DEFAULT, - OPT_ZONEGROUPMAP_GET, OPT_ZONEGROUPMAP_SET, - OPT_ZONEGROUPMAP_UPDATE, OPT_ZONE_CREATE, OPT_ZONE_DELETE, OPT_ZONE_GET, OPT_ZONE_SET, OPT_ZONE_RENAME, OPT_ZONE_LIST, OPT_ZONE_MODIFY, OPT_ZONE_DEFAULT, @@ -2950,7 +2940,7 @@ list realms; ret = store->list_realms(realms); if (ret < 0) { - cerr << "failed to list realmss: " << cpp_strerror(-ret) << std::endl; + cerr << "failed to list realms: " << cpp_strerror(-ret) << std::endl; return -ret; } formatter->open_object_section("realms_list"); @@ -3550,78 +3540,6 @@ formatter->flush(cout); } break; - case OPT_ZONEGROUPMAP_GET: - { - RGWZoneGroupMap zonegroupmap; - - int ret = 
zonegroupmap.read(g_ceph_context, store); - if (ret < 0 && ret != -ENOENT) { - cerr << "failed to read zonegroupmap info: " << cpp_strerror(ret); - return ret; - } - - encode_json("zonegroup-map", zonegroupmap, formatter); - formatter->flush(cout); - } - break; - case OPT_ZONEGROUPMAP_SET: - { - RGWZoneGroupMap zonegroupmap; - int ret = read_decode_json(infile, zonegroupmap); - if (ret < 0) { - cerr << "ERROR: failed to read map json: " << cpp_strerror(-ret) << std::endl; - return ret; - } - - RGWPeriod period; - ret = period.init(g_ceph_context, store); - if (ret < 0) { - cerr << "ERROR: failed to read current period info: " << cpp_strerror(-ret) << std::endl; - return ret; - } - - period.fork(); - period.update(zonegroupmap); - period.store_info(false); - - encode_json("zonegroup-map", zonegroupmap, formatter); - formatter->flush(cout); - } - break; - case OPT_ZONEGROUPMAP_UPDATE: - { - RGWZoneGroupMap zonegroupmap; - int ret = zonegroupmap.read(g_ceph_context, store); - if (ret < 0 && ret != -ENOENT) { - cerr << "failed to read zonegroup map: " << cpp_strerror(-ret) << std::endl; - return -ret; - } - - if (reset_regions) { - zonegroupmap.zonegroups.clear(); - } - - list realms; - ret = store->list_realms(realms); - if (ret < 0) { - cerr << "failed to list realms: " << cpp_strerror(-ret) << std::endl; - return -ret; - } - - for (list::iterator iter = realms.begin(); iter != realms.end(); ++iter) - { - RGWRealm realm("", *iter); - ret = realm.init(g_ceph_context, store); - if (ret < 0) { - cerr << "failed to init realm: " << cpp_strerror(-ret) << std::endl; - return -ret; - } - } - - encode_json("zonegroup-map", zonegroupmap, formatter); - formatter->flush(cout); - } - break; case OPT_ZONE_CREATE: { if (zone_name.empty()) { @@ -3642,7 +3560,7 @@ } } - RGWZoneParams zone(zone_name); + RGWZoneParams zone(zone_id, zone_name); ret = zone.init(g_ceph_context, store, false); if (ret < 0) { cerr << "unable to initialize zone: " << cpp_strerror(-ret) << std::endl; @@ -4405,6 +4323,51 @@ } } + if (opt_cmd == OPT_BUCKETS_LIMIT_CHECK) { + void *handle; + std::list user_ids; + metadata_key = "user"; + int max = 1000; + + bool truncated; + + if (! 
user_id.empty()) { + user_ids.push_back(user_id.id); + ret = + RGWBucketAdminOp::limit_check(store, bucket_op, user_ids, f, + warnings_only); + } else { + /* list users in groups of max-keys, then perform user-bucket + * limit-check on each group */ + ret = store->meta_mgr->list_keys_init(metadata_key, &handle); + if (ret < 0) { + cerr << "ERROR: buckets limit check can't get user metadata_key: " + << cpp_strerror(-ret) << std::endl; + return -ret; + } + + do { + ret = store->meta_mgr->list_keys_next(handle, max, user_ids, + &truncated); + if (ret < 0 && ret != -ENOENT) { + cerr << "ERROR: buckets limit check lists_keys_next(): " + << cpp_strerror(-ret) << std::endl; + break; + } else { + /* ok, do the limit checks for this group */ + ret = + RGWBucketAdminOp::limit_check(store, bucket_op, user_ids, f, + warnings_only); + if (ret < 0) + break; + } + user_ids.clear(); + } while (truncated); + store->meta_mgr->list_keys_complete(handle); + } + return -ret; + } /* OPT_BUCKETS_LIMIT_CHECK */ + if (opt_cmd == OPT_BUCKETS_LIST) { if (bucket_name.empty()) { RGWBucketAdminOp::info(store, bucket_op, f); @@ -4455,8 +4418,8 @@ formatter->close_section(); formatter->flush(cout); - } - } + } /* have bucket_name */ + } /* OPT_BUCKETS_LIST */ if (opt_cmd == OPT_BUCKET_STATS) { bucket_op.set_fetch_stats(true); @@ -5735,14 +5698,13 @@ return -ret; } - ret = sync.read_sync_status(); - if (ret < 0) { + rgw_data_sync_status sync_status; + ret = sync.read_sync_status(&sync_status); + if (ret < 0 && ret != -ENOENT) { cerr << "ERROR: sync.read_sync_status() returned ret=" << ret << std::endl; return -ret; } - rgw_data_sync_status& sync_status = sync.get_sync_status(); - formatter->open_object_section("summary"); encode_json("sync_status", sync_status, formatter); diff -Nru ceph-10.2.7/src/rgw/rgw_bucket.cc ceph-10.2.9/src/rgw/rgw_bucket.cc --- ceph-10.2.7/src/rgw/rgw_bucket.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_bucket.cc 2017-07-13 13:05:37.000000000 +0000 @@ -5,8 +5,10 @@ #include #include +#include #include +#include #include "common/errno.h" #include "common/ceph_json.h" @@ -1413,6 +1415,126 @@ return 0; } +int RGWBucketAdminOp::limit_check(RGWRados *store, + RGWBucketAdminOpState& op_state, + const std::list& user_ids, + RGWFormatterFlusher& flusher, + bool warnings_only) +{ + int ret = 0; + const size_t max_entries = + store->ctx()->_conf->rgw_list_buckets_max_chunk; + + const size_t safe_max_objs_per_shard = + store->ctx()->_conf->rgw_safe_max_objects_per_shard; + + uint16_t shard_warn_pct = + store->ctx()->_conf->rgw_shard_warning_threshold; + if (shard_warn_pct > 100) + shard_warn_pct = 90; + + Formatter *formatter = flusher.get_formatter(); + flusher.start(0); + + formatter->open_array_section("users"); + + for (const auto& user_id : user_ids) { + formatter->open_object_section("user"); + formatter->dump_string("user_id", user_id); + bool done; + formatter->open_array_section("buckets"); + do { + RGWUserBuckets buckets; + string marker; + bool is_truncated; + + ret = rgw_read_user_buckets(store, user_id, buckets, + marker, string(), max_entries, false, + &is_truncated); + if (ret < 0) + return ret; + + map& m_buckets = buckets.get_buckets(); + + for (auto& iter : m_buckets) { + auto& bucket = iter.second.bucket; + uint32_t num_shards = 1; + uint64_t num_objects = 0; + + /* need info for num_shards */ + RGWBucketInfo info; + RGWObjectCtx obj_ctx(store); + + marker = bucket.name; /* Casey's location for marker update, + * as we may now not reach the end of + * the loop body */ 
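The relocated marker assignment called out in the comment above is load-bearing: with the new continue paths for buckets whose info or stats cannot be fetched, the loop body may no longer reach its end, so the pagination marker has to advance before any early exit or rgw_read_user_buckets() would be handed the same window forever. In outline (a sketch; the fetch_* and check_* helpers are hypothetical stand-ins for the calls above):

    // Sketch: always advance the pagination marker before skipping a bucket.
    for (auto& kv : m_buckets) {
      auto& bucket = kv.second.bucket;
      marker = bucket.name;                // progress even if we skip below
      if (fetch_bucket_info(bucket) < 0)   // stand-in for get_bucket_info()
        continue;                          // skipped, but marker already moved
      if (fetch_bucket_stats(bucket) < 0)  // stand-in for get_bucket_stats()
        continue;
      check_shard_fill(bucket);            // the limit check proper
    }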
+ + ret = store->get_bucket_info(obj_ctx, bucket.tenant, bucket.name, + info, nullptr); + if (ret < 0) + continue; + + /* need stats for num_entries */ + string bucket_ver, master_ver; + std::map stats; + ret = store->get_bucket_stats(bucket, RGW_NO_SHARD, &bucket_ver, + &master_ver, stats, nullptr); + + if (ret < 0) + continue; + + for (const auto& s : stats) { + num_objects += s.second.num_objects; + } + + num_shards = info.num_shards; + uint64_t objs_per_shard = num_objects / num_shards; + { + bool warn = false; + stringstream ss; + if (objs_per_shard > safe_max_objs_per_shard) { + double over = + 100 - (safe_max_objs_per_shard/objs_per_shard * 100); + ss << boost::format("OVER %4f%%") % over; + warn = true; + } else { + double fill_pct = + objs_per_shard / safe_max_objs_per_shard * 100; + if (fill_pct >= shard_warn_pct) { + ss << boost::format("WARN %4f%%") % fill_pct; + warn = true; + } else { + ss << "OK"; + } + } + + if (warn || (! warnings_only)) { + formatter->open_object_section("bucket"); + formatter->dump_string("bucket", bucket.name); + formatter->dump_string("tenant", bucket.tenant); + formatter->dump_int("num_objects", num_objects); + formatter->dump_int("num_shards", num_shards); + formatter->dump_int("objects_per_shard", objs_per_shard); + formatter->dump_string("fill_status", ss.str()); + formatter->close_section(); + } + } + } + + done = (m_buckets.size() < max_entries); + } while (!done); /* foreach: bucket */ + + formatter->close_section(); + formatter->close_section(); + formatter->flush(cout); + + } /* foreach: user_id */ + + formatter->close_section(); + formatter->flush(cout); + + return ret; +} /* RGWBucketAdminOp::limit_check */ int RGWBucketAdminOp::info(RGWRados *store, RGWBucketAdminOpState& op_state, RGWFormatterFlusher& flusher) @@ -1433,7 +1555,7 @@ CephContext *cct = store->ctx(); - size_t max_entries = cct->_conf->rgw_list_buckets_max_chunk; + const size_t max_entries = cct->_conf->rgw_list_buckets_max_chunk; bool show_stats = op_state.will_fetch_stats(); rgw_user user_id = op_state.get_user_id(); diff -Nru ceph-10.2.7/src/rgw/rgw_bucket.h ceph-10.2.9/src/rgw/rgw_bucket.h --- ceph-10.2.7/src/rgw/rgw_bucket.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_bucket.h 2017-07-13 13:05:37.000000000 +0000 @@ -314,6 +314,10 @@ static int remove_bucket(RGWRados *store, RGWBucketAdminOpState& op_state, bool bypass_gc = false, bool keep_index_consistent = true); static int remove_object(RGWRados *store, RGWBucketAdminOpState& op_state); static int info(RGWRados *store, RGWBucketAdminOpState& op_state, RGWFormatterFlusher& flusher); + static int limit_check(RGWRados *store, RGWBucketAdminOpState& op_state, + const std::list& user_ids, + RGWFormatterFlusher& flusher, + bool warnings_only = false); }; diff -Nru ceph-10.2.7/src/rgw/rgw_common.h ceph-10.2.9/src/rgw/rgw_common.h --- ceph-10.2.7/src/rgw/rgw_common.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_common.h 2017-07-13 13:05:37.000000000 +0000 @@ -428,7 +428,8 @@ RGW_OP_GET_INFO, /* rgw specific */ - RGW_OP_ADMIN_SET_METADATA + RGW_OP_ADMIN_SET_METADATA, + RGW_OP_GET_OBJ_LAYOUT, }; class RGWAccessControlPolicy; diff -Nru ceph-10.2.7/src/rgw/rgw_cr_rest.h ceph-10.2.9/src/rgw/rgw_cr_rest.h --- ceph-10.2.7/src/rgw/rgw_cr_rest.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_cr_rest.h 2017-07-13 13:05:37.000000000 +0000 @@ -12,7 +12,7 @@ RGWRESTConn *conn; RGWHTTPManager *http_manager; string path; - param_list_t params; + param_vec_t params; T *result; 
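One caveat in the limit_check() fill arithmetic above: safe_max_objs_per_shard and objs_per_shard are integer types, so both safe_max_objs_per_shard/objs_per_shard and objs_per_shard/safe_max_objs_per_shard truncate toward zero before the multiplication by 100, which skews the OVER and WARN percentages. The intended computation with explicit floating point (a sketch, not the shipped code):

    #include <algorithm>
    #include <cstdint>
    #include <string>

    // Sketch: the same thresholds as above, evaluated in double precision.
    std::string fill_status(uint64_t num_objects, uint32_t num_shards,
                            uint64_t safe_max, unsigned warn_pct)
    {
      double per_shard = double(num_objects) / std::max<uint32_t>(1, num_shards);
      if (per_shard > double(safe_max)) {
        double over = 100.0 - (double(safe_max) / per_shard * 100.0);
        return "OVER " + std::to_string(over) + "%";
      }
      double fill_pct = per_shard / double(safe_max) * 100.0;
      if (fill_pct >= warn_pct)
        return "WARN " + std::to_string(fill_pct) + "%";
      return "OK";
    }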
boost::intrusive_ptr http_op; @@ -72,7 +72,7 @@ RGWRESTConn *conn; RGWHTTPManager *http_manager; string path; - param_list_t params; + param_vec_t params; T *result; S input; diff -Nru ceph-10.2.7/src/rgw/rgw_data_sync.cc ceph-10.2.9/src/rgw/rgw_data_sync.cc --- ceph-10.2.7/src/rgw/rgw_data_sync.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_data_sync.cc 2017-07-13 13:05:37.000000000 +0000 @@ -367,6 +367,7 @@ int request_complete() { int ret = http_op->wait(result); + http_op->put(); if (ret < 0 && ret != -ENOENT) { ldout(sync_env->store->ctx(), 0) << "ERROR: failed to list remote datalog shard, ret=" << ret << dendl; return ret; @@ -593,16 +594,35 @@ int RGWRemoteDataLog::read_sync_status(rgw_data_sync_status *sync_status) { - int r = run(new RGWReadDataSyncStatusCoroutine(&sync_env, sync_status)); - if (r == -ENOENT) { - r = 0; + // cannot run concurrently with run_sync(), so run in a separate manager + RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry()); + RGWHTTPManager http_manager(store->ctx(), crs.get_completion_mgr()); + int ret = http_manager.set_threaded(); + if (ret < 0) { + ldout(store->ctx(), 0) << "failed in http_manager.set_threaded() ret=" << ret << dendl; + return ret; } - return r; + RGWDataSyncEnv sync_env_local = sync_env; + sync_env_local.http_manager = &http_manager; + ret = crs.run(new RGWReadDataSyncStatusCoroutine(&sync_env_local, sync_status)); + http_manager.stop(); + return ret; } int RGWRemoteDataLog::init_sync_status(int num_shards) { - return run(new RGWInitDataSyncStatusCoroutine(&sync_env, num_shards)); + RGWCoroutinesManager crs(store->ctx(), store->get_cr_registry()); + RGWHTTPManager http_manager(store->ctx(), crs.get_completion_mgr()); + int ret = http_manager.set_threaded(); + if (ret < 0) { + ldout(store->ctx(), 0) << "failed in http_manager.set_threaded() ret=" << ret << dendl; + return ret; + } + RGWDataSyncEnv sync_env_local = sync_env; + sync_env_local.http_manager = &http_manager; + ret = crs.run(new RGWInitDataSyncStatusCoroutine(&sync_env_local, num_shards)); + http_manager.stop(); + return ret; } static string full_data_sync_index_shard_oid(const string& source_zone, int shard_id) @@ -1483,20 +1503,14 @@ data_sync_cr->wakeup(shard_id, keys); } -int RGWRemoteDataLog::run_sync(int num_shards, rgw_data_sync_status& sync_status) +int RGWRemoteDataLog::run_sync(int num_shards) { - int r = run(new RGWReadDataSyncStatusCoroutine(&sync_env, &sync_status)); - if (r < 0 && r != -ENOENT) { - ldout(store->ctx(), 0) << "ERROR: failed to read sync status from source_zone=" << sync_env.source_zone << " r=" << r << dendl; - return r; - } - lock.get_write(); data_sync_cr = new RGWDataSyncControlCR(&sync_env, num_shards); data_sync_cr->get(); // run() will drop a ref, so take another lock.unlock(); - r = run(data_sync_cr); + int r = run(data_sync_cr); lock.get_write(); data_sync_cr->put(); diff -Nru ceph-10.2.7/src/rgw/rgw_data_sync.h ceph-10.2.9/src/rgw/rgw_data_sync.h --- ceph-10.2.7/src/rgw/rgw_data_sync.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_data_sync.h 2017-07-13 13:05:37.000000000 +0000 @@ -212,7 +212,7 @@ int get_shard_info(int shard_id); int read_sync_status(rgw_data_sync_status *sync_status); int init_sync_status(int num_shards); - int run_sync(int num_shards, rgw_data_sync_status& sync_status); + int run_sync(int num_shards); void wakeup(int shard_id, set& keys); }; @@ -231,7 +231,6 @@ string source_shard_status_oid_prefix; rgw_obj source_status_obj; - rgw_data_sync_status sync_status; map 
shard_objs; int num_shards; @@ -247,12 +246,12 @@ int init(); void finalize(); - rgw_data_sync_status& get_sync_status() { return sync_status; } - static string shard_obj_name(const string& source_zone, int shard_id); static string sync_status_oid(const string& source_zone); - int read_sync_status() { return source_log.read_sync_status(&sync_status); } + int read_sync_status(rgw_data_sync_status *sync_status) { + return source_log.read_sync_status(sync_status); + } int init_sync_status() { return source_log.init_sync_status(num_shards); } int read_log_info(rgw_datalog_info *log_info) { @@ -265,7 +264,7 @@ return source_log.read_source_log_shards_next(shard_markers, result); } - int run() { return source_log.run_sync(num_shards, sync_status); } + int run() { return source_log.run_sync(num_shards); } void wakeup(int shard_id, set& keys) { return source_log.wakeup(shard_id, keys); } void stop() { diff -Nru ceph-10.2.7/src/rgw/rgw_file.cc ceph-10.2.9/src/rgw/rgw_file.cc --- ceph-10.2.7/src/rgw/rgw_file.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_file.cc 2017-07-13 13:05:37.000000000 +0000 @@ -73,6 +73,7 @@ LookupFHResult RGWLibFS::stat_leaf(RGWFileHandle* parent, const char *path, + enum rgw_fh_type type, uint32_t flags) { /* find either-of , , only one of @@ -97,6 +98,10 @@ switch (ix) { case 0: { + /* type hint */ + if (type == RGW_FS_TYPE_DIRECTORY) + continue; + RGWStatObjRequest req(cct, get_user(), parent->bucket_name(), obj_path, RGWStatObjRequest::FLAG_NONE); @@ -122,6 +127,10 @@ case 1: { /* try dir form */ + /* type hint */ + if (type == RGW_FS_TYPE_FILE) + continue; + obj_path += "/"; RGWStatObjRequest req(cct, get_user(), parent->bucket_name(), obj_path, @@ -153,7 +162,8 @@ if ((rc == 0) && (req.get_ret() == 0)) { if (req.matched) { - // we need rgw object's key name equal to file name, if not return NULL + /* we need rgw object's key name equal to file name, if + * not return NULL */ if ((flags & RGWFileHandle::FLAG_EXACT_MATCH) && !req.exact_matched) { lsubdout(get_context(), rgw, 15) @@ -256,8 +266,16 @@ } std::string oname = rgw_fh->relative_object_name(); - if (rgw_fh->is_dir()) + if (rgw_fh->is_dir()) { + /* for the duration of our cache timer, trust positive + * child cache */ + if (rgw_fh->has_children()) { + rgw_fh->mtx.unlock(); + unref(rgw_fh); + return(-ENOTEMPTY); + } oname += "/"; + } RGWDeleteObjRequest req(cct, get_user(), parent->bucket_name(), oname); rc = rgwlib.get_fe()->execute_req(&req); @@ -667,6 +685,15 @@ rele(); } /* RGWLibFS::close */ + inline std::ostream& operator<<(std::ostream &os, struct timespec const &ts) { + os << "<timespec: tv_sec=" << ts.tv_sec << "; tv_nsec=" << ts.tv_nsec << ">"; + return os; + } + + std::ostream& operator<<(std::ostream &os, RGWLibFS::event const &ev) { os << "<event:" - << ">"; + << ";ts=" << ev.ts << ">"; return os; } @@ -696,13 +723,18 @@ uint32_t max_ev = std::max(1, get_context()->_conf->rgw_nfs_max_gc); - struct timespec now; + struct timespec now, expire_ts; event_vector ve; bool stop = false; std::deque &events = state.events; - (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); do { + (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); + lsubdout(get_context(), rgw, 15) + << "GC: top of expire loop" + << " now=" << now + << " expire_s=" << expire_s + << dendl; { lock_guard guard(state.mtx); /* LOCKED */ /* just return if no events */ @@ -713,7 +745,9 @@ (events.size() < 500) ?
max_ev : (events.size() / 4); for (uint32_t ix = 0; (ix < _max_ev) && (events.size() > 0); ++ix) { event& ev = events.front(); - if (ev.ts.tv_sec > (now.tv_sec + expire_s)) { + expire_ts = ev.ts; + expire_ts.tv_sec += expire_s; + if (expire_ts > now) { stop = true; break; } @@ -740,12 +774,29 @@ << dendl; goto rele; } - /* clear state */ + /* maybe clear state */ d = get(&rgw_fh->variant_type); if (d) { + struct timespec ev_ts = ev.ts; lock_guard guard(rgw_fh->mtx); - d->clear_state(); - rgw_fh->invalidate(); + struct timespec d_last_readdir = d->last_readdir; + if (unlikely(ev_ts < d_last_readdir)) { + /* readdir cycle in progress, don't invalidate */ + lsubdout(get_context(), rgw, 15) + << "GC: delay expiration for " + << rgw_fh->object_name() + << " ev.ts=" << ev_ts + << " last_readdir=" << d_last_readdir + << dendl; + continue; + } else { + lsubdout(get_context(), rgw, 15) + << "GC: expiring " + << rgw_fh->object_name() + << dendl; + rgw_fh->clear_state(); + rgw_fh->invalidate(); + } } rele: unref(rgw_fh); @@ -830,6 +881,20 @@ return true; } /* RGWFileHandle::reclaim */ + bool RGWFileHandle::has_children() const + { + if (unlikely(! is_dir())) + return false; + + RGWRMdirCheck req(fs->get_context(), fs->get_user(), this); + int rc = rgwlib.get_fe()->execute_req(&req); + if (! rc) { + return req.valid && req.has_children; + } + + return false; + } + int RGWFileHandle::readdir(rgw_readdir_cb rcb, void *cb_arg, uint64_t *offset, bool *eof, uint32_t flags) { @@ -838,12 +903,23 @@ struct timespec now; CephContext* cct = fs->get_context(); - (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */ - - if (flags & RGW_READDIR_FLAG_DOTDOT) { + if ((*offset == 0) && + (flags & RGW_READDIR_FLAG_DOTDOT)) { /* send '.' and '..' with their NFS-defined offsets */ - rcb(".", cb_arg, 1); - rcb("..", cb_arg, 2); + rcb(".", cb_arg, 1, RGW_LOOKUP_FLAG_DIR); + rcb("..", cb_arg, 2, RGW_LOOKUP_FLAG_DIR); + } + + lsubdout(fs->get_context(), rgw, 15) + << __func__ + << " offset=" << *offset + << dendl; + + directory* d = get(&variant_type); + if (d) { + (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */ + lock_guard guard(mtx); + d->last_readdir = now; } if (is_root()) { @@ -851,26 +927,37 @@ offset); rc = rgwlib.get_fe()->execute_req(&req); if (! rc) { + (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */ lock_guard guard(mtx); state.atime = now; - set_nlink(2 + 1); + if (*offset == 0) + set_nlink(2); + inc_nlink(req.d_count); *eof = req.eof(); event ev(event::type::READDIR, get_key(), state.atime); fs->state.push_event(ev); } } else { - rgw_obj_key marker{"", ""}; RGWReaddirRequest req(cct, fs->get_user(), this, rcb, cb_arg, offset); rc = rgwlib.get_fe()->execute_req(&req); if (! 
rc) { + (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */ lock_guard guard(mtx); state.atime = now; - set_nlink(2 + 1); + if (*offset == 0) + set_nlink(2); + inc_nlink(req.d_count); *eof = req.eof(); event ev(event::type::READDIR, get_key(), state.atime); fs->state.push_event(ev); } } + + lsubdout(fs->get_context(), rgw, 15) + << __func__ + << " final link count=" << state.nlink + << dendl; + return rc; } /* RGWFileHandle::readdir */ @@ -1022,9 +1109,13 @@ delete write_req; } - void RGWFileHandle::directory::clear_state() + void RGWFileHandle::clear_state() { - marker_cache.clear(); + directory* d = get(&variant_type); + if (d) { + state.nlink = 2; + d->last_marker = rgw_obj_key{}; + } } void RGWFileHandle::invalidate() { @@ -1406,10 +1497,13 @@ } } else { /* lookup in a readdir callback */ + enum rgw_fh_type fh_type = fh_type_of(flags); + uint32_t sl_flags = (flags & RGW_LOOKUP_FLAG_RCB) ? RGWFileHandle::FLAG_NONE : RGWFileHandle::FLAG_EXACT_MATCH; - fhr = fs->stat_leaf(parent, path, sl_flags); + + fhr = fs->stat_leaf(parent, path, fh_type, sl_flags); if (! get<0>(fhr)) { if (! (flags & RGW_LOOKUP_FLAG_CREATE)) return -ENOENT; diff -Nru ceph-10.2.7/src/rgw/rgw_file.h ceph-10.2.9/src/rgw/rgw_file.h --- ceph-10.2.7/src/rgw/rgw_file.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_file.h 2017-07-13 13:05:37.000000000 +0000 @@ -61,6 +61,20 @@ class RGWFileHandle; class RGWWriteRequest; + static inline bool operator <(const struct timespec& lhs, + const struct timespec& rhs) { + if (lhs.tv_sec == rhs.tv_sec) + return lhs.tv_nsec < rhs.tv_nsec; + else + return lhs.tv_sec < rhs.tv_sec; + } + + static inline bool operator ==(const struct timespec& lhs, + const struct timespec& rhs) { + return ((lhs.tv_sec == rhs.tv_sec) && + (lhs.tv_nsec == rhs.tv_nsec)); + } + /* * XXX * The current 64-bit, non-cryptographic hash used here is intended @@ -164,7 +178,12 @@ using lock_guard = std::lock_guard; using unique_lock = std::unique_lock; - using marker_cache_t = flat_map; + /* TODO: keeping just the last marker is sufficient for + * nfs-ganesha 2.4.5; in the near future, nfs-ganesha will + * be able to hint the name of the next dirent required, + * from which we can directly synthesize a RADOS marker. 
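
The timespec comparison operators introduced just above give the expire loop a total order over raw clock_gettime() values: tv_sec compares first, tv_nsec breaks ties. A minimal, self-contained check of that ordering (the expire_ts arithmetic mirrors the GC loop earlier in this diff; everything else here is illustrative):

  #include <cassert>
  #include <ctime>

  // Same shape as the operator< added in rgw_file.h: compare seconds
  // first, fall back to nanoseconds on a tie.
  static inline bool operator <(const struct timespec& lhs,
                                const struct timespec& rhs) {
    if (lhs.tv_sec == rhs.tv_sec)
      return lhs.tv_nsec < rhs.tv_nsec;
    else
      return lhs.tv_sec < rhs.tv_sec;
  }

  int main() {
    struct timespec ev_ts{100, 0};     // when the event was queued
    struct timespec expire_ts = ev_ts;
    expire_ts.tv_sec += 300;           // + expire_s, as in the GC loop
    struct timespec now{150, 999};
    assert(now < expire_ts);           // not expired yet: keep the event
    assert(!(expire_ts < now));
    struct timespec a{150, 1}, b{150, 2};
    assert(a < b);                     // nanoseconds decide ties
    return 0;
  }
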
+ * using marker_cache_t = flat_map; + */ struct State { uint64_t dev; @@ -189,17 +208,16 @@ struct directory { static constexpr uint32_t FLAG_NONE = 0x0000; - static constexpr uint32_t FLAG_CACHED = 0x0001; - static constexpr uint32_t FLAG_OVERFLOW = 0x0002; uint32_t flags; - marker_cache_t marker_cache; - - directory() : flags(FLAG_NONE) {} + rgw_obj_key last_marker; + struct timespec last_readdir; - void clear_state(); + directory() : flags(FLAG_NONE), last_readdir{0,0} {} }; + void clear_state(); + boost::variant variant_type; uint16_t depth; @@ -368,7 +386,7 @@ switch (fh.fh_type) { case RGW_FS_TYPE_DIRECTORY: - st->st_nlink = 3; + st->st_nlink = state.nlink; break; case RGW_FS_TYPE_FILE: st->st_nlink = 1; @@ -392,11 +410,11 @@ const std::string& object_name() const { return name; } - std::string full_object_name(bool omit_bucket = false) { + std::string full_object_name(bool omit_bucket = false) const { std::string path; std::vector segments; int reserve = 0; - RGWFileHandle* tfh = this; + const RGWFileHandle* tfh = this; while (tfh && !tfh->is_root() && !(tfh->is_bucket() && omit_bucket)) { segments.push_back(&tfh->object_name()); reserve += (1 + tfh->object_name().length()); @@ -417,11 +435,11 @@ return path; } - inline std::string relative_object_name() { + inline std::string relative_object_name() const { return full_object_name(true /* omit_bucket */); } - inline std::string format_child_name(const std::string& cbasename) { + inline std::string format_child_name(const std::string& cbasename) const { std::string child_name{relative_object_name()}; if ((child_name.size() > 0) && (child_name.back() != '/')) @@ -430,7 +448,7 @@ return child_name; } - inline std::string make_key_name(const char *name) { + inline std::string make_key_name(const char *name) const { std::string key_name{full_object_name()}; if (key_name.length() > 0) key_name += "/"; @@ -438,7 +456,7 @@ return key_name; } - fh_key make_fhk(const std::string& name) { + fh_key make_fhk(const std::string& name) const { if (depth <= 1) return fh_key(fhk.fh_hk.object, name.c_str()); else { @@ -453,22 +471,21 @@ directory* d = get(&variant_type); if (d) { unique_lock guard(mtx); - d->marker_cache.insert( - marker_cache_t::value_type(off, marker)); + d->last_marker = marker; } } - const rgw_obj_key* find_marker(uint64_t off) { + const rgw_obj_key* find_marker(uint64_t off) const { using std::get; - directory* d = get(&variant_type); - if (d) { - const auto& iter = d->marker_cache.find(off); - if (iter != d->marker_cache.end()) - return &(iter->second); + if (off > 0) { + const directory* d = get(&variant_type); + if (d ) { + return &d->last_marker; + } } return nullptr; } - + bool is_open() const { return flags & FLAG_OPEN; } bool is_root() const { return flags & FLAG_ROOT; } bool is_bucket() const { return flags & FLAG_BUCKET; } @@ -478,6 +495,7 @@ bool creating() const { return flags & FLAG_CREATING; } bool deleted() const { return flags & FLAG_DELETED; } bool stateless_open() const { return flags & FLAG_STATELESS_OPEN; } + bool has_children() const; int open(uint32_t gsh_flags) { lock_guard guard(mtx); @@ -518,6 +536,10 @@ flags &= ~FLAG_CREATING; } + void inc_nlink(const uint64_t n) { + state.nlink += n; + } + void set_nlink(const uint64_t n) { state.nlink = n; } @@ -678,6 +700,22 @@ return static_cast(fh->fh_private); } + static inline enum rgw_fh_type fh_type_of(uint32_t flags) { + enum rgw_fh_type fh_type; + switch(flags & RGW_LOOKUP_TYPE_FLAGS) + { + case RGW_LOOKUP_FLAG_DIR: + fh_type = RGW_FS_TYPE_DIRECTORY; + break; 
+ case RGW_LOOKUP_FLAG_FILE: + fh_type = RGW_FS_TYPE_FILE; + break; + default: + fh_type = RGW_FS_TYPE_NIL; + }; + return fh_type; + } + typedef std::tuple LookupFHResult; typedef std::tuple MkObjResult; @@ -1002,7 +1040,8 @@ const char *path, uint32_t flags); LookupFHResult stat_leaf(RGWFileHandle* parent, const char *path, - uint32_t flags); + enum rgw_fh_type type = RGW_FS_TYPE_NIL, + uint32_t flags = RGWFileHandle::FLAG_NONE); int read(RGWFileHandle* rgw_fh, uint64_t offset, size_t length, size_t* bytes_read, void* buffer, uint32_t flags); @@ -1110,12 +1149,13 @@ void* cb_arg; rgw_readdir_cb rcb; size_t ix; + uint32_t d_count; RGWListBucketsRequest(CephContext* _cct, RGWUserInfo *_user, RGWFileHandle* _rgw_fh, rgw_readdir_cb _rcb, void* _cb_arg, uint64_t* _offset) : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), offset(_offset), - cb_arg(_cb_arg), rcb(_rcb), ix(0) { + cb_arg(_cb_arg), rcb(_rcb), ix(0), d_count(0) { const auto& mk = rgw_fh->find_marker(*offset); if (mk) { marker = mk->name; @@ -1170,8 +1210,14 @@ for (const auto& iter : m) { boost::string_ref marker{iter.first}; const RGWBucketEnt& ent = iter.second; - /* call me maybe */ - this->operator()(ent.bucket.name, marker); + if (! this->operator()(ent.bucket.name, marker)) { + /* caller cannot accept more */ + lsubdout(cct, rgw, 5) << "ListBuckets rcb failed" + << " dirent=" << ent.bucket.name + << " call count=" << ix + << dendl; + return; + } ++ix; } } /* send_response_data */ @@ -1187,8 +1233,8 @@ /* update traversal cache */ rgw_fh->add_marker(off, rgw_obj_key{marker.data(), ""}, RGW_FS_TYPE_DIRECTORY); - rcb(name.data(), cb_arg, off); - return 0; + ++d_count; + return rcb(name.data(), cb_arg, off, RGW_LOOKUP_FLAG_DIR); } bool eof() { @@ -1204,8 +1250,8 @@ read directory content (bucket objects) */ - class RGWReaddirRequest : public RGWLibRequest, - public RGWListBucket /* RGWOp */ +class RGWReaddirRequest : public RGWLibRequest, + public RGWListBucket /* RGWOp */ { public: RGWFileHandle* rgw_fh; @@ -1213,12 +1259,13 @@ void* cb_arg; rgw_readdir_cb rcb; size_t ix; + uint32_t d_count; RGWReaddirRequest(CephContext* _cct, RGWUserInfo *_user, RGWFileHandle* _rgw_fh, rgw_readdir_cb _rcb, void* _cb_arg, uint64_t* _offset) : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), offset(_offset), - cb_arg(_cb_arg), rcb(_rcb), ix(0) { + cb_arg(_cb_arg), rcb(_rcb), ix(0), d_count(0) { const auto& mk = rgw_fh->find_marker(*offset); if (mk) { marker = *mk; @@ -1274,8 +1321,11 @@ *offset = off; /* update traversal cache */ rgw_fh->add_marker(off, marker, type); - rcb(name.data(), cb_arg, off); // XXX has to be legit C-style string - return 0; + ++d_count; + return rcb(name.data(), cb_arg, off, + (type == RGW_FS_TYPE_DIRECTORY) ? + RGW_LOOKUP_FLAG_DIR : + RGW_LOOKUP_FLAG_FILE); } virtual int get_params() { @@ -1308,8 +1358,14 @@ << " (" << sref << ")" << "" << dendl; - /* call me maybe */ - this->operator()(sref, next_marker, RGW_FS_TYPE_FILE); + if(! 
this->operator()(sref, next_marker, RGW_FS_TYPE_FILE)) { + /* caller cannot accept more */ + lsubdout(cct, rgw, 5) << "readdir rcb failed" + << " dirent=" << sref.data() + << " call count=" << ix + << dendl; + return; + } ++ix; } for (auto& iter : common_prefixes) { @@ -1362,6 +1418,89 @@ }; /* RGWReaddirRequest */ /* + dir has-children predicate (bucket objects) +*/ + +class RGWRMdirCheck : public RGWLibRequest, + public RGWListBucket /* RGWOp */ +{ +public: + const RGWFileHandle* rgw_fh; + bool valid; + bool has_children; + + RGWRMdirCheck (CephContext* _cct, RGWUserInfo *_user, + const RGWFileHandle* _rgw_fh) + : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), valid(false), + has_children(false) { + default_max = 2; + op = this; + } + + virtual bool only_bucket() override { return false; } + + virtual int op_init() override { + // assign store, s, and dialect_handler + RGWObjectCtx* rados_ctx + = static_cast(get_state()->obj_ctx); + // framework promises to call op_init after parent init + assert(rados_ctx); + RGWOp::init(rados_ctx->store, get_state(), this); + op = this; // assign self as op: REQUIRED + return 0; + } + + virtual int header_init() override { + struct req_state* s = get_state(); + s->info.method = "GET"; + s->op = OP_GET; + + std::string uri = "/" + rgw_fh->bucket_name() + "/"; + s->relative_uri = uri; + s->info.request_uri = uri; + s->info.effective_uri = uri; + s->info.request_params = ""; + s->info.domain = ""; /* XXX ? */ + + s->user = user; + + prefix = rgw_fh->relative_object_name(); + if (prefix.length() > 0) + prefix += "/"; + delimiter = '/'; + + return 0; + } + + virtual int get_params() override { + max = default_max; + return 0; + } + + virtual void send_response() override { + valid = true; + if ((objs.size() > 1) || + (! objs.empty() && + (objs.front().key.name != prefix))) { + has_children = true; + return; + } + for (auto& iter : common_prefixes) { + /* readdir never produces a name for this case */ + if (iter.first == "/") + continue; + has_children = true; + break; + } + } + + virtual void send_versioned_response() { + send_response(); + } + +}; /* RGWRMdirCheck */ + +/* create bucket */ diff -Nru ceph-10.2.7/src/rgw/rgw_http_client.cc ceph-10.2.9/src/rgw/rgw_http_client.cc --- ceph-10.2.7/src/rgw/rgw_http_client.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_http_client.cc 2017-07-13 13:05:37.000000000 +0000 @@ -113,6 +113,7 @@ return ret; } + /* * the following set of callbacks will be called either on RGWHTTPManager::process(), * or via the RGWHTTPManager async processing. 
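
The RGWRMdirCheck op defined above reduces rmdir's -ENOTEMPTY decision to a bounded listing: it requests at most two entries (default_max = 2) under the directory's prefix with delimiter '/', and reports a child if anything other than the directory's own placeholder object comes back. A standalone model of that predicate, assuming a Listing type that stands in for RGWListBucket's objs and common_prefixes:

  #include <algorithm>
  #include <cassert>
  #include <string>
  #include <vector>

  // A listing reduced to what the predicate inspects: object keys under
  // the prefix, and common prefixes (i.e. subdirectories).
  struct Listing {
    std::vector<std::string> objs;
    std::vector<std::string> common_prefixes;
  };

  static bool has_children(const Listing& l, const std::string& prefix) {
    // More than one key, or a single key that is not the directory's own
    // placeholder object, means a real child exists.
    if (l.objs.size() > 1 ||
        (!l.objs.empty() && l.objs.front() != prefix))
      return true;
    // Any common prefix except the degenerate "/" is a subdirectory
    // (readdir never produces a name for "/", as the comment above notes).
    return std::any_of(l.common_prefixes.begin(), l.common_prefixes.end(),
                       [](const std::string& p) { return p != "/"; });
  }

  int main() {
    assert(!has_children({{"photos/"}, {}}, "photos/"));             // empty dir
    assert(has_children({{"photos/", "photos/cat.jpg"}, {}}, "photos/"));
    assert(has_children({{}, {"photos/2017/"}}, "photos/"));         // nested dir
    return 0;
  }

Capping the probe at two results keeps the emptiness check O(1) no matter how large the bucket is.
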
@@ -173,11 +174,11 @@ return ret; } -static curl_slist *headers_to_slist(list<pair<string, string> >& headers) +static curl_slist *headers_to_slist(param_vec_t& headers) { curl_slist *h = NULL; - list<pair<string, string> >::iterator iter; + param_vec_t::iterator iter; for (iter = headers.begin(); iter != headers.end(); ++iter) { pair<string, string>& p = *iter; string val = p.first; diff -Nru ceph-10.2.7/src/rgw/rgw_http_client.h ceph-10.2.9/src/rgw/rgw_http_client.h --- ceph-10.2.7/src/rgw/rgw_http_client.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_http_client.h 2017-07-13 13:05:37.000000000 +0000 @@ -9,6 +9,9 @@ #include "include/atomic.h" #include "rgw_common.h" +using param_pair_t = pair<string, string>; +using param_vec_t = vector<param_pair_t>; + struct rgw_http_req_data; class RGWHTTPClient @@ -34,7 +37,7 @@ protected: CephContext *cct; - list<pair<string, string> > headers; + param_vec_t headers; int init_request(const char *method, const char *url, rgw_http_req_data *req_data); public: diff -Nru ceph-10.2.7/src/rgw/rgw_lib.h ceph-10.2.9/src/rgw/rgw_lib.h --- ceph-10.2.7/src/rgw/rgw_lib.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_lib.h 2017-07-13 13:05:37.000000000 +0000 @@ -5,6 +5,7 @@ #include #include "include/unordered_map.h" +#include "global/global_init.h" #include "rgw_common.h" #include "rgw_client_io.h" #include "rgw_rest.h" @@ -29,6 +30,7 @@ rgw::LDAPHelper* ldh; RGWREST rest; // XXX needed for RGWProcessEnv RGWRados* store; + boost::intrusive_ptr<CephContext> cct; public: RGWLib() : fec(nullptr), fe(nullptr), olog(nullptr), store(nullptr) diff -Nru ceph-10.2.7/src/rgw/rgw_main.cc ceph-10.2.9/src/rgw/rgw_main.cc --- ceph-10.2.7/src/rgw/rgw_main.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_main.cc 2017-07-13 13:05:37.000000000 +0000 @@ -178,8 +178,6 @@ return mgr; } -void intrusive_ptr_add_ref(CephContext* cct) { cct->get(); } -void intrusive_ptr_release(CephContext* cct) { cct->put(); } /* * start up the RADOS connection and then handle HTTP messages as they come in @@ -249,8 +247,10 @@ // Now that we've determined which frontend(s) to use, continue with global // initialization. Passing false as the final argument ensures that // global_pre_init() is not invoked twice. 
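
The rgw_main.cc hunk that follows is the lifetime half of this cleanup: instead of locally defined intrusive_ptr_add_ref/release hooks and a reference claimed late in main(), the CephContext returned by global_init() is held in a boost::intrusive_ptr for the whole run, so the context is torn down only after every other destructor has fired. A sketch of the underlying pattern, with a hypothetical Context standing in for CephContext and its get()/put() refcounting:

  #include <boost/intrusive_ptr.hpp>
  #include <atomic>

  // Stand-in for CephContext: an intrusively refcounted object.
  struct Context {
    std::atomic<int> nref{0};
    void get() { ++nref; }
    void put() { if (--nref == 0) delete this; }  // last ref tears down
  };

  // Found by boost::intrusive_ptr via ADL; 10.2.7's rgw_main.cc defined
  // these locally, 10.2.9 relies on the ones shipped with global_init().
  inline void intrusive_ptr_add_ref(Context* c) { c->get(); }
  inline void intrusive_ptr_release(Context* c) { c->put(); }

  boost::intrusive_ptr<Context> init() {
    // Like global_init(): hand the caller an owning smart pointer.
    return boost::intrusive_ptr<Context>(new Context(), /*add_ref=*/true);
  }

  int main() {
    auto cct = init();
    // ... daemon runs; cct keeps the context alive throughout ...
    return 0;  // cct is destroyed last, dropping the final reference
  }
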
- global_init(&def_args, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_DAEMON, - flags, "rgw_data", false); + // claim the reference and release it after subsequent destructors have fired + auto cct = global_init(&def_args, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON, + flags, "rgw_data", false); for (std::vector::iterator i = args.begin(); i != args.end(); ++i) { if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { @@ -290,9 +290,6 @@ common_init_finish(g_ceph_context); - // claim the reference and release it after subsequent destructors have fired - boost::intrusive_ptr cct(g_ceph_context, false); - rgw_tools_init(g_ceph_context); rgw_init_resolver(); diff -Nru ceph-10.2.7/src/rgw/rgw_object_expirer.cc ceph-10.2.9/src/rgw/rgw_object_expirer.cc --- ceph-10.2.7/src/rgw/rgw_object_expirer.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_object_expirer.cc 2017-07-13 13:05:37.000000000 +0000 @@ -60,8 +60,9 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_DAEMON, - CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS, "rgw_data"); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS, "rgw_data"); for (std::vector::iterator i = args.begin(); i != args.end(); ) { if (ceph_argparse_double_dash(args, i)) { diff -Nru ceph-10.2.7/src/rgw/rgw_op.cc ceph-10.2.9/src/rgw/rgw_op.cc --- ceph-10.2.7/src/rgw/rgw_op.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_op.cc 2017-07-13 13:05:37.000000000 +0000 @@ -411,7 +411,8 @@ } s->object_acl = new RGWAccessControlPolicy(s->cct); - rgw_obj obj(s->bucket, s->object); + rgw_obj obj(s->bucket, s->object.name); + obj.set_instance(s->object.instance); store->set_atomic(s->obj_ctx, obj); if (prefetch_data) { @@ -723,7 +724,9 @@ if (op_ret < 0) return op_ret; - if (!verify_object_permission(s, bucket_policy, &obj_policy, RGW_PERM_READ)) { + if (s->system_request) { + ldout(s->cct, 2) << "overriding permissions due to system operation" << dendl; + } else if (!verify_object_permission(s, bucket_policy, &obj_policy, RGW_PERM_READ)) { return -EPERM; } @@ -1274,7 +1277,7 @@ return; } attr_iter = attrs.find(RGW_ATTR_SLO_MANIFEST); - if (attr_iter != attrs.end()) { + if (attr_iter != attrs.end() && !skip_manifest) { is_slo = true; op_ret = handle_slo_manifest(attr_iter->second); if (op_ret < 0) { @@ -4494,9 +4497,10 @@ void RGWGetHealthCheck::execute() { - if (! g_conf->rgw_healthcheck_disabling_path.empty() && - ::access(g_conf->rgw_healthcheck_disabling_path.c_str(), F_OK )) { - op_ret = -ERR_SERVICE_UNAVAILABLE; + if (!g_conf->rgw_healthcheck_disabling_path.empty() && + (::access(g_conf->rgw_healthcheck_disabling_path.c_str(), F_OK) == 0)) { + /* Disabling path specified & existent in the filesystem. 
*/ + op_ret = -ERR_SERVICE_UNAVAILABLE; /* 503 */ } else { op_ret = 0; /* 200 OK */ } @@ -4814,7 +4818,8 @@ if (op_ret < 0) return; - rgw_obj obj(s->bucket, s->object); + rgw_obj obj(s->bucket, s->object.name); + obj.set_instance(s->object.instance); store->set_atomic(s->obj_ctx, obj); @@ -4829,6 +4834,29 @@ } } +void RGWGetObjLayout::pre_exec() +{ + rgw_bucket_object_pre_exec(s); +} + +void RGWGetObjLayout::execute() +{ + rgw_obj obj(s->bucket, s->object.name); + obj.set_instance(s->object.instance); + target = new RGWRados::Object(store, s->bucket_info, *static_cast(s->obj_ctx), obj); + RGWRados::Object::Read stat_op(target); + + op_ret = stat_op.prepare(NULL, NULL); + if (op_ret < 0) { + return; + } + + head_obj = stat_op.state.obj; + + op_ret = target->get_manifest(&manifest); +} + + RGWHandler::~RGWHandler() { } diff -Nru ceph-10.2.7/src/rgw/rgw_op.h ceph-10.2.9/src/rgw/rgw_op.h --- ceph-10.2.7/src/rgw/rgw_op.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_op.h 2017-07-13 13:05:37.000000000 +0000 @@ -1615,4 +1615,32 @@ virtual uint32_t op_mask() { return RGW_OP_TYPE_WRITE; } }; +class RGWGetObjLayout : public RGWOp { +protected: + RGWRados::Object *target{nullptr}; + RGWObjManifest *manifest{nullptr}; + rgw_obj head_obj; + +public: + RGWGetObjLayout() { + delete target; + } + + int check_caps(RGWUserCaps& caps) { + return caps.check_cap("admin", RGW_CAP_READ); + } + int verify_permission() { + return check_caps(s->user->caps); + } + void pre_exec(); + void execute(); + + virtual void send_response() = 0; + virtual const string name() { return "get_obj_layout"; } + virtual RGWOpType get_type() { return RGW_OP_GET_OBJ_LAYOUT; } + virtual uint32_t op_mask() { return RGW_OP_TYPE_READ; } +}; + + + #endif /* CEPH_RGW_OP_H */ diff -Nru ceph-10.2.7/src/rgw/rgw_rados.cc ceph-10.2.9/src/rgw/rgw_rados.cc --- ceph-10.2.7/src/rgw/rgw_rados.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_rados.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1247,7 +1247,7 @@ { ldout(cct, 20) << __func__ << " realm " << realm_id << " period " << id << dendl; for (std::map::const_iterator iter = map.zonegroups.begin(); - iter != map.zonegroups.end(); iter++) { + iter != map.zonegroups.end(); ++iter) { period_map.zonegroups_by_api[iter->second.api_name] = iter->second; period_map.zonegroups[iter->second.get_name()] = iter->second; } @@ -3243,9 +3243,9 @@ { RGWZoneGroupMap zonegroupmap; - string pool_name = cct->_conf->rgw_region_root_pool; + string pool_name = cct->_conf->rgw_zone_root_pool; if (pool_name.empty()) { - pool_name = RGW_DEFAULT_ZONEGROUP_ROOT_POOL; + pool_name = RGW_DEFAULT_ZONE_ROOT_POOL; } string oid = region_map_oid; @@ -3333,23 +3333,23 @@ int ret = rgw_get_system_obj(this, obj_ctx, pool ,oid, bl, NULL, NULL); if (ret < 0 && ret != -ENOENT) { - ldout(cct, 0) << "failed to read converted: ret "<< ret << " " << cpp_strerror(-ret) + ldout(cct, 0) << __func__ << " failed to read converted: ret "<< ret << " " << cpp_strerror(-ret) << dendl; return ret; } else if (ret != -ENOENT) { - ldout(cct, 0) << "System already converted " << dendl; + ldout(cct, 20) << "System already converted " << dendl; return 0; } string default_region; ret = default_zonegroup.init(cct, this, false, true); if (ret < 0) { - ldout(cct, 0) << "failed init default region: ret "<< ret << " " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " failed init default region: ret "<< ret << " " << cpp_strerror(-ret) << dendl; return ret; } ret = default_zonegroup.read_default_id(default_region, 
true); if (ret < 0 && ret != -ENOENT) { - ldout(cct, 0) << "failed reading old default region: ret "<< ret << " " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " failed reading old default region: ret "<< ret << " " << cpp_strerror(-ret) << dendl; return ret; } @@ -3357,7 +3357,7 @@ list regions; ret = list_regions(regions); if (ret < 0 && ret != -ENOENT) { - ldout(cct, 0) << "failed to list regions: ret "<< ret << " " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " failed to list regions: ret "<< ret << " " << cpp_strerror(-ret) << dendl; return ret; } else if (ret == -ENOENT || regions.empty()) { RGWZoneParams zoneparams(default_zone_name); @@ -3386,7 +3386,7 @@ RGWZoneGroup region(*iter); int ret = region.init(cct, this, true, true); if (ret < 0) { - ldout(cct, 0) << "failed init region "<< *iter << ": " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " failed init region "<< *iter << ": " << cpp_strerror(-ret) << dendl; return ret; } if (region.is_master) { @@ -3411,27 +3411,27 @@ RGWRealm new_realm(new_realm_id,new_realm_name); ret = new_realm.init(cct, this, false); if (ret < 0) { - ldout(cct, 0) << "Error initing new realm: " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " Error initing new realm: " << cpp_strerror(-ret) << dendl; return ret; } ret = new_realm.create(); if (ret < 0 && ret != -EEXIST) { - ldout(cct, 0) << "Error creating new realm: " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " Error creating new realm: " << cpp_strerror(-ret) << dendl; return ret; } ret = new_realm.set_as_default(); if (ret < 0) { - ldout(cct, 0) << "Error setting realm as default: " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " Error setting realm as default: " << cpp_strerror(-ret) << dendl; return ret; } ret = realm.init(cct, this); if (ret < 0) { - ldout(cct, 0) << "Error initing realm: " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " Error initing realm: " << cpp_strerror(-ret) << dendl; return ret; } ret = current_period.init(cct, this, realm.get_id(), realm.get_name()); if (ret < 0) { - ldout(cct, 0) << "Error initing current period: " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " Error initing current period: " << cpp_strerror(-ret) << dendl; return ret; } } @@ -3440,12 +3440,12 @@ /* create zonegroups */ for (iter = regions.begin(); iter != regions.end(); ++iter) { - ldout(cct, 0) << "Converting " << *iter << dendl; + ldout(cct, 0) << __func__ << "Converting " << *iter << dendl; /* check to see if we don't have already a zonegroup with this name */ RGWZoneGroup new_zonegroup(*iter); ret = new_zonegroup.init(cct , this); if (ret == 0 && new_zonegroup.get_id() != *iter) { - ldout(cct, 0) << "zonegroup "<< *iter << " already exists id " << new_zonegroup.get_id () << + ldout(cct, 0) << __func__ << " zonegroup "<< *iter << " already exists id " << new_zonegroup.get_id () << " skipping conversion " << dendl; continue; } @@ -3453,55 +3453,58 @@ zonegroup.set_id(*iter); int ret = zonegroup.init(cct, this, true, true); if (ret < 0) { - ldout(cct, 0) << "failed init zonegroup: ret "<< ret << " " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " failed init zonegroup: ret "<< ret << " " << cpp_strerror(-ret) << dendl; return ret; } zonegroup.realm_id = realm.get_id(); /* fix default region master zone */ if (*iter == default_zonegroup_name && zonegroup.master_zone.empty()) { - ldout(cct, 0) << "Setting default zone as master for default region" << 
dendl; + ldout(cct, 0) << __func__ << " Setting default zone as master for default region" << dendl; zonegroup.master_zone = default_zone_name; } ret = zonegroup.update(); if (ret < 0 && ret != -EEXIST) { - ldout(cct, 0) << "failed to update zonegroup " << *iter << ": ret "<< ret << " " << cpp_strerror(-ret) + ldout(cct, 0) << __func__ << " failed to update zonegroup " << *iter << ": ret "<< ret << " " << cpp_strerror(-ret) << dendl; return ret; } ret = zonegroup.update_name(); if (ret < 0 && ret != -EEXIST) { - ldout(cct, 0) << "failed to update_name for zonegroup " << *iter << ": ret "<< ret << " " << cpp_strerror(-ret) + ldout(cct, 0) << __func__ << " failed to update_name for zonegroup " << *iter << ": ret "<< ret << " " << cpp_strerror(-ret) << dendl; return ret; } if (zonegroup.get_name() == default_region) { ret = zonegroup.set_as_default(); if (ret < 0) { - ldout(cct, 0) << "failed to set_as_default " << *iter << ": ret "<< ret << " " << cpp_strerror(-ret) + ldout(cct, 0) << __func__ << " failed to set_as_default " << *iter << ": ret "<< ret << " " << cpp_strerror(-ret) << dendl; return ret; } } for (map::const_iterator iter = zonegroup.zones.begin(); iter != zonegroup.zones.end(); - iter ++) { - ldout(cct, 0) << "Converting zone" << iter->first << dendl; + ++iter) { + ldout(cct, 0) << __func__ << " Converting zone" << iter->first << dendl; RGWZoneParams zoneparams(iter->first, iter->first); zoneparams.set_id(iter->first); zoneparams.realm_id = realm.get_id(); ret = zoneparams.init(cct, this); - if (ret < 0) { - ldout(cct, 0) << "failed to init zoneparams " << iter->first << ": " << cpp_strerror(-ret) << dendl; + if (ret < 0 && ret != -ENOENT) { + ldout(cct, 0) << __func__ << " failed to init zoneparams " << iter->first << ": " << cpp_strerror(-ret) << dendl; return ret; + } else if (ret == -ENOENT) { + ldout(cct, 0) << __func__ << " zone is part of another cluster " << iter->first << " skipping " << dendl; + continue; } zonegroup.realm_id = realm.get_id(); ret = zoneparams.update(); if (ret < 0 && ret != -EEXIST) { - ldout(cct, 0) << "failed to update zoneparams " << iter->first << ": " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " failed to update zoneparams " << iter->first << ": " << cpp_strerror(-ret) << dendl; return ret; } ret = zoneparams.update_name(); if (ret < 0 && ret != -EEXIST) { - ldout(cct, 0) << "failed to init zoneparams " << iter->first << ": " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " failed to init zoneparams " << iter->first << ": " << cpp_strerror(-ret) << dendl; return ret; } } @@ -3509,7 +3512,7 @@ if (!current_period.get_id().empty()) { ret = current_period.add_zonegroup(zonegroup); if (ret < 0) { - ldout(cct, 0) << "failed to add zonegroup to current_period: " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " failed to add zonegroup to current_period: " << cpp_strerror(-ret) << dendl; return ret; } } @@ -3518,17 +3521,17 @@ if (!current_period.get_id().empty()) { ret = current_period.update(); if (ret < 0) { - ldout(cct, 0) << "failed to update new period: " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " failed to update new period: " << cpp_strerror(-ret) << dendl; return ret; } ret = current_period.store_info(false); if (ret < 0) { - ldout(cct, 0) << "failed to store new period: " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " failed to store new period: " << cpp_strerror(-ret) << dendl; return ret; } ret = current_period.reflect(); if (ret < 0) { - ldout(cct, 
0) << "failed to update local objects: " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " failed to update local objects: " << cpp_strerror(-ret) << dendl; return ret; } } @@ -3537,12 +3540,12 @@ RGWZoneGroup zonegroup(iter); int ret = zonegroup.init(cct, this, true, true); if (ret < 0) { - ldout(cct, 0) << "failed init zonegroup" << iter << ": ret "<< ret << " " << cpp_strerror(-ret) << dendl; + ldout(cct, 0) << __func__ << " failed init zonegroup" << iter << ": ret "<< ret << " " << cpp_strerror(-ret) << dendl; return ret; } ret = zonegroup.delete_obj(true); if (ret < 0 && ret != -ENOENT) { - ldout(cct, 0) << "failed to delete region " << iter << ": ret "<< ret << " " << cpp_strerror(-ret) + ldout(cct, 0) << __func__ << " failed to delete region " << iter << ": ret "<< ret << " " << cpp_strerror(-ret) << dendl; return ret; } @@ -3552,7 +3555,7 @@ ret = rgw_put_system_obj(this, pool, oid, bl.c_str(), bl.length(), true, NULL, real_time(), NULL); if (ret < 0 ) { - ldout(cct, 0) << "failed to mark cluster as converted: ret "<< ret << " " << cpp_strerror(-ret) + ldout(cct, 0) << __func__ << " failed to mark cluster as converted: ret "<< ret << " " << cpp_strerror(-ret) << dendl; return ret; } @@ -3858,9 +3861,9 @@ meta_mgr->init_oldest_log_period(); } - /* not point of running sync thread if there is a single zone or - we don't have a master zone configured or there is no rest_master_conn */ - if (get_zonegroup().zones.size() < 2 || get_zonegroup().master_zone.empty() || !rest_master_conn) { + /* no point of running sync thread if we don't have a master zone configured + or there is no rest_master_conn */ + if (get_zonegroup().master_zone.empty() || !rest_master_conn) { run_sync_thread = false; } @@ -6759,9 +6762,8 @@ RGWPutObjProcessor_Atomic processor(obj_ctx, dest_bucket_info, dest_obj.bucket, dest_obj.get_orig_obj(), cct->_conf->rgw_obj_stripe_size, tag, dest_bucket_info.versioning_enabled()); - const string& instance = dest_obj.get_instance(); - if (instance != "null") { - processor.set_version_id(dest_obj.get_instance()); + if (version_id && *version_id != "null") { + processor.set_version_id(*version_id); } processor.set_olh_epoch(olh_epoch); int ret = processor.prepare(this, NULL); @@ -6832,7 +6834,8 @@ ret = conn->get_obj(user_id, info, src_obj, pmod, unmod_ptr, dest_mtime_weight.zone_short_id, dest_mtime_weight.pg_ver, - true, &cb, &in_stream_req); + true /* prepend_meta */, true /* sync manifest */, + &cb, &in_stream_req); if (ret < 0) { goto set_err_state; } @@ -7996,6 +7999,11 @@ int RGWRados::delete_system_obj(rgw_obj& obj, RGWObjVersionTracker *objv_tracker) { + if (obj.get_object().empty()) { + ldout(cct, 1) << "delete_system_obj got empty object name " + << obj << ", returning EINVAL" << dendl; + return -EINVAL; + } rgw_rados_ref ref; rgw_bucket bucket; int r = get_obj_ref(obj, &ref, &bucket); @@ -8286,6 +8294,19 @@ return ret; } +int RGWRados::Object::get_manifest(RGWObjManifest **pmanifest) +{ + RGWObjState *astate; + int r = get_state(&astate, true); + if (r < 0) { + return r; + } + + *pmanifest = &astate->manifest; + + return 0; +} + int RGWRados::Object::Read::get_attr(const char *name, bufferlist& dest) { RGWObjState *state; diff -Nru ceph-10.2.7/src/rgw/rgw_rados.h ceph-10.2.9/src/rgw/rgw_rados.h --- ceph-10.2.7/src/rgw/rgw_rados.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_rados.h 2017-07-13 13:05:37.000000000 +0000 @@ -2223,6 +2223,7 @@ rgw_obj& get_obj() { return obj; } RGWObjectCtx& get_ctx() { return ctx; } RGWBucketInfo& 
get_bucket_info() { return bucket_info; } + int get_manifest(RGWObjManifest **pmanifest); int get_bucket_shard(BucketShard **pbs) { if (!bs_initialized) { diff -Nru ceph-10.2.7/src/rgw/rgw_rest_client.cc ceph-10.2.9/src/rgw/rgw_rest_client.cc --- ceph-10.2.7/src/rgw/rgw_rest_client.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_rest_client.cc 2017-07-13 13:05:37.000000000 +0000 @@ -200,7 +200,7 @@ for (miter = extra_args.begin(); miter != extra_args.end(); ++miter) { append_param(dest, miter->first, miter->second); } - list >::iterator iter; + param_vec_t::iterator iter; for (iter = params.begin(); iter != params.end(); ++iter) { append_param(dest, iter->first, iter->second); } @@ -631,7 +631,7 @@ get_params_str(args, params_str); /* merge params with extra args so that we can sign correctly */ - for (list >::iterator iter = params.begin(); iter != params.end(); ++iter) { + for (param_vec_t::iterator iter = params.begin(); iter != params.end(); ++iter) { new_info.args.append(iter->first, iter->second); } diff -Nru ceph-10.2.7/src/rgw/rgw_rest_client.h ceph-10.2.9/src/rgw/rgw_rest_client.h --- ceph-10.2.7/src/rgw/rgw_rest_client.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_rest_client.h 2017-07-13 13:05:37.000000000 +0000 @@ -4,8 +4,6 @@ #ifndef CEPH_RGW_REST_CLIENT_H #define CEPH_RGW_REST_CLIENT_H -#include - #include "rgw_http_client.h" class RGWGetDataCB; @@ -18,7 +16,7 @@ string url; map out_headers; - list > params; + param_vec_t params; bufferlist::iterator *send_iter; @@ -31,20 +29,20 @@ int sign_request(RGWAccessKey& key, RGWEnv& env, req_info& info); public: - RGWRESTSimpleRequest(CephContext *_cct, const string& _url, list > *_headers, - list > *_params) : RGWHTTPClient(_cct), http_status(0), status(0), + RGWRESTSimpleRequest(CephContext *_cct, const string& _url, param_vec_t *_headers, + param_vec_t *_params) : RGWHTTPClient(_cct), http_status(0), status(0), url(_url), send_iter(NULL), max_response(0) { set_headers(_headers); set_params(_params); } - void set_headers(list > *_headers) { + void set_headers(param_vec_t *_headers) { if (_headers) headers = *_headers; } - void set_params(list > *_params) { + void set_params(param_vec_t *_params) { if (_params) params = *_params; } @@ -74,8 +72,8 @@ int add_output_data(bufferlist& bl); int send_data(void *ptr, size_t len); - RGWRESTStreamWriteRequest(CephContext *_cct, const string& _url, list > *_headers, - list > *_params) : RGWRESTSimpleRequest(_cct, _url, _headers, _params), + RGWRESTStreamWriteRequest(CephContext *_cct, const string& _url, param_vec_t *_headers, + param_vec_t *_params) : RGWRESTSimpleRequest(_cct, _url, _headers, _params), lock("RGWRESTStreamWriteRequest"), cb(NULL), http_manager(_cct) {} ~RGWRESTStreamWriteRequest(); int put_obj_init(RGWAccessKey& key, rgw_obj& obj, uint64_t obj_size, map& attrs); @@ -101,8 +99,7 @@ int receive_data(void *ptr, size_t len); RGWRESTStreamRWRequest(CephContext *_cct, const char *_method, const string& _url, RGWGetDataCB *_cb, - list > *_headers, - list > *_params) : RGWRESTSimpleRequest(_cct, _url, _headers, _params), + param_vec_t *_headers, param_vec_t *_params) : RGWRESTSimpleRequest(_cct, _url, _headers, _params), lock("RGWRESTStreamReadRequest"), cb(_cb), chunk_ofs(0), ofs(0), http_manager(_cct), method(_method), write_ofs(0) { } @@ -120,8 +117,8 @@ class RGWRESTStreamReadRequest : public RGWRESTStreamRWRequest { public: - RGWRESTStreamReadRequest(CephContext *_cct, const string& _url, RGWGetDataCB *_cb, list > *_headers, - list > 
*_params) : RGWRESTStreamRWRequest(_cct, "GET", _url, _cb, _headers, _params) {} + RGWRESTStreamReadRequest(CephContext *_cct, const string& _url, RGWGetDataCB *_cb, param_vec_t *_headers, + param_vec_t *_params) : RGWRESTStreamRWRequest(_cct, "GET", _url, _cb, _headers, _params) {} }; #endif diff -Nru ceph-10.2.7/src/rgw/rgw_rest_config.cc ceph-10.2.9/src/rgw/rgw_rest_config.cc --- ceph-10.2.7/src/rgw/rgw_rest_config.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_rest_config.cc 2017-07-13 13:05:37.000000000 +0000 @@ -52,12 +52,28 @@ flusher.flush(); } +void RGWOp_ZoneConfig_Get::send_response() { + const RGWZoneParams& zone_params = store->get_zone_params(); + + set_req_state_err(s, http_ret); + dump_errno(s); + end_header(s); + + if (http_ret < 0) + return; + + encode_json("zone_params", zone_params, s->formatter); + flusher.flush(); +} + RGWOp* RGWHandler_Config::op_get() { bool exists; string type = s->info.args.get("type", &exists); if (type.compare("zonegroup-map") == 0) { return new RGWOp_ZoneGroupMap_Get(false); + } else if (type.compare("zone") == 0) { + return new RGWOp_ZoneConfig_Get(); } else { return new RGWOp_ZoneGroupMap_Get(true); } diff -Nru ceph-10.2.7/src/rgw/rgw_rest_config.h ceph-10.2.9/src/rgw/rgw_rest_config.h --- ceph-10.2.7/src/rgw/rgw_rest_config.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_rest_config.h 2017-07-13 13:05:37.000000000 +0000 @@ -36,6 +36,24 @@ } }; +class RGWOp_ZoneConfig_Get : public RGWRESTOp { + RGWZoneParams zone_params; +public: + RGWOp_ZoneConfig_Get() {} + + int check_caps(RGWUserCaps& caps) { + return caps.check_cap("admin", RGW_CAP_READ); + } + int verify_permission() { + return check_caps(s->user->caps); + } + void execute() {} /* store already has the info we need, just need to send response */ + void send_response(); + const string name() { + return "get_zone_config"; + } +}; + class RGWHandler_Config : public RGWHandler_Auth_S3 { protected: RGWOp *op_get(); diff -Nru ceph-10.2.7/src/rgw/rgw_rest_conn.cc ceph-10.2.9/src/rgw/rgw_rest_conn.cc --- ceph-10.2.7/src/rgw/rgw_rest_conn.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_rest_conn.cc 2017-07-13 13:05:37.000000000 +0000 @@ -44,17 +44,27 @@ return endpoint; } +static void populate_params(param_vec_t& params, const rgw_user *uid, const string& zonegroup) +{ + if (uid) { + string uid_str = uid->to_str(); + if (!uid->empty()) { + params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "uid", uid_str)); + } + } + if (!zonegroup.empty()) { + params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "zonegroup", zonegroup)); + } +} + int RGWRESTConn::forward(const rgw_user& uid, req_info& info, obj_version *objv, size_t max_response, bufferlist *inbl, bufferlist *outbl) { string url; int ret = get_url(url); if (ret < 0) return ret; - string uid_str = uid.to_str(); - param_list_t params; - if (!uid.empty()) - params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "uid", uid_str)); - params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "zonegroup", self_zone_group)); + param_vec_t params; + populate_params(params, &uid, self_zone_group); if (objv) { params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "tag", objv->tag)); char buf[16]; @@ -79,10 +89,8 @@ if (ret < 0) return ret; - string uid_str = uid.to_str(); - param_list_t params; - params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "uid", uid_str)); - params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "zonegroup", self_zone_group)); + param_vec_t params; + populate_params(params, &uid, 
self_zone_group); *req = new RGWRESTStreamWriteRequest(cct, url, NULL, ¶ms); return (*req)->put_obj_init(key, obj, obj_size, attrs); } @@ -118,21 +126,22 @@ int RGWRESTConn::get_obj(const rgw_user& uid, req_info *info /* optional */, rgw_obj& obj, const real_time *mod_ptr, const real_time *unmod_ptr, uint32_t mod_zone_id, uint64_t mod_pg_ver, - bool prepend_metadata, RGWGetDataCB *cb, RGWRESTStreamReadRequest **req) + bool prepend_metadata, bool sync_manifest, + RGWGetDataCB *cb, RGWRESTStreamReadRequest **req) { string url; int ret = get_url(url); if (ret < 0) return ret; - param_list_t params; - if (!uid.empty()) { - params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "uid", uid.to_str())); - } - params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "zonegroup", self_zone_group)); + param_vec_t params; + populate_params(params, &uid, self_zone_group); if (prepend_metadata) { params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "prepend-metadata", self_zone_group)); } + if (sync_manifest) { + params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "sync-manifest", "")); + } if (!obj.get_instance().empty()) { const string& instance = obj.get_instance(); params.push_back(param_pair_t("versionId", instance)); @@ -175,7 +184,7 @@ } int RGWRESTConn::get_resource(const string& resource, - param_list_t *extra_params, + param_vec_t *extra_params, map *extra_headers, bufferlist& bl, RGWHTTPManager *mgr) @@ -185,13 +194,13 @@ if (ret < 0) return ret; - param_list_t params; + param_vec_t params; if (extra_params) { params.insert(params.end(), extra_params->begin(), extra_params->end()); } - params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "zonegroup", self_zone_group)); + populate_params(params, nullptr, self_zone_group); RGWStreamIntoBufferlist cb(bl); @@ -216,7 +225,7 @@ RGWRESTReadResource::RGWRESTReadResource(RGWRESTConn *_conn, const string& _resource, const rgw_http_param_pair *pp, - param_list_t *extra_headers, + param_vec_t *extra_headers, RGWHTTPManager *_mgr) : cct(_conn->get_ctx()), conn(_conn), resource(_resource), params(make_param_list(pp)), cb(bl), mgr(_mgr), @@ -227,8 +236,8 @@ RGWRESTReadResource::RGWRESTReadResource(RGWRESTConn *_conn, const string& _resource, - param_list_t& _params, - param_list_t *extra_headers, + param_vec_t& _params, + param_vec_t *extra_headers, RGWHTTPManager *_mgr) : cct(_conn->get_ctx()), conn(_conn), resource(_resource), params(_params), cb(bl), mgr(_mgr), req(cct, conn->get_url(), &cb, NULL, NULL) @@ -236,9 +245,9 @@ init_common(extra_headers); } -void RGWRESTReadResource::init_common(param_list_t *extra_headers) +void RGWRESTReadResource::init_common(param_vec_t *extra_headers) { - params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "zonegroup", conn->get_self_zonegroup())); + populate_params(params, nullptr, conn->get_self_zonegroup()); if (extra_headers) { headers.insert(extra_headers->begin(), extra_headers->end()); @@ -274,7 +283,7 @@ RGWRESTPostResource::RGWRESTPostResource(RGWRESTConn *_conn, const string& _resource, const rgw_http_param_pair *pp, - param_list_t *extra_headers, + param_vec_t *extra_headers, RGWHTTPManager *_mgr) : cct(_conn->get_ctx()), conn(_conn), resource(_resource), params(make_param_list(pp)), cb(bl), mgr(_mgr), @@ -285,8 +294,8 @@ RGWRESTPostResource::RGWRESTPostResource(RGWRESTConn *_conn, const string& _resource, - param_list_t& params, - param_list_t *extra_headers, + param_vec_t& params, + param_vec_t *extra_headers, RGWHTTPManager *_mgr) : cct(_conn->get_ctx()), conn(_conn), resource(_resource), params(params), cb(bl), mgr(_mgr), 
req(cct, "POST", conn->get_url(), &cb, NULL, NULL) @@ -294,9 +303,9 @@ init_common(extra_headers); } -void RGWRESTPostResource::init_common(param_list_t *extra_headers) +void RGWRESTPostResource::init_common(param_vec_t *extra_headers) { - params.push_back(param_pair_t(RGW_SYS_PARAM_PREFIX "zonegroup", conn->get_self_zonegroup())); + populate_params(params, nullptr, conn->get_self_zonegroup()); if (extra_headers) { headers.insert(extra_headers->begin(), extra_headers->end()); diff -Nru ceph-10.2.7/src/rgw/rgw_rest_conn.h ceph-10.2.9/src/rgw/rgw_rest_conn.h --- ceph-10.2.7/src/rgw/rgw_rest_conn.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_rest_conn.h 2017-07-13 13:05:37.000000000 +0000 @@ -36,14 +36,10 @@ const char *val; }; -using param_pair_t = pair; -// TODO: consider vector instead of list -using param_list_t = std::list; - // copy a null-terminated rgw_http_param_pair list into a list of string pairs -inline param_list_t make_param_list(const rgw_http_param_pair* pp) +inline param_vec_t make_param_list(const rgw_http_param_pair* pp) { - param_list_t params; + param_vec_t params; while (pp && pp->key) { string k = pp->key; string v = (pp->val ? pp->val : ""); @@ -93,23 +89,24 @@ int get_obj(const rgw_user& uid, req_info *info /* optional */, rgw_obj& obj, const ceph::real_time *mod_ptr, const ceph::real_time *unmod_ptr, uint32_t mod_zone_id, uint64_t mod_pg_ver, - bool prepend_metadata, RGWGetDataCB *cb, RGWRESTStreamReadRequest **req); + bool prepend_metadata, bool sync_manifest, RGWGetDataCB *cb, + RGWRESTStreamReadRequest **req); int complete_request(RGWRESTStreamReadRequest *req, string& etag, ceph::real_time *mtime, map& attrs); int get_resource(const string& resource, - param_list_t *extra_params, + param_vec_t *extra_params, map* extra_headers, bufferlist& bl, RGWHTTPManager *mgr = NULL); template - int get_json_resource(const string& resource, param_list_t *params, T& t); + int get_json_resource(const string& resource, param_vec_t *params, T& t); template int get_json_resource(const string& resource, const rgw_http_param_pair *pp, T& t); }; template -int RGWRESTConn::get_json_resource(const string& resource, param_list_t *params, T& t) +int RGWRESTConn::get_json_resource(const string& resource, param_vec_t *params, T& t) { bufferlist bl; int ret = get_resource(resource, params, NULL, bl); @@ -128,7 +125,7 @@ template int RGWRESTConn::get_json_resource(const string& resource, const rgw_http_param_pair *pp, T& t) { - param_list_t params = make_param_list(pp); + param_vec_t params = make_param_list(pp); return get_json_resource(resource, ¶ms, t); } @@ -146,7 +143,7 @@ CephContext *cct; RGWRESTConn *conn; string resource; - param_list_t params; + param_vec_t params; map headers; bufferlist bl; RGWStreamIntoBufferlist cb; @@ -154,19 +151,19 @@ RGWHTTPManager *mgr; RGWRESTStreamReadRequest req; - void init_common(param_list_t *extra_headers); + void init_common(param_vec_t *extra_headers); public: RGWRESTReadResource(RGWRESTConn *_conn, const string& _resource, const rgw_http_param_pair *pp, - param_list_t *extra_headers, + param_vec_t *extra_headers, RGWHTTPManager *_mgr); RGWRESTReadResource(RGWRESTConn *_conn, const string& _resource, - param_list_t& _params, - param_list_t *extra_headers, + param_vec_t& _params, + param_vec_t *extra_headers, RGWHTTPManager *_mgr); void set_user_info(void *user_info) { @@ -260,7 +257,7 @@ CephContext *cct; RGWRESTConn *conn; string resource; - param_list_t params; + param_vec_t params; map headers; bufferlist bl; 
RGWStreamIntoBufferlist cb; @@ -268,19 +265,19 @@ RGWHTTPManager *mgr; RGWRESTStreamRWRequest req; - void init_common(param_list_t *extra_headers); + void init_common(param_vec_t *extra_headers); public: RGWRESTPostResource(RGWRESTConn *_conn, const string& _resource, const rgw_http_param_pair *pp, - param_list_t *extra_headers, + param_vec_t *extra_headers, RGWHTTPManager *_mgr); RGWRESTPostResource(RGWRESTConn *_conn, const string& _resource, - param_list_t& params, - param_list_t *extra_headers, + param_vec_t& params, + param_vec_t *extra_headers, RGWHTTPManager *_mgr); void set_user_info(void *user_info) { diff -Nru ceph-10.2.7/src/rgw/rgw_rest_s3.cc ceph-10.2.9/src/rgw/rgw_rest_s3.cc --- ceph-10.2.7/src/rgw/rgw_rest_s3.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_rest_s3.cc 2017-07-13 13:05:37.000000000 +0000 @@ -113,6 +113,15 @@ return RGWGetObj_ObjStore_S3::send_response_data_error(); } +int RGWGetObj_ObjStore_S3::get_params() +{ + // for multisite sync requests, only read the slo manifest itself, rather than + // all of the data from its parts. the parts will sync as separate objects + skip_manifest = s->info.args.exists(RGW_SYS_PARAM_PREFIX "sync-manifest"); + + return RGWGetObj_ObjStore::get_params(); +} + int RGWGetObj_ObjStore_S3::send_response_data_error() { bufferlist bl; @@ -2860,6 +2869,38 @@ rgw_flush_formatter_and_reset(s, s->formatter); } +void RGWGetObjLayout_ObjStore_S3::send_response() +{ + if (op_ret) + set_req_state_err(s, op_ret); + dump_errno(s); + end_header(s, this, "application/json"); + + JSONFormatter f; + + if (op_ret < 0) { + return; + } + + f.open_object_section("result"); + ::encode_json("head", head_obj, &f); + ::encode_json("manifest", *manifest, &f); + f.open_array_section("data_location"); + for (auto miter = manifest->obj_begin(); miter != manifest->obj_end(); ++miter) { + f.open_object_section("obj"); + rgw_obj loc = miter.get_location(); + ::encode_json("ofs", miter.get_ofs(), &f); + ::encode_json("loc", loc, &f); + ::encode_json("loc_ofs", miter.location_ofs(), &f); + ::encode_json("loc_size", miter.get_stripe_size(), &f); + f.close_section(); + rgw_flush_formatter(s, &f); + } + f.close_section(); + f.close_section(); + rgw_flush_formatter(s, &f); +} + RGWOp *RGWHandler_REST_Service_S3::op_get() { if (is_usage_op()) { @@ -2991,6 +3032,8 @@ return new RGWGetACLs_ObjStore_S3; } else if (s->info.args.exists("uploadId")) { return new RGWListMultipart_ObjStore_S3; + } else if (s->info.args.exists("layout")) { + return new RGWGetObjLayout_ObjStore_S3; } return get_obj_op(true); } @@ -3352,6 +3395,7 @@ return 0; } else { + /* Authorization in Header */ /* AWS4 */ @@ -3494,6 +3538,9 @@ { string::size_type pos; bool using_qs; + /* used for pre-signatured url, We shouldn't return -ERR_REQUEST_TIME_SKEWED when + current time <= X-Amz-Expires */ + bool qsr = false; uint64_t now_req = 0; uint64_t now = ceph_clock_now(s->cct); @@ -3529,12 +3576,12 @@ return -EPERM; s->aws4_auth->expires = s->info.args.get("X-Amz-Expires"); - if (s->aws4_auth->expires.size() != 0) { + if (!s->aws4_auth->expires.empty()) { /* X-Amz-Expires provides the time period, in seconds, for which the generated presigned URL is valid. 
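
The validation resuming below boils down to two range checks plus an expiry comparison; sketched here as a free function over plain time_t values (check_presigned_window and issued_at are hypothetical names — the real code parses X-Amz-Date and X-Amz-Expires in rgw_rest_s3.cc, and uses the qsr flag to skip the clock-skew test for such query-string requests):

  #include <cerrno>
  #include <ctime>

  constexpr time_t kMaxExpiry = 7 * 24 * 60 * 60;  // 604800s, the SigV4 cap

  // Returns 0 while a presigned URL is still usable, -EPERM otherwise.
  // Boundary treatment is approximate; the diff shows only the range
  // check on exp and the rejection once the window has passed.
  int check_presigned_window(time_t now, time_t issued_at, time_t expires) {
    if (expires < 1 || expires > kMaxExpiry)
      return -EPERM;               // X-Amz-Expires out of range
    if (now > issued_at + expires)
      return -EPERM;               // window elapsed: URL no longer valid
    return 0;                      // accept; skew test skipped (qsr == true)
  }

  int main() {
    time_t now = 1500000000;
    return (check_presigned_window(now, now - 30, 60) == 0 &&
            check_presigned_window(now, now - 120, 60) == -EPERM) ? 0 : 1;
  }
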
The minimum value you can set is 1, and the maximum is 604800 (seven days) */ time_t exp = atoll(s->aws4_auth->expires.c_str()); - if ((exp < 1) || (exp > 604800)) { + if ((exp < 1) || (exp > 7*24*60*60)) { dout(10) << "NOTICE: exp out of range, exp = " << exp << dendl; return -EPERM; } @@ -3544,12 +3591,17 @@ dout(10) << "NOTICE: now = " << now << ", now_req = " << now_req << ", exp = " << exp << dendl; return -EPERM; } + qsr = true; } - if ( (now_req < now - RGW_AUTH_GRACE_MINS * 60) || - (now_req > now + RGW_AUTH_GRACE_MINS * 60) ) { + if ((now_req < now - RGW_AUTH_GRACE_MINS * 60 || + now_req > now + RGW_AUTH_GRACE_MINS * 60) && !qsr) { dout(10) << "NOTICE: request time skew too big." << dendl; - dout(10) << "now_req = " << now_req << " now = " << now << "; now - RGW_AUTH_GRACE_MINS=" << now - RGW_AUTH_GRACE_MINS * 60 << "; now + RGW_AUTH_GRACE_MINS=" << now + RGW_AUTH_GRACE_MINS * 60 << dendl; + dout(10) << "now_req = " << now_req << " now = " << now + << "; now - RGW_AUTH_GRACE_MINS=" + << now - RGW_AUTH_GRACE_MINS * 60 + << "; now + RGW_AUTH_GRACE_MINS=" + << now + RGW_AUTH_GRACE_MINS * 60 << dendl; return -ERR_REQUEST_TIME_SKEWED; } diff -Nru ceph-10.2.7/src/rgw/rgw_rest_s3.h ceph-10.2.9/src/rgw/rgw_rest_s3.h --- ceph-10.2.7/src/rgw/rgw_rest_s3.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_rest_s3.h 2017-07-13 13:05:37.000000000 +0000 @@ -30,6 +30,7 @@ RGWGetObj_ObjStore_S3() {} ~RGWGetObj_ObjStore_S3() {} + int get_params() override; int send_response_data_error(); int send_response_data(bufferlist& bl, off_t ofs, off_t len); void set_custom_http_response(int http_ret) { custom_http_ret = http_ret; } @@ -364,6 +365,14 @@ void end_response(); }; +class RGWGetObjLayout_ObjStore_S3 : public RGWGetObjLayout { +public: + RGWGetObjLayout_ObjStore_S3() {} + ~RGWGetObjLayout_ObjStore_S3() {} + + void send_response(); +}; + class RGW_Auth_S3_Keystone_ValidateToken : public RGWHTTPClient { private: bufferlist rx_buffer; diff -Nru ceph-10.2.7/src/rgw/rgw_rest_swift.cc ceph-10.2.9/src/rgw/rgw_rest_swift.cc --- ceph-10.2.7/src/rgw/rgw_rest_swift.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_rest_swift.cc 2017-07-13 13:05:37.000000000 +0000 @@ -251,15 +251,18 @@ while (iter != objs.end() || pref_iter != common_prefixes.end()) { bool do_pref = false; bool do_objs = false; - rgw_obj_key& key = iter->key; + rgw_obj_key key; + if (iter != objs.end()) { + key = iter->key; + } if (pref_iter == common_prefixes.end()) do_objs = true; else if (iter == objs.end()) do_pref = true; - else if (key.name.compare(pref_iter->first) == 0) { + else if (!key.empty() && key.name.compare(pref_iter->first) == 0) { do_objs = true; ++pref_iter; - } else if (key.name.compare(pref_iter->first) <= 0) + } else if (!key.empty() && key.name.compare(pref_iter->first) <= 0) do_objs = true; else do_pref = true; diff -Nru ceph-10.2.7/src/rgw/rgw_rest_user.cc ceph-10.2.9/src/rgw/rgw_rest_user.cc --- ceph-10.2.7/src/rgw/rgw_rest_user.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_rest_user.cc 2017-07-13 13:05:37.000000000 +0000 @@ -327,11 +327,13 @@ std::string uid_str; std::string subuser; std::string secret_key; + std::string access_key; std::string perm_str; std::string key_type_str; bool gen_subuser = false; // FIXME placeholder bool gen_secret; + bool gen_access; uint32_t perm_mask = 0; int32_t key_type = KEY_TYPE_SWIFT; @@ -342,12 +344,14 @@ rgw_user uid(uid_str); RESTArgs::get_string(s, "subuser", subuser, &subuser); + RESTArgs::get_string(s, "access-key", 
access_key, &access_key); RESTArgs::get_string(s, "secret-key", secret_key, &secret_key); RESTArgs::get_string(s, "access", perm_str, &perm_str); RESTArgs::get_string(s, "key-type", key_type_str, &key_type_str); //RESTArgs::get_bool(s, "generate-subuser", false, &gen_subuser); RESTArgs::get_bool(s, "generate-secret", false, &gen_secret); - + RESTArgs::get_bool(s, "gen-access-key", false, &gen_access); + perm_mask = rgw_str_to_perm(perm_str.c_str()); op_state.set_perm(perm_mask); @@ -358,11 +362,17 @@ if (!subuser.empty()) op_state.set_subuser(subuser); + if (!access_key.empty()) + op_state.set_access_key(access_key); + if (!secret_key.empty()) op_state.set_secret_key(secret_key); op_state.set_generate_subuser(gen_subuser); + if (gen_access) + op_state.set_gen_access(); + if (gen_secret) op_state.set_gen_secret(); diff -Nru ceph-10.2.7/src/rgw/rgw_swift.cc ceph-10.2.9/src/rgw/rgw_swift.cc --- ceph-10.2.7/src/rgw/rgw_swift.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_swift.cc 2017-07-13 13:05:37.000000000 +0000 @@ -688,8 +688,8 @@ * of Swift API entry point removed. */ const size_t pos = g_conf->rgw_swift_url_prefix.find_last_not_of('/') + 1; const vector allowed_paths = { - s->info.request_uri, - s->info.request_uri.substr(pos + 1) + s->decoded_uri, + s->decoded_uri.substr(pos + 1) }; vector allowed_methods; @@ -826,9 +826,16 @@ void RGWSwift::init_keystone() { keystone_token_cache = new RGWKeystoneTokenCache(cct, cct->_conf->rgw_keystone_token_cache_size); - + /* revocation logic needs to be smarter, but meanwhile, + * make it optional. + * see http://tracker.ceph.com/issues/9493 + * http://tracker.ceph.com/issues/19499 + */ + if (cct->_conf->rgw_keystone_revocation_interval > 0 + && cct->_conf->rgw_keystone_token_cache_size ) { keystone_revoke_thread = new KeystoneRevokeThread(cct, this); keystone_revoke_thread->create("rgw_swift_k_rev"); + } } diff -Nru ceph-10.2.7/src/rgw/rgw_sync.cc ceph-10.2.9/src/rgw/rgw_sync.cc --- ceph-10.2.7/src/rgw/rgw_sync.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_sync.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1,6 +1,8 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab +#include + #include "common/ceph_json.h" #include "common/RWLock.h" #include "common/RefCountedObj.h" @@ -492,6 +494,7 @@ } yield { int ret = http_op->wait(shard_info); + http_op->put(); if (ret < 0) { return set_cr_error(ret); } @@ -556,6 +559,7 @@ int request_complete() { int ret = http_op->wait(result); + http_op->put(); if (ret < 0 && ret != -ENOENT) { ldout(sync_env->store->ctx(), 0) << "ERROR: failed to list remote mdlog shard, ret=" << ret << dendl; return ret; @@ -833,27 +837,25 @@ } iter = result.begin(); for (; iter != result.end(); ++iter) { - RGWRados *store; - int ret; - yield { - if (!lease_cr->is_locked()) { - lost_lock = true; - break; - } - ldout(cct, 20) << "list metadata: section=" << *sections_iter << " key=" << *iter << dendl; - string s = *sections_iter + ":" + *iter; - int shard_id; - store = sync_env->store; - ret = store->meta_mgr->get_log_shard_id(*sections_iter, *iter, &shard_id); - if (ret < 0) { - ldout(cct, 0) << "ERROR: could not determine shard id for " << *sections_iter << ":" << *iter << dendl; - ret_status = ret; - break; - } - if (!entries_index->append(s, shard_id)) { - break; - } - } + if (!lease_cr->is_locked()) { + lost_lock = true; + break; + } + yield; // allow entries_index consumer to make progress + + ldout(cct, 20) << "list metadata: section=" << 
*sections_iter << " key=" << *iter << dendl; + string s = *sections_iter + ":" + *iter; + int shard_id; + RGWRados *store = sync_env->store; + int ret = store->meta_mgr->get_log_shard_id(*sections_iter, *iter, &shard_id); + if (ret < 0) { + ldout(cct, 0) << "ERROR: could not determine shard id for " << *sections_iter << ":" << *iter << dendl; + ret_status = ret; + break; + } + if (!entries_index->append(s, shard_id)) { + break; + } } } yield { @@ -951,6 +953,7 @@ } yield { int ret = http_op->wait_bl(pbl); + http_op->put(); if (ret < 0) { return set_cr_error(ret); } @@ -1230,6 +1233,7 @@ RGWMetadataLog* mdlog; //< log of syncing period uint32_t shard_id; rgw_meta_sync_marker& sync_marker; + boost::optional<rgw_meta_sync_marker> temp_marker; //< for pending updates string marker; string max_marker; const std::string& period_marker; //< max marker stored in next period @@ -1452,18 +1456,21 @@ if (!lost_lock) { /* update marker to reflect we're done with full sync */ - if (can_adjust_marker) yield { - sync_marker.state = rgw_meta_sync_marker::IncrementalSync; - sync_marker.marker = sync_marker.next_step_marker; - sync_marker.next_step_marker.clear(); - - RGWRados *store = sync_env->store; - ldout(sync_env->cct, 0) << *this << ": saving marker pos=" << sync_marker.marker << dendl; - using WriteMarkerCR = RGWSimpleRadosWriteCR<rgw_meta_sync_marker>; - call(new WriteMarkerCR(sync_env->async_rados, store, pool, - sync_env->shard_obj_name(shard_id), - sync_marker)); + if (can_adjust_marker) { + // apply updates to a temporary marker, or operate() will send us + // to incremental_sync() after we yield + temp_marker = sync_marker; + temp_marker->state = rgw_meta_sync_marker::IncrementalSync; + temp_marker->marker = std::move(temp_marker->next_step_marker); + temp_marker->next_step_marker.clear(); + ldout(sync_env->cct, 0) << *this << ": saving marker pos=" << temp_marker->marker << dendl; + + using WriteMarkerCR = RGWSimpleRadosWriteCR<rgw_meta_sync_marker>; + yield call(new WriteMarkerCR(sync_env->async_rados, sync_env->store, + pool, sync_env->shard_obj_name(shard_id), + *temp_marker)); } + if (retcode < 0) { ldout(sync_env->cct, 0) << "ERROR: failed to set sync marker: retcode=" << retcode << dendl; return retcode; @@ -1489,6 +1496,12 @@ if (lost_lock) { return -EBUSY; } + + // apply the sync marker update + assert(temp_marker); + sync_marker = std::move(*temp_marker); + temp_marker = boost::none; + // must not yield after this point!
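
A note on the full-sync marker hunk above: operate() dispatches on sync_marker.state every time this coroutine is resumed, so mutating sync_marker in place before the yielding write would steer the next resume into incremental_sync() before the new marker had actually been persisted. Staging the update in temp_marker and committing it only once all yields are behind us closes that window. A minimal sketch of the same staging pattern, with hypothetical names (State, SyncShard, persist_async) standing in for the rgw types:

    // Stage a state transition across an asynchronous write so that
    // re-entrant dispatch never observes the new state prematurely.
    #include <boost/optional.hpp>
    #include <cassert>
    #include <string>
    #include <utility>

    struct State { int phase = 0; std::string marker; };

    struct SyncShard {
      State state;                    // dispatch key: read on every resume
      boost::optional<State> staged;  // pending update, not yet visible

      void begin_transition() {
        staged = state;               // copy, then mutate only the copy
        staged->phase = 1;
        persist_async(*staged);       // may suspend; 'state' is still old
      }

      void on_persisted(int r) {
        if (r < 0) { staged = boost::none; return; }  // clean rollback
        assert(staged);
        state = std::move(*staged);   // commit; safe to observe now
        staged = boost::none;         // must not suspend after this point
      }

      void persist_async(const State&) { /* stub for an async write */ }
    };
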
} return 0; } diff -Nru ceph-10.2.7/src/rgw/rgw_token.cc ceph-10.2.9/src/rgw/rgw_token.cc --- ceph-10.2.7/src/rgw/rgw_token.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_token.cc 2017-07-13 13:05:37.000000000 +0000 @@ -64,7 +64,8 @@ argv_to_vec(argc, (const char **)argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); char *v{nullptr}; diff -Nru ceph-10.2.7/src/rgw/rgw_user.cc ceph-10.2.9/src/rgw/rgw_user.cc --- ceph-10.2.7/src/rgw/rgw_user.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/rgw/rgw_user.cc 2017-07-13 13:05:37.000000000 +0000 @@ -373,9 +373,11 @@ int rgw_remove_email_index(RGWRados *store, string& email) { + if (email.empty()) { + return 0; + } rgw_obj obj(store->get_zone_params().user_email_pool, email); - int ret = store->delete_system_obj(obj); - return ret; + return store->delete_system_obj(obj); } int rgw_remove_swift_name_index(RGWRados *store, string& swift_name) @@ -443,11 +445,11 @@ } } - rgw_obj email_obj(store->get_zone_params().user_email_pool, info.user_email); ldout(store->ctx(), 10) << "removing email index: " << info.user_email << dendl; - ret = store->delete_system_obj(email_obj); + ret = rgw_remove_email_index(store, info.user_email); if (ret < 0 && ret != -ENOENT) { - ldout(store->ctx(), 0) << "ERROR: could not remove " << info.user_id << ":" << email_obj << ", should be fixed (err=" << ret << ")" << dendl; + ldout(store->ctx(), 0) << "ERROR: could not remove email index object for " + << info.user_email << ", should be fixed (err=" << ret << ")" << dendl; return ret; } @@ -1389,6 +1391,7 @@ { std::string subprocess_msg; int ret; + int32_t key_type = op_state.get_key_type(); ret = check_op(op_state, &subprocess_msg); if (ret < 0) { @@ -1396,6 +1399,10 @@ return ret; } + if (key_type == KEY_TYPE_S3 && op_state.get_access_key().empty()) { + op_state.set_gen_access(); + } + if (op_state.get_secret_key().empty()) { op_state.set_gen_secret(); } diff -Nru ceph-10.2.7/src/test/admin_socket.cc ceph-10.2.9/src/test/admin_socket.cc --- ceph-10.2.7/src/test/admin_socket.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/admin_socket.cc 2017-07-13 13:05:37.000000000 +0000 @@ -12,15 +12,12 @@ * */ -#include - #include "common/Mutex.h" #include "common/Cond.h" #include "common/admin_socket.h" #include "common/admin_socket_client.h" #include "common/ceph_argparse.h" -#include "global/global_init.h" -#include "global/global_context.h" +#include "test/unit.h" #include #include @@ -293,17 +290,6 @@ } } -int main(int argc, char **argv) { - vector args; - argv_to_vec(argc, (const char **)argv, args); - - vector def_args; - global_init(&def_args, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: * compile-command: "cd .. 
; diff -Nru ceph-10.2.7/src/test/bench/small_io_bench_dumb.cc ceph-10.2.9/src/test/bench/small_io_bench_dumb.cc --- ceph-10.2.7/src/test/bench/small_io_bench_dumb.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/bench/small_io_bench_dumb.cc 2017-07-13 13:05:37.000000000 +0000 @@ -95,7 +95,7 @@ ceph_options.push_back(i->c_str()); } - global_init( + auto cct = global_init( &def_args, ceph_options, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); diff -Nru ceph-10.2.7/src/test/bench/small_io_bench_fs.cc ceph-10.2.9/src/test/bench/small_io_bench_fs.cc --- ceph-10.2.7/src/test/bench/small_io_bench_fs.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/bench/small_io_bench_fs.cc 2017-07-13 13:05:37.000000000 +0000 @@ -106,7 +106,7 @@ ceph_options.push_back(i->c_str()); } - global_init( + auto cct = global_init( &def_args, ceph_options, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); diff -Nru ceph-10.2.7/src/test/bench/tp_bench.cc ceph-10.2.9/src/test/bench/tp_bench.cc --- ceph-10.2.7/src/test/bench/tp_bench.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/bench/tp_bench.cc 2017-07-13 13:05:37.000000000 +0000 @@ -142,7 +142,7 @@ ceph_options.push_back(i->c_str()); } - global_init( + auto cct = global_init( &def_args, ceph_options, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); diff -Nru ceph-10.2.7/src/test/bench_log.cc ceph-10.2.9/src/test/bench_log.cc --- ceph-10.2.7/src/test/bench_log.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/bench_log.cc 2017-07-13 13:05:37.000000000 +0000 @@ -39,7 +39,8 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_OSD, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_OSD, + CODE_ENVIRONMENT_UTILITY, 0); utime_t start = ceph_clock_now(NULL); diff -Nru ceph-10.2.7/src/test/centos-6/ceph.spec.in ceph-10.2.9/src/test/centos-6/ceph.spec.in --- ceph-10.2.7/src/test/centos-6/ceph.spec.in 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/centos-6/ceph.spec.in 2017-07-13 13:05:36.000000000 +0000 @@ -14,7 +14,7 @@ # # Please submit bugfixes or comments via http://tracker.ceph.com/ # -%bcond_with ocf +%bcond_without ocf %bcond_without cephfs_java %bcond_with tests %bcond_with xio @@ -214,6 +214,7 @@ Requires: hdparm Requires: cryptsetup Requires: findutils +Requires: psmisc Requires: which %if 0%{?suse_version} Recommends: ntp-daemon @@ -667,11 +668,13 @@ --without-lttng \ --without-babeltrace \ %endif - $CEPH_EXTRA_CONFIGURE_ARGS \ - %{?_with_ocf} \ +%if 0%{with ocf} + --with-ocf \ +%endif %if %{without tcmalloc} --without-tcmalloc \ %endif + $CEPH_EXTRA_CONFIGURE_ARGS \ CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS" %if %{with lowmem_builder} diff -Nru ceph-10.2.7/src/test/centos-7/ceph.spec.in ceph-10.2.9/src/test/centos-7/ceph.spec.in --- ceph-10.2.7/src/test/centos-7/ceph.spec.in 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/centos-7/ceph.spec.in 2017-07-13 13:05:36.000000000 +0000 @@ -14,7 +14,7 @@ # # Please submit bugfixes or comments via http://tracker.ceph.com/ # -%bcond_with ocf +%bcond_without ocf %bcond_without cephfs_java %bcond_with tests %bcond_with xio @@ -214,6 +214,7 @@ Requires: hdparm Requires: cryptsetup Requires: findutils +Requires: psmisc Requires: which %if 0%{?suse_version} Recommends: ntp-daemon @@ -667,11 +668,13 @@ --without-lttng \ --without-babeltrace \ %endif - 
$CEPH_EXTRA_CONFIGURE_ARGS \ - %{?_with_ocf} \ +%if 0%{with ocf} + --with-ocf \ +%endif %if %{without tcmalloc} --without-tcmalloc \ %endif + $CEPH_EXTRA_CONFIGURE_ARGS \ CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS" %if %{with lowmem_builder} diff -Nru ceph-10.2.7/src/test/ceph_crypto.cc ceph-10.2.9/src/test/ceph_crypto.cc --- ceph-10.2.7/src/test/ceph_crypto.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/ceph_crypto.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1,6 +1,11 @@ #include "common/ceph_crypto.h" -#include "test/unit.h" +#include "gtest/gtest.h" +#include "common/ceph_argparse.h" +#include "common/ceph_crypto.h" +#include "common/common_init.h" +#include "global/global_init.h" +#include "global/global_context.h" class CryptoEnvironment: public ::testing::Environment { public: @@ -138,3 +143,15 @@ ASSERT_EXIT(do_simple_crypto(), ::testing::ExitedWithCode(0), "^$"); } #endif //GTEST_HAS_DEATH_TEST + +int main(int argc, char **argv) { + std::vector args(argv, argv + argc); + env_to_vec(args); + auto cct = global_init(NULL, args, + CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + common_init_finish(g_ceph_context); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff -Nru ceph-10.2.7/src/test/cli/radosgw-admin/help.t ceph-10.2.9/src/test/cli/radosgw-admin/help.t --- ceph-10.2.7/src/test/cli/radosgw-admin/help.t 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/cli/radosgw-admin/help.t 2017-07-13 13:05:37.000000000 +0000 @@ -17,6 +17,7 @@ key create create access key key rm remove access key bucket list list buckets + bucket limit check show bucket sharding stats bucket link link bucket to specified user bucket unlink unlink bucket from specified user bucket stats returns bucket statistics @@ -27,6 +28,7 @@ bi put store bucket index object entries bi list list raw bucket index entries object rm remove object + object stat stat an object for its metadata object unlink unlink object from bucket index objects expire run expired objects cleanup period delete delete a period @@ -66,8 +68,6 @@ zonegroup placement modify modify a placement target of a specific zonegroup zonegroup placement rm remove a placement target from a zonegroup zonegroup placement default set a zonegroup's default placement target - zonegroup-map get show zonegroup-map - zonegroup-map set set zonegroup-map (requires infile) zone create create a new zone zone delete delete a zone zone get show zone cluster params @@ -200,7 +200,9 @@ --categories= comma separated list of categories, used in usage show --caps= list of caps (e.g., "usage=read, write; user=read" --yes-i-really-mean-it required for certain operations - --reset-regions reset regionmap when regionmap update + --warnings-only when specified with bucket limit check, list + only buckets nearing or over the current max + objects per shard value --bypass-gc when specified with bucket deletion, triggers object deletions by not involving GC --inconsistent-index when specified with bucket deletion and bypass-gc set to true, diff -Nru ceph-10.2.7/src/test/common/get_command_descriptions.cc ceph-10.2.9/src/test/common/get_command_descriptions.cc --- ceph-10.2.7/src/test/common/get_command_descriptions.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/common/get_command_descriptions.cc 2017-07-13 13:05:37.000000000 +0000 @@ -87,7 +87,8 @@ vector args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, 
CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); if (args.empty()) { diff -Nru ceph-10.2.7/src/test/common/test_async_compressor.cc ceph-10.2.9/src/test/common/test_async_compressor.cc --- ceph-10.2.7/src/test/common/test_async_compressor.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/common/test_async_compressor.cc 2017-07-13 13:05:37.000000000 +0000 @@ -207,7 +207,7 @@ vector args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); const char* env = getenv("CEPH_LIB"); diff -Nru ceph-10.2.7/src/test/common/test_shared_cache.cc ceph-10.2.9/src/test/common/test_shared_cache.cc --- ceph-10.2.7/src/test/common/test_shared_cache.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/common/test_shared_cache.cc 2017-07-13 13:05:37.000000000 +0000 @@ -25,8 +25,7 @@ #include "common/Thread.h" #include "common/shared_cache.hpp" #include "common/ceph_argparse.h" -#include "global/global_init.h" -#include +#include "test/unit.h" class SharedLRUTest : public SharedLRU { public: @@ -393,17 +392,6 @@ ASSERT_TRUE(cache.lookup(0).get()); } -int main(int argc, char **argv) { - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - // Local Variables: // compile-command: "cd ../.. ; make unittest_shared_cache && ./unittest_shared_cache # --gtest_filter=*.* --log-to-stderr=true" // End: diff -Nru ceph-10.2.7/src/test/common/test_sharedptr_registry.cc ceph-10.2.9/src/test/common/test_sharedptr_registry.cc --- ceph-10.2.7/src/test/common/test_sharedptr_registry.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/common/test_sharedptr_registry.cc 2017-07-13 13:05:37.000000000 +0000 @@ -24,8 +24,7 @@ #include "common/Thread.h" #include "common/sharedptr_registry.hpp" #include "common/ceph_argparse.h" -#include "global/global_init.h" -#include +#include "test/unit.h" class SharedPtrRegistryTest : public SharedPtrRegistry { public: @@ -324,17 +323,6 @@ EXPECT_FALSE(registry.lookup(key)); } -int main(int argc, char **argv) { - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - // Local Variables: // compile-command: "cd ../.. 
; make unittest_sharedptr_registry && ./unittest_sharedptr_registry # --gtest_filter=*.* --log-to-stderr=true" // End: diff -Nru ceph-10.2.7/src/test/common/Throttle.cc ceph-10.2.9/src/test/common/Throttle.cc --- ceph-10.2.7/src/test/common/Throttle.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/common/Throttle.cc 2017-07-13 13:05:37.000000000 +0000 @@ -25,8 +25,7 @@ #include "common/Thread.h" #include "common/Throttle.h" #include "common/ceph_argparse.h" -#include "global/global_init.h" -#include +#include "test/unit.h" #include #include @@ -412,17 +411,6 @@ ASSERT_GT(results.second.count(), 0.0005); } -int main(int argc, char **argv) { - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: * compile-command: "cd ../.. ; diff -Nru ceph-10.2.7/src/test/compressor/test_compression_plugin.cc ceph-10.2.9/src/test/compressor/test_compression_plugin.cc --- ceph-10.2.7/src/test/compressor/test_compression_plugin.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/compressor/test_compression_plugin.cc 2017-07-13 13:05:37.000000000 +0000 @@ -18,10 +18,10 @@ #include #include #include -#include -#include "global/global_init.h" +#include "gtest/gtest.h" #include "compressor/CompressionPlugin.h" #include "common/ceph_argparse.h" +#include "global/global_init.h" #include "global/global_context.h" #include "common/config.h" @@ -51,7 +51,10 @@ vector args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, + CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + 0); common_init_finish(g_ceph_context); const char* env = getenv("CEPH_LIB"); diff -Nru ceph-10.2.7/src/test/compressor/test_compression_plugin_snappy.cc ceph-10.2.9/src/test/compressor/test_compression_plugin_snappy.cc --- ceph-10.2.7/src/test/compressor/test_compression_plugin_snappy.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/compressor/test_compression_plugin_snappy.cc 2017-07-13 13:05:37.000000000 +0000 @@ -17,10 +17,10 @@ #include #include #include -#include -#include "global/global_init.h" +#include "gtest/gtest.h" #include "compressor/Compressor.h" #include "common/ceph_argparse.h" +#include "global/global_init.h" #include "global/global_context.h" #include "common/config.h" @@ -35,7 +35,10 @@ vector args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, + CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + 0); common_init_finish(g_ceph_context); const char* env = getenv("CEPH_LIB"); diff -Nru ceph-10.2.7/src/test/compressor/test_compression_plugin_zlib.cc ceph-10.2.9/src/test/compressor/test_compression_plugin_zlib.cc --- ceph-10.2.7/src/test/compressor/test_compression_plugin_zlib.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/compressor/test_compression_plugin_zlib.cc 2017-07-13 13:05:37.000000000 +0000 @@ -17,10 +17,10 @@ #include #include #include -#include -#include "global/global_init.h" +#include "gtest/gtest.h" #include "compressor/Compressor.h" #include "common/ceph_argparse.h" +#include "global/global_init.h" #include "global/global_context.h" #include "common/config.h" @@ -34,7 +34,10 @@ vector args; 
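
The recurring "auto cct = global_init(...)" edits in these test mains (another lands just below) are all the same fix: global_init now returns a reference-counted handle to the CephContext it creates, and if the caller drops that handle the context can be torn down while the process is still using it. A sketch of the ownership idiom, where RefCountedContext and fake_global_init are illustrative stand-ins rather than ceph APIs:

    // Why the handle returned by the init call is kept in a local.
    #include <boost/intrusive_ptr.hpp>
    #include <atomic>
    #include <iostream>

    struct RefCountedContext {
      std::atomic<int> refs{0};
      ~RefCountedContext() { std::cout << "context torn down\n"; }
    };
    inline void intrusive_ptr_add_ref(RefCountedContext* c) { ++c->refs; }
    inline void intrusive_ptr_release(RefCountedContext* c) {
      if (--c->refs == 0) delete c;
    }

    // Stand-in for global_init(): ownership passes to the caller.
    boost::intrusive_ptr<RefCountedContext> fake_global_init() {
      return boost::intrusive_ptr<RefCountedContext>(new RefCountedContext);
    }

    int main() {
      auto cct = fake_global_init();  // discarding the result would free
                                      // the context before the tests run
      // ... run the test body while cct pins the context ...
      return 0;
    }                                 // teardown happens here, at scope exit

The erasure-code utilities a little further on keep the same handle as a class member (the new boost::intrusive_ptr cct field) so the context also outlives setup().
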
argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, + CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + 0); common_init_finish(g_ceph_context); const char* env = getenv("CEPH_LIB"); diff -Nru ceph-10.2.7/src/test/compressor/test_compression_snappy.cc ceph-10.2.9/src/test/compressor/test_compression_snappy.cc --- ceph-10.2.7/src/test/compressor/test_compression_snappy.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/compressor/test_compression_snappy.cc 2017-07-13 13:05:37.000000000 +0000 @@ -16,12 +16,11 @@ #include #include -#include -#include "global/global_init.h" #include "compressor/snappy/SnappyCompressor.h" #include "common/ceph_argparse.h" #include "global/global_context.h" #include "common/config.h" +#include "test/unit.h" TEST(SnappyCompressor, compress_decompress) { @@ -70,21 +69,6 @@ EXPECT_EQ(res, 0); } -int main(int argc, char **argv) { - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - const char* env = getenv("CEPH_LIB"); - string directory(env ? env : "lib"); - g_conf->set_val("plugin_dir", directory, false, false); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: * compile-command: "cd ../.. ; make -j4 && diff -Nru ceph-10.2.7/src/test/compressor/test_compression_zlib.cc ceph-10.2.9/src/test/compressor/test_compression_zlib.cc --- ceph-10.2.7/src/test/compressor/test_compression_zlib.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/compressor/test_compression_zlib.cc 2017-07-13 13:05:37.000000000 +0000 @@ -17,12 +17,11 @@ #include #include #include -#include -#include "global/global_init.h" #include "compressor/zlib/CompressionZlib.h" #include "common/ceph_argparse.h" #include "global/global_context.h" #include "common/config.h" +#include "test/unit.h" TEST(CompressionZlib, compress_decompress) { @@ -62,21 +61,6 @@ EXPECT_TRUE(exp.contents_equal(after)); } -int main(int argc, char **argv) { - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - const char* env = getenv("CEPH_LIB"); - string directory(env ? env : "lib"); - g_conf->set_val("compression_dir", directory, false, false); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: * compile-command: "cd ../.. 
; make -j4 && diff -Nru ceph-10.2.7/src/test/crush/crush.cc ceph-10.2.9/src/test/crush/crush.cc --- ceph-10.2.7/src/test/crush/crush.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/crush/crush.cc 2017-07-13 13:05:37.000000000 +0000 @@ -12,12 +12,10 @@ #include #include "include/stringify.h" -#include "common/ceph_argparse.h" -#include "global/global_init.h" -#include "global/global_context.h" #include "crush/CrushWrapper.h" #include "osd/osd_types.h" +#include "test/unit.h" #include @@ -635,16 +633,3 @@ cout << " vs " << estddev << std::endl; } } - - - -int main(int argc, char **argv) { - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff -Nru ceph-10.2.7/src/test/crush/CrushWrapper.cc ceph-10.2.9/src/test/crush/CrushWrapper.cc --- ceph-10.2.7/src/test/crush/CrushWrapper.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/crush/CrushWrapper.cc 2017-07-13 13:05:37.000000000 +0000 @@ -948,7 +948,8 @@ vector def_args; def_args.push_back("--debug-crush=0"); - global_init(&def_args, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(&def_args, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff -Nru ceph-10.2.7/src/test/crypto_init.cc ceph-10.2.9/src/test/crypto_init.cc --- ceph-10.2.7/src/test/crypto_init.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/crypto_init.cc 2017-07-13 13:05:37.000000000 +0000 @@ -23,8 +23,9 @@ // multiple times simultaneously from different threads. TEST(CRYPTO_INIT, NSS_RACE) { std::vector args; - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, - CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); // Most reliably reproduced with more threads than cores. 
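
The NSS_RACE test here relies on the recipe the comment above spells out, and the lines that follow implement it: spawn twice as many threads as there are cores so that concurrent init calls genuinely interleave inside the initialization path. The same shape in isolation, sketched with std::thread (do_crypto_init stands in for the routine under test):

    // Oversubscribe the CPU so concurrent init calls really interleave.
    #include <thread>
    #include <vector>
    #include <unistd.h>

    void do_crypto_init() {
      // stand-in for the once-only initialization under test
    }

    int main() {
      long n_thread = sysconf(_SC_NPROCESSORS_ONLN) * 2;
      std::vector<std::thread> threads;
      for (long i = 0; i < n_thread; ++i)
        threads.emplace_back(do_crypto_init);  // all race into init at once
      for (auto& t : threads)
        t.join();  // a crash or deadlock here fails the test
      return 0;
    }
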
long n_thread = sysconf(_SC_NPROCESSORS_ONLN) * 2; pthread_t *ts = (pthread_t*)malloc(n_thread * sizeof(pthread_t)); diff -Nru ceph-10.2.7/src/test/erasure-code/ceph_erasure_code_benchmark.cc ceph-10.2.9/src/test/erasure-code/ceph_erasure_code_benchmark.cc --- ceph-10.2.7/src/test/erasure-code/ceph_erasure_code_benchmark.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/ceph_erasure_code_benchmark.cc 2017-07-13 13:05:37.000000000 +0000 @@ -81,7 +81,7 @@ ceph_options.push_back(i->c_str()); } - global_init( + cct = global_init( &def_args, ceph_options, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); diff -Nru ceph-10.2.7/src/test/erasure-code/ceph_erasure_code_benchmark.h ceph-10.2.9/src/test/erasure-code/ceph_erasure_code_benchmark.h --- ceph-10.2.7/src/test/erasure-code/ceph_erasure_code_benchmark.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/ceph_erasure_code_benchmark.h 2017-07-13 13:05:37.000000000 +0000 @@ -38,6 +38,7 @@ ErasureCodeProfile profile; bool verbose; + boost::intrusive_ptr cct; public: int setup(int argc, char** argv); int run(); diff -Nru ceph-10.2.7/src/test/erasure-code/ceph_erasure_code.cc ceph-10.2.9/src/test/erasure-code/ceph_erasure_code.cc --- ceph-10.2.7/src/test/erasure-code/ceph_erasure_code.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/ceph_erasure_code.cc 2017-07-13 13:05:37.000000000 +0000 @@ -37,6 +37,7 @@ class ErasureCodeCommand { po::variables_map vm; ErasureCodeProfile profile; + boost::intrusive_ptr cct; public: int setup(int argc, char** argv); int run(); @@ -82,7 +83,7 @@ ceph_options.push_back(i->c_str()); } - global_init( + cct = global_init( &def_args, ceph_options, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); diff -Nru ceph-10.2.7/src/test/erasure-code/ceph_erasure_code_non_regression.cc ceph-10.2.9/src/test/erasure-code/ceph_erasure_code_non_regression.cc --- ceph-10.2.7/src/test/erasure-code/ceph_erasure_code_non_regression.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/ceph_erasure_code_non_regression.cc 2017-07-13 13:05:37.000000000 +0000 @@ -44,6 +44,7 @@ string base; string directory; ErasureCodeProfile profile; + boost::intrusive_ptr cct; public: int setup(int argc, char** argv); int run(); @@ -94,10 +95,9 @@ ceph_options.push_back(i->c_str()); } - global_init( - &def_args, ceph_options, CEPH_ENTITY_TYPE_CLIENT, - CODE_ENVIRONMENT_UTILITY, - CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + cct = global_init(&def_args, ceph_options, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); common_init_finish(g_ceph_context); g_ceph_context->_conf->apply_changes(NULL); const char* env = getenv("CEPH_LIB"); diff -Nru ceph-10.2.7/src/test/erasure-code/TestErasureCode.cc ceph-10.2.9/src/test/erasure-code/TestErasureCode.cc --- ceph-10.2.7/src/test/erasure-code/TestErasureCode.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/TestErasureCode.cc 2017-07-13 13:05:37.000000000 +0000 @@ -17,12 +17,11 @@ #include #include -#include "global/global_init.h" #include "erasure-code/ErasureCode.h" -#include "common/ceph_argparse.h" #include "global/global_context.h" #include "common/config.h" #include "gtest/gtest.h" +#include "test/unit.h" class ErasureCodeTest : public ErasureCode { public: @@ -153,22 +152,6 @@ } } -int main(int argc, char **argv) -{ - vector args; - argv_to_vec(argc, (const char **)argv, args); 
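
The main() being deleted here, and in the dozen test files that follow, is the same copy-pasted boilerplate each time; the replacement is a single include of test/unit.h, which supplies one shared gtest entry point. The diff never shows that header's body, so the following is only a plausible sketch of what such a shared harness contains (the real header also wires up ceph argument parsing and context initialization):

    // test/unit.h-style shared test harness (sketch; details assumed).
    #pragma once
    #include "gtest/gtest.h"

    int main(int argc, char** argv) {
      // common per-binary setup -- argument parsing, context creation,
      // plugin search paths -- lives here instead of in every test file
      ::testing::InitGoogleTest(&argc, argv);
      return RUN_ALL_TESTS();
    }
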
- - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - const char* env = getenv("CEPH_LIB"); - string directory(env ? env : "lib"); - g_conf->set_val("erasure_code_dir", directory, false, false); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: * compile-command: "cd ../.. ; diff -Nru ceph-10.2.7/src/test/erasure-code/TestErasureCodeExample.cc ceph-10.2.9/src/test/erasure-code/TestErasureCodeExample.cc --- ceph-10.2.7/src/test/erasure-code/TestErasureCodeExample.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/TestErasureCodeExample.cc 2017-07-13 13:05:37.000000000 +0000 @@ -16,11 +16,11 @@ #include #include "include/stringify.h" -#include "global/global_init.h" #include "ErasureCodeExample.h" #include "common/ceph_argparse.h" #include "global/global_context.h" #include "gtest/gtest.h" +#include "test/unit.h" TEST(ErasureCodeExample, chunk_size) { @@ -234,21 +234,6 @@ EXPECT_EQ(0, example.create_ruleset("myrule", *c, &ss)); } -int main(int argc, char **argv) { - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - const char* env = getenv("CEPH_LIB"); - string directory(env ? env : "lib"); - g_conf->set_val("erasure_code_dir", directory, false, false); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: * compile-command: "cd ../.. ; diff -Nru ceph-10.2.7/src/test/erasure-code/TestErasureCodeIsa.cc ceph-10.2.9/src/test/erasure-code/TestErasureCodeIsa.cc --- ceph-10.2.7/src/test/erasure-code/TestErasureCodeIsa.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/TestErasureCodeIsa.cc 2017-07-13 13:05:37.000000000 +0000 @@ -20,13 +20,13 @@ #include "crush/CrushWrapper.h" #include "include/stringify.h" -#include "global/global_init.h" #include "erasure-code/isa/ErasureCodeIsa.h" #include "erasure-code/isa/xor_op.h" #include "common/ceph_argparse.h" #include "global/global_context.h" #include "common/config.h" #include "gtest/gtest.h" +#include "test/unit.h" ErasureCodeIsaTableCache tcache; @@ -955,22 +955,6 @@ } } -int main(int argc, char **argv) -{ - vector args; - argv_to_vec(argc, (const char **) argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - const char* env = getenv("CEPH_LIB"); - string directory(env ? env : "lib"); - g_conf->set_val("erasure_code_dir", directory, false, false); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: * compile-command: "cd ../.. 
; make -j4 unittest_erasure_code_isa && diff -Nru ceph-10.2.7/src/test/erasure-code/TestErasureCodeJerasure.cc ceph-10.2.9/src/test/erasure-code/TestErasureCodeJerasure.cc --- ceph-10.2.7/src/test/erasure-code/TestErasureCodeJerasure.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/TestErasureCodeJerasure.cc 2017-07-13 13:05:37.000000000 +0000 @@ -20,12 +20,12 @@ #include "crush/CrushWrapper.h" #include "include/stringify.h" -#include "global/global_init.h" #include "erasure-code/jerasure/ErasureCodeJerasure.h" -#include "common/ceph_argparse.h" #include "global/global_context.h" #include "common/config.h" #include "gtest/gtest.h" +#include "test/unit.h" + template class ErasureCodeTest : public ::testing::Test { @@ -357,22 +357,6 @@ } } -int main(int argc, char **argv) -{ - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - const char* env = getenv("CEPH_LIB"); - string directory(env ? env : "lib"); - g_conf->set_val("erasure_code_dir", directory, false, false); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: * compile-command: "cd ../.. ; diff -Nru ceph-10.2.7/src/test/erasure-code/TestErasureCodeLrc.cc ceph-10.2.9/src/test/erasure-code/TestErasureCodeLrc.cc --- ceph-10.2.7/src/test/erasure-code/TestErasureCodeLrc.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/TestErasureCodeLrc.cc 2017-07-13 13:05:37.000000000 +0000 @@ -21,12 +21,12 @@ #include "crush/CrushWrapper.h" #include "common/config.h" #include "include/stringify.h" -#include "global/global_init.h" #include "erasure-code/lrc/ErasureCodeLrc.h" -#include "common/ceph_argparse.h" #include "global/global_context.h" #include "common/config.h" #include "gtest/gtest.h" +#include "test/unit.h" + TEST(ErasureCodeLrc, parse_ruleset) { @@ -908,22 +908,6 @@ } } -int main(int argc, char **argv) -{ - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - const char* env = getenv("CEPH_LIB"); - string directory(env ? env : "lib"); - g_conf->set_val("erasure_code_dir", directory, false, false); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: * compile-command: "cd ../.. ; diff -Nru ceph-10.2.7/src/test/erasure-code/TestErasureCodePlugin.cc ceph-10.2.9/src/test/erasure-code/TestErasureCodePlugin.cc --- ceph-10.2.7/src/test/erasure-code/TestErasureCodePlugin.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/TestErasureCodePlugin.cc 2017-07-13 13:05:37.000000000 +0000 @@ -19,12 +19,12 @@ #include #include #include "common/Thread.h" -#include "global/global_init.h" #include "erasure-code/ErasureCodePlugin.h" -#include "common/ceph_argparse.h" #include "global/global_context.h" #include "common/config.h" #include "gtest/gtest.h" +#include "test/unit.h" + class ErasureCodePluginRegistryTest : public ::testing::Test { protected: @@ -126,21 +126,6 @@ } } -int main(int argc, char **argv) { - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - const char* env = getenv("CEPH_LIB"); - string directory(env ? 
env : "lib"); - g_conf->set_val("erasure_code_dir", directory, false, false); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: * compile-command: "cd ../.. ; make -j4 && diff -Nru ceph-10.2.7/src/test/erasure-code/TestErasureCodePluginIsa.cc ceph-10.2.9/src/test/erasure-code/TestErasureCodePluginIsa.cc --- ceph-10.2.7/src/test/erasure-code/TestErasureCodePluginIsa.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/TestErasureCodePluginIsa.cc 2017-07-13 13:05:37.000000000 +0000 @@ -22,6 +22,7 @@ #include "global/global_context.h" #include "common/config.h" #include "gtest/gtest.h" +#include "test/unit.h" TEST(ErasureCodePlugin, factory) { @@ -52,22 +53,6 @@ } } -int main(int argc, char **argv) -{ - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - const char* env = getenv("CEPH_LIB"); - string directory(env ? env : "lib"); - g_conf->set_val("erasure_code_dir", directory, false, false); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: * compile-command: "cd ../.. ; make -j4 && diff -Nru ceph-10.2.7/src/test/erasure-code/TestErasureCodePluginJerasure.cc ceph-10.2.9/src/test/erasure-code/TestErasureCodePluginJerasure.cc --- ceph-10.2.7/src/test/erasure-code/TestErasureCodePluginJerasure.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/TestErasureCodePluginJerasure.cc 2017-07-13 13:05:37.000000000 +0000 @@ -20,12 +20,11 @@ #include "arch/probe.h" #include "arch/intel.h" #include "arch/arm.h" -#include "global/global_init.h" #include "erasure-code/ErasureCodePlugin.h" -#include "common/ceph_argparse.h" #include "global/global_context.h" #include "common/config.h" #include "gtest/gtest.h" +#include "test/unit.h" TEST(ErasureCodePlugin, factory) { @@ -249,22 +248,6 @@ } } -int main(int argc, char **argv) -{ - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - const char* env = getenv("CEPH_LIB"); - string directory(env ? env : "lib"); - g_conf->set_val("erasure_code_dir", directory, false, false); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: * compile-command: "cd ../.. ; make -j4 && diff -Nru ceph-10.2.7/src/test/erasure-code/TestErasureCodePluginLrc.cc ceph-10.2.9/src/test/erasure-code/TestErasureCodePluginLrc.cc --- ceph-10.2.7/src/test/erasure-code/TestErasureCodePluginLrc.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/TestErasureCodePluginLrc.cc 2017-07-13 13:05:37.000000000 +0000 @@ -19,12 +19,12 @@ #include #include "arch/probe.h" #include "arch/intel.h" -#include "global/global_init.h" #include "erasure-code/ErasureCodePlugin.h" -#include "common/ceph_argparse.h" #include "global/global_context.h" #include "common/config.h" #include "gtest/gtest.h" +#include "test/unit.h" + TEST(ErasureCodePlugin, factory) { @@ -40,22 +40,6 @@ EXPECT_TRUE(erasure_code.get()); } -int main(int argc, char **argv) -{ - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - const char* env = getenv("CEPH_LIB"); - string directory(env ? 
env : "lib"); - g_conf->set_val("erasure_code_dir", directory, false, false); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: * compile-command: "cd ../.. ; make -j4 && diff -Nru ceph-10.2.7/src/test/erasure-code/TestErasureCodePluginShec.cc ceph-10.2.9/src/test/erasure-code/TestErasureCodePluginShec.cc --- ceph-10.2.7/src/test/erasure-code/TestErasureCodePluginShec.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/TestErasureCodePluginShec.cc 2017-07-13 13:05:37.000000000 +0000 @@ -21,12 +21,10 @@ #include "arch/probe.h" #include "arch/intel.h" #include "arch/arm.h" -#include "global/global_init.h" #include "erasure-code/ErasureCodePlugin.h" -#include "common/ceph_argparse.h" #include "global/global_context.h" #include "common/config.h" -#include "gtest/gtest.h" +#include "test/unit.h" TEST(ErasureCodePlugin, factory) { @@ -245,22 +243,6 @@ } } -int main(int argc, char **argv) -{ - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - const char* env = getenv("CEPH_LIB"); - string directory(env ? env : "lib"); - g_conf->set_val("erasure_code_dir", directory, false, false); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: * compile-command: "cd ../.. ; make -j4 && diff -Nru ceph-10.2.7/src/test/erasure-code/TestErasureCodeShec_all.cc ceph-10.2.9/src/test/erasure-code/TestErasureCodeShec_all.cc --- ceph-10.2.7/src/test/erasure-code/TestErasureCodeShec_all.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/TestErasureCodeShec_all.cc 2017-07-13 13:05:37.000000000 +0000 @@ -294,7 +294,8 @@ vector args; argv_to_vec(argc, (const char **) argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); const char* env = getenv("CEPH_LIB"); diff -Nru ceph-10.2.7/src/test/erasure-code/TestErasureCodeShec_arguments.cc ceph-10.2.9/src/test/erasure-code/TestErasureCodeShec_arguments.cc --- ceph-10.2.7/src/test/erasure-code/TestErasureCodeShec_arguments.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/TestErasureCodeShec_arguments.cc 2017-07-13 13:05:37.000000000 +0000 @@ -393,7 +393,8 @@ vector args; argv_to_vec(argc, (const char **) argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); const char* env = getenv("CEPH_LIB"); diff -Nru ceph-10.2.7/src/test/erasure-code/TestErasureCodeShec.cc ceph-10.2.9/src/test/erasure-code/TestErasureCodeShec.cc --- ceph-10.2.7/src/test/erasure-code/TestErasureCodeShec.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/TestErasureCodeShec.cc 2017-07-13 13:05:37.000000000 +0000 @@ -31,6 +31,7 @@ #include "common/ceph_argparse.h" #include "global/global_context.h" #include "gtest/gtest.h" +#include "test/unit.h" void* thread1(void* pParam); void* thread2(void* pParam); @@ -2667,22 +2668,6 @@ delete profile; } -int main(int argc, char **argv) -{ - vector args; - argv_to_vec(argc, (const char **) argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - 
common_init_finish(g_ceph_context); - - const char* env = getenv("CEPH_LIB"); - string directory(env ? env : "lib"); - g_conf->set_val("erasure_code_dir", directory, false, false); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - void* thread1(void* pParam) { ErasureCodeShec* shec = (ErasureCodeShec*) pParam; diff -Nru ceph-10.2.7/src/test/erasure-code/TestErasureCodeShec_thread.cc ceph-10.2.9/src/test/erasure-code/TestErasureCodeShec_thread.cc --- ceph-10.2.7/src/test/erasure-code/TestErasureCodeShec_thread.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/erasure-code/TestErasureCodeShec_thread.cc 2017-07-13 13:05:37.000000000 +0000 @@ -25,12 +25,11 @@ #include "crush/CrushWrapper.h" #include "osd/osd_types.h" #include "include/stringify.h" -#include "global/global_init.h" #include "erasure-code/shec/ErasureCodeShec.h" #include "erasure-code/ErasureCodePlugin.h" -#include "common/ceph_argparse.h" #include "global/global_context.h" #include "gtest/gtest.h" +#include "test/unit.h" void* thread1(void* pParam); @@ -86,22 +85,6 @@ pthread_join(tid5, NULL); } -int main(int argc, char **argv) -{ - vector args; - argv_to_vec(argc, (const char **) argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - const char* env = getenv("CEPH_LIB"); - std::string directory(env ? env : "lib"); - g_conf->set_val("erasure_code_dir", directory, false, false); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - void* thread1(void* pParam) { TestParam* param = static_cast(pParam); diff -Nru ceph-10.2.7/src/test/fedora-21/ceph.spec.in ceph-10.2.9/src/test/fedora-21/ceph.spec.in --- ceph-10.2.7/src/test/fedora-21/ceph.spec.in 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/fedora-21/ceph.spec.in 2017-07-13 13:05:36.000000000 +0000 @@ -14,7 +14,7 @@ # # Please submit bugfixes or comments via http://tracker.ceph.com/ # -%bcond_with ocf +%bcond_without ocf %bcond_without cephfs_java %bcond_with tests %bcond_with xio @@ -214,6 +214,7 @@ Requires: hdparm Requires: cryptsetup Requires: findutils +Requires: psmisc Requires: which %if 0%{?suse_version} Recommends: ntp-daemon @@ -667,11 +668,13 @@ --without-lttng \ --without-babeltrace \ %endif - $CEPH_EXTRA_CONFIGURE_ARGS \ - %{?_with_ocf} \ +%if 0%{with ocf} + --with-ocf \ +%endif %if %{without tcmalloc} --without-tcmalloc \ %endif + $CEPH_EXTRA_CONFIGURE_ARGS \ CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS" %if %{with lowmem_builder} diff -Nru ceph-10.2.7/src/test/filestore/TestFileStore.cc ceph-10.2.9/src/test/filestore/TestFileStore.cc --- ceph-10.2.7/src/test/filestore/TestFileStore.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/filestore/TestFileStore.cc 2017-07-13 13:05:37.000000000 +0000 @@ -68,7 +68,8 @@ vector args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); g_ceph_context->_conf->set_val("osd_journal_size", "100"); g_ceph_context->_conf->apply_changes(NULL); diff -Nru ceph-10.2.7/src/test/journal/test_main.cc ceph-10.2.9/src/test/journal/test_main.cc --- ceph-10.2.7/src/test/journal/test_main.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/journal/test_main.cc 2017-07-13 13:05:37.000000000 +0000 @@ -16,11 +16,11 @@ std::vector args; argv_to_vec(argc, (const 
char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); g_conf->set_val("lockdep", "true"); common_init_finish(g_ceph_context); int r = RUN_ALL_TESTS(); - g_ceph_context->put(); return r; } diff -Nru ceph-10.2.7/src/test/libcephfs/test.cc ceph-10.2.9/src/test/libcephfs/test.cc --- ceph-10.2.7/src/test/libcephfs/test.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/libcephfs/test.cc 2017-07-13 13:05:37.000000000 +0000 @@ -361,7 +361,7 @@ // test getdents struct dirent *getdents_entries; - getdents_entries = (struct dirent *)malloc(r * sizeof(*getdents_entries)); + getdents_entries = (struct dirent *)malloc((r + 2) * sizeof(*getdents_entries)); int count = 0; std::vector found; diff -Nru ceph-10.2.7/src/test/librados/list.cc ceph-10.2.9/src/test/librados/list.cc --- ceph-10.2.7/src/test/librados/list.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librados/list.cc 2017-07-13 13:05:37.000000000 +0000 @@ -6,9 +6,7 @@ #include "test/librados/test.h" #include "test/librados/TestCase.h" #include "global/global_context.h" -#include "global/global_init.h" -#include "common/ceph_argparse.h" -#include "common/common_init.h" +#include "test/unit.h" #include "include/types.h" #include "common/hobject.h" @@ -960,18 +958,3 @@ #pragma GCC diagnostic pop #pragma GCC diagnostic warning "-Wpragmas" - -int main(int argc, char **argv) -{ - ::testing::InitGoogleTest(&argc, argv); - - vector args; - argv_to_vec(argc, (const char **)argv, args); - env_to_vec(args); - cout << args << std::endl; - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - return RUN_ALL_TESTS(); -} diff -Nru ceph-10.2.7/src/test/librados/misc.cc ceph-10.2.9/src/test/librados/misc.cc --- ceph-10.2.7/src/test/librados/misc.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librados/misc.cc 2017-07-13 13:05:37.000000000 +0000 @@ -9,8 +9,6 @@ #include "include/rados/librados.hpp" #include "include/stringify.h" #include "global/global_context.h" -#include "global/global_init.h" -#include "common/ceph_argparse.h" #include "common/common_init.h" #include "test/librados/test.h" #include "test/librados/TestCase.h" @@ -19,6 +17,8 @@ #include #include #include +#include "test/unit.h" + using namespace librados; using std::map; @@ -67,8 +67,18 @@ ASSERT_EQ(-ENOTCONN, rados_monitor_log(cluster, "error", test_rados_log_cb, NULL)); - ASSERT_NE(0, rados_connect(cluster)); - ASSERT_NE(0, rados_connect(cluster)); + // try this a few times; sometimes we don't schedule fast enough for the + // cond to time out + int r; + for (unsigned i=0; i<16; ++i) { + cout << i << std::endl; + r = rados_connect(cluster); + if (r < 0) + break; // yay, we timed out + // try again + rados_shutdown(cluster); + } + ASSERT_NE(0, r); rados_shutdown(cluster); } @@ -944,18 +954,3 @@ cout << "done waiting" << std::endl; } } - - - -int main(int argc, char **argv) -{ - ::testing::InitGoogleTest(&argc, argv); - - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - return RUN_ALL_TESTS(); -} diff -Nru ceph-10.2.7/src/test/librados/tier.cc ceph-10.2.9/src/test/librados/tier.cc --- ceph-10.2.7/src/test/librados/tier.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librados/tier.cc 2017-07-13 
13:05:37.000000000 +0000 @@ -10,13 +10,11 @@ #include "include/stringify.h" #include "include/types.h" #include "global/global_context.h" -#include "global/global_init.h" -#include "common/ceph_argparse.h" -#include "common/common_init.h" #include "common/Cond.h" #include "test/librados/test.h" #include "test/librados/TestCase.h" #include "json_spirit/json_spirit.h" +#include "test/unit.h" #include "osd/HitSet.h" @@ -5347,17 +5345,3 @@ // wait for maps to settle before next test cluster.wait_for_latest_osdmap(); } - -int main(int argc, char **argv) -{ - ::testing::InitGoogleTest(&argc, argv); - - vector args; - argv_to_vec(argc, (const char **)argv, args); - env_to_vec(args), - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - return RUN_ALL_TESTS(); -} diff -Nru ceph-10.2.7/src/test/librados/tmap_migrate.cc ceph-10.2.9/src/test/librados/tmap_migrate.cc --- ceph-10.2.7/src/test/librados/tmap_migrate.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librados/tmap_migrate.cc 2017-07-13 13:05:37.000000000 +0000 @@ -2,6 +2,7 @@ #include "include/rados/librados.hpp" #include "test/librados/test.h" #include "test/librados/TestCase.h" +#include "test/unit.h" #include "include/encoding.h" #include "tools/cephfs/DataScan.h" #include "global/global_init.h" @@ -16,10 +17,6 @@ typedef RadosTestPP TmapMigratePP; TEST_F(TmapMigratePP, DataScan) { - std::vector args; - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - // DataScan isn't namespace-aware, so override RadosTestPP's default // behaviour of putting everything into a namespace ioctx.set_namespace(""); diff -Nru ceph-10.2.7/src/test/librados_test_stub/LibradosTestStub.cc ceph-10.2.9/src/test/librados_test_stub/LibradosTestStub.cc --- ceph-10.2.7/src/test/librados_test_stub/LibradosTestStub.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librados_test_stub/LibradosTestStub.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1062,7 +1062,7 @@ keys->insert(last_key); } } while (!vals.empty()); - return 0; + return keys->size(); } int cls_cxx_map_get_val(cls_method_context_t hctx, const string &key, @@ -1090,8 +1090,12 @@ std::map *vals) { librados::TestClassHandler::MethodContext *ctx = reinterpret_cast(hctx); - return ctx->io_ctx_impl->omap_get_vals(ctx->oid, start_obj, filter_prefix, - max_to_get, vals); + int r = ctx->io_ctx_impl->omap_get_vals(ctx->oid, start_obj, filter_prefix, + max_to_get, vals); + if (r < 0) { + return r; + } + return vals->size(); } int cls_cxx_map_remove_key(cls_method_context_t hctx, const string &key) { diff -Nru ceph-10.2.7/src/test/librbd/mock/MockExclusiveLock.h ceph-10.2.9/src/test/librbd/mock/MockExclusiveLock.h --- ceph-10.2.7/src/test/librbd/mock/MockExclusiveLock.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librbd/mock/MockExclusiveLock.h 2017-07-13 13:05:37.000000000 +0000 @@ -15,8 +15,6 @@ struct MockExclusiveLock { MOCK_CONST_METHOD0(is_lock_owner, bool()); - MOCK_METHOD1(assert_header_locked, void(librados::ObjectWriteOperation *)); - MOCK_METHOD2(init, void(uint64_t features, Context*)); MOCK_METHOD1(shut_down, void(Context*)); diff -Nru ceph-10.2.7/src/test/librbd/object_map/test_mock_InvalidateRequest.cc ceph-10.2.9/src/test/librbd/object_map/test_mock_InvalidateRequest.cc --- ceph-10.2.7/src/test/librbd/object_map/test_mock_InvalidateRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ 
ceph-10.2.9/src/test/librbd/object_map/test_mock_InvalidateRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -56,9 +56,6 @@ AsyncRequest<> *request = new InvalidateRequest<>(*ictx, CEPH_NOSNAP, false, &cond_ctx); EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), - exec(ictx->header_oid, _, StrEq("lock"), StrEq("assert_locked"), _, _, _)) - .WillOnce(DoDefault()); - EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), exec(ictx->header_oid, _, StrEq("rbd"), StrEq("set_flags"), _, _, _)) .WillOnce(DoDefault()); @@ -86,9 +83,6 @@ &cond_ctx); EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), - exec(ictx->header_oid, _, StrEq("lock"), StrEq("assert_locked"), _, _, _)) - .Times(0); - EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), exec(ictx->header_oid, _, StrEq("rbd"), StrEq("set_flags"), _, _, _)) .WillOnce(DoDefault()); @@ -134,9 +128,6 @@ AsyncRequest<> *request = new InvalidateRequest<>(*ictx, CEPH_NOSNAP, false, &cond_ctx); EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), - exec(ictx->header_oid, _, StrEq("lock"), StrEq("assert_locked"), _, _, _)) - .WillOnce(DoDefault()); - EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), exec(ictx->header_oid, _, StrEq("rbd"), StrEq("set_flags"), _, _, _)) .WillOnce(Return(-EINVAL)); diff -Nru ceph-10.2.7/src/test/librbd/object_map/test_mock_ResizeRequest.cc ceph-10.2.9/src/test/librbd/object_map/test_mock_ResizeRequest.cc --- ceph-10.2.7/src/test/librbd/object_map/test_mock_ResizeRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librbd/object_map/test_mock_ResizeRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -42,8 +42,6 @@ void expect_invalidate(librbd::ImageCtx *ictx) { EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), - exec(ictx->header_oid, _, StrEq("lock"), StrEq("assert_locked"), _, _, _)).Times(0); - EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), exec(ictx->header_oid, _, StrEq("rbd"), StrEq("set_flags"), _, _, _)) .WillOnce(DoDefault()); } diff -Nru ceph-10.2.7/src/test/librbd/object_map/test_mock_SnapshotRemoveRequest.cc ceph-10.2.9/src/test/librbd/object_map/test_mock_SnapshotRemoveRequest.cc --- ceph-10.2.7/src/test/librbd/object_map/test_mock_SnapshotRemoveRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librbd/object_map/test_mock_SnapshotRemoveRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -64,9 +64,6 @@ void expect_invalidate(librbd::ImageCtx *ictx) { EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), - exec(ictx->header_oid, _, StrEq("lock"), StrEq("assert_locked"), _, _, _)) - .Times(0); - EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), exec(ictx->header_oid, _, StrEq("rbd"), StrEq("set_flags"), _, _, _)) .WillOnce(DoDefault()); } diff -Nru ceph-10.2.7/src/test/librbd/object_map/test_mock_SnapshotRollbackRequest.cc ceph-10.2.9/src/test/librbd/object_map/test_mock_SnapshotRollbackRequest.cc --- ceph-10.2.7/src/test/librbd/object_map/test_mock_SnapshotRollbackRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librbd/object_map/test_mock_SnapshotRollbackRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -53,9 +53,6 @@ void expect_invalidate(librbd::ImageCtx *ictx, uint32_t times) { EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), - exec(ictx->header_oid, _, StrEq("lock"), StrEq("assert_locked"), _, _, _)) - .Times(0); - EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), exec(ictx->header_oid, _, StrEq("rbd"), StrEq("set_flags"), _, _, _)) .Times(times) .WillRepeatedly(DoDefault()); diff -Nru ceph-10.2.7/src/test/librbd/object_map/test_mock_UpdateRequest.cc ceph-10.2.9/src/test/librbd/object_map/test_mock_UpdateRequest.cc --- 
ceph-10.2.7/src/test/librbd/object_map/test_mock_UpdateRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librbd/object_map/test_mock_UpdateRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -44,9 +44,6 @@ void expect_invalidate(librbd::ImageCtx *ictx) { EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), - exec(ictx->header_oid, _, StrEq("lock"), StrEq("assert_locked"), _, _, _)) - .Times(0); - EXPECT_CALL(get_mock_io_ctx(ictx->md_ctx), exec(ictx->header_oid, _, StrEq("rbd"), StrEq("set_flags"), _, _, _)) .WillOnce(DoDefault()); } diff -Nru ceph-10.2.7/src/test/librbd/operation/test_mock_ResizeRequest.cc ceph-10.2.9/src/test/librbd/operation/test_mock_ResizeRequest.cc --- ceph-10.2.7/src/test/librbd/operation/test_mock_ResizeRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librbd/operation/test_mock_ResizeRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -101,9 +101,6 @@ .WillOnce(Return(r)); } else { expect_is_lock_owner(mock_image_ctx); - if (mock_image_ctx.exclusive_lock != nullptr) { - EXPECT_CALL(*mock_image_ctx.exclusive_lock, assert_header_locked(_)); - } EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx), exec(mock_image_ctx.header_oid, _, StrEq("rbd"), StrEq("set_size"), _, _, _)) .WillOnce(Return(r)); diff -Nru ceph-10.2.7/src/test/librbd/operation/test_mock_SnapshotCreateRequest.cc ceph-10.2.9/src/test/librbd/operation/test_mock_SnapshotCreateRequest.cc --- ceph-10.2.7/src/test/librbd/operation/test_mock_SnapshotCreateRequest.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librbd/operation/test_mock_SnapshotCreateRequest.cc 2017-07-13 13:05:37.000000000 +0000 @@ -63,12 +63,6 @@ } void expect_snap_create(MockImageCtx &mock_image_ctx, int r) { - if (!mock_image_ctx.old_format && - mock_image_ctx.exclusive_lock != nullptr) { - EXPECT_CALL(*mock_image_ctx.exclusive_lock, assert_header_locked(_)) - .Times(r == -ESTALE ? 2 : 1); - } - auto &expect = EXPECT_CALL(get_mock_io_ctx(mock_image_ctx.md_ctx), exec(mock_image_ctx.header_oid, _, StrEq("rbd"), StrEq(mock_image_ctx.old_format ? 
"snap_add" : diff -Nru ceph-10.2.7/src/test/librbd/test_librbd.cc ceph-10.2.9/src/test/librbd/test_librbd.cc --- ceph-10.2.7/src/test/librbd/test_librbd.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librbd/test_librbd.cc 2017-07-13 13:05:37.000000000 +0000 @@ -179,6 +179,12 @@ ASSERT_NE("", m_pool_name = create_pool()); } + bool is_skip_partial_discard_enabled() { + std::string value; + EXPECT_EQ(0, _rados.conf_get("rbd_skip_partial_discard", value)); + return value == "true"; + } + void validate_object_map(rbd_image_t image, bool *passed) { uint64_t flags; ASSERT_EQ(0, rbd_get_flags(image, &flags)); @@ -1250,8 +1256,7 @@ rados_ioctx_t ioctx; rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx); - CephContext* cct = reinterpret_cast(_rados.cct()); - bool skip_discard = cct->_conf->rbd_skip_partial_discard; + bool skip_discard = is_skip_partial_discard_enabled(); rbd_image_t image; int order = 0; @@ -1329,8 +1334,7 @@ rados_ioctx_t ioctx; rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx); - CephContext* cct = reinterpret_cast(_rados.cct()); - bool skip_discard = cct->_conf->rbd_skip_partial_discard; + bool skip_discard = is_skip_partial_discard_enabled(); rbd_image_t image; int order = 0; @@ -1543,8 +1547,7 @@ librados::IoCtx ioctx; ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx)); - CephContext* cct = reinterpret_cast(_rados.cct()); - bool skip_discard = cct->_conf->rbd_skip_partial_discard; + bool skip_discard = is_skip_partial_discard_enabled(); { librbd::RBD rbd; @@ -2578,8 +2581,7 @@ librados::IoCtx ioctx; ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx)); - CephContext* cct = reinterpret_cast(this->_rados.cct()); - bool skip_discard = cct->_conf->rbd_skip_partial_discard; + bool skip_discard = this->is_skip_partial_discard_enabled(); { librbd::RBD rbd; @@ -2729,8 +2731,7 @@ librados::IoCtx ioctx; ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx)); - CephContext* cct = reinterpret_cast(this->_rados.cct()); - bool skip_discard = cct->_conf->rbd_skip_partial_discard; + bool skip_discard = this->is_skip_partial_discard_enabled(); librbd::RBD rbd; librbd::Image image; @@ -2851,8 +2852,7 @@ librados::IoCtx ioctx; ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx)); - CephContext* cct = reinterpret_cast(this->_rados.cct()); - bool skip_discard = cct->_conf->rbd_skip_partial_discard; + bool skip_discard = this->is_skip_partial_discard_enabled(); librbd::RBD rbd; librbd::Image image; @@ -2903,8 +2903,7 @@ librados::IoCtx ioctx; ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx)); - CephContext* cct = reinterpret_cast(this->_rados.cct()); - bool skip_discard = cct->_conf->rbd_skip_partial_discard; + bool skip_discard = this->is_skip_partial_discard_enabled(); { librbd::RBD rbd; @@ -2936,8 +2935,7 @@ librados::IoCtx ioctx; ASSERT_EQ(0, this->_rados.ioctx_create(this->m_pool_name.c_str(), ioctx)); - CephContext* cct = reinterpret_cast(this->_rados.cct()); - bool skip_discard = cct->_conf->rbd_skip_partial_discard; + bool skip_discard = this->is_skip_partial_discard_enabled(); librbd::RBD rbd; librbd::Image image; @@ -3631,10 +3629,208 @@ ASSERT_PASSED(validate_object_map, image1); } +void memset_rand(char *buf, size_t len) { + for (size_t i = 0; i < len; ++i) { + buf[i] = (char) (rand() % (126 - 33) + 33); + } +} + TEST_F(TestLibRBD, Metadata) { REQUIRE_FEATURE(RBD_FEATURE_LAYERING); + rados_ioctx_t ioctx; + rados_ioctx_create(_cluster, m_pool_name.c_str(), 
&ioctx); + + std::string name = get_temp_image_name(); + uint64_t size = 2 << 20; + int order = 0; + ASSERT_EQ(0, create_image(ioctx, name.c_str(), size, &order)); + + rbd_image_t image; + ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL)); + + rbd_image_t image1; + ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image1, NULL)); + + char keys[1024]; + char vals[1024]; + size_t keys_len = sizeof(keys); + size_t vals_len = sizeof(vals); + + memset_rand(keys, keys_len); + memset_rand(vals, vals_len); + + ASSERT_EQ(0, rbd_metadata_list(image, "", 0, keys, &keys_len, vals, + &vals_len)); + ASSERT_EQ(0U, keys_len); + ASSERT_EQ(0U, vals_len); + + char value[1024]; + size_t value_len = sizeof(value); + memset_rand(value, value_len); + + ASSERT_EQ(0, rbd_metadata_set(image1, "key1", "value1")); + ASSERT_EQ(0, rbd_metadata_set(image1, "key2", "value2")); + ASSERT_EQ(0, rbd_metadata_get(image1, "key1", value, &value_len)); + ASSERT_STREQ(value, "value1"); + value_len = 1; + ASSERT_EQ(-ERANGE, rbd_metadata_get(image1, "key1", value, &value_len)); + ASSERT_EQ(value_len, strlen("value1") + 1); + + ASSERT_EQ(-ERANGE, rbd_metadata_list(image1, "", 0, keys, &keys_len, vals, + &vals_len)); + keys_len = sizeof(keys); + vals_len = sizeof(vals); + memset_rand(keys, keys_len); + memset_rand(vals, vals_len); + ASSERT_EQ(0, rbd_metadata_list(image1, "", 0, keys, &keys_len, vals, + &vals_len)); + ASSERT_EQ(keys_len, strlen("key1") + 1 + strlen("key2") + 1); + ASSERT_EQ(vals_len, strlen("value1") + 1 + strlen("value2") + 1); + ASSERT_STREQ(keys, "key1"); + ASSERT_STREQ(keys + strlen(keys) + 1, "key2"); + ASSERT_STREQ(vals, "value1"); + ASSERT_STREQ(vals + strlen(vals) + 1, "value2"); + + ASSERT_EQ(0, rbd_metadata_remove(image1, "key1")); + ASSERT_EQ(0, rbd_metadata_remove(image1, "key3")); + value_len = sizeof(value); + ASSERT_EQ(-ENOENT, rbd_metadata_get(image1, "key3", value, &value_len)); + ASSERT_EQ(0, rbd_metadata_list(image1, "", 0, keys, &keys_len, vals, + &vals_len)); + ASSERT_EQ(keys_len, strlen("key2") + 1); + ASSERT_EQ(vals_len, strlen("value2") + 1); + ASSERT_STREQ(keys, "key2"); + ASSERT_STREQ(vals, "value2"); + + // test config setting + ASSERT_EQ(0, rbd_metadata_set(image1, "conf_rbd_cache", "false")); + ASSERT_EQ(-EINVAL, rbd_metadata_set(image1, "conf_rbd_cache", "INVALID_VAL")); + ASSERT_EQ(0, rbd_metadata_remove(image1, "conf_rbd_cache")); + + // test metadata with snapshot adding + ASSERT_EQ(0, rbd_snap_create(image1, "snap1")); + ASSERT_EQ(0, rbd_snap_protect(image1, "snap1")); + ASSERT_EQ(0, rbd_snap_set(image1, "snap1")); + + ASSERT_EQ(0, rbd_metadata_set(image1, "key1", "value1")); + ASSERT_EQ(0, rbd_metadata_set(image1, "key3", "value3")); + + keys_len = sizeof(keys); + vals_len = sizeof(vals); + memset_rand(keys, keys_len); + memset_rand(vals, vals_len); + ASSERT_EQ(0, rbd_metadata_list(image1, "", 0, keys, &keys_len, vals, + &vals_len)); + ASSERT_EQ(keys_len, + strlen("key1") + 1 + strlen("key2") + 1 + strlen("key3") + 1); + ASSERT_EQ(vals_len, + strlen("value1") + 1 + strlen("value2") + 1 + strlen("value3") + 1); + ASSERT_STREQ(keys, "key1"); + ASSERT_STREQ(keys + strlen("key1") + 1, "key2"); + ASSERT_STREQ(keys + strlen("key1") + 1 + strlen("key2") + 1, "key3"); + ASSERT_STREQ(vals, "value1"); + ASSERT_STREQ(vals + strlen("value1") + 1, "value2"); + ASSERT_STREQ(vals + strlen("value1") + 1 + strlen("value2") + 1, "value3"); + + ASSERT_EQ(0, rbd_snap_set(image1, NULL)); + keys_len = sizeof(keys); + vals_len = sizeof(vals); + memset_rand(keys, keys_len); + memset_rand(vals, vals_len); 
+ ASSERT_EQ(0, rbd_metadata_list(image1, "", 0, keys, &keys_len, vals, + &vals_len)); + ASSERT_EQ(keys_len, + strlen("key1") + 1 + strlen("key2") + 1 + strlen("key3") + 1); + ASSERT_EQ(vals_len, + strlen("value1") + 1 + strlen("value2") + 1 + strlen("value3") + 1); + ASSERT_STREQ(keys, "key1"); + ASSERT_STREQ(keys + strlen("key1") + 1, "key2"); + ASSERT_STREQ(keys + strlen("key1") + 1 + strlen("key2") + 1, "key3"); + ASSERT_STREQ(vals, "value1"); + ASSERT_STREQ(vals + strlen("value1") + 1, "value2"); + ASSERT_STREQ(vals + strlen("value1") + 1 + strlen("value2") + 1, "value3"); + + // test metadata with cloning + uint64_t features; + ASSERT_EQ(0, rbd_get_features(image1, &features)); + + string cname = get_temp_image_name(); + EXPECT_EQ(0, rbd_clone(ioctx, name.c_str(), "snap1", ioctx, + cname.c_str(), features, &order)); + rbd_image_t image2; + ASSERT_EQ(0, rbd_open(ioctx, cname.c_str(), &image2, NULL)); + ASSERT_EQ(0, rbd_metadata_set(image2, "key4", "value4")); + + keys_len = sizeof(keys); + vals_len = sizeof(vals); + memset_rand(keys, keys_len); + memset_rand(vals, vals_len); + ASSERT_EQ(0, rbd_metadata_list(image2, "", 0, keys, &keys_len, vals, + &vals_len)); + ASSERT_EQ(keys_len, strlen("key1") + 1 + strlen("key2") + 1 + strlen("key3") + + 1 + strlen("key4") + 1); + ASSERT_EQ(vals_len, strlen("value1") + 1 + strlen("value2") + 1 + + strlen("value3") + 1 + strlen("value4") + 1); + ASSERT_STREQ(keys + strlen("key1") + 1 + strlen("key2") + 1 + strlen("key3") + + 1, "key4"); + ASSERT_STREQ(vals + strlen("value1") + 1 + strlen("value2") + 1 + + strlen("value3") + 1, "value4"); + + ASSERT_EQ(0, rbd_metadata_list(image1, "", 0, keys, &keys_len, vals, + &vals_len)); + ASSERT_EQ(keys_len, + strlen("key1") + 1 + strlen("key2") + 1 + strlen("key3") + 1); + ASSERT_EQ(vals_len, + strlen("value1") + 1 + strlen("value2") + 1 + strlen("value3") + 1); + ASSERT_EQ(-ENOENT, rbd_metadata_get(image1, "key4", value, &value_len)); + + // test short buffer cases + keys_len = strlen("key1") + 1; + vals_len = strlen("value1") + 1; + memset_rand(keys, keys_len); + memset_rand(vals, vals_len); + ASSERT_EQ(0, rbd_metadata_list(image2, "", 1, keys, &keys_len, vals, + &vals_len)); + ASSERT_EQ(keys_len, strlen("key1") + 1); + ASSERT_EQ(vals_len, strlen("value1") + 1); + ASSERT_STREQ(keys, "key1"); + ASSERT_STREQ(vals, "value1"); + + ASSERT_EQ(-ERANGE, rbd_metadata_list(image2, "", 2, keys, &keys_len, vals, + &vals_len)); + ASSERT_EQ(keys_len, strlen("key1") + 1 + strlen("key2") + 1); + ASSERT_EQ(vals_len, strlen("value1") + 1 + strlen("value2") + 1); + + ASSERT_EQ(-ERANGE, rbd_metadata_list(image2, "", 0, keys, &keys_len, vals, + &vals_len)); + ASSERT_EQ(keys_len, strlen("key1") + 1 + strlen("key2") + 1 + strlen("key3") + + 1 + strlen("key4") + 1); + ASSERT_EQ(vals_len, strlen("value1") + 1 + strlen("value2") + 1 + + strlen("value3") + 1 + strlen("value4") + 1); + + // test `start` param + keys_len = sizeof(keys); + vals_len = sizeof(vals); + memset_rand(keys, keys_len); + memset_rand(vals, vals_len); + ASSERT_EQ(0, rbd_metadata_list(image2, "key2", 0, keys, &keys_len, vals, + &vals_len)); + ASSERT_EQ(keys_len, strlen("key3") + 1 + strlen("key4") + 1); + ASSERT_EQ(vals_len, strlen("value3") + 1 + strlen("value4") + 1); + ASSERT_STREQ(keys, "key3"); + ASSERT_STREQ(vals, "value3"); + + ASSERT_EQ(0, rbd_close(image)); + ASSERT_EQ(0, rbd_close(image1)); + ASSERT_EQ(0, rbd_close(image2)); +} + +TEST_F(TestLibRBD, MetadataPP) +{ + REQUIRE_FEATURE(RBD_FEATURE_LAYERING); + librados::IoCtx ioctx; ASSERT_EQ(0, 
_rados.ioctx_create(m_pool_name.c_str(), ioctx)); @@ -3893,8 +4089,7 @@ librados::IoCtx ioctx; ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx)); - CephContext* cct = reinterpret_cast<CephContext*>(_rados.cct()); - bool skip_discard = cct->_conf->rbd_skip_partial_discard; + bool skip_discard = is_skip_partial_discard_enabled(); librbd::RBD rbd; std::string name = get_temp_image_name(); @@ -4449,8 +4644,7 @@ TEST_F(TestLibRBD, DiscardAfterWrite) { - CephContext* cct = reinterpret_cast<CephContext*>(_rados.cct()); - REQUIRE(!cct->_conf->rbd_skip_partial_discard); + REQUIRE(!is_skip_partial_discard_enabled()); librados::IoCtx ioctx; ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx)); diff -Nru ceph-10.2.7/src/test/librgw_file.cc ceph-10.2.9/src/test/librgw_file.cc --- ceph-10.2.7/src/test/librgw_file.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librgw_file.cc 2017-07-13 13:05:37.000000000 +0000 @@ -73,7 +73,8 @@ } extern "C" { - static bool r1_cb(const char* name, void *arg, uint64_t offset) { + static bool r1_cb(const char* name, void *arg, uint64_t offset, + uint32_t flags) { // don't need arg--it would point to fids1 fids1.push_back(fid_type(name, offset, nullptr /* handle */)); return true; /* XXX ? */ @@ -139,7 +140,8 @@ } extern "C" { - static bool r2_cb(const char* name, void *arg, uint64_t offset) { + static bool r2_cb(const char* name, void *arg, uint64_t offset, + uint32_t flags) { std::vector<fid_type>& obj_vector = *(static_cast<std::vector<fid_type>*>(arg)); obj_vector.push_back(fid_type(name, offset, nullptr)); return true; /* XXX ? */ diff -Nru ceph-10.2.7/src/test/librgw_file_gp.cc ceph-10.2.9/src/test/librgw_file_gp.cc --- ceph-10.2.7/src/test/librgw_file_gp.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librgw_file_gp.cc 2017-07-13 13:05:37.000000000 +0000 @@ -191,7 +191,8 @@ } extern "C" { - static bool r2_cb(const char* name, void *arg, uint64_t offset) { + static bool r2_cb(const char* name, void *arg, uint64_t offset, + uint32_t flags) { // don't need arg--it would point to fids fids.push_back(fid_type(name, offset, nullptr)); return true; /* XXX ? */ diff -Nru ceph-10.2.7/src/test/librgw_file_nfsns.cc ceph-10.2.9/src/test/librgw_file_nfsns.cc --- ceph-10.2.7/src/test/librgw_file_nfsns.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/librgw_file_nfsns.cc 2017-07-13 13:05:37.000000000 +0000 @@ -853,7 +853,8 @@ } extern "C" { - static bool r1_cb(const char* name, void *arg, uint64_t offset) { + static bool r1_cb(const char* name, void *arg, uint64_t offset, + uint32_t flags) { struct rgw_file_handle* parent_fh = static_cast<struct rgw_file_handle*>(arg); RGWFileHandle* rgw_fh = get_rgwfh(parent_fh); @@ -861,6 +862,7 @@ << " bucket=" << rgw_fh->bucket_name() << " dir=" << rgw_fh->full_object_name() << " called back name=" << name + << " flags=" << flags << dendl; string name_str{name}; if (! ((name_str == ".") || @@ -1008,7 +1010,8 @@ } extern "C" { - static bool r2_cb(const char* name, void *arg, uint64_t offset) { + static bool r2_cb(const char* name, void *arg, uint64_t offset, + uint32_t flags) { dirent_vec& dvec = *(static_cast<dirent_vec*>(arg)); lsubdout(cct, rgw, 10) << __func__ @@ -1016,6 +1019,7 @@ << " dir=" << marker_dir << " iv count=" << dvec.count << " called back name=" << name + << " flags=" << flags << dendl; string name_str{name}; if (! 
((name_str == ".") || diff -Nru ceph-10.2.7/src/test/mds/TestMDSAuthCaps.cc ceph-10.2.9/src/test/mds/TestMDSAuthCaps.cc --- ceph-10.2.7/src/test/mds/TestMDSAuthCaps.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/mds/TestMDSAuthCaps.cc 2017-07-13 13:05:37.000000000 +0000 @@ -16,9 +16,7 @@ #include "include/stringify.h" #include "mds/MDSAuthCaps.h" -#include "common/ceph_argparse.h" -#include "common/common_init.h" -#include "global/global_init.h" +#include "test/unit.h" #include "gtest/gtest.h" @@ -244,17 +242,3 @@ ASSERT_EQ(test_values[i].output, stringify(cap)); } } - -int main(int argc, char **argv) -{ - ::testing::InitGoogleTest(&argc, argv); - - vector args; - argv_to_vec(argc, (const char **)argv, args); - env_to_vec(args, NULL); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - return RUN_ALL_TESTS(); -} diff -Nru ceph-10.2.7/src/test/messenger/simple_client.cc ceph-10.2.9/src/test/messenger/simple_client.cc --- ceph-10.2.7/src/test/messenger/simple_client.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/messenger/simple_client.cc 2017-07-13 13:05:37.000000000 +0000 @@ -69,8 +69,9 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_ANY, CODE_ENVIRONMENT_UTILITY, - 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_ANY, + CODE_ENVIRONMENT_UTILITY, + 0); for (arg_iter = args.begin(); arg_iter != args.end();) { if (ceph_argparse_witharg(args, arg_iter, &val, "--addr", diff -Nru ceph-10.2.7/src/test/messenger/simple_server.cc ceph-10.2.9/src/test/messenger/simple_server.cc --- ceph-10.2.7/src/test/messenger/simple_server.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/messenger/simple_server.cc 2017-07-13 13:05:37.000000000 +0000 @@ -52,8 +52,9 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_ANY, CODE_ENVIRONMENT_DAEMON, - 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_ANY, + CODE_ENVIRONMENT_DAEMON, + 0); for (arg_iter = args.begin(); arg_iter != args.end();) { if (ceph_argparse_witharg(args, arg_iter, &val, "--addr", diff -Nru ceph-10.2.7/src/test/messenger/xio_client.cc ceph-10.2.9/src/test/messenger/xio_client.cc --- ceph-10.2.7/src/test/messenger/xio_client.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/messenger/xio_client.cc 2017-07-13 13:05:37.000000000 +0000 @@ -75,8 +75,9 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, - CEPH_ENTITY_TYPE_ANY, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, + CEPH_ENTITY_TYPE_ANY, + CODE_ENVIRONMENT_UTILITY, 0); for (arg_iter = args.begin(); arg_iter != args.end();) { if (ceph_argparse_witharg(args, arg_iter, &val, "--addr", diff -Nru ceph-10.2.7/src/test/mon/PGMap.cc ceph-10.2.9/src/test/mon/PGMap.cc --- ceph-10.2.7/src/test/mon/PGMap.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/mon/PGMap.cc 2017-07-13 13:05:37.000000000 +0000 @@ -14,9 +14,8 @@ #include "mon/PGMap.h" #include "gtest/gtest.h" -#include "common/ceph_argparse.h" -#include "global/global_init.h" -#include "global/global_context.h" +#include "include/stringify.h" +#include "test/unit.h" TEST(pgmap, min_last_epoch_clean) { @@ -115,15 +114,3 @@ ASSERT_EQ(123u, pg_map.get_min_last_epoch_clean()); } } - -int main(int argc, char **argv) { - vector args; - argv_to_vec(argc, (const char **)argv, args); - env_to_vec(args); - - vector def_args; - global_init(&def_args, args, 
CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff -Nru ceph-10.2.7/src/test/mon/test-mon-msg.cc ceph-10.2.9/src/test/mon/test-mon-msg.cc --- ceph-10.2.7/src/test/mon/test-mon-msg.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/mon/test-mon-msg.cc 2017-07-13 13:05:37.000000000 +0000 @@ -321,13 +321,12 @@ int main(int argc, char *argv[]) { - vector<const char*> def_args; vector<const char*> args; argv_to_vec(argc, (const char **)argv, args); - global_init(&def_args, args, - CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, - 0); + auto cct = global_init(nullptr, args, + CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, + 0); common_init_finish(g_ceph_context); g_ceph_context->_conf->apply_changes(NULL); ::testing::InitGoogleTest(&argc, argv); diff -Nru ceph-10.2.7/src/test/mon/test_mon_workloadgen.cc ceph-10.2.9/src/test/mon/test_mon_workloadgen.cc --- ceph-10.2.7/src/test/mon/test_mon_workloadgen.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/mon/test_mon_workloadgen.cc 2017-07-13 13:05:37.000000000 +0000 @@ -995,9 +995,9 @@ our_name = argv[0]; argv_to_vec(argc, argv, args); - global_init(&def_args, args, - CEPH_ENTITY_TYPE_OSD, CODE_ENVIRONMENT_UTILITY, - 0); + auto cct = global_init(&def_args, args, + CEPH_ENTITY_TYPE_OSD, CODE_ENVIRONMENT_UTILITY, + 0); common_init_finish(g_ceph_context); g_ceph_context->_conf->apply_changes(NULL); diff -Nru ceph-10.2.7/src/test/msgr/perf_msgr_client.cc ceph-10.2.9/src/test/msgr/perf_msgr_client.cc --- ceph-10.2.7/src/test/msgr/perf_msgr_client.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/msgr/perf_msgr_client.cc 2017-07-13 13:05:37.000000000 +0000 @@ -172,7 +172,8 @@ vector<const char*> args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); g_ceph_context->_conf->apply_changes(NULL); diff -Nru ceph-10.2.7/src/test/msgr/perf_msgr_server.cc ceph-10.2.9/src/test/msgr/perf_msgr_server.cc --- ceph-10.2.7/src/test/msgr/perf_msgr_server.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/msgr/perf_msgr_server.cc 2017-07-13 13:05:37.000000000 +0000 @@ -144,7 +144,8 @@ vector<const char*> args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); g_ceph_context->_conf->apply_changes(NULL); diff -Nru ceph-10.2.7/src/test/msgr/test_async_driver.cc ceph-10.2.9/src/test/msgr/test_async_driver.cc --- ceph-10.2.7/src/test/msgr/test_async_driver.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/msgr/test_async_driver.cc 2017-07-13 13:05:37.000000000 +0000 @@ -52,9 +52,11 @@ #include "msg/async/EventKqueue.h" #endif #include "msg/async/EventSelect.h" +#include "test/unit.h" #include <gtest/gtest.h> + #if GTEST_HAS_PARAM_TEST class EventDriverTest : public ::testing::TestWithParam<const char*> { @@ -349,17 +351,6 @@ #endif -int main(int argc, char **argv) { - vector<const char*> args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: * compile-command: 
"cd ../.. ; make ceph_test_async_driver && diff -Nru ceph-10.2.7/src/test/msgr/test_msgr.cc ceph-10.2.9/src/test/msgr/test_msgr.cc --- ceph-10.2.7/src/test/msgr/test_msgr.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/msgr/test_msgr.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1432,7 +1432,7 @@ argv_to_vec(argc, (const char **)argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); g_ceph_context->_conf->set_val("auth_cluster_required", "none"); g_ceph_context->_conf->set_val("auth_service_required", "none"); g_ceph_context->_conf->set_val("auth_client_required", "none"); diff -Nru ceph-10.2.7/src/test/ObjectMap/CMakeLists.txt ceph-10.2.9/src/test/ObjectMap/CMakeLists.txt --- ceph-10.2.7/src/test/ObjectMap/CMakeLists.txt 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/ObjectMap/CMakeLists.txt 2017-07-13 13:05:37.000000000 +0000 @@ -5,6 +5,7 @@ ) set_target_properties(ceph_test_object_map PROPERTIES COMPILE_FLAGS ${UNITTEST_CXX_FLAGS}) +add_ceph_unittest(ceph_test_object_map ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ceph_test_object_map) target_link_libraries(ceph_test_object_map os common diff -Nru ceph-10.2.7/src/test/ObjectMap/test_keyvaluedb_iterators.cc ceph-10.2.9/src/test/ObjectMap/test_keyvaluedb_iterators.cc --- ceph-10.2.7/src/test/ObjectMap/test_keyvaluedb_iterators.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/ObjectMap/test_keyvaluedb_iterators.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1786,7 +1786,7 @@ vector args; argv_to_vec(argc, (const char **) argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); ::testing::InitGoogleTest(&argc, argv); diff -Nru ceph-10.2.7/src/test/ObjectMap/test_object_map.cc ceph-10.2.9/src/test/ObjectMap/test_object_map.cc --- ceph-10.2.7/src/test/ObjectMap/test_object_map.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/ObjectMap/test_object_map.cc 2017-07-13 13:05:37.000000000 +0000 @@ -73,6 +73,17 @@ db->set_keys(hoid, to_write); } + void set_keys(ghobject_t hoid, const map &to_set) { + map to_write; + for (auto &&i: to_set) { + bufferptr bp(i.second.data(), i.second.size()); + bufferlist bl; + bl.append(bp); + to_write.insert(make_pair(i.first, bl)); + } + db->set_keys(hoid, to_write); + } + void set_xattr(ghobject_t hoid, string key, string value) { map to_write; @@ -145,8 +156,10 @@ map got; db->get_values(hoid, to_get, &got); if (!got.empty()) { - *value = string(got.begin()->second.c_str(), - got.begin()->second.length()); + if (value) { + *value = string(got.begin()->second.c_str(), + got.begin()->second.length()); + } return 1; } else { return 0; @@ -158,6 +171,11 @@ key); } + void remove_keys(const string &objname, const set &to_remove) { + remove_keys(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), + to_remove); + } + void remove_key(ghobject_t hoid, string key) { set to_remove; @@ -165,6 +183,11 @@ db->rm_keys(hoid, to_remove); } + void remove_keys(ghobject_t hoid, + const set &to_remove) { + db->rm_keys(hoid, to_remove); + } + void remove_xattr(const string &objname, const string &key) { remove_xattr(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), key); @@ -187,10 +210,30 @@ db->clone(hoid, hoid2); } + void rename(const string &objname, const string &target) 
{ + rename(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), + ghobject_t(hobject_t(sobject_t(target, CEPH_NOSNAP)))); + } + + void rename(ghobject_t hoid, + ghobject_t hoid2) { + db->rename(hoid, hoid2); + } + void clear(const string &objname) { clear(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP)))); } + void legacy_clone(const string &objname, const string &target) { + legacy_clone(ghobject_t(hobject_t(sobject_t(objname, CEPH_NOSNAP))), + ghobject_t(hobject_t(sobject_t(target, CEPH_NOSNAP)))); + } + + void legacy_clone(ghobject_t hoid, + ghobject_t hoid2) { + db->legacy_clone(hoid, hoid2); + } + void clear(ghobject_t hoid) { db->clear(hoid); } @@ -204,10 +247,10 @@ } void def_init() { - for (unsigned i = 0; i < 1000; ++i) { + for (unsigned i = 0; i < 10000; ++i) { key_space.insert("key_" + num_str(i)); } - for (unsigned i = 0; i < 1000; ++i) { + for (unsigned i = 0; i < 100; ++i) { object_name_space.insert("name_" + num_str(i)); } } @@ -233,17 +276,35 @@ << value << std::endl; } - void auto_set_key(ostream &out) { - set<string>::iterator key = rand_choose(key_space); + void test_set_key(const string &obj, const string &key, const string &val) { + omap[obj][key] = val; + set_key(obj, key, val); + } + + void test_set_keys(const string &obj, const map<string, string> &to_set) { + for (auto &&i: to_set) { + omap[obj][i.first] = i.second; + } + set_keys( + ghobject_t(hobject_t(sobject_t(obj, CEPH_NOSNAP))), + to_set); + } + + void auto_set_keys(ostream &out) { set<string>::iterator object = rand_choose(object_name_space); - string value = val_from_key(*object, *key); + map<string, string> to_set; + unsigned amount = (rand() % 10) + 1; + for (unsigned i = 0; i < amount; ++i) { + set<string>::iterator key = rand_choose(key_space); + string value = val_from_key(*object, *key); + out << "auto_set_key " << *object << ": " << *key << " -> " + << value << std::endl; + to_set.insert(make_pair(*key, value)); + } - omap[*object][*key] = value; - set_key(*object, *key, value); - out << "auto_set_key " << *object << ": " << *key << " -> " - << value << std::endl; + test_set_keys(*object, to_set); } void xattrs_on_object(const string &object, set<string> *out) { @@ -403,48 +464,65 @@ return 0; } - void auto_clone_key(ostream &out) { - set<string>::iterator object = rand_choose(object_name_space); - set<string>::iterator target = rand_choose(object_name_space); - while (target == object) { - target = rand_choose(object_name_space); - } - out << "clone " << *object << " to " << *target; - clone(*object, *target); - if (!omap.count(*object)) { + void test_clone(const string &object, const string &target, ostream &out) { + clone(object, target); + if (!omap.count(object)) { out << " source missing."; - omap.erase(*target); + omap.erase(target); } else { out << " source present."; - omap[*target] = omap[*object]; + omap[target] = omap[object]; } - if (!hmap.count(*object)) { + if (!hmap.count(object)) { out << " hmap source missing." << std::endl; - hmap.erase(*target); + hmap.erase(target); } else { out << " hmap source present." << std::endl; - hmap[*target] = hmap[*object]; + hmap[target] = hmap[object]; } - if (!xattrs.count(*object)) { + if (!xattrs.count(object)) { out << " hmap source missing." << std::endl; - xattrs.erase(*target); + xattrs.erase(target); } else { out << " hmap source present." 
<< std::endl; - xattrs[*target] = xattrs[*object]; + xattrs[target] = xattrs[object]; } } - void auto_remove_key(ostream &out) { + void auto_clone_key(ostream &out) { + set<string>::iterator object = rand_choose(object_name_space); + set<string>::iterator target = rand_choose(object_name_space); + while (target == object) { + target = rand_choose(object_name_space); + } + out << "clone " << *object << " to " << *target; + test_clone(*object, *target, out); + } + + void test_remove_keys(const string &obj, const set<string> &to_remove) { + for (auto &&k: to_remove) + omap[obj].erase(k); + remove_keys(obj, to_remove); + } + + void test_remove_key(const string &obj, const string &key) { + omap[obj].erase(key); + remove_key(obj, key); + } + + void auto_remove_keys(ostream &out) { set<string>::iterator object = rand_choose(object_name_space); set<string> kspace; keys_on_object(*object, &kspace); - set<string>::iterator key = rand_choose(kspace); - if (key == kspace.end()) { - return; + set<string> to_remove; + for (unsigned i = 0; i < 3; ++i) { + set<string>::iterator key = rand_choose(kspace); + if (key == kspace.end()) + continue; + out << "removing " << *key << " from " << *object << std::endl; + to_remove.insert(*key); } - out << "removing " << *key << " from " << *object << std::endl; - omap[*object].erase(*key); - remove_key(*object, *key); + test_remove_keys(*object, to_remove); } void auto_remove_xattr(ostream &out) { @@ -469,12 +547,16 @@ xattrs.erase(*object); } + void test_clear(const string &obj) { + clear_omap(obj); + omap.erase(obj); + hmap.erase(obj); + } + void auto_clear_omap(ostream &out) { set<string>::iterator object = rand_choose(object_name_space); out << "auto_clear_object " << *object << std::endl; - clear_omap(*object); - omap.erase(*object); - hmap.erase(*object); + test_clear(*object); } void auto_write_header(ostream &out) { @@ -516,6 +598,37 @@ return 0; } } + + void verify_keys(const std::string &obj, ostream &out) { + set<string> in_db; + ObjectMap::ObjectMapIterator iter = db->get_iterator( + ghobject_t(hobject_t(sobject_t(obj, CEPH_NOSNAP)))); + for (iter->seek_to_first(); iter->valid(); iter->next()) { + in_db.insert(iter->key()); + } + bool err = false; + for (auto &&i: omap[obj]) { + if (!in_db.count(i.first)) { + out << __func__ << ": obj " << obj << " missing key " + << i.first << std::endl; + err = true; + } else { + in_db.erase(i.first); + } + } + if (!in_db.empty()) { + out << __func__ << ": obj " << obj << " found extra keys " + << in_db << std::endl; + err = true; + } + ASSERT_FALSE(err); + } + + void auto_verify_objects(ostream &out) { + for (auto &&i: omap) { + verify_keys(i.first, out); + } + } }; class ObjectMapTest : public ::testing::Test { @@ -542,7 +655,7 @@ virtual void TearDown() { std::cerr << "Checking..." 
<< std::endl; - assert(db->check(std::cerr)); + ASSERT_EQ(0, db->check(std::cerr)); } }; @@ -551,7 +664,8 @@ vector<const char*> args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); @@ -744,6 +858,128 @@ db->clear(hoid2); } +TEST_F(ObjectMapTest, Rename) { + ghobject_t hoid(hobject_t(sobject_t("foo", CEPH_NOSNAP))); + ghobject_t hoid2(hobject_t(sobject_t("foo2", CEPH_NOSNAP))); + + for (unsigned i = 0; i < 1000; ++i) { + tester.set_key(hoid, "foo" + num_str(i), "bar" + num_str(i)); + } + + db->rename(hoid, hoid2); + // Verify rename where target exists + db->clone(hoid2, hoid); + db->rename(hoid, hoid2); + + int r = 0; + for (unsigned i = 0; i < 1000; ++i) { + string result; + r = tester.get_key(hoid2, "foo" + num_str(i), &result); + ASSERT_EQ(1, r); + ASSERT_EQ("bar" + num_str(i), result); + + if (i % 2) { + tester.remove_key(hoid2, "foo" + num_str(i)); + } + } + + for (unsigned i = 0; i < 1000; ++i) { + string result; + r = tester.get_key(hoid2, "foo" + num_str(i), &result); + if (i % 2) { + ASSERT_EQ(0, r); + } else { + ASSERT_EQ(1, r); + ASSERT_EQ("bar" + num_str(i), result); + } + } + + { + ObjectMap::ObjectMapIterator iter = db->get_iterator(hoid2); + iter->seek_to_first(); + for (unsigned i = 0; i < 1000; ++i) { + if (!(i % 2)) { + ASSERT_TRUE(iter->valid()); + ASSERT_EQ("foo" + num_str(i), iter->key()); + iter->next(); + } + } + } + + db->clear(hoid2); +} + +TEST_F(ObjectMapTest, OddEvenOldClone) { + ghobject_t hoid(hobject_t(sobject_t("foo", CEPH_NOSNAP))); + ghobject_t hoid2(hobject_t(sobject_t("foo2", CEPH_NOSNAP))); + + for (unsigned i = 0; i < 1000; ++i) { + tester.set_key(hoid, "foo" + num_str(i), "bar" + num_str(i)); + } + + db->legacy_clone(hoid, hoid2); + + int r = 0; + for (unsigned i = 0; i < 1000; ++i) { + string result; + r = tester.get_key(hoid, "foo" + num_str(i), &result); + ASSERT_EQ(1, r); + ASSERT_EQ("bar" + num_str(i), result); + r = tester.get_key(hoid2, "foo" + num_str(i), &result); + ASSERT_EQ(1, r); + ASSERT_EQ("bar" + num_str(i), result); + + if (i % 2) { + tester.remove_key(hoid, "foo" + num_str(i)); + } else { + tester.remove_key(hoid2, "foo" + num_str(i)); + } + } + + for (unsigned i = 0; i < 1000; ++i) { + string result; + string result2; + r = tester.get_key(hoid, "foo" + num_str(i), &result); + int r2 = tester.get_key(hoid2, "foo" + num_str(i), &result2); + if (i % 2) { + ASSERT_EQ(0, r); + ASSERT_EQ(1, r2); + ASSERT_EQ("bar" + num_str(i), result2); + } else { + ASSERT_EQ(0, r2); + ASSERT_EQ(1, r); + ASSERT_EQ("bar" + num_str(i), result); + } + } + + { + ObjectMap::ObjectMapIterator iter = db->get_iterator(hoid); + iter->seek_to_first(); + for (unsigned i = 0; i < 1000; ++i) { + if (!(i % 2)) { + ASSERT_TRUE(iter->valid()); + ASSERT_EQ("foo" + num_str(i), iter->key()); + iter->next(); + } + } + } + + { + ObjectMap::ObjectMapIterator iter2 = db->get_iterator(hoid2); + iter2->seek_to_first(); + for (unsigned i = 0; i < 1000; ++i) { + if (i % 2) { + ASSERT_TRUE(iter2->valid()); + ASSERT_EQ("foo" + num_str(i), iter2->key()); + iter2->next(); + } + } + } + + db->clear(hoid); + db->clear(hoid2); +} + TEST_F(ObjectMapTest, RandomTest) { tester.def_init(); for (unsigned i = 0; i < 5000; ++i) { @@ -759,7 +995,7 @@ } else if (val < 14) { ASSERT_TRUE(tester.auto_verify_header(std::cerr)); } else if (val < 
30) { - tester.auto_set_key(std::cerr); + tester.auto_set_keys(std::cerr); } else if (val < 42) { tester.auto_set_xattr(std::cerr); } else if (val < 55) { @@ -779,7 +1015,117 @@ } else if (val < 92) { tester.auto_remove_xattr(std::cerr); } else { - tester.auto_remove_key(std::cerr); + tester.auto_remove_keys(std::cerr); + } + + if (i % 500) { + tester.auto_verify_objects(std::cerr); + } + } +} + +TEST_F(ObjectMapTest, RandomTestNoDeletesXattrs) { + tester.def_init(); + for (unsigned i = 0; i < 5000; ++i) { + unsigned val = rand(); + val <<= 8; + val %= 100; + if (!(i%100)) + std::cout << "on op " << i + << " val is " << val << std::endl; + + if (val < 45) { + tester.auto_set_keys(std::cerr); + } else if (val < 90) { + tester.auto_remove_keys(std::cerr); + } else { + tester.auto_clone_key(std::cerr); + } + + if (i % 500) { + tester.auto_verify_objects(std::cerr); } } } + +string num_to_key(unsigned i) { + char buf[100]; + int ret = snprintf(buf, sizeof(buf), "%010u", i); + assert(ret > 0); + return string(buf, ret); +} + +TEST_F(ObjectMapTest, TestMergeNewCompleteContainBug) { + /* This test exploits a bug in kraken and earlier where merge_new_complete + * could miss complete entries fully contained by a new entry. To get this + * to actually result in an incorrect return value, you need to remove at + * least two values, one before a complete region, and one which occurs in + * the parent after the complete region (but within 20 not yet completed + * parent points of the first value). + */ + for (unsigned i = 10; i < 160; i+=2) { + tester.test_set_key("foo", num_to_key(i), "asdf"); + } + tester.test_clone("foo", "foo2", std::cout); + tester.test_clear("foo"); + + tester.test_set_key("foo2", num_to_key(15), "asdf"); + tester.test_set_key("foo2", num_to_key(13), "asdf"); + tester.test_set_key("foo2", num_to_key(57), "asdf"); + + tester.test_remove_key("foo2", num_to_key(15)); + + set<string> to_remove; + to_remove.insert(num_to_key(13)); + to_remove.insert(num_to_key(58)); + to_remove.insert(num_to_key(60)); + to_remove.insert(num_to_key(62)); + tester.test_remove_keys("foo2", to_remove); + + tester.verify_keys("foo2", std::cout); + ASSERT_EQ(tester.get_key("foo2", num_to_key(10), nullptr), 1); + ASSERT_EQ(tester.get_key("foo2", num_to_key(1), nullptr), 0); + ASSERT_EQ(tester.get_key("foo2", num_to_key(56), nullptr), 1); + // this one triggers the bug + ASSERT_EQ(tester.get_key("foo2", num_to_key(58), nullptr), 0); +} + +TEST_F(ObjectMapTest, TestIterateBug18533) { + /* This test starts with the one immediately above to create a pair of + * complete regions where one contains the other. Then, it deletes the + * key at the start of the contained region. The logic in next_parent() + * skips ahead to the end of the contained region, and we start copying + * values down again from the parent into the child -- including some + * that had actually been deleted. I think this works for any removal + * within the outer complete region after the start of the contained + * region. 
+ */ + for (unsigned i = 10; i < 160; i+=2) { + tester.test_set_key("foo", num_to_key(i), "asdf"); + } + tester.test_clone("foo", "foo2", std::cout); + tester.test_clear("foo"); + + tester.test_set_key("foo2", num_to_key(15), "asdf"); + tester.test_set_key("foo2", num_to_key(13), "asdf"); + tester.test_set_key("foo2", num_to_key(57), "asdf"); + tester.test_set_key("foo2", num_to_key(91), "asdf"); + + tester.test_remove_key("foo2", num_to_key(15)); + + set<string> to_remove; + to_remove.insert(num_to_key(13)); + to_remove.insert(num_to_key(58)); + to_remove.insert(num_to_key(60)); + to_remove.insert(num_to_key(62)); + to_remove.insert(num_to_key(82)); + to_remove.insert(num_to_key(84)); + tester.test_remove_keys("foo2", to_remove); + + //tester.test_remove_key("foo2", num_to_key(15)); also does the trick + tester.test_remove_key("foo2", num_to_key(80)); + + // the iterator in verify_keys will return an extra value + tester.verify_keys("foo2", std::cout); +} + diff -Nru ceph-10.2.7/src/test/objectstore/chain_xattr.cc ceph-10.2.9/src/test/objectstore/chain_xattr.cc --- ceph-10.2.7/src/test/objectstore/chain_xattr.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/objectstore/chain_xattr.cc 2017-07-13 13:05:37.000000000 +0000 @@ -377,7 +377,8 @@ vector<const char*> args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); g_ceph_context->_conf->set_val("err_to_stderr", "false"); g_ceph_context->_conf->set_val("log_to_stderr", "false"); diff -Nru ceph-10.2.7/src/test/objectstore/ObjectStoreTransactionBenchmark.cc ceph-10.2.9/src/test/objectstore/ObjectStoreTransactionBenchmark.cc --- ceph-10.2.7/src/test/objectstore/ObjectStoreTransactionBenchmark.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/objectstore/ObjectStoreTransactionBenchmark.cc 2017-07-13 13:05:37.000000000 +0000 @@ -246,7 +246,8 @@ vector<const char*> args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); g_ceph_context->_conf->apply_changes(NULL); Cycles::init(); diff -Nru ceph-10.2.7/src/test/objectstore/store_test.cc ceph-10.2.9/src/test/objectstore/store_test.cc --- ceph-10.2.7/src/test/objectstore/store_test.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/objectstore/store_test.cc 2017-07-13 13:05:37.000000000 +0000 @@ -3676,7 +3676,8 @@ argv_to_vec(argc, (const char **)argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); g_ceph_context->_conf->set_val("osd_journal_size", "400"); g_ceph_context->_conf->set_val("filestore_index_retry_probability", "0.5"); @@ -3694,9 +3695,7 @@ g_ceph_context->_conf->apply_changes(NULL); ::testing::InitGoogleTest(&argc, argv); - int r = RUN_ALL_TESTS(); - g_ceph_context->put(); - return r; + return RUN_ALL_TESTS(); } /* diff -Nru ceph-10.2.7/src/test/objectstore/test_bluefs.cc ceph-10.2.9/src/test/objectstore/test_bluefs.cc --- ceph-10.2.7/src/test/objectstore/test_bluefs.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/objectstore/test_bluefs.cc 2017-07-13 13:05:37.000000000 
+0000 @@ -136,7 +136,13 @@ argv_to_vec(argc, (const char **)argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + vector<const char*> def_args; + def_args.push_back("--debug-bluefs=1/20"); + def_args.push_back("--debug-bdev=1/20"); + + auto cct = global_init(&def_args, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + 0); common_init_finish(g_ceph_context); g_ceph_context->_conf->set_val( "enable_experimental_unrecoverable_data_corrupting_features", diff -Nru ceph-10.2.7/src/test/objectstore/test_idempotent.cc ceph-10.2.9/src/test/objectstore/test_idempotent.cc --- ceph-10.2.7/src/test/objectstore/test_idempotent.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/objectstore/test_idempotent.cc 2017-07-13 13:05:37.000000000 +0000 @@ -45,7 +45,8 @@ vector<const char*> args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); g_ceph_context->_conf->apply_changes(NULL); diff -Nru ceph-10.2.7/src/test/objectstore/test_idempotent_sequence.cc ceph-10.2.9/src/test/objectstore/test_idempotent_sequence.cc --- ceph-10.2.7/src/test/objectstore/test_idempotent_sequence.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/objectstore/test_idempotent_sequence.cc 2017-07-13 13:05:37.000000000 +0000 @@ -203,9 +203,9 @@ our_name = argv[0]; argv_to_vec(argc, argv, args); - global_init(&def_args, args, - CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, - CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + auto cct = global_init(&def_args, args, + CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); common_init_finish(g_ceph_context); g_ceph_context->_conf->apply_changes(NULL); diff -Nru ceph-10.2.7/src/test/objectstore/test_kv.cc ceph-10.2.9/src/test/objectstore/test_kv.cc --- ceph-10.2.7/src/test/objectstore/test_kv.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/objectstore/test_kv.cc 2017-07-13 13:05:37.000000000 +0000 @@ -182,7 +182,8 @@ argv_to_vec(argc, (const char **)argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); g_ceph_context->_conf->set_val( "enable_experimental_unrecoverable_data_corrupting_features", diff -Nru ceph-10.2.7/src/test/objectstore/TestRocksdbOptionParse.cc ceph-10.2.9/src/test/objectstore/TestRocksdbOptionParse.cc --- ceph-10.2.7/src/test/objectstore/TestRocksdbOptionParse.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/objectstore/TestRocksdbOptionParse.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1,12 +1,12 @@ #include <gtest/gtest.h> #include "include/Context.h" -#include "common/ceph_argparse.h" -#include "global/global_init.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/thread_status.h" #include "kv/RocksDBStore.h" +#include "test/unit.h" #include <string> + using namespace std; const string dir("store_test_temp_dir"); @@ -73,13 +73,3 @@ //high pri threads is flusher_threads ASSERT_EQ(5, num_high_pri_threads); } - -int main(int argc, char **argv) { - vector<const char*> args; - argv_to_vec(argc, (const char **)argv, args); - env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - 
::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff -Nru ceph-10.2.7/src/test/objectstore/workload_generator.cc ceph-10.2.9/src/test/objectstore/workload_generator.cc --- ceph-10.2.7/src/test/objectstore/workload_generator.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/objectstore/workload_generator.cc 2017-07-13 13:05:37.000000000 +0000 @@ -568,9 +568,9 @@ // def_args.push_back("workload_gen_dir/journal"); argv_to_vec(argc, argv, args); - global_init(&def_args, args, - CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, - CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + auto cct = global_init(&def_args, args, + CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); common_init_finish(g_ceph_context); g_ceph_context->_conf->apply_changes(NULL); diff -Nru ceph-10.2.7/src/test/objectstore_bench.cc ceph-10.2.9/src/test/objectstore_bench.cc --- ceph-10.2.7/src/test/objectstore_bench.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/objectstore_bench.cc 2017-07-13 13:05:37.000000000 +0000 @@ -155,7 +155,8 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(nullptr, args, CEPH_ENTITY_TYPE_OSD, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_OSD, + CODE_ENVIRONMENT_UTILITY, 0); std::string val; vector<const char*>::iterator i = args.begin(); diff -Nru ceph-10.2.7/src/test/opensuse-13.2/ceph.spec.in ceph-10.2.9/src/test/opensuse-13.2/ceph.spec.in --- ceph-10.2.7/src/test/opensuse-13.2/ceph.spec.in 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/opensuse-13.2/ceph.spec.in 2017-07-13 13:05:36.000000000 +0000 @@ -14,7 +14,7 @@ # # Please submit bugfixes or comments via http://tracker.ceph.com/ # -%bcond_with ocf +%bcond_without ocf %bcond_without cephfs_java %bcond_with tests %bcond_with xio @@ -214,6 +214,7 @@ Requires: hdparm Requires: cryptsetup Requires: findutils +Requires: psmisc Requires: which %if 0%{?suse_version} Recommends: ntp-daemon @@ -667,11 +668,13 @@ --without-lttng \ --without-babeltrace \ %endif - $CEPH_EXTRA_CONFIGURE_ARGS \ - %{?_with_ocf} \ +%if 0%{with ocf} + --with-ocf \ +%endif %if %{without tcmalloc} --without-tcmalloc \ %endif + $CEPH_EXTRA_CONFIGURE_ARGS \ CFLAGS="$RPM_OPT_FLAGS" CXXFLAGS="$RPM_OPT_FLAGS" %if %{with lowmem_builder} diff -Nru ceph-10.2.7/src/test/os/TestLFNIndex.cc ceph-10.2.9/src/test/os/TestLFNIndex.cc --- ceph-10.2.7/src/test/os/TestLFNIndex.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/os/TestLFNIndex.cc 2017-07-13 13:05:37.000000000 +0000 @@ -450,7 +450,8 @@ vector<const char*> args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); ::testing::InitGoogleTest(&argc, argv); diff -Nru ceph-10.2.7/src/test/osd/osd-scrub-repair.sh ceph-10.2.9/src/test/osd/osd-scrub-repair.sh --- ceph-10.2.7/src/test/osd/osd-scrub-repair.sh 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/osd/osd-scrub-repair.sh 2017-07-13 13:05:37.000000000 +0000 @@ -403,6 +403,19 @@ rados --pool $poolname setomapheader $objname hdr-$objname || return 1 rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1 + done + + local pg=$(get_pg $poolname ROBJ0) + + # Compute an old omap digest and save oi + CEPH_ARGS='' ceph daemon $dir//ceph-osd.0.asok \ + config set osd_deep_scrub_update_digest_min_age 0 + CEPH_ARGS='' ceph 
daemon $dir//ceph-osd.1.asok \ + config set osd_deep_scrub_update_digest_min_age 0 + pg_deep_scrub $pg + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} # Alternate corruption between osd.0 and osd.1 local osd=$(expr $i % 2) @@ -533,7 +546,7 @@ "osd": 1 } ], - "selected_object_info": "2:ce3f1d6a:::ROBJ1:head(16'3 client.4130.0:1 dirty|omap|data_digest s 7 uv 3 dd 2ddbf8f5)", + "selected_object_info": "2:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6)", "union_shard_errors": [ "size_mismatch_oi" ], @@ -562,13 +575,13 @@ "osd": 1 } ], - "selected_object_info": "2:bc819597:::ROBJ12:head(98'39 client.4320.0:1 dirty|omap|data_digest s 7 uv 39 dd 2ddbf8f5)", + "selected_object_info": "2:bc819597:::ROBJ12:head(47'52 osd.0.0:51 dirty|omap|data_digest|omap_digest s 7 uv 36 dd 2ddbf8f5 od 67f306a)", "union_shard_errors": [ "stat_error" ], "errors": [], "object": { - "version": 39, + "version": 36, "snap": "head", "locator": "", "nspace": "", @@ -589,13 +602,13 @@ "osd": 1 } ], - "selected_object_info": "2:d60617f9:::ROBJ13:head(100'42 client.4325.0:1 dirty|omap|data_digest s 7 uv 42 dd 2ddbf8f5)", + "selected_object_info": "2:d60617f9:::ROBJ13:head(47'55 osd.0.0:54 dirty|omap|data_digest|omap_digest s 7 uv 39 dd 2ddbf8f5 od 6441854d)", "union_shard_errors": [ "stat_error" ], "errors": [], "object": { - "version": 42, + "version": 39, "snap": "head", "locator": "", "nspace": "", @@ -666,7 +679,7 @@ "osd": 1 } ], - "selected_object_info": "2:30259878:::ROBJ15:head(113'48 client.4357.0:1 dirty|omap|data_digest s 7 uv 48 dd 2ddbf8f5)", + "selected_object_info": "2:30259878:::ROBJ15:head(47'46 osd.0.0:45 dirty|omap|data_digest|omap_digest s 7 uv 45 dd 2ddbf8f5 od 2d2a4d6e)", "union_shard_errors": [ "oi_attr_missing" ], @@ -674,7 +687,7 @@ "attr_name_mismatch" ], "object": { - "version": 48, + "version": 45, "snap": "head", "locator": "", "nspace": "", @@ -695,7 +708,7 @@ "osd": 1 } ], - "selected_object_info": "2:f2a5b2a4:::ROBJ3:head(30'9 client.4162.0:1 dirty|omap|data_digest s 7 uv 9 dd 2ddbf8f5)", + "selected_object_info": "2:f2a5b2a4:::ROBJ3:head(47'57 osd.0.0:56 dirty|omap|data_digest|omap_digest s 7 uv 9 dd 2ddbf8f5 od b35dfd)", "union_shard_errors": [ "missing" ], @@ -765,14 +778,14 @@ "osd": 1 } ], - "selected_object_info": "2:86586531:::ROBJ8:head(65'26 client.4244.0:1 dirty|omap|data_digest s 7 uv 26 dd 2ddbf8f5)", + "selected_object_info": "2:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 62 dd 2ddbf8f5 od d6be81dc)", "union_shard_errors": [], "errors": [ "attr_value_mismatch", "attr_name_mismatch" ], "object": { - "version": 26, + "version": 62, "snap": "head", "locator": "", "nspace": "", @@ -794,7 +807,7 @@ "name": "snapset" } ], - "object_info": "2:ffdb2004:::ROBJ9:head(87'30 client.4294.0:1 dirty|omap|data_digest s 1 uv 30 dd 2b63260d)", + "object_info": "2:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539)", "size": 1, "errors": [], "osd": 0 @@ -812,20 +825,20 @@ "name": "snapset" } ], - "object_info": "2:ffdb2004:::ROBJ9:head(82'29 client.4282.0:1 dirty|omap|data_digest s 7 uv 29 dd 2ddbf8f5)", + "object_info": "2:ffdb2004:::ROBJ9:head(47'60 osd.0.0:59 dirty|omap|data_digest|omap_digest s 7 uv 27 dd 2ddbf8f5 od 2eecc539)", "size": 1, "errors": [], "osd": 1 } ], - "selected_object_info": "2:ffdb2004:::ROBJ9:head(87'30 client.4294.0:1 dirty|omap|data_digest s 1 uv 30 dd 2b63260d)", + "selected_object_info": 
"2:ffdb2004:::ROBJ9:head(102'63 client.4433.0:1 dirty|omap|data_digest|omap_digest s 1 uv 63 dd 2b63260d od 2eecc539)", "union_shard_errors": [], "errors": [ "object_info_inconsistency", "attr_value_mismatch" ], "object": { - "version": 30, + "version": 63, "snap": "head", "locator": "", "nspace": "", @@ -849,13 +862,6 @@ jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1 fi - # Compute an old omap digest and save oi - CEPH_ARGS='' ceph daemon $dir//ceph-osd.0.asok \ - config set osd_deep_scrub_update_digest_min_age 0 - CEPH_ARGS='' ceph daemon $dir//ceph-osd.1.asok \ - config set osd_deep_scrub_update_digest_min_age 0 - pg_deep_scrub $pg - objname=ROBJ9 # Change data and size again because digest was recomputed echo -n ZZZ > $dir/change @@ -916,7 +922,7 @@ "osd": 1 } ], - "selected_object_info": "2:ce3f1d6a:::ROBJ1:head(16'3 client.4130.0:1 dirty|omap|data_digest s 7 uv 3 dd 2ddbf8f5)", + "selected_object_info": "2:ce3f1d6a:::ROBJ1:head(47'54 osd.0.0:53 dirty|omap|data_digest|omap_digest s 7 uv 3 dd 2ddbf8f5 od f5fba2c6)", "union_shard_errors": [ "data_digest_mismatch_oi", "size_mismatch_oi" @@ -954,13 +960,13 @@ "osd": 1 } ], - "selected_object_info": "2:b1f19cbd:::ROBJ10:head(118'52 osd.0.0:10 dirty|omap|data_digest|omap_digest s 7 uv 33 dd 2ddbf8f5 od c2025a24)", + "selected_object_info": "2:b1f19cbd:::ROBJ10:head(47'51 osd.0.0:50 dirty|omap|data_digest|omap_digest s 7 uv 30 dd 2ddbf8f5 od c2025a24)", "union_shard_errors": [ "omap_digest_mismatch_oi" ], "errors": [], "object": { - "version": 33, + "version": 30, "snap": "head", "locator": "", "nspace": "", @@ -984,13 +990,13 @@ "osd": 1 } ], - "selected_object_info": "2:87abbf36:::ROBJ11:head(96'36 client.4315.0:1 dirty|omap|data_digest s 7 uv 36 dd 2ddbf8f5)", + "selected_object_info": "2:87abbf36:::ROBJ11:head(47'48 osd.0.0:47 dirty|omap|data_digest|omap_digest s 7 uv 33 dd 2ddbf8f5 od a03cef03)", "union_shard_errors": [ "read_error" ], "errors": [], "object": { - "version": 36, + "version": 33, "snap": "head", "locator": "", "nspace": "", @@ -1013,13 +1019,13 @@ "osd": 1 } ], - "selected_object_info": "2:bc819597:::ROBJ12:head(98'39 client.4320.0:1 dirty|omap|data_digest s 7 uv 39 dd 2ddbf8f5)", + "selected_object_info": "2:bc819597:::ROBJ12:head(47'52 osd.0.0:51 dirty|omap|data_digest|omap_digest s 7 uv 36 dd 2ddbf8f5 od 67f306a)", "union_shard_errors": [ "stat_error" ], "errors": [], "object": { - "version": 39, + "version": 36, "snap": "head", "locator": "", "nspace": "", @@ -1127,7 +1133,7 @@ "osd": 1 } ], - "selected_object_info": "2:30259878:::ROBJ15:head(113'48 client.4357.0:1 dirty|omap|data_digest s 7 uv 48 dd 2ddbf8f5)", + "selected_object_info": "2:30259878:::ROBJ15:head(47'46 osd.0.0:45 dirty|omap|data_digest|omap_digest s 7 uv 45 dd 2ddbf8f5 od 2d2a4d6e)", "union_shard_errors": [ "oi_attr_missing" ], @@ -1135,7 +1141,7 @@ "attr_name_mismatch" ], "object": { - "version": 48, + "version": 45, "snap": "head", "locator": "", "nspace": "", @@ -1161,7 +1167,7 @@ "osd": 1 } ], - "selected_object_info": "2:e97ce31e:::ROBJ2:head(23'6 client.4146.0:1 dirty|omap|data_digest s 7 uv 6 dd 2ddbf8f5)", + "selected_object_info": "2:e97ce31e:::ROBJ2:head(47'56 osd.0.0:55 dirty|omap|data_digest|omap_digest s 7 uv 6 dd 2ddbf8f5 od f8e11918)", "union_shard_errors": [ "data_digest_mismatch_oi" ], @@ -1192,7 +1198,7 @@ "osd": 1 } ], - "selected_object_info": "2:f2a5b2a4:::ROBJ3:head(30'9 client.4162.0:1 dirty|omap|data_digest s 7 uv 9 dd 2ddbf8f5)", + "selected_object_info": 
"2:f2a5b2a4:::ROBJ3:head(47'57 osd.0.0:56 dirty|omap|data_digest|omap_digest s 7 uv 9 dd 2ddbf8f5 od b35dfd)", "union_shard_errors": [ "missing" ], @@ -1211,20 +1217,20 @@ "data_digest": "0x2ddbf8f5", "omap_digest": "0xd7178dfe", "size": 7, - "errors": [], + "errors": [ + "omap_digest_mismatch_oi" + ], "osd": 0 }, { "data_digest": "0x2ddbf8f5", "omap_digest": "0xe2d46ea4", "size": 7, - "errors": [ - "omap_digest_mismatch_oi" - ], + "errors": [], "osd": 1 } ], - "selected_object_info": "2:f4981d31:::ROBJ4:head(118'53 osd.0.0:11 dirty|omap|data_digest|omap_digest s 7 uv 12 dd 2ddbf8f5 od d7178dfe)", + "selected_object_info": "2:f4981d31:::ROBJ4:head(47'58 osd.0.0:57 dirty|omap|data_digest|omap_digest s 7 uv 12 dd 2ddbf8f5 od e2d46ea4)", "union_shard_errors": [ "omap_digest_mismatch_oi" ], @@ -1258,7 +1264,7 @@ "osd": 1 } ], - "selected_object_info": "2:f4bfd4d1:::ROBJ5:head(118'54 osd.0.0:12 dirty|omap|data_digest|omap_digest s 7 uv 15 dd 2ddbf8f5 od 1a862a41)", + "selected_object_info": "2:f4bfd4d1:::ROBJ5:head(47'59 osd.0.0:58 dirty|omap|data_digest|omap_digest s 7 uv 15 dd 2ddbf8f5 od 1a862a41)", "union_shard_errors": [ "omap_digest_mismatch_oi" ], @@ -1279,20 +1285,20 @@ "data_digest": "0x2ddbf8f5", "omap_digest": "0x689ee887", "size": 7, - "errors": [], + "errors": [ + "omap_digest_mismatch_oi" + ], "osd": 0 }, { "data_digest": "0x2ddbf8f5", "omap_digest": "0x179c919f", "size": 7, - "errors": [ - "omap_digest_mismatch_oi" - ], + "errors": [], "osd": 1 } ], - "selected_object_info": "2:a53c12e8:::ROBJ6:head(118'51 osd.0.0:9 dirty|omap|data_digest|omap_digest s 7 uv 18 dd 2ddbf8f5 od 689ee887)", + "selected_object_info": "2:a53c12e8:::ROBJ6:head(47'50 osd.0.0:49 dirty|omap|data_digest|omap_digest s 7 uv 18 dd 2ddbf8f5 od 179c919f)", "union_shard_errors": [ "omap_digest_mismatch_oi" ], @@ -1326,7 +1332,7 @@ "osd": 1 } ], - "selected_object_info": "2:8b55fa4b:::ROBJ7:head(118'50 osd.0.0:8 dirty|omap|data_digest|omap_digest s 7 uv 21 dd 2ddbf8f5 od efced57a)", + "selected_object_info": "2:8b55fa4b:::ROBJ7:head(47'49 osd.0.0:48 dirty|omap|data_digest|omap_digest s 7 uv 21 dd 2ddbf8f5 od efced57a)", "union_shard_errors": [ "omap_digest_mismatch_oi" ], @@ -1402,14 +1408,14 @@ "osd": 1 } ], - "selected_object_info": "2:86586531:::ROBJ8:head(118'49 osd.0.0:7 dirty|omap|data_digest|omap_digest s 7 uv 26 dd 2ddbf8f5 od d6be81dc)", + "selected_object_info": "2:86586531:::ROBJ8:head(82'62 client.4351.0:1 dirty|omap|data_digest|omap_digest s 7 uv 62 dd 2ddbf8f5 od d6be81dc)", "union_shard_errors": [], "errors": [ "attr_value_mismatch", "attr_name_mismatch" ], "object": { - "version": 26, + "version": 62, "snap": "head", "locator": "", "nspace": "", @@ -1431,7 +1437,7 @@ "name": "snapset" } ], - "object_info": "2:ffdb2004:::ROBJ9:head(82'29 client.4282.0:1 dirty|omap|data_digest s 7 uv 29 dd 2ddbf8f5)", + "object_info": "2:ffdb2004:::ROBJ9:head(47'60 osd.0.0:59 dirty|omap|data_digest|omap_digest s 7 uv 27 dd 2ddbf8f5 od 2eecc539)", "data_digest": "0x1f26fb26", "omap_digest": "0x2eecc539", "size": 3, @@ -1451,7 +1457,7 @@ "name": "snapset" } ], - "object_info": "2:ffdb2004:::ROBJ9:head(118'56 client.4386.0:1 dirty|omap|data_digest|omap_digest s 3 uv 56 dd 1f26fb26 od 2eecc539)", + "object_info": "2:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539)", "data_digest": "0x1f26fb26", "omap_digest": "0x2eecc539", "size": 3, @@ -1459,14 +1465,14 @@ "osd": 1 } ], - "selected_object_info": "2:ffdb2004:::ROBJ9:head(118'56 client.4386.0:1 
dirty|omap|data_digest|omap_digest s 3 uv 56 dd 1f26fb26 od 2eecc539)", + "selected_object_info": "2:ffdb2004:::ROBJ9:head(122'64 client.4532.0:1 dirty|omap|data_digest|omap_digest s 3 uv 64 dd 1f26fb26 od 2eecc539)", "union_shard_errors": [], "errors": [ "object_info_inconsistency", "attr_value_mismatch" ], "object": { - "version": 56, + "version": 64, "snap": "head", "locator": "", "nspace": "", diff -Nru ceph-10.2.7/src/test/osd/TestOSDMap.cc ceph-10.2.9/src/test/osd/TestOSDMap.cc --- ceph-10.2.7/src/test/osd/TestOSDMap.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/osd/TestOSDMap.cc 2017-07-13 13:05:37.000000000 +0000 @@ -11,10 +11,10 @@ using namespace std; int main(int argc, char **argv) { - std::vector<const char*> preargs; std::vector<const char*> args(argv, argv+argc); - global_init(&preargs, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, - CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); common_init_finish(g_ceph_context); // make sure we have 3 copies, or some tests won't work g_ceph_context->_conf->set_val("osd_pool_default_size", "3", false); diff -Nru ceph-10.2.7/src/test/osd/TestOSDScrub.cc ceph-10.2.9/src/test/osd/TestOSDScrub.cc --- ceph-10.2.7/src/test/osd/TestOSDScrub.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/osd/TestOSDScrub.cc 2017-07-13 13:05:37.000000000 +0000 @@ -21,13 +21,13 @@ #include <stdio.h> #include <signal.h> +#include <gtest/gtest.h> #include "osd/OSD.h" #include "os/ObjectStore.h" #include "mon/MonClient.h" #include "common/ceph_argparse.h" -#include "global/global_init.h" -#include <gtest/gtest.h> #include "msg/Messenger.h" +#include "test/unit.h" class TestOSDScrub: public OSD { @@ -141,17 +141,6 @@ } -int main(int argc, char **argv) { - vector<const char*> args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - // Local Variables: // compile-command: "cd ../.. ; make unittest_osdscrub ; ./unittest_osdscrub --log-to-stderr=true --debug-osd=20 # --gtest_filter=*.* " // End: diff -Nru ceph-10.2.7/src/test/osd/TestPGLog.cc ceph-10.2.9/src/test/osd/TestPGLog.cc --- ceph-10.2.7/src/test/osd/TestPGLog.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/osd/TestPGLog.cc 2017-07-13 13:05:37.000000000 +0000 @@ -23,9 +23,7 @@ #include <signal.h> #include "osd/PGLog.h" #include "osd/OSDMap.h" -#include "common/ceph_argparse.h" -#include "global/global_init.h" -#include <gtest/gtest.h> +#include "test/unit.h" class PGLogTest : public ::testing::Test, protected PGLog { public: @@ -2047,17 +2045,6 @@ } } -int main(int argc, char **argv) { - vector<const char*> args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - // Local Variables: // compile-command: "cd ../.. 
; make unittest_pglog ; ./unittest_pglog --log-to-stderr=true --debug-osd=20 # --gtest_filter=*.* " // End: diff -Nru ceph-10.2.7/src/test/osdc/object_cacher_stress.cc ceph-10.2.9/src/test/osdc/object_cacher_stress.cc --- ceph-10.2.7/src/test/osdc/object_cacher_stress.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/osdc/object_cacher_stress.cc 2017-07-13 13:05:37.000000000 +0000 @@ -355,7 +355,8 @@ std::vector args; argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); long long delay_ns = 0; long long num_ops = 1000; diff -Nru ceph-10.2.7/src/test/perf_counters.cc ceph-10.2.9/src/test/perf_counters.cc --- ceph-10.2.7/src/test/perf_counters.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/perf_counters.cc 2017-07-13 13:05:37.000000000 +0000 @@ -50,8 +50,9 @@ preargs.push_back("--admin-socket"); preargs.push_back(get_rand_socket_path()); std::vector args; - global_init(&preargs, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, - CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + auto cct = global_init(&preargs, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); common_init_finish(g_ceph_context); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff -Nru ceph-10.2.7/src/test/perf_local.cc ceph-10.2.9/src/test/perf_local.cc --- ceph-10.2.7/src/test/perf_local.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/perf_local.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1018,7 +1018,8 @@ vector args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); Cycles::init(); diff -Nru ceph-10.2.7/src/test/rbd_mirror/random_write.cc ceph-10.2.9/src/test/rbd_mirror/random_write.cc --- ceph-10.2.7/src/test/rbd_mirror/random_write.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/rbd_mirror/random_write.cc 2017-07-13 13:05:37.000000000 +0000 @@ -155,7 +155,8 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); for (auto i = args.begin(); i != args.end(); ++i) { if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { @@ -182,33 +183,29 @@ int r = rados.init_with_context(g_ceph_context); if (r < 0) { derr << "could not initialize RADOS handle" << dendl; - goto cleanup; + return EXIT_FAILURE; } r = rados.connect(); if (r < 0) { derr << "error connecting to local cluster" << dendl; - goto cleanup; + return EXIT_FAILURE; } r = rados.ioctx_create(pool_name.c_str(), io_ctx); if (r < 0) { derr << "error finding local pool " << pool_name << ": " << cpp_strerror(r) << dendl; - goto cleanup; + return EXIT_FAILURE; } r = rbd.open(io_ctx, image, image_name.c_str()); if (r < 0) { derr << "error opening image " << image_name << ": " << cpp_strerror(r) << dendl; - goto cleanup; + return EXIT_FAILURE; } write_image(image); - - cleanup: - g_ceph_context->put(); - - return r < 0 ? 
EXIT_SUCCESS : EXIT_FAILURE; + return EXIT_SUCCESS; } diff -Nru ceph-10.2.7/src/test/rgw/test_rgw_period_history.cc ceph-10.2.9/src/test/rgw/test_rgw_period_history.cc --- ceph-10.2.7/src/test/rgw/test_rgw_period_history.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/rgw/test_rgw_period_history.cc 2017-07-13 13:05:37.000000000 +0000 @@ -322,7 +322,8 @@ vector args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); ::testing::InitGoogleTest(&argc, argv); diff -Nru ceph-10.2.7/src/test/signals.cc ceph-10.2.9/src/test/signals.cc --- ceph-10.2.7/src/test/signals.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/signals.cc 2017-07-13 13:05:37.000000000 +0000 @@ -118,6 +118,7 @@ { g_ceph_context->_log->inject_segv(); ASSERT_DEATH(derr << "foo" << dendl, ".*"); + g_ceph_context->_log->reset_segv(); } diff -Nru ceph-10.2.7/src/test/test_arch.cc ceph-10.2.9/src/test/test_arch.cc --- ceph-10.2.7/src/test/test_arch.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/test_arch.cc 2017-07-13 13:05:37.000000000 +0000 @@ -19,10 +19,10 @@ #include "arch/probe.h" #include "arch/intel.h" #include "arch/arm.h" -#include "global/global_init.h" -#include "common/ceph_argparse.h" #include "global/global_context.h" #include "gtest/gtest.h" +#include "test/unit.h" + #define FLAGS_SIZE 4096 @@ -84,18 +84,6 @@ #endif } -int main(int argc, char **argv) -{ - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - /* * Local Variables: diff -Nru ceph-10.2.7/src/test/test_cors.cc ceph-10.2.9/src/test/test_cors.cc --- ceph-10.2.7/src/test/test_cors.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/test_cors.cc 2017-07-13 13:05:37.000000000 +0000 @@ -883,7 +883,8 @@ vector args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); g_test = new test_cors_helper(); finisher = new Finisher(g_ceph_context); @@ -904,6 +905,8 @@ } #endif finisher->stop(); + delete g_test; + delete finisher; return 0; } diff -Nru ceph-10.2.7/src/test/test_filejournal.cc ceph-10.2.9/src/test/test_filejournal.cc --- ceph-10.2.7/src/test/test_filejournal.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/test_filejournal.cc 2017-07-13 13:05:37.000000000 +0000 @@ -70,7 +70,8 @@ vector args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); char mb[10]; diff -Nru ceph-10.2.7/src/test/test_ipaddr.cc ceph-10.2.9/src/test/test_ipaddr.cc --- ceph-10.2.7/src/test/test_ipaddr.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/test_ipaddr.cc 2017-07-13 13:05:37.000000000 +0000 @@ -252,7 +252,7 @@ TEST(CommonIPAddr, ParseNetwork_Empty) { - struct sockaddr network; + struct sockaddr_storage network; unsigned int prefix_len; bool ok; @@ -262,7 +262,7 @@ TEST(CommonIPAddr, 
ParseNetwork_Bad_Junk) { - struct sockaddr network; + struct sockaddr_storage network; unsigned int prefix_len; bool ok; @@ -272,27 +272,27 @@ TEST(CommonIPAddr, ParseNetwork_Bad_SlashNum) { - struct sockaddr network; + struct sockaddr_storage network; unsigned int prefix_len; bool ok; - ok = parse_network("/24", (struct sockaddr*)&network, &prefix_len); + ok = parse_network("/24", &network, &prefix_len); ASSERT_EQ(ok, false); } TEST(CommonIPAddr, ParseNetwork_Bad_Slash) { - struct sockaddr network; + struct sockaddr_storage network; unsigned int prefix_len; bool ok; - ok = parse_network("/", (struct sockaddr*)&network, &prefix_len); + ok = parse_network("/", &network, &prefix_len); ASSERT_EQ(ok, false); } TEST(CommonIPAddr, ParseNetwork_Bad_IPv4) { - struct sockaddr network; + struct sockaddr_storage network; unsigned int prefix_len; bool ok; @@ -302,7 +302,7 @@ TEST(CommonIPAddr, ParseNetwork_Bad_IPv4Slash) { - struct sockaddr network; + struct sockaddr_storage network; unsigned int prefix_len; bool ok; @@ -312,7 +312,7 @@ TEST(CommonIPAddr, ParseNetwork_Bad_IPv4SlashNegative) { - struct sockaddr network; + struct sockaddr_storage network; unsigned int prefix_len; bool ok; @@ -322,7 +322,7 @@ TEST(CommonIPAddr, ParseNetwork_Bad_IPv4SlashJunk) { - struct sockaddr network; + struct sockaddr_storage network; unsigned int prefix_len; bool ok; @@ -332,7 +332,7 @@ TEST(CommonIPAddr, ParseNetwork_Bad_IPv6) { - struct sockaddr network; + struct sockaddr_storage network; unsigned int prefix_len; bool ok; @@ -342,7 +342,7 @@ TEST(CommonIPAddr, ParseNetwork_Bad_IPv6Slash) { - struct sockaddr network; + struct sockaddr_storage network; unsigned int prefix_len; bool ok; @@ -352,7 +352,7 @@ TEST(CommonIPAddr, ParseNetwork_Bad_IPv6SlashNegative) { - struct sockaddr network; + struct sockaddr_storage network; unsigned int prefix_len; bool ok; @@ -362,7 +362,7 @@ TEST(CommonIPAddr, ParseNetwork_Bad_IPv6SlashJunk) { - struct sockaddr network; + struct sockaddr_storage network; unsigned int prefix_len; bool ok; @@ -373,10 +373,12 @@ TEST(CommonIPAddr, ParseNetwork_IPv4_0) { struct sockaddr_in network; + struct sockaddr_storage net_storage; unsigned int prefix_len; bool ok; - ok = parse_network("123.123.123.123/0", (struct sockaddr*)&network, &prefix_len); + ok = parse_network("123.123.123.123/0", &net_storage, &prefix_len); + network = *(struct sockaddr_in *) &net_storage; ASSERT_EQ(ok, true); ASSERT_EQ(0U, prefix_len); ASSERT_EQ(AF_INET, network.sin_family); @@ -389,10 +391,12 @@ TEST(CommonIPAddr, ParseNetwork_IPv4_13) { struct sockaddr_in network; + struct sockaddr_storage net_storage; unsigned int prefix_len; bool ok; - ok = parse_network("123.123.123.123/13", (struct sockaddr*)&network, &prefix_len); + ok = parse_network("123.123.123.123/13", &net_storage, &prefix_len); + network = *(struct sockaddr_in *) &net_storage; ASSERT_EQ(ok, true); ASSERT_EQ(13U, prefix_len); ASSERT_EQ(AF_INET, network.sin_family); @@ -405,10 +409,12 @@ TEST(CommonIPAddr, ParseNetwork_IPv4_32) { struct sockaddr_in network; + struct sockaddr_storage net_storage; unsigned int prefix_len; bool ok; - ok = parse_network("123.123.123.123/32", (struct sockaddr*)&network, &prefix_len); + ok = parse_network("123.123.123.123/32", &net_storage, &prefix_len); + network = *(struct sockaddr_in *) &net_storage; ASSERT_EQ(ok, true); ASSERT_EQ(32U, prefix_len); ASSERT_EQ(AF_INET, network.sin_family); @@ -421,10 +427,12 @@ TEST(CommonIPAddr, ParseNetwork_IPv4_42) { struct sockaddr_in network; + struct sockaddr_storage net_storage; unsigned int 
prefix_len; bool ok; - ok = parse_network("123.123.123.123/42", (struct sockaddr*)&network, &prefix_len); + ok = parse_network("123.123.123.123/42", &net_storage, &prefix_len); + network = *(struct sockaddr_in *) &net_storage; ASSERT_EQ(ok, true); ASSERT_EQ(42U, prefix_len); ASSERT_EQ(AF_INET, network.sin_family); @@ -437,10 +445,12 @@ TEST(CommonIPAddr, ParseNetwork_IPv6_0) { struct sockaddr_in6 network; + struct sockaddr_storage net_storage; unsigned int prefix_len; bool ok; - ok = parse_network("2001:1234:5678:90ab::dead:beef/0", (struct sockaddr*)&network, &prefix_len); + ok = parse_network("2001:1234:5678:90ab::dead:beef/0", &net_storage, &prefix_len); + network = *(struct sockaddr_in6 *) &net_storage; ASSERT_EQ(ok, true); ASSERT_EQ(0U, prefix_len); ASSERT_EQ(AF_INET6, network.sin6_family); @@ -453,10 +463,12 @@ TEST(CommonIPAddr, ParseNetwork_IPv6_67) { struct sockaddr_in6 network; + struct sockaddr_storage net_storage; unsigned int prefix_len; bool ok; - ok = parse_network("2001:1234:5678:90ab::dead:beef/67", (struct sockaddr*)&network, &prefix_len); + ok = parse_network("2001:1234:5678:90ab::dead:beef/67", &net_storage, &prefix_len); + network = *(struct sockaddr_in6 *) &net_storage; ASSERT_EQ(ok, true); ASSERT_EQ(67U, prefix_len); ASSERT_EQ(AF_INET6, network.sin6_family); @@ -469,10 +481,12 @@ TEST(CommonIPAddr, ParseNetwork_IPv6_128) { struct sockaddr_in6 network; + struct sockaddr_storage net_storage; unsigned int prefix_len; bool ok; - ok = parse_network("2001:1234:5678:90ab::dead:beef/128", (struct sockaddr*)&network, &prefix_len); + ok = parse_network("2001:1234:5678:90ab::dead:beef/128", &net_storage, &prefix_len); + network = *(struct sockaddr_in6 *) &net_storage; ASSERT_EQ(ok, true); ASSERT_EQ(128U, prefix_len); ASSERT_EQ(AF_INET6, network.sin6_family); @@ -485,10 +499,12 @@ TEST(CommonIPAddr, ParseNetwork_IPv6_9000) { struct sockaddr_in6 network; + struct sockaddr_storage net_storage; unsigned int prefix_len; bool ok; - ok = parse_network("2001:1234:5678:90ab::dead:beef/9000", (struct sockaddr*)&network, &prefix_len); + ok = parse_network("2001:1234:5678:90ab::dead:beef/9000", &net_storage, &prefix_len); + network = *(struct sockaddr_in6 *) &net_storage; ASSERT_EQ(ok, true); ASSERT_EQ(9000U, prefix_len); ASSERT_EQ(AF_INET6, network.sin6_family); diff -Nru ceph-10.2.7/src/test/testkeys.cc ceph-10.2.9/src/test/testkeys.cc --- ceph-10.2.7/src/test/testkeys.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/testkeys.cc 2017-07-13 13:05:37.000000000 +0000 @@ -11,7 +11,8 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); KeyRing extra; KeyServer server(g_ceph_context, &extra); diff -Nru ceph-10.2.7/src/test/test_mutate.cc ceph-10.2.9/src/test/test_mutate.cc --- ceph-10.2.7/src/test/test_mutate.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/test_mutate.cc 2017-07-13 13:05:37.000000000 +0000 @@ -44,7 +44,8 @@ vector args; argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); string val; diff -Nru ceph-10.2.7/src/test/test_rgw_admin_log.cc ceph-10.2.9/src/test/test_rgw_admin_log.cc --- ceph-10.2.7/src/test/test_rgw_admin_log.cc 2017-04-10 
11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/test_rgw_admin_log.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1572,7 +1572,8 @@ vector args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); g_test = new admin_log::test_helper(); finisher = new Finisher(g_ceph_context); diff -Nru ceph-10.2.7/src/test/test_rgw_admin_meta.cc ceph-10.2.9/src/test/test_rgw_admin_meta.cc --- ceph-10.2.7/src/test/test_rgw_admin_meta.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/test_rgw_admin_meta.cc 2017-07-13 13:05:37.000000000 +0000 @@ -908,7 +908,8 @@ vector args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); g_test = new admin_meta::test_helper(); finisher = new Finisher(g_ceph_context); diff -Nru ceph-10.2.7/src/test/test_rgw_admin_opstate.cc ceph-10.2.9/src/test/test_rgw_admin_opstate.cc --- ceph-10.2.7/src/test/test_rgw_admin_opstate.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/test_rgw_admin_opstate.cc 2017-07-13 13:05:37.000000000 +0000 @@ -805,7 +805,8 @@ vector args; argv_to_vec(argc, (const char **)argv, args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); store = RGWStoreManager::get_storage(g_ceph_context, false, false, false); g_test = new admin_log::test_helper(); diff -Nru ceph-10.2.7/src/test/TestSignalHandlers.cc ceph-10.2.9/src/test/TestSignalHandlers.cc --- ceph-10.2.7/src/test/TestSignalHandlers.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/TestSignalHandlers.cc 2017-07-13 13:05:37.000000000 +0000 @@ -71,7 +71,8 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); test_fn_t fn = NULL; diff -Nru ceph-10.2.7/src/test/test_snap_mapper.cc ceph-10.2.9/src/test/test_snap_mapper.cc --- ceph-10.2.7/src/test/test_snap_mapper.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/test_snap_mapper.cc 2017-07-13 13:05:37.000000000 +0000 @@ -4,15 +4,14 @@ #include #include #include +#include #include "include/buffer.h" #include "common/map_cacher.hpp" #include "osd/SnapMapper.h" -#include "global/global_init.h" -#include "common/ceph_argparse.h" +#include "test/unit.h" #include "gtest/gtest.h" -#include "stdlib.h" using namespace std; @@ -657,14 +656,3 @@ init(50); run(); } - -int main(int argc, char **argv) -{ - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff -Nru ceph-10.2.7/src/test/test_striper.cc ceph-10.2.9/src/test/test_striper.cc --- ceph-10.2.7/src/test/test_striper.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/test_striper.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1,8 +1,6 @@ #include "gtest/gtest.h" #include 
"global/global_context.h" -#include "common/ceph_argparse.h" -#include "global/global_init.h" -#include "common/common_init.h" +#include "test/unit.h" #include "osdc/Striper.h" @@ -73,18 +71,3 @@ numobjs = Striper::get_num_objects(l, size); ASSERT_EQ(6u, numobjs); } - - -int main(int argc, char **argv) -{ - ::testing::InitGoogleTest(&argc, argv); - - vector args; - argv_to_vec(argc, (const char **)argv, args); - env_to_vec(args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - return RUN_ALL_TESTS(); -} diff -Nru ceph-10.2.7/src/test/TestTimers.cc ceph-10.2.9/src/test/TestTimers.cc --- ceph-10.2.7/src/test/TestTimers.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/TestTimers.cc 2017-07-13 13:05:37.000000000 +0000 @@ -255,7 +255,8 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); int ret; diff -Nru ceph-10.2.7/src/test/test_trans.cc ceph-10.2.9/src/test/test_trans.cc --- ceph-10.2.7/src/test/test_trans.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/test_trans.cc 2017-07-13 13:05:37.000000000 +0000 @@ -38,7 +38,8 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); // args diff -Nru ceph-10.2.7/src/test/test_workqueue.cc ceph-10.2.9/src/test/test_workqueue.cc --- ceph-10.2.7/src/test/test_workqueue.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/test_workqueue.cc 2017-07-13 13:05:37.000000000 +0000 @@ -1,10 +1,8 @@ #include "gtest/gtest.h" #include "common/WorkQueue.h" -#include "global/global_context.h" #include "common/ceph_argparse.h" -#include "global/global_init.h" -#include "common/common_init.h" +#include "test/unit.h" TEST(WorkQueue, StartStop) { @@ -56,17 +54,3 @@ sleep(1); tp.stop(); } - - -int main(int argc, char **argv) -{ - ::testing::InitGoogleTest(&argc, argv); - - vector args; - argv_to_vec(argc, (const char **)argv, args); - - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); - common_init_finish(g_ceph_context); - - return RUN_ALL_TESTS(); -} diff -Nru ceph-10.2.7/src/test/unit.h ceph-10.2.9/src/test/unit.h --- ceph-10.2.7/src/test/unit.h 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/unit.h 2017-07-13 13:05:37.000000000 +0000 @@ -18,7 +18,9 @@ #include "include/types.h" // FIXME: ordering shouldn't be important, but right // now, this include has to come before the others. +#include "common/ceph_argparse.h" #include "common/code_environment.h" +#include "common/config.h" #include "global/global_context.h" #include "global/global_init.h" #include "include/msgr.h" // for CEPH_ENTITY_TYPE_CLIENT @@ -32,10 +34,19 @@ * initialization for you. 
*/ int main(int argc, char **argv) { - std::vector args; - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, - CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + std::vector args(argv, argv + argc); + env_to_vec(args); + auto cct = global_init(NULL, args, + CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); + + const char* env = getenv("CEPH_LIB"); + if (env) { + g_conf->set_val("erasure_code_dir", env, false, false); + g_conf->set_val("plugin_dir", env, false, false); + } + ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } diff -Nru ceph-10.2.7/src/test/xattr_bench.cc ceph-10.2.9/src/test/xattr_bench.cc --- ceph-10.2.7/src/test/xattr_bench.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/test/xattr_bench.cc 2017-07-13 13:05:37.000000000 +0000 @@ -152,7 +152,8 @@ vector args; argv_to_vec(argc, (const char **)argv, args); - global_init(0, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(0, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); std::cerr << "args: " << args << std::endl; diff -Nru ceph-10.2.7/src/tools/ceph_authtool.cc ceph-10.2.9/src/tools/ceph_authtool.cc --- ceph-10.2.7/src/tools/ceph_authtool.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/ceph_authtool.cc 2017-07-13 13:05:37.000000000 +0000 @@ -63,8 +63,9 @@ map caps; std::string fn; - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, - CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); bool gen_key = false; bool gen_print_key = false; diff -Nru ceph-10.2.7/src/tools/ceph-client-debug.cc ceph-10.2.9/src/tools/ceph-client-debug.cc --- ceph-10.2.7/src/tools/ceph-client-debug.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/ceph-client-debug.cc 2017-07-13 13:05:37.000000000 +0000 @@ -85,8 +85,9 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, - CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); common_init_finish(g_ceph_context); diff -Nru ceph-10.2.7/src/tools/ceph_conf.cc ceph-10.2.9/src/tools/ceph_conf.cc --- ceph-10.2.7/src/tools/ceph_conf.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/ceph_conf.cc 2017-07-13 13:05:37.000000000 +0000 @@ -153,6 +153,12 @@ global_pre_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_DAEMON, CINIT_FLAG_NO_DAEMON_ACTIONS); + std::unique_ptr > cct_deleter{ + g_ceph_context, + [](CephContext *p) {p->put();} + }; + g_conf->apply_changes(NULL); g_conf->complain_about_parse_errors(g_ceph_context); diff -Nru ceph-10.2.7/src/tools/cephfs/cephfs-data-scan.cc ceph-10.2.9/src/tools/cephfs/cephfs-data-scan.cc --- ceph-10.2.7/src/tools/cephfs/cephfs-data-scan.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/cephfs/cephfs-data-scan.cc 2017-07-13 13:05:37.000000000 +0000 @@ -14,7 +14,8 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); DataScan data_scan; diff -Nru ceph-10.2.7/src/tools/cephfs/cephfs-journal-tool.cc 
ceph-10.2.9/src/tools/cephfs/cephfs-journal-tool.cc --- ceph-10.2.7/src/tools/cephfs/cephfs-journal-tool.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/cephfs/cephfs-journal-tool.cc 2017-07-13 13:05:37.000000000 +0000 @@ -27,7 +27,8 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); JournalTool jt; diff -Nru ceph-10.2.7/src/tools/cephfs/cephfs-table-tool.cc ceph-10.2.9/src/tools/cephfs/cephfs-table-tool.cc --- ceph-10.2.7/src/tools/cephfs/cephfs-table-tool.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/cephfs/cephfs-table-tool.cc 2017-07-13 13:05:37.000000000 +0000 @@ -14,7 +14,8 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); TableTool tt; diff -Nru ceph-10.2.7/src/tools/cephfs/DataScan.cc ceph-10.2.9/src/tools/cephfs/DataScan.cc --- ceph-10.2.7/src/tools/cephfs/DataScan.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/cephfs/DataScan.cc 2017-07-13 13:05:37.000000000 +0000 @@ -350,6 +350,7 @@ // they don't have to mount the filesystem to correct it? inode.inode.layout = file_layout_t::get_default(); inode.inode.layout.pool_id = data_pool_id; + inode.inode.dir_layout.dl_dir_hash = g_conf->mds_default_dir_hash; // Assume that we will get our stats wrong, and that we may // be ignoring dirfrags that exist @@ -1423,6 +1424,8 @@ // accurate, but it should avoid functional issues. ancestor_dentry.inode.dirstat.nfiles = 1; + ancestor_dentry.inode.dir_layout.dl_dir_hash = + g_conf->mds_default_dir_hash; ancestor_dentry.inode.nlink = 1; ancestor_dentry.inode.ino = ino; @@ -1764,6 +1767,7 @@ out->inode.ctime.tv.tv_sec = fragstat.mtime; out->inode.layout = layout; + out->inode.dir_layout.dl_dir_hash = g_conf->mds_default_dir_hash; out->inode.truncate_seq = 1; out->inode.truncate_size = -1ull; diff -Nru ceph-10.2.7/src/tools/ceph_kvstore_tool.cc ceph-10.2.9/src/tools/ceph_kvstore_tool.cc --- ceph-10.2.7/src/tools/ceph_kvstore_tool.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/ceph_kvstore_tool.cc 2017-07-13 13:05:37.000000000 +0000 @@ -230,7 +230,7 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init( + auto cct = global_init( NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); diff -Nru ceph-10.2.7/src/tools/ceph_monstore_tool.cc ceph-10.2.9/src/tools/ceph_monstore_tool.cc --- ceph-10.2.7/src/tools/ceph_monstore_tool.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/ceph_monstore_tool.cc 2017-07-13 13:05:37.000000000 +0000 @@ -787,7 +787,7 @@ ceph_options.push_back(i->c_str()); } - global_init( + auto cct = global_init( &def_args, ceph_options, CEPH_ENTITY_TYPE_MON, CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); diff -Nru ceph-10.2.7/src/tools/ceph_objectstore_tool.cc ceph-10.2.9/src/tools/ceph_objectstore_tool.cc --- ceph-10.2.7/src/tools/ceph_objectstore_tool.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/ceph_objectstore_tool.cc 2017-07-13 13:05:37.000000000 +0000 @@ -316,13 +316,6 @@ return 0; } -void myexit(int ret) -{ - if (g_ceph_context) - g_ceph_context->put(); - exit(ret); -} - int 
get_log(ObjectStore *fs, __u8 struct_ver, coll_t coll, spg_t pgid, const pg_info_t &info, PGLog::IndexedLog &log, pg_missing_t &missing, @@ -334,7 +327,9 @@ PGLog::read_log(fs, coll, struct_ver >= 8 ? coll : coll_t::meta(), struct_ver >= 8 ? pgid.make_pgmeta_oid() : log_oid, - info, divergent_priors, log, missing, oss); + info, divergent_priors, log, + missing, oss, + g_ceph_context->_conf->osd_ignore_stale_divergent_priors); if (debug && oss.str().size()) cerr << oss.str() << std::endl; } @@ -2318,12 +2313,12 @@ po::include_positional); } catch(po::error &e) { std::cerr << e.what() << std::endl; - myexit(1); + return 1; } if (vm.count("help")) { usage(desc); - myexit(1); + return 1; } if (!vm.count("debug")) { @@ -2381,7 +2376,7 @@ !(op == "dump-journal" && type == "filestore")) { cerr << "Must provide --data-path" << std::endl; usage(desc); - myexit(1); + return 1; } if (type == "filestore" && !vm.count("journal-path")) { jpath = dpath + "/journal"; @@ -2389,29 +2384,29 @@ if (!vm.count("op") && !vm.count("object")) { cerr << "Must provide --op or object command..." << std::endl; usage(desc); - myexit(1); + return 1; } if (op != "list" && vm.count("op") && vm.count("object")) { cerr << "Can't specify both --op and object command syntax" << std::endl; usage(desc); - myexit(1); + return 1; } if (op == "apply-layout-settings" && !(vm.count("pool") ^ vm.count("pgid"))) { cerr << "apply-layout-settings requires either --pool or --pgid" << std::endl; usage(desc); - myexit(1); + return 1; } if (op != "list" && vm.count("object") && !vm.count("objcmd")) { cerr << "Invalid syntax, missing command" << std::endl; usage(desc); - myexit(1); + return 1; } if (op == "fuse" && mountpoint.length() == 0) { cerr << "Missing fuse mountpoint" << std::endl; usage(desc); - myexit(1); + return 1; } outistty = isatty(STDOUT_FILENO); @@ -2420,7 +2415,7 @@ if (!vm.count("file") || file == "-") { if (outistty) { cerr << "stdout is a tty and no --file filename specified" << std::endl; - myexit(1); + return 1; } file_fd = STDOUT_FILENO; } else { @@ -2430,7 +2425,7 @@ if (!vm.count("file") || file == "-") { if (isatty(STDIN_FILENO)) { cerr << "stdin is a tty and no --file filename specified" << std::endl; - myexit(1); + return 1; } file_fd = STDIN_FILENO; } else { @@ -2443,16 +2438,16 @@ if (vm.count("file") && file_fd == fd_none && !dry_run) { cerr << "--file option only applies to import, export, " << "get-osdmap, set-osdmap, get-inc-osdmap or set-inc-osdmap" << std::endl; - myexit(1); + return 1; } if (file_fd != fd_none && file_fd < 0) { string err = string("file: ") + file; perror(err.c_str()); - myexit(1); + return 1; } - global_init( + auto cct = global_init( NULL, ceph_options, CEPH_ENTITY_TYPE_OSD, CODE_ENVIRONMENT_UTILITY_NODOUT, 0); //CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); @@ -2475,7 +2470,7 @@ formatter = Formatter::create(format); if (formatter == NULL) { cerr << "unrecognized format: " << format << std::endl; - myexit(1); + return 1; } // Special handling for filestore journal, so we can dump it without mounting @@ -2484,10 +2479,10 @@ if (ret < 0) { cerr << "journal-path: " << jpath << ": " << cpp_strerror(ret) << std::endl; - myexit(1); + return 1; } formatter->flush(cout); - myexit(0); + return 0; } //Verify that data-path really exists @@ -2495,40 +2490,40 @@ if (::stat(dpath.c_str(), &st) == -1) { string err = string("data-path: ") + dpath; perror(err.c_str()); - myexit(1); + return 1; } if (pgidstr.length() && !pgid.parse(pgidstr.c_str())) { cerr << "Invalid pgid '" << pgidstr << "' specified" << 
std::endl; - myexit(1); + return 1; } ObjectStore *fs = ObjectStore::create(g_ceph_context, type, dpath, jpath, flags); if (fs == NULL) { cerr << "Unable to create store of type " << type << std::endl; - myexit(1); + return 1; } if (op == "fsck") { int r = fs->fsck(); if (r < 0) { cerr << "fsck failed: " << cpp_strerror(r) << std::endl; - myexit(1); + return 1; } if (r > 0) { cerr << "fsck found " << r << " errors" << std::endl; - myexit(1); + return 1; } cout << "fsck found no errors" << std::endl; - exit(0); + return 0; } if (op == "mkfs") { int r = fs->mkfs(); if (r < 0) { cerr << "fsck failed: " << cpp_strerror(r) << std::endl; - myexit(1); + return 1; } - myexit(0); + return 0; } ObjectStore::Sequencer *osr = new ObjectStore::Sequencer(__func__); @@ -2539,7 +2534,7 @@ } else { cerr << "Mount failed with '" << cpp_strerror(ret) << "'" << std::endl; } - myexit(1); + return 1; } if (op == "fuse") { @@ -2549,12 +2544,12 @@ int r = fuse.main(); if (r < 0) { cerr << "failed to mount fuse: " << cpp_strerror(r) << std::endl; - myexit(1); + return 1; } #else cerr << "fuse support not enabled" << std::endl; #endif - myexit(0); + return 0; } vector ls; @@ -3264,5 +3259,5 @@ if (ret < 0) ret = 1; - myexit(ret); + return ret; } diff -Nru ceph-10.2.7/src/tools/ceph_osdomap_tool.cc ceph-10.2.9/src/tools/ceph_osdomap_tool.cc --- ceph-10.2.7/src/tools/ceph_osdomap_tool.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/ceph_osdomap_tool.cc 2017-07-13 13:05:37.000000000 +0000 @@ -27,14 +27,17 @@ int main(int argc, char **argv) { po::options_description desc("Allowed options"); - string store_path, cmd, out_path; + string store_path, cmd, out_path, oid; + bool debug = false; desc.add_options() ("help", "produce help message") ("omap-path", po::value(&store_path), "path to mon directory, mandatory (current/omap usually)") ("paranoid", "use paranoid checking") + ("debug", "Additional debug output from DBObjectMap") + ("oid", po::value(&oid), "Restrict to this object id when dumping objects") ("command", po::value(&cmd), - "command arg is one of [dump-raw-keys, dump-raw-key-vals, dump-objects, dump-objects-with-keys, check], mandatory") + "command arg is one of [dump-raw-keys, dump-raw-key-vals, dump-objects, dump-objects-with-keys, check, dump-headers, repair], mandatory") ; po::positional_options_description p; p.add("command", 1); @@ -64,12 +67,19 @@ ceph_options.push_back(i->c_str()); } - global_init( + if (vm.count("debug")) debug = true; + + auto cct = global_init( &def_args, ceph_options, CEPH_ENTITY_TYPE_OSD, - CODE_ENVIRONMENT_UTILITY, 0); + CODE_ENVIRONMENT_UTILITY_NODOUT, 0); common_init_finish(g_ceph_context); g_ceph_context->_conf->apply_changes(NULL); g_conf = g_ceph_context->_conf; + if (debug) { + g_conf->set_val_or_die("log_to_stderr", "true"); + g_conf->set_val_or_die("err_to_stderr", "true"); + } + g_conf->apply_changes(NULL); if (vm.count("help")) { std::cerr << desc << std::endl; @@ -99,6 +109,9 @@ std::cerr << "Output: " << out.str() << std::endl; goto done; } + // We don't call omap.init() here because it will repair + // the DBObjectMap which we might want to examine for diagnostic + // reasons. Instead use --command repair. 
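A note on the pattern running through the hunks above and below: in this release global_init() returns a reference-counted CephContext handle instead of void, which is why the per-tool cleanup helpers (ceph_objectstore_tool's myexit(), the goto cleanup label in random_write.cc) and the per-test main() functions disappear in favor of plain returns and the shared test/unit.h harness. A minimal sketch of the resulting shape, assuming only the jewel-era headers and the global_init()/common_init_finish() calls visible in these hunks:

#include <vector>
#include "common/ceph_argparse.h"
#include "common/common_init.h"
#include "global/global_context.h"
#include "global/global_init.h"

int main(int argc, char **argv)
{
  std::vector<const char *> args;
  argv_to_vec(argc, (const char **)argv, args);

  // The returned handle holds a reference to the global context; whenever
  // main() returns, the handle goes out of scope and releases it, so error
  // paths no longer need an explicit g_ceph_context->put().
  auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
                         CODE_ENVIRONMENT_UTILITY, 0);
  common_init_finish(g_ceph_context);

  if (args.empty())
    return 1;  // plain return replaces myexit(1)/goto cleanup
  return 0;
}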
r = 0; @@ -123,6 +136,8 @@ for (vector::iterator i = objects.begin(); i != objects.end(); ++i) { + if (vm.count("oid") != 0 && i->hobj.oid.name != oid) + continue; std::cout << *i << std::endl; } r = 0; @@ -136,6 +151,8 @@ for (vector::iterator i = objects.begin(); i != objects.end(); ++i) { + if (vm.count("oid") != 0 && i->hobj.oid.name != oid) + continue; std::cout << "Object: " << *i << std::endl; ObjectMap::ObjectMapIterator j = omap.get_iterator(ghobject_t(i->hobj)); for (j->seek_to_first(); j->valid(); j->next()) { @@ -143,17 +160,35 @@ j->value().hexdump(std::cout); } } - } else if (cmd == "check") { - r = omap.check(std::cout); - if (!r) { - std::cerr << "check got: " << cpp_strerror(r) << std::endl; + } else if (cmd == "check" || cmd == "repair") { + ostringstream ss; + bool repair = (cmd == "repair"); + r = omap.check(ss, repair); + if (r) { + std::cerr << ss.str() << std::endl; + if (r > 0) { + std::cerr << "check got " << r << " error(s)" << std::endl; + r = 1; + goto done; + } + } + std::cout << (repair ? "repair" : "check") << " succeeded" << std::endl; + } else if (cmd == "dump-headers") { + vector headers; + r = omap.list_object_headers(&headers); + if (r < 0) { + std::cerr << "list_object_headers got: " << cpp_strerror(r) << std::endl; + r = 1; goto done; } - std::cout << "check succeeded" << std::endl; + for (auto i : headers) + std::cout << i << std::endl; } else { std::cerr << "Did not recognize command " << cmd << std::endl; + r = 1; goto done; } + r = 0; done: return r; diff -Nru ceph-10.2.7/src/tools/crushtool.cc ceph-10.2.9/src/tools/crushtool.cc --- ceph-10.2.7/src/tools/crushtool.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/crushtool.cc 2017-07-13 13:05:37.000000000 +0000 @@ -270,8 +270,12 @@ // only parse arguments from CEPH_ARGS, if in the environment vector env_args; env_to_vec(env_args); - global_init(NULL, env_args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, - CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + auto cct = global_init(NULL, env_args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + // crushtool times out occasionally when it quits, so do not + // release the g_ceph_context.
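The crushtool hunk that continues below pairs the new scope-owned handle with a deliberate leak: cct->get() bumps the handle's reference count, so when the handle is destroyed at process exit the context survives, matching the comment above about occasional timeouts at shutdown. The same hunk series also converts every exit(EXIT_FAILURE) into return EXIT_FAILURE, which matters precisely because cleanup now rides on a local object's destructor and std::exit() does not unwind automatic storage. A standalone illustration of that distinction (Handle is a stand-in for the context handle, not ceph code):

#include <cstdio>
#include <cstdlib>

struct Handle {
  ~Handle() { std::puts("context released"); }  // stands in for cct cleanup
};

int main(int argc, char **)
{
  Handle h;
  if (argc > 1)
    return EXIT_FAILURE;   // unwinds the stack: "context released" is printed
  std::exit(EXIT_SUCCESS); // does not unwind: the destructor never runs
}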
+ cct->get(); common_init_finish(g_ceph_context); int x; @@ -285,7 +289,7 @@ break; } else if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { usage(); - exit(0); + return EXIT_SUCCESS; } else if (ceph_argparse_witharg(args, i, &val, "-d", "--decompile", (char*)NULL)) { infn = val; decompile = true; @@ -356,17 +360,17 @@ } else if (ceph_argparse_witharg(args, i, &add_item, err, "--add_item", (char*)NULL)) { if (!err.str().empty()) { cerr << err.str() << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } if (i == args.end()) { cerr << "expecting additional argument to --add-item" << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } add_weight = atof(*i); i = args.erase(i); if (i == args.end()) { cerr << "expecting additional argument to --add-item" << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } add_name.assign(*i); i = args.erase(i); @@ -374,17 +378,17 @@ update_item = true; if (!err.str().empty()) { cerr << err.str() << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } if (i == args.end()) { cerr << "expecting additional argument to --update-item" << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } add_weight = atof(*i); i = args.erase(i); if (i == args.end()) { cerr << "expecting additional argument to --update-item" << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } add_name.assign(*i); i = args.erase(i); @@ -392,7 +396,7 @@ std::string type(val); if (i == args.end()) { cerr << "expecting additional argument to --loc" << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } std::string name(*i); i = args.erase(i); @@ -403,12 +407,12 @@ tester.set_output_csv(true); } else if (ceph_argparse_flag(args, i, "--help-output", (char*)NULL)) { data_analysis_usage(); - exit(0); + return EXIT_SUCCESS; } else if (ceph_argparse_witharg(args, i, &val, "--output-name", (char*)NULL)) { std::string name(val); if (i == args.end()) { cerr << "expecting additional argument to --output-name" << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } else { tester.set_output_data_file_name(name + "-"); @@ -419,7 +423,7 @@ reweight_name = val; if (i == args.end()) { cerr << "expecting additional argument to --reweight-item" << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } reweight_weight = atof(*i); i = args.erase(i); @@ -428,83 +432,83 @@ } else if (ceph_argparse_witharg(args, i, &num_osds, err, "--num_osds", (char*)NULL)) { if (!err.str().empty()) { cerr << err.str() << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } } else if (ceph_argparse_witharg(args, i, &x, err, "--num_rep", (char*)NULL)) { if (!err.str().empty()) { cerr << err.str() << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } tester.set_num_rep(x); } else if (ceph_argparse_witharg(args, i, &x, err, "--max_x", (char*)NULL)) { if (!err.str().empty()) { cerr << err.str() << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } tester.set_max_x(x); } else if (ceph_argparse_witharg(args, i, &x, err, "--min_x", (char*)NULL)) { if (!err.str().empty()) { cerr << err.str() << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } tester.set_min_x(x); } else if (ceph_argparse_witharg(args, i, &x, err, "--x", (char*)NULL)) { if (!err.str().empty()) { cerr << err.str() << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } tester.set_x(x); } else if (ceph_argparse_witharg(args, i, &x, err, "--max_rule", (char*)NULL)) { if (!err.str().empty()) { cerr << err.str() << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } 
tester.set_max_rule(x); } else if (ceph_argparse_witharg(args, i, &x, err, "--min_rule", (char*)NULL)) { if (!err.str().empty()) { cerr << err.str() << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } tester.set_min_rule(x); } else if (ceph_argparse_witharg(args, i, &x, err, "--rule", (char*)NULL)) { if (!err.str().empty()) { cerr << err.str() << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } tester.set_rule(x); } else if (ceph_argparse_witharg(args, i, &x, err, "--ruleset", (char*)NULL)) { if (!err.str().empty()) { cerr << err.str() << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } tester.set_ruleset(x); } else if (ceph_argparse_witharg(args, i, &x, err, "--batches", (char*)NULL)) { if (!err.str().empty()) { cerr << err.str() << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } tester.set_batches(x); } else if (ceph_argparse_witharg(args, i, &y, err, "--mark-down-ratio", (char*)NULL)) { if (!err.str().empty()) { cerr << err.str() << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } tester.set_device_down_ratio(y); } else if (ceph_argparse_witharg(args, i, &y, err, "--mark-down-bucket-ratio", (char*)NULL)) { if (!err.str().empty()) { cerr << err.str() << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } tester.set_bucket_down_ratio(y); } else if (ceph_argparse_witharg(args, i, &tmp, err, "--weight", (char*)NULL)) { if (!err.str().empty()) { cerr << err.str() << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } int dev = tmp; if (i == args.end()) { cerr << "expecting additional argument to --weight" << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } float f = atof(*i); i = args.erase(i); @@ -521,24 +525,24 @@ if (decompile + compile + build > 1) { cerr << "cannot specify more than one of compile, decompile, and build" << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } if (!check && !compile && !decompile && !build && !test && !reweight && !adjust && !tree && add_item < 0 && full_location < 0 && remove_name.empty() && reweight_name.empty()) { cerr << "no action specified; -h for help" << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } if ((!build) && (!args.empty())) { cerr << "unrecognized arguments: " << args << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } else { if ((args.size() % 3) != 0U) { cerr << "remaining args: " << args << std::endl; cerr << "layers must be specified with 3-tuples of (name, buckettype, size)" << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } for (size_t j = 0; j < args.size(); j += 3) { layer_t l; @@ -567,19 +571,19 @@ if (infn == "-") { if (isatty(STDIN_FILENO)) { cerr << "stdin must not be from a tty" << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } r = get_fd_data(STDIN_FILENO, bl); if (r < 0) { cerr << "error reading data from STDIN" << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } } else { r = bl.read_file(infn.c_str(), &error); if (r < 0) { cerr << me << ": error reading '" << infn << "': " << error << std::endl; - exit(1); + return EXIT_FAILURE; } } bufferlist::iterator p = bl.begin(); @@ -587,7 +591,7 @@ crush.decode(p); } catch(...) 
{ cerr << me << ": unable to decode " << infn << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } } @@ -606,7 +610,7 @@ cc.enable_unsafe_tunables(); int r = cc.compile(in, srcfn.c_str()); if (r < 0) - exit(1); + return EXIT_FAILURE; modified = true; } @@ -614,7 +618,7 @@ if (build) { if (layers.empty()) { cerr << me << ": must specify at least one layer" << std::endl; - exit(1); + return EXIT_FAILURE; } crush.create(); @@ -650,7 +654,7 @@ } if (buckettype < 0) { cerr << "unknown bucket type '" << l.buckettype << "'" << std::endl; - exit(EXIT_FAILURE); + return EXIT_FAILURE; } // build items @@ -725,7 +729,7 @@ } if (OSDMap::build_simple_crush_rulesets(g_ceph_context, crush, root, &cerr)) - exit(EXIT_FAILURE); + return EXIT_FAILURE; modified = true; } @@ -843,7 +847,7 @@ o.open(outfn.c_str(), ios::out | ios::binary | ios::trunc); if (!o.is_open()) { cerr << me << ": error writing '" << outfn << "'" << std::endl; - exit(1); + return EXIT_FAILURE; } cc.decompile(o); o.close(); @@ -856,7 +860,7 @@ tester.check_overlapped_rules(); if (max_id >= 0) { if (!tester.check_name_maps(max_id)) { - exit(1); + return EXIT_FAILURE; } } } @@ -868,7 +872,7 @@ int r = tester.test(); if (r < 0) - exit(1); + return EXIT_FAILURE; } // output --- @@ -883,7 +887,7 @@ int r = bl.write_file(outfn.c_str()); if (r < 0) { cerr << me << ": error writing '" << outfn << "': " << cpp_strerror(r) << std::endl; - exit(1); + return EXIT_FAILURE; } if (verbose) cout << "wrote crush map to " << outfn << std::endl; diff -Nru ceph-10.2.7/src/tools/monmaptool.cc ceph-10.2.9/src/tools/monmaptool.cc --- ceph-10.2.7/src/tools/monmaptool.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/monmaptool.cc 2017-07-13 13:05:37.000000000 +0000 @@ -45,8 +45,9 @@ map add; list rm; - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, - CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); common_init_finish(g_ceph_context); std::string val; for (std::vector::iterator i = args.begin(); i != args.end(); ) { diff -Nru ceph-10.2.7/src/tools/osdmaptool.cc ceph-10.2.9/src/tools/osdmaptool.cc --- ceph-10.2.7/src/tools/osdmaptool.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/osdmaptool.cc 2017-07-13 13:05:37.000000000 +0000 @@ -45,8 +45,9 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, - CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); common_init_finish(g_ceph_context); const char *me = argv[0]; diff -Nru ceph-10.2.7/src/tools/rados/rados.cc ceph-10.2.9/src/tools/rados/rados.cc --- ceph-10.2.7/src/tools/rados/rados.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/rados/rados.cc 2017-07-13 13:05:37.000000000 +0000 @@ -3445,7 +3445,8 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); common_init_finish(g_ceph_context); std::map < std::string, std::string > opts; diff -Nru ceph-10.2.7/src/tools/rados/RadosImport.cc ceph-10.2.9/src/tools/rados/RadosImport.cc --- ceph-10.2.7/src/tools/rados/RadosImport.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/rados/RadosImport.cc 2017-07-13 
13:05:37.000000000 +0000 @@ -42,7 +42,11 @@ cerr << "Error " << ret << " in cluster.conf_read_env" << std::endl; return ret; } - cluster.connect(); + ret = cluster.connect(); + if (ret) { + cerr << "Error " << ret << " in cluster.connect" << std::endl; + return ret; + } ret = cluster.ioctx_create(pool.c_str(), ioctx); if (ret < 0) { diff -Nru ceph-10.2.7/src/tools/rbd/action/MirrorPool.cc ceph-10.2.9/src/tools/rbd/action/MirrorPool.cc --- ceph-10.2.7/src/tools/rbd/action/MirrorPool.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/rbd/action/MirrorPool.cc 2017-07-13 13:05:37.000000000 +0000 @@ -153,7 +153,20 @@ return r; } + // TODO: temporary restriction to prevent adding multiple peers + // until rbd-mirror daemon can properly handle the scenario librbd::RBD rbd; + std::vector mirror_peers; + r = rbd.mirror_peer_list(io_ctx, &mirror_peers); + if (r < 0) { + std::cerr << "rbd: failed to list mirror peers" << std::endl; + return r; + } + if (!mirror_peers.empty()) { + std::cerr << "rbd: multiple peers are not currently supported" << std::endl; + return -EINVAL; + } + std::string uuid; r = rbd.mirror_peer_add(io_ctx, &uuid, remote_cluster, remote_client_name); if (r < 0) { diff -Nru ceph-10.2.7/src/tools/rbd/action/Rename.cc ceph-10.2.9/src/tools/rbd/action/Rename.cc --- ceph-10.2.7/src/tools/rbd/action/Rename.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/rbd/action/Rename.cc 2017-07-13 13:05:37.000000000 +0000 @@ -42,9 +42,9 @@ return r; } - std::string dst_pool_name; std::string dst_image_name; std::string dst_snap_name; + std::string dst_pool_name = pool_name; r = utils::get_pool_image_snapshot_names( vm, at::ARGUMENT_MODIFIER_DEST, &arg_index, &dst_pool_name, &dst_image_name, &dst_snap_name, utils::SNAPSHOT_PRESENCE_NONE, utils::SPEC_VALIDATION_FULL); diff -Nru ceph-10.2.7/src/tools/rbd/rbd.cc ceph-10.2.9/src/tools/rbd/rbd.cc --- ceph-10.2.7/src/tools/rbd/rbd.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/rbd/rbd.cc 2017-07-13 13:05:37.000000000 +0000 @@ -13,7 +13,8 @@ argv_to_vec(argc, argv, args); env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, 0); rbd::Shell shell; return shell.execute(args); diff -Nru ceph-10.2.7/src/tools/rbd_mirror/main.cc ceph-10.2.9/src/tools/rbd_mirror/main.cc --- ceph-10.2.7/src/tools/rbd_mirror/main.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/rbd_mirror/main.cc 2017-07-13 13:05:37.000000000 +0000 @@ -35,9 +35,9 @@ env_to_vec(args); argv_to_vec(argc, argv, args); - global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT, - CODE_ENVIRONMENT_DAEMON, - CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); for (auto i = args.begin(); i != args.end(); ++i) { if (ceph_argparse_flag(args, i, "-h", "--help", (char*)NULL)) { @@ -80,7 +80,6 @@ shutdown_async_signal_handler(); delete mirror; - g_ceph_context->put(); return r < 0 ? 
EXIT_SUCCESS : EXIT_FAILURE; } diff -Nru ceph-10.2.7/src/tools/rbd_nbd/rbd-nbd.cc ceph-10.2.9/src/tools/rbd_nbd/rbd-nbd.cc --- ceph-10.2.7/src/tools/rbd_nbd/rbd-nbd.cc 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/src/tools/rbd_nbd/rbd-nbd.cc 2017-07-13 13:05:37.000000000 +0000 @@ -34,6 +34,7 @@ #include #include +#include #include #include "mon/MonClient.h" @@ -49,6 +50,7 @@ #include "include/rados/librados.hpp" #include "include/rbd/librbd.hpp" +#include "include/stringify.h" #define dout_subsys ceph_subsys_rbd #undef dout_prefix @@ -73,6 +75,8 @@ static int nbds_max = 0; static int max_part = 255; +#define RBD_NBD_BLKSIZE 512UL + #ifdef CEPH_BIG_ENDIAN #define ntohll(a) (a) #elif defined(CEPH_LITTLE_ENDIAN) @@ -463,7 +467,29 @@ return nbd; } -static int do_map() +static int check_device_size(int nbd_index, unsigned long expected_size) +{ + unsigned long size = 0; + std::string path = "/sys/block/nbd" + stringify(nbd_index) + "/size"; + std::ifstream ifs; + ifs.open(path.c_str(), std::ifstream::in); + if (!ifs.is_open()) { + cerr << "rbd-nbd: failed to open " << path << std::endl; + return -EINVAL; + } + ifs >> size; + size *= RBD_NBD_BLKSIZE; + + if (size != expected_size) { + cerr << "rbd-nbd: kernel reported invalid device size (" << size + << ", expected " << expected_size << ")" << std::endl; + return -EINVAL; + } + + return 0; +} + +static int do_map(int argc, const char *argv[]) { int r; @@ -476,6 +502,7 @@ unsigned long flags; unsigned long size; + int index = 0; int fd[2]; int nbd; @@ -484,6 +511,15 @@ Preforker forker; + vector args; + argv_to_vec(argc, argv, args); + env_to_vec(args); + + auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + g_ceph_context->_conf->set_val_or_die("pid_file", ""); + if (global_init_prefork(g_ceph_context) >= 0) { std::string err; r = forker.prefork(err); @@ -493,8 +529,9 @@ } if (forker.is_parent()) { + global_init_postfork_start(g_ceph_context); if (forker.parent_wait(err) != 0) { - return -ENXIO; + return -ENXIO; } return 0; } @@ -510,7 +547,6 @@ if (devpath.empty()) { char dev[64]; - int index = 0; while (true) { snprintf(dev, sizeof(dev), "/dev/nbd%d", index); @@ -532,6 +568,12 @@ break; } } else { + r = sscanf(devpath.c_str(), "/dev/nbd%d", &index); + if (r < 0) { + cerr << "rbd-nbd: invalid device path: " << devpath + << " (expected /dev/nbd{num})" << std::endl; + goto close_fd; + } nbd = open_device(devpath.c_str(), true); if (nbd < 0) { r = nbd; @@ -578,27 +620,32 @@ if (r < 0) goto close_nbd; - r = ioctl(nbd, NBD_SET_BLKSIZE, 512UL); + r = ioctl(nbd, NBD_SET_BLKSIZE, RBD_NBD_BLKSIZE); if (r < 0) { r = -errno; goto close_nbd; } - size = info.size; - - if (size > (1UL << 32) * 512) { + if (info.size > ULONG_MAX) { r = -EFBIG; - cerr << "rbd-nbd: image is too large (" << prettybyte_t(size) << ", max is " - << prettybyte_t((1UL << 32) * 512) << ")" << std::endl; + cerr << "rbd-nbd: image is too large (" << prettybyte_t(info.size) + << ", max is " << prettybyte_t(ULONG_MAX) << ")" << std::endl; goto close_nbd; } + size = info.size; + r = ioctl(nbd, NBD_SET_SIZE, size); if (r < 0) { r = -errno; goto close_nbd; } + r = check_device_size(index, size); + if (r < 0) { + goto close_nbd; + } + ioctl(nbd, NBD_SET_FLAGS, flags); read_only = snapname.empty() ? 
0 : 1; @@ -661,8 +708,6 @@ static int do_unmap() { - common_init_finish(g_ceph_context); - int nbd = open_device(devpath.c_str()); if (nbd < 0) { cerr << "rbd-nbd: failed to open device: " << devpath << std::endl; @@ -704,8 +749,6 @@ int m = 0; int fd[2]; - common_init_finish(g_ceph_context); - if (socketpair(AF_UNIX, SOCK_STREAM, 0, fd) == -1) { int r = -errno; cerr << "rbd-nbd: socketpair failed: " << cpp_strerror(-r) << std::endl; @@ -744,10 +787,7 @@ vector args; argv_to_vec(argc, argv, args); - env_to_vec(args); - global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_DAEMON, - CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); - g_ceph_context->_conf->set_val_or_die("pid_file", ""); + md_config_t().parse_argv(args); std::vector::iterator i; std::ostringstream err; @@ -836,7 +876,7 @@ return EXIT_FAILURE; } - r = do_map(); + r = do_map(argc, argv); if (r < 0) return EXIT_FAILURE; break; diff -Nru ceph-10.2.7/systemd/ceph-osd@.service ceph-10.2.9/systemd/ceph-osd@.service --- ceph-10.2.7/systemd/ceph-osd@.service 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/systemd/ceph-osd@.service 2017-07-13 13:05:37.000000000 +0000 @@ -1,6 +1,6 @@ [Unit] Description=Ceph object storage daemon -After=network-online.target local-fs.target time-sync.target +After=network-online.target local-fs.target time-sync.target ceph-mon.target Wants=network-online.target local-fs.target time-sync.target PartOf=ceph-osd.target diff -Nru ceph-10.2.7/systemd/rbdmap.service ceph-10.2.9/systemd/rbdmap.service --- ceph-10.2.7/systemd/rbdmap.service 2017-04-10 11:44:25.000000000 +0000 +++ ceph-10.2.9/systemd/rbdmap.service 2017-07-13 13:05:37.000000000 +0000 @@ -11,7 +11,7 @@ RemainAfterExit=yes ExecStart=/usr/bin/rbdmap map ExecReload=/usr/bin/rbdmap map -ExecStop=/usr/bin/rbdmap unmap +ExecStop=/usr/bin/rbdmap unmap-all [Install] WantedBy=multi-user.target
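For reference, the device-size validation added to rbd-nbd above reduces to reading /sys/block/nbd<N>/size, which the kernel reports in 512-byte sectors (hence the RBD_NBD_BLKSIZE 512UL constant), and comparing the product against the byte size of the mapped image. A self-contained sketch under those assumptions; check_nbd_size and its error handling are illustrative stand-ins, not the patch's code:

#include <fstream>
#include <iostream>
#include <string>

// Reads the sector count the kernel exposes for /dev/nbd<index> and checks
// that sectors * 512 equals the byte size the device was configured with.
static bool check_nbd_size(int nbd_index, unsigned long expected_bytes)
{
  const std::string path =
      "/sys/block/nbd" + std::to_string(nbd_index) + "/size";
  std::ifstream ifs(path);
  if (!ifs.is_open()) {
    std::cerr << "failed to open " << path << std::endl;
    return false;
  }
  unsigned long sectors = 0;
  ifs >> sectors;
  const unsigned long bytes = sectors * 512UL;  // sysfs size is in sectors
  if (bytes != expected_bytes) {
    std::cerr << "kernel reports " << bytes << " bytes, expected "
              << expected_bytes << std::endl;
    return false;
  }
  return true;
}

int main()
{
  // Example: confirm /dev/nbd0 currently exposes a 1 GiB device.
  return check_nbd_size(0, 1UL << 30) ? 0 : 1;
}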