diff -Nru ceph-0.80.8/ceph.spec ceph-0.80.9/ceph.spec
--- ceph-0.80.8/ceph.spec	2015-01-14 18:18:08.000000000 +0000
+++ ceph-0.80.9/ceph.spec	2015-03-09 19:54:08.000000000 +0000
@@ -9,7 +9,7 @@
 # common
 #################################################################################
 Name: ceph
-Version: 0.80.8
+Version: 0.80.9
 Release: 0%{?dist}
 Summary: User space components of the Ceph file system
 License: GPL-2.0
diff -Nru ceph-0.80.8/configure ceph-0.80.9/configure
--- ceph-0.80.8/configure	2015-01-14 18:17:15.000000000 +0000
+++ ceph-0.80.9/configure	2015-03-09 19:53:15.000000000 +0000
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.68 for ceph 0.80.8.
+# Generated by GNU Autoconf 2.68 for ceph 0.80.9.
 #
 # Report bugs to <ceph-devel@vger.kernel.org>.
 #
@@ -570,8 +570,8 @@
 # Identity of this package.
 PACKAGE_NAME='ceph'
 PACKAGE_TARNAME='ceph'
-PACKAGE_VERSION='0.80.8'
-PACKAGE_STRING='ceph 0.80.8'
+PACKAGE_VERSION='0.80.9'
+PACKAGE_STRING='ceph 0.80.9'
 PACKAGE_BUGREPORT='ceph-devel@vger.kernel.org'
 PACKAGE_URL=''

@@ -1441,7 +1441,7 @@
 # Omit some internal or obsolete options to make the list less imposing.
 # This message is too long to be a string in the A/UX 3.1 sh.
 cat <<_ACEOF
-\`configure' configures ceph 0.80.8 to adapt to many kinds of systems.
+\`configure' configures ceph 0.80.9 to adapt to many kinds of systems.

 Usage: $0 [OPTION]... [VAR=VALUE]...

@@ -1512,7 +1512,7 @@

 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of ceph 0.80.8:";;
+     short | recursive ) echo "Configuration of ceph 0.80.9:";;
   esac
   cat <<\_ACEOF

@@ -1657,7 +1657,7 @@
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-ceph configure 0.80.8
+ceph configure 0.80.9
 generated by GNU Autoconf 2.68

 Copyright (C) 2010 Free Software Foundation, Inc.
@@ -2682,7 +2682,7 @@
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.

-It was created by ceph $as_me 0.80.8, which was
+It was created by ceph $as_me 0.80.9, which was
 generated by GNU Autoconf 2.68.  Invocation command line was

   $ $0 $@

@@ -4682,7 +4682,7 @@

 # Define the identity of the package.
  PACKAGE='ceph'
- VERSION='0.80.8'
+ VERSION='0.80.9'


 cat >>confdefs.h <<_ACEOF
@@ -12660,7 +12660,7 @@

 # Define the identity of the package.
  PACKAGE='ceph'
- VERSION='0.80.8'
+ VERSION='0.80.9'


 cat >>confdefs.h <<_ACEOF
@@ -22464,7 +22464,7 @@
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by ceph $as_me 0.80.8, which was
+This file was extended by ceph $as_me 0.80.9, which was
 generated by GNU Autoconf 2.68.  Invocation command line was

   CONFIG_FILES    = $CONFIG_FILES
@@ -22530,7 +22530,7 @@
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-ceph config.status 0.80.8
+ceph config.status 0.80.9
 configured by $0, generated by GNU Autoconf 2.68,
   with options \\"\$ac_cs_config\\"

diff -Nru ceph-0.80.8/configure.ac ceph-0.80.9/configure.ac
--- ceph-0.80.8/configure.ac	2015-01-14 18:15:23.000000000 +0000
+++ ceph-0.80.9/configure.ac	2015-03-09 19:51:17.000000000 +0000
@@ -8,7 +8,7 @@
 # VERSION define is not used by the code.  It gets a version string
 # from 'git describe'; see src/ceph_ver.[ch]

-AC_INIT([ceph], [0.80.8], [ceph-devel@vger.kernel.org])
+AC_INIT([ceph], [0.80.9], [ceph-devel@vger.kernel.org])

 # Create release string. Used with VERSION for RPMs.
 RPM_RELEASE=0
diff -Nru ceph-0.80.8/debian/changelog ceph-0.80.9/debian/changelog
--- ceph-0.80.8/debian/changelog	2015-01-23 16:29:11.000000000 +0000
+++ ceph-0.80.9/debian/changelog	2015-03-11 09:14:37.000000000 +0000
@@ -1,3 +1,14 @@
+ceph (0.80.9-0ubuntu0.14.04.1) trusty; urgency=medium
+
+  [ James Page ]
+  * New upstream stable point release (LP: #1413917).
+
+  [ Dave Chiluk ]
+  * Increase file limit to prevent hitting the file limit on large
+    installations (LP: #1420647)
+
+ -- James Page  Wed, 11 Mar 2015 09:14:35 +0000
+
 ceph (0.80.8-0ubuntu0.14.04.1) trusty; urgency=low

   * New upstream stable point release (LP: #1413917).
diff -Nru ceph-0.80.8/debian/patches/increaseFileLimit.patch ceph-0.80.9/debian/patches/increaseFileLimit.patch
--- ceph-0.80.8/debian/patches/increaseFileLimit.patch	1970-01-01 00:00:00.000000000 +0000
+++ ceph-0.80.9/debian/patches/increaseFileLimit.patch	2015-03-10 21:30:02.000000000 +0000
@@ -0,0 +1,32 @@
+Description: Increase file limit to prevent hitting the file limit
+
+Upstream commit text
+"
+commit 5773a374d0089ce824dec0a0c74a76e57806cc43
+Author: Sage Weil
+Date:   Mon Jul 28 09:27:20 2014 -0700
+
+    upstart/ceph-osd.conf: bump nofile limit up by 10x
+
+    This should ensure that we don't hit this limit on all but the very
+    biggest clusters.  We seen it hit on a ~500 OSD dumpling cluster.
+
+    Backport: firefly, dumpling
+    Signed-off-by: Sage Weil
+"
+
+Author: Dave Chiluk
+Bug-Ubuntu: https://bugs.launchpad.net/bugs/1420647
+Origin: https://github.com/ceph/ceph/commit/5773a374d0089ce824dec0a0c74a76e57806cc43
+---
+--- ceph-0.80.8.orig/src/upstart/ceph-osd.conf
++++ ceph-0.80.8/src/upstart/ceph-osd.conf
+@@ -6,7 +6,7 @@ stop on runlevel [!2345] or stopping cep
+ respawn
+ respawn limit 5 30
+
+-limit nofile 32768 32768
++limit nofile 327680 327680
+
+ pre-start script
+     set -e
diff -Nru ceph-0.80.8/debian/patches/series ceph-0.80.9/debian/patches/series
--- ceph-0.80.8/debian/patches/series	2015-01-23 09:07:04.000000000 +0000
+++ ceph-0.80.9/debian/patches/series	2015-03-10 21:30:02.000000000 +0000
@@ -1,2 +1,3 @@
 virtualenv-never-download
 modules.patch
+increaseFileLimit.patch
diff -Nru ceph-0.80.8/man/ceph.8 ceph-0.80.9/man/ceph.8
--- ceph-0.80.8/man/ceph.8	2015-01-14 18:15:23.000000000 +0000
+++ ceph-0.80.9/man/ceph.8	2015-03-09 19:51:17.000000000 +0000
@@ -1,8 +1,8 @@
 .\" Man page generated from reStructuredText.
 .
-.TH "CEPH" "8" "January 12, 2014" "dev" "Ceph"
+.TH "CEPH" "8" "December 18, 2014" "dev" "Ceph"
 .SH NAME
-ceph \- ceph file system control utility
+ceph \- ceph administration tool
 .
 .nr rst2man-indent-level 0
 .
@@ -59,103 +59,2320 @@
 ..
 .SH SYNOPSIS
 .nf
-\fBceph\fP [ \-m \fImonaddr\fP ] [ \-w | \fIcommand\fP ... ]
+\fBceph\fP \fBauth\fP \fIadd\fP \fI<entity>\fP {\fI<caps>\fP [\fI<caps>\fP\&...]}
+.fi
+.sp
+.nf
+\fBceph\fP \fBauth\fP \fIexport\fP \fI<entity>\fP
+.fi
+.sp
+.nf
+\fBceph\fP \fBconfig\-key\fP \fIget\fP \fI<key>\fP
+.fi
+.sp
+.nf
+\fBceph\fP \fBmds\fP \fIadd_data_pool\fP \fI<pool>\fP
+.fi
+.sp
+.nf
+\fBceph\fP \fBmds\fP \fIgetmap\fP {\fI<epoch>\fP}
+.fi
+.sp
+.nf
+\fBceph\fP \fBmon\fP \fIadd\fP \fI<name>\fP <\fIIPaddr[:port]\fP>
+.fi
+.sp
+.nf
+\fBceph\fP \fBmon_status\fP
+.fi
+.sp
+.nf
+\fBceph\fP \fBosd\fP \fIcreate\fP {\fI<uuid>\fP}
+.fi
+.sp
+.nf
+\fBceph\fP \fBosd\fP \fBcrush\fP \fIadd\fP \fI<osdname (id|osd.id)>\fP
+.fi
+.sp
+.sp
+\fI<weight>\fP \fI<args>\fP [\fI<args>\fP\&...]
+.nf +\fBceph\fP \fBpg\fP \fIforce_create_pg\fP \fI\fP +.fi +.sp +.nf +\fBceph\fP \fBpg\fP \fIstat\fP +.fi +.sp +.nf +\fBceph\fP \fBquorum_status\fP .fi .sp .SH DESCRIPTION .sp -\fBceph\fP is a control utility for communicating with the monitor -cluster of a running Ceph distributed storage system. +\fBceph\fP is a control utility which is used for manual deployment and maintenance +of a Ceph cluster. It provides a diverse set of commands that allows deployment of +monitors, OSDs, placement groups, MDS and overall maintenance, administration +of the cluster. +.SH COMMANDS +.SS auth .sp -There are three basic modes of operation. -.SS Interactive mode +Manage authentication keys. It is used for adding, removing, exporting +or updating of authentication keys for a particular entity such as a monitor or +OSD. It uses some additional subcommands. .sp -To start in interactive mode, no arguments are necessary. Control\-d or -\(aqquit\(aq will exit. -.SS Watch mode +Subcommand \fBadd\fP adds authentication info for a particular entity from input +file, or random key if no input given and/or any caps specified in the command. .sp -Watch mode shows cluster state changes as they occur. For example: +Usage: .INDENT 0.0 .INDENT 3.5 .sp .nf .ft C -ceph \-w +ceph auth add { [...]} .ft P .fi .UNINDENT .UNINDENT -.SS Command line mode .sp -Finally, to send a single instruction to the monitor cluster (and wait -for a response), the command can be specified on the command line. -.SH OPTIONS +Subcommand \fBcaps\fP updates caps for \fBname\fP from caps specified in the command. +.sp +Usage: .INDENT 0.0 -.TP -.B \-i infile -will specify an input file to be passed along as a payload with the -command to the monitor cluster. This is only used for specific -monitor commands. +.INDENT 3.5 +.sp +.nf +.ft C +ceph auth caps [...] +.ft P +.fi +.UNINDENT .UNINDENT +.sp +Subcommand \fBdel\fP deletes all caps for \fBname\fP\&. +.sp +Usage: .INDENT 0.0 -.TP -.B \-o outfile -will write any payload returned by the monitor cluster with its -reply to outfile. Only specific monitor commands (e.g. osd getmap) -return a payload. +.INDENT 3.5 +.sp +.nf +.ft C +ceph auth del +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBexport\fP writes keyring for requested entity, or master keyring if +none given. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph auth export {} +.ft P +.fi .UNINDENT +.UNINDENT +.sp +Subcommand \fBget\fP writes keyring file with requested key. +.sp +Usage: .INDENT 0.0 -.TP -.B \-c ceph.conf, \-\-conf=ceph.conf -Use ceph.conf configuration file instead of the default -/etc/ceph/ceph.conf to determine monitor addresses during startup. +.INDENT 3.5 +.sp +.nf +.ft C +ceph auth get +.ft P +.fi .UNINDENT +.UNINDENT +.sp +Subcommand \fBget\-key\fP displays requested key. +.sp +Usage: .INDENT 0.0 -.TP -.B \-m monaddress[:port] -Connect to specified monitor (instead of looking through ceph.conf). +.INDENT 3.5 +.sp +.nf +.ft C +ceph auth get\-key +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBget\-or\-create\fP adds authentication info for a particular entity +from input file, or random key if no input given and/or any caps specified in the +command. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph auth get\-or\-create { [...]} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBget\-or\-create\-key\fP gets or adds key for \fBname\fP from system/caps +pairs specified in the command. If key already exists, any given caps must match +the existing caps for that key. 
+.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph auth get\-or\-create\-key { [...]} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBimport\fP reads keyring from input file. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph auth import +.ft P +.fi +.UNINDENT .UNINDENT -.SH EXAMPLES .sp -To grab a copy of the current OSD map: +Subcommand \fBlist\fP lists authentication state. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph auth list +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBprint\-key\fP displays requested key. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph auth print\-key +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBprint_key\fP displays requested key. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph auth print_key +.ft P +.fi +.UNINDENT +.UNINDENT +.SS compact +.sp +Causes compaction of monitor\(aqs leveldb storage. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph compact +.ft P +.fi +.UNINDENT +.UNINDENT +.SS config\-key +.sp +Manage configuration key. It uses some additional subcommands. +.sp +Subcommand \fBget\fP gets the configuration key. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph config\-key get +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBput\fP puts configuration key and values. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph config\-key put {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBexists\fP checks for configuration keys existence. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph config\-key exists +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBlist\fP lists configuration keys. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph config\-key list +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBdel\fP deletes configuration key. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph config\-key del +.ft P +.fi +.UNINDENT +.UNINDENT +.SS df +.sp +Show cluster\(aqs free space status. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph df +.ft P +.fi +.UNINDENT +.UNINDENT +.SS fsid +.sp +Show cluster\(aqs FSID/UUID. +.sp +Usage: .INDENT 0.0 .INDENT 3.5 .sp .nf .ft C -ceph \-m 1.2.3.4:6789 osd getmap \-o osdmap +ceph fsid .ft P .fi .UNINDENT .UNINDENT +.SS health .sp -To get a dump of placement group (PG) state: +Show cluster\(aqs health. +.sp +Usage: .INDENT 0.0 .INDENT 3.5 .sp .nf .ft C -ceph pg dump \-o pg.txt +ceph health .ft P .fi .UNINDENT .UNINDENT -.SH MONITOR COMMANDS +.SS heap +.sp +Show heap usage info (available only if compiled with tcmalloc) .sp -A more complete summary of commands understood by the monitor cluster can be found in the -online documentation, at +Usage: .INDENT 0.0 .INDENT 3.5 -\fI\%http://ceph.com/docs/master/rados/operations/control\fP +.sp +.nf +.ft C +ceph heap dump|start_profiler|stop_profiler|release|stats +.ft P +.fi +.UNINDENT .UNINDENT +.SS injectargs +.sp +Inject configuration arguments into monitor. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph injectargs [...] +.ft P +.fi +.UNINDENT +.UNINDENT +.SS log +.sp +Log supplied text to the monitor log. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph log [...] +.ft P +.fi +.UNINDENT +.UNINDENT +.SS mds +.sp +Manage metadata server configuration and administration. It uses some +additional subcommands. +.sp +Subcommand \fBadd_data_pool\fP adds data pool. 
+.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds add_data_pool +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBcluster_down\fP takes mds cluster down. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds cluster_down +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBcluster_up\fP brings mds cluster up. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds cluster_up +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBcompat\fP manages compatible features. It uses some additional +subcommands. +.sp +Subcommand \fBrm_compat\fP removes compatible feature. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds compat rm_compat +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBrm_incompat\fP removes incompatible feature. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds compat rm_incompat +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBshow\fP shows mds compatibility settings. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds compat show +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBdeactivate\fP stops mds. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds deactivate +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBdump\fP dumps information, optionally from epoch. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds dump {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBfail\fP forces mds to status fail. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds fail +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBgetmap\fP gets MDS map, optionally from epoch. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds getmap {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBnewfs\fP makes new filesystem using pools and . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds newfs {\-\-yes\-i\-really\-mean\-it} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBremove_data_pool\fP removes data pool. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds remove_data_pool +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBrm\fP removes inactive mds. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds rm (type.id)> +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBrmfailed\fP removes failed mds. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds rmfailed +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBset_max_mds\fP sets max MDS index. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds set_max_mds +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBset_state\fP sets mds state of to . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds set_state +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBsetmap\fP sets mds map; must supply correct epoch number. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds setmap +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBstat\fP shows MDS status. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds stat +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBstop\fP stops mds. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds stop +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBtell\fP sends command to particular mds. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mds tell [...] +.ft P +.fi +.UNINDENT +.UNINDENT +.SS mon +.sp +Manage monitor configuration and administration. It uses some additional +subcommands. 
+.sp +Subcommand \fBadd\fP adds new monitor named at . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mon add +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBdump\fP dumps formatted monmap (optionally from epoch) +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mon dump {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBgetmap\fP gets monmap. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mon getmap {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBremove\fP removes monitor named . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mon remove +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBstat\fP summarizes monitor status. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mon stat +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBmon_status\fP reports status of monitors. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph mon_status +.ft P +.fi +.UNINDENT +.UNINDENT +.SS osd +.sp +Manage OSD configuration and administration. It uses some additional +subcommands. +.sp +Subcommand \fBcreate\fP creates new osd (with optional UUID). +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd create {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBcrush\fP is used for CRUSH management. It uses some additional +subcommands. +.sp +Subcommand \fBadd\fP adds or updates crushmap position and weight for with + and location . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush add [...] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBadd\-bucket\fP adds no\-parent (probably root) crush bucket of +type . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush add\-bucket +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBcreate\-or\-move\fP creates entry or moves existing entry for + at/to location . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush create\-or\-move +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +[...] +.sp +Subcommand \fBdump\fP dumps crush map. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush dump +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBlink\fP links existing entry for under location . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush link [...] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBmove\fP moves existing entry for to location . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush move [...] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBremove\fP removes from crush map (everywhere, or just at +). +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush remove {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBreweight\fP change \(aqs weight to in crush map. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush reweight +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBrm\fP removes from crush map (everywhere, or just at +). +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush rm {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBrule\fP is used for creating crush rules. It uses some additional +subcommands. +.sp +Subcommand \fBcreate\-erasure\fP creates crush rule for erasure coded pool +created with (default default). 
+.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush rule create\-erasure {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBcreate\-simple\fP creates crush rule to start from , +replicate across buckets of type , using a choose mode of +(default firstn; indep best for erasure pools). +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush rule create\-simple {firstn|indep} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBdump\fP dumps crush rule (default all). +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush rule dump {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBlist\fP lists crush rules. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush rule list +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBls\fP lists crush rules. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush rule ls +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBrm\fP removes crush rule . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush rule rm +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBset\fP sets crush map from input file. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush set +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBset\fP with osdname/osd.id update crushmap position and weight +for to with location . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush set [...] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBshow\-tunables\fP shows current crush tunables. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush show\-tunables +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBtunables\fP sets crush tunables values to . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush tunables legacy|argonaut|bobtail|firefly|optimal|default +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBunlink\fP unlinks from crush map (everywhere, or just at +). +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd crush unlink {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBdeep\-scrub\fP initiates deep scrub on specified osd. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd deep\-scrub +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBdown\fP sets osd(s) [...] down. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd down [...] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBdump\fP prints summary of OSD map. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd dump {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBerasure\-code\-profile\fP is used for managing the erasure code +profiles. It uses some additional subcommands. +.sp +Subcommand \fBget\fP gets erasure code profile . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd erasure\-code\-profile get +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBls\fP lists all erasure code profiles. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd erasure\-code\-profile ls +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBrm\fP removes erasure code profile . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd erasure\-code\-profile rm +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBset\fP creates erasure code profile with [ ...] +pairs. Add a \-\-force at the end to override an existing profile (IT IS RISKY). 
+.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd erasure\-code\-profile set { [...]} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBfind\fP find osd in the CRUSH map and shows its location. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd find +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBgetcrushmap\fP gets CRUSH map. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd getcrushmap {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBgetmap\fP gets OSD map. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd getmap {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBgetmaxosd\fP shows largest OSD id. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd getmaxosd +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBin\fP sets osd(s) [...] in. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd in [...] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBlost\fP marks osd as permanently lost. THIS DESTROYS DATA IF NO +MORE REPLICAS EXIST, BE CAREFUL. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd lost {\-\-yes\-i\-really\-mean\-it} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBls\fP shows all OSD ids. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd ls {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBlspools\fP lists pools. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd lspools {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBmap\fP finds pg for in . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd map +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBmetadata\fP fetches metadata for osd . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd metadata +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBout\fP sets osd(s) [...] out. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd out [...] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBpause\fP pauses osd. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd pause +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBperf\fP prints dump of OSD perf summary stats. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd perf +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBpg\-temp\fP set pg_temp mapping pgid:[ [...]] (developers +only). +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd pg\-temp { [...]} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBpool\fP is used for managing data pools. It uses some additional +subcommands. +.sp +Subcommand \fBcreate\fP creates pool. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd pool create {} {replicated|erasure} +{} {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBdelete\fP deletes pool. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd pool delete {} {\-\-yes\-i\-really\-really\-mean\-it} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBget\fP gets pool parameter . 
+.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd pool get size|min_size|crash_replay_interval|pg_num| +pgp_num|crush_ruleset|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp| + +ceph osd pool get auid|target_max_objects|target_max_bytes + +ceph osd pool get cache_target_dirty_ratio|cache_target_full_ratio + +ceph osd pool get cache_min_flush_age|cache_min_evict_age| +erasure_code_profile +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBget\-quota\fP obtains object or byte limits for pool. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd pool get\-quota +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBmksnap\fP makes snapshot in . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd pool mksnap +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBrename\fP renames to . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd pool rename +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBrmsnap\fP removes snapshot from . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd pool rmsnap +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBset\fP sets pool parameter to . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd pool set size|min_size|crash_replay_interval|pg_num| +pgp_num|crush_ruleset|hashpspool|hit_set_type|hit_set_period| + +ceph osd pool set hit_set_count|hit_set_fpp|debug_fake_ec_pool + +ceph osd pool set target_max_bytes|target_max_objects + +ceph osd pool set cache_target_dirty_ratio|cache_target_full_ratio + +ceph osd pool set cache_min_flush_age + +ceph osd pool set cache_min_evict_age|auid +{\-\-yes\-i\-really\-mean\-it} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBset\-quota\fP sets object or byte limit on pool. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd pool set\-quota max_objects|max_bytes +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBstats\fP obtain stats from all pools, or from specified pool. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd pool stats {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBprimary\-affinity\fP adjust osd primary\-affinity from 0.0 <= +<= 1.0 +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd primary\-affinity +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBprimary\-temp\fP sets primary_temp mapping pgid:|\-1 (developers +only). +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd primary\-temp +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBrepair\fP initiates repair on a specified osd. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd repair +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBreweight\fP reweights osd to 0.0 < < 1.0. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +osd reweight +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBreweight\-by\-utilization\fP reweight OSDs by utilization +[overload\-percentage\-for\-consideration, default 120]. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd reweight\-by\-utilization {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBrm\fP removes osd(s) [...] in the cluster. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd rm [...] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBscrub\fP initiates scrub on specified osd. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd scrub +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBset\fP sets . 
+.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd set pause|noup|nodown|noout|noin|nobackfill|norecover|noscrub| +nodeep\-scrub|notieragent +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBsetcrushmap\fP sets crush map from input file. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd setcrushmap +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBsetmaxosd\fP sets new maximum osd value. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd setmaxosd +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBstat\fP prints summary of OSD map. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd stat +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBthrash\fP thrashes OSDs for . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd thrash +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBtier\fP is used for managing tiers. It uses some additional +subcommands. +.sp +Subcommand \fBadd\fP adds the tier (the second one) to base pool +(the first one). +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd tier add {\-\-force\-nonempty} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBadd\-cache\fP adds a cache (the second one) of size +to existing pool (the first one). +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd tier add\-cache +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBcache\-mode\fP specifies the caching mode for cache tier . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd tier cache\-mode none|writeback|forward|readonly +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBremove\fP removes the tier (the second one) from base pool + (the first one). +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd tier remove +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBremove\-overlay\fP removes the overlay pool for base pool . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd tier remove\-overlay +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBset\-overlay\fP set the overlay pool for base pool to be +. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd tier set\-overlay +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBtree\fP prints OSD tree. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd tree {} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBunpause\fP unpauses osd. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph osd unpause +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBunset\fP unsets . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +osd unset pause|noup|nodown|noout|noin|nobackfill|norecover|noscrub| +nodeep\-scrub|notieragent +.ft P +.fi +.UNINDENT +.UNINDENT +.SS pg +.sp +It is used for managing the placement groups in OSDs. It uses some +additional subcommands. +.sp +Subcommand \fBdebug\fP shows debug info about pgs. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph pg debug unfound_objects_exist|degraded_pgs_exist +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBdeep\-scrub\fP starts deep\-scrub on . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph pg deep\-scrub +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBdump\fP shows human\-readable versions of pg map (only \(aqall\(aq valid +with plain). 
+.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph pg dump {all|summary|sum|delta|pools|osds|pgs|pgs_brief} + +ceph pg dump {all|summary|sum|delta|pools|osds|pgs|pgs_brief...} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBdump_json\fP shows human\-readable version of pg map in json only. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph pg dump_json {all|summary|sum|pools|osds|pgs[all|summary|sum|pools| +osds|pgs...]} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBdump_pools_json\fP shows pg pools info in json only. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph pg dump_pools_json +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBdump_stuck\fP shows information about stuck pgs. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph pg dump_stuck {inactive|unclean|stale[inactive|unclean|stale...]} +{} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBforce_create_pg\fP forces creation of pg . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph pg force_create_pg +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBgetmap\fP gets binary pg map to \-o/stdout. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph pg getmap +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBmap\fP shows mapping of pg to osds. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph pg map +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBrepair\fP starts repair on . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph pg repair +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBscrub\fP starts scrub on . +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph pg scrub +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBsend_pg_creates\fP triggers pg creates to be issued. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph pg send_pg_creates +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBset_full_ratio\fP sets ratio at which pgs are considered full. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph pg set_full_ratio +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBset_nearfull_ratio\fP sets ratio at which pgs are considered nearly +full. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph pg set_nearfull_ratio +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Subcommand \fBstat\fP shows placement group status. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph pg stat +.ft P +.fi +.UNINDENT +.UNINDENT +.SS quorum +.sp +Enter or exit quorum. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph quorum enter|exit +.ft P +.fi +.UNINDENT +.UNINDENT +.SS quorum_status +.sp +Reports status of monitor quorum. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph quorum_status +.ft P +.fi +.UNINDENT +.UNINDENT +.SS report +.sp +Reports full status of cluster, optional title tag strings. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph report { [...]} +.ft P +.fi +.UNINDENT +.UNINDENT +.SS scrub +.sp +Scrubs the monitor stores. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph scrub +.ft P +.fi +.UNINDENT +.UNINDENT +.SS status +.sp +Shows cluster status. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph status +.ft P +.fi +.UNINDENT +.UNINDENT +.SS sync force +.sp +Forces sync of and clear monitor store. 
+.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph sync force {\-\-yes\-i\-really\-mean\-it} {\-\-i\-know\-what\-i\-am\-doing} +.ft P +.fi +.UNINDENT +.UNINDENT +.SS tell +.sp +Sends a command to a specific daemon. +.sp +Usage: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +ceph tell [...] +.ft P +.fi +.UNINDENT +.UNINDENT +.SH OPTIONS +.INDENT 0.0 +.TP +.B \-i infile +will specify an input file to be passed along as a payload with the +command to the monitor cluster. This is only used for specific +monitor commands. +.UNINDENT +.INDENT 0.0 +.TP +.B \-o outfile +will write any payload returned by the monitor cluster with its +reply to outfile. Only specific monitor commands (e.g. osd getmap) +return a payload. +.UNINDENT +.INDENT 0.0 +.TP +.B \-c ceph.conf, \-\-conf=ceph.conf +Use ceph.conf configuration file instead of the default +\fB/etc/ceph/ceph.conf\fP to determine monitor addresses during startup. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-id CLIENT_ID, \-\-user CLIENT_ID +Client id for authentication. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-name CLIENT_NAME, \-n CLIENT_NAME +Client name for authentication. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-cluster CLUSTER +Name of the Ceph cluster. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-admin\-daemon ADMIN_SOCKET +Submit admin\-socket commands. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-admin\-socket ADMIN_SOCKET_NOPE +You probably mean \-\-admin\-daemon +.UNINDENT +.INDENT 0.0 +.TP +.B \-s, \-\-status +Show cluster status. +.UNINDENT +.INDENT 0.0 +.TP +.B \-w, \-\-watch +Watch live cluster changes. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-watch\-debug +Watch debug events. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-watch\-info +Watch info events. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-watch\-sec +Watch security events. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-watch\-warn +Watch warning events. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-watch\-error +Watch error events. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-version, \-v +Display version. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-verbose +Make verbose. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-concise +Make less verbose. +.UNINDENT +.INDENT 0.0 +.TP +.B \-f {json,json\-pretty,xml,xml\-pretty,plain}, \-\-format +Format of output. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-connect\-timeout CLUSTER_TIMEOUT +Set a timeout for connecting to the cluster. .UNINDENT .SH AVAILABILITY .sp -\fBceph\fP is part of the Ceph distributed storage system. Please refer to the Ceph documentation at -\fI\%http://ceph.com/docs\fP for more information. +\fBceph\fP is a part of the Ceph distributed storage system. Please refer to +the Ceph documentation at \fI\%http://ceph.com/docs\fP for more information. .SH SEE ALSO .sp -\fBceph\fP(8), +\fBceph\-mon\fP(8), +\fBceph\-osd\fP(8), +\fBceph\-mds\fP(8) .SH COPYRIGHT 2010-2014, Inktank Storage, Inc. and contributors. Licensed under Creative Commons BY-SA .\" Generated by docutils manpage writer. 
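The rewritten ceph.8 above documents each command as the CLI exposes it; internally every one of those commands is a JSON request handed to the monitor cluster, which is also what the -i/-o payload options refer to. As a rough, hypothetical sketch (not part of this debdiff) of driving the same machinery programmatically, the following uses the librados C++ API; the client name, config path, and command string are example values only:

// Hypothetical illustration, not part of the patch: send the JSON
// equivalent of "ceph status" straight to the monitors via librados.
#include <rados/librados.hpp>
#include <iostream>
#include <string>

int main()
{
  librados::Rados cluster;
  // The same defaults the CLI uses: client.admin, cluster "ceph", and
  // monitor addresses read from /etc/ceph/ceph.conf (cf. -c and --id above).
  if (cluster.init2("client.admin", "ceph", 0) < 0 ||
      cluster.conf_read_file("/etc/ceph/ceph.conf") < 0 ||
      cluster.connect() < 0) {
    std::cerr << "cannot connect to cluster" << std::endl;
    return 1;
  }

  librados::bufferlist inbl, outbl;  // request/reply payloads (cf. -i/-o)
  std::string outs;                  // human-readable part of the reply
  int r = cluster.mon_command("{\"prefix\": \"status\", \"format\": \"json\"}",
                              inbl, &outbl, &outs);
  if (r == 0)
    std::cout << std::string(outbl.c_str(), outbl.length()) << std::endl;
  cluster.shutdown();
  return r;
}

Built with g++ and -lrados, this should print the same JSON that "ceph status -f json" returns, since the shell tool is essentially a thin wrapper around mon_command().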
diff -Nru ceph-0.80.8/src/civetweb/include/civetweb_conf.h ceph-0.80.9/src/civetweb/include/civetweb_conf.h --- ceph-0.80.8/src/civetweb/include/civetweb_conf.h 1970-01-01 00:00:00.000000000 +0000 +++ ceph-0.80.9/src/civetweb/include/civetweb_conf.h 2015-03-09 19:52:38.000000000 +0000 @@ -0,0 +1,6 @@ +#ifndef CIVETWEB_CONF_H +#define CIVETWEB_CONF_H + +#define USE_IPV6 1 + +#endif diff -Nru ceph-0.80.8/src/client/Client.cc ceph-0.80.9/src/client/Client.cc --- ceph-0.80.8/src/client/Client.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/client/Client.cc 2015-03-09 19:51:17.000000000 +0000 @@ -53,7 +53,7 @@ #include "mon/MonClient.h" -#include "mds/MDSMap.h" +#include "mds/flock.h" #include "osd/OSDMap.h" #include "mon/MonMap.h" @@ -149,6 +149,7 @@ logger(NULL), m_command_hook(this), timer(m->cct, client_lock), + switch_interrupt_cb(NULL), ino_invalidate_cb(NULL), ino_invalidate_cb_handle(NULL), dentry_invalidate_cb(NULL), @@ -157,6 +158,7 @@ getgroups_cb_handle(NULL), async_ino_invalidator(m->cct), async_dentry_invalidator(m->cct), + interrupt_finisher(m->cct), tick_event(NULL), monclient(mc), messenger(m), whoami(m->get_myname().num()), initialized(false), mounted(false), unmounting(false), @@ -443,6 +445,12 @@ async_dentry_invalidator.stop(); } + if (switch_interrupt_cb) { + ldout(cct, 10) << "shutdown stopping interrupt finisher" << dendl; + interrupt_finisher.wait_for_empty(); + interrupt_finisher.stop(); + } + objectcacher->stop(); // outside of client_lock! this does a join. client_lock.Lock(); @@ -1437,15 +1445,15 @@ void Client::put_request(MetaRequest *request) { - if (request->get_num_ref() == 1) { + if (request->_put()) { if (request->inode()) put_inode(request->take_inode()); if (request->old_inode()) put_inode(request->take_old_inode()); if (request->other_inode()) put_inode(request->take_other_inode()); + delete request; } - request->_put(); } int Client::encode_inode_release(Inode *in, MetaRequest *req, @@ -2034,6 +2042,9 @@ in->make_long_path(path); ldout(cct, 10) << " path " << path << dendl; + bufferlist flockbl; + _encode_filelocks(in, flockbl); + in->caps[mds]->seq = 0; // reset seq. in->caps[mds]->issue_seq = 0; // reset seq. in->caps[mds]->mseq = 0; // reset seq. @@ -2042,7 +2053,8 @@ path.get_ino(), path.get_path(), // ino in->caps_wanted(), // wanted in->caps[mds]->issued, // issued - in->snaprealm->ino); + in->snaprealm->ino, + flockbl); if (did_snaprealm.count(in->snaprealm->ino) == 0) { ldout(cct, 10) << " snaprealm " << *in->snaprealm << dendl; @@ -2182,6 +2194,8 @@ in->snaprealm_item.remove_myself(); if (in == root) root = 0; + delete in->fcntl_locks; + delete in->flock_locks; delete in; } } @@ -5870,6 +5884,8 @@ in->snap_cap_refs--; } + _release_filelocks(f); + put_inode(in); delete f; @@ -6858,6 +6874,290 @@ return rval; } +int Client::_do_filelock(Inode *in, Fh *fh, int lock_type, int op, int sleep, + struct flock *fl, uint64_t owner, void *fuse_req) +{ + ldout(cct, 10) << "_do_filelock ino " << in->ino + << (lock_type == CEPH_LOCK_FCNTL ? 
" fcntl" : " flock") + << " type " << fl->l_type << " owner " << owner + << " " << fl->l_start << "~" << fl->l_len << dendl; + + int lock_cmd; + if (F_RDLCK == fl->l_type) + lock_cmd = CEPH_LOCK_SHARED; + else if (F_WRLCK == fl->l_type) + lock_cmd = CEPH_LOCK_EXCL; + else if (F_UNLCK == fl->l_type) + lock_cmd = CEPH_LOCK_UNLOCK; + else + return -EIO; + + if (op != CEPH_MDS_OP_SETFILELOCK || lock_cmd == CEPH_LOCK_UNLOCK) + sleep = 0; + + /* + * Set the most significant bit, so that MDS knows the 'owner' + * is sufficient to identify the owner of lock. (old code uses + * both 'owner' and 'pid') + */ + owner |= (1ULL << 63); + + MetaRequest *req = new MetaRequest(op); + filepath path; + in->make_nosnap_relative_path(path); + req->set_filepath(path); + req->set_inode(in); + + req->head.args.filelock_change.rule = lock_type; + req->head.args.filelock_change.type = lock_cmd; + req->head.args.filelock_change.owner = owner; + req->head.args.filelock_change.pid = fl->l_pid; + req->head.args.filelock_change.start = fl->l_start; + req->head.args.filelock_change.length = fl->l_len; + req->head.args.filelock_change.wait = sleep; + + int ret; + bufferlist bl; + + if (sleep && switch_interrupt_cb && fuse_req) { + // enable interrupt + switch_interrupt_cb(fuse_req, req->get()); + + ret = make_request(req, -1, -1, NULL, NULL, -1, &bl); + + // disable interrupt + switch_interrupt_cb(fuse_req, NULL); + put_request(req); + } else { + ret = make_request(req, -1, -1, NULL, NULL, -1, &bl); + } + + if (ret == 0) { + if (op == CEPH_MDS_OP_GETFILELOCK) { + ceph_filelock filelock; + bufferlist::iterator p = bl.begin(); + ::decode(filelock, p); + + if (CEPH_LOCK_SHARED == filelock.type) + fl->l_type = F_RDLCK; + else if (CEPH_LOCK_EXCL == filelock.type) + fl->l_type = F_WRLCK; + else + fl->l_type = F_UNLCK; + + fl->l_whence = SEEK_SET; + fl->l_start = filelock.start; + fl->l_len = filelock.length; + fl->l_pid = filelock.pid; + } else if (op == CEPH_MDS_OP_SETFILELOCK) { + ceph_lock_state_t *lock_state; + if (lock_type == CEPH_LOCK_FCNTL) { + if (!in->fcntl_locks) + in->fcntl_locks = new ceph_lock_state_t(cct); + lock_state = in->fcntl_locks; + } else if (lock_type == CEPH_LOCK_FLOCK) { + if (!in->flock_locks) + in->flock_locks = new ceph_lock_state_t(cct); + lock_state = in->flock_locks; + } else + assert(0); + _update_lock_state(fl, owner, lock_state); + + if (fh) { + if (lock_type == CEPH_LOCK_FCNTL) { + if (!fh->fcntl_locks) + fh->fcntl_locks = new ceph_lock_state_t(cct); + lock_state = fh->fcntl_locks; + } else { + if (!fh->flock_locks) + fh->flock_locks = new ceph_lock_state_t(cct); + lock_state = fh->flock_locks; + } + _update_lock_state(fl, owner, lock_state); + } + } else + assert(0); + } + return ret; +} + +int Client::_interrupt_filelock(MetaRequest *req) +{ + Inode *in = req->inode(); + + int lock_type; + if (req->head.args.filelock_change.rule == CEPH_LOCK_FLOCK) + lock_type = CEPH_LOCK_FLOCK_INTR; + else if (req->head.args.filelock_change.rule == CEPH_LOCK_FCNTL) + lock_type = CEPH_LOCK_FCNTL_INTR; + else + assert(0); + + MetaRequest *intr_req = new MetaRequest(CEPH_MDS_OP_SETFILELOCK); + filepath path; + in->make_nosnap_relative_path(path); + intr_req->set_filepath(path); + intr_req->set_inode(in); + intr_req->head.args.filelock_change = req->head.args.filelock_change; + intr_req->head.args.filelock_change.rule = lock_type; + intr_req->head.args.filelock_change.type = CEPH_LOCK_UNLOCK; + + return make_request(intr_req, -1, -1, NULL, NULL, -1); +} + +void Client::_encode_filelocks(Inode *in, bufferlist& 
bl) +{ + if (!in->fcntl_locks && !in->flock_locks) + return; + + unsigned nr_fcntl_locks = in->fcntl_locks ? in->fcntl_locks->held_locks.size() : 0; + ::encode(nr_fcntl_locks, bl); + if (nr_fcntl_locks) { + ceph_lock_state_t* lock_state = in->fcntl_locks; + for(multimap::iterator p = lock_state->held_locks.begin(); + p != lock_state->held_locks.end(); + ++p) + ::encode(p->second, bl); + } + + unsigned nr_flock_locks = in->flock_locks ? in->flock_locks->held_locks.size() : 0; + ::encode(nr_flock_locks, bl); + if (nr_flock_locks) { + ceph_lock_state_t* lock_state = in->flock_locks; + for(multimap::iterator p = lock_state->held_locks.begin(); + p != lock_state->held_locks.end(); + ++p) + ::encode(p->second, bl); + } + + ldout(cct, 10) << "_encode_filelocks ino " << in->ino << ", " << nr_fcntl_locks + << " fcntl locks, " << nr_flock_locks << " flock locks" << dendl; +} + +void Client::_release_filelocks(Fh *fh) +{ + if (!fh->fcntl_locks && !fh->flock_locks) + return; + + Inode *in = fh->inode; + ldout(cct, 10) << "_release_filelocks " << fh << " ino " << in->ino << dendl; + + list > to_release; + + if (fh->fcntl_locks) { + ceph_lock_state_t* lock_state = fh->fcntl_locks; + for(multimap::iterator p = lock_state->held_locks.begin(); + p != lock_state->held_locks.end(); + ++p) + to_release.push_back(pair(CEPH_LOCK_FCNTL, p->second)); + delete fh->fcntl_locks; + } + if (fh->flock_locks) { + ceph_lock_state_t* lock_state = fh->flock_locks; + for(multimap::iterator p = lock_state->held_locks.begin(); + p != lock_state->held_locks.end(); + ++p) + to_release.push_back(pair(CEPH_LOCK_FLOCK, p->second)); + delete fh->flock_locks; + } + + if (to_release.empty()) + return; + + struct flock fl; + memset(&fl, 0, sizeof(fl)); + fl.l_whence = SEEK_SET; + fl.l_type = F_UNLCK; + + for (list >::iterator p = to_release.begin(); + p != to_release.end(); + ++p) { + fl.l_start = p->second.start; + fl.l_len = p->second.length; + fl.l_pid = p->second.pid; + _do_filelock(in, NULL, p->first, CEPH_MDS_OP_SETFILELOCK, 0, &fl, p->second.owner); + } +} + +void Client::_update_lock_state(struct flock *fl, uint64_t owner, + ceph_lock_state_t *lock_state) +{ + int lock_cmd; + if (F_RDLCK == fl->l_type) + lock_cmd = CEPH_LOCK_SHARED; + else if (F_WRLCK == fl->l_type) + lock_cmd = CEPH_LOCK_EXCL; + else + lock_cmd = CEPH_LOCK_UNLOCK;; + + ceph_filelock filelock; + filelock.start = fl->l_start; + filelock.length = fl->l_len; + filelock.client = 0; + // see comment in _do_filelock() + filelock.owner = owner | (1ULL << 63); + filelock.pid = fl->l_pid; + filelock.type = lock_cmd; + + if (filelock.type == CEPH_LOCK_UNLOCK) { + list activated_locks; + lock_state->remove_lock(filelock, activated_locks); + } else { + bool r = lock_state->add_lock(filelock, false, false); + assert(r); + } +} + +int Client::_getlk(Fh *fh, struct flock *fl, uint64_t owner) +{ + Inode *in = fh->inode; + ldout(cct, 10) << "_getlk " << fh << " ino " << in->ino << dendl; + int ret = _do_filelock(in, fh, CEPH_LOCK_FCNTL, CEPH_MDS_OP_GETFILELOCK, 0, fl, owner); + return ret; +} + +int Client::_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep, void *fuse_req) +{ + Inode *in = fh->inode; + ldout(cct, 10) << "_setlk " << fh << " ino " << in->ino << dendl; + int ret = _do_filelock(in, fh, CEPH_LOCK_FCNTL, CEPH_MDS_OP_SETFILELOCK, sleep, fl, owner, fuse_req); + ldout(cct, 10) << "_setlk " << fh << " ino " << in->ino << " result=" << ret << dendl; + return ret; +} + +int Client::_flock(Fh *fh, int cmd, uint64_t owner, void *fuse_req) +{ + Inode *in = 
fh->inode; + ldout(cct, 10) << "_flock " << fh << " ino " << in->ino << dendl; + + int sleep = !(cmd & LOCK_NB); + cmd &= ~LOCK_NB; + + int type; + switch (cmd) { + case LOCK_SH: + type = F_RDLCK; + break; + case LOCK_EX: + type = F_WRLCK; + break; + case LOCK_UN: + type = F_UNLCK; + break; + default: + return -EINVAL; + } + + struct flock fl; + memset(&fl, 0, sizeof(fl)); + fl.l_type = type; + fl.l_whence = SEEK_SET; + + int ret = _do_filelock(in, fh, CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, sleep, &fl, owner, fuse_req); + ldout(cct, 10) << "_flock " << fh << " ino " << in->ino << " result=" << ret << dendl; + return ret; +} + int Client::ll_statfs(Inode *in, struct statvfs *stbuf) { /* Since the only thing this does is wrap a call to statfs, and @@ -6888,6 +7188,16 @@ async_dentry_invalidator.start(); } +void Client::ll_register_switch_interrupt_cb(client_switch_interrupt_callback_t cb) +{ + Mutex::Locker l(client_lock); + ldout(cct, 10) << "ll_register_switch_interrupt_cb cb " << (void*)cb << dendl; + if (cb == NULL) + return; + switch_interrupt_cb = cb; + interrupt_finisher.start(); +} + void Client::ll_register_getgroups_cb(client_getgroups_callback_t cb, void *handle) { Mutex::Locker l(client_lock); @@ -8740,6 +9050,59 @@ return 0; } +int Client::ll_getlk(Fh *fh, struct flock *fl, uint64_t owner) +{ + Mutex::Locker lock(client_lock); + + ldout(cct, 3) << "ll_getlk (fh)" << fh << " " << fh->inode->ino << dendl; + tout(cct) << "ll_getk (fh)" << (unsigned long)fh << std::endl; + + return _getlk(fh, fl, owner); +} + +int Client::ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep, void *fuse_req) +{ + Mutex::Locker lock(client_lock); + + ldout(cct, 3) << "ll_setlk (fh) " << fh << " " << fh->inode->ino << dendl; + tout(cct) << "ll_setk (fh)" << (unsigned long)fh << std::endl; + + return _setlk(fh, fl, owner, sleep, fuse_req); +} + +int Client::ll_flock(Fh *fh, int cmd, uint64_t owner, void *fuse_req) +{ + Mutex::Locker lock(client_lock); + + ldout(cct, 3) << "ll_flock (fh) " << fh << " " << fh->inode->ino << dendl; + tout(cct) << "ll_flock (fh)" << (unsigned long)fh << std::endl; + + return _flock(fh, cmd, owner, fuse_req); +} + +class C_Client_RequestInterrupt : public Context { +private: + Client *client; + MetaRequest *req; +public: + C_Client_RequestInterrupt(Client *c, MetaRequest *r) : client(c), req(r) { + req->get(); + } + void finish(int r) { + Mutex::Locker l(client->client_lock); + assert(req->head.op == CEPH_MDS_OP_SETFILELOCK); + client->_interrupt_filelock(req); + client->put_request(req); + } +}; + +void Client::ll_interrupt(void *d) +{ + MetaRequest *req = static_cast(d); + ldout(cct, 3) << "ll_interrupt tid " << req->get_tid() << dendl; + tout(cct) << "ll_interrupt tid " << req->get_tid() << std::endl; + interrupt_finisher.queue(new C_Client_RequestInterrupt(this, req)); +} // ========================================= // layout diff -Nru ceph-0.80.8/src/client/Client.h ceph-0.80.9/src/client/Client.h --- ceph-0.80.8/src/client/Client.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/client/Client.h 2015-03-09 19:51:17.000000000 +0000 @@ -119,6 +119,7 @@ struct MetaSession; struct MetaRequest; +class ceph_lock_state_t; typedef void (*client_ino_callback_t)(void *handle, vinodeno_t ino, int64_t off, int64_t len); @@ -127,6 +128,7 @@ vinodeno_t ino, string& name); typedef int (*client_getgroups_callback_t)(void *handle, uid_t uid, gid_t **sgids); +typedef void(*client_switch_interrupt_callback_t)(void *req, void *data); // 
======================================================== // client interface @@ -214,6 +216,8 @@ SafeTimer timer; + client_switch_interrupt_callback_t switch_interrupt_cb; + client_ino_callback_t ino_invalidate_cb; void *ino_invalidate_cb_handle; @@ -225,6 +229,7 @@ Finisher async_ino_invalidator; Finisher async_dentry_invalidator; + Finisher interrupt_finisher; Context *tick_event; utime_t last_cap_renew; @@ -374,6 +379,7 @@ friend class C_Client_CacheInvalidate; // calls ino_invalidate_cb friend class C_Client_DentryInvalidate; // calls dentry_invalidate_cb friend class C_Block_Sync; // Calls block map and protected helpers + friend class C_Client_RequestInterrupt; //int get_cache_size() { return lru.lru_get_size(); } //void set_cache_size(int m) { lru.lru_set_max(m); } @@ -604,6 +610,9 @@ int _fsync(Fh *fh, bool syncdataonly); int _sync_fs(); int _fallocate(Fh *fh, int mode, int64_t offset, int64_t length); + int _getlk(Fh *fh, struct flock *fl, uint64_t owner); + int _setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep, void *fuse_req=NULL); + int _flock(Fh *fh, int cmd, uint64_t owner, void *fuse_req=NULL); int get_or_create(Inode *dir, const char* name, Dentry **pdn, bool expect_null=false); @@ -613,6 +622,12 @@ vinodeno_t _get_vino(Inode *in); inodeno_t _get_inodeno(Inode *in); + int _do_filelock(Inode *in, Fh *fh, int lock_type, int op, int sleep, + struct flock *fl, uint64_t owner, void *fuse_req=NULL); + int _interrupt_filelock(MetaRequest *req); + void _encode_filelocks(Inode *in, bufferlist& bl); + void _release_filelocks(Fh *fh); + void _update_lock_state(struct flock *fl, uint64_t owner, ceph_lock_state_t *lock_state); public: int mount(const std::string &mount_root); void unmount(); @@ -818,6 +833,10 @@ int ll_fsync(Fh *fh, bool syncdataonly); int ll_fallocate(Fh *fh, int mode, loff_t offset, loff_t length); int ll_release(Fh *fh); + int ll_getlk(Fh *fh, struct flock *fl, uint64_t owner); + int ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep, void *fuse_req); + int ll_flock(Fh *fh, int cmd, uint64_t owner, void *fuse_req); + void ll_interrupt(void *d); int ll_get_stripe_osd(struct Inode *in, uint64_t blockno, ceph_file_layout* layout); uint64_t ll_get_internal_offset(struct Inode *in, uint64_t blockno); @@ -825,11 +844,11 @@ int ll_num_osds(void); int ll_osdaddr(int osd, uint32_t *addr); int ll_osdaddr(int osd, char* buf, size_t size); - void ll_register_ino_invalidate_cb(client_ino_callback_t cb, void *handle); + void ll_register_ino_invalidate_cb(client_ino_callback_t cb, void *handle); void ll_register_dentry_invalidate_cb(client_dentry_callback_t cb, void *handle); - void ll_register_getgroups_cb(client_getgroups_callback_t cb, void *handle); + void ll_register_switch_interrupt_cb(client_switch_interrupt_callback_t cb); }; #endif diff -Nru ceph-0.80.8/src/client/Fh.h ceph-0.80.9/src/client/Fh.h --- ceph-0.80.8/src/client/Fh.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/client/Fh.h 2015-03-09 19:51:17.000000000 +0000 @@ -5,6 +5,7 @@ class Inode; class Cond; +class ceph_lock_state_t; // file handle for any open file state @@ -23,8 +24,13 @@ loff_t consec_read_bytes; int nr_consec_read; + // file lock + ceph_lock_state_t *fcntl_locks; + ceph_lock_state_t *flock_locks; + Fh() : inode(0), pos(0), mds(0), mode(0), flags(0), pos_locked(false), - last_pos(0), consec_read_bytes(0), nr_consec_read(0) {} + last_pos(0), consec_read_bytes(0), nr_consec_read(0), + fcntl_locks(NULL), flock_locks(NULL) {} }; diff -Nru ceph-0.80.8/src/client/fuse_ll.cc 
ceph-0.80.9/src/client/fuse_ll.cc --- ceph-0.80.8/src/client/fuse_ll.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/client/fuse_ll.cc 2015-03-09 19:51:17.000000000 +0000 @@ -636,6 +636,69 @@ cfuse->iput(in); // iput required } +static void fuse_ll_getlk(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, struct flock *lock) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req); + Fh *fh = (Fh*)fi->fh; + + int r = cfuse->client->ll_getlk(fh, lock, fi->lock_owner); + if (r == 0) + fuse_reply_lock(req, lock); + else + fuse_reply_err(req, -r); +} + +static void fuse_ll_setlk(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, struct flock *lock, int sleep) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req); + Fh *fh = (Fh*)fi->fh; + + // must use multithread if operation may block + if (!cfuse->client->cct->_conf->fuse_multithreaded && + sleep && lock->l_type != F_UNLCK) { + fuse_reply_err(req, EDEADLK); + return; + } + + int r = cfuse->client->ll_setlk(fh, lock, fi->lock_owner, sleep, req); + fuse_reply_err(req, -r); +} + +static void fuse_ll_interrupt(fuse_req_t req, void* data) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req); + cfuse->client->ll_interrupt(data); +} + +static void switch_interrupt_cb(void *req, void* data) +{ + if (data) + fuse_req_interrupt_func((fuse_req_t)req, fuse_ll_interrupt, data); + else + fuse_req_interrupt_func((fuse_req_t)req, NULL, NULL); +} + +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9) +static void fuse_ll_flock(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, int cmd) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req); + Fh *fh = (Fh*)fi->fh; + + // must use multithread if operation may block + if (!cfuse->client->cct->_conf->fuse_multithreaded && + !(cmd & (LOCK_NB | LOCK_UN))) { + fuse_reply_err(req, EDEADLK); + return; + } + + int r = cfuse->client->ll_flock(fh, cmd, fi->lock_owner, req); + fuse_reply_err(req, -r); +} +#endif + #if 0 static int getgroups_cb(void *handle, uid_t uid, gid_t **sgids) { @@ -742,8 +805,8 @@ removexattr: fuse_ll_removexattr, access: fuse_ll_access, create: fuse_ll_create, - getlk: 0, - setlk: 0, + getlk: fuse_ll_getlk, + setlk: fuse_ll_setlk, bmap: 0, #if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8) #ifdef FUSE_IOCTL_COMPAT @@ -752,13 +815,15 @@ ioctl: 0, #endif poll: 0, -#if FUSE_VERSION > FUSE_MAKE_VERSION(2, 9) +#endif +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9) write_buf: 0, retrieve_reply: 0, forget_multi: 0, - flock: 0, - fallocate: fuse_ll_fallocate + flock: fuse_ll_flock, #endif +#if FUSE_VERSION > FUSE_MAKE_VERSION(2, 9) + fallocate: fuse_ll_fallocate #endif }; @@ -859,6 +924,8 @@ fuse_session_add_chan(se, ch); + client->ll_register_switch_interrupt_cb(switch_interrupt_cb); + /* * this is broken: * diff -Nru ceph-0.80.8/src/client/Inode.h ceph-0.80.9/src/client/Inode.h --- ceph-0.80.8/src/client/Inode.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/client/Inode.h 2015-03-09 19:51:17.000000000 +0000 @@ -17,7 +17,8 @@ class Dentry; class Dir; struct SnapRealm; -class Inode; +struct Inode; +class ceph_lock_state_t; struct Cap { MetaSession *session; @@ -210,6 +211,10 @@ ll_ref -= n; } + // file locks + ceph_lock_state_t *fcntl_locks; + ceph_lock_state_t *flock_locks; + Inode(CephContext *cct_, vinodeno_t vino, ceph_file_layout *newlayout) : cct(cct_), ino(vino.ino), snapid(vino.snapid), rdev(0), mode(0), uid(0), gid(0), nlink(0), @@ -224,8 +229,8 @@ snaprealm(0), snaprealm_item(this), 
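
fuse_ll_setlk hands the raw fuse_req through to the client so a later interrupt can cancel a blocked SETFILELOCK, and fuse_ll_interrupt deliberately does nothing but queue work: the actual cancel has to run under client_lock on the Finisher thread, not on libfuse's interrupt callpath. The EDEADLK short-circuits cover single-threaded ceph-fuse, where no second thread would ever deliver the unlock (the config_opts change further on flips fuse_multithreaded to true for the same reason). The defer-to-worker shape in miniature, with std::thread standing in for ceph's Finisher:

    #include <condition_variable>
    #include <functional>
    #include <iostream>
    #include <mutex>
    #include <queue>
    #include <thread>

    // Minimal finisher: callbacks queued from any context run on one worker.
    class Finisher {
      std::queue<std::function<void()>> q;
      std::mutex m;
      std::condition_variable cv;
      bool stopping = false;
      std::thread worker;
    public:
      Finisher() : worker([this] {
        std::unique_lock<std::mutex> l(m);
        while (true) {
          cv.wait(l, [this] { return stopping || !q.empty(); });
          if (q.empty()) return;           // stopping and fully drained
          auto fn = std::move(q.front()); q.pop();
          l.unlock(); fn(); l.lock();      // run callback outside the lock
        }
      }) {}
      void queue(std::function<void()> fn) {
        { std::lock_guard<std::mutex> l(m); q.push(std::move(fn)); }
        cv.notify_one();
      }
      ~Finisher() {
        { std::lock_guard<std::mutex> l(m); stopping = true; }
        cv.notify_one();
        worker.join();
      }
    };

    int main() {
      Finisher f;
      // stands in for ll_interrupt() queueing C_Client_RequestInterrupt
      f.queue([] { std::cout << "cancel blocked filelock request\n"; });
    }
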
snapdir_parent(0), oset((void *)this, newlayout->fl_pg_pool, ino), reported_size(0), wanted_max_size(0), requested_max_size(0), - _ref(0), ll_ref(0), - dir(0), dn_set() + _ref(0), ll_ref(0), dir(0), dn_set(), + fcntl_locks(NULL), flock_locks(NULL) { memset(&dir_layout, 0, sizeof(dir_layout)); memset(&layout, 0, sizeof(layout)); diff -Nru ceph-0.80.8/src/client/MetaRequest.h ceph-0.80.9/src/client/MetaRequest.h --- ceph-0.80.8/src/client/MetaRequest.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/client/MetaRequest.h 2015-03-09 19:51:17.000000000 +0000 @@ -9,6 +9,7 @@ #include "msg/msg_types.h" #include "include/xlist.h" #include "include/filepath.h" +#include "include/atomic.h" #include "mds/mdstypes.h" #include "common/Mutex.h" @@ -47,7 +48,7 @@ __u32 sent_on_mseq; // mseq at last submission of this request int num_fwd; // # of times i've been forwarded int retry_attempt; - int ref; + atomic_t ref; MClientReply *reply; // the reply bool kick; @@ -126,17 +127,14 @@ Dentry *old_dentry(); MetaRequest* get() { - ++ref; + ref.inc(); return this; } /// psuedo-private put method; use Client::put_request() - void _put() { - if (--ref == 0) - delete this; - } - int get_num_ref() { - return ref; + bool _put() { + int v = ref.dec(); + return v == 0; } // normal fields diff -Nru ceph-0.80.8/src/common/config_opts.h ceph-0.80.9/src/common/config_opts.h --- ceph-0.80.8/src/common/config_opts.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/common/config_opts.h 2015-03-09 19:51:17.000000000 +0000 @@ -284,7 +284,7 @@ OPTION(fuse_big_writes, OPT_BOOL, true) OPTION(fuse_atomic_o_trunc, OPT_BOOL, true) OPTION(fuse_debug, OPT_BOOL, false) -OPTION(fuse_multithreaded, OPT_BOOL, false) +OPTION(fuse_multithreaded, OPT_BOOL, true) OPTION(crush_location, OPT_STR, "") // whitespace-separated list of key=value pairs describing crush location diff -Nru ceph-0.80.8/src/common/Makefile.am ceph-0.80.9/src/common/Makefile.am --- ceph-0.80.8/src/common/Makefile.am 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/common/Makefile.am 2015-03-09 19:51:17.000000000 +0000 @@ -83,7 +83,8 @@ osd/HitSet.cc \ mds/MDSMap.cc \ mds/inode_backtrace.cc \ - mds/mdstypes.cc + mds/mdstypes.cc \ + mds/flock.cc # inject crc in common libcommon_crc_la_SOURCES = \ diff -Nru ceph-0.80.8/src/crush/builder.c ceph-0.80.9/src/crush/builder.c --- ceph-0.80.8/src/crush/builder.c 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/crush/builder.c 2015-03-09 19:51:17.000000000 +0000 @@ -11,6 +11,8 @@ #include "builder.h" #include "hash.h" +#define dprintk(args...) 
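
MetaRequest's refcount becomes atomic because get() is now also called from the interrupt path (C_Client_RequestInterrupt's constructor), outside the usual request bookkeeping, and _put() is reduced to reporting when the count reaches zero so that Client::put_request stays the single place that frees the request. The same get/put shape in standard C++ (Request is a stand-in, not the ceph class):

    #include <atomic>
    #include <cassert>

    // Intrusive refcount in the style of MetaRequest::get()/_put():
    // the last put() returns true and the owner performs the delete.
    struct Request {
      std::atomic<int> ref{1};
      Request* get() { ref.fetch_add(1, std::memory_order_relaxed); return this; }
      bool put() {                       // true => caller must free the object
        int v = ref.fetch_sub(1, std::memory_order_acq_rel) - 1;
        assert(v >= 0);
        return v == 0;
      }
    };

    int main() {
      Request* r = new Request;          // creator holds one ref
      r->get();                          // interrupt context takes another
      if (r->put()) delete r;            // not yet: interrupt still holds one
      if (r->put()) delete r;            // last ref dropped, freed here
    }
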
/* printf(args) */
+
 #define BUG_ON(x) assert(!(x))
 
 struct crush_map *crush_create()
@@ -27,6 +29,7 @@
 	m->choose_total_tries = 19;
 	m->chooseleaf_descend_once = 0;
 	m->chooseleaf_vary_r = 0;
+	m->straw_calc_version = 0;
 	return m;
 }
@@ -265,7 +268,7 @@
 		w += weights[i];
 		bucket->sum_weights[i] = w;
-		/*printf("pos %d item %d weight %d sum %d\n",
+		/*dprintk("pos %d item %d weight %d sum %d\n",
 		  i, items[i], weights[i], bucket->sum_weights[i]);*/
 	}
@@ -306,6 +309,10 @@
 static int calc_depth(int size)
 {
+	if (size == 0) {
+		return 0;
+	}
+
 	int depth = 1;
 	int t = size - 1;
 	while (t) {
@@ -334,6 +341,16 @@
 	bucket->h.type = type;
 	bucket->h.size = size;
 
+	if (size == 0) {
+		bucket->h.items = NULL;
+		bucket->h.perm = NULL;
+		bucket->h.weight = 0;
+		bucket->node_weights = NULL;
+		bucket->num_nodes = 0;
+		/* printf("size 0 depth 0 nodes 0\n"); */
+		return bucket;
+	}
+
 	bucket->h.items = malloc(sizeof(__s32)*size);
 	if (!bucket->h.items)
 		goto err;
@@ -344,7 +361,7 @@
 	/* calc tree depth */
 	depth = calc_depth(size);
 	bucket->num_nodes = 1 << depth;
-	printf("size %d depth %d nodes %d\n", size, depth, bucket->num_nodes);
+	dprintk("size %d depth %d nodes %d\n", size, depth, bucket->num_nodes);
 
 	bucket->node_weights = malloc(sizeof(__u32)*bucket->num_nodes);
 	if (!bucket->node_weights)
@@ -356,7 +373,7 @@
 	for (i=0; i<size; i++) {
 		bucket->h.items[i] = items[i];
 		node = crush_calc_tree_node(i);
-		printf("item %d node %d weight %d\n", i, node, weights[i]);
+		dprintk("item %d node %d weight %d\n", i, node, weights[i]);
 		bucket->node_weights[node] = weights[i];
 
 		if (crush_addition_is_unsafe(bucket->h.weight, weights[i]))
@@ -370,7 +387,7 @@
 				goto err;
 
 			bucket->node_weights[node] += weights[i];
-			printf(" node %d weight %d\n", node, bucket->node_weights[node]);
+			dprintk(" node %d weight %d\n", node, bucket->node_weights[node]);
 		}
 	}
 	BUG_ON(bucket->node_weights[bucket->num_nodes/2] != bucket->h.weight);
@@ -388,7 +405,34 @@
 
 /* straw bucket */
 
-int crush_calc_straw(struct crush_bucket_straw *bucket)
+/*
+ * this code was written 8 years ago. i have a vague recollection of
+ * drawing boxes underneath bars of different lengths, where the bar
+ * length represented the probability/weight, and that there was some
+ * trial and error involved in arriving at this implementation.
+ * however, reading the code now after all this time, the intuition
+ * that motivated it is lost on me. lame. my only excuse is that I now
+ * know that the approach is fundamentally flawed and am not
+ * particularly motivated to reconstruct the flawed reasoning.
+ *
+ * as best as i can remember, the idea is: sort the weights, and start
+ * with the smallest. arbitrarily scale it at 1.0 (16-bit fixed
+ * point). look at the next larger weight, and calculate the scaling
+ * factor for that straw based on the relative difference in weight so
+ * far. what's not clear to me now is why we are looking at wnext
+ * (the delta to the next bigger weight) for all remaining weights,
+ * and slicing things horizontally instead of considering just the
+ * next item or set of items. or why pow() is used the way it is.
+ *
+ * note that the original version 1 of this function made special
+ * accommodation for the case where straw lengths were identical. this
+ * is also flawed in a non-obvious way; version 2 drops the special
+ * handling and appears to work just as well.
+ *
+ * moral of the story: if you do something clever, write down why it
+ * works.
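
For readers reconstructing the intent: at selection time every item in a straw bucket draws a hash scaled by its precomputed straw length and the longest draw wins, so all crush_calc_straw has to get right is straw lengths that make hit rates track the weights. That is the part the comment concedes is flawed, and it is why the new straw_calc_version tunable exists: maps can opt into the revised calculation instead of having existing clusters silently reshuffle. A toy version of the draw-and-select step (illustrative hash and floating point, where CRUSH uses crush_hash32_3 and 16.16 fixed-point straws; note that the naive straw = weight choice below does not yield weight-proportional placement, which is exactly the hard part):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Mix x with the item index, scale by the item's straw, keep the longest.
    static double draw(uint64_t x, uint64_t item, double straw) {
      uint64_t h = x * 6364136223846793005ULL + item * 1442695040888963407ULL;
      h ^= h >> 33; h *= 0xff51afd7ed558ccdULL; h ^= h >> 33;  // murmur-style mix
      return (h & 0xffff) / 65536.0 * straw;
    }

    static int straw_select(uint64_t x, const std::vector<double>& straws) {
      int best = -1;
      double best_d = -1.0;
      for (size_t i = 0; i < straws.size(); ++i) {
        double d = draw(x, i, straws[i]);
        if (d > best_d) { best_d = d; best = (int)i; }
      }
      return best;
    }

    int main() {
      std::vector<double> straws = {1.0, 1.0, 2.0};    // item 2 "twice as long"
      int hits[3] = {0, 0, 0};
      for (uint64_t x = 0; x < 100000; ++x)
        hits[straw_select(x, straws)]++;
      printf("%d %d %d\n", hits[0], hits[1], hits[2]); // item 2 wins most often
    }
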
+ */ +int crush_calc_straw(struct crush_map *map, struct crush_bucket_straw *bucket) { int *reverse; int i, j, k; @@ -424,41 +468,82 @@ i=0; while (i < size) { - /* zero weight items get 0 length straws! */ - if (weights[reverse[i]] == 0) { - bucket->straws[reverse[i]] = 0; + if (map->straw_calc_version == 0) { + /* zero weight items get 0 length straws! */ + if (weights[reverse[i]] == 0) { + bucket->straws[reverse[i]] = 0; + i++; + continue; + } + + /* set this item's straw */ + bucket->straws[reverse[i]] = straw * 0x10000; + dprintk("item %d at %d weight %d straw %d (%lf)\n", + bucket->h.items[reverse[i]], + reverse[i], weights[reverse[i]], + bucket->straws[reverse[i]], straw); i++; - continue; - } + if (i == size) + break; - /* set this item's straw */ - bucket->straws[reverse[i]] = straw * 0x10000; - /*printf("item %d at %d weight %d straw %d (%lf)\n", - items[reverse[i]], - reverse[i], weights[reverse[i]], bucket->straws[reverse[i]], straw);*/ - i++; - if (i == size) break; - - /* same weight as previous? */ - if (weights[reverse[i]] == weights[reverse[i-1]]) { - /*printf("same as previous\n");*/ - continue; - } + /* same weight as previous? */ + if (weights[reverse[i]] == weights[reverse[i-1]]) { + dprintk("same as previous\n"); + continue; + } - /* adjust straw for next guy */ - wbelow += ((double)weights[reverse[i-1]] - lastw) * numleft; - for (j=i; jstraw_calc_version >= 1) { + /* zero weight items get 0 length straws! */ + if (weights[reverse[i]] == 0) { + bucket->straws[reverse[i]] = 0; + i++; numleft--; - else + continue; + } + + /* set this item's straw */ + bucket->straws[reverse[i]] = straw * 0x10000; + dprintk("item %d at %d weight %d straw %d (%lf)\n", + bucket->h.items[reverse[i]], + reverse[i], weights[reverse[i]], + bucket->straws[reverse[i]], straw); + i++; + if (i == size) break; - wnext = numleft * (weights[reverse[i]] - weights[reverse[i-1]]); - pbelow = wbelow / (wbelow + wnext); - /*printf("wbelow %lf wnext %lf pbelow %lf\n", wbelow, wnext, pbelow);*/ - straw *= pow((double)1.0 / pbelow, (double)1.0 / (double)numleft); + /* adjust straw for next guy */ + wbelow += ((double)weights[reverse[i-1]] - lastw) * + numleft; + numleft--; + wnext = numleft * (weights[reverse[i]] - + weights[reverse[i-1]]); + pbelow = wbelow / (wbelow + wnext); + dprintk("wbelow %lf wnext %lf pbelow %lf numleft %d\n", + wbelow, wnext, pbelow, numleft); - lastw = weights[reverse[i-1]]; + straw *= pow((double)1.0 / pbelow, (double)1.0 / + (double)numleft); + + lastw = weights[reverse[i-1]]; + } } free(reverse); @@ -466,7 +551,8 @@ } struct crush_bucket_straw * -crush_make_straw_bucket(int hash, +crush_make_straw_bucket(struct crush_map *map, + int hash, int type, int size, int *items, @@ -504,7 +590,7 @@ bucket->item_weights[i] = weights[i]; } - if (crush_calc_straw(bucket) < 0) + if (crush_calc_straw(map, bucket) < 0) goto err; return bucket; @@ -520,7 +606,8 @@ struct crush_bucket* -crush_make_bucket(int alg, int hash, int type, int size, +crush_make_bucket(struct crush_map *map, + int alg, int hash, int type, int size, int *items, int *weights) { @@ -541,7 +628,7 @@ return (struct crush_bucket *)crush_make_tree_bucket(hash, type, size, items, weights); case CRUSH_BUCKET_STRAW: - return (struct crush_bucket *)crush_make_straw_bucket(hash, type, size, items, weights); + return (struct crush_bucket *)crush_make_straw_bucket(map, hash, type, size, items, weights); } return 0; } @@ -650,27 +737,39 @@ node = crush_calc_tree_node(newsize-1); bucket->node_weights[node] = weight; + /* if the depth 
increases, we need to initialize the new root node's weight before adding the bucket item */
+	int root = bucket->num_nodes/2;
+	if (depth >= 2 && (node - 1) == root) {
+		/* if the new item is the first node in the right subtree, the
+		 * root node's initial weight is the left subtree's weight
+		 */
+		bucket->node_weights[root] = bucket->node_weights[root/2];
+	}
+
+	for (j=1; j<depth; j++) {
 		node = parent(node);
 
-		if (crush_addition_is_unsafe(bucket->node_weights[node], weight))
+		if (crush_addition_is_unsafe(bucket->node_weights[node], weight))
 			return -ERANGE;
 
 		bucket->node_weights[node] += weight;
-		printf(" node %d weight %d\n", node, bucket->node_weights[node]);
+		dprintk(" node %d weight %d\n", node, bucket->node_weights[node]);
 	}
 
 	if (crush_addition_is_unsafe(bucket->h.weight, weight))
 		return -ERANGE;
 
+	bucket->h.items[newsize-1] = item;
 	bucket->h.weight += weight;
 	bucket->h.size++;
 	return 0;
 }
 
-int crush_add_straw_bucket_item(struct crush_bucket_straw *bucket, int item, int weight)
+int crush_add_straw_bucket_item(struct crush_map *map,
+				struct crush_bucket_straw *bucket,
+				int item, int weight)
 {
 	int newsize = bucket->h.size + 1;
@@ -703,13 +802,14 @@
 	if (crush_addition_is_unsafe(bucket->h.weight, weight))
 		return -ERANGE;
 
-	bucket->h.weight += weight;
-	bucket->h.size++;
+	bucket->h.weight += weight;
+	bucket->h.size++;
 
-	return crush_calc_straw(bucket);
+	return crush_calc_straw(map, bucket);
 }
 
-int crush_bucket_add_item(struct crush_bucket *b, int item, int weight)
+int crush_bucket_add_item(struct crush_map *map,
+			  struct crush_bucket *b, int item, int weight)
 {
 	/* invalidate perm cache */
 	b->perm_n = 0;
@@ -722,7 +822,7 @@
 	case CRUSH_BUCKET_TREE:
 		return crush_add_tree_bucket_item((struct crush_bucket_tree *)b, item, weight);
 	case CRUSH_BUCKET_STRAW:
-		return crush_add_straw_bucket_item((struct crush_bucket_straw *)b, item, weight);
+		return crush_add_straw_bucket_item(map, (struct crush_bucket_straw *)b, item, weight);
 	default:
 		return -1;
 	}
@@ -768,7 +868,7 @@
 {
 	unsigned i, j;
 	int newsize;
-	int weight;
+	unsigned weight;
 
 	for (i = 0; i < bucket->h.size; i++)
 		if (bucket->h.items[i] == item)
@@ -820,7 +920,7 @@
 	for (i = 0; i < bucket->h.size; i++) {
 		int node;
-		int weight;
+		unsigned weight;
 		int j;
 		int depth = calc_depth(bucket->h.size);
@@ -834,7 +934,7 @@
 		for (j = 1; j < depth; j++) {
 			node = parent(node);
 			bucket->node_weights[node] -= weight;
-			printf(" node %d weight %d\n", node, bucket->node_weights[node]);
+			dprintk(" node %d weight %d\n", node, bucket->node_weights[node]);
 		}
 		if (weight < bucket->h.weight)
 			bucket->h.weight -= weight;
@@ -886,7 +986,8 @@
 	return 0;
 }
 
-int crush_remove_straw_bucket_item(struct crush_bucket_straw *bucket, int item)
+int crush_remove_straw_bucket_item(struct crush_map *map,
+				   struct crush_bucket_straw *bucket, int item)
 {
 	int newsize = bucket->h.size - 1;
 	unsigned i, j;
@@ -931,10 +1032,10 @@
 		bucket->straws = _realloc;
 	}
 
-	return crush_calc_straw(bucket);
+	return crush_calc_straw(map, bucket);
 }
 
-int crush_bucket_remove_item(struct crush_bucket *b, int item)
+int crush_bucket_remove_item(struct crush_map *map, struct crush_bucket *b, int item)
 {
 	/* invalidate perm cache */
 	b->perm_n = 0;
@@ -947,7 +1048,7 @@
 	case CRUSH_BUCKET_TREE:
 		return crush_remove_tree_bucket_item((struct crush_bucket_tree *)b, item);
 	case CRUSH_BUCKET_STRAW:
-		return crush_remove_straw_bucket_item((struct crush_bucket_straw *)b, item);
+		return crush_remove_straw_bucket_item(map, (struct crush_bucket_straw *)b, item);
 	default:
 		return -1;
 	}
@@ -1016,7 +1117,9 @@
 	return diff;
 }
 
-int crush_adjust_straw_bucket_item_weight(struct crush_bucket_straw *bucket, int item, 
int weight) +int crush_adjust_straw_bucket_item_weight(struct crush_map *map, + struct crush_bucket_straw *bucket, + int item, int weight) { unsigned idx; int diff; @@ -1032,14 +1135,16 @@ bucket->item_weights[idx] = weight; bucket->h.weight += diff; - r = crush_calc_straw(bucket); + r = crush_calc_straw(map, bucket); if (r < 0) return r; return diff; } -int crush_bucket_adjust_item_weight(struct crush_bucket *b, int item, int weight) +int crush_bucket_adjust_item_weight(struct crush_map *map, + struct crush_bucket *b, + int item, int weight) { switch (b->alg) { case CRUSH_BUCKET_UNIFORM: @@ -1052,7 +1157,8 @@ return crush_adjust_tree_bucket_item_weight((struct crush_bucket_tree *)b, item, weight); case CRUSH_BUCKET_STRAW: - return crush_adjust_straw_bucket_item_weight((struct crush_bucket_straw *)b, + return crush_adjust_straw_bucket_item_weight(map, + (struct crush_bucket_straw *)b, item, weight); default: return -1; @@ -1155,6 +1261,7 @@ bucket->h.weight += bucket->item_weights[i]; } + crush_calc_straw(crush, bucket); return 0; } diff -Nru ceph-0.80.8/src/crush/builder.h ceph-0.80.9/src/crush/builder.h --- ceph-0.80.8/src/crush/builder.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/crush/builder.h 2015-03-09 19:51:17.000000000 +0000 @@ -16,12 +16,12 @@ extern int crush_add_bucket(struct crush_map *map, int bucketno, struct crush_bucket *bucket, int *idout); -struct crush_bucket *crush_make_bucket(int alg, int hash, int type, int size, int *items, int *weights); -extern int crush_bucket_add_item(struct crush_bucket *bucket, int item, int weight); -extern int crush_bucket_adjust_item_weight(struct crush_bucket *bucket, int item, int weight); +struct crush_bucket *crush_make_bucket(struct crush_map *map, int alg, int hash, int type, int size, int *items, int *weights); +extern int crush_bucket_add_item(struct crush_map *map, struct crush_bucket *bucket, int item, int weight); +extern int crush_bucket_adjust_item_weight(struct crush_map *map, struct crush_bucket *bucket, int item, int weight); extern int crush_reweight_bucket(struct crush_map *crush, struct crush_bucket *bucket); extern int crush_remove_bucket(struct crush_map *map, struct crush_bucket *bucket); -extern int crush_bucket_remove_item(struct crush_bucket *bucket, int item); +extern int crush_bucket_remove_item(struct crush_map *map, struct crush_bucket *bucket, int item); struct crush_bucket_uniform * crush_make_uniform_bucket(int hash, int type, int size, @@ -36,7 +36,8 @@ int *items, /* in leaf order */ int *weights); struct crush_bucket_straw * -crush_make_straw_bucket(int hash, int type, int size, +crush_make_straw_bucket(struct crush_map *map, + int hash, int type, int size, int *items, int *weights); diff -Nru ceph-0.80.8/src/crush/CrushCompiler.cc ceph-0.80.9/src/crush/CrushCompiler.cc --- ceph-0.80.8/src/crush/CrushCompiler.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/crush/CrushCompiler.cc 2015-03-09 19:51:17.000000000 +0000 @@ -191,6 +191,8 @@ out << "tunable chooseleaf_descend_once " << crush.get_chooseleaf_descend_once() << "\n"; if (crush.get_chooseleaf_vary_r() != 0) out << "tunable chooseleaf_vary_r " << crush.get_chooseleaf_vary_r() << "\n"; + if (crush.get_straw_calc_version() != 0) + out << "tunable straw_calc_version " << crush.get_straw_calc_version() << "\n"; out << "\n# devices\n"; for (int i=0; i out; if (use_crush) { - if (output_statistics) - err << "CRUSH"; // prepend CRUSH to placement output + if (output_mappings) + err << "CRUSH"; // prepend CRUSH to placement output 
crush.do_rule(r, x, out, nr, weight); } else { - if (output_statistics) - err << "RNG"; // prepend RNG to placement output to denote simulation + if (output_mappings) + err << "RNG"; // prepend RNG to placement output to denote simulation // test our new monte carlo placement generator random_placement(r, out, nr, weight); } - if (output_statistics) - err << " rule " << r << " x " << x << " " << out << std::endl; + if (output_mappings) + err << " rule " << r << " x " << x << " " << out << std::endl; if (output_data_file) write_integer_indexed_vector_data_string(tester_data.placement_information, x, out); @@ -539,14 +539,14 @@ if (output_statistics) for (unsigned i = 0; i < per.size(); i++) { - if (output_utilization && num_batches > 1){ + if (output_utilization) { if (num_objects_expected[i] > 0 && per[i] > 0) { err << " device " << i << ":\t" << "\t" << " stored " << ": " << per[i] << "\t" << " expected " << ": " << num_objects_expected[i] << std::endl; } - } else if (output_utilization_all && num_batches > 1) { + } else if (output_utilization_all) { err << " device " << i << ":\t" << "\t" << " stored " << ": " << per[i] << "\t" << " expected " << ": " << num_objects_expected[i] diff -Nru ceph-0.80.8/src/crush/CrushTester.h ceph-0.80.9/src/crush/CrushTester.h --- ceph-0.80.8/src/crush/CrushTester.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/crush/CrushTester.h 2015-03-09 19:51:17.000000000 +0000 @@ -27,6 +27,7 @@ bool output_utilization; bool output_utilization_all; bool output_statistics; + bool output_mappings; bool output_bad_mappings; bool output_choose_tries; @@ -176,6 +177,7 @@ output_utilization(false), output_utilization_all(false), output_statistics(false), + output_mappings(false), output_bad_mappings(false), output_choose_tries(false), output_data_file(false), @@ -226,6 +228,13 @@ return output_statistics; } + void set_output_mappings(bool b) { + output_mappings = b; + } + bool get_output_mappings() const { + return output_mappings; + } + void set_output_bad_mappings(bool b) { output_bad_mappings = b; } diff -Nru ceph-0.80.8/src/crush/CrushWrapper.cc ceph-0.80.9/src/crush/CrushWrapper.cc --- ceph-0.80.8/src/crush/CrushWrapper.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/crush/CrushWrapper.cc 2015-03-09 19:51:17.000000000 +0000 @@ -150,10 +150,10 @@ for (unsigned i=0; isize; ++i) { int id = b->items[i]; if (id == item) { - adjust_item_weight(cct, item, 0); ldout(cct, 5) << "remove_item removing item " << item << " from bucket " << b->id << dendl; - crush_bucket_remove_item(b, item); + crush_bucket_remove_item(crush, b, item); + adjust_item_weight(cct, b->id, b->weight); ret = 0; } } @@ -197,9 +197,9 @@ for (unsigned i=0; isize; ++i) { int id = b->items[i]; if (id == item) { - adjust_item_weight(cct, item, 0); ldout(cct, 5) << "_remove_item_under removing item " << item << " from bucket " << b->id << dendl; - crush_bucket_remove_item(b, item); + crush_bucket_remove_item(crush, b, item); + adjust_item_weight(cct, b->id, b->weight); ret = 0; } else if (id < 0) { int r = remove_item_under(cct, item, id, unlink_only); @@ -459,6 +459,8 @@ int cur = item; + // create locations if locations don't exist and add child in location with 0 weight + // the more detail in the insert_item method declaration in CrushWrapper.h for (map::iterator p = type_map.begin(); p != type_map.end(); ++p) { // ignore device type if (p->first == 0) @@ -518,17 +520,17 @@ ldout(cct, 5) << "insert_item adding " << cur << " weight " << weight << " to bucket " << id << dendl; - int r = 
crush_bucket_add_item(b, cur, 0); + int r = crush_bucket_add_item(crush, b, cur, 0); assert (!r); + break; + } - // now that we've added the (0-weighted) item and any parent buckets, adjust the weight. - adjust_item_weightf(cct, item, weight); - + // adjust the item's weight in location + if(adjust_item_weightf_in_loc(cct, item, weight, loc) > 0) { if (item >= crush->max_devices) { crush->max_devices = item + 1; ldout(cct, 5) << "insert_item max_devices now " << crush->max_devices << dendl; } - return 0; } @@ -620,7 +622,7 @@ if (old_iweight != iweight) { ldout(cct, 5) << "update_item " << item << " adjusting weight " << ((float)old_iweight/(float)0x10000) << " -> " << weight << dendl; - adjust_item_weight(cct, item, iweight); + adjust_item_weight_in_loc(cct, item, iweight, loc); ret = 1; } if (get_item_name(item) != name) { @@ -641,7 +643,7 @@ return ret; } -int CrushWrapper::get_item_weight(int id) +int CrushWrapper::get_item_weight(int id) const { for (int bidx = 0; bidx < crush->max_buckets; bidx++) { crush_bucket *b = crush->buckets[bidx]; @@ -654,6 +656,24 @@ return -ENOENT; } +int CrushWrapper::get_item_weight_in_loc(int id, const map &loc) +{ + for (map::const_iterator l = loc.begin(); l != loc.end(); l++) { + int bid = get_item_id(l->second); + if (!bucket_exists(bid)) + continue; + crush_bucket *b = get_bucket(bid); + if ( b == NULL) + continue; + for (unsigned int i = 0; i < b->size; i++) { + if (b->items[i] == id) { + return crush_get_bucket_item_weight(b, i); + } + } + } + return -ENOENT; +} + int CrushWrapper::adjust_item_weight(CephContext *cct, int id, int weight) { ldout(cct, 5) << "adjust_item_weight " << id << " weight " << weight << dendl; @@ -664,7 +684,7 @@ continue; for (unsigned i = 0; i < b->size; i++) { if (b->items[i] == id) { - int diff = crush_bucket_adjust_item_weight(b, id, weight); + int diff = crush_bucket_adjust_item_weight(crush, b, id, weight); ldout(cct, 5) << "adjust_item_weight " << id << " diff " << diff << " in bucket " << bidx << dendl; adjust_item_weight(cct, -1 - bidx, b->weight); changed++; @@ -676,7 +696,33 @@ return changed; } -bool CrushWrapper::check_item_present(int id) +int CrushWrapper::adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map& loc) +{ + ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " weight " << weight << " in " << loc << dendl; + int changed = 0; + + for (map::const_iterator l = loc.begin(); l != loc.end(); l++) { + int bid = get_item_id(l->second); + if (!bucket_exists(bid)) + continue; + crush_bucket *b = get_bucket(bid); + if ( b == NULL) + continue; + for (unsigned int i = 0; i < b->size; i++) { + if (b->items[i] == id) { + int diff = crush_bucket_adjust_item_weight(crush, b, id, weight); + ldout(cct, 5) << "adjust_item_weight_in_loc " << id << " diff " << diff << " in bucket " << bid << dendl; + adjust_item_weight(cct, bid, b->weight); + changed++; + } + } + } + if (!changed) + return -ENOENT; + return changed; +} + +bool CrushWrapper::check_item_present(int id) const { bool found = false; @@ -968,6 +1014,7 @@ ::encode(crush->choose_total_tries, bl); ::encode(crush->chooseleaf_descend_once, bl); ::encode(crush->chooseleaf_vary_r, bl); + ::encode(crush->straw_calc_version, bl); } static void decode_32_or_64_string_map(map& m, bufferlist::iterator& blp) @@ -1051,6 +1098,9 @@ if (!blp.end()) { ::decode(crush->chooseleaf_vary_r, blp); } + if (!blp.end()) { + ::decode(crush->straw_calc_version, blp); + } finalize(); } catch (...) 
{ @@ -1234,6 +1284,8 @@ f->dump_int("choose_local_fallback_tries", get_choose_local_fallback_tries()); f->dump_int("choose_total_tries", get_choose_total_tries()); f->dump_int("chooseleaf_descend_once", get_chooseleaf_descend_once()); + f->dump_int("chooseleaf_vary_r", get_chooseleaf_vary_r()); + f->dump_int("straw_calc_version", get_straw_calc_version()); // be helpful about it if (has_firefly_tunables()) @@ -1249,6 +1301,9 @@ f->dump_int("require_feature_tunables", (int)has_nondefault_tunables()); f->dump_int("require_feature_tunables2", (int)has_nondefault_tunables2()); + f->dump_int("require_feature_tunables3", (int)has_nondefault_tunables3()); + f->dump_int("has_v2_rules", (int)has_v2_rules()); + f->dump_int("has_v3_rules", (int)has_v3_rules()); } void CrushWrapper::dump_rules(Formatter *f) const diff -Nru ceph-0.80.8/src/crush/CrushWrapper.h ceph-0.80.9/src/crush/CrushWrapper.h --- ceph-0.80.8/src/crush/CrushWrapper.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/crush/CrushWrapper.h 2015-03-09 19:51:17.000000000 +0000 @@ -52,24 +52,23 @@ class CrushWrapper { mutable Mutex mapper_lock; public: - struct crush_map *crush; std::map type_map; /* bucket/device type names */ std::map name_map; /* bucket/device names */ std::map rule_name_map; - /* reverse maps */ - bool have_rmaps; - std::map type_rmap, name_rmap, rule_name_rmap; - private: - void build_rmaps() { + struct crush_map *crush; + /* reverse maps */ + mutable bool have_rmaps; + mutable std::map type_rmap, name_rmap, rule_name_rmap; + void build_rmaps() const { if (have_rmaps) return; build_rmap(type_map, type_rmap); build_rmap(name_map, name_rmap); build_rmap(rule_name_map, rule_name_rmap); have_rmaps = true; } - void build_rmap(const map &f, std::map &r) { + void build_rmap(const map &f, std::map &r) const { r.clear(); for (std::map::const_iterator p = f.begin(); p != f.end(); ++p) r[p->second] = p->first; @@ -88,6 +87,8 @@ crush_destroy(crush); } + crush_map *get_crush_map() { return crush; } + /* building */ void create() { if (crush) @@ -124,12 +125,15 @@ void set_tunables_legacy() { set_tunables_argonaut(); + crush->straw_calc_version = 0; } void set_tunables_optimal() { set_tunables_firefly(); + crush->straw_calc_version = 1; } void set_tunables_default() { set_tunables_bobtail(); + crush->straw_calc_version = 1; } int get_choose_local_tries() const { @@ -167,13 +171,21 @@ crush->chooseleaf_vary_r = n; } + int get_straw_calc_version() const { + return crush->straw_calc_version; + } + void set_straw_calc_version(int n) { + crush->straw_calc_version = n; + } + bool has_argonaut_tunables() const { return crush->choose_local_tries == 2 && crush->choose_local_fallback_tries == 5 && crush->choose_total_tries == 19 && crush->chooseleaf_descend_once == 0 && - crush->chooseleaf_vary_r == 0; + crush->chooseleaf_vary_r == 0 && + crush->straw_calc_version == 0; } bool has_bobtail_tunables() const { return @@ -181,7 +193,8 @@ crush->choose_local_fallback_tries == 0 && crush->choose_total_tries == 50 && crush->chooseleaf_descend_once == 1 && - crush->chooseleaf_vary_r == 0; + crush->chooseleaf_vary_r == 0 && + crush->straw_calc_version == 0; } bool has_firefly_tunables() const { return @@ -189,7 +202,8 @@ crush->choose_local_fallback_tries == 0 && crush->choose_total_tries == 50 && crush->chooseleaf_descend_once == 1 && - crush->chooseleaf_vary_r == 1; + crush->chooseleaf_vary_r == 1 && + crush->straw_calc_version == 0; } bool has_optimal_tunables() const { @@ -223,7 +237,7 @@ int get_num_type_names() const { return 
type_map.size(); } - int get_type_id(const string& name) { + int get_type_id(const string& name) const { build_rmaps(); if (type_rmap.count(name)) return type_rmap[name]; @@ -242,14 +256,14 @@ } // item/bucket names - bool name_exists(const string& name) { + bool name_exists(const string& name) const { build_rmaps(); return name_rmap.count(name); } bool item_exists(int i) { return name_map.count(i); } - int get_item_id(const string& name) { + int get_item_id(const string& name) const { build_rmaps(); if (name_rmap.count(name)) return name_rmap[name]; @@ -271,11 +285,11 @@ } // rule names - bool rule_exists(string name) { + bool rule_exists(string name) const { build_rmaps(); return rule_name_rmap.count(name); } - int get_rule_id(string name) { + int get_rule_id(string name) const { build_rmaps(); if (rule_name_rmap.count(name)) return rule_name_rmap[name]; @@ -542,19 +556,27 @@ * @param id item id to check * @return weight of item */ - int get_item_weight(int id); - float get_item_weightf(int id) { + int get_item_weight(int id) const; + float get_item_weightf(int id) const { return (float)get_item_weight(id) / (float)0x10000; } + int get_item_weight_in_loc(int id, const map &loc); + float get_item_weightf_in_loc(int id, const map &loc) { + return (float)get_item_weight_in_loc(id, loc) / (float)0x10000; + } int adjust_item_weight(CephContext *cct, int id, int weight); int adjust_item_weightf(CephContext *cct, int id, float weight) { return adjust_item_weight(cct, id, (int)(weight * (float)0x10000)); } + int adjust_item_weight_in_loc(CephContext *cct, int id, int weight, const map& loc); + int adjust_item_weightf_in_loc(CephContext *cct, int id, float weight, const map& loc) { + return adjust_item_weight_in_loc(cct, id, (int)(weight * (float)0x10000), loc); + } void reweight(CephContext *cct); /// check if item id is present in the map hierarchy - bool check_item_present(int id); + bool check_item_present(int id) const; /*** devices ***/ @@ -745,9 +767,6 @@ crush_bucket *b = get_bucket(item); unsigned bucket_weight = b->weight; - // zero out the bucket weight - adjust_item_weight(cct, item, 0); - // get where the bucket is located pair bucket_location = get_immediate_parent(item); @@ -758,8 +777,12 @@ crush_bucket *parent_bucket = get_bucket(parent_id); if (!IS_ERR(parent_bucket)) { + // zero out the bucket weight + crush_bucket_adjust_item_weight(crush, parent_bucket, item, 0); + adjust_item_weight(cct, parent_bucket->id, parent_bucket->weight); + // remove the bucket from the parent - crush_bucket_remove_item(parent_bucket, item); + crush_bucket_remove_item(crush, parent_bucket, item); } else if (PTR_ERR(parent_bucket) != -ENOENT) { return PTR_ERR(parent_bucket); } @@ -839,7 +862,7 @@ int *items, int *weights, int *idout) { if (type == 0) return -EINVAL; - crush_bucket *b = crush_make_bucket(alg, hash, type, size, items, weights); + crush_bucket *b = crush_make_bucket(crush, alg, hash, type, size, items, weights); assert(b); return crush_add_bucket(crush, bucketno, b, idout); } diff -Nru ceph-0.80.8/src/.git_version ceph-0.80.9/src/.git_version --- ceph-0.80.8/src/.git_version 2015-01-14 18:18:24.000000000 +0000 +++ ceph-0.80.9/src/.git_version 2015-03-09 19:54:24.000000000 +0000 @@ -1,2 +1,2 @@ -69eaad7f8308f21573c604f121956e64679a52a7 -v0.80.8 +b5a67f0e1d15385bc0d60a6da6e7fc810bde6047 +v0.80.9 diff -Nru ceph-0.80.8/src/include/ceph_fs.h ceph-0.80.9/src/include/ceph_fs.h --- ceph-0.80.8/src/include/ceph_fs.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/include/ceph_fs.h 
2015-03-09 19:51:17.000000000 +0000 @@ -505,8 +505,10 @@ __le32 dist[]; } __attribute__ ((packed)); -#define CEPH_LOCK_FCNTL 1 -#define CEPH_LOCK_FLOCK 2 +#define CEPH_LOCK_FCNTL 1 +#define CEPH_LOCK_FLOCK 2 +#define CEPH_LOCK_FCNTL_INTR 3 +#define CEPH_LOCK_FLOCK_INTR 4 #define CEPH_LOCK_SHARED 1 #define CEPH_LOCK_EXCL 2 diff -Nru ceph-0.80.8/src/init-radosgw.sysv ceph-0.80.9/src/init-radosgw.sysv --- ceph-0.80.8/src/init-radosgw.sysv 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/init-radosgw.sysv 2015-03-09 19:51:17.000000000 +0000 @@ -87,8 +87,8 @@ if [ $SYSTEMD -eq 1 ]; then systemd-run -r sudo -u "$user" bash -c "ulimit -n 32768; $RADOSGW -n $name" else - #start-stop-daemon --start -u $user -x $RADOSGW -- -n $name - daemon --user="$user" "ulimit -n 32768; $RADOSGW -n $name" + ulimit -n 32768 + daemon --user="$user" "$RADOSGW -n $name" fi echo "Starting $name..." done diff -Nru ceph-0.80.8/src/librbd/AioCompletion.h ceph-0.80.9/src/librbd/AioCompletion.h --- ceph-0.80.8/src/librbd/AioCompletion.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/librbd/AioCompletion.h 2015-03-09 19:51:17.000000000 +0000 @@ -93,6 +93,10 @@ void init_time(ImageCtx *i, aio_type_t t) { ictx = i; + { + Mutex::Locker l(ictx->aio_lock); + ++ictx->pending_aio; + } aio_type = t; start_time = ceph_clock_now(ictx->cct); } @@ -114,6 +118,14 @@ lderr(ictx->cct) << "completed invalid aio_type: " << aio_type << dendl; break; } + + { + Mutex::Locker l(ictx->aio_lock); + assert(ictx->pending_aio != 0); + --ictx->pending_aio; + ictx->pending_aio_cond.Signal(); + } + if (complete_cb) { complete_cb(rbd_comp, complete_arg); } diff -Nru ceph-0.80.8/src/librbd/ImageCtx.cc ceph-0.80.9/src/librbd/ImageCtx.cc --- ceph-0.80.8/src/librbd/ImageCtx.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/librbd/ImageCtx.cc 2015-03-09 19:51:17.000000000 +0000 @@ -45,13 +45,15 @@ snap_lock("librbd::ImageCtx::snap_lock"), parent_lock("librbd::ImageCtx::parent_lock"), refresh_lock("librbd::ImageCtx::refresh_lock"), + aio_lock("librbd::ImageCtx::aio_lock"), extra_read_flags(0), old_format(true), order(0), size(0), features(0), format_string(NULL), id(image_id), parent(NULL), stripe_unit(0), stripe_count(0), - object_cacher(NULL), writeback_handler(NULL), object_set(NULL) + object_cacher(NULL), writeback_handler(NULL), object_set(NULL), + pending_aio(0) { md_ctx.dup(p); data_ctx.dup(p); @@ -586,6 +588,7 @@ int r = flush_cache(); if (r) lderr(cct) << "flush_cache returned " << r << dendl; + wait_for_pending_aio(); cache_lock.Lock(); bool unclean = object_cacher->release_set(object_set); cache_lock.Unlock(); @@ -655,5 +658,12 @@ << ", object overlap " << len << " from image extents " << objectx << dendl; return len; - } + } + + void ImageCtx::wait_for_pending_aio() { + Mutex::Locker l(aio_lock); + while (pending_aio > 0) { + pending_aio_cond.Wait(aio_lock); + } + } } diff -Nru ceph-0.80.8/src/librbd/ImageCtx.h ceph-0.80.9/src/librbd/ImageCtx.h --- ceph-0.80.8/src/librbd/ImageCtx.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/librbd/ImageCtx.h 2015-03-09 19:51:17.000000000 +0000 @@ -10,6 +10,7 @@ #include #include +#include "common/Cond.h" #include "common/Mutex.h" #include "common/RWLock.h" #include "common/snap_types.h" @@ -59,7 +60,8 @@ /** * Lock ordering: - * md_lock, cache_lock, snap_lock, parent_lock, refresh_lock + * md_lock, cache_lock, snap_lock, parent_lock, refresh_lock, + * aio_lock */ RWLock md_lock; // protects access to the mutable image metadata that // isn't guarded by other locks below @@ 
-68,6 +70,7 @@ RWLock snap_lock; // protects snapshot-related member variables: RWLock parent_lock; // protects parent_md and parent Mutex refresh_lock; // protects refresh_seq and last_refresh + Mutex aio_lock; // protects pending_aio and pending_aio_cond unsigned extra_read_flags; @@ -89,6 +92,9 @@ LibrbdWriteback *writeback_handler; ObjectCacher::ObjectSet *object_set; + Cond pending_aio_cond; + uint64_t pending_aio; + /** * Either image_name or image_id must be set. * If id is not known, pass the empty std::string, @@ -147,7 +153,7 @@ librados::snap_t in_snap_id); uint64_t prune_parent_extents(vector >& objectx, uint64_t overlap); - + void wait_for_pending_aio(); }; } diff -Nru ceph-0.80.8/src/librbd/internal.cc ceph-0.80.9/src/librbd/internal.cc --- ceph-0.80.8/src/librbd/internal.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/librbd/internal.cc 2015-03-09 19:51:17.000000000 +0000 @@ -420,7 +420,10 @@ it != pools.end(); ++it) { IoCtx ioctx; r = rados.ioctx_create(it->c_str(), ioctx); - if (r < 0) { + if (r == -ENOENT) { + ldout(cct, 1) << "pool " << *it << " no longer exists" << dendl; + continue; + } else if (r < 0) { lderr(cct) << "Error accessing child image pool " << *it << dendl; return r; } @@ -638,32 +641,46 @@ parent_spec pspec(ictx->md_ctx.get_id(), ictx->id, snap_id); // search all pools for children depending on this snapshot Rados rados(ictx->md_ctx); - std::list pools; - rados.pool_list(pools); - std::set children; - for (std::list::const_iterator it = pools.begin(); it != pools.end(); ++it) { - IoCtx pool_ioctx; - r = rados.ioctx_create(it->c_str(), pool_ioctx); - if (r < 0) { - lderr(ictx->cct) << "snap_unprotect: can't create ioctx for pool " - << *it << dendl; - goto reprotect_and_return_err; - } - r = cls_client::get_children(&pool_ioctx, RBD_CHILDREN, pspec, children); - // key should not exist for this parent if there is no entry - if (((r < 0) && (r != -ENOENT))) { - lderr(ictx->cct) << "can't get children for pool " << *it << dendl; - goto reprotect_and_return_err; - } - // if we found a child, can't unprotect - if (r == 0) { - lderr(ictx->cct) << "snap_unprotect: can't unprotect; at least " - << children.size() << " child(ren) in pool " << it->c_str() << dendl; - r = -EBUSY; - goto reprotect_and_return_err; + + // protect against pools being renamed/deleted + bool retry_pool_check; + do { + retry_pool_check = false; + + std::list pools; + rados.pool_list(pools); + for (std::list::const_iterator it = pools.begin(); it != pools.end(); ++it) { + IoCtx pool_ioctx; + r = rados.ioctx_create(it->c_str(), pool_ioctx); + if (r == -ENOENT) { + ldout(ictx->cct, 1) << "pool " << *it << " no longer exists" << dendl; + retry_pool_check = true; + break; + } else if (r < 0) { + lderr(ictx->cct) << "snap_unprotect: can't create ioctx for pool " + << *it << dendl; + goto reprotect_and_return_err; + } + + std::set children; + r = cls_client::get_children(&pool_ioctx, RBD_CHILDREN, pspec, children); + // key should not exist for this parent if there is no entry + if (((r < 0) && (r != -ENOENT))) { + lderr(ictx->cct) << "can't get children for pool " << *it << dendl; + goto reprotect_and_return_err; + } + // if we found a child, can't unprotect + if (r == 0) { + lderr(ictx->cct) << "snap_unprotect: can't unprotect; at least " + << children.size() << " child(ren) in pool " + << it->c_str() << dendl; + r = -EBUSY; + goto reprotect_and_return_err; + } + pool_ioctx.close(); // last one out will self-destruct } - pool_ioctx.close(); // last one out will self-destruct - } + } 
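
Two separate races are being closed in this stretch. The do/while(retry_pool_check) loop that resumes just below restarts the child-image scan when a pool disappears mid-iteration, instead of failing the whole snap_unprotect. And the pending_aio counter introduced in the AioCompletion/ImageCtx hunks makes shutdown wait for in-flight librados completions rather than tearing down image state underneath them. The counting half in standard C++ (PendingOps is a stand-in; ceph uses its own Mutex and Cond):

    #include <condition_variable>
    #include <cstdint>
    #include <mutex>
    #include <thread>

    // Track in-flight async ops; teardown blocks until the count drains,
    // mirroring ImageCtx::wait_for_pending_aio().
    class PendingOps {
      std::mutex m;
      std::condition_variable cv;
      uint64_t pending = 0;
    public:
      void start()  { std::lock_guard<std::mutex> l(m); ++pending; }
      void finish() {
        std::lock_guard<std::mutex> l(m);
        --pending;
        cv.notify_all();                 // wake anyone draining
      }
      void wait_for_pending() {          // call before freeing shared state
        std::unique_lock<std::mutex> l(m);
        cv.wait(l, [this] { return pending == 0; });
      }
    };

    int main() {
      PendingOps ops;
      ops.start();                           // aio submitted (init_time above)
      std::thread t([&] { ops.finish(); });  // completion fires asynchronously
      ops.wait_for_pending();                // returns only after finish()
      t.join();
    }
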
while(retry_pool_check); + // didn't find any child in any pool, go ahead with unprotect r = cls_client::set_protection_status(&ictx->md_ctx, ictx->header_oid, @@ -2122,10 +2139,12 @@ void close_image(ImageCtx *ictx) { ldout(ictx->cct, 20) << "close_image " << ictx << dendl; - if (ictx->object_cacher) + if (ictx->object_cacher) { ictx->shutdown_cache(); // implicitly flushes - else + } else { flush(ictx); + ictx->wait_for_pending_aio(); + } if (ictx->parent) { close_image(ictx->parent); diff -Nru ceph-0.80.8/src/Makefile-env.am ceph-0.80.9/src/Makefile-env.am --- ceph-0.80.8/src/Makefile-env.am 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/Makefile-env.am 2015-03-09 19:51:17.000000000 +0000 @@ -150,6 +150,7 @@ LIBCLIENT_FUSE = libclient_fuse.la LIBRADOS = librados.la LIBRGW = librgw.la +LIBCIVETWEB = libcivetweb.la LIBRBD = librbd.la LIBCEPHFS = libcephfs.la LIBERASURE_CODE = liberasure_code.la diff -Nru ceph-0.80.8/src/Makefile.in ceph-0.80.9/src/Makefile.in --- ceph-0.80.8/src/Makefile.in 2015-01-14 18:17:25.000000000 +0000 +++ ceph-0.80.9/src/Makefile.in 2015-03-09 19:53:25.000000000 +0000 @@ -144,7 +144,7 @@ @LINUX_TRUE@am__append_32 = -lrt @LINUX_TRUE@am__append_33 = -export-symbols-regex '^rados_.*' @LINUX_TRUE@am__append_34 = -export-symbols-regex '^rbd_.*' -@WITH_RADOSGW_TRUE@am__append_35 = librgw.la +@WITH_RADOSGW_TRUE@am__append_35 = librgw.la libcivetweb.la @WITH_RADOSGW_TRUE@am__append_36 = \ @WITH_RADOSGW_TRUE@ $(LIBRADOS) \ @WITH_RADOSGW_TRUE@ libcls_rgw_client.la \ @@ -360,6 +360,19 @@ $(AM_CXXFLAGS) $(CXXFLAGS) $(libcephfs_jni_la_LDFLAGS) \ $(LDFLAGS) -o $@ @ENABLE_CEPHFS_JAVA_TRUE@am_libcephfs_jni_la_rpath = -rpath $(libdir) +libcivetweb_la_LIBADD = +am__libcivetweb_la_SOURCES_DIST = rgw/rgw_civetweb.cc \ + rgw/rgw_civetweb_log.cc civetweb/src/civetweb.c +@WITH_RADOSGW_TRUE@am_libcivetweb_la_OBJECTS = \ +@WITH_RADOSGW_TRUE@ rgw/libcivetweb_la-rgw_civetweb.lo \ +@WITH_RADOSGW_TRUE@ rgw/libcivetweb_la-rgw_civetweb_log.lo \ +@WITH_RADOSGW_TRUE@ civetweb/src/libcivetweb_la-civetweb.lo +libcivetweb_la_OBJECTS = $(am_libcivetweb_la_OBJECTS) +libcivetweb_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CXXLD) \ + $(libcivetweb_la_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +@WITH_RADOSGW_TRUE@am_libcivetweb_la_rpath = libclient_la_DEPENDENCIES = $(LIBOSDC) $(am__DEPENDENCIES_1) am_libclient_la_OBJECTS = client/Client.lo client/Inode.lo \ client/Dentry.lo client/MetaRequest.lo \ @@ -512,7 +525,7 @@ common/bloom_filter.lo common/linux_version.lo mon/MonCap.lo \ mon/MonClient.lo mon/MonMap.lo osd/OSDMap.lo osd/osd_types.lo \ osd/ECMsgTypes.lo osd/HitSet.lo mds/MDSMap.lo \ - mds/inode_backtrace.lo mds/mdstypes.lo + mds/inode_backtrace.lo mds/mdstypes.lo mds/flock.lo libcommon_la_OBJECTS = $(am_libcommon_la_OBJECTS) libcommon_crc_la_LIBADD = am__libcommon_crc_la_SOURCES_DIST = common/sctp_crc32.c \ @@ -710,11 +723,11 @@ liblog_la_OBJECTS = $(am_liblog_la_OBJECTS) libmds_la_DEPENDENCIES = $(LIBOSDC) am_libmds_la_OBJECTS = mds/Anchor.lo mds/Capability.lo mds/Dumper.lo \ - mds/Resetter.lo mds/MDS.lo mds/flock.lo mds/locks.lo \ - mds/journal.lo mds/Server.lo mds/Mutation.lo mds/MDCache.lo \ - mds/Locker.lo mds/Migrator.lo mds/MDBalancer.lo mds/CDentry.lo \ - mds/CDir.lo mds/CInode.lo mds/LogEvent.lo mds/MDSTable.lo \ - mds/InoTable.lo mds/MDSTableClient.lo mds/MDSTableServer.lo \ + mds/Resetter.lo mds/MDS.lo mds/locks.lo mds/journal.lo \ + mds/Server.lo mds/Mutation.lo mds/MDCache.lo mds/Locker.lo \ + mds/Migrator.lo 
mds/MDBalancer.lo mds/CDentry.lo mds/CDir.lo \ + mds/CInode.lo mds/LogEvent.lo mds/MDSTable.lo mds/InoTable.lo \ + mds/MDSTableClient.lo mds/MDSTableServer.lo \ mds/AnchorServer.lo mds/AnchorClient.lo mds/SnapRealm.lo \ mds/SnapServer.lo mds/snap.lo mds/SessionMap.lo mds/MDLog.lo \ mds/MDSUtility.lo @@ -1784,8 +1797,7 @@ rgw/rgw_rest_log.cc rgw/rgw_rest_opstate.cc \ rgw/rgw_rest_replica_log.cc rgw/rgw_rest_config.cc \ rgw/rgw_http_client.cc rgw/rgw_swift.cc rgw/rgw_swift_auth.cc \ - rgw/rgw_loadgen.cc rgw/rgw_civetweb.cc rgw/rgw_civetweb_log.cc \ - civetweb/src/civetweb.c rgw/rgw_main.cc + rgw/rgw_loadgen.cc rgw/rgw_main.cc @WITH_RADOSGW_TRUE@am_radosgw_OBJECTS = rgw/rgw_resolve.$(OBJEXT) \ @WITH_RADOSGW_TRUE@ rgw/rgw_rest.$(OBJEXT) \ @WITH_RADOSGW_TRUE@ rgw/rgw_rest_swift.$(OBJEXT) \ @@ -1803,12 +1815,9 @@ @WITH_RADOSGW_TRUE@ rgw/rgw_swift.$(OBJEXT) \ @WITH_RADOSGW_TRUE@ rgw/rgw_swift_auth.$(OBJEXT) \ @WITH_RADOSGW_TRUE@ rgw/rgw_loadgen.$(OBJEXT) \ -@WITH_RADOSGW_TRUE@ rgw/rgw_civetweb.$(OBJEXT) \ -@WITH_RADOSGW_TRUE@ rgw/rgw_civetweb_log.$(OBJEXT) \ -@WITH_RADOSGW_TRUE@ civetweb/src/radosgw-civetweb.$(OBJEXT) \ @WITH_RADOSGW_TRUE@ rgw/rgw_main.$(OBJEXT) radosgw_OBJECTS = $(am_radosgw_OBJECTS) -@WITH_RADOSGW_TRUE@radosgw_DEPENDENCIES = $(LIBRGW) \ +@WITH_RADOSGW_TRUE@radosgw_DEPENDENCIES = $(LIBRGW) $(LIBCIVETWEB) \ @WITH_RADOSGW_TRUE@ $(am__DEPENDENCIES_12) \ @WITH_RADOSGW_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_6) am__radosgw_admin_SOURCES_DIST = rgw/rgw_admin.cc @@ -1887,7 +1896,7 @@ common/bloom_filter.cc common/linux_version.c mon/MonCap.cc \ mon/MonClient.cc mon/MonMap.cc osd/OSDMap.cc osd/osd_types.cc \ osd/ECMsgTypes.cc osd/HitSet.cc mds/MDSMap.cc \ - mds/inode_backtrace.cc mds/mdstypes.cc + mds/inode_backtrace.cc mds/mdstypes.cc mds/flock.cc am__objects_15 = test_build_libcommon-ceph_ver.$(OBJEXT) \ common/test_build_libcommon-DecayCounter.$(OBJEXT) \ common/test_build_libcommon-LogClient.$(OBJEXT) \ @@ -1969,7 +1978,8 @@ osd/test_build_libcommon-HitSet.$(OBJEXT) \ mds/test_build_libcommon-MDSMap.$(OBJEXT) \ mds/test_build_libcommon-inode_backtrace.$(OBJEXT) \ - mds/test_build_libcommon-mdstypes.$(OBJEXT) + mds/test_build_libcommon-mdstypes.$(OBJEXT) \ + mds/test_build_libcommon-flock.$(OBJEXT) @WITH_BUILD_TESTS_TRUE@am_test_build_libcommon_OBJECTS = test/test_build_libcommon-buildtest_skeleton.$(OBJEXT) \ @WITH_BUILD_TESTS_TRUE@ $(am__objects_15) test_build_libcommon_OBJECTS = $(am_test_build_libcommon_OBJECTS) @@ -2688,11 +2698,12 @@ $(libcls_version_client_a_SOURCES) $(libos_zfs_a_SOURCES) \ $(libarch_la_SOURCES) $(libauth_la_SOURCES) \ $(libcephfs_la_SOURCES) $(libcephfs_jni_la_SOURCES) \ - $(libclient_la_SOURCES) $(libclient_fuse_la_SOURCES) \ - $(libcls_hello_la_SOURCES) $(libcls_kvs_la_SOURCES) \ - $(libcls_lock_la_SOURCES) $(libcls_lock_client_la_SOURCES) \ - $(libcls_log_la_SOURCES) $(libcls_rbd_la_SOURCES) \ - $(libcls_rbd_client_la_SOURCES) $(libcls_refcount_la_SOURCES) \ + $(libcivetweb_la_SOURCES) $(libclient_la_SOURCES) \ + $(libclient_fuse_la_SOURCES) $(libcls_hello_la_SOURCES) \ + $(libcls_kvs_la_SOURCES) $(libcls_lock_la_SOURCES) \ + $(libcls_lock_client_la_SOURCES) $(libcls_log_la_SOURCES) \ + $(libcls_rbd_la_SOURCES) $(libcls_rbd_client_la_SOURCES) \ + $(libcls_refcount_la_SOURCES) \ $(libcls_refcount_client_la_SOURCES) \ $(libcls_replica_log_la_SOURCES) $(libcls_rgw_la_SOURCES) \ $(libcls_rgw_client_la_SOURCES) $(libcls_statelog_la_SOURCES) \ @@ -2842,7 +2853,8 @@ $(libcls_version_client_a_SOURCES) \ $(am__libos_zfs_a_SOURCES_DIST) $(libarch_la_SOURCES) 
\ $(libauth_la_SOURCES) $(libcephfs_la_SOURCES) \ - $(am__libcephfs_jni_la_SOURCES_DIST) $(libclient_la_SOURCES) \ + $(am__libcephfs_jni_la_SOURCES_DIST) \ + $(am__libcivetweb_la_SOURCES_DIST) $(libclient_la_SOURCES) \ $(am__libclient_fuse_la_SOURCES_DIST) \ $(libcls_hello_la_SOURCES) $(am__libcls_kvs_la_SOURCES_DIST) \ $(libcls_lock_la_SOURCES) $(libcls_lock_client_la_SOURCES) \ @@ -3243,10 +3255,10 @@ rgw/rgw_usage.h rgw/rgw_user.h rgw/rgw_bucket.h \ rgw/rgw_keystone.h rgw/rgw_civetweb.h rgw/rgw_civetweb_log.h \ civetweb/civetweb.h civetweb/include/civetweb.h \ - civetweb/src/md5.h cls/lock/cls_lock_types.h \ - cls/lock/cls_lock_ops.h cls/lock/cls_lock_client.h \ - cls/rbd/cls_rbd.h cls/rbd/cls_rbd_client.h \ - cls/refcount/cls_refcount_ops.h \ + civetweb/include/civetweb_conf.h civetweb/src/md5.h \ + cls/lock/cls_lock_types.h cls/lock/cls_lock_ops.h \ + cls/lock/cls_lock_client.h cls/rbd/cls_rbd.h \ + cls/rbd/cls_rbd_client.h cls/refcount/cls_refcount_ops.h \ cls/refcount/cls_refcount_client.h \ cls/version/cls_version_types.h cls/version/cls_version_ops.h \ cls/version/cls_version_client.h cls/log/cls_log_types.h \ @@ -3788,10 +3800,10 @@ rgw/rgw_usage.h rgw/rgw_user.h rgw/rgw_bucket.h \ rgw/rgw_keystone.h rgw/rgw_civetweb.h rgw/rgw_civetweb_log.h \ civetweb/civetweb.h civetweb/include/civetweb.h \ - civetweb/src/md5.h cls/lock/cls_lock_types.h \ - cls/lock/cls_lock_ops.h cls/lock/cls_lock_client.h \ - cls/rbd/cls_rbd.h cls/rbd/cls_rbd_client.h \ - cls/refcount/cls_refcount_ops.h \ + civetweb/include/civetweb_conf.h civetweb/src/md5.h \ + cls/lock/cls_lock_types.h cls/lock/cls_lock_ops.h \ + cls/lock/cls_lock_client.h cls/rbd/cls_rbd.h \ + cls/rbd/cls_rbd_client.h cls/refcount/cls_refcount_ops.h \ cls/refcount/cls_refcount_client.h \ cls/version/cls_version_types.h cls/version/cls_version_ops.h \ cls/version/cls_version_client.h cls/log/cls_log_types.h \ @@ -3975,6 +3987,7 @@ LIBCLIENT_FUSE = libclient_fuse.la LIBRADOS = librados.la LIBRGW = librgw.la +LIBCIVETWEB = libcivetweb.la LIBRBD = librbd.la LIBCEPHFS = libcephfs.la LIBERASURE_CODE = liberasure_code.la @@ -4055,7 +4068,6 @@ mds/Dumper.cc \ mds/Resetter.cc \ mds/MDS.cc \ - mds/flock.cc \ mds/locks.c \ mds/journal.cc \ mds/Server.cc \ @@ -4289,7 +4301,7 @@ common/bloom_filter.cc common/linux_version.c mon/MonCap.cc \ mon/MonClient.cc mon/MonMap.cc osd/OSDMap.cc osd/osd_types.cc \ osd/ECMsgTypes.cc osd/HitSet.cc mds/MDSMap.cc \ - mds/inode_backtrace.cc mds/mdstypes.cc + mds/inode_backtrace.cc mds/mdstypes.cc mds/flock.cc # inject crc in common libcommon_crc_la_SOURCES = common/sctp_crc32.c common/crc32c.cc \ @@ -4389,6 +4401,14 @@ @WITH_RADOSGW_TRUE@ rgw/rgw_dencoder.cc @WITH_RADOSGW_TRUE@librgw_la_CXXFLAGS = -Woverloaded-virtual ${AM_CXXFLAGS} +@WITH_RADOSGW_TRUE@CIVETWEB_INCLUDE = --include civetweb/include/civetweb_conf.h +@WITH_RADOSGW_TRUE@libcivetweb_la_SOURCES = \ +@WITH_RADOSGW_TRUE@ rgw/rgw_civetweb.cc \ +@WITH_RADOSGW_TRUE@ rgw/rgw_civetweb_log.cc \ +@WITH_RADOSGW_TRUE@ civetweb/src/civetweb.c + +@WITH_RADOSGW_TRUE@libcivetweb_la_CXXFLAGS = ${CIVETWEB_INCLUDE} -Woverloaded-virtual ${AM_CXXFLAGS} +@WITH_RADOSGW_TRUE@libcivetweb_la_CFLAGS = -Icivetweb/include ${CIVETWEB_INCLUDE} @WITH_RADOSGW_TRUE@radosgw_SOURCES = \ @WITH_RADOSGW_TRUE@ rgw/rgw_resolve.cc \ @WITH_RADOSGW_TRUE@ rgw/rgw_rest.cc \ @@ -4407,13 +4427,10 @@ @WITH_RADOSGW_TRUE@ rgw/rgw_swift.cc \ @WITH_RADOSGW_TRUE@ rgw/rgw_swift_auth.cc \ @WITH_RADOSGW_TRUE@ rgw/rgw_loadgen.cc \ -@WITH_RADOSGW_TRUE@ rgw/rgw_civetweb.cc \ -@WITH_RADOSGW_TRUE@ 
rgw/rgw_civetweb_log.cc \ -@WITH_RADOSGW_TRUE@ civetweb/src/civetweb.c \ @WITH_RADOSGW_TRUE@ rgw/rgw_main.cc -@WITH_RADOSGW_TRUE@radosgw_CFLAGS = -Icivetweb/include -@WITH_RADOSGW_TRUE@radosgw_LDADD = $(LIBRGW) $(LIBRGW_DEPS) $(RESOLV_LIBS) $(CEPH_GLOBAL) +@WITH_RADOSGW_TRUE@radosgw_CFLAGS = -I$(srcdir)/civetweb/include +@WITH_RADOSGW_TRUE@radosgw_LDADD = $(LIBRGW) $(LIBCIVETWEB) $(LIBRGW_DEPS) $(RESOLV_LIBS) $(CEPH_GLOBAL) @WITH_RADOSGW_TRUE@radosgw_admin_SOURCES = rgw/rgw_admin.cc @WITH_RADOSGW_TRUE@radosgw_admin_LDADD = $(LIBRGW) $(LIBRGW_DEPS) $(CEPH_GLOBAL) @WITH_RADOSGW_TRUE@ceph_rgw_multiparser_SOURCES = rgw/rgw_multiparser.cc @@ -5633,6 +5650,26 @@ java/native/$(DEPDIR)/$(am__dirstamp) libcephfs_jni.la: $(libcephfs_jni_la_OBJECTS) $(libcephfs_jni_la_DEPENDENCIES) $(EXTRA_libcephfs_jni_la_DEPENDENCIES) $(AM_V_CXXLD)$(libcephfs_jni_la_LINK) $(am_libcephfs_jni_la_rpath) $(libcephfs_jni_la_OBJECTS) $(libcephfs_jni_la_LIBADD) $(LIBS) +rgw/$(am__dirstamp): + @$(MKDIR_P) rgw + @: > rgw/$(am__dirstamp) +rgw/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) rgw/$(DEPDIR) + @: > rgw/$(DEPDIR)/$(am__dirstamp) +rgw/libcivetweb_la-rgw_civetweb.lo: rgw/$(am__dirstamp) \ + rgw/$(DEPDIR)/$(am__dirstamp) +rgw/libcivetweb_la-rgw_civetweb_log.lo: rgw/$(am__dirstamp) \ + rgw/$(DEPDIR)/$(am__dirstamp) +civetweb/src/$(am__dirstamp): + @$(MKDIR_P) civetweb/src + @: > civetweb/src/$(am__dirstamp) +civetweb/src/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) civetweb/src/$(DEPDIR) + @: > civetweb/src/$(DEPDIR)/$(am__dirstamp) +civetweb/src/libcivetweb_la-civetweb.lo: civetweb/src/$(am__dirstamp) \ + civetweb/src/$(DEPDIR)/$(am__dirstamp) +libcivetweb.la: $(libcivetweb_la_OBJECTS) $(libcivetweb_la_DEPENDENCIES) $(EXTRA_libcivetweb_la_DEPENDENCIES) + $(AM_V_CXXLD)$(libcivetweb_la_LINK) $(am_libcivetweb_la_rpath) $(libcivetweb_la_OBJECTS) $(libcivetweb_la_LIBADD) $(LIBS) client/$(am__dirstamp): @$(MKDIR_P) client @: > client/$(am__dirstamp) @@ -5940,6 +5977,7 @@ mds/inode_backtrace.lo: mds/$(am__dirstamp) \ mds/$(DEPDIR)/$(am__dirstamp) mds/mdstypes.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp) +mds/flock.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp) libcommon.la: $(libcommon_la_OBJECTS) $(libcommon_la_DEPENDENCIES) $(EXTRA_libcommon_la_DEPENDENCIES) $(AM_V_CXXLD)$(CXXLINK) $(libcommon_la_OBJECTS) $(libcommon_la_LIBADD) $(LIBS) common/libcommon_crc_la-sctp_crc32.lo: common/$(am__dirstamp) \ @@ -6265,7 +6303,6 @@ mds/Dumper.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp) mds/Resetter.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp) mds/MDS.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp) -mds/flock.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp) mds/locks.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp) mds/journal.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp) mds/Server.lo: mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp) @@ -6510,12 +6547,6 @@ librbd/$(DEPDIR)/$(am__dirstamp) librbd.la: $(librbd_la_OBJECTS) $(librbd_la_DEPENDENCIES) $(EXTRA_librbd_la_DEPENDENCIES) $(AM_V_CXXLD)$(librbd_la_LINK) -rpath $(libdir) $(librbd_la_OBJECTS) $(librbd_la_LIBADD) $(LIBS) -rgw/$(am__dirstamp): - @$(MKDIR_P) rgw - @: > rgw/$(am__dirstamp) -rgw/$(DEPDIR)/$(am__dirstamp): - @$(MKDIR_P) rgw/$(DEPDIR) - @: > rgw/$(DEPDIR)/$(am__dirstamp) rgw/librgw_la-librgw.lo: rgw/$(am__dirstamp) \ rgw/$(DEPDIR)/$(am__dirstamp) rgw/librgw_la-rgw_acl.lo: rgw/$(am__dirstamp) \ @@ -7572,18 +7603,6 @@ rgw/$(DEPDIR)/$(am__dirstamp) rgw/rgw_loadgen.$(OBJEXT): rgw/$(am__dirstamp) \ rgw/$(DEPDIR)/$(am__dirstamp) 
-rgw/rgw_civetweb.$(OBJEXT): rgw/$(am__dirstamp) \ - rgw/$(DEPDIR)/$(am__dirstamp) -rgw/rgw_civetweb_log.$(OBJEXT): rgw/$(am__dirstamp) \ - rgw/$(DEPDIR)/$(am__dirstamp) -civetweb/src/$(am__dirstamp): - @$(MKDIR_P) civetweb/src - @: > civetweb/src/$(am__dirstamp) -civetweb/src/$(DEPDIR)/$(am__dirstamp): - @$(MKDIR_P) civetweb/src/$(DEPDIR) - @: > civetweb/src/$(DEPDIR)/$(am__dirstamp) -civetweb/src/radosgw-civetweb.$(OBJEXT): civetweb/src/$(am__dirstamp) \ - civetweb/src/$(DEPDIR)/$(am__dirstamp) rgw/rgw_main.$(OBJEXT): rgw/$(am__dirstamp) \ rgw/$(DEPDIR)/$(am__dirstamp) radosgw$(EXEEXT): $(radosgw_OBJECTS) $(radosgw_DEPENDENCIES) $(EXTRA_radosgw_DEPENDENCIES) @@ -7794,6 +7813,8 @@ mds/$(am__dirstamp) mds/$(DEPDIR)/$(am__dirstamp) mds/test_build_libcommon-mdstypes.$(OBJEXT): mds/$(am__dirstamp) \ mds/$(DEPDIR)/$(am__dirstamp) +mds/test_build_libcommon-flock.$(OBJEXT): mds/$(am__dirstamp) \ + mds/$(DEPDIR)/$(am__dirstamp) test_build_libcommon$(EXEEXT): $(test_build_libcommon_OBJECTS) $(test_build_libcommon_DEPENDENCIES) $(EXTRA_test_build_libcommon_DEPENDENCIES) @rm -f test_build_libcommon$(EXEEXT) $(AM_V_CXXLD)$(test_build_libcommon_LINK) $(test_build_libcommon_OBJECTS) $(test_build_libcommon_LDADD) $(LIBS) @@ -8519,7 +8540,8 @@ -rm -f auth/none/AuthNoneAuthorizeHandler.lo -rm -f auth/unknown/AuthUnknownAuthorizeHandler.$(OBJEXT) -rm -f auth/unknown/AuthUnknownAuthorizeHandler.lo - -rm -f civetweb/src/radosgw-civetweb.$(OBJEXT) + -rm -f civetweb/src/libcivetweb_la-civetweb.$(OBJEXT) + -rm -f civetweb/src/libcivetweb_la-civetweb.lo -rm -f client/Client.$(OBJEXT) -rm -f client/Client.lo -rm -f client/ClientSnapRealm.$(OBJEXT) @@ -9079,6 +9101,7 @@ -rm -f mds/snap.$(OBJEXT) -rm -f mds/snap.lo -rm -f mds/test_build_libcommon-MDSMap.$(OBJEXT) + -rm -f mds/test_build_libcommon-flock.$(OBJEXT) -rm -f mds/test_build_libcommon-inode_backtrace.$(OBJEXT) -rm -f mds/test_build_libcommon-mdstypes.$(OBJEXT) -rm -f mon/AuthMonitor.$(OBJEXT) @@ -9256,6 +9279,10 @@ -rm -f rgw/ceph_dencoder-rgw_dencoder.$(OBJEXT) -rm -f rgw/ceph_dencoder-rgw_env.$(OBJEXT) -rm -f rgw/ceph_dencoder-rgw_json_enc.$(OBJEXT) + -rm -f rgw/libcivetweb_la-rgw_civetweb.$(OBJEXT) + -rm -f rgw/libcivetweb_la-rgw_civetweb.lo + -rm -f rgw/libcivetweb_la-rgw_civetweb_log.$(OBJEXT) + -rm -f rgw/libcivetweb_la-rgw_civetweb_log.lo -rm -f rgw/librgw_la-librgw.$(OBJEXT) -rm -f rgw/librgw_la-librgw.lo -rm -f rgw/librgw_la-rgw_acl.$(OBJEXT) @@ -9325,8 +9352,6 @@ -rm -f rgw/librgw_la-rgw_xml.$(OBJEXT) -rm -f rgw/librgw_la-rgw_xml.lo -rm -f rgw/rgw_admin.$(OBJEXT) - -rm -f rgw/rgw_civetweb.$(OBJEXT) - -rm -f rgw/rgw_civetweb_log.$(OBJEXT) -rm -f rgw/rgw_common.$(OBJEXT) -rm -f rgw/rgw_env.$(OBJEXT) -rm -f rgw/rgw_http_client.$(OBJEXT) @@ -9634,7 +9659,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@auth/cephx/$(DEPDIR)/CephxSessionHandler.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@auth/none/$(DEPDIR)/AuthNoneAuthorizeHandler.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@auth/unknown/$(DEPDIR)/AuthUnknownAuthorizeHandler.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@civetweb/src/$(DEPDIR)/radosgw-civetweb.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@civetweb/src/$(DEPDIR)/libcivetweb_la-civetweb.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@client/$(DEPDIR)/Client.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@client/$(DEPDIR)/ClientSnapRealm.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@client/$(DEPDIR)/Dentry.Plo@am__quote@ @@ -9970,6 +9995,7 @@ @AMDEP_TRUE@@am__include@ 
@am__quote@mds/$(DEPDIR)/mdstypes.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@mds/$(DEPDIR)/snap.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@mds/$(DEPDIR)/test_build_libcommon-MDSMap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@mds/$(DEPDIR)/test_build_libcommon-flock.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@mds/$(DEPDIR)/test_build_libcommon-inode_backtrace.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@mds/$(DEPDIR)/test_build_libcommon-mdstypes.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@mon/$(DEPDIR)/AuthMonitor.Plo@am__quote@ @@ -10070,6 +10096,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/ceph_dencoder-rgw_dencoder.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/ceph_dencoder-rgw_env.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/ceph_dencoder-rgw_json_enc.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb_log.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/librgw_la-librgw.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/librgw_la-rgw_acl.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/librgw_la-rgw_acl_s3.Plo@am__quote@ @@ -10105,8 +10133,6 @@ @AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/librgw_la-rgw_user.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/librgw_la-rgw_xml.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/rgw_admin.Po@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/rgw_civetweb.Po@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/rgw_civetweb_log.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/rgw_common.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/rgw_env.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@rgw/$(DEPDIR)/rgw_http_client.Po@am__quote@ @@ -10430,6 +10456,13 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< +civetweb/src/libcivetweb_la-civetweb.lo: civetweb/src/civetweb.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcivetweb_la_CFLAGS) $(CFLAGS) -MT civetweb/src/libcivetweb_la-civetweb.lo -MD -MP -MF civetweb/src/$(DEPDIR)/libcivetweb_la-civetweb.Tpo -c -o civetweb/src/libcivetweb_la-civetweb.lo `test -f 'civetweb/src/civetweb.c' || echo '$(srcdir)/'`civetweb/src/civetweb.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) civetweb/src/$(DEPDIR)/libcivetweb_la-civetweb.Tpo civetweb/src/$(DEPDIR)/libcivetweb_la-civetweb.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='civetweb/src/civetweb.c' object='civetweb/src/libcivetweb_la-civetweb.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcivetweb_la_CFLAGS) $(CFLAGS) -c -o civetweb/src/libcivetweb_la-civetweb.lo `test -f 'civetweb/src/civetweb.c' || echo '$(srcdir)/'`civetweb/src/civetweb.c + common/libcommon_crc_la-sctp_crc32.lo: common/sctp_crc32.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) 
--tag=CC $(libcommon_crc_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT common/libcommon_crc_la-sctp_crc32.lo -MD -MP -MF common/$(DEPDIR)/libcommon_crc_la-sctp_crc32.Tpo -c -o common/libcommon_crc_la-sctp_crc32.lo `test -f 'common/sctp_crc32.c' || echo '$(srcdir)/'`common/sctp_crc32.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/libcommon_crc_la-sctp_crc32.Tpo common/$(DEPDIR)/libcommon_crc_la-sctp_crc32.Plo @@ -10801,20 +10834,6 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(ceph_test_librbd_fsx_CFLAGS) $(CFLAGS) -c -o test/librbd/ceph_test_librbd_fsx-fsx.obj `if test -f 'test/librbd/fsx.c'; then $(CYGPATH_W) 'test/librbd/fsx.c'; else $(CYGPATH_W) '$(srcdir)/test/librbd/fsx.c'; fi` -civetweb/src/radosgw-civetweb.o: civetweb/src/civetweb.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(radosgw_CFLAGS) $(CFLAGS) -MT civetweb/src/radosgw-civetweb.o -MD -MP -MF civetweb/src/$(DEPDIR)/radosgw-civetweb.Tpo -c -o civetweb/src/radosgw-civetweb.o `test -f 'civetweb/src/civetweb.c' || echo '$(srcdir)/'`civetweb/src/civetweb.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) civetweb/src/$(DEPDIR)/radosgw-civetweb.Tpo civetweb/src/$(DEPDIR)/radosgw-civetweb.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='civetweb/src/civetweb.c' object='civetweb/src/radosgw-civetweb.o' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(radosgw_CFLAGS) $(CFLAGS) -c -o civetweb/src/radosgw-civetweb.o `test -f 'civetweb/src/civetweb.c' || echo '$(srcdir)/'`civetweb/src/civetweb.c - -civetweb/src/radosgw-civetweb.obj: civetweb/src/civetweb.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(radosgw_CFLAGS) $(CFLAGS) -MT civetweb/src/radosgw-civetweb.obj -MD -MP -MF civetweb/src/$(DEPDIR)/radosgw-civetweb.Tpo -c -o civetweb/src/radosgw-civetweb.obj `if test -f 'civetweb/src/civetweb.c'; then $(CYGPATH_W) 'civetweb/src/civetweb.c'; else $(CYGPATH_W) '$(srcdir)/civetweb/src/civetweb.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) civetweb/src/$(DEPDIR)/radosgw-civetweb.Tpo civetweb/src/$(DEPDIR)/radosgw-civetweb.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='civetweb/src/civetweb.c' object='civetweb/src/radosgw-civetweb.obj' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(radosgw_CFLAGS) $(CFLAGS) -c -o civetweb/src/radosgw-civetweb.obj `if test -f 'civetweb/src/civetweb.c'; then $(CYGPATH_W) 'civetweb/src/civetweb.c'; else $(CYGPATH_W) '$(srcdir)/civetweb/src/civetweb.c'; fi` - test_build_libcommon-ceph_ver.o: ceph_ver.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_libcommon_CFLAGS) $(CFLAGS) -MT test_build_libcommon-ceph_ver.o -MD -MP -MF $(DEPDIR)/test_build_libcommon-ceph_ver.Tpo -c -o test_build_libcommon-ceph_ver.o `test -f 'ceph_ver.c' || echo '$(srcdir)/'`ceph_ver.c 
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/test_build_libcommon-ceph_ver.Tpo $(DEPDIR)/test_build_libcommon-ceph_ver.Po @@ -11231,6 +11250,20 @@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libcephfs_jni_la_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o java/native/libcephfs_jni_la-JniConstants.lo `test -f 'java/native/JniConstants.cpp' || echo '$(srcdir)/'`java/native/JniConstants.cpp +rgw/libcivetweb_la-rgw_civetweb.lo: rgw/rgw_civetweb.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcivetweb_la_CXXFLAGS) $(CXXFLAGS) -MT rgw/libcivetweb_la-rgw_civetweb.lo -MD -MP -MF rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb.Tpo -c -o rgw/libcivetweb_la-rgw_civetweb.lo `test -f 'rgw/rgw_civetweb.cc' || echo '$(srcdir)/'`rgw/rgw_civetweb.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb.Tpo rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='rgw/rgw_civetweb.cc' object='rgw/libcivetweb_la-rgw_civetweb.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcivetweb_la_CXXFLAGS) $(CXXFLAGS) -c -o rgw/libcivetweb_la-rgw_civetweb.lo `test -f 'rgw/rgw_civetweb.cc' || echo '$(srcdir)/'`rgw/rgw_civetweb.cc + +rgw/libcivetweb_la-rgw_civetweb_log.lo: rgw/rgw_civetweb_log.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcivetweb_la_CXXFLAGS) $(CXXFLAGS) -MT rgw/libcivetweb_la-rgw_civetweb_log.lo -MD -MP -MF rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb_log.Tpo -c -o rgw/libcivetweb_la-rgw_civetweb_log.lo `test -f 'rgw/rgw_civetweb_log.cc' || echo '$(srcdir)/'`rgw/rgw_civetweb_log.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb_log.Tpo rgw/$(DEPDIR)/libcivetweb_la-rgw_civetweb_log.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='rgw/rgw_civetweb_log.cc' object='rgw/libcivetweb_la-rgw_civetweb_log.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libcivetweb_la_CXXFLAGS) $(CXXFLAGS) -c -o rgw/libcivetweb_la-rgw_civetweb_log.lo `test -f 'rgw/rgw_civetweb_log.cc' || echo '$(srcdir)/'`rgw/rgw_civetweb_log.cc + common/libcommon_crc_la-crc32c.lo: common/crc32c.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LIBTOOL) $(AM_V_lt) --tag=CXX $(libcommon_crc_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT common/libcommon_crc_la-crc32c.lo -MD -MP -MF common/$(DEPDIR)/libcommon_crc_la-crc32c.Tpo -c -o 
common/libcommon_crc_la-crc32c.lo `test -f 'common/crc32c.cc' || echo '$(srcdir)/'`common/crc32c.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) common/$(DEPDIR)/libcommon_crc_la-crc32c.Tpo common/$(DEPDIR)/libcommon_crc_la-crc32c.Plo @@ -13758,6 +13791,20 @@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_libcommon_CXXFLAGS) $(CXXFLAGS) -c -o mds/test_build_libcommon-mdstypes.obj `if test -f 'mds/mdstypes.cc'; then $(CYGPATH_W) 'mds/mdstypes.cc'; else $(CYGPATH_W) '$(srcdir)/mds/mdstypes.cc'; fi` +mds/test_build_libcommon-flock.o: mds/flock.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_libcommon_CXXFLAGS) $(CXXFLAGS) -MT mds/test_build_libcommon-flock.o -MD -MP -MF mds/$(DEPDIR)/test_build_libcommon-flock.Tpo -c -o mds/test_build_libcommon-flock.o `test -f 'mds/flock.cc' || echo '$(srcdir)/'`mds/flock.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) mds/$(DEPDIR)/test_build_libcommon-flock.Tpo mds/$(DEPDIR)/test_build_libcommon-flock.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='mds/flock.cc' object='mds/test_build_libcommon-flock.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_libcommon_CXXFLAGS) $(CXXFLAGS) -c -o mds/test_build_libcommon-flock.o `test -f 'mds/flock.cc' || echo '$(srcdir)/'`mds/flock.cc + +mds/test_build_libcommon-flock.obj: mds/flock.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_libcommon_CXXFLAGS) $(CXXFLAGS) -MT mds/test_build_libcommon-flock.obj -MD -MP -MF mds/$(DEPDIR)/test_build_libcommon-flock.Tpo -c -o mds/test_build_libcommon-flock.obj `if test -f 'mds/flock.cc'; then $(CYGPATH_W) 'mds/flock.cc'; else $(CYGPATH_W) '$(srcdir)/mds/flock.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) mds/$(DEPDIR)/test_build_libcommon-flock.Tpo mds/$(DEPDIR)/test_build_libcommon-flock.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='mds/flock.cc' object='mds/test_build_libcommon-flock.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_libcommon_CXXFLAGS) $(CXXFLAGS) -c -o mds/test_build_libcommon-flock.obj `if test -f 'mds/flock.cc'; then $(CYGPATH_W) 'mds/flock.cc'; else $(CYGPATH_W) '$(srcdir)/mds/flock.cc'; fi` + test/test_build_librados-buildtest_skeleton.o: test/buildtest_skeleton.cc @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(test_build_librados_CXXFLAGS) $(CXXFLAGS) -MT test/test_build_librados-buildtest_skeleton.o -MD -MP -MF test/$(DEPDIR)/test_build_librados-buildtest_skeleton.Tpo -c -o test/test_build_librados-buildtest_skeleton.o `test -f 'test/buildtest_skeleton.cc' || echo '$(srcdir)/'`test/buildtest_skeleton.cc @am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) test/$(DEPDIR)/test_build_librados-buildtest_skeleton.Tpo test/$(DEPDIR)/test_build_librados-buildtest_skeleton.Po @@ -15262,6 +15309,7 @@ -rm -rf auth/cephx/.libs auth/cephx/_libs -rm -rf 
auth/none/.libs auth/none/_libs -rm -rf auth/unknown/.libs auth/unknown/_libs + -rm -rf civetweb/src/.libs civetweb/src/_libs -rm -rf client/.libs client/_libs -rm -rf cls/hello/.libs cls/hello/_libs -rm -rf cls/lock/.libs cls/lock/_libs diff -Nru ceph-0.80.8/src/mds/CInode.h ceph-0.80.9/src/mds/CInode.h --- ceph-0.80.8/src/mds/CInode.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/mds/CInode.h 2015-03-09 19:51:17.000000000 +0000 @@ -442,6 +442,7 @@ parent(0), inode_auth(CDIR_AUTH_DEFAULT), replica_caps_wanted(0), + fcntl_locks(g_ceph_context), flock_locks(g_ceph_context), item_dirty(this), item_caps(this), item_open_file(this), item_dirty_parent(this), item_dirty_dirfrag_dir(this), item_dirty_dirfrag_nest(this), diff -Nru ceph-0.80.8/src/mds/flock.cc ceph-0.80.9/src/mds/flock.cc --- ceph-0.80.8/src/mds/flock.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/mds/flock.cc 2015-03-09 19:51:18.000000000 +0000 @@ -44,33 +44,33 @@ bool ceph_lock_state_t::add_lock(ceph_filelock& new_lock, bool wait_on_fail, bool replay) { - dout(15) << "add_lock " << new_lock << dendl; + ldout(cct,15) << "add_lock " << new_lock << dendl; bool ret = false; list::iterator> overlapping_locks, self_overlapping_locks, neighbor_locks; // first, get any overlapping locks and split them into owned-by-us and not if (get_overlapping_locks(new_lock, overlapping_locks, &neighbor_locks)) { - dout(15) << "got overlapping lock, splitting by owner" << dendl; + ldout(cct,15) << "got overlapping lock, splitting by owner" << dendl; split_by_owner(new_lock, overlapping_locks, self_overlapping_locks); } if (!overlapping_locks.empty()) { //overlapping locks owned by others :( if (CEPH_LOCK_EXCL == new_lock.type) { //can't set, we want an exclusive - dout(15) << "overlapping lock, and this lock is exclusive, can't set" + ldout(cct,15) << "overlapping lock, and this lock is exclusive, can't set" << dendl; if (wait_on_fail && !replay) { waiting_locks.insert(pair(new_lock.start, new_lock)); } } else { //shared lock, check for any exclusive locks blocking us if (contains_exclusive_lock(overlapping_locks)) { //blocked :( - dout(15) << " blocked by exclusive lock in overlapping_locks" << dendl; + ldout(cct,15) << " blocked by exclusive lock in overlapping_locks" << dendl; if (wait_on_fail && !replay) { waiting_locks.insert(pair(new_lock.start, new_lock)); } } else { //yay, we can insert a shared lock - dout(15) << "inserting shared lock" << dendl; + ldout(cct,15) << "inserting shared lock" << dendl; remove_waiting(new_lock); adjust_locks(self_overlapping_locks, new_lock, neighbor_locks); held_locks.insert(pair(new_lock.start, new_lock)); @@ -80,7 +80,7 @@ } else { //no overlapping locks except our own remove_waiting(new_lock); adjust_locks(self_overlapping_locks, new_lock, neighbor_locks); - dout(15) << "no conflicts, inserting " << new_lock << dendl; + ldout(cct,15) << "no conflicts, inserting " << new_lock << dendl; held_locks.insert(pair (new_lock.start, new_lock)); ret = true; @@ -123,9 +123,9 @@ list::iterator> overlapping_locks, self_overlapping_locks; if (get_overlapping_locks(removal_lock, overlapping_locks)) { - dout(15) << "splitting by owner" << dendl; + ldout(cct,15) << "splitting by owner" << dendl; split_by_owner(removal_lock, overlapping_locks, self_overlapping_locks); - } else dout(15) << "attempt to remove lock at " << removal_lock.start + } else ldout(cct,15) << "attempt to remove lock at " << removal_lock.start << " but no locks there!" 
<< dendl; bool remove_to_end = (0 == removal_lock.length); uint64_t removal_start = removal_lock.start; @@ -134,13 +134,13 @@ __s64 old_lock_client = 0; ceph_filelock *old_lock; - dout(15) << "examining " << self_overlapping_locks.size() + ldout(cct,15) << "examining " << self_overlapping_locks.size() << " self-overlapping locks for removal" << dendl; for (list::iterator>::iterator iter = self_overlapping_locks.begin(); iter != self_overlapping_locks.end(); ++iter) { - dout(15) << "self overlapping lock " << (*iter)->second << dendl; + ldout(cct,15) << "self overlapping lock " << (*iter)->second << dendl; old_lock = &(*iter)->second; bool old_lock_to_end = (0 == old_lock->length); old_lock_end = old_lock->start + old_lock->length - 1; @@ -149,7 +149,7 @@ if (old_lock->start < removal_start) { old_lock->length = removal_start - old_lock->start; } else { - dout(15) << "erasing " << (*iter)->second << dendl; + ldout(cct,15) << "erasing " << (*iter)->second << dendl; held_locks.erase(*iter); --client_held_lock_counts[old_lock_client]; } @@ -160,7 +160,7 @@ (append_lock.start, append_lock)); ++client_held_lock_counts[(client_t)old_lock->client]; if (old_lock->start >= removal_start) { - dout(15) << "erasing " << (*iter)->second << dendl; + ldout(cct,15) << "erasing " << (*iter)->second << dendl; held_locks.erase(*iter); --client_held_lock_counts[old_lock_client]; } else old_lock->length = removal_start - old_lock->start; @@ -176,7 +176,7 @@ if (old_lock->start < removal_start) { old_lock->length = removal_start - old_lock->start; } else { - dout(15) << "erasing " << (*iter)->second << dendl; + ldout(cct,15) << "erasing " << (*iter)->second << dendl; held_locks.erase(*iter); --client_held_lock_counts[old_lock_client]; } @@ -207,7 +207,7 @@ list::iterator> neighbor_locks) { - dout(15) << "adjust_locks" << dendl; + ldout(cct,15) << "adjust_locks" << dendl; bool new_lock_to_end = (0 == new_lock.length); uint64_t new_lock_start = new_lock.start; uint64_t new_lock_end = new_lock.start + new_lock.length - 1; @@ -219,7 +219,7 @@ iter != old_locks.end(); ++iter) { old_lock = &(*iter)->second; - dout(15) << "adjusting lock: " << *old_lock << dendl; + ldout(cct,15) << "adjusting lock: " << *old_lock << dendl; bool old_lock_to_end = (0 == old_lock->length); old_lock_start = old_lock->start; old_lock_end = old_lock->start + old_lock->length - 1; @@ -228,17 +228,17 @@ old_lock_client = old_lock->client; if (new_lock_to_end || old_lock_to_end) { //special code path to deal with a length set at 0 - dout(15) << "one lock extends forever" << dendl; + ldout(cct,15) << "one lock extends forever" << dendl; if (old_lock->type == new_lock.type) { //just unify them in new lock, remove old lock - dout(15) << "same lock type, unifying" << dendl; + ldout(cct,15) << "same lock type, unifying" << dendl; new_lock.start = (new_lock_start < old_lock_start) ? new_lock_start : old_lock_start; new_lock.length = 0; held_locks.erase(*iter); --client_held_lock_counts[old_lock_client]; } else { //not same type, have to keep any remains of old lock around - dout(15) << "shrinking old lock" << dendl; + ldout(cct,15) << "shrinking old lock" << dendl; if (new_lock_to_end) { if (old_lock_start < new_lock_start) { old_lock->length = new_lock_start - old_lock_start; @@ -262,17 +262,17 @@ } } else { if (old_lock->type == new_lock.type) { //just merge them! - dout(15) << "merging locks, they're the same type" << dendl; + ldout(cct,15) << "merging locks, they're the same type" << dendl; new_lock.start = (old_lock_start < new_lock_start ) ? 
old_lock_start : new_lock_start; int new_end = (new_lock_end > old_lock_end) ? new_lock_end : old_lock_end; new_lock.length = new_end - new_lock.start + 1; - dout(15) << "erasing lock " << (*iter)->second << dendl; + ldout(cct,15) << "erasing lock " << (*iter)->second << dendl; held_locks.erase(*iter); --client_held_lock_counts[old_lock_client]; } else { //we'll have to update sizes and maybe make new locks - dout(15) << "locks aren't same type, changing sizes" << dendl; + ldout(cct,15) << "locks aren't same type, changing sizes" << dendl; if (old_lock_end > new_lock_end) { //add extra lock after new_lock ceph_filelock appended_lock = *old_lock; appended_lock.start = new_lock_end + 1; @@ -302,7 +302,7 @@ ++iter) { old_lock = &(*iter)->second; old_lock_client = old_lock->client; - dout(15) << "lock to coalesce: " << *old_lock << dendl; + ldout(cct,15) << "lock to coalesce: " << *old_lock << dendl; /* because if it's a neighboring lock there can't be any self-overlapping locks that covered it */ if (old_lock->type == new_lock.type) { //merge them @@ -354,8 +354,8 @@ && (start != 0) && (lower_bound != lock_map.begin())) --lower_bound; if (lock_map.end() == lower_bound) - dout(15) << "get_lower_dout(15)eturning end()" << dendl; - else dout(15) << "get_lower_bound returning iterator pointing to " + ldout(cct,15) << "get_lower_dout(15)eturning end()" << dendl; + else ldout(cct,15) << "get_lower_bound returning iterator pointing to " << lower_bound->second << dendl; return lower_bound; } @@ -368,8 +368,8 @@ lock_map.upper_bound(end); if (last != lock_map.begin()) --last; if (lock_map.end() == last) - dout(15) << "get_last_before returning end()" << dendl; - else dout(15) << "get_last_before returning iterator pointing to " + ldout(cct,15) << "get_last_before returning end()" << dendl; + else ldout(cct,15) << "get_last_before returning iterator pointing to " << last->second << dendl; return last; } @@ -382,7 +382,7 @@ ((iter->first < start) && (((iter->first + iter->second.length - 1) >= start) || (0 == iter->second.length)))); - dout(15) << "share_space got start: " << start << ", end: " << end + ldout(cct,15) << "share_space got start: " << start << ", end: " << end << ", lock: " << iter->second << ", returning " << ret << dendl; return ret; } @@ -393,7 +393,7 @@ list::iterator> *self_neighbors) { - dout(15) << "get_overlapping_locks" << dendl; + ldout(cct,15) << "get_overlapping_locks" << dendl; // create a lock starting one earlier and ending one later // to check for neighbors ceph_filelock neighbor_check_lock = lock; @@ -419,8 +419,7 @@ if (share_space(iter, lock)) { overlaps.push_front(iter); } else if (self_neighbors && - (neighbor_check_lock.client == iter->second.client) && - (neighbor_check_lock.pid == iter->second.pid) && + ceph_filelock_owner_equal(neighbor_check_lock, iter->second) && share_space(iter, neighbor_check_lock)) { self_neighbors->push_front(iter); } @@ -438,7 +437,7 @@ ceph_filelock>::iterator>& overlaps) { - dout(15) << "get_waiting_overlaps" << dendl; + ldout(cct,15) << "get_waiting_overlaps" << dendl; multimap::iterator iter = get_last_before(lock.start + lock.length - 1, waiting_locks); bool cont = iter != waiting_locks.end(); @@ -459,15 +458,15 @@ { list::iterator>::iterator iter = locks.begin(); - dout(15) << "owner lock: " << owner << dendl; + ldout(cct,15) << "owner lock: " << owner << dendl; while (iter != locks.end()) { - dout(15) << "comparing to " << (*iter)->second << dendl; + ldout(cct,15) << "comparing to " << (*iter)->second << dendl; if 
(ceph_filelock_owner_equal((*iter)->second, owner)) { - dout(15) << "success, pushing to owned_locks" << dendl; + ldout(cct,15) << "success, pushing to owned_locks" << dendl; owned_locks.push_back(*iter); iter = locks.erase(iter); } else { - dout(15) << "failure, something not equal in this group " + ldout(cct,15) << "failure, something not equal in this group " << (*iter)->second.client << ":" << owner.client << "," << (*iter)->second.owner << ":" << owner.owner << "," << (*iter)->second.pid << ":" << owner.pid << dendl; diff -Nru ceph-0.80.8/src/mds/flock.h ceph-0.80.9/src/mds/flock.h --- ceph-0.80.8/src/mds/flock.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/mds/flock.h 2015-03-09 19:51:18.000000000 +0000 @@ -37,7 +37,9 @@ } class ceph_lock_state_t { + CephContext *cct; public: + ceph_lock_state_t(CephContext *cct_) : cct(cct_) {} multimap held_locks; // current locks multimap waiting_locks; // locks waiting for other locks // both of the above are keyed by starting offset diff -Nru ceph-0.80.8/src/mds/Makefile.am ceph-0.80.9/src/mds/Makefile.am --- ceph-0.80.8/src/mds/Makefile.am 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/mds/Makefile.am 2015-03-09 19:51:18.000000000 +0000 @@ -4,7 +4,6 @@ mds/Dumper.cc \ mds/Resetter.cc \ mds/MDS.cc \ - mds/flock.cc \ mds/locks.c \ mds/journal.cc \ mds/Server.cc \ diff -Nru ceph-0.80.8/src/mds/mdstypes.h ceph-0.80.9/src/mds/mdstypes.h --- ceph-0.80.8/src/mds/mdstypes.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/mds/mdstypes.h 2015-03-09 19:51:18.000000000 +0000 @@ -703,7 +703,8 @@ cap_reconnect_t() { memset(&capinfo, 0, sizeof(capinfo)); } - cap_reconnect_t(uint64_t cap_id, inodeno_t pino, const string& p, int w, int i, inodeno_t sr) : + cap_reconnect_t(uint64_t cap_id, inodeno_t pino, const string& p, int w, int i, + inodeno_t sr, bufferlist& lb) : path(p) { capinfo.cap_id = cap_id; capinfo.wanted = w; @@ -711,6 +712,7 @@ capinfo.snaprealm = sr; capinfo.pathbase = pino; capinfo.flock_len = 0; + flockbl.claim(lb); } void encode(bufferlist& bl) const; void decode(bufferlist::iterator& bl); diff -Nru ceph-0.80.8/src/mds/Server.cc ceph-0.80.9/src/mds/Server.cc --- ceph-0.80.8/src/mds/Server.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/mds/Server.cc 2015-03-09 19:51:18.000000000 +0000 @@ -3085,13 +3085,20 @@ dout(0) << "handle_client_file_setlock: " << set_lock << dendl; ceph_lock_state_t *lock_state = NULL; + bool interrupt = false; // get the appropriate lock state switch (req->head.args.filelock_change.rule) { + case CEPH_LOCK_FLOCK_INTR: + interrupt = true; + // fall-thru case CEPH_LOCK_FLOCK: lock_state = &cur->flock_locks; break; + case CEPH_LOCK_FCNTL_INTR: + interrupt = true; + // fall-thru case CEPH_LOCK_FCNTL: lock_state = &cur->fcntl_locks; break; @@ -3110,16 +3117,15 @@ if (lock_state->is_waiting(set_lock)) { dout(10) << " unlock removing waiting lock " << set_lock << dendl; lock_state->remove_waiting(set_lock); - } else { + cur->take_waiting(CInode::WAIT_FLOCK, waiters); + } else if (!interrupt) { dout(10) << " unlock attempt on " << set_lock << dendl; lock_state->remove_lock(set_lock, activated_locks); cur->take_waiting(CInode::WAIT_FLOCK, waiters); } - reply_request(mdr, 0); - /* For now we're ignoring the activated locks because their responses - * will be sent when the lock comes up again in rotation by the MDS. - * It's a cheap hack, but it's easy to code. 
*/ mds->queue_waiters(waiters); + + reply_request(mdr, 0); } else { dout(10) << " lock attempt on " << set_lock << dendl; if (mdr->more()->flock_was_waiting && diff -Nru ceph-0.80.8/src/messages/MClientReconnect.h ceph-0.80.9/src/messages/MClientReconnect.h --- ceph-0.80.8/src/messages/MClientReconnect.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/messages/MClientReconnect.h 2015-03-09 19:51:18.000000000 +0000 @@ -40,9 +40,9 @@ } void add_cap(inodeno_t ino, uint64_t cap_id, inodeno_t pathbase, const string& path, - int wanted, int issued, - inodeno_t sr) { - caps[ino] = cap_reconnect_t(cap_id, pathbase, path, wanted, issued, sr); + int wanted, int issued, inodeno_t sr, bufferlist& lb) + { + caps[ino] = cap_reconnect_t(cap_id, pathbase, path, wanted, issued, sr, lb); } void add_snaprealm(inodeno_t ino, snapid_t seq, inodeno_t parent) { ceph_mds_snaprealm_reconnect r; diff -Nru ceph-0.80.8/src/mon/MonCommands.h ceph-0.80.9/src/mon/MonCommands.h --- ceph-0.80.8/src/mon/MonCommands.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/mon/MonCommands.h 2015-03-09 19:51:18.000000000 +0000 @@ -425,6 +425,9 @@ "name=ancestor,type=CephString,req=false,goodchars=[A-Za-z0-9-_.]", \ "unlink from crush map (everywhere, or just at )", \ "osd", "rw", "cli,rest") +COMMAND("osd crush reweight-all", + "recalculate the weights for the tree to ensure they sum correctly", + "osd", "rw", "cli,rest") COMMAND("osd crush reweight " \ "name=name,type=CephString,goodchars=[A-Za-z0-9-_.] " \ "name=weight,type=CephFloat,range=0.0", \ @@ -433,6 +436,15 @@ COMMAND("osd crush tunables " \ "name=profile,type=CephChoices,strings=legacy|argonaut|bobtail|firefly|optimal|default", \ "set crush tunables values to ", "osd", "rw", "cli,rest") +COMMAND("osd crush set-tunable " \ + "name=tunable,type=CephChoices,strings=straw_calc_version " \ + "name=value,type=CephInt", + "set crush tunable to ", + "osd", "rw", "cli,rest") +COMMAND("osd crush get-tunable " \ + "name=tunable,type=CephChoices,strings=straw_calc_version", + "get crush tunable ", + "osd", "rw", "cli,rest") COMMAND("osd crush show-tunables", \ "show current crush tunables", "osd", "r", "cli,rest") COMMAND("osd crush rule create-simple " \ diff -Nru ceph-0.80.8/src/mon/OSDMonitor.cc ceph-0.80.9/src/mon/OSDMonitor.cc --- ceph-0.80.8/src/mon/OSDMonitor.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/mon/OSDMonitor.cc 2015-03-09 19:51:18.000000000 +0000 @@ -2465,6 +2465,31 @@ } ss << "listed " << osdmap.blacklist.size() << " entries"; + } else if (prefix == "osd crush get-tunable") { + string tunable; + cmd_getval(g_ceph_context, cmdmap, "tunable", tunable); + int value; + cmd_getval(g_ceph_context, cmdmap, "value", value); + ostringstream rss; + if (f) + f->open_object_section("tunable"); + if (tunable == "straw_calc_version") { + if (f) + f->dump_int(tunable.c_str(), osdmap.crush->get_straw_calc_version()); + else + rss << osdmap.crush->get_straw_calc_version() << "\n"; + } else { + r = -EINVAL; + goto reply; + } + if (f) { + f->close_section(); + f->flush(rdata); + } else { + rdata.append(rss.str()); + } + r = 0; + } else if (prefix == "osd pool get") { string poolstr; cmd_getval(g_ceph_context, cmdmap, "pool", poolstr); @@ -4142,6 +4167,19 @@ } } while (false); + } else if (prefix == "osd crush reweight-all") { + // osd crush reweight + CrushWrapper newcrush; + _get_pending_crush(newcrush); + + newcrush.reweight(g_ceph_context); + pending_inc.crush.clear(); + newcrush.encode(pending_inc.crush); + ss << "reweighted crush hierarchy"; + 
getline(ss, rs); + wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs, + get_last_committed() + 1)); + return true; } else if (prefix == "osd crush reweight") { do { // osd crush reweight @@ -4216,6 +4254,46 @@ getline(ss, rs); wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs, get_last_committed() + 1)); + return true; + } else if (prefix == "osd crush set-tunable") { + CrushWrapper newcrush; + _get_pending_crush(newcrush); + + err = 0; + string tunable; + cmd_getval(g_ceph_context, cmdmap, "tunable", tunable); + + int64_t value = -1; + if (!cmd_getval(g_ceph_context, cmdmap, "value", value)) { + err = -EINVAL; + ss << "failed to parse integer value " << cmd_vartype_stringify(cmdmap["value"]); + goto reply; + } + + if (tunable == "straw_calc_version") { + if (value < 0 || value > 2) { + ss << "value must be 0 or 1; got " << value; + err = -EINVAL; + goto reply; + } + newcrush.set_straw_calc_version(value); + } else { + ss << "unrecognized tunable '" << tunable << "'"; + err = -EINVAL; + goto reply; + } + + if (!validate_crush_against_features(&newcrush, ss)) { + err = -EINVAL; + goto reply; + } + + pending_inc.crush.clear(); + newcrush.encode(pending_inc.crush); + ss << "adjusted tunable " << tunable << " to " << value; + getline(ss, rs); + wait_for_finished_proposal(new Monitor::C_Command(mon, m, 0, rs, + get_last_committed() + 1)); return true; } else if (prefix == "osd crush rule create-simple") { diff -Nru ceph-0.80.8/src/os/FileJournal.cc ceph-0.80.9/src/os/FileJournal.cc --- ceph-0.80.8/src/os/FileJournal.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/os/FileJournal.cc 2015-03-09 19:51:18.000000000 +0000 @@ -806,7 +806,8 @@ } dout(20) << "prepare_multi_write queue_pos now " << queue_pos << dendl; - //assert(write_pos + bl.length() == queue_pos); + assert((write_pos + bl.length() == queue_pos) || + (write_pos + bl.length() - header.max_size + get_top() == queue_pos)); return 0; } @@ -1005,22 +1006,32 @@ dout(10) << "do_write wrapping, first bit at " << pos << " len " << first.length() << " second bit len " << second.length() << " (orig len " << bl.length() << ")" << dendl; - if (write_bl(pos, first)) { - derr << "FileJournal::do_write: write_bl(pos=" << pos - << ") failed" << dendl; - ceph_abort(); - } - assert(pos == get_top()); + //Save pos to write first piece second + off64_t first_pos = pos; + off64_t orig_pos; + pos = get_top(); + // header too? if (hbp.length()) { // be sneaky: include the header in the second fragment second.push_front(hbp); pos = 0; // we included the header } + // Write the second portion first possible with the header, so + // do_read_entry() won't even get a valid entry_header_t if there + // is a crash between the two writes. + orig_pos = pos; if (write_bl(pos, second)) { - derr << "FileJournal::do_write: write_bl(pos=" << pos + derr << "FileJournal::do_write: write_bl(pos=" << orig_pos + << ") failed" << dendl; + ceph_abort(); + } + orig_pos = first_pos; + if (write_bl(first_pos, first)) { + derr << "FileJournal::do_write: write_bl(pos=" << orig_pos << ") failed" << dendl; ceph_abort(); } + assert(first_pos == get_top()); } else { // header too? 
if (hbp.length()) { diff -Nru ceph-0.80.8/src/osd/OSD.h ceph-0.80.9/src/osd/OSD.h --- ceph-0.80.8/src/osd/OSD.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/osd/OSD.h 2015-03-09 19:51:18.000000000 +0000 @@ -1740,7 +1740,9 @@ pg->put("SnapTrimWQ"); } void _clear() { - osd->snap_trim_queue.clear(); + while (PG *pg = _dequeue()) { + pg->put("SnapTrimWQ"); + } } } snap_trim_wq; diff -Nru ceph-0.80.8/src/osd/ReplicatedPG.cc ceph-0.80.9/src/osd/ReplicatedPG.cc --- ceph-0.80.8/src/osd/ReplicatedPG.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/osd/ReplicatedPG.cc 2015-03-09 19:51:18.000000000 +0000 @@ -5168,7 +5168,8 @@ } } } else if (ctx->new_snapset.clones.size() && - !ctx->cache_evict) { + !ctx->cache_evict && + (!ctx->snapset_obc || !ctx->snapset_obc->obs.exists)) { // save snapset on _snap hobject_t snapoid(soid.oid, soid.get_key(), CEPH_SNAPDIR, soid.hash, info.pgid.pool(), soid.get_namespace()); @@ -5179,7 +5180,8 @@ eversion_t(), 0, osd_reqid_t(), ctx->mtime)); - ctx->snapset_obc = get_object_context(snapoid, true); + if (!ctx->snapset_obc) + ctx->snapset_obc = get_object_context(snapoid, true); bool got = ctx->snapset_obc->get_write_greedy(ctx->op); assert(got); dout(20) << " got greedy write on snapset_obc " << *ctx->snapset_obc << dendl; diff -Nru ceph-0.80.8/src/osdc/ObjectCacher.cc ceph-0.80.9/src/osdc/ObjectCacher.cc --- ceph-0.80.8/src/osdc/ObjectCacher.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/osdc/ObjectCacher.cc 2015-03-09 19:51:18.000000000 +0000 @@ -710,9 +710,6 @@ } } - ls.splice(ls.end(), waitfor_read); - waitfor_read.clear(); - // apply to bh's! loff_t opos = start; while (true) { @@ -799,6 +796,8 @@ ldout(cct, 20) << "finishing waiters " << ls << dendl; finish_contexts(cct, ls, err); + retry_waiting_reads(); + --reads_outstanding; read_cond.Signal(); } @@ -1108,32 +1107,36 @@ // TODO: make read path not call _readx for every completion hits.insert(errors.begin(), errors.end()); } - + if (!missing.empty() || !rx.empty()) { // read missing for (map::iterator bh_it = missing.begin(); bh_it != missing.end(); ++bh_it) { - loff_t clean = get_stat_clean() + get_stat_rx() + - bh_it->second->length(); - if (get_stat_rx() > 0 && static_cast(clean) > max_size) { - // cache is full -- wait for rx's to complete - ldout(cct, 10) << "readx missed, waiting on cache to free " - << (clean - max_size) << " bytes" << dendl; - if (success) { - waitfor_read.push_back(new C_RetryRead(this, rd, oset, onfinish)); - } - bh_remove(o, bh_it->second); - delete bh_it->second; - } else { - bh_read(bh_it->second); - if (success && onfinish) { - ldout(cct, 10) << "readx missed, waiting on " << *bh_it->second - << " off " << bh_it->first << dendl; + uint64_t rx_bytes = static_cast( + stat_rx + bh_it->second->length()); + if (!waitfor_read.empty() || rx_bytes > max_size) { + // cache is full with concurrent reads -- wait for rx's to complete + // to constrain memory growth (especially during copy-ups) + if (success) { + ldout(cct, 10) << "readx missed, waiting on cache to complete " + << waitfor_read.size() << " blocked reads, " + << (MAX(rx_bytes, max_size) - max_size) + << " read bytes" << dendl; + waitfor_read.push_back(new C_RetryRead(this, rd, oset, onfinish)); + } + + bh_remove(o, bh_it->second); + delete bh_it->second; + } else { + bh_read(bh_it->second); + if (success && onfinish) { + ldout(cct, 10) << "readx missed, waiting on " << *bh_it->second + << " off " << bh_it->first << dendl; bh_it->second->waitfor_read[bh_it->first].push_back( new C_RetryRead(this, 
rd, oset, onfinish) ); - } - bytes_not_in_cache += bh_it->second->length(); - } + } + } + bytes_not_in_cache += bh_it->second->length(); success = false; } @@ -1246,7 +1249,7 @@ // no misses... success! do the read. assert(!hit_ls.empty()); ldout(cct, 10) << "readx has all buffers" << dendl; - + // ok, assemble into result buffer. uint64_t pos = 0; if (rd->bl && !error) { @@ -1279,6 +1282,18 @@ return ret; } +void ObjectCacher::retry_waiting_reads() +{ + list ls; + ls.swap(waitfor_read); + + while (!ls.empty() && waitfor_read.empty()) { + Context *ctx = ls.front(); + ls.pop_front(); + ctx->complete(0); + } + waitfor_read.splice(waitfor_read.end(), ls); +} int ObjectCacher::writex(OSDWrite *wr, ObjectSet *oset, Mutex& wait_on_lock, Context *onfreespace) diff -Nru ceph-0.80.8/src/osdc/ObjectCacher.h ceph-0.80.9/src/osdc/ObjectCacher.h --- ceph-0.80.8/src/osdc/ObjectCacher.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/osdc/ObjectCacher.h 2015-03-09 19:51:18.000000000 +0000 @@ -340,6 +340,7 @@ void *flush_set_callback_arg; vector > objects; // indexed by pool_id + list waitfor_read; ceph_tid_t last_read_tid; @@ -458,6 +459,7 @@ int _readx(OSDRead *rd, ObjectSet *oset, Context *onfinish, bool external_call); + void retry_waiting_reads(); public: void bh_read_finish(int64_t poolid, sobject_t oid, ceph_tid_t tid, diff -Nru ceph-0.80.8/src/rgw/Makefile.am ceph-0.80.9/src/rgw/Makefile.am --- ceph-0.80.8/src/rgw/Makefile.am 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/Makefile.am 2015-03-09 19:51:18.000000000 +0000 @@ -53,6 +53,18 @@ -lfcgi \ -ldl +CIVETWEB_INCLUDE = --include civetweb/include/civetweb_conf.h + +libcivetweb_la_SOURCES = \ + rgw/rgw_civetweb.cc \ + rgw/rgw_civetweb_log.cc \ + civetweb/src/civetweb.c + +libcivetweb_la_CXXFLAGS = ${CIVETWEB_INCLUDE} -Woverloaded-virtual ${AM_CXXFLAGS} +libcivetweb_la_CFLAGS = -Icivetweb/include ${CIVETWEB_INCLUDE} + +noinst_LTLIBRARIES += libcivetweb.la + radosgw_SOURCES = \ rgw/rgw_resolve.cc \ rgw/rgw_rest.cc \ @@ -71,12 +83,9 @@ rgw/rgw_swift.cc \ rgw/rgw_swift_auth.cc \ rgw/rgw_loadgen.cc \ - rgw/rgw_civetweb.cc \ - rgw/rgw_civetweb_log.cc \ - civetweb/src/civetweb.c \ rgw/rgw_main.cc -radosgw_CFLAGS = -Icivetweb/include -radosgw_LDADD = $(LIBRGW) $(LIBRGW_DEPS) $(RESOLV_LIBS) $(CEPH_GLOBAL) +radosgw_CFLAGS = -I$(srcdir)/civetweb/include +radosgw_LDADD = $(LIBRGW) $(LIBCIVETWEB) $(LIBRGW_DEPS) $(RESOLV_LIBS) $(CEPH_GLOBAL) bin_PROGRAMS += radosgw radosgw_admin_SOURCES = rgw/rgw_admin.cc @@ -162,5 +171,6 @@ rgw/rgw_civetweb_log.h \ civetweb/civetweb.h \ civetweb/include/civetweb.h \ + civetweb/include/civetweb_conf.h \ civetweb/src/md5.h diff -Nru ceph-0.80.8/src/rgw/rgw_admin.cc ceph-0.80.9/src/rgw/rgw_admin.cc --- ceph-0.80.8/src/rgw/rgw_admin.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_admin.cc 2015-03-09 19:51:18.000000000 +0000 @@ -679,7 +679,11 @@ quota.max_objects = max_objects; } if (have_max_size) { - quota.max_size_kb = rgw_rounded_kb(max_size); + if (max_size < 0) { + quota.max_size_kb = -1; + } else { + quota.max_size_kb = rgw_rounded_kb(max_size); + } } break; case OPT_QUOTA_DISABLE: @@ -1364,7 +1368,13 @@ cerr << "could not create user: " << err_msg << std::endl; return -ret; } - + if (!subuser.empty()) { + ret = user.subusers.add(user_op, &err_msg); + if (ret < 0) { + cerr << "could not create subuser: " << err_msg << std::endl; + return -ret; + } + } break; case OPT_USER_RM: ret = user.remove(user_op, &err_msg); diff -Nru ceph-0.80.8/src/rgw/rgw_civetweb.cc 
ceph-0.80.9/src/rgw/rgw_civetweb.cc --- ceph-0.80.8/src/rgw/rgw_civetweb.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_civetweb.cc 2015-03-09 19:51:18.000000000 +0000 @@ -42,7 +42,7 @@ if (0 && data.length() == 0) { has_content_length = true; - print("Transfer-Enconding: %s\n", "chunked"); + print("Transfer-Enconding: %s\r\n", "chunked"); data.append("0\r\n\r\n", sizeof("0\r\n\r\n")-1); } else { int r = send_content_length(data.length()); @@ -128,7 +128,7 @@ if (!status_name) status_name = ""; - snprintf(buf, sizeof(buf), "HTTP/1.1 %s %s\n", status, status_name); + snprintf(buf, sizeof(buf), "HTTP/1.1 %s %s\r\n", status, status_name); bufferlist bl; bl.append(buf); @@ -168,5 +168,5 @@ has_content_length = true; char buf[21]; snprintf(buf, sizeof(buf), "%"PRIu64, len); - return print("Content-Length: %s\n", buf); + return print("Content-Length: %s\r\n", buf); } diff -Nru ceph-0.80.8/src/rgw/rgw_common.cc ceph-0.80.9/src/rgw/rgw_common.cc --- ceph-0.80.8/src/rgw/rgw_common.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_common.cc 2015-03-09 19:51:18.000000000 +0000 @@ -506,7 +506,7 @@ } string substr, nameval; substr = str.substr(pos, fpos - pos); - url_decode(substr, nameval); + url_decode(substr, nameval, true); NameVal nv(nameval); int ret = nv.parse(); if (ret >= 0) { @@ -690,14 +690,13 @@ return hex_table.to_num(c); } -bool url_decode(string& src_str, string& dest_str) +bool url_decode(string& src_str, string& dest_str, bool in_query) { const char *src = src_str.c_str(); char dest[src_str.size() + 1]; int pos = 0; char c; - bool in_query = false; while (*src) { if (*src != '%') { if (!in_query || *src != '+') { diff -Nru ceph-0.80.8/src/rgw/rgw_common.h ceph-0.80.9/src/rgw/rgw_common.h --- ceph-0.80.8/src/rgw/rgw_common.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_common.h 2015-03-09 19:51:18.000000000 +0000 @@ -1343,7 +1343,7 @@ extern bool verify_object_permission(struct req_state *s, int perm); /** Convert an input URL into a sane object name * by converting %-escaped strings into characters, etc*/ -extern bool url_decode(string& src_str, string& dest_str); +extern bool url_decode(string& src_str, string& dest_str, bool in_query = false); extern void url_encode(const string& src, string& dst); extern void calc_hmac_sha1(const char *key, int key_len, diff -Nru ceph-0.80.8/src/rgw/rgw_fcgi.cc ceph-0.80.9/src/rgw/rgw_fcgi.cc --- ceph-0.80.8/src/rgw/rgw_fcgi.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_fcgi.cc 2015-03-09 19:51:18.000000000 +0000 @@ -32,7 +32,7 @@ int RGWFCGX::send_status(const char *status, const char *status_name) { - return print("Status: %s\n", status); + return print("Status: %s %s\r\n", status, status_name); } int RGWFCGX::send_100_continue() @@ -48,7 +48,7 @@ { char buf[21]; snprintf(buf, sizeof(buf), "%"PRIu64, len); - return print("Content-Length: %s\n", buf); + return print("Content-Length: %s\r\n", buf); } int RGWFCGX::complete_header() diff -Nru ceph-0.80.8/src/rgw/rgw_json_enc.cc ceph-0.80.9/src/rgw/rgw_json_enc.cc --- ceph-0.80.8/src/rgw/rgw_json_enc.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_json_enc.cc 2015-03-09 19:51:18.000000000 +0000 @@ -429,7 +429,7 @@ { RGWAccessKey k; k.decode_json(o, true); - m[k.subuser] = k; + m[k.id] = k; } static void decode_subusers(map& m, JSONObj *o) diff -Nru ceph-0.80.8/src/rgw/rgw_main.cc ceph-0.80.9/src/rgw/rgw_main.cc --- ceph-0.80.8/src/rgw/rgw_main.cc 2015-01-14 18:15:23.000000000 +0000 +++ 
ceph-0.80.9/src/rgw/rgw_main.cc 2015-03-09 19:51:18.000000000 +0000 @@ -144,6 +144,8 @@ bool get_val(const string& key, const string& def_val, string *out); bool get_val(const string& key, int def_val, int *out); + map& get_config_map() { return config_map; } + string get_framework() { return framework; } }; @@ -916,6 +918,12 @@ struct mg_context *ctx; RGWProcessEnv env; + void set_conf_default(map& m, const string& key, const string& def_val) { + if (m.find(key) == m.end()) { + m[key] = def_val; + } + } + public: RGWMongooseFrontend(RGWProcessEnv& pe, RGWFrontendConfig *_conf) : conf(_conf), ctx(NULL), env(pe) { } @@ -928,9 +936,23 @@ char thread_pool_buf[32]; snprintf(thread_pool_buf, sizeof(thread_pool_buf), "%d", (int)g_conf->rgw_thread_pool_size); string port_str; + map conf_map = conf->get_config_map(); conf->get_val("port", "80", &port_str); - const char *options[] = {"listening_ports", port_str.c_str(), "enable_keep_alive", "yes", "num_threads", thread_pool_buf, - "decode_url", "no", NULL}; + conf_map.erase("port"); + conf_map["listening_ports"] = port_str; + set_conf_default(conf_map, "enable_keep_alive", "yes"); + set_conf_default(conf_map, "num_threads", thread_pool_buf); + set_conf_default(conf_map, "decode_url", "no"); + + const char *options[conf_map.size() * 2 + 1]; + int i = 0; + for (map::iterator iter = conf_map.begin(); iter != conf_map.end(); ++iter) { + options[i] = iter->first.c_str(); + options[i + 1] = iter->second.c_str(); + dout(20)<< "civetweb config: " << options[i] << ": " << (options[i + 1] ? options[i + 1] : "") << dendl; + i += 2; + } + options[i] = NULL; struct mg_callbacks cb; memset((void *)&cb, 0, sizeof(cb)); diff -Nru ceph-0.80.8/src/rgw/rgw_op.cc ceph-0.80.9/src/rgw/rgw_op.cc --- ceph-0.80.8/src/rgw/rgw_op.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_op.cc 2015-03-09 19:51:18.000000000 +0000 @@ -33,7 +33,8 @@ static string mp_ns = RGW_OBJ_NS_MULTIPART; static string shadow_ns = RGW_OBJ_NS_SHADOW; -#define MULTIPART_UPLOAD_ID_PREFIX "2/" // must contain a unique char that may not come up in gen_rand_alpha() +#define MULTIPART_UPLOAD_ID_PREFIX_LEGACY "2/" +#define MULTIPART_UPLOAD_ID_PREFIX "2~" // must contain a unique char that may not come up in gen_rand_alpha() class MultipartMetaFilter : public RGWAccessListFilter { public: @@ -1438,7 +1439,8 @@ { const char *uid = upload_id.c_str(); - return (strncmp(uid, MULTIPART_UPLOAD_ID_PREFIX, sizeof(MULTIPART_UPLOAD_ID_PREFIX) - 1) == 0); + return (strncmp(uid, MULTIPART_UPLOAD_ID_PREFIX, sizeof(MULTIPART_UPLOAD_ID_PREFIX) - 1) == 0) || + (strncmp(uid, MULTIPART_UPLOAD_ID_PREFIX_LEGACY, sizeof(MULTIPART_UPLOAD_ID_PREFIX_LEGACY) - 1) == 0); } int RGWPutObjProcessor_Multipart::do_complete(string& etag, time_t *mtime, time_t set_mtime, map& attrs) @@ -1524,45 +1526,6 @@ rgw_bucket_object_pre_exec(s); } -static int put_obj_user_manifest_iterate_cb(rgw_bucket& bucket, RGWObjEnt& ent, RGWAccessControlPolicy *bucket_policy, off_t start_ofs, off_t end_ofs, - void *param) -{ - RGWPutObj *op = (RGWPutObj *)param; - return op->user_manifest_iterate_cb(bucket, ent, bucket_policy, start_ofs, end_ofs); -} - -int RGWPutObj::user_manifest_iterate_cb(rgw_bucket& bucket, RGWObjEnt& ent, RGWAccessControlPolicy *bucket_policy, off_t start_ofs, off_t end_ofs) -{ - rgw_obj part(bucket, ent.name); - - map attrs; - - int ret = get_obj_attrs(store, s, part, attrs, NULL, NULL); - if (ret < 0) { - return ret; - } - map::iterator iter = attrs.find(RGW_ATTR_ETAG); - if (iter == attrs.end()) { - return 0; - } - 
bufferlist& bl = iter->second; - const char *buf = bl.c_str(); - int len = bl.length(); - while (len > 0 && buf[len - 1] == '\0') { - len--; - } - if (len > 0) { - user_manifest_parts_hash->Update((const byte *)bl.c_str(), len); - } - - if (s->cct->_conf->subsys.should_gather(ceph_subsys_rgw, 20)) { - string e(bl.c_str(), bl.length()); - ldout(s->cct, 20) << __func__ << ": appending user manifest etag: " << e << dendl; - } - - return 0; -} - static int put_data_and_throttle(RGWPutObjProcessor *processor, bufferlist& data, off_t ofs, MD5 *hash, bool need_to_wait) { @@ -1731,7 +1694,6 @@ bufferlist manifest_bl; string manifest_obj_prefix; string manifest_bucket; - RGWBucketInfo bucket_info; char etag_buf[CEPH_CRYPTO_MD5_DIGESTSIZE]; char etag_buf_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16]; @@ -1749,16 +1711,6 @@ manifest_bucket = prefix_str.substr(0, pos); manifest_obj_prefix = prefix_str.substr(pos + 1); - ret = store->get_bucket_info(NULL, manifest_bucket, bucket_info, NULL, NULL); - if (ret < 0) { - ldout(s->cct, 0) << "could not get bucket info for bucket=" << manifest_bucket << dendl; - } - ret = iterate_user_manifest_parts(s->cct, store, 0, -1, bucket_info.bucket, manifest_obj_prefix, - NULL, NULL, put_obj_user_manifest_iterate_cb, (void *)this); - if (ret < 0) { - goto done; - } - hash.Final((byte *)etag_buf); buf_to_hex((const unsigned char *)etag_buf, CEPH_CRYPTO_MD5_DIGESTSIZE, etag_buf_str); @@ -1934,10 +1886,14 @@ /* no need to track object versioning, need it for bucket's data only */ RGWObjVersionTracker *ptracker = (s->object ? NULL : &s->bucket_info.objv_tracker); - /* check if obj exists, read orig attrs */ - ret = get_obj_attrs(store, s, obj, orig_attrs, NULL, ptracker); - if (ret < 0) - return; + if (s->object) { + /* check if obj exists, read orig attrs */ + ret = get_obj_attrs(store, s, obj, orig_attrs, NULL, ptracker); + if (ret < 0) + return; + } else { + orig_attrs = s->bucket_attrs; + } /* only remove meta attrs */ for (iter = orig_attrs.begin(); iter != orig_attrs.end(); ++iter) { @@ -2208,6 +2164,7 @@ replace_attrs, attrs, RGW_OBJ_CATEGORY_MAIN, &s->req_id, /* use req_id as tag */ + &etag, &s->err, copy_obj_progress_cb, (void *)this ); @@ -2271,7 +2228,6 @@ RGWAccessControlPolicy_S3 new_policy(s->cct); stringstream ss; char *new_data = NULL; - ACLOwner owner; rgw_obj obj; ret = 0; @@ -2281,8 +2237,10 @@ return; } - owner.set_id(s->user.user_id); - owner.set_name(s->user.display_name); + + RGWAccessControlPolicy *existing_policy = (s->object == NULL? 
s->bucket_acl : s->object_acl); + + owner = existing_policy->get_owner(); ret = get_params(); if (ret < 0) @@ -2530,7 +2488,7 @@ do { char buf[33]; gen_rand_alphanumeric(s->cct, buf, sizeof(buf) - 1); - upload_id = "2/"; /* v2 upload id */ + upload_id = MULTIPART_UPLOAD_ID_PREFIX; /* v2 upload id */ upload_id.append(buf); string tmp_obj_name; diff -Nru ceph-0.80.8/src/rgw/rgw_op.h ceph-0.80.9/src/rgw/rgw_op.h --- ceph-0.80.8/src/rgw/rgw_op.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_op.h 2015-03-09 19:51:18.000000000 +0000 @@ -345,8 +345,6 @@ RGWPutObjProcessor *select_processor(bool *is_multipart); void dispose_processor(RGWPutObjProcessor *processor); - int user_manifest_iterate_cb(rgw_bucket& bucket, RGWObjEnt& ent, RGWAccessControlPolicy *bucket_policy, off_t start_ofs, off_t end_ofs); - int verify_permission(); void pre_exec(); void execute(); @@ -490,6 +488,7 @@ string source_zone; string client_id; string op_id; + string etag; off_t last_ofs; @@ -556,6 +555,7 @@ int ret; size_t len; char *data; + ACLOwner owner; public: RGWPutACLs() { diff -Nru ceph-0.80.8/src/rgw/rgw_rados.cc ceph-0.80.9/src/rgw/rgw_rados.cc --- ceph-0.80.8/src/rgw/rgw_rados.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_rados.cc 2015-03-09 19:51:18.000000000 +0000 @@ -3141,6 +3141,7 @@ map& attrs, RGWObjCategory category, string *ptag, + string *petag, struct rgw_err *err, void (*progress_cb)(off_t, void *), void *progress_data) @@ -3237,6 +3238,10 @@ if (ret < 0) goto set_err_state; + if (petag) { + *petag = etag; + } + { /* opening scope so that we can do goto, sorry */ bufferlist& extra_data_bl = processor.get_extra_data(); if (extra_data_bl.length()) { @@ -3302,6 +3307,10 @@ if (ret < 0) return ret; + if (petag) { + *petag = etag; + } + return 0; } @@ -3331,7 +3340,7 @@ } if (copy_data) { /* refcounting tail wouldn't work here, just copy the data */ - return copy_obj_data(ctx, dest_bucket_info.owner, &handle, end, dest_obj, src_obj, max_chunk_size, mtime, src_attrs, category, ptag, err); + return copy_obj_data(ctx, dest_bucket_info.owner, &handle, end, dest_obj, src_obj, max_chunk_size, mtime, src_attrs, category, ptag, petag, err); } RGWObjManifest::obj_iterator miter = astate->manifest.obj_begin(); @@ -3410,6 +3419,14 @@ if (mtime) obj_stat(ctx, dest_obj, NULL, mtime, NULL, NULL, NULL, NULL); + if (petag) { + map::iterator iter = src_attrs.find(RGW_ATTR_ETAG); + if (iter != src_attrs.end()) { + bufferlist& etagbl = iter->second; + *petag = string(etagbl.c_str(), etagbl.length()); + } + } + return 0; done_ret: @@ -3446,6 +3463,7 @@ map& attrs, RGWObjCategory category, string *ptag, + string *petag, struct rgw_err *err) { bufferlist first_chunk; @@ -3492,6 +3510,9 @@ if (iter != attrs.end()) { bufferlist& bl = iter->second; etag = string(bl.c_str(), bl.length()); + if (petag) { + *petag = etag; + } } ret = processor.complete(etag, NULL, 0, attrs); @@ -4140,7 +4161,33 @@ if (!op.size()) return 0; + string tag; + if (state) { + r = prepare_update_index(state, bucket, CLS_RGW_OP_ADD, obj, tag); + if (r < 0) + return r; + } + r = ref.ioctx.operate(ref.oid, &op); + if (state) { + if (r >= 0) { + bufferlist acl_bl = attrs[RGW_ATTR_ACL]; + bufferlist etag_bl = attrs[RGW_ATTR_ETAG]; + bufferlist content_type_bl = attrs[RGW_ATTR_CONTENT_TYPE]; + string etag(etag_bl.c_str(), etag_bl.length()); + string content_type(content_type_bl.c_str(), content_type_bl.length()); + uint64_t epoch = ref.ioctx.get_last_version(); + int64_t poolid = ref.ioctx.get_id(); + utime_t mtime = 
ceph_clock_now(cct); + r = complete_update_index(bucket, obj.object, tag, poolid, epoch, state->size, + mtime, etag, content_type, &acl_bl, RGW_OBJ_CATEGORY_MAIN, NULL); + } else { + int ret = complete_update_index_cancel(bucket, obj.object, tag); + if (ret < 0) { + ldout(cct, 0) << "ERROR: comlete_update_index_cancel() returned r=" << r << dendl; + } + } + } if (r < 0) return r; @@ -4644,7 +4691,7 @@ bl.append(read_bl); done: - if (bl.length() > 0) { + if (r >= 0) { r = bl.length(); } if (r < 0 || !len || ((off_t)(ofs + len - 1) == end)) { diff -Nru ceph-0.80.8/src/rgw/rgw_rados.h ceph-0.80.9/src/rgw/rgw_rados.h --- ceph-0.80.8/src/rgw/rgw_rados.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_rados.h 2015-03-09 19:51:18.000000000 +0000 @@ -1566,6 +1566,7 @@ map& attrs, RGWObjCategory category, string *ptag, + string *petag, struct rgw_err *err, void (*progress_cb)(off_t, void *), void *progress_data); @@ -1580,6 +1581,7 @@ map& attrs, RGWObjCategory category, string *ptag, + string *petag, struct rgw_err *err); /** * Delete a bucket. diff -Nru ceph-0.80.8/src/rgw/rgw_rest.cc ceph-0.80.9/src/rgw/rgw_rest.cc --- ceph-0.80.8/src/rgw/rgw_rest.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_rest.cc 2015-03-09 19:51:18.000000000 +0000 @@ -244,7 +244,7 @@ if (r < 0) { ldout(s->cct, 0) << "ERROR: s->cio->print() returned err=" << r << dendl; } - r = s->cio->print("Accept-Ranges: %s\n", "bytes"); + r = s->cio->print("Accept-Ranges: %s\r\n", "bytes"); if (r < 0) { ldout(s->cct, 0) << "ERROR: s->cio->print() returned err=" << r << dendl; } @@ -254,9 +254,9 @@ { int r; if (s->prot_flags & RGW_REST_SWIFT) - r = s->cio->print("etag: %s\n", etag); + r = s->cio->print("etag: %s\r\n", etag); else - r = s->cio->print("ETag: \"%s\"\n", etag); + r = s->cio->print("ETag: \"%s\"\r\n", etag); if (r < 0) { ldout(s->cct, 0) << "ERROR: s->cio->print() returned err=" << r << dendl; } @@ -265,7 +265,7 @@ void dump_pair(struct req_state *s, const char *key, const char *value) { if ( (strlen(key) > 0) && (strlen(value) > 0)) - s->cio->print("%s: %s\n", key, value); + s->cio->print("%s: %s\r\n", key, value); } void dump_bucket_from_state(struct req_state *s) @@ -273,16 +273,10 @@ int expose_bucket = g_conf->rgw_expose_bucket; if (expose_bucket) { if (!s->bucket_name_str.empty()) - s->cio->print("Bucket: \"%s\"\n", s->bucket_name_str.c_str()); + s->cio->print("Bucket: \"%s\"\r\n", s->bucket_name_str.c_str()); } } -void dump_object_from_state(struct req_state *s) -{ - if (!s->object_str.empty()) - s->cio->print("Key: \"%s\"\n", s->object_str.c_str()); -} - void dump_uri_from_state(struct req_state *s) { if (strcmp(s->info.request_uri.c_str(), "/") == 0) { @@ -296,12 +290,12 @@ location += "/"; if (!s->object_str.empty()) { location += s->object_str; - s->cio->print("Location: %s\n", location.c_str()); + s->cio->print("Location: %s\r\n", location.c_str()); } } } else { - s->cio->print("Location: \"%s\"\n", s->info.request_uri.c_str()); + s->cio->print("Location: \"%s\"\r\n", s->info.request_uri.c_str()); } } @@ -310,7 +304,7 @@ if (redirect.empty()) return; - s->cio->print("Location: %s\n", redirect.c_str()); + s->cio->print("Location: %s\r\n", redirect.c_str()); } static void dump_time_header(struct req_state *s, const char *name, time_t t) @@ -325,7 +319,7 @@ if (strftime(timestr, sizeof(timestr), "%a, %d %b %Y %H:%M:%S %Z", tmp) == 0) return; - int r = s->cio->print("%s: %s\n", name, timestr); + int r = s->cio->print("%s: %s\r\n", name, timestr); if (r < 0) { ldout(s->cct, 0) << 
"ERROR: s->cio->print() returned err=" << r << dendl; } @@ -341,7 +335,7 @@ char buf[32]; snprintf(buf, sizeof(buf), "%lld", (long long)t); - int r = s->cio->print("%s: %s\n", name, buf); + int r = s->cio->print("%s: %s\r\n", name, buf); if (r < 0) { ldout(s->cct, 0) << "ERROR: s->cio->print() returned err=" << r << dendl; } @@ -374,16 +368,16 @@ void dump_access_control(struct req_state *s, const char *origin, const char *meth, const char *hdr, const char *exp_hdr, uint32_t max_age) { if (origin && (origin[0] != '\0')) { - s->cio->print("Access-Control-Allow-Origin: %s\n", origin); + s->cio->print("Access-Control-Allow-Origin: %s\r\n", origin); if (meth && (meth[0] != '\0')) - s->cio->print("Access-Control-Allow-Methods: %s\n", meth); + s->cio->print("Access-Control-Allow-Methods: %s\r\n", meth); if (hdr && (hdr[0] != '\0')) - s->cio->print("Access-Control-Allow-Headers: %s\n", hdr); + s->cio->print("Access-Control-Allow-Headers: %s\r\n", hdr); if (exp_hdr && (exp_hdr[0] != '\0')) { - s->cio->print("Access-Control-Expose-Headers: %s\n", exp_hdr); + s->cio->print("Access-Control-Expose-Headers: %s\r\n", exp_hdr); } if (max_age != CORS_MAX_AGE_INVALID) { - s->cio->print("Access-Control-Max-Age: %d\n", max_age); + s->cio->print("Access-Control-Max-Age: %d\r\n", max_age); } } } @@ -483,7 +477,7 @@ /* dumping range into temp buffer first, as libfcgi will fail to digest %lld */ snprintf(range_buf, sizeof(range_buf), "%lld-%lld/%lld", (long long)ofs, (long long)end, (long long)total); - int r = s->cio->print("Content-Range: bytes %s\n", range_buf); + int r = s->cio->print("Content-Range: bytes %s\r\n", range_buf); if (r < 0) { ldout(s->cct, 0) << "ERROR: s->cio->print() returned err=" << r << dendl; } diff -Nru ceph-0.80.8/src/rgw/rgw_rest.h ceph-0.80.9/src/rgw/rgw_rest.h --- ceph-0.80.8/src/rgw/rgw_rest.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_rest.h 2015-03-09 19:51:18.000000000 +0000 @@ -369,7 +369,6 @@ extern void list_all_buckets_end(struct req_state *s); extern void dump_time(struct req_state *s, const char *name, time_t *t); extern void dump_bucket_from_state(struct req_state *s); -extern void dump_object_from_state(struct req_state *s); extern void dump_uri_from_state(struct req_state *s); extern void dump_redirect(struct req_state *s, const string& redirect); extern void dump_pair(struct req_state *s, const char *key, const char *value); diff -Nru ceph-0.80.8/src/rgw/rgw_rest_s3.cc ceph-0.80.9/src/rgw/rgw_rest_s3.cc --- ceph-0.80.8/src/rgw/rgw_rest_s3.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_rest_s3.cc 2015-03-09 19:51:18.000000000 +0000 @@ -161,7 +161,7 @@ dump_errno(s); for (riter = response_attrs.begin(); riter != response_attrs.end(); ++riter) { - s->cio->print("%s: %s\n", riter->first.c_str(), riter->second.c_str()); + s->cio->print("%s: %s\r\n", riter->first.c_str(), riter->second.c_str()); } if (!content_type) @@ -303,9 +303,9 @@ { char buf[32]; snprintf(buf, sizeof(buf), "%lld", (long long)bucket.count); - s->cio->print("X-RGW-Object-Count: %s\n", buf); + s->cio->print("X-RGW-Object-Count: %s\r\n", buf); snprintf(buf, sizeof(buf), "%lld", (long long)bucket.size); - s->cio->print("X-RGW-Bytes-Used: %s\n", buf); + s->cio->print("X-RGW-Bytes-Used: %s\r\n", buf); } void RGWStatBucket_ObjStore_S3::send_response() @@ -321,16 +321,16 @@ dump_start(s); } -static int create_s3_policy(struct req_state *s, RGWRados *store, RGWAccessControlPolicy_S3& s3policy) +static int create_s3_policy(struct req_state *s, RGWRados *store, 
RGWAccessControlPolicy_S3& s3policy, ACLOwner& owner) { if (s->has_acl_header) { if (!s->canned_acl.empty()) return -ERR_INVALID_REQUEST; - return s3policy.create_from_headers(store, s->info.env, s->owner); + return s3policy.create_from_headers(store, s->info.env, owner); } - return s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl); + return s3policy.create_canned(owner, s->bucket_owner, s->canned_acl); } class RGWLocationConstraint : public XMLObj @@ -386,7 +386,7 @@ { RGWAccessControlPolicy_S3 s3policy(s->cct); - int r = create_s3_policy(s, store, s3policy); + int r = create_s3_policy(s, store, s3policy, s->owner); if (r < 0) return r; @@ -487,7 +487,7 @@ if (!s->length) return -ERR_LENGTH_REQUIRED; - int r = create_s3_policy(s, store, s3policy); + int r = create_s3_policy(s, store, s3policy, s->owner); if (r < 0) return r; @@ -1198,7 +1198,7 @@ RGWAccessControlPolicy_S3 s3policy(s->cct); /* build a policy for the target object */ - int r = create_s3_policy(s, store, s3policy); + int r = create_s3_policy(s, store, s3policy, s->owner); if (r < 0) return r; @@ -1264,7 +1264,7 @@ set_req_state_err(s, ret); dump_errno(s); - end_header(s, this, "binary/octet-stream"); + end_header(s, this, "application/xml"); if (ret == 0) { s->formatter->open_object_section("CopyObjectResult"); } @@ -1285,13 +1285,8 @@ if (ret == 0) { dump_time(s, "LastModified", &mtime); - map<string, bufferlist>::iterator iter = attrs.find(RGW_ATTR_ETAG); - if (iter != attrs.end()) { - bufferlist& bl = iter->second; - if (bl.length()) { - char *etag = bl.c_str(); - s->formatter->dump_string("ETag", etag); - } + if (!etag.empty()) { + s->formatter->dump_string("ETag", etag); } s->formatter->close_section(); rgw_flush_formatter_and_reset(s, s->formatter); @@ -1318,7 +1313,7 @@ s->canned_acl.clear(); } - int r = create_s3_policy(s, store, s3policy); + int r = create_s3_policy(s, store, s3policy, owner); if (r < 0) return r; @@ -1460,7 +1455,7 @@ int RGWInitMultipart_ObjStore_S3::get_params() { RGWAccessControlPolicy_S3 s3policy(s->cct); - ret = create_s3_policy(s, store, s3policy); + ret = create_s3_policy(s, store, s3policy, s->owner); if (ret < 0) return ret; diff -Nru ceph-0.80.8/src/rgw/rgw_rest_swift.cc ceph-0.80.9/src/rgw/rgw_rest_swift.cc --- ceph-0.80.8/src/rgw/rgw_rest_swift.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_rest_swift.cc 2015-03-09 19:51:18.000000000 +0000 @@ -223,11 +223,11 @@ { char buf[32]; snprintf(buf, sizeof(buf), "%lld", (long long)bucket.count); - s->cio->print("X-Container-Object-Count: %s\n", buf); + s->cio->print("X-Container-Object-Count: %s\r\n", buf); snprintf(buf, sizeof(buf), "%lld", (long long)bucket.size); - s->cio->print("X-Container-Bytes-Used: %s\n", buf); + s->cio->print("X-Container-Bytes-Used: %s\r\n", buf); snprintf(buf, sizeof(buf), "%lld", (long long)bucket.size_rounded); - s->cio->print("X-Container-Bytes-Used-Actual: %s\n", buf); + s->cio->print("X-Container-Bytes-Used-Actual: %s\r\n", buf); if (!s->object) { RGWAccessControlPolicy_SWIFT *swift_policy = static_cast<RGWAccessControlPolicy_SWIFT *>(s->bucket_acl); @@ -247,13 +247,13 @@ { char buf[32]; snprintf(buf, sizeof(buf), "%lld", (long long)buckets_count); - s->cio->print("X-Account-Container-Count: %s\n", buf); + s->cio->print("X-Account-Container-Count: %s\r\n", buf); snprintf(buf, sizeof(buf), "%lld", (long long)buckets_object_count); - s->cio->print("X-Account-Object-Count: %s\n", buf); + s->cio->print("X-Account-Object-Count: %s\r\n", buf); snprintf(buf, sizeof(buf), "%lld", (long long)buckets_size); -
s->cio->print("X-Account-Bytes-Used: %s\n", buf); + s->cio->print("X-Account-Bytes-Used: %s\r\n", buf); snprintf(buf, sizeof(buf), "%lld", (long long)buckets_size_rounded); - s->cio->print("X-Account-Bytes-Used-Actual: %s\n", buf); + s->cio->print("X-Account-Bytes-Used-Actual: %s\r\n", buf); } void RGWStatAccount_ObjStore_SWIFT::send_response() @@ -552,7 +552,6 @@ int RGWGetObj_ObjStore_SWIFT::send_response_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) { const char *content_type = NULL; - int orig_ret = ret; map response_attrs; map::iterator riter; @@ -593,15 +592,11 @@ } } - if (partial_content && !ret) - ret = -STATUS_PARTIAL_CONTENT; - - if (ret) - set_req_state_err(s, ret); + set_req_state_err(s, (partial_content && !ret) ? STATUS_PARTIAL_CONTENT : ret); dump_errno(s); for (riter = response_attrs.begin(); riter != response_attrs.end(); ++riter) { - s->cio->print("%s: %s\n", riter->first.c_str(), riter->second.c_str()); + s->cio->print("%s: %s\r\n", riter->first.c_str(), riter->second.c_str()); } if (!content_type) @@ -611,7 +606,7 @@ sent_header = true; send_data: - if (get_data && !orig_ret) { + if (get_data && !ret) { int r = s->cio->write(bl.c_str() + bl_ofs, bl_len); if (r < 0) return r; @@ -788,8 +783,6 @@ if (!authorized) return -EPERM; - s->perm_mask = RGW_PERM_FULL_CONTROL; - return 0; } diff -Nru ceph-0.80.8/src/rgw/rgw_swift_auth.cc ceph-0.80.9/src/rgw/rgw_swift_auth.cc --- ceph-0.80.8/src/rgw/rgw_swift_auth.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_swift_auth.cc 2015-03-09 19:51:18.000000000 +0000 @@ -56,7 +56,7 @@ return ret; } -int rgw_swift_verify_signed_token(CephContext *cct, RGWRados *store, const char *token, RGWUserInfo& info) +int rgw_swift_verify_signed_token(CephContext *cct, RGWRados *store, const char *token, RGWUserInfo& info, string *pswift_user) { if (strncmp(token, "AUTH_rgwtk", 10) != 0) return -EINVAL; @@ -123,6 +123,7 @@ dout(0) << "NOTICE: tokens mismatch tok=" << buf << dendl; return -EPERM; } + *pswift_user = swift_user; return 0; } @@ -205,7 +206,7 @@ tenant_path.append(g_conf->rgw_swift_tenant_name); } - s->cio->print("X-Storage-Url: %s/%s/v1%s\n", swift_url.c_str(), + s->cio->print("X-Storage-Url: %s/%s/v1%s\r\n", swift_url.c_str(), swift_prefix.c_str(), tenant_path.c_str()); if ((ret = encode_token(s->cct, swift_key->id, swift_key->key, bl)) < 0) @@ -215,8 +216,8 @@ char buf[bl.length() * 2 + 1]; buf_to_hex((const unsigned char *)bl.c_str(), bl.length(), buf); - s->cio->print("X-Storage-Token: AUTH_rgwtk%s\n", buf); - s->cio->print("X-Auth-Token: AUTH_rgwtk%s\n", buf); + s->cio->print("X-Storage-Token: AUTH_rgwtk%s\r\n", buf); + s->cio->print("X-Auth-Token: AUTH_rgwtk%s\r\n", buf); } ret = STATUS_NO_CONTENT; diff -Nru ceph-0.80.8/src/rgw/rgw_swift_auth.h ceph-0.80.9/src/rgw/rgw_swift_auth.h --- ceph-0.80.8/src/rgw/rgw_swift_auth.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_swift_auth.h 2015-03-09 19:51:18.000000000 +0000 @@ -6,7 +6,7 @@ #define RGW_SWIFT_TOKEN_EXPIRATION (15 * 60) -extern int rgw_swift_verify_signed_token(CephContext *cct, RGWRados *store, const char *token, RGWUserInfo& info); +extern int rgw_swift_verify_signed_token(CephContext *cct, RGWRados *store, const char *token, RGWUserInfo& info, string *pswift_user); class RGW_SWIFT_Auth_Get : public RGWOp { public: diff -Nru ceph-0.80.8/src/rgw/rgw_swift.cc ceph-0.80.9/src/rgw/rgw_swift.cc --- ceph-0.80.8/src/rgw/rgw_swift.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_swift.cc 2015-03-09 19:51:18.000000000 +0000 @@ 
-611,13 +611,41 @@ bool RGWSwift::verify_swift_token(RGWRados *store, req_state *s) { + if (!do_verify_swift_token(store, s)) { + return false; + } + + if (!s->swift_user.empty()) { + string subuser; + ssize_t pos = s->swift_user.find(':'); + if (pos < 0) { + subuser = s->swift_user; + } else { + subuser = s->swift_user.substr(pos + 1); + } + s->perm_mask = 0; + map::iterator iter = s->user.subusers.find(subuser); + if (iter != s->user.subusers.end()) { + RGWSubUser& subuser = iter->second; + s->perm_mask = subuser.perm_mask; + } + } else { + s->perm_mask = RGW_PERM_FULL_CONTROL; + } + + return true; + +} + +bool RGWSwift::do_verify_swift_token(RGWRados *store, req_state *s) +{ if (!s->os_auth_token) { int ret = authenticate_temp_url(store, s); return (ret >= 0); } if (strncmp(s->os_auth_token, "AUTH_rgwtk", 10) == 0) { - int ret = rgw_swift_verify_signed_token(s->cct, store, s->os_auth_token, s->user); + int ret = rgw_swift_verify_signed_token(s->cct, store, s->os_auth_token, s->user, &s->swift_user); if (ret < 0) return false; diff -Nru ceph-0.80.8/src/rgw/rgw_swift.h ceph-0.80.9/src/rgw/rgw_swift.h --- ceph-0.80.8/src/rgw/rgw_swift.h 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/rgw/rgw_swift.h 2015-03-09 19:51:18.000000000 +0000 @@ -53,6 +53,7 @@ bool supports_keystone() { return !cct->_conf->rgw_keystone_url.empty(); } + bool do_verify_swift_token(RGWRados *store, req_state *s); protected: int check_revoked(); public: diff -Nru ceph-0.80.8/src/test/cli/crushtool/add-item-in-tree.t ceph-0.80.9/src/test/cli/crushtool/add-item-in-tree.t --- ceph-0.80.8/src/test/cli/crushtool/add-item-in-tree.t 1970-01-01 00:00:00.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/add-item-in-tree.t 2015-03-09 19:51:18.000000000 +0000 @@ -0,0 +1,10 @@ + $ crushtool -i "$TESTDIR/tree.template" --add-item 0 1.0 device0 --loc host host0 --loc cluster cluster0 -o one > /dev/null + $ crushtool -i one --add-item 1 1.0 device1 --loc host host0 --loc cluster cluster0 -o two > /dev/null + $ crushtool -i two --add-item 2 1.0 device2 --loc host host0 --loc cluster cluster0 -o tree > /dev/null + $ crushtool -i tree --add-item 3 1.0 device3 --loc host host0 --loc cluster cluster0 -o four > /dev/null + $ crushtool -i four --add-item 4 1.0 device4 --loc host host0 --loc cluster cluster0 -o five > /dev/null + $ crushtool -i five --add-item 5 1.0 device5 --loc host host0 --loc cluster cluster0 -o six > /dev/null + $ crushtool -i six --add-item 6 1.0 device6 --loc host host0 --loc cluster cluster0 -o seven > /dev/null + $ crushtool -i seven --add-item 7 1.0 device7 --loc host host0 --loc cluster cluster0 -o eight > /dev/null + $ crushtool -d eight -o final + $ cmp final "$TESTDIR/tree.template.final" diff -Nru ceph-0.80.8/src/test/cli/crushtool/adjust-item-weight.t ceph-0.80.9/src/test/cli/crushtool/adjust-item-weight.t --- ceph-0.80.8/src/test/cli/crushtool/adjust-item-weight.t 1970-01-01 00:00:00.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/adjust-item-weight.t 2015-03-09 19:51:18.000000000 +0000 @@ -0,0 +1,17 @@ + $ crushtool -i "$TESTDIR/simple.template" --add-item 0 1.0 device0 --loc host host0 --loc cluster cluster0 -o one > /dev/null + +# +# add device0 into host=fake, the weight of device0 in host=host0 is 1.0, the weight of device0 in host=fake is 2.0 +# + + $ crushtool -i one --add-item 0 2.0 device0 --loc host fake --loc cluster cluster0 -o two > /dev/null + $ crushtool -d two -o final + $ cmp final "$TESTDIR/simple.template.adj.two" + +# +# update the weight of device0 in host=host0, it 
will not affect the weight of device0 in host=fake +# + + $ crushtool -i two --update-item 0 3.0 device0 --loc host host0 --loc cluster cluster0 -o three > /dev/null + $ crushtool -d three -o final + $ cmp final "$TESTDIR/simple.template.adj.three" diff -Nru ceph-0.80.8/src/test/cli/crushtool/build.t ceph-0.80.9/src/test/cli/crushtool/build.t --- ceph-0.80.8/src/test/cli/crushtool/build.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/build.t 2015-03-09 19:51:18.000000000 +0000 @@ -52,7 +52,7 @@ # # crush rulesets are generated using the OSDMap helpers # - $ CEPH_ARGS="--debug-crush 0" crushtool --outfn "$map" --build --num_osds 1 root straw 0 + $ CEPH_ARGS="--debug-crush 0" crushtool --outfn "$map" --set-straw-calc-version 0 --build --num_osds 1 root straw 0 $ crushtool -o "$map.txt" -d "$map" $ cat "$map.txt" # begin crush map diff -Nru ceph-0.80.8/src/test/cli/crushtool/help.t ceph-0.80.9/src/test/cli/crushtool/help.t --- ceph-0.80.8/src/test/cli/crushtool/help.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/help.t 2015-03-09 19:51:18.000000000 +0000 @@ -33,6 +33,7 @@ --show utilization-all include zero weight items --show-statistics show chi squared statistics + --show-mappings show mappings --show-bad-mappings show bad mappings --show-choose-tries show choose tries histogram --set-choose-local-tries N diff -Nru ceph-0.80.8/src/test/cli/crushtool/set-choose.t ceph-0.80.9/src/test/cli/crushtool/set-choose.t --- ceph-0.80.8/src/test/cli/crushtool/set-choose.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/set-choose.t 2015-03-09 19:51:18.000000000 +0000 @@ -1,5 +1,6 @@ $ crushtool -c "$TESTDIR/set-choose.crushmap.txt" -o set-choose.crushmap - $ crushtool -i set-choose.crushmap --test --show-statistics + $ crushtool -i set-choose.crushmap --test --show-mappings --show-statistics --set-straw-calc-version 0 + crushtool successfully built or modified map. Use '-o ' to write it out. rule 0 (choose), x = 0..1023, numrep = 2..3 CRUSH rule 0 x 0 [0,3] CRUSH rule 0 x 1 [0,8] @@ -12306,7 +12307,8 @@ CRUSH rule 5 x 1022 [1,6,4] CRUSH rule 5 x 1023 [3,2,8] rule 5 (chooseleaf-set) num_rep 3 result size == 3:\t1024/1024 (esc) - $ crushtool -i set-choose.crushmap --test --show-statistics --weight 0 0 --weight 1 0 --weight 3 0 --weight 4 0 + $ crushtool -i set-choose.crushmap --test --show-mappings --show-statistics --weight 0 0 --weight 1 0 --weight 3 0 --weight 4 0 --set-straw-calc-version 0 + crushtool successfully built or modified map. Use '-o ' to write it out. rule 0 (choose), x = 0..1023, numrep = 2..3 CRUSH rule 0 x 0 [2,5] CRUSH rule 0 x 1 [2,8] @@ -24618,7 +24620,8 @@ CRUSH rule 5 x 1022 [2,6,5] CRUSH rule 5 x 1023 [5,2,8] rule 5 (chooseleaf-set) num_rep 3 result size == 3:\t1024/1024 (esc) - $ crushtool -i set-choose.crushmap --test --show-statistics --weight 0 0 --weight 3 0 --weight 4 .5 --weight 5 0 --weight 6 .1 --weight 7 0 + $ crushtool -i set-choose.crushmap --test --show-mappings --show-statistics --weight 0 0 --weight 3 0 --weight 4 .5 --weight 5 0 --weight 6 .1 --weight 7 0 --set-straw-calc-version 0 + crushtool successfully built or modified map. Use '-o ' to write it out. 
rule 0 (choose), x = 0..1023, numrep = 2..3 CRUSH rule 0 x 0 [2,4] CRUSH rule 0 x 1 [2,8] diff -Nru ceph-0.80.8/src/test/cli/crushtool/simple.template.adj.one ceph-0.80.9/src/test/cli/crushtool/simple.template.adj.one --- ceph-0.80.8/src/test/cli/crushtool/simple.template.adj.one 1970-01-01 00:00:00.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/simple.template.adj.one 2015-03-09 19:51:18.000000000 +0000 @@ -0,0 +1,56 @@ +# begin crush map + +# devices +device 0 device0 + +# types +type 0 device +type 1 host +type 2 cluster + +# buckets +host host0 { + id -2 # do not change unnecessarily + # weight 1.000 + alg straw + hash 0 # rjenkins1 + item device0 weight 1.000 +} +cluster cluster0 { + id -1 # do not change unnecessarily + # weight 1.000 + alg straw + hash 0 # rjenkins1 + item host0 weight 1.000 +} + +# rules +rule data { + ruleset 0 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} +rule metadata { + ruleset 1 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} +rule rbd { + ruleset 2 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} + +# end crush map diff -Nru ceph-0.80.8/src/test/cli/crushtool/simple.template.adj.three ceph-0.80.9/src/test/cli/crushtool/simple.template.adj.three --- ceph-0.80.8/src/test/cli/crushtool/simple.template.adj.three 1970-01-01 00:00:00.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/simple.template.adj.three 2015-03-09 19:51:18.000000000 +0000 @@ -0,0 +1,64 @@ +# begin crush map + +# devices +device 0 device0 + +# types +type 0 device +type 1 host +type 2 cluster + +# buckets +host host0 { + id -2 # do not change unnecessarily + # weight 3.000 + alg straw + hash 0 # rjenkins1 + item device0 weight 3.000 +} +host fake { + id -3 # do not change unnecessarily + # weight 2.000 + alg straw + hash 0 # rjenkins1 + item device0 weight 2.000 +} +cluster cluster0 { + id -1 # do not change unnecessarily + # weight 5.000 + alg straw + hash 0 # rjenkins1 + item host0 weight 3.000 + item fake weight 2.000 +} + +# rules +rule data { + ruleset 0 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} +rule metadata { + ruleset 1 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} +rule rbd { + ruleset 2 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} + +# end crush map diff -Nru ceph-0.80.8/src/test/cli/crushtool/simple.template.adj.two ceph-0.80.9/src/test/cli/crushtool/simple.template.adj.two --- ceph-0.80.8/src/test/cli/crushtool/simple.template.adj.two 1970-01-01 00:00:00.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/simple.template.adj.two 2015-03-09 19:51:18.000000000 +0000 @@ -0,0 +1,64 @@ +# begin crush map + +# devices +device 0 device0 + +# types +type 0 device +type 1 host +type 2 cluster + +# buckets +host host0 { + id -2 # do not change unnecessarily + # weight 1.000 + alg straw + hash 0 # rjenkins1 + item device0 weight 1.000 +} +host fake { + id -3 # do not change unnecessarily + # weight 2.000 + alg straw + hash 0 # rjenkins1 + item device0 weight 2.000 +} +cluster cluster0 { + id -1 # do not change unnecessarily + # weight 3.000 + alg straw + hash 0 # rjenkins1 + item host0 weight 1.000 + item fake weight 2.000 +} + +# rules +rule data { + 
ruleset 0 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} +rule metadata { + ruleset 1 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} +rule rbd { + ruleset 2 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} + +# end crush map diff -Nru ceph-0.80.8/src/test/cli/crushtool/test-map-bobtail-tunables.t ceph-0.80.9/src/test/cli/crushtool/test-map-bobtail-tunables.t --- ceph-0.80.8/src/test/cli/crushtool/test-map-bobtail-tunables.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/test-map-bobtail-tunables.t 2015-03-09 19:51:18.000000000 +0000 @@ -1,4 +1,4 @@ - $ crushtool -i "$TESTDIR/test-map-a.crushmap" --test --show-statistics --rule 0 --set-choose-local-tries 0 --set-choose-local-fallback-tries 0 --set-choose-total-tries 50 --set-chooseleaf-descend-once 1 + $ crushtool -i "$TESTDIR/test-map-a.crushmap" --test --show-mappings --show-statistics --rule 0 --set-choose-local-tries 0 --set-choose-local-fallback-tries 0 --set-choose-total-tries 50 --set-chooseleaf-descend-once 1 crushtool successfully built or modified map. Use '-o ' to write it out. rule 0 (data), x = 0..1023, numrep = 1..10 CRUSH rule 0 x 0 [36] diff -Nru ceph-0.80.8/src/test/cli/crushtool/test-map-firefly-tunables.t ceph-0.80.9/src/test/cli/crushtool/test-map-firefly-tunables.t --- ceph-0.80.8/src/test/cli/crushtool/test-map-firefly-tunables.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/test-map-firefly-tunables.t 2015-03-09 19:51:18.000000000 +0000 @@ -1,4 +1,4 @@ - $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-statistics --rule 0 --set-choose-local-tries 0 --set-choose-local-fallback-tries 0 --set-choose-total-tries 50 --set-chooseleaf-descend-once 1 --set-chooseleaf-vary-r 1 --weight 12 0 --weight 20 0 --weight 30 0 + $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-mappings --show-statistics --rule 0 --set-choose-local-tries 0 --set-choose-local-fallback-tries 0 --set-choose-total-tries 50 --set-chooseleaf-descend-once 1 --set-chooseleaf-vary-r 1 --weight 12 0 --weight 20 0 --weight 30 0 crushtool successfully built or modified map. Use '-o ' to write it out. rule 0 (data), x = 0..1023, numrep = 1..10 CRUSH rule 0 x 0 [101] diff -Nru ceph-0.80.8/src/test/cli/crushtool/test-map-indep.t ceph-0.80.9/src/test/cli/crushtool/test-map-indep.t --- ceph-0.80.8/src/test/cli/crushtool/test-map-indep.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/test-map-indep.t 2015-03-09 19:51:18.000000000 +0000 @@ -1,4 +1,4 @@ - $ crushtool -i "$TESTDIR/test-map-indep.crushmap" --test --show-statistics --rule 1 --set-choose-local-tries 0 --set-choose-local-fallback-tries 0 --set-choose-total-tries 50 --set-chooseleaf-descend-once 2 + $ crushtool -i "$TESTDIR/test-map-indep.crushmap" --test --show-mappings --show-statistics --rule 1 --set-choose-local-tries 0 --set-choose-local-fallback-tries 0 --set-choose-total-tries 50 --set-chooseleaf-descend-once 2 crushtool successfully built or modified map. Use '-o ' to write it out. 
rule 1 (metadata), x = 0..1023, numrep = 1..10 CRUSH rule 1 x 0 [36] diff -Nru ceph-0.80.8/src/test/cli/crushtool/test-map-legacy-tunables.t ceph-0.80.9/src/test/cli/crushtool/test-map-legacy-tunables.t --- ceph-0.80.8/src/test/cli/crushtool/test-map-legacy-tunables.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/test-map-legacy-tunables.t 2015-03-09 19:51:18.000000000 +0000 @@ -1,4 +1,4 @@ - $ crushtool -i "$TESTDIR/test-map-a.crushmap" --test --show-statistics --rule 0 + $ crushtool -i "$TESTDIR/test-map-a.crushmap" --test --show-mappings --show-statistics --rule 0 rule 0 (data), x = 0..1023, numrep = 1..10 CRUSH rule 0 x 0 [36] CRUSH rule 0 x 1 [876] diff -Nru ceph-0.80.8/src/test/cli/crushtool/test-map-tries-vs-retries.t ceph-0.80.9/src/test/cli/crushtool/test-map-tries-vs-retries.t --- ceph-0.80.8/src/test/cli/crushtool/test-map-tries-vs-retries.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/test-map-tries-vs-retries.t 2015-03-09 19:51:18.000000000 +0000 @@ -1,4 +1,4 @@ - $ crushtool -i "$TESTDIR/test-map-tries-vs-retries.crushmap" --test --show-statistics --weight 0 0 --weight 8 0 + $ crushtool -i "$TESTDIR/test-map-tries-vs-retries.crushmap" --test --show-mappings --show-statistics --weight 0 0 --weight 8 0 rule 0 (replicated_ruleset), x = 0..1023, numrep = 1..10 CRUSH rule 0 x 0 [7] CRUSH rule 0 x 1 [10] diff -Nru ceph-0.80.8/src/test/cli/crushtool/test-map-vary-r-0.t ceph-0.80.9/src/test/cli/crushtool/test-map-vary-r-0.t --- ceph-0.80.8/src/test/cli/crushtool/test-map-vary-r-0.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/test-map-vary-r-0.t 2015-03-09 19:51:18.000000000 +0000 @@ -1,4 +1,4 @@ - $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-statistics --rule 3 --set-chooseleaf-vary-r 0 --weight 0 0 --weight 4 0 --weight 9 0 + $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-mappings --show-statistics --rule 3 --set-chooseleaf-vary-r 0 --weight 0 0 --weight 4 0 --weight 9 0 crushtool successfully built or modified map. Use '-o ' to write it out. rule 3 (delltestrule), x = 0..1023, numrep = 2..4 CRUSH rule 3 x 0 [94,85] diff -Nru ceph-0.80.8/src/test/cli/crushtool/test-map-vary-r-1.t ceph-0.80.9/src/test/cli/crushtool/test-map-vary-r-1.t --- ceph-0.80.8/src/test/cli/crushtool/test-map-vary-r-1.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/test-map-vary-r-1.t 2015-03-09 19:51:18.000000000 +0000 @@ -1,4 +1,4 @@ - $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-statistics --rule 3 --set-chooseleaf-vary-r 1 --weight 0 0 --weight 4 0 --weight 9 0 + $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-mappings --show-statistics --rule 3 --set-chooseleaf-vary-r 1 --weight 0 0 --weight 4 0 --weight 9 0 crushtool successfully built or modified map. Use '-o ' to write it out. 
rule 3 (delltestrule), x = 0..1023, numrep = 2..4 CRUSH rule 3 x 0 [94,6] diff -Nru ceph-0.80.8/src/test/cli/crushtool/test-map-vary-r-2.t ceph-0.80.9/src/test/cli/crushtool/test-map-vary-r-2.t --- ceph-0.80.8/src/test/cli/crushtool/test-map-vary-r-2.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/test-map-vary-r-2.t 2015-03-09 19:51:18.000000000 +0000 @@ -1,4 +1,4 @@ - $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-statistics --rule 3 --set-chooseleaf-vary-r 2 --weight 0 0 --weight 4 0 --weight 9 0 + $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-mappings --show-statistics --rule 3 --set-chooseleaf-vary-r 2 --weight 0 0 --weight 4 0 --weight 9 0 crushtool successfully built or modified map. Use '-o ' to write it out. rule 3 (delltestrule), x = 0..1023, numrep = 2..4 CRUSH rule 3 x 0 [94,45] diff -Nru ceph-0.80.8/src/test/cli/crushtool/test-map-vary-r-3.t ceph-0.80.9/src/test/cli/crushtool/test-map-vary-r-3.t --- ceph-0.80.8/src/test/cli/crushtool/test-map-vary-r-3.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/test-map-vary-r-3.t 2015-03-09 19:51:18.000000000 +0000 @@ -1,4 +1,4 @@ - $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-statistics --rule 3 --set-chooseleaf-vary-r 3 --weight 0 0 --weight 4 0 --weight 9 0 + $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-mappings --show-statistics --rule 3 --set-chooseleaf-vary-r 3 --weight 0 0 --weight 4 0 --weight 9 0 crushtool successfully built or modified map. Use '-o ' to write it out. rule 3 (delltestrule), x = 0..1023, numrep = 2..4 CRUSH rule 3 x 0 [94,85] diff -Nru ceph-0.80.8/src/test/cli/crushtool/test-map-vary-r-4.t ceph-0.80.9/src/test/cli/crushtool/test-map-vary-r-4.t --- ceph-0.80.8/src/test/cli/crushtool/test-map-vary-r-4.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/test-map-vary-r-4.t 2015-03-09 19:51:18.000000000 +0000 @@ -1,4 +1,4 @@ - $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-statistics --rule 3 --set-chooseleaf-vary-r 4 --weight 0 0 --weight 4 0 --weight 9 0 + $ crushtool -i "$TESTDIR/test-map-vary-r.crushmap" --test --show-mappings --show-statistics --rule 3 --set-chooseleaf-vary-r 4 --weight 0 0 --weight 4 0 --weight 9 0 crushtool successfully built or modified map. Use '-o ' to write it out. 
rule 3 (delltestrule), x = 0..1023, numrep = 2..4 CRUSH rule 3 x 0 [94,85] Binary files /tmp/GVO48tsPH5/ceph-0.80.8/src/test/cli/crushtool/tree.template and /tmp/Q8_3upm_Eg/ceph-0.80.9/src/test/cli/crushtool/tree.template differ diff -Nru ceph-0.80.8/src/test/cli/crushtool/tree.template.final ceph-0.80.9/src/test/cli/crushtool/tree.template.final --- ceph-0.80.8/src/test/cli/crushtool/tree.template.final 1970-01-01 00:00:00.000000000 +0000 +++ ceph-0.80.9/src/test/cli/crushtool/tree.template.final 2015-03-09 19:51:18.000000000 +0000 @@ -0,0 +1,70 @@ +# begin crush map + +# devices +device 0 device0 +device 1 device1 +device 2 device2 +device 3 device3 +device 4 device4 +device 5 device5 +device 6 device6 +device 7 device7 + +# types +type 0 device +type 1 host +type 2 cluster + +# buckets +host host0 { + id -2 # do not change unnecessarily + # weight 8.000 + alg tree # do not change pos for existing items unnecessarily + hash 0 # rjenkins1 + item device0 weight 1.000 pos 0 + item device1 weight 1.000 pos 1 + item device2 weight 1.000 pos 2 + item device3 weight 1.000 pos 3 + item device4 weight 1.000 pos 4 + item device5 weight 1.000 pos 5 + item device6 weight 1.000 pos 6 + item device7 weight 1.000 pos 7 +} +cluster cluster0 { + id -1 # do not change unnecessarily + # weight 8.000 + alg tree # do not change pos for existing items unnecessarily + hash 0 # rjenkins1 + item host0 weight 8.000 pos 0 +} + +# rules +rule data { + ruleset 0 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} +rule metadata { + ruleset 1 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} +rule rbd { + ruleset 2 + type replicated + min_size 1 + max_size 10 + step take cluster0 + step chooseleaf firstn 0 type host + step emit +} + +# end crush map diff -Nru ceph-0.80.8/src/test/cli/osdmaptool/create-print.t ceph-0.80.9/src/test/cli/osdmaptool/create-print.t --- ceph-0.80.8/src/test/cli/osdmaptool/create-print.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/osdmaptool/create-print.t 2015-03-09 19:51:18.000000000 +0000 @@ -11,6 +11,7 @@ tunable choose_local_fallback_tries 0 tunable choose_total_tries 50 tunable chooseleaf_descend_once 1 + tunable straw_calc_version 1 # devices device 0 osd.0 diff -Nru ceph-0.80.8/src/test/cli/osdmaptool/create-racks.t ceph-0.80.9/src/test/cli/osdmaptool/create-racks.t --- ceph-0.80.8/src/test/cli/osdmaptool/create-racks.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/osdmaptool/create-racks.t 2015-03-09 19:51:18.000000000 +0000 @@ -10,6 +10,7 @@ tunable choose_local_fallback_tries 0 tunable choose_total_tries 50 tunable chooseleaf_descend_once 1 + tunable straw_calc_version 1 # devices device 0 device0 diff -Nru ceph-0.80.8/src/test/cli/osdmaptool/crush.t ceph-0.80.9/src/test/cli/osdmaptool/crush.t --- ceph-0.80.8/src/test/cli/osdmaptool/crush.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/osdmaptool/crush.t 2015-03-09 19:51:18.000000000 +0000 @@ -6,5 +6,5 @@ osdmaptool: exported crush map to oc $ osdmaptool --import-crush oc myosdmap osdmaptool: osdmap file 'myosdmap' - osdmaptool: imported 486 byte crush map from oc + osdmaptool: imported 487 byte crush map from oc osdmaptool: writing epoch 3 to myosdmap diff -Nru ceph-0.80.8/src/test/cli/osdmaptool/help.t ceph-0.80.9/src/test/cli/osdmaptool/help.t --- ceph-0.80.8/src/test/cli/osdmaptool/help.t 2015-01-14 18:15:23.000000000 +0000 +++ 
ceph-0.80.9/src/test/cli/osdmaptool/help.t 2015-03-09 19:51:18.000000000 +0000 @@ -4,6 +4,7 @@ --export-crush <file> write osdmap's crush map to <file> --import-crush <file> replace osdmap's crush map with <file> --test-map-pgs [--pool <poolid>] map all pgs + --test-map-pgs-dump [--pool <poolid>] map all pgs --mark-up-in mark osds up and in (but do not persist) --clear-temp clear pg_temp and primary_temp --test-random do random placements diff -Nru ceph-0.80.8/src/test/cli/osdmaptool/missing-argument.t ceph-0.80.9/src/test/cli/osdmaptool/missing-argument.t --- ceph-0.80.8/src/test/cli/osdmaptool/missing-argument.t 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/cli/osdmaptool/missing-argument.t 2015-03-09 19:51:18.000000000 +0000 @@ -4,6 +4,7 @@ --export-crush <file> write osdmap's crush map to <file> --import-crush <file> replace osdmap's crush map with <file> --test-map-pgs [--pool <poolid>] map all pgs + --test-map-pgs-dump [--pool <poolid>] map all pgs --mark-up-in mark osds up and in (but do not persist) --clear-temp clear pg_temp and primary_temp --test-random do random placements diff -Nru ceph-0.80.8/src/test/crush/indep.cc ceph-0.80.9/src/test/crush/indep.cc --- ceph-0.80.8/src/test/crush/indep.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/crush/indep.cc 2015-03-09 19:51:18.000000000 +0000 @@ -51,18 +51,21 @@ } } } - - crush_rule *rule = crush_make_rule(4, 0, 123, 1, 20); - assert(rule); - crush_rule_set_step(rule, 0, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 10, 0); - crush_rule_set_step(rule, 1, CRUSH_RULE_TAKE, rootno, 0); - crush_rule_set_step(rule, 2, - CRUSH_RULE_CHOOSELEAF_INDEP, - CRUSH_CHOOSE_N, - 1); - crush_rule_set_step(rule, 3, CRUSH_RULE_EMIT, 0, 0); - int rno = crush_add_rule(c->crush, rule, -1); - c->set_rule_name(rno, "data"); + int ret; + int ruleno = 0; + int ruleset = 0; + ruleno = ruleset; + ret = c->add_rule(4, ruleset, 123, 1, 20, ruleno); + assert(ret == ruleno); + ret = c->set_rule_step(ruleno, 0, CRUSH_RULE_SET_CHOOSELEAF_TRIES, 10, 0); + assert(ret == 0); + ret = c->set_rule_step(ruleno, 1, CRUSH_RULE_TAKE, rootno, 0); + assert(ret == 0); + ret = c->set_rule_step(ruleno, 2, CRUSH_RULE_CHOOSELEAF_INDEP, CRUSH_CHOOSE_N, 1); + assert(ret == 0); + ret = c->set_rule_step(ruleno, 3, CRUSH_RULE_EMIT, 0, 0); + assert(ret == 0); + c->set_rule_name(ruleno, "data"); if (false) { Formatter *f = new_formatter("json-pretty"); @@ -140,7 +143,7 @@ c->dump_tree(weight, &cout, NULL); // need more retries to get 9/9 hosts for x in 0..99 - c->crush->choose_total_tries = 100; + c->set_choose_total_tries(100); for (int x = 0; x < 100; ++x) { vector<int> out; c->do_rule(0, x, out, 9, weight); @@ -166,7 +169,7 @@ weight[i] = 0; c->dump_tree(weight, &cout, NULL); - c->crush->choose_total_tries = 100; + c->set_choose_total_tries(100); for (int x = 0; x < 100; ++x) { vector<int> out; c->do_rule(0, x, out, 7, weight); @@ -185,7 +188,7 @@ TEST(CRUSH, indep_out_progressive) { CrushWrapper *c = build_indep_map(g_ceph_context, 3, 3, 3); - c->crush->choose_total_tries = 100; + c->set_choose_total_tries(100); vector<__u32> tweight(c->get_max_devices(), 0x10000); c->dump_tree(tweight, &cout, NULL); diff -Nru ceph-0.80.8/src/test/crush/TestCrushWrapper.cc ceph-0.80.9/src/test/crush/TestCrushWrapper.cc --- ceph-0.80.8/src/test/crush/TestCrushWrapper.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/crush/TestCrushWrapper.cc 2015-03-09 19:51:18.000000000 +0000 @@ -67,6 +67,166 @@ delete c; } +TEST(CrushWrapper, straw_zero) { + // zero weight items should have no effect on placement.
+ + CrushWrapper *c = new CrushWrapper; + const int ROOT_TYPE = 1; + c->set_type_name(ROOT_TYPE, "root"); + const int OSD_TYPE = 0; + c->set_type_name(OSD_TYPE, "osd"); + + int n = 5; + int items[n], weights[n]; + for (int i=0; i set_max_devices(n); + + string root_name0("root0"); + int root0; + EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1, + ROOT_TYPE, n, items, weights, &root0)); + EXPECT_EQ(0, c->set_item_name(root0, root_name0)); + + string name0("rule0"); + int ruleset0 = c->add_simple_ruleset(name0, root_name0, "osd", + "firstn", pg_pool_t::TYPE_REPLICATED); + EXPECT_EQ(0, ruleset0); + + string root_name1("root1"); + int root1; + EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1, + ROOT_TYPE, n-1, items, weights, &root1)); + EXPECT_EQ(0, c->set_item_name(root1, root_name1)); + + string name1("rule1"); + int ruleset1 = c->add_simple_ruleset(name1, root_name1, "osd", + "firstn", pg_pool_t::TYPE_REPLICATED); + EXPECT_EQ(1, ruleset1); + + vector reweight(n, 0x10000); + for (int i=0; i<10000; ++i) { + vector out0, out1; + c->do_rule(ruleset0, i, out0, 1, reweight); + ASSERT_EQ(1u, out0.size()); + c->do_rule(ruleset1, i, out1, 1, reweight); + ASSERT_EQ(1u, out1.size()); + ASSERT_EQ(out0[0], out1[0]); + //cout << i << "\t" << out0 << "\t" << out1 << std::endl; + } +} + +TEST(CrushWrapper, straw_same) { + // items with the same weight should map about the same as items + // with very similar weights. + // + // give the 0 vector a paired stair pattern, with dup weights. note + // that the original straw flaw does not appear when there are 2 of + // the initial weight, but it does when there is just 1. + // + // give the 1 vector a similar stair pattern, but make the same + // steps weights slightly different (no dups). this works. + // + // compare the result and verify that the resulting mapping is + // almost identical. 
+ + CrushWrapper *c = new CrushWrapper; + const int ROOT_TYPE = 1; + c->set_type_name(ROOT_TYPE, "root"); + const int OSD_TYPE = 0; + c->set_type_name(OSD_TYPE, "osd"); + + int n = 10; + int items[n], weights[n]; + for (int i=0; i set_max_devices(n); + + string root_name0("root0"); + int root0; + EXPECT_EQ(0, c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1, + ROOT_TYPE, n, items, weights, &root0)); + EXPECT_EQ(0, c->set_item_name(root0, root_name0)); + + string name0("rule0"); + int ruleset0 = c->add_simple_ruleset(name0, root_name0, "osd", + "firstn", pg_pool_t::TYPE_REPLICATED); + EXPECT_EQ(0, ruleset0); + + for (int i=0; i add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1, + ROOT_TYPE, n, items, weights, &root1)); + EXPECT_EQ(0, c->set_item_name(root1, root_name1)); + + string name1("rule1"); + int ruleset1 = c->add_simple_ruleset(name1, root_name1, "osd", + "firstn", pg_pool_t::TYPE_REPLICATED); + EXPECT_EQ(1, ruleset1); + + if (0) { + crush_bucket_straw *sb0 = reinterpret_cast(c->get_crush_map()->buckets[-1-root0]); + crush_bucket_straw *sb1 = reinterpret_cast(c->get_crush_map()->buckets[-1-root1]); + + for (int i=0; iitem_weights[i] + << "\t" << sb1->item_weights[i] + << "\t" + << "\t" << sb0->straws[i] + << "\t" << sb1->straws[i] + << std::endl; + } + } + + if (0) { + JSONFormatter jf(true); + jf.open_object_section("crush"); + c->dump(&jf); + jf.close_section(); + jf.flush(cout); + } + + vector sum0(n, 0), sum1(n, 0); + vector reweight(n, 0x10000); + int different = 0; + int max = 100000; + for (int i=0; i out0, out1; + c->do_rule(ruleset0, i, out0, 1, reweight); + ASSERT_EQ(1u, out0.size()); + c->do_rule(ruleset1, i, out1, 1, reweight); + ASSERT_EQ(1u, out1.size()); + sum0[out0[0]]++; + sum1[out1[0]]++; + if (out0[0] != out1[0]) + different++; + } + for (int i=0; iset_type_name(ROOT_TYPE, "root"); + const int HOST_TYPE = 1; + c->set_type_name(HOST_TYPE, "host"); + const int OSD_TYPE = 0; + c->set_type_name(OSD_TYPE, "osd"); + + int rootno; + c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1, + ROOT_TYPE, 0, NULL, NULL, &rootno); + c->set_item_name(rootno, "default"); + + const string HOST0("host0"); + int host0; + c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1, + HOST_TYPE, 0, NULL, NULL, &host0); + c->set_item_name(host0, HOST0); + + const string FAKE("fake"); + int hostfake; + c->add_bucket(0, CRUSH_BUCKET_STRAW, CRUSH_HASH_RJENKINS1, + HOST_TYPE, 0, NULL, NULL, &hostfake); + c->set_item_name(hostfake, FAKE); + + int item = 0; + + // construct crush map + + { + map loc; + loc["host"] = "host0"; + float host_weight = 2.0; + int bucket_id = 0; + + item = 0; + EXPECT_EQ(0, c->insert_item(g_ceph_context, item, 1.0, + "osd." + stringify(item), loc)); + item = 1; + EXPECT_EQ(0, c->insert_item(g_ceph_context, item, 1.0, + "osd." + stringify(item), loc)); + + bucket_id = c->get_item_id("host0"); + EXPECT_EQ(true, c->bucket_exists(bucket_id)); + EXPECT_EQ(host_weight, c->get_bucket_weightf(bucket_id)); + + } + + { + map loc; + loc["host"] = "fake"; + float host_weight = 2.0; + int bucket_id = 0; + + item = 0; + EXPECT_EQ(0, c->insert_item(g_ceph_context, item, 1.0, + "osd." + stringify(item), loc)); + item = 1; + EXPECT_EQ(0, c->insert_item(g_ceph_context, item, 1.0, + "osd." 
+ stringify(item), loc)); + + bucket_id = c->get_item_id("fake"); + EXPECT_EQ(true, c->bucket_exists(bucket_id)); + EXPECT_EQ(host_weight, c->get_bucket_weightf(bucket_id)); + } + + // + // When there is: + // + // default --> host0 --> osd.0 1.0 + // | | + // | +-> osd.1 1.0 + // | + // +-> fake --> osd.0 1.0 + // | + // +-> osd.1 1.0 + // + // Trying to adjust osd.0 weight to 2.0 in all buckets + // Trying to adjust osd.1 weight to 2.0 in host=fake + // + // So the crush map will be: + // + // default --> host0 --> osd.0 2.0 + // | | + // | +-> osd.1 1.0 + // | + // +-> fake --> osd.0 2.0 + // | + // +-> osd.1 2.0 + // + + float original_weight = 1.0; + float modified_weight = 2.0; + map<string,string> loc_one, loc_two; + loc_one["host"] = "host0"; + loc_two["host"] = "fake"; + + item = 0; + EXPECT_EQ(2, c->adjust_item_weightf(g_ceph_context, item, modified_weight)); + EXPECT_EQ(modified_weight, c->get_item_weightf_in_loc(item, loc_one)); + EXPECT_EQ(modified_weight, c->get_item_weightf_in_loc(item, loc_two)); + + item = 1; + EXPECT_EQ(1, c->adjust_item_weightf_in_loc(g_ceph_context, item, modified_weight, loc_two)); + EXPECT_EQ(original_weight, c->get_item_weightf_in_loc(item, loc_one)); + EXPECT_EQ(modified_weight, c->get_item_weightf_in_loc(item, loc_two)); +} + TEST(CrushWrapper, insert_item) { CrushWrapper *c = new CrushWrapper; diff -Nru ceph-0.80.8/src/test/librados/misc.cc ceph-0.80.9/src/test/librados/misc.cc --- ceph-0.80.8/src/test/librados/misc.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/librados/misc.cc 2015-03-09 19:51:18.000000000 +0000 @@ -299,7 +299,8 @@ bufferlist::iterator iter = bl.begin(); uint64_t all_features; ::decode(all_features, iter); - ASSERT_EQ(all_features, (uint64_t)RBD_FEATURES_ALL); + // make sure *some* features are specified; don't care which ones + ASSERT_NE(all_features, 0); } TEST_F(LibRadosMiscPP, ExecPP) { @@ -311,7 +312,8 @@ bufferlist::iterator iter = out.begin(); uint64_t all_features; ::decode(all_features, iter); - ASSERT_EQ(all_features, (uint64_t)RBD_FEATURES_ALL); + // make sure *some* features are specified; don't care which ones + ASSERT_NE(all_features, 0); } TEST_F(LibRadosMiscPP, Operate1PP) { diff -Nru ceph-0.80.8/src/test/librados/snapshots.cc ceph-0.80.9/src/test/librados/snapshots.cc --- ceph-0.80.8/src/test/librados/snapshots.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/librados/snapshots.cc 2015-03-09 19:51:18.000000000 +0000 @@ -145,6 +145,24 @@ EXPECT_EQ(0, ioctx.snap_remove("snapfoo")); } +TEST_F(LibRadosSnapshotsPP, SnapCreateRemovePP) { + // reproduces http://tracker.ceph.com/issues/10262 + bufferlist bl; + bl.append("foo"); + ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0)); + ASSERT_EQ(0, ioctx.snap_create("snapfoo")); + ASSERT_EQ(0, ioctx.remove("foo")); + ASSERT_EQ(0, ioctx.snap_create("snapbar")); + + librados::ObjectWriteOperation *op = new librados::ObjectWriteOperation(); + op->create(false); + op->remove(); + ASSERT_EQ(0, ioctx.operate("foo", op)); + + EXPECT_EQ(0, ioctx.snap_remove("snapfoo")); + EXPECT_EQ(0, ioctx.snap_remove("snapbar")); +} + TEST_F(LibRadosSnapshotsSelfManaged, Snap) { std::vector<uint64_t> my_snaps; my_snaps.push_back(-2); diff -Nru ceph-0.80.8/src/test/librbd/test_librbd.cc ceph-0.80.9/src/test/librbd/test_librbd.cc --- ceph-0.80.8/src/test/librbd/test_librbd.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/test/librbd/test_librbd.cc 2015-03-09 19:51:18.000000000 +0000 @@ -21,6 +21,7 @@ #include "global/global_context.h" #include "global/global_init.h" #include
"common/ceph_argparse.h" +#include "common/config.h" #include "gtest/gtest.h" @@ -40,6 +41,8 @@ #include "include/interval_set.h" #include "include/stringify.h" +#include <boost/scope_exit.hpp> + using namespace std; static int get_features(bool *old_format, uint64_t *features) @@ -67,6 +70,8 @@ { if (old_format) { return rbd_create(ioctx, name, size, order); + } else if ((features & RBD_FEATURE_STRIPINGV2) != 0) { + return rbd_create3(ioctx, name, size, features, order, 65536, 16); } else { return rbd_create2(ioctx, name, size, features, order); } @@ -1857,6 +1862,107 @@ rados_ioctx_destroy(ioctx); ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster)); +} + +TEST(LibRBD, LargeCacheRead) +{ + if (!g_conf->rbd_cache) { + std::cout << "SKIPPING due to disabled cache" << std::endl; + return; + } + + rados_t cluster; + rados_ioctx_t ioctx; + string pool_name = get_temp_pool_name(); + ASSERT_EQ("", create_one_pool(pool_name, &cluster)); + rados_ioctx_create(cluster, pool_name.c_str(), &ioctx); + + uint64_t orig_cache_size = g_conf->rbd_cache_size; + g_conf->set_val("rbd_cache_size", "16777216"); + BOOST_SCOPE_EXIT( (orig_cache_size) ) { + g_conf->set_val("rbd_cache_size", stringify(orig_cache_size).c_str()); + } BOOST_SCOPE_EXIT_END; + ASSERT_EQ(16777216, g_conf->rbd_cache_size); + + rbd_image_t image; + int order = 0; + const char *name = "testimg"; + uint64_t size = g_conf->rbd_cache_size + 1; + + ASSERT_EQ(0, create_image(ioctx, name, size, &order)); + ASSERT_EQ(0, rbd_open(ioctx, name, &image, NULL)); + + std::string buffer(1 << order, '1'); + for (size_t offs = 0; offs < size; offs += buffer.size()) { + size_t len = std::min(buffer.size(), size - offs); + ASSERT_EQ(static_cast<ssize_t>(len), + rbd_write(image, offs, len, buffer.c_str())); + } + + ASSERT_EQ(0, rbd_invalidate_cache(image)); + + buffer.resize(size); + ASSERT_EQ(static_cast<ssize_t>(size-1024), rbd_read(image, 1024, size, &buffer[0])); + + ASSERT_EQ(0, rbd_close(image)); + + rados_ioctx_destroy(ioctx); + ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster)); +} + +TEST(LibRBD, TestPendingAio) +{ + rados_t cluster; + rados_ioctx_t ioctx; + string pool_name = get_temp_pool_name(); + ASSERT_EQ("", create_one_pool(pool_name, &cluster)); + rados_ioctx_create(cluster, pool_name.c_str(), &ioctx); + + int features = RBD_FEATURE_LAYERING; + rbd_image_t image; + int order = 0; + + std::string name = "testimg"; + + uint64_t size = 4 << 20; + ASSERT_EQ(0, create_image_full(ioctx, name.c_str(), size, &order, + false, features)); + ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL)); + + char test_data[TEST_IO_SIZE]; + for (size_t i = 0; i < TEST_IO_SIZE; ++i) { + test_data[i] = (char) (rand() % (126 - 33) + 33); + } + + size_t num_aios = 256; + rbd_completion_t comps[num_aios]; + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(0, rbd_aio_create_completion(NULL, NULL, &comps[i])); + uint64_t offset = rand() % (size - TEST_IO_SIZE); + ASSERT_EQ(0, rbd_aio_write(image, offset, TEST_IO_SIZE, test_data, + comps[i])); + } + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(0, rbd_aio_wait_for_complete(comps[i])); + rbd_aio_release(comps[i]); + } + ASSERT_EQ(0, rbd_invalidate_cache(image)); + + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(0, rbd_aio_create_completion(NULL, NULL, &comps[i])); + uint64_t offset = rand() % (size - TEST_IO_SIZE); + ASSERT_LE(0, rbd_aio_read(image, offset, TEST_IO_SIZE, test_data, + comps[i])); + } + + ASSERT_EQ(0, rbd_close(image)); + for (size_t i = 0; i < num_aios; ++i) { + ASSERT_EQ(1, rbd_aio_is_complete(comps[i])); +
rbd_aio_release(comps[i]); + } + + rados_ioctx_destroy(ioctx); + ASSERT_EQ(0, destroy_one_pool(pool_name, &cluster)); } int main(int argc, char **argv) diff -Nru ceph-0.80.8/src/tools/crushtool.cc ceph-0.80.9/src/tools/crushtool.cc --- ceph-0.80.8/src/tools/crushtool.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/tools/crushtool.cc 2015-03-09 19:51:18.000000000 +0000 @@ -118,6 +118,7 @@ cout << " --show utilization-all\n"; cout << " include zero weight items\n"; cout << " --show-statistics show chi squared statistics\n"; + cout << " --show-mappings show mappings\n"; cout << " --show-bad-mappings show bad mappings\n"; cout << " --show-choose-tries show choose tries histogram\n"; cout << " --set-choose-local-tries N\n"; @@ -190,6 +191,7 @@ int choose_total_tries = -1; int chooseleaf_descend_once = -1; int chooseleaf_vary_r = -1; + int straw_calc_version = -1; CrushWrapper crush; @@ -233,6 +235,9 @@ } else if (ceph_argparse_flag(args, i, "--show_statistics", (char*)NULL)) { display = true; tester.set_output_statistics(true); + } else if (ceph_argparse_flag(args, i, "--show_mappings", (char*)NULL)) { + display = true; + tester.set_output_mappings(true); } else if (ceph_argparse_flag(args, i, "--show_bad_mappings", (char*)NULL)) { display = true; tester.set_output_bad_mappings(true); @@ -263,6 +268,9 @@ } else if (ceph_argparse_withint(args, i, &chooseleaf_vary_r, &err, "--set_chooseleaf_vary_r", (char*)NULL)) { adjust = true; + } else if (ceph_argparse_withint(args, i, &straw_calc_version, &err, + "--set_straw_calc_version", (char*)NULL)) { + adjust = true; } else if (ceph_argparse_flag(args, i, "--reweight", (char*)NULL)) { reweight = true; } else if (ceph_argparse_withint(args, i, &add_item, &err, "--add_item", (char*)NULL)) { @@ -581,10 +589,8 @@ dout(2) << " item " << items[j] << " weight " << weights[j] << dendl; } - crush_bucket *b = crush_make_bucket(buckettype, CRUSH_HASH_DEFAULT, type, j, items, weights); - assert(b); int id; - int r = crush_add_bucket(crush.crush, 0, b, &id); + int r = crush.add_bucket(0, buckettype, CRUSH_HASH_DEFAULT, type, j, items, weights, &id); if (r < 0) { dout(2) << "Couldn't add bucket: " << cpp_strerror(r) << dendl; } @@ -712,6 +718,10 @@ crush.set_chooseleaf_vary_r(chooseleaf_vary_r); modified = true; } + if (straw_calc_version >= 0) { + crush.set_straw_calc_version(straw_calc_version); + modified = true; + } if (modified) { crush.finalize(); diff -Nru ceph-0.80.8/src/tools/osdmaptool.cc ceph-0.80.9/src/tools/osdmaptool.cc --- ceph-0.80.8/src/tools/osdmaptool.cc 2015-01-14 18:15:23.000000000 +0000 +++ ceph-0.80.9/src/tools/osdmaptool.cc 2015-03-09 19:51:18.000000000 +0000 @@ -35,6 +35,7 @@ cout << " --export-crush <file> write osdmap's crush map to <file>" << std::endl; cout << " --import-crush <file> replace osdmap's crush map with <file>" << std::endl; cout << " --test-map-pgs [--pool <poolid>] map all pgs" << std::endl; + cout << " --test-map-pgs-dump [--pool <poolid>] map all pgs" << std::endl; cout << " --mark-up-in mark osds up and in (but do not persist)" << std::endl; cout << " --clear-temp clear pg_temp and primary_temp" << std::endl; cout << " --test-random do random placements" << std::endl; @@ -75,6 +76,7 @@ bool mark_up_in = false; bool clear_temp = false; bool test_map_pgs = false; + bool test_map_pgs_dump = false; bool test_random = false; std::string val; @@ -104,6 +106,8 @@ clear_temp = true; } else if (ceph_argparse_flag(args, i, "--test-map-pgs", (char*)NULL)) { test_map_pgs = true; + } else if (ceph_argparse_flag(args, i, "--test-map-pgs-dump", (char*)NULL)) { +
test_map_pgs_dump = true; } else if (ceph_argparse_flag(args, i, "--test-random", (char*)NULL)) { test_random = true; } else if (ceph_argparse_flag(args, i, "--clobber", (char*)NULL)) { @@ -313,7 +317,7 @@ << ") acting (" << acting << ", p" << acting_primary << ")" << std::endl; } - if (test_map_pgs) { + if (test_map_pgs || test_map_pgs_dump) { if (pool != -1 && !osdmap.have_pg_pool(pool)) { cerr << "There is no pool " << pool << std::endl; exit(1); @@ -348,6 +352,9 @@ } size[osds.size()]++; + if (test_map_pgs_dump) + cout << pgid << "\t" << osds << "\t" << primary << std::endl; + for (unsigned i=0; i