diff -Nru drbd8-8.3.7/.filelist drbd8-8.4.1+git55a81dc~cmd1/.filelist --- drbd8-8.3.7/.filelist 2010-01-13 16:17:27.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.filelist 1970-01-01 00:00:00.000000000 +0000 @@ -1,107 +0,0 @@ -drbd-8.3.7/.gitignore -drbd-8.3.7/COPYING -drbd-8.3.7/ChangeLog -drbd-8.3.7/Makefile.in -drbd-8.3.7/README -drbd-8.3.7/autogen.sh -drbd-8.3.7/benchmark/Makefile -drbd-8.3.7/benchmark/README -drbd-8.3.7/benchmark/dm.c -drbd-8.3.7/benchmark/io-latency-test.c -drbd-8.3.7/configure.ac -drbd-8.3.7/documentation/Makefile.in -drbd-8.3.7/documentation/Makefile.lang -drbd-8.3.7/documentation/aspell.en.per -drbd-8.3.7/documentation/drbd.conf.xml -drbd-8.3.7/documentation/drbd.xml -drbd-8.3.7/documentation/drbdadm.xml -drbd-8.3.7/documentation/drbddisk.xml -drbd-8.3.7/documentation/drbdmeta.xml -drbd-8.3.7/documentation/drbdsetup.xml -drbd-8.3.7/drbd-km.spec.in -drbd-8.3.7/drbd.spec.in -drbd-8.3.7/drbd/Kconfig -drbd-8.3.7/drbd/Makefile -drbd-8.3.7/drbd/Makefile-2.6 -drbd-8.3.7/drbd/cn_queue.c -drbd-8.3.7/drbd/connector.c -drbd-8.3.7/drbd/drbd_actlog.c -drbd-8.3.7/drbd/drbd_bitmap.c -drbd-8.3.7/drbd/drbd_int.h -drbd-8.3.7/drbd/drbd_main.c -drbd-8.3.7/drbd/drbd_nl.c -drbd-8.3.7/drbd/drbd_proc.c -drbd-8.3.7/drbd/drbd_receiver.c -drbd-8.3.7/drbd/drbd_req.c -drbd-8.3.7/drbd/drbd_req.h -drbd-8.3.7/drbd/drbd_strings.c -drbd-8.3.7/drbd/drbd_tracing.c -drbd-8.3.7/drbd/drbd_tracing.h -drbd-8.3.7/drbd/drbd_vli.h -drbd-8.3.7/drbd/drbd_worker.c -drbd-8.3.7/drbd/drbd_wrappers.h -drbd-8.3.7/drbd/linux/connector.h -drbd-8.3.7/drbd/linux/drbd.h -drbd-8.3.7/drbd/linux/drbd_config.h -drbd-8.3.7/drbd/linux/drbd_limits.h -drbd-8.3.7/drbd/linux/drbd_nl.h -drbd-8.3.7/drbd/linux/drbd_tag_magic.h -drbd-8.3.7/drbd/linux/hardirq.h -drbd-8.3.7/drbd/linux/lru_cache.h -drbd-8.3.7/drbd/linux/memcontrol.h -drbd-8.3.7/drbd/linux/mutex.h -drbd-8.3.7/drbd/linux/tracepoint.h -drbd-8.3.7/drbd/lru_cache.c -drbd-8.3.7/scripts/Makefile.in -drbd-8.3.7/scripts/README 
-drbd-8.3.7/scripts/adjust_drbd_config_h.sh -drbd-8.3.7/scripts/block-drbd -drbd-8.3.7/scripts/crm-fence-peer.sh -drbd-8.3.7/scripts/drbd -drbd-8.3.7/scripts/drbd-overview.pl -drbd-8.3.7/scripts/drbd.conf -drbd-8.3.7/scripts/drbd.conf.example -drbd-8.3.7/scripts/drbd.gentoo -drbd-8.3.7/scripts/drbd.metadata.rhcs -drbd-8.3.7/scripts/drbd.ocf -drbd-8.3.7/scripts/drbd.rules -drbd-8.3.7/scripts/drbd.sh.rhcs -drbd-8.3.7/scripts/drbdadm.bash_completion -drbd-8.3.7/scripts/drbddisk -drbd-8.3.7/scripts/drbdupper -drbd-8.3.7/scripts/get_uts_release.sh -drbd-8.3.7/scripts/global_common.conf -drbd-8.3.7/scripts/notify.sh -drbd-8.3.7/scripts/outdate-peer.sh -drbd-8.3.7/scripts/patch-kernel -drbd-8.3.7/scripts/pretty-proc-drbd.sh -drbd-8.3.7/scripts/snapshot-resync-target-lvm.sh -drbd-8.3.7/scripts/unsnapshot-resync-target-lvm.sh -drbd-8.3.7/user/Makefile.in -drbd-8.3.7/user/drbd_endian.h -drbd-8.3.7/user/drbdadm.h -drbd-8.3.7/user/drbdadm_adjust.c -drbd-8.3.7/user/drbdadm_main.c -drbd-8.3.7/user/drbdadm_minor_table.c -drbd-8.3.7/user/drbdadm_parser.c -drbd-8.3.7/user/drbdadm_parser.h -drbd-8.3.7/user/drbdadm_scanner.fl -drbd-8.3.7/user/drbdadm_usage_cnt.c -drbd-8.3.7/user/drbdmeta.c -drbd-8.3.7/user/drbdmeta_parser.h -drbd-8.3.7/user/drbdmeta_scanner.fl -drbd-8.3.7/user/drbdsetup.c -drbd-8.3.7/user/drbdtool_common.c -drbd-8.3.7/user/drbdtool_common.h -drbd-8.3.7/user/unaligned.h -drbd-8.3.7/documentation/drbdsetup.8 -drbd-8.3.7/documentation/drbd.conf.5 -drbd-8.3.7/documentation/drbd.8 -drbd-8.3.7/documentation/drbdadm.8 -drbd-8.3.7/documentation/drbdmeta.8 -drbd-8.3.7/documentation/drbddisk.8 -drbd-8.3.7/drbd_config.h -drbd-8.3.7/drbd/drbd_buildtag.c -drbd-8.3.7/.filelist -drbd-8.3.7/configure -drbd-8.3.7/user/config.h.in diff -Nru drbd8-8.3.7/.git/COMMIT_EDITMSG drbd8-8.4.1+git55a81dc~cmd1/.git/COMMIT_EDITMSG --- drbd8-8.3.7/.git/COMMIT_EDITMSG 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/COMMIT_EDITMSG 2012-09-03 21:31:23.000000000 +0000 @@ -0,0 
+1,25 @@ +drbd: fix binary-incompatible build on some platforms + +The type of the make_request_fn changed from int to void in upstream +kernel. To be compatible with both older and newer kernels, our compat.h +auto-detect magic tries to figure this out. + +However, at least on Ubuntu Lucid, the compiler ignores the +pragma -Werror we rely on there, leading to us using void, +where the rest of the kernel expects an int return code. + +This caused interesting BUGs like (short version): + BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 + IP: [] clone_endio+0x34/0xe0 + Pid: 3517, comm: kdmflush Not tainted 2.6.32-38-server #83-Ubuntu X8DTN + Call Trace: + [] bio_endio+0x1d/0x40 + [] drbd_make_request+0x34b/0x350 [drbd] + [] generic_make_request+0x1b1/0x4f0 + [] __map_bio+0xad/0x130 + [] __clone_and_map+0x4ad/0x4c0 + [] __split_and_process_bio+0x108/0x190 + [] dm_flush+0x56/0x70 + +Fix: in compat/tests/have_void_make_request.c, don't rely on -Werror, but + BUILD_BUG_ON(!(__same_type(&drbd_make_request, make_request_fn))); diff -Nru drbd8-8.3.7/.git/FETCH_HEAD drbd8-8.4.1+git55a81dc~cmd1/.git/FETCH_HEAD --- drbd8-8.3.7/.git/FETCH_HEAD 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/FETCH_HEAD 2012-09-03 21:30:57.000000000 +0000 @@ -0,0 +1,5 @@ +56ed9398f8e74a8b1b1e3e4b8770acc58fd1cd17 not-for-merge branch 'master' of git://git.drbd.org/drbd-8.4 +e5b8bd4b2b2bfb746de97fd4881feefb47f0b8fc not-for-merge branch 'zero-copy-receive' of git://git.drbd.org/drbd-8.4 +7a59a5b69271df94c1f10b5a4dad48a5a3b1aea5 not-for-merge tag 'drbd-8.4.2rc1' of git://git.drbd.org/drbd-8.4 +5ab1ece053485cf9b9b3e775fe58a746ed1c20df not-for-merge tag 'drbd-8.4.2rc2' of git://git.drbd.org/drbd-8.4 +ac15f759f6055a930b3aad30b57781315b5abbef not-for-merge tag 'drbd-8.4.2rc3' of git://git.drbd.org/drbd-8.4 diff -Nru drbd8-8.3.7/.git/HEAD drbd8-8.4.1+git55a81dc~cmd1/.git/HEAD --- drbd8-8.3.7/.git/HEAD 1970-01-01 00:00:00.000000000 +0000 +++ 
drbd8-8.4.1+git55a81dc~cmd1/.git/HEAD 2012-09-03 21:31:23.000000000 +0000 @@ -0,0 +1 @@ +e3169387b068d825dd433287f7fd7ba48ed07919 diff -Nru drbd8-8.3.7/.git/config drbd8-8.4.1+git55a81dc~cmd1/.git/config --- drbd8-8.3.7/.git/config 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/config 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,11 @@ +[core] + repositoryformatversion = 0 + filemode = true + bare = false + logallrefupdates = true +[remote "origin"] + fetch = +refs/heads/*:refs/remotes/origin/* + url = git://git.drbd.org/drbd-8.4.git +[branch "master"] + remote = origin + merge = refs/heads/master diff -Nru drbd8-8.3.7/.git/description drbd8-8.4.1+git55a81dc~cmd1/.git/description --- drbd8-8.3.7/.git/description 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/description 2012-02-02 14:09:06.000000000 +0000 @@ -0,0 +1 @@ +Unnamed repository; edit this file 'description' to name the repository. diff -Nru drbd8-8.3.7/.git/hooks/applypatch-msg.sample drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/applypatch-msg.sample --- drbd8-8.3.7/.git/hooks/applypatch-msg.sample 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/applypatch-msg.sample 2012-02-02 14:09:06.000000000 +0000 @@ -0,0 +1,15 @@ +#!/bin/sh +# +# An example hook script to check the commit log message taken by +# applypatch from an e-mail message. +# +# The hook should exit with non-zero status after issuing an +# appropriate message if it wants to stop the commit. The hook is +# allowed to edit the commit message file. +# +# To enable this hook, rename this file to "applypatch-msg". + +. 
git-sh-setup +test -x "$GIT_DIR/hooks/commit-msg" && + exec "$GIT_DIR/hooks/commit-msg" ${1+"$@"} +: diff -Nru drbd8-8.3.7/.git/hooks/commit-msg.sample drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/commit-msg.sample --- drbd8-8.3.7/.git/hooks/commit-msg.sample 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/commit-msg.sample 2012-02-02 14:09:06.000000000 +0000 @@ -0,0 +1,24 @@ +#!/bin/sh +# +# An example hook script to check the commit log message. +# Called by git-commit with one argument, the name of the file +# that has the commit message. The hook should exit with non-zero +# status after issuing an appropriate message if it wants to stop the +# commit. The hook is allowed to edit the commit message file. +# +# To enable this hook, rename this file to "commit-msg". + +# Uncomment the below to add a Signed-off-by line to the message. +# Doing this in a hook is a bad idea in general, but the prepare-commit-msg +# hook is more suited to it. +# +# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p') +# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1" + +# This example catches duplicate Signed-off-by lines. + +test "" = "$(grep '^Signed-off-by: ' "$1" | + sort | uniq -c | sed -e '/^[ ]*1[ ]/d')" || { + echo >&2 Duplicate Signed-off-by lines. + exit 1 +} diff -Nru drbd8-8.3.7/.git/hooks/post-commit.sample drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/post-commit.sample --- drbd8-8.3.7/.git/hooks/post-commit.sample 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/post-commit.sample 2012-02-02 14:09:06.000000000 +0000 @@ -0,0 +1,8 @@ +#!/bin/sh +# +# An example hook script that is called after a successful +# commit is made. +# +# To enable this hook, rename this file to "post-commit". 
+ +: Nothing diff -Nru drbd8-8.3.7/.git/hooks/post-receive.sample drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/post-receive.sample --- drbd8-8.3.7/.git/hooks/post-receive.sample 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/post-receive.sample 2012-02-02 14:09:06.000000000 +0000 @@ -0,0 +1,15 @@ +#!/bin/sh +# +# An example hook script for the "post-receive" event. +# +# The "post-receive" script is run after receive-pack has accepted a pack +# and the repository has been updated. It is passed arguments in through +# stdin in the form +# +# For example: +# aa453216d1b3e49e7f6f98441fa56946ddcd6a20 68f7abf4e6f922807889f52bc043ecd31b79f814 refs/heads/master +# +# see contrib/hooks/ for a sample, or uncomment the next line and +# rename the file to "post-receive". + +#. /usr/share/doc/git-core/contrib/hooks/post-receive-email diff -Nru drbd8-8.3.7/.git/hooks/post-update.sample drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/post-update.sample --- drbd8-8.3.7/.git/hooks/post-update.sample 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/post-update.sample 2012-02-02 14:09:06.000000000 +0000 @@ -0,0 +1,8 @@ +#!/bin/sh +# +# An example hook script to prepare a packed repository for use over +# dumb transports. +# +# To enable this hook, rename this file to "post-update". + +exec git-update-server-info diff -Nru drbd8-8.3.7/.git/hooks/pre-applypatch.sample drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/pre-applypatch.sample --- drbd8-8.3.7/.git/hooks/pre-applypatch.sample 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/pre-applypatch.sample 2012-02-02 14:09:06.000000000 +0000 @@ -0,0 +1,14 @@ +#!/bin/sh +# +# An example hook script to verify what is about to be committed +# by applypatch from an e-mail message. +# +# The hook should exit with non-zero status after issuing an +# appropriate message if it wants to stop the commit. +# +# To enable this hook, rename this file to "pre-applypatch". + +. 
git-sh-setup +test -x "$GIT_DIR/hooks/pre-commit" && + exec "$GIT_DIR/hooks/pre-commit" ${1+"$@"} +: diff -Nru drbd8-8.3.7/.git/hooks/pre-commit.sample drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/pre-commit.sample --- drbd8-8.3.7/.git/hooks/pre-commit.sample 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/pre-commit.sample 2012-02-02 14:09:06.000000000 +0000 @@ -0,0 +1,46 @@ +#!/bin/sh +# +# An example hook script to verify what is about to be committed. +# Called by git-commit with no arguments. The hook should +# exit with non-zero status after issuing an appropriate message if +# it wants to stop the commit. +# +# To enable this hook, rename this file to "pre-commit". + +if git-rev-parse --verify HEAD >/dev/null 2>&1 +then + against=HEAD +else + # Initial commit: diff against an empty tree object + against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 +fi + +# If you want to allow non-ascii filenames set this variable to true. +allownonascii=$(git config hooks.allownonascii) + +# Cross platform projects tend to avoid non-ascii filenames; prevent +# them from being added to the repository. We exploit the fact that the +# printable range starts at the space character and ends with tilde. +if [ "$allownonascii" != "true" ] && + # Note that the use of brackets around a tr range is ok here, (it's + # even required, for portability to Solaris 10's /usr/bin/tr), since + # the square bracket bytes happen to fall in the designated range. + test "$(git diff --cached --name-only --diff-filter=A -z $against | + LC_ALL=C tr -d '[ -~]\0')" +then + echo "Error: Attempt to add a non-ascii file name." + echo + echo "This can cause problems if you want to work" + echo "with people on other platforms." + echo + echo "To be portable it is advisable to rename the file ..." 
+ echo + echo "If you know what you are doing you can disable this" + echo "check using:" + echo + echo " git config hooks.allownonascii true" + echo + exit 1 +fi + +exec git diff-index --check --cached $against -- diff -Nru drbd8-8.3.7/.git/hooks/pre-rebase.sample drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/pre-rebase.sample --- drbd8-8.3.7/.git/hooks/pre-rebase.sample 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/pre-rebase.sample 2012-02-02 14:09:06.000000000 +0000 @@ -0,0 +1,169 @@ +#!/bin/sh +# +# Copyright (c) 2006, 2008 Junio C Hamano +# +# The "pre-rebase" hook is run just before "git-rebase" starts doing +# its job, and can prevent the command from running by exiting with +# non-zero status. +# +# The hook is called with the following parameters: +# +# $1 -- the upstream the series was forked from. +# $2 -- the branch being rebased (or empty when rebasing the current branch). +# +# This sample shows how to prevent topic branches that are already +# merged to 'next' branch from getting rebased, because allowing it +# would result in rebasing already published history. + +publish=next +basebranch="$1" +if test "$#" = 2 +then + topic="refs/heads/$2" +else + topic=`git symbolic-ref HEAD` || + exit 0 ;# we do not interrupt rebasing detached HEAD +fi + +case "$topic" in +refs/heads/??/*) + ;; +*) + exit 0 ;# we do not interrupt others. + ;; +esac + +# Now we are dealing with a topic branch being rebased +# on top of master. Is it OK to rebase it? + +# Does the topic really exist? +git show-ref -q "$topic" || { + echo >&2 "No such branch $topic" + exit 1 +} + +# Is topic fully merged to master? +not_in_master=`git-rev-list --pretty=oneline ^master "$topic"` +if test -z "$not_in_master" +then + echo >&2 "$topic is fully merged to master; better remove it." + exit 1 ;# we could allow it, but there is no point. +fi + +# Is topic ever merged to next? If so you should not be rebasing it. 
+only_next_1=`git-rev-list ^master "^$topic" ${publish} | sort` +only_next_2=`git-rev-list ^master ${publish} | sort` +if test "$only_next_1" = "$only_next_2" +then + not_in_topic=`git-rev-list "^$topic" master` + if test -z "$not_in_topic" + then + echo >&2 "$topic is already up-to-date with master" + exit 1 ;# we could allow it, but there is no point. + else + exit 0 + fi +else + not_in_next=`git-rev-list --pretty=oneline ^${publish} "$topic"` + perl -e ' + my $topic = $ARGV[0]; + my $msg = "* $topic has commits already merged to public branch:\n"; + my (%not_in_next) = map { + /^([0-9a-f]+) /; + ($1 => 1); + } split(/\n/, $ARGV[1]); + for my $elem (map { + /^([0-9a-f]+) (.*)$/; + [$1 => $2]; + } split(/\n/, $ARGV[2])) { + if (!exists $not_in_next{$elem->[0]}) { + if ($msg) { + print STDERR $msg; + undef $msg; + } + print STDERR " $elem->[1]\n"; + } + } + ' "$topic" "$not_in_next" "$not_in_master" + exit 1 +fi + +exit 0 + +################################################################ + +This sample hook safeguards topic branches that have been +published from being rewound. + +The workflow assumed here is: + + * Once a topic branch forks from "master", "master" is never + merged into it again (either directly or indirectly). + + * Once a topic branch is fully cooked and merged into "master", + it is deleted. If you need to build on top of it to correct + earlier mistakes, a new topic branch is created by forking at + the tip of the "master". This is not strictly necessary, but + it makes it easier to keep your history simple. + + * Whenever you need to test or publish your changes to topic + branches, merge them into "next" branch. + +The script, being an example, hardcodes the publish branch name +to be "next", but it is trivial to make it configurable via +$GIT_DIR/config mechanism. + +With this workflow, you would want to know: + +(1) ... if a topic branch has ever been merged to "next". 
Young + topic branches can have stupid mistakes you would rather + clean up before publishing, and things that have not been + merged into other branches can be easily rebased without + affecting other people. But once it is published, you would + not want to rewind it. + +(2) ... if a topic branch has been fully merged to "master". + Then you can delete it. More importantly, you should not + build on top of it -- other people may already want to + change things related to the topic as patches against your + "master", so if you need further changes, it is better to + fork the topic (perhaps with the same name) afresh from the + tip of "master". + +Let's look at this example: + + o---o---o---o---o---o---o---o---o---o "next" + / / / / + / a---a---b A / / + / / / / + / / c---c---c---c B / + / / / \ / + / / / b---b C \ / + / / / / \ / + ---o---o---o---o---o---o---o---o---o---o---o "master" + + +A, B and C are topic branches. + + * A has one fix since it was merged up to "next". + + * B has finished. It has been fully merged up to "master" and "next", + and is ready to be deleted. + + * C has not merged to "next" at all. + +We would want to allow C to be rebased, refuse A, and encourage +B to be deleted. + +To compute (1): + + git-rev-list ^master ^topic next + git-rev-list ^master next + + if these match, topic has not merged in next at all. + +To compute (2): + + git-rev-list master..topic + + if this is empty, it is fully merged to "master". diff -Nru drbd8-8.3.7/.git/hooks/prepare-commit-msg.sample drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/prepare-commit-msg.sample --- drbd8-8.3.7/.git/hooks/prepare-commit-msg.sample 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/prepare-commit-msg.sample 2012-02-02 14:09:06.000000000 +0000 @@ -0,0 +1,36 @@ +#!/bin/sh +# +# An example hook script to prepare the commit log message. 
+# Called by git-commit with the name of the file that has the +# commit message, followed by the description of the commit +# message's source. The hook's purpose is to edit the commit +# message file. If the hook fails with a non-zero status, +# the commit is aborted. +# +# To enable this hook, rename this file to "prepare-commit-msg". + +# This hook includes three examples. The first comments out the +# "Conflicts:" part of a merge commit. +# +# The second includes the output of "git diff --name-status -r" +# into the message, just before the "git status" output. It is +# commented because it doesn't cope with --amend or with squashed +# commits. +# +# The third example adds a Signed-off-by line to the message, that can +# still be edited. This is rarely a good idea. + +case "$2,$3" in + merge,) + perl -i.bak -ne 's/^/# /, s/^# #/#/ if /^Conflicts/ .. /#/; print' "$1" ;; + +# ,|template,) +# perl -i.bak -pe ' +# print "\n" . `git diff --cached --name-status -r` +# if /^#/ && $first++ == 0' "$1" ;; + + *) ;; +esac + +# SOB=$(git var GIT_AUTHOR_IDENT | sed -n 's/^\(.*>\).*$/Signed-off-by: \1/p') +# grep -qs "^$SOB" "$1" || echo "$SOB" >> "$1" diff -Nru drbd8-8.3.7/.git/hooks/update.sample drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/update.sample --- drbd8-8.3.7/.git/hooks/update.sample 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/hooks/update.sample 2012-02-02 14:09:06.000000000 +0000 @@ -0,0 +1,128 @@ +#!/bin/sh +# +# An example hook script to blocks unannotated tags from entering. +# Called by git-receive-pack with arguments: refname sha1-old sha1-new +# +# To enable this hook, rename this file to "update". +# +# Config +# ------ +# hooks.allowunannotated +# This boolean sets whether unannotated tags will be allowed into the +# repository. By default they won't be. +# hooks.allowdeletetag +# This boolean sets whether deleting tags will be allowed in the +# repository. By default they won't be. 
+# hooks.allowmodifytag +# This boolean sets whether a tag may be modified after creation. By default +# it won't be. +# hooks.allowdeletebranch +# This boolean sets whether deleting branches will be allowed in the +# repository. By default they won't be. +# hooks.denycreatebranch +# This boolean sets whether remotely creating branches will be denied +# in the repository. By default this is allowed. +# + +# --- Command line +refname="$1" +oldrev="$2" +newrev="$3" + +# --- Safety check +if [ -z "$GIT_DIR" ]; then + echo "Don't run this script from the command line." >&2 + echo " (if you want, you could supply GIT_DIR then run" >&2 + echo " $0 )" >&2 + exit 1 +fi + +if [ -z "$refname" -o -z "$oldrev" -o -z "$newrev" ]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +# --- Config +allowunannotated=$(git config --bool hooks.allowunannotated) +allowdeletebranch=$(git config --bool hooks.allowdeletebranch) +denycreatebranch=$(git config --bool hooks.denycreatebranch) +allowdeletetag=$(git config --bool hooks.allowdeletetag) +allowmodifytag=$(git config --bool hooks.allowmodifytag) + +# check for no description +projectdesc=$(sed -e '1q' "$GIT_DIR/description") +case "$projectdesc" in +"Unnamed repository"* | "") + echo "*** Project description file hasn't been set" >&2 + exit 1 + ;; +esac + +# --- Check types +# if $newrev is 0000...0000, it's a commit to delete a ref. +zero="0000000000000000000000000000000000000000" +if [ "$newrev" = "$zero" ]; then + newrev_type=delete +else + newrev_type=$(git-cat-file -t $newrev) +fi + +case "$refname","$newrev_type" in + refs/tags/*,commit) + # un-annotated tag + short_refname=${refname##refs/tags/} + if [ "$allowunannotated" != "true" ]; then + echo "*** The un-annotated tag, $short_refname, is not allowed in this repository" >&2 + echo "*** Use 'git tag [ -a | -s ]' for tags you want to propagate." 
>&2 + exit 1 + fi + ;; + refs/tags/*,delete) + # delete tag + if [ "$allowdeletetag" != "true" ]; then + echo "*** Deleting a tag is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/tags/*,tag) + # annotated tag + if [ "$allowmodifytag" != "true" ] && git rev-parse $refname > /dev/null 2>&1 + then + echo "*** Tag '$refname' already exists." >&2 + echo "*** Modifying a tag is not allowed in this repository." >&2 + exit 1 + fi + ;; + refs/heads/*,commit) + # branch + if [ "$oldrev" = "$zero" -a "$denycreatebranch" = "true" ]; then + echo "*** Creating a branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/heads/*,delete) + # delete branch + if [ "$allowdeletebranch" != "true" ]; then + echo "*** Deleting a branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + refs/remotes/*,commit) + # tracking branch + ;; + refs/remotes/*,delete) + # delete tracking branch + if [ "$allowdeletebranch" != "true" ]; then + echo "*** Deleting a tracking branch is not allowed in this repository" >&2 + exit 1 + fi + ;; + *) + # Anything else (is there anything else?) + echo "*** Update hook: unknown type of update to ref $refname of type $newrev_type" >&2 + exit 1 + ;; +esac + +# --- Finished +exit 0 Binary files /tmp/eLC1L4pXeJ/drbd8-8.3.7/.git/index and /tmp/EtA86naCDw/drbd8-8.4.1+git55a81dc~cmd1/.git/index differ diff -Nru drbd8-8.3.7/.git/info/exclude drbd8-8.4.1+git55a81dc~cmd1/.git/info/exclude --- drbd8-8.3.7/.git/info/exclude 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/info/exclude 2012-02-02 14:09:06.000000000 +0000 @@ -0,0 +1,6 @@ +# git-ls-files --others --exclude-from=.git/info/exclude +# Lines that start with '#' are comments. 
+# For a project mostly in C, the following would be a good set of +# exclude patterns (uncomment them if you want to use them): +# *.[oa] +# *~ diff -Nru drbd8-8.3.7/.git/logs/HEAD drbd8-8.4.1+git55a81dc~cmd1/.git/logs/HEAD --- drbd8-8.3.7/.git/logs/HEAD 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/logs/HEAD 2012-09-03 21:31:23.000000000 +0000 @@ -0,0 +1,3 @@ +0000000000000000000000000000000000000000 91b4c048c1a0e06777b5f65d312b38d47abaea80 root 1328191754 -0600 clone: from git://git.drbd.org/drbd-8.4.git +91b4c048c1a0e06777b5f65d312b38d47abaea80 91b4c048c1a0e06777b5f65d312b38d47abaea80 root 1328191774 -0600 checkout: moving from master to drbd-8.4.1 +91b4c048c1a0e06777b5f65d312b38d47abaea80 e3169387b068d825dd433287f7fd7ba48ed07919 Ildefonso Camargo 1346707883 -0430 cherry-pick: drbd: fix binary-incompatible build on some platforms diff -Nru drbd8-8.3.7/.git/logs/refs/heads/master drbd8-8.4.1+git55a81dc~cmd1/.git/logs/refs/heads/master --- drbd8-8.3.7/.git/logs/refs/heads/master 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/logs/refs/heads/master 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1 @@ +0000000000000000000000000000000000000000 91b4c048c1a0e06777b5f65d312b38d47abaea80 root 1328191754 -0600 clone: from git://git.drbd.org/drbd-8.4.git diff -Nru drbd8-8.3.7/.git/logs/refs/remotes/origin/master drbd8-8.4.1+git55a81dc~cmd1/.git/logs/refs/remotes/origin/master --- drbd8-8.3.7/.git/logs/refs/remotes/origin/master 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/logs/refs/remotes/origin/master 2012-09-03 21:30:57.000000000 +0000 @@ -0,0 +1 @@ +91b4c048c1a0e06777b5f65d312b38d47abaea80 56ed9398f8e74a8b1b1e3e4b8770acc58fd1cd17 Ildefonso Camargo 1346707857 -0430 fetch: fast-forward Binary files /tmp/eLC1L4pXeJ/drbd8-8.3.7/.git/objects/19/57f36865f12f413186301639c90f18c5b6383a and /tmp/EtA86naCDw/drbd8-8.4.1+git55a81dc~cmd1/.git/objects/19/57f36865f12f413186301639c90f18c5b6383a differ 
Binary files /tmp/eLC1L4pXeJ/drbd8-8.3.7/.git/objects/39/d387426544fdd10293597bfae99a825ac48cf7 and /tmp/EtA86naCDw/drbd8-8.4.1+git55a81dc~cmd1/.git/objects/39/d387426544fdd10293597bfae99a825ac48cf7 differ Binary files /tmp/eLC1L4pXeJ/drbd8-8.3.7/.git/objects/e3/169387b068d825dd433287f7fd7ba48ed07919 and /tmp/EtA86naCDw/drbd8-8.4.1+git55a81dc~cmd1/.git/objects/e3/169387b068d825dd433287f7fd7ba48ed07919 differ Binary files /tmp/eLC1L4pXeJ/drbd8-8.3.7/.git/objects/pack/pack-7cd34c0c501af2938b3310c64932d0fb210658d7.idx and /tmp/EtA86naCDw/drbd8-8.4.1+git55a81dc~cmd1/.git/objects/pack/pack-7cd34c0c501af2938b3310c64932d0fb210658d7.idx differ Binary files /tmp/eLC1L4pXeJ/drbd8-8.3.7/.git/objects/pack/pack-7cd34c0c501af2938b3310c64932d0fb210658d7.pack and /tmp/EtA86naCDw/drbd8-8.4.1+git55a81dc~cmd1/.git/objects/pack/pack-7cd34c0c501af2938b3310c64932d0fb210658d7.pack differ Binary files /tmp/eLC1L4pXeJ/drbd8-8.3.7/.git/objects/pack/pack-fe1890efcc4e4e5aea4829936564928a4a8a3b08.idx and /tmp/EtA86naCDw/drbd8-8.4.1+git55a81dc~cmd1/.git/objects/pack/pack-fe1890efcc4e4e5aea4829936564928a4a8a3b08.idx differ Binary files /tmp/eLC1L4pXeJ/drbd8-8.3.7/.git/objects/pack/pack-fe1890efcc4e4e5aea4829936564928a4a8a3b08.pack and /tmp/EtA86naCDw/drbd8-8.4.1+git55a81dc~cmd1/.git/objects/pack/pack-fe1890efcc4e4e5aea4829936564928a4a8a3b08.pack differ diff -Nru drbd8-8.3.7/.git/packed-refs drbd8-8.4.1+git55a81dc~cmd1/.git/packed-refs --- drbd8-8.3.7/.git/packed-refs 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/packed-refs 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,19 @@ +# pack-refs with: peeled +311dc112cb6f3c9c2658c3e9e3de33d788af7902 refs/tags/drbd-8.4.1rc2 +^23a65b276f93f211aebb992513aade5bb0e76a69 +004fa9964f91021633ac05b3fac0bc2a3ce16941 refs/tags/drbd-8.4.1rc1 +^eb252062fcc810de0b7d6ff5a67867ba4bf3f9b1 +66248dfa824afe2525aa33e99d8f71a9899c7ea4 refs/tags/drbd-8.4.1 +^91b4c048c1a0e06777b5f65d312b38d47abaea80 +a96878f6e589e4a3e83b2925ea927f6e22699a82 
refs/tags/drbd-8.4.0rc4 +^4cae5718c19dbcb1505dadc6d078626e6af205ed +5f6efb0772b729d2396b18db5ba3c89a2923eb9e refs/tags/drbd-8.4.0rc3 +^a222a5af13886743c47e9d44329923cf94ba8d18 +7ec7671696654597347758a6fbd4931717dbdb80 refs/tags/drbd-8.4.0rc2 +^c0014a5ec7b162d3e4a3a81df829f9d4d84de94a +c9b4428e663c24c74ec33af97f36f5acc4b6bc2c refs/tags/drbd-8.4.0rc1 +^2712ba1a920636bfa324920409f949dcd4e0f5d5 +7a7f1aebba8cbbb3651ff6babea441d06e0a36fc refs/tags/drbd-8.4.0 +^28753f559ab51b549d16bcf487fe625d5919c49c +e5b8bd4b2b2bfb746de97fd4881feefb47f0b8fc refs/remotes/origin/zero-copy-receive +91b4c048c1a0e06777b5f65d312b38d47abaea80 refs/remotes/origin/master diff -Nru drbd8-8.3.7/.git/refs/heads/master drbd8-8.4.1+git55a81dc~cmd1/.git/refs/heads/master --- drbd8-8.3.7/.git/refs/heads/master 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/refs/heads/master 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1 @@ +91b4c048c1a0e06777b5f65d312b38d47abaea80 diff -Nru drbd8-8.3.7/.git/refs/remotes/origin/HEAD drbd8-8.4.1+git55a81dc~cmd1/.git/refs/remotes/origin/HEAD --- drbd8-8.3.7/.git/refs/remotes/origin/HEAD 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/refs/remotes/origin/HEAD 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1 @@ +ref: refs/remotes/origin/master diff -Nru drbd8-8.3.7/.git/refs/remotes/origin/master drbd8-8.4.1+git55a81dc~cmd1/.git/refs/remotes/origin/master --- drbd8-8.3.7/.git/refs/remotes/origin/master 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/refs/remotes/origin/master 2012-09-03 21:30:57.000000000 +0000 @@ -0,0 +1 @@ +56ed9398f8e74a8b1b1e3e4b8770acc58fd1cd17 diff -Nru drbd8-8.3.7/.git/refs/tags/drbd-8.4.2rc1 drbd8-8.4.1+git55a81dc~cmd1/.git/refs/tags/drbd-8.4.2rc1 --- drbd8-8.3.7/.git/refs/tags/drbd-8.4.2rc1 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/refs/tags/drbd-8.4.2rc1 2012-09-03 21:30:57.000000000 +0000 @@ -0,0 +1 @@ +7a59a5b69271df94c1f10b5a4dad48a5a3b1aea5 
diff -Nru drbd8-8.3.7/.git/refs/tags/drbd-8.4.2rc2 drbd8-8.4.1+git55a81dc~cmd1/.git/refs/tags/drbd-8.4.2rc2 --- drbd8-8.3.7/.git/refs/tags/drbd-8.4.2rc2 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/refs/tags/drbd-8.4.2rc2 2012-09-03 21:30:57.000000000 +0000 @@ -0,0 +1 @@ +5ab1ece053485cf9b9b3e775fe58a746ed1c20df diff -Nru drbd8-8.3.7/.git/refs/tags/drbd-8.4.2rc3 drbd8-8.4.1+git55a81dc~cmd1/.git/refs/tags/drbd-8.4.2rc3 --- drbd8-8.3.7/.git/refs/tags/drbd-8.4.2rc3 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.git/refs/tags/drbd-8.4.2rc3 2012-09-03 21:30:57.000000000 +0000 @@ -0,0 +1 @@ +ac15f759f6055a930b3aad30b57781315b5abbef diff -Nru drbd8-8.3.7/.gitignore drbd8-8.4.1+git55a81dc~cmd1/.gitignore --- drbd8-8.3.7/.gitignore 2008-11-24 10:43:32.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/.gitignore 2012-02-02 14:09:14.000000000 +0000 @@ -1,35 +1,46 @@ -ID -TODO -tags +/autom4te.cache +/config.log +/config.status +/configure +/drbd-*.tar.gz +/drbd.spec +/drbd-kernel.spec +/drbd-km.spec +/ID +/TODO +/tags +/Makefile + +user/Makefile +scripts/Makefile +documentation/Makefile ./.filelist ./drbd_config.h +*.gcda +*.gcno *.o drbd/drbd.ko -drbd/.drbd.ko.cmd -drbd/.drbd.mod.o.cmd -drbd/.drbd.o.cmd -drbd/.drbd_actlog.o.cmd -drbd/.drbd_bitmap.o.cmd -drbd/.drbd_buildtag.o.cmd -drbd/.drbd_kernelrelease -drbd/.drbd_main.o.cmd -drbd/.drbd_nl.o.cmd -drbd/.drbd_proc.o.cmd -drbd/.drbd_receiver.o.cmd -drbd/.drbd_req.o.cmd -drbd/.drbd_strings.o.cmd -drbd/.drbd_worker.o.cmd +drbd/drbd.ko.unsigned +drbd/.*.cmd +drbd/compat/.*.cmd +drbd/.compat.h.d +drbd/.config.timestamp drbd/.kernel.config.gz -drbd/.lru_cache.o.cmd +drbd/.drbd_kernelrelease +drbd/.drbd_kernelrelease.new drbd/.tmp_versions drbd/Module.symvers +drbd/compat.h drbd/drbd.mod.c drbd/drbd_buildtag.c +drbd/modules.order drbd/linux/drbd_config.h.orig +user/config.h +user/config.h.in user/drbd_buildtag.c user/drbd_strings.c user/drbdadm @@ -46,5 +57,6 @@ 
documentation/drbdsetup.8 documentation/manpage.links documentation/manpage.refs +documentation/drbdsetup_*.xml benchmark/dm diff -Nru drbd8-8.3.7/ChangeLog drbd8-8.4.1+git55a81dc~cmd1/ChangeLog --- drbd8-8.3.7/ChangeLog 2010-01-13 16:13:58.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/ChangeLog 2012-02-02 14:09:14.000000000 +0000 @@ -2,7 +2,166 @@ ------ For even more detail, use "git log" or visit http://git.drbd.org/. -8.3.7 (api:86/proto:86-91) +8.4.1 (api:genl1/proto:86-100) +-------- + * Fixed a bug that might cause in kernel list corruption triggered by + simultaneous IO on multiple volumes in a single resource + * Fixed a bug that might cause a kernel OOPS in the worker thread while + the receiver tied to establish a connection (drbd-8.4.0 regression) + * Fixed an issue in the receiver that could cause connection triggered by + simultaneous IO on multiple volumes in a single resource + * Consider the discard-my-data flag for all volumes + * Fixed attaching to backing devices that do not support barriers/flushes, + when barriers/flushes are not disabled by the configuration. + (drbd-8.4.0 regression) + * Fixed a rare compatibility issue with DRBD's older than 8.3.7 + when negotiating the bio_size + * Fixed a rare race condition where an empty resync could stall with + if pause/unpause events happen in parallel + * Made the re-establishing of connections quicker, if it got a broken pipe + once. Previously there was a bug in the code caused it to waste the first + successful established connection after a broken pipe event. 
+ * crm-fence-peer.sh: Can now deal with multiple DRBD instances being in + a master/slave group + * Optional load balancing for read requests: new keyword "read-balance" + +8.4.0 (api:genl1/proto:86-100) +-------- + * Fixed handling of read errors during online verify + * Fix for connecting on high latency network links + * Fixed state transitions if fence-peer handler returns after connection was + established again + * Go into inconsistent disk state with on-io-error=pass-on policy + * Timeouts for requests processing on the peer (previously that + worked only if the data socket was congested) + * Reworked Linux backward compatibility mechanism + * Conflicting write detection is now based on an interval tree, + removed the hash-tables (necessary for the unlimited BIO sizes) + * Removed the tracing framework + * Support for multiple volumes (minors, block devices) per connection; + up to 65536 volumes per connection supported + * Reduced IO latencies during some state changes (esp. start resync) + * New on disk format for the AL: double capacity; 4k aligned IO; same space + * Multiple AL changes in a single transaction (precondition for + unlimited BIO sizes) + * DRBD no longer imposes any limit on BIO sizes + * Removed DRBD's limits on the number of minor devices + * DRBD's minors can now be removed (not only unconfigured) + * Switched the user space interface form connector to generic netlink + * drbdadm, configuration changes: volume sections; syncer section removed; + bool options got yes/no values, that improves option inheritance; + resource options + * drbdsetup: new commands for creating and removing resources + and minors + * drbdsetup: new commands for changing disk options while the disk + is attached; ...for changing net options while the connection is + established + * drbdsetup/drbdadm the wire-protocol is now a regular connection option + * Removed drbdadm option --force + * IO freezing/thawing is done on connection (all volumes) level + * fencing 
is done on connection (all volumes) level + * Enforce application of activity log after primary crash in user space + * Features from drbd-8.3: Allow detach from frozen backing devices with the + new --force option; configurable timeout for backing devices by the new + disk-timeout option + * Renamed --dry-run of connect to --tentative; plus alias in drbdsetup + * drbdadm got a "help" sub command, that shows the specific options + * drbdadm now knows all drbdsetup options, and verify ... + * drbdadm can now process all options in random order, and ignores the "--" + separator; compatibility aliases with the old calling conventions; now it + is compatible with the pre 8.4 way of calling. + * New default values (compared to drbd-8.3) for: minor-count, ko-count, al-extents, + c-plan-ahead, c-fill-target, c-min-rate, use-rle, on-io-error + +8.3.10 (api:88/proto:86-96) +-------- + * Fixed a subtle performance degradation that might affected synchronous + work loads (databases) (introduced in 8.3.9) + * Fixed a locking regression (introduced in 8.3.9) + * Fixed on-no-data-accessible for Primary, SyncTarget nodes (Bugz 332) + * Progress bar for online verify + * Optionally use the resync speed control loop code for the online verify + process as well + * Added code to detect false positives when using data-integrity-alg + * New config option on-congestion and new connection states ahead and behind + * Reduced IO latencies during resync, bitmap exchange and temporal states + * Only build a single kernel module package on distributions that provide + the infrastructure to have kernel version independent modules + * On 64bit architectures allow device sizes up to one petabyte + +8.3.9 (api:88/proto:86-95) +-------- + * Fix for possible deadlock on IO error during resync + * Fixed a race condition between adding and removing network configuration. + Lead to a BUG_ON() when triggered. 
+ * Fixed spurious full syncs that could happen after an empty resync and + concurrent connection loss. + * Fixed spurious full syncs that happened when connection got lost while + one node was in WFSyncUUID state (Bugz 318) + * Fixed a race in the meta-data update code path, that could lead to forgotten + updates to the meta-data. That in fact could lead to unexpected behavior + at the next connect + * Fixed potential deadlock on detach + * Fixed potential data divergence after multiple failures + * Implicitly create unconfigured devices which are referenced in sync-after + dependencies. + * OCF RA now also works with pacemaker 1.1 + * Allow BIO sizes of up to 128kByte. Note: In case drbd-proxy is used, at least + version 1.0.16 of drbd-proxy is required. + * New configuration keyword on-no-data-accessible. Possible values + io-error, and suspend-io. The default is "io-error", which matches the + previous behavior. + * If the fencing policy is set to resource-and-stonith, the primary node + will creates the new current UUID _after_ the fencing handler + returned. (Before it did immediately) + * Rewrote the resync speed control loop code. New configuration parameters + c-plan-ahead, c-fill-target, c-delay-target, c-max-rate, c-min-rate. + * Disable activity log updates when all blocks of an unconnected device is + are out of sync. That can be activated by using "invalidate-remote" on an + unconnected primary. 
+ * Improved IPv6 support: link local addresses + * Improved resync speed display in /proc/drbd + +8.3.8 (api:88/proto:86-94) +-------- + * Do not expose failed local READs to upper layers, regression introduced + in 8.3.3 + * Fixed support for devices with 4k hard sector size (again) + * Fixed a potential Oops in the disconnect code + * Fixed a race condition that could cause DRBD to consider the peers disk + as Inconstent after resync instead of UpToDate (Bugz 271) + * Fixed a reace condition that could cause DRBD to consider the peers disk + as Outdated instead of Inconsistent during resync (Bugz 277) + * Disallow to start a resync with invalidate / invalidate-remote when the + source disk is not UpToDate + * Forcing primary works now also for Consistent, not only for Outdated and + Inconsistent (Bugz 266) + * Improved robustness against corrupt or malicous sector addresses when + receiving data + * Added the initial-split-brain, it gets called also if the split-brain gets + automatically resolved + * Added the --assume-clean option for the resize command, it causes drbd to + not resync the new storage after an online grow operation + * drbdadm: Do not segfault if stacked-on-top-of refers to an undefined res + * drbdadm: Do not consider configs with invalid after statements as invalid + * drbdadm: Do not segfault if the peer's proxy section is missing + * drbdadm: Allow nullglob in include statement + * drbdadm: Fixed the use of waitpid + * init script: fix insserv headers (Debian 576901) + * Gave the receiving code the ability to use multiple BIOs for writing a + single data packet; now DRBD works with BIOs up to 32kByte also on LVM + devices; from now on the use_bmbv config option does nothing + * New command check-resize, that allows DRBD to detect offline resizing + and to move internal meta-data accordingly + * Added a control loop, that allows DRBD to find auto tune the resync + speed, on connections with large queues (drbd-proxy) + * --dry-run option for 
connect; disconnects after sync handshake + * --overwrite-data-of-peer got an alias named --force + * Improvements to crm-fence-peer + * Fixed option parsing and stacking in snapshot-resync-target-lvm.sh + * Compiles on 2.6.33 and 2.6.34 + +8.3.7 (api:88/proto:86-91) -------- * Lots of fixes to the new RPM packaging * Lots of fixes to the autoconfig stuff @@ -29,12 +188,12 @@ * Following Linux upstream changes 2.6.32 (SHASH and in_flight issues) * New /etc/drbd.conf example that suggests the use of /etc/drbd.d/xxx.res -8.3.6 (api:86/proto:86-91) +8.3.6 (api:88/proto:86-91) -------- * Make sure that we ship all unplug events * Introduced autoconf, new RPM packaging -8.3.5 (api:86/proto:86-91) +8.3.5 (api:88/proto:86-91) -------- * Fixed a regression introduced shortly before 8.3.3, which might case a deadlock in DRBD's disconnect code path. (Bugz 258) @@ -44,12 +203,12 @@ to avoid unnecessary migrations * Do not display the usage count dialog for /etc/inti.d/drbd status -8.3.4 (api:86/proto:86-91) +8.3.4 (api:88/proto:86-91) -------- * Fixed a regression in the connector backport introduced with 8.3.3. Affected only kernels older than 2.6.14. I.e. RHEL4 and SLES9. 
-8.3.3 (api:86/proto:86-91) +8.3.3 (api:88/proto:86-91) -------- * Correctly deal with large bitmaps (Bugz 239, 240) * Fixed a segfault in drbdadm's parser for unknown sync-after dependencies @@ -71,7 +230,7 @@ * Install bash completion stuff on SLES11 * Following Linux upstream changes 2.6.31 -8.3.2 (api:86/proto:86-90) +8.3.2 (api:88/proto:86-90) -------- * Fixed the "Not a digest" issue for hash functions already ported to shash * Fixed a race condition between device configuration and de-configuration @@ -103,7 +262,7 @@ * Using Linux's own tracing framework instead of our own * Compatibility with Linux 2.6.30 and 2.6.31-rc1 -8.3.1 (api:86/proto:86-89) +8.3.1 (api:88/proto:86-89) -------- * Fixed drbdadm invalidate on disconnected devices (reg in 8.2.7) * Fixed a hard to trigger spinlock deadlock when using device stacking @@ -123,7 +282,7 @@ * Do not force a full resync after a detach on a primary node * Compatibility with Linux 2.6.27, 2.6.28 and 2.6.29 -8.3.0 (api:86/proto:86-89) +8.3.0 (api:88/proto:86-89) -------- * Fixed 'sleep with spinlock held' in case online verify found a difference * Fixed error code pathes in request processing. @@ -149,7 +308,7 @@ * More build compatibility with older vendor kernels * Added drbd-overview.pl to the packages -8.2.7 (api:86/proto:86-88) +8.2.7 (api:88/proto:86-88) -------- * Fixed possible Oops on connection loss during sync handshake * Fixed various possible deadlocks in the disconnect/reconnect and @@ -163,7 +322,7 @@ node. New config options: no-disk-barrier, no-disk-drain * Merged all changes from 8.0.12 -> 8.0.14 into 8.2 -8.2.6 (api:86/proto:86-88) +8.2.6 (api:88/proto:86-88) -------- * The details of the LRU data structures is now hidden from /proc/drbd but can be re-enabled by echoing 1 to @@ -184,7 +343,7 @@ * Fixed online resizing in case it is triggered from the secondary node. -8.2.5 (api:86/proto:86-88) +8.2.5 (api:88/proto:86-88) -------- * Fixed a race between online-verify and application writes. 
It caused drbd to report false positives, and very likely @@ -195,13 +354,13 @@ lockup after the first connection loss. Fixed. * Merged all changes from 8.0.8 -> 8.0.11 into 8.2 -8.2.4 (api:86/proto:86-88) +8.2.4 (api:88/proto:86-88) -------- * Fixed the online-verify and data-integrity-alg features. While preparing DRBD for Linux-2.6.24 a bug was introduced that rendered most digest based functionality in DRBD useless. -8.2.3 (api:86/proto:86-88) +8.2.3 (api:88/proto:86-88) -------- * Released the online-verify feature from DRBD+ into drbd-8.2 * Fixed the data-integrity-alg feature to work correctly diff -Nru drbd8-8.3.7/Makefile drbd8-8.4.1+git55a81dc~cmd1/Makefile --- drbd8-8.3.7/Makefile 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/Makefile 2012-09-03 22:37:14.000000000 +0000 @@ -25,9 +25,7 @@ # and call those from here. -- lge # variables set by configure -GIT = -KDIR ?= -KVER ?= +GIT = /usr/bin/git LN_S = ln -s PREFIX = /usr RPMBUILD = @@ -40,16 +38,27 @@ WITH_XEN = yes WITH_PACEMAKER = yes WITH_HEARTBEAT = yes -WITH_RGMANAGER = yes +WITH_RGMANAGER = no WITH_BASHCOMPLETION = yes +# default for KDIR/KVER +ifndef KVER + ifndef KDIR +KVER = `uname -r` +KDIR = /lib/modules/$(KVER)/build + else +KVER := $(shell make -s -C $(KDIR) kernelrelease) + endif +endif +KDIR ?= /lib/modules/$(KVER)/build + # for some reason some of the commands below only work correctly in bash, # and not in e.g. dash. I'm too lazy to fix it to be compatible. 
SHELL=/bin/bash SUBDIRS = user scripts documentation drbd -REL_VERSION := $(shell $(SED) -ne '/REL_VERSION/{s/^[^"]*"\([^ "]*\).*/\1/;p;q;}' drbd/linux/drbd_config.h) +REL_VERSION := $(shell $(SED) -ne '/^\#define REL_VERSION/{s/^[^"]*"\([^ "]*\).*/\1/;p;q;}' drbd/linux/drbd_config.h) ifdef FORCE # # NOTE to generate a tgz even if too lazy to update the changelogs, @@ -84,7 +93,7 @@ .PHONY: module module: check-kdir ifeq ($(WITH_KM),yes) - @ $(MAKE) -C drbd + @ $(MAKE) -C drbd KVER=$(KVER) KDIR=$(KDIR) @ echo -e "\n\tModule build was successful." endif @@ -133,6 +142,18 @@ then \ echo -e "\n\t%changelog in drbd.spec.in needs update"; \ up2date=false; fi; \ + in_changelog=$$(sed -n -e '0,/^%changelog/d' \ + -e '/- '"$$dver_re"'-/p' < drbd-km.spec.in) ; \ + if test -z "$$in_changelog" ; \ + then \ + echo -e "\n\t%changelog in drbd-km.spec.in needs update"; \ + up2date=false; fi; \ + in_changelog=$$(sed -n -e '0,/^%changelog/d' \ + -e '/- '"$$dver_re"'-/p' < drbd-kernel.spec.in) ; \ + if test -z "$$in_changelog" ; \ + then \ + echo -e "\n\t%changelog in drbd-kernel.spec.in needs update"; \ + up2date=false; fi; \ if ! grep "^$$dver_re\>" >/dev/null 2>&1 ChangeLog; \ then \ echo -e "\n\tChangeLog needs update"; \ @@ -141,7 +162,7 @@ then \ echo -e "\n\tconfigure.ac needs update"; \ up2date=false; fi ; \ - if ! grep "^drbd8 ($$dver_re-" >/dev/null 2>&1 debian/changelog; \ + if ! 
grep "^drbd8 (2:$$dver_re-" >/dev/null 2>&1 debian/changelog; \ then \ echo -e "\n\tdebian/changelog needs update [ignored]\n"; \ : do not fail the build because of outdated debian/changelog ; fi ; \ @@ -161,12 +182,12 @@ @$(GIT) ls-files | sed '$(if $(PRESERVE_DEBIAN),,/^debian/d);s#^#drbd-$(DIST_VERSION)/#' > .filelist @[ -s .filelist ] # assert there is something in .filelist now @find documentation -name "[^.]*.[58]" -o -name "*.html" | \ - sed "s/^/drbd-$(DIST_VERSION)\//" >> .filelist ;\ - echo drbd-$(DIST_VERSION)/drbd_config.h >> .filelist ;\ - echo drbd-$(DIST_VERSION)/drbd/drbd_buildtag.c >> .filelist ;\ - echo drbd-$(DIST_VERSION)/.filelist >> .filelist ;\ - echo drbd-$(DIST_VERSION)/configure >> .filelist ;\ - echo drbd-$(DIST_VERSION)/user/config.h.in >> .filelist ;\ + sed "s/^/drbd-$(DIST_VERSION)\//" >> .filelist ; \ + echo drbd-$(DIST_VERSION)/drbd_config.h >> .filelist ; \ + echo drbd-$(DIST_VERSION)/drbd/drbd_buildtag.c >> .filelist ; \ + echo drbd-$(DIST_VERSION)/.filelist >> .filelist ; \ + echo drbd-$(DIST_VERSION)/configure >> .filelist ; \ + echo drbd-$(DIST_VERSION)/user/config.h.in >> .filelist ; \ echo "./.filelist updated." # tgz will no longer automatically update .filelist, @@ -206,7 +227,7 @@ tarball: check_all_committed distclean doc configure .filelist $(MAKE) tgz -all tools doc .filelist: drbd/drbd_buildtag.c +all module tools doc .filelist: drbd/drbd_buildtag.c kernel-patch: drbd/drbd_buildtag.c set -o errexit; \ @@ -223,6 +244,9 @@ drbd-km.spec: drbd-km.spec.in configure ./configure --enable-spec --without-utils --with-km +drbd-kernel.spec: drbd-kernel.spec.in configure + ./configure --enable-spec --without-utils --with-km + .PHONY: rpm rpm: tgz drbd.spec cp drbd-$(FDIST_VERSION).tar.gz `rpm -E "%_sourcedir"` @@ -241,13 +265,29 @@ drbd-km.spec @echo "You have now:" ; find `rpm -E "%_rpmdir"` -name *.rpm +# kernel module package using the system macros. +# result is kABI aware and uses the weak-updates mechanism. 
+# Only define %kernel_version, it it was set outside of this file, +# i.e. was inherited from environment, or set explicitly on command line. +# If unset, the macro will figure it out internally, and not depend on +# uname -r, which may be wrong in a chroot build environment. +.PHONY: kmp-rpm +kmp-rpm: tgz drbd-kernel.spec + cp drbd-$(FDIST_VERSION).tar.gz `rpm -E "%_sourcedir"` + $(RPMBUILD) -bb \ + $(if $(filter file,$(origin KVER)), --define "kernel_version $(KVER)") \ + $(RPMOPT) \ + drbd-kernel.spec + @echo "You have now:" ; find `rpm -E "%_rpmdir"` -name *.rpm + .PHONY: srpm srpm: tgz drbd.spec drbd-km.spec cp drbd-$(FDIST_VERSION).tar.gz `rpm -E "%_sourcedir"` $(RPMBUILD) -bs \ --define "kernelversion $(KVER)" \ + --define "kernel_version $(KVER)" \ --define "kdir $(KDIR)" \ $(RPMOPT) \ - drbd.spec drbd-km.spec + drbd.spec drbd-km.spec drbd-kernel.spec @echo "You have now:" ; find `rpm -E "%_srcrpmdir"` -name *.src.rpm endif diff -Nru drbd8-8.3.7/Makefile.in drbd8-8.4.1+git55a81dc~cmd1/Makefile.in --- drbd8-8.3.7/Makefile.in 2010-01-13 16:04:50.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/Makefile.in 2012-02-02 14:09:14.000000000 +0000 @@ -26,8 +26,6 @@ # variables set by configure GIT = @GIT@ -KDIR ?= @KDIR@ -KVER ?= @KVER@ LN_S = @LN_S@ PREFIX = @prefix@ RPMBUILD = @RPMBUILD@ @@ -43,13 +41,24 @@ WITH_RGMANAGER = @WITH_RGMANAGER@ WITH_BASHCOMPLETION = @WITH_BASHCOMPLETION@ +# default for KDIR/KVER +ifndef KVER + ifndef KDIR +KVER = `uname -r` +KDIR = /lib/modules/$(KVER)/build + else +KVER := $(shell make -s -C $(KDIR) kernelrelease) + endif +endif +KDIR ?= /lib/modules/$(KVER)/build + # for some reason some of the commands below only work correctly in bash, # and not in e.g. dash. I'm too lazy to fix it to be compatible. 
SHELL=/bin/bash SUBDIRS = user scripts documentation drbd -REL_VERSION := $(shell $(SED) -ne '/REL_VERSION/{s/^[^"]*"\([^ "]*\).*/\1/;p;q;}' drbd/linux/drbd_config.h) +REL_VERSION := $(shell $(SED) -ne '/^\#define REL_VERSION/{s/^[^"]*"\([^ "]*\).*/\1/;p;q;}' drbd/linux/drbd_config.h) ifdef FORCE # # NOTE to generate a tgz even if too lazy to update the changelogs, @@ -84,7 +93,7 @@ .PHONY: module module: check-kdir ifeq ($(WITH_KM),yes) - @ $(MAKE) -C drbd + @ $(MAKE) -C drbd KVER=$(KVER) KDIR=$(KDIR) @ echo -e "\n\tModule build was successful." endif @@ -133,6 +142,18 @@ then \ echo -e "\n\t%changelog in drbd.spec.in needs update"; \ up2date=false; fi; \ + in_changelog=$$(sed -n -e '0,/^%changelog/d' \ + -e '/- '"$$dver_re"'-/p' < drbd-km.spec.in) ; \ + if test -z "$$in_changelog" ; \ + then \ + echo -e "\n\t%changelog in drbd-km.spec.in needs update"; \ + up2date=false; fi; \ + in_changelog=$$(sed -n -e '0,/^%changelog/d' \ + -e '/- '"$$dver_re"'-/p' < drbd-kernel.spec.in) ; \ + if test -z "$$in_changelog" ; \ + then \ + echo -e "\n\t%changelog in drbd-kernel.spec.in needs update"; \ + up2date=false; fi; \ if ! grep "^$$dver_re\>" >/dev/null 2>&1 ChangeLog; \ then \ echo -e "\n\tChangeLog needs update"; \ @@ -141,7 +162,7 @@ then \ echo -e "\n\tconfigure.ac needs update"; \ up2date=false; fi ; \ - if ! grep "^drbd8 ($$dver_re-" >/dev/null 2>&1 debian/changelog; \ + if ! 
grep "^drbd8 (2:$$dver_re-" >/dev/null 2>&1 debian/changelog; \ then \ echo -e "\n\tdebian/changelog needs update [ignored]\n"; \ : do not fail the build because of outdated debian/changelog ; fi ; \ @@ -161,12 +182,12 @@ @$(GIT) ls-files | sed '$(if $(PRESERVE_DEBIAN),,/^debian/d);s#^#drbd-$(DIST_VERSION)/#' > .filelist @[ -s .filelist ] # assert there is something in .filelist now @find documentation -name "[^.]*.[58]" -o -name "*.html" | \ - sed "s/^/drbd-$(DIST_VERSION)\//" >> .filelist ;\ - echo drbd-$(DIST_VERSION)/drbd_config.h >> .filelist ;\ - echo drbd-$(DIST_VERSION)/drbd/drbd_buildtag.c >> .filelist ;\ - echo drbd-$(DIST_VERSION)/.filelist >> .filelist ;\ - echo drbd-$(DIST_VERSION)/configure >> .filelist ;\ - echo drbd-$(DIST_VERSION)/user/config.h.in >> .filelist ;\ + sed "s/^/drbd-$(DIST_VERSION)\//" >> .filelist ; \ + echo drbd-$(DIST_VERSION)/drbd_config.h >> .filelist ; \ + echo drbd-$(DIST_VERSION)/drbd/drbd_buildtag.c >> .filelist ; \ + echo drbd-$(DIST_VERSION)/.filelist >> .filelist ; \ + echo drbd-$(DIST_VERSION)/configure >> .filelist ; \ + echo drbd-$(DIST_VERSION)/user/config.h.in >> .filelist ; \ echo "./.filelist updated." # tgz will no longer automatically update .filelist, @@ -206,7 +227,7 @@ tarball: check_all_committed distclean doc configure .filelist $(MAKE) tgz -all tools doc .filelist: drbd/drbd_buildtag.c +all module tools doc .filelist: drbd/drbd_buildtag.c kernel-patch: drbd/drbd_buildtag.c set -o errexit; \ @@ -223,6 +244,9 @@ drbd-km.spec: drbd-km.spec.in configure ./configure --enable-spec --without-utils --with-km +drbd-kernel.spec: drbd-kernel.spec.in configure + ./configure --enable-spec --without-utils --with-km + .PHONY: rpm rpm: tgz drbd.spec cp drbd-$(FDIST_VERSION).tar.gz `rpm -E "%_sourcedir"` @@ -241,13 +265,29 @@ drbd-km.spec @echo "You have now:" ; find `rpm -E "%_rpmdir"` -name *.rpm +# kernel module package using the system macros. +# result is kABI aware and uses the weak-updates mechanism. 
+# Only define %kernel_version, it it was set outside of this file, +# i.e. was inherited from environment, or set explicitly on command line. +# If unset, the macro will figure it out internally, and not depend on +# uname -r, which may be wrong in a chroot build environment. +.PHONY: kmp-rpm +kmp-rpm: tgz drbd-kernel.spec + cp drbd-$(FDIST_VERSION).tar.gz `rpm -E "%_sourcedir"` + $(RPMBUILD) -bb \ + $(if $(filter file,$(origin KVER)), --define "kernel_version $(KVER)") \ + $(RPMOPT) \ + drbd-kernel.spec + @echo "You have now:" ; find `rpm -E "%_rpmdir"` -name *.rpm + .PHONY: srpm srpm: tgz drbd.spec drbd-km.spec cp drbd-$(FDIST_VERSION).tar.gz `rpm -E "%_sourcedir"` $(RPMBUILD) -bs \ --define "kernelversion $(KVER)" \ + --define "kernel_version $(KVER)" \ --define "kdir $(KDIR)" \ $(RPMOPT) \ - drbd.spec drbd-km.spec + drbd.spec drbd-km.spec drbd-kernel.spec @echo "You have now:" ; find `rpm -E "%_srcrpmdir"` -name *.src.rpm endif diff -Nru drbd8-8.3.7/autogen.sh drbd8-8.4.1+git55a81dc~cmd1/autogen.sh --- drbd8-8.3.7/autogen.sh 2010-01-13 16:04:50.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/autogen.sh 2012-02-02 14:09:14.000000000 +0000 @@ -8,5 +8,8 @@ echo " suggested configure parameters: +# prepare for rpmbuild, only generate spec files +./configure --with-km --enable-spec +# or prepare for direct build ./configure --prefix=/usr --localstatedir=/var --sysconfdir=/etc " diff -Nru drbd8-8.3.7/benchmark/io-latency-test.c drbd8-8.4.1+git55a81dc~cmd1/benchmark/io-latency-test.c --- drbd8-8.3.7/benchmark/io-latency-test.c 2009-07-27 08:47:42.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/benchmark/io-latency-test.c 2012-02-02 14:09:14.000000000 +0000 @@ -57,6 +57,7 @@ unsigned long record_nr; unsigned int write_duration_us; unsigned int write_duration_records; + unsigned int max_write_duration_us; double avg_write_duration; }; @@ -64,7 +65,7 @@ { struct shared_data *data = (struct shared_data*) arg; unsigned long last_record_nr=-1, current_record_nr=0; - 
unsigned int avg_write,wd,wr; + unsigned int avg_write,wd,wr,mwd; double avg_write_duration; enum { IO_RUNNING, IO_BLOCKED } io_state = IO_RUNNING; @@ -76,8 +77,10 @@ current_record_nr = data->record_nr; wd = data->write_duration_us; wr = data->write_duration_records; + mwd = data->max_write_duration_us; data->write_duration_us = 0; data->write_duration_records = 0; + data->max_write_duration_us = 0; avg_write_duration = data->avg_write_duration; pthread_mutex_unlock(&data->mutex); @@ -106,7 +109,8 @@ last_record_nr = current_record_nr; case IO_BLOCKED: if(current_record_nr != last_record_nr) { - printf("IO just resumed.\n"); + printf("IO just resumed. Blocked for %d.%02dms\n", + mwd/1000, (mwd%1000)/10); io_state = IO_RUNNING; } } @@ -216,6 +220,7 @@ data.record_nr = record_nr; data.write_duration_us = 0; data.write_duration_records = 1; + data.max_write_duration_us = 0; pthread_create(&watch_dog,NULL,wd_thread,&data); for( ; !records || record_nr < records ; record_nr++) { @@ -268,6 +273,8 @@ data.write_duration_us += write_duration_us; data.write_duration_records++; data.avg_write_duration = avg_write_duration; + if (write_duration_us > data.max_write_duration_us) + data.max_write_duration_us = write_duration_us; pthread_mutex_unlock(&data.mutex); if(write_duration_us < record_time ) { diff -Nru drbd8-8.3.7/config.log drbd8-8.4.1+git55a81dc~cmd1/config.log --- drbd8-8.3.7/config.log 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/config.log 2012-09-03 22:37:14.000000000 +0000 @@ -1,20 +1,20 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by DRBD configure 8.3.7, which was -generated by GNU Autoconf 2.64. Invocation command line was +It was created by DRBD configure 8.4.1, which was +generated by GNU Autoconf 2.65. 
Invocation command line was - $ ./configure --prefix=/usr --localstatedir=/var --sysconfdir=/etc --with-utils --with-udev --with-xen --with-pacemaker --with-rgmanager --with-bashcompletion + $ ./configure --prefix=/usr --localstatedir=/var --sysconfdir=/etc ## --------- ## ## Platform. ## ## --------- ## -hostname = lucid -uname -m = i686 -uname -r = 2.6.32-13-generic-pae +hostname = rexy +uname -m = x86_64 +uname -r = 3.2.27 uname -s = Linux -uname -v = #18-Ubuntu SMP Wed Feb 10 22:52:52 UTC 2010 +uname -v = #1 SMP Sat Aug 11 17:21:58 VET 2012 /usr/bin/uname -p = unknown /bin/uname -X = unknown @@ -40,111 +40,118 @@ ## Core tests. ## ## ----------- ## -configure:1939: checking for gcc -configure:1955: found /usr/bin/gcc -configure:1966: result: gcc -configure:2195: checking for C compiler version -configure:2204: gcc --version >&5 -gcc-4.4.real (Ubuntu 4.4.3-2ubuntu2) 4.4.3 -Copyright (C) 2010 Free Software Foundation, Inc. +configure:1951: checking for gcc +configure:1967: found /usr/bin/gcc +configure:1978: result: gcc +configure:2207: checking for C compiler version +configure:2216: gcc --version >&5 +gcc (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3 +Copyright (C) 2011 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -configure:2215: $? = 0 -configure:2204: gcc -v >&5 +configure:2227: $? = 0 +configure:2216: gcc -v >&5 Using built-in specs. 
-Target: i486-linux-gnu -Configured with: ../src/configure -v --with-pkgversion='Ubuntu 4.4.3-2ubuntu2' --with-bugurl=file:///usr/share/doc/gcc-4.4/README.Bugs --enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr --enable-shared --enable-multiarch --enable-linker-build-id --with-system-zlib --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.4 --program-suffix=-4.4 --enable-nls --enable-clocale=gnu --enable-libstdcxx-debug --enable-objc-gc --enable-targets=all --disable-werror --with-arch-32=i486 --with-tune=generic --enable-checking=release --build=i486-linux-gnu --host=i486-linux-gnu --target=i486-linux-gnu +COLLECT_GCC=gcc +COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/4.6/lto-wrapper +Target: x86_64-linux-gnu +Configured with: ../src/configure -v --with-pkgversion='Ubuntu/Linaro 4.6.3-1ubuntu5' --with-bugurl=file:///usr/share/doc/gcc-4.6/README.Bugs --enable-languages=c,c++,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-4.6 --enable-shared --enable-linker-build-id --with-system-zlib --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --with-gxx-include-dir=/usr/include/c++/4.6 --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --enable-gnu-unique-object --enable-plugin --enable-objc-gc --disable-werror --with-arch-32=i686 --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu Thread model: posix -gcc version 4.4.3 (Ubuntu 4.4.3-2ubuntu2) -configure:2215: $? = 0 -configure:2204: gcc -V >&5 -gcc-4.4.real: '-V' option must have argument -configure:2215: $? = 1 -configure:2204: gcc -qversion >&5 -gcc-4.4.real: unrecognized option '-qversion' -gcc-4.4.real: no input files -configure:2215: $? 
= 1 -configure:2237: checking for C compiler default output file name -configure:2259: gcc -Wall -g -O2 -Wl,-Bsymbolic-functions conftest.c >&5 -configure:2263: $? = 0 -configure:2300: result: a.out -configure:2316: checking whether the C compiler works -configure:2325: ./a.out -configure:2329: $? = 0 -configure:2344: result: yes -configure:2351: checking whether we are cross compiling -configure:2353: result: no -configure:2356: checking for suffix of executables -configure:2363: gcc -o conftest -Wall -g -O2 -Wl,-Bsymbolic-functions conftest.c >&5 -configure:2367: $? = 0 -configure:2389: result: -configure:2395: checking for suffix of object files -configure:2417: gcc -c -Wall -g -O2 conftest.c >&5 -configure:2421: $? = 0 -configure:2442: result: o -configure:2446: checking whether we are using the GNU C compiler -configure:2465: gcc -c -Wall -g -O2 conftest.c >&5 -configure:2465: $? = 0 -configure:2474: result: yes -configure:2483: checking whether gcc accepts -g -configure:2503: gcc -c -g conftest.c >&5 -configure:2503: $? = 0 -configure:2544: result: yes -configure:2561: checking for gcc option to accept ISO C89 -configure:2625: gcc -c -Wall -g -O2 conftest.c >&5 -configure:2625: $? 
= 0 -configure:2638: result: none needed -configure:2658: checking whether ln -s works -configure:2662: result: yes -configure:2671: checking for sed -configure:2689: found /bin/sed -configure:2701: result: /bin/sed -configure:2711: checking for grep -configure:2729: found /bin/grep -configure:2741: result: /bin/grep -configure:2751: checking for flex -configure:2769: found /usr/bin/flex -configure:2781: result: /usr/bin/flex -configure:2791: checking for rpmbuild -configure:2824: result: no -configure:2831: checking for xsltproc -configure:2849: found /usr/bin/xsltproc -configure:2861: result: /usr/bin/xsltproc -configure:2871: checking for tar -configure:2889: found /bin/tar -configure:2901: result: /bin/tar -configure:2911: checking for git -configure:2944: result: no -configure:2951: checking for dpkg-buildpackage -configure:2969: found /usr/bin/dpkg-buildpackage -configure:2981: result: /usr/bin/dpkg-buildpackage -configure:2991: checking for udevadm -configure:3009: found /sbin/udevadm -configure:3021: result: /sbin/udevadm -configure:3031: checking for udevinfo -configure:3064: result: no -configure:3086: WARNING: No rpmbuild found, building RPM packages is disabled. -configure:3101: WARNING: Cannot update buildtag without git. You may safely ignore this warning when building from a tarball. -configure:3161: checking for /etc/gentoo-release -configure:3174: result: no -configure:3180: checking for /etc/redhat-release -configure:3193: result: no -configure:3199: checking for /etc/slackware-version -configure:3212: result: no -configure:3218: checking for /etc/debian_version -configure:3231: result: yes -configure:3237: checking for /etc/SuSE-release -configure:3250: result: no -configure:3309: configured for Debian (includes Ubuntu). -configure:3504: creating ./config.status +gcc version 4.6.3 (Ubuntu/Linaro 4.6.3-1ubuntu5) +configure:2227: $? 
= 0 +configure:2216: gcc -V >&5 +gcc: error: unrecognized option '-V' +gcc: fatal error: no input files +compilation terminated. +configure:2227: $? = 4 +configure:2216: gcc -qversion >&5 +gcc: error: unrecognized option '-qversion' +gcc: fatal error: no input files +compilation terminated. +configure:2227: $? = 4 +configure:2247: checking whether the C compiler works +configure:2269: gcc -Wall -g -O2 -D_FORTIFY_SOURCE=2 -Wl,-Bsymbolic-functions -Wl,-z,relro conftest.c >&5 +configure:2273: $? = 0 +configure:2322: result: yes +configure:2325: checking for C compiler default output file name +configure:2327: result: a.out +configure:2333: checking for suffix of executables +configure:2340: gcc -o conftest -Wall -g -O2 -D_FORTIFY_SOURCE=2 -Wl,-Bsymbolic-functions -Wl,-z,relro conftest.c >&5 +configure:2344: $? = 0 +configure:2366: result: +configure:2388: checking whether we are cross compiling +configure:2396: gcc -o conftest -Wall -g -O2 -D_FORTIFY_SOURCE=2 -Wl,-Bsymbolic-functions -Wl,-z,relro conftest.c >&5 +configure:2400: $? = 0 +configure:2407: ./conftest +configure:2411: $? = 0 +configure:2426: result: no +configure:2431: checking for suffix of object files +configure:2453: gcc -c -Wall -g -O2 -D_FORTIFY_SOURCE=2 conftest.c >&5 +configure:2457: $? = 0 +configure:2478: result: o +configure:2482: checking whether we are using the GNU C compiler +configure:2501: gcc -c -Wall -g -O2 -D_FORTIFY_SOURCE=2 conftest.c >&5 +configure:2501: $? = 0 +configure:2510: result: yes +configure:2519: checking whether gcc accepts -g +configure:2539: gcc -c -g -D_FORTIFY_SOURCE=2 conftest.c >&5 +configure:2539: $? = 0 +configure:2580: result: yes +configure:2597: checking for gcc option to accept ISO C89 +configure:2661: gcc -c -Wall -g -O2 -D_FORTIFY_SOURCE=2 conftest.c >&5 +configure:2661: $? 
= 0 +configure:2674: result: none needed +configure:2694: checking whether ln -s works +configure:2698: result: yes +configure:2707: checking for sed +configure:2725: found /bin/sed +configure:2737: result: /bin/sed +configure:2747: checking for grep +configure:2765: found /bin/grep +configure:2777: result: /bin/grep +configure:2787: checking for flex +configure:2805: found /usr/bin/flex +configure:2817: result: /usr/bin/flex +configure:2827: checking for rpmbuild +configure:2860: result: no +configure:2867: checking for xsltproc +configure:2885: found /usr/bin/xsltproc +configure:2897: result: /usr/bin/xsltproc +configure:2907: checking for tar +configure:2925: found /bin/tar +configure:2937: result: /bin/tar +configure:2947: checking for git +configure:2965: found /usr/bin/git +configure:2977: result: /usr/bin/git +configure:2987: checking for dpkg-buildpackage +configure:3005: found /usr/bin/dpkg-buildpackage +configure:3017: result: /usr/bin/dpkg-buildpackage +configure:3027: checking for udevadm +configure:3045: found /sbin/udevadm +configure:3058: result: /sbin/udevadm +configure:3068: checking for udevinfo +configure:3099: result: false +configure:3124: WARNING: No rpmbuild found, building RPM packages is disabled. +configure:3168: checking for /etc/gentoo-release +configure:3181: result: no +configure:3187: checking for /etc/redhat-release +configure:3200: result: no +configure:3206: checking for /etc/slackware-version +configure:3219: result: no +configure:3225: checking for /etc/debian_version +configure:3238: result: yes +configure:3244: checking for /etc/SuSE-release +configure:3257: result: no +configure:3316: configured for Debian (includes Ubuntu). +configure:3522: creating ./config.status ## ---------------------- ## ## Running config.status. ## ## ---------------------- ## -This file was extended by DRBD config.status 8.3.7, which was -generated by GNU Autoconf 2.64. 
Invocation command line was +This file was extended by DRBD config.status 8.4.1, which was +generated by GNU Autoconf 2.65. Invocation command line was CONFIG_FILES = CONFIG_HEADERS = @@ -152,13 +159,17 @@ CONFIG_COMMANDS = $ ./config.status -on lucid +on rexy -config.status:828: creating Makefile -config.status:828: creating user/Makefile -config.status:828: creating scripts/Makefile -config.status:828: creating documentation/Makefile -config.status:828: creating user/config.h +config.status:836: creating Makefile +config.status:836: creating user/Makefile +config.status:836: creating user/legacy/Makefile +config.status:836: creating scripts/Makefile +config.status:836: creating documentation/Makefile +config.status:836: creating user/config.h +config.status:997: user/config.h is unchanged +config.status:836: creating user/legacy/config.h +config.status:997: user/legacy/config.h is unchanged ## ---------------- ## ## Cache variables. ## @@ -170,9 +181,9 @@ ac_cv_env_CFLAGS_set=set ac_cv_env_CFLAGS_value='-Wall -g -O2' ac_cv_env_CPPFLAGS_set=set -ac_cv_env_CPPFLAGS_value= +ac_cv_env_CPPFLAGS_value=-D_FORTIFY_SOURCE=2 ac_cv_env_LDFLAGS_set=set -ac_cv_env_LDFLAGS_value=-Wl,-Bsymbolic-functions +ac_cv_env_LDFLAGS_value='-Wl,-Bsymbolic-functions -Wl,-z,relro' ac_cv_env_LIBS_set= ac_cv_env_LIBS_value= ac_cv_env_build_alias_set= @@ -189,10 +200,12 @@ ac_cv_objext=o ac_cv_path_DPKG_BUILDPACKAGE=/usr/bin/dpkg-buildpackage ac_cv_path_FLEX=/usr/bin/flex +ac_cv_path_GIT=/usr/bin/git ac_cv_path_GREP=/bin/grep ac_cv_path_SED=/bin/sed ac_cv_path_TAR=/bin/tar ac_cv_path_UDEVADM=/sbin/udevadm +ac_cv_path_UDEVINFO=false ac_cv_path_XSLTPROC=/usr/bin/xsltproc ac_cv_prog_ac_ct_CC=gcc ac_cv_prog_cc_c89= @@ -205,7 +218,7 @@ BASH_COMPLETION_SUFFIX='' CC='gcc' CFLAGS='-Wall -g -O2' -CPPFLAGS='' +CPPFLAGS='-D_FORTIFY_SOURCE=2' DEFS='-DHAVE_CONFIG_H' DISTRO='debian' DPKG_BUILDPACKAGE='/usr/bin/dpkg-buildpackage' @@ -214,13 +227,11 @@ ECHO_T='' EXEEXT='' FLEX='/usr/bin/flex' -GIT='' 
+GIT='/usr/bin/git' GREP='/bin/grep' INITDIR='/etc/init.d' INITSCRIPT_SYMLINK='' -KDIR='' -KVER='' -LDFLAGS='-Wl,-Bsymbolic-functions' +LDFLAGS='-Wl,-Bsymbolic-functions -Wl,-z,relro' LIBOBJS='' LIBS='' LN_S='ln -s' @@ -228,10 +239,10 @@ OBJEXT='o' PACKAGE_BUGREPORT='drbd-dev@lists.linbit.com' PACKAGE_NAME='DRBD' -PACKAGE_STRING='DRBD 8.3.7' +PACKAGE_STRING='DRBD 8.4.1' PACKAGE_TARNAME='drbd' PACKAGE_URL='' -PACKAGE_VERSION='8.3.7' +PACKAGE_VERSION='8.4.1' PATH_SEPARATOR=':' RPMBUILD='' RPM_BUILDREQ_DEFAULT='gcc flex glibc-devel make' @@ -249,13 +260,14 @@ SHELL='/bin/bash' TAR='/bin/tar' UDEVADM='/sbin/udevadm' -UDEVINFO='' +UDEVINFO='false' UDEV_RULE_SUFFIX='' WITH_BASHCOMPLETION='yes' WITH_HEARTBEAT='yes' WITH_KM='no' +WITH_LEGACY_UTILS='yes' WITH_PACEMAKER='yes' -WITH_RGMANAGER='yes' +WITH_RGMANAGER='no' WITH_UDEV='yes' WITH_UTILS='yes' WITH_XEN='yes' @@ -294,12 +306,14 @@ /* confdefs.h */ #define PACKAGE_NAME "DRBD" #define PACKAGE_TARNAME "drbd" -#define PACKAGE_VERSION "8.3.7" -#define PACKAGE_STRING "DRBD 8.3.7" +#define PACKAGE_VERSION "8.4.1" +#define PACKAGE_STRING "DRBD 8.4.1" #define PACKAGE_BUGREPORT "drbd-dev@lists.linbit.com" #define PACKAGE_URL "" #define DRBD_LIB_DIR "/var/lib/drbd" +#define DRBD_RUN_DIR "/var/run/drbd" #define DRBD_LOCK_DIR "/var/lock" #define DRBD_CONFIG_DIR "/etc" +#define DRBD_LEGACY_83 1 configure: exit 0 diff -Nru drbd8-8.3.7/config.status drbd8-8.4.1+git55a81dc~cmd1/config.status --- drbd8-8.3.7/config.status 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/config.status 2012-09-03 22:37:14.000000000 +0000 @@ -391,8 +391,8 @@ # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by DRBD $as_me 8.3.7, which was -generated by GNU Autoconf 2.64. Invocation command line was +This file was extended by DRBD $as_me 8.4.1, which was +generated by GNU Autoconf 2.65. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS @@ -404,8 +404,8 @@ " # Files that config.status was made for. -config_files=" Makefile user/Makefile scripts/Makefile documentation/Makefile" -config_headers=" user/config.h" +config_files=" Makefile user/Makefile user/legacy/Makefile scripts/Makefile documentation/Makefile" +config_headers=" user/config.h user/legacy/config.h" ac_cs_usage="\ \`$as_me' instantiates files and other configuration actions @@ -416,6 +416,7 @@ -h, --help print this help, then exit -V, --version print version number and configuration settings, then exit + --config print configuration, then exit -q, --quiet, --silent do not print progress messages -d, --debug don't remove temporary files @@ -433,16 +434,17 @@ Report bugs to ." +ac_cs_config="'--prefix=/usr' '--localstatedir=/var' '--sysconfdir=/etc' 'CFLAGS=-Wall -g -O2' 'LDFLAGS=-Wl,-Bsymbolic-functions -Wl,-z,relro' 'CPPFLAGS=-D_FORTIFY_SOURCE=2'" ac_cs_version="\ -DRBD config.status 8.3.7 -configured by ./configure, generated by GNU Autoconf 2.64, - with options \"'--prefix=/usr' '--localstatedir=/var' '--sysconfdir=/etc' '--with-utils' '--with-udev' '--with-xen' '--with-pacemaker' '--with-rgmanager' '--with-bashcompletion' 'CFLAGS=-Wall -g -O2' 'LDFLAGS=-Wl,-Bsymbolic-functions' 'CPPFLAGS='\" +DRBD config.status 8.4.1 +configured by ./configure, generated by GNU Autoconf 2.65, + with options \"$ac_cs_config\" Copyright (C) 2009 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." -ac_pwd='/home/ivoks/Cluster/drbd8-8.3.7' +ac_pwd='/home/ildefonso/trabajo/commandprompt/cmd/drbd/drbd-8.4.1' srcdir='.' test -n "$AWK" || AWK=awk # The default lists apply if the user does not specify any file. 
@@ -468,6 +470,8 @@ ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) @@ -513,7 +517,7 @@ fi if $ac_cs_recheck; then - set X '/bin/bash' './configure' '--prefix=/usr' '--localstatedir=/var' '--sysconfdir=/etc' '--with-utils' '--with-udev' '--with-xen' '--with-pacemaker' '--with-rgmanager' '--with-bashcompletion' 'CFLAGS=-Wall -g -O2' 'LDFLAGS=-Wl,-Bsymbolic-functions' 'CPPFLAGS=' $ac_configure_extra_args --no-create --no-recursion + set X '/bin/bash' './configure' '--prefix=/usr' '--localstatedir=/var' '--sysconfdir=/etc' 'CFLAGS=-Wall -g -O2' 'LDFLAGS=-Wl,-Bsymbolic-functions -Wl,-z,relro' 'CPPFLAGS=-D_FORTIFY_SOURCE=2' $ac_configure_extra_args --no-create --no-recursion shift $as_echo "running CONFIG_SHELL=/bin/bash $*" >&6 CONFIG_SHELL='/bin/bash' @@ -537,11 +541,14 @@ case $ac_config_target in "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; "user/Makefile") CONFIG_FILES="$CONFIG_FILES user/Makefile" ;; + "user/legacy/Makefile") CONFIG_FILES="$CONFIG_FILES user/legacy/Makefile" ;; "scripts/Makefile") CONFIG_FILES="$CONFIG_FILES scripts/Makefile" ;; "documentation/Makefile") CONFIG_FILES="$CONFIG_FILES documentation/Makefile" ;; "user/config.h") CONFIG_HEADERS="$CONFIG_HEADERS user/config.h" ;; + "user/legacy/config.h") CONFIG_HEADERS="$CONFIG_HEADERS user/legacy/config.h" ;; "drbd.spec") CONFIG_FILES="$CONFIG_FILES drbd.spec" ;; "drbd-km.spec") CONFIG_FILES="$CONFIG_FILES drbd-km.spec" ;; + "drbd-kernel.spec") CONFIG_FILES="$CONFIG_FILES drbd-kernel.spec" ;; *) as_fn_error "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac @@ -622,12 +629,10 @@ S["BASH_COMPLETION_SUFFIX"]="" S["INITDIR"]="/etc/init.d" S["DISTRO"]="debian" -S["KDIR"]="" -S["KVER"]="" -S["UDEVINFO"]="" +S["UDEVINFO"]="false" 
S["UDEVADM"]="/sbin/udevadm" S["DPKG_BUILDPACKAGE"]="/usr/bin/dpkg-buildpackage" -S["GIT"]="" +S["GIT"]="/usr/bin/git" S["TAR"]="/bin/tar" S["XSLTPROC"]="/usr/bin/xsltproc" S["RPMBUILD"]="" @@ -638,17 +643,18 @@ S["OBJEXT"]="o" S["EXEEXT"]="" S["ac_ct_CC"]="gcc" -S["CPPFLAGS"]="" -S["LDFLAGS"]="-Wl,-Bsymbolic-functions" +S["CPPFLAGS"]="-D_FORTIFY_SOURCE=2" +S["LDFLAGS"]="-Wl,-Bsymbolic-functions -Wl,-z,relro" S["CFLAGS"]="-Wall -g -O2" S["CC"]="gcc" S["WITH_BASHCOMPLETION"]="yes" -S["WITH_RGMANAGER"]="yes" +S["WITH_RGMANAGER"]="no" S["WITH_HEARTBEAT"]="yes" S["WITH_PACEMAKER"]="yes" S["WITH_XEN"]="yes" S["WITH_UDEV"]="yes" S["WITH_KM"]="no" +S["WITH_LEGACY_UTILS"]="yes" S["WITH_UTILS"]="yes" S["target_alias"]="" S["host_alias"]="" @@ -682,8 +688,8 @@ S["exec_prefix"]="/usr" S["PACKAGE_URL"]="" S["PACKAGE_BUGREPORT"]="drbd-dev@lists.linbit.com" -S["PACKAGE_STRING"]="DRBD 8.3.7" -S["PACKAGE_VERSION"]="8.3.7" +S["PACKAGE_STRING"]="DRBD 8.4.1" +S["PACKAGE_VERSION"]="8.4.1" S["PACKAGE_TARNAME"]="drbd" S["PACKAGE_NAME"]="DRBD" S["PATH_SEPARATOR"]=":" @@ -731,13 +737,15 @@ BEGIN { D["PACKAGE_NAME"]=" \"DRBD\"" D["PACKAGE_TARNAME"]=" \"drbd\"" -D["PACKAGE_VERSION"]=" \"8.3.7\"" -D["PACKAGE_STRING"]=" \"DRBD 8.3.7\"" +D["PACKAGE_VERSION"]=" \"8.4.1\"" +D["PACKAGE_STRING"]=" \"DRBD 8.4.1\"" D["PACKAGE_BUGREPORT"]=" \"drbd-dev@lists.linbit.com\"" D["PACKAGE_URL"]=" \"\"" D["DRBD_LIB_DIR"]=" \"/var/lib/drbd\"" +D["DRBD_RUN_DIR"]=" \"/var/run/drbd\"" D["DRBD_LOCK_DIR"]=" \"/var/lock\"" D["DRBD_CONFIG_DIR"]=" \"/etc\"" +D["DRBD_LEGACY_83"]=" 1" for (key in D) D_is_set[key] = 1 FS = "" } diff -Nru drbd8-8.3.7/configure drbd8-8.4.1+git55a81dc~cmd1/configure --- drbd8-8.3.7/configure 2010-01-13 16:17:27.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/configure 2012-02-02 14:09:44.000000000 +0000 @@ -1,12 +1,14 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.64 for DRBD 8.3.7. 
+# Generated by GNU Autoconf 2.65 for DRBD 8.4.1. # # Report bugs to . # +# # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, -# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software -# Foundation, Inc. +# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, +# Inc. +# # # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. @@ -526,7 +528,8 @@ as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" -exec 7<&0 &1 +test -n "$DJDIR" || exec 7<&0 &1 # Name of the host. # hostname on some systems (SVR3.2, Linux) returns a bogus exit status, @@ -548,8 +551,8 @@ # Identity of this package. PACKAGE_NAME='DRBD' PACKAGE_TARNAME='drbd' -PACKAGE_VERSION='8.3.7' -PACKAGE_STRING='DRBD 8.3.7' +PACKAGE_VERSION='8.4.1' +PACKAGE_STRING='DRBD 8.4.1' PACKAGE_BUGREPORT='drbd-dev@lists.linbit.com' PACKAGE_URL='' @@ -571,8 +574,6 @@ BASH_COMPLETION_SUFFIX INITDIR DISTRO -KDIR -KVER UDEVINFO UDEVADM DPKG_BUILDPACKAGE @@ -598,6 +599,7 @@ WITH_XEN WITH_UDEV WITH_KM +WITH_LEGACY_UTILS WITH_UTILS target_alias host_alias @@ -641,6 +643,7 @@ ac_user_opts=' enable_option_checking with_utils +with_legacy_utils with_km with_udev with_xen @@ -1202,7 +1205,7 @@ # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures DRBD 8.3.7 to adapt to many kinds of systems. +\`configure' configures DRBD 8.4.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1263,7 +1266,7 @@ if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of DRBD 8.3.7:";; + short | recursive ) echo "Configuration of DRBD 8.4.1:";; esac cat <<\_ACEOF @@ -1278,6 +1281,7 @@ --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-utils Enable management utilities + --without-legacy_utils Do not include legacy <= 8.3 drbdsetup/drbdadm --with-km Enable kernel module --with-udev Enable udev integration --with-xen Enable Xen integration @@ -1300,7 +1304,7 @@ LDFLAGS linker flags, e.g. -L if you have libraries in a nonstandard directory LIBS libraries to pass to the linker, e.g. -l - CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I if + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if you have headers in a nonstandard directory Use these variables to override the choices made by `configure' or to help @@ -1369,8 +1373,8 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -DRBD configure 8.3.7 -generated by GNU Autoconf 2.64 +DRBD configure 8.4.1 +generated by GNU Autoconf 2.65 Copyright (C) 2009 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation @@ -1417,15 +1421,15 @@ ac_retval=1 fi eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;} - return $ac_retval + as_fn_set_status $ac_retval } # ac_fn_c_try_compile cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by DRBD $as_me 8.3.7, which was -generated by GNU Autoconf 2.64. Invocation command line was +It was created by DRBD $as_me 8.4.1, which was +generated by GNU Autoconf 2.65. 
Invocation command line was $ $0 $@ @@ -1678,7 +1682,7 @@ for ac_site_file in "$ac_site_file1" "$ac_site_file2" do test "x$ac_site_file" = xNONE && continue - if test -r "$ac_site_file"; then + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 $as_echo "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 @@ -1687,9 +1691,9 @@ done if test -r "$cache_file"; then - # Some versions of bash will fail to source /dev/null (special - # files actually), so we avoid doing that. - if test -f "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 $as_echo "$as_me: loading cache $cache_file" >&6;} case $cache_file in @@ -1795,6 +1799,7 @@ docdir="`eval echo ${docdir}`" WITH_UTILS=yes +WITH_LEGACY_UTILS=yes WITH_KM=no WITH_UDEV=yes WITH_XEN=yes @@ -1810,6 +1815,12 @@ fi +# Check whether --with-legacy_utils was given. +if test "${with_legacy_utils+set}" = set; then : + withval=$with_legacy_utils; WITH_LEGACY_UTILS=$withval +fi + + # Check whether --with-km was given. if test "${with_km+set}" = set; then : withval=$with_km; WITH_KM=$withval @@ -1888,6 +1899,7 @@ + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -2210,32 +2222,30 @@ ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 - rm -f conftest.er1 conftest.err fi + rm -f conftest.er1 conftest.err $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ -#include + int main () { -FILE *f = fopen ("conftest.out", "w"); - return ferror (f) || fclose (f) != 0; ; return 0; } _ACEOF ac_clean_files_save=$ac_clean_files -ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out conftest.out" +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 -$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 +$as_echo_n "checking whether the C compiler works... " >&6; } ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # The possible output files: @@ -2297,10 +2307,10 @@ else ac_file='' fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 -$as_echo "$ac_file" >&6; } if test -z "$ac_file"; then : - $as_echo "$as_me: failed program was:" >&5 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 @@ -2308,51 +2318,18 @@ { as_fn_set_status 77 as_fn_error "C compiler cannot create executables See \`config.log' for more details." "$LINENO" 5; }; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 +$as_echo_n "checking for C compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } ac_exeext=$ac_cv_exeext -# Check that the compiler produces executables we can run. If not, either -# the compiler is broken, or we cross compile. 
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 -$as_echo_n "checking whether the C compiler works... " >&6; } -# If not cross compiling, check that we can run a simple program. -if test "$cross_compiling" != yes; then - if { ac_try='./$ac_file' - { { case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then - cross_compiling=no - else - if test "$cross_compiling" = maybe; then - cross_compiling=yes - else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error "cannot run C compiled programs. -If you meant to cross compile, use \`--host'. -See \`config.log' for more details." "$LINENO" 5; } - fi - fi -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } - -rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out conftest.out +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save -# Check that the compiler produces executables we can run. If not, either -# the compiler is broken, or we cross compile. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 -$as_echo_n "checking whether we are cross compiling... " >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 -$as_echo "$cross_compiling" >&6; } - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 $as_echo_n "checking for suffix of executables... " >&6; } if { { ac_try="$ac_link" @@ -2385,13 +2362,72 @@ as_fn_error "cannot compute suffix of executables: cannot compile and link See \`config.log' for more details." 
"$LINENO" 5; } fi -rm -f conftest$ac_cv_exeext +rm -f conftest conftest$ac_cv_exeext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 $as_echo "$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... " >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error "cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." 
"$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 $as_echo_n "checking for suffix of object files... " >&6; } if test "${ac_cv_objext+set}" = set; then : @@ -2999,7 +3035,7 @@ ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH +for as_dir in /sbin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. @@ -3013,6 +3049,7 @@ done IFS=$as_save_IFS + test -z "$ac_cv_path_UDEVADM" && ac_cv_path_UDEVADM="false" ;; esac fi @@ -3039,7 +3076,7 @@ ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH +for as_dir in /sbin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. @@ -3053,6 +3090,7 @@ done IFS=$as_save_IFS + test -z "$ac_cv_path_UDEVINFO" && ac_cv_path_UDEVINFO="false" ;; esac fi @@ -3067,7 +3105,7 @@ -if test -z $CC; then +if test -z "$CC"; then if test "$WITH_UTILS" = "yes"; then as_fn_error "Cannot build utils without a C compiler, either install a compiler or pass the --without-utils option." "$LINENO" 5 fi @@ -3095,6 +3133,7 @@ if test -z $XSLTPROC; then { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Cannot build man pages without xsltproc. You may safely ignore this warning when building from a tarball." >&5 $as_echo "$as_me: WARNING: Cannot build man pages without xsltproc. You may safely ignore this warning when building from a tarball." >&2;} + XSLTPROC=xsltproc fi if test -z $GIT; then @@ -3102,7 +3141,7 @@ $as_echo "$as_me: WARNING: Cannot update buildtag without git. You may safely ignore this warning when building from a tarball." 
>&2;} fi -if test -z $UDEVADM && test -z $UDEVINFO; then +if test $UDEVADM = false && test $UDEVINFO = false; then if test "$WITH_UDEV" = "yes"; then { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: udev support enabled, but neither udevadm nor udevinfo found on this system." >&5 $as_echo "$as_me: WARNING: udev support enabled, but neither udevadm nor udevinfo found on this system." >&2;} @@ -3111,38 +3150,6 @@ -if test "$WITH_KM" = "yes"; then - as_ac_File=`$as_echo "ac_cv_file_$KDIR/Makefile" | $as_tr_sh` -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $KDIR/Makefile" >&5 -$as_echo_n "checking for $KDIR/Makefile... " >&6; } -if { as_var=$as_ac_File; eval "test \"\${$as_var+set}\" = set"; }; then : - $as_echo_n "(cached) " >&6 -else - test "$cross_compiling" = yes && - as_fn_error "cannot check for file existence when cross compiling" "$LINENO" 5 -if test -r "$KDIR/Makefile"; then - eval "$as_ac_File=yes" -else - eval "$as_ac_File=no" -fi -fi -eval ac_res=\$$as_ac_File - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 -$as_echo "$ac_res" >&6; } -eval as_val=\$$as_ac_File - if test "x$as_val" = x""yes; then : - -else - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Unable to find a kernel Makefile in $KDIR. You will have to set KDIR correctly when invoking make." >&5 -$as_echo "$as_me: WARNING: Unable to find a kernel Makefile in $KDIR. You will have to set KDIR correctly when invoking make." 
>&2;} -fi - - KVER="`uname -r`" - KDIR="/lib/modules/$KVER/build" -fi - - - BASH_COMPLETION_SUFFIX="" UDEV_RULE_SUFFIX="" RPM_DIST_TAG="" @@ -3338,8 +3345,8 @@ test -z $INITDIR && INITDIR="$sysconfdir/init.d" if test "$WITH_UDEV" = "yes"; then - udev_version=`$UDEVADM version 2>/dev/null` || udev_version=`$UDEVINFO -V | cut -d " " -f 3` || udev_version=0 - if test $udev_version -lt 85; then + udev_version=`$UDEVADM version 2>/dev/null` || udev_version=`$UDEVINFO -V | cut -d " " -f 3` + if test -z $udev_version || test $udev_version -lt 85; then UDEV_RULE_SUFFIX=".disabled" { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Obsolete or unknown udev version. Installing disabled udev rules." >&5 $as_echo "$as_me: WARNING: Obsolete or unknown udev version. Installing disabled udev rules." >&2;} @@ -3370,11 +3377,17 @@ + + cat >>confdefs.h <<_ACEOF #define DRBD_LIB_DIR "$localstatedir/lib/$PACKAGE_TARNAME" _ACEOF cat >>confdefs.h <<_ACEOF +#define DRBD_RUN_DIR "$localstatedir/run/$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF #define DRBD_LOCK_DIR "$localstatedir/lock" _ACEOF @@ -3383,10 +3396,15 @@ _ACEOF +if test "$WITH_LEGACY_UTILS" = "yes"; then + $as_echo "#define DRBD_LEGACY_83 1" >>confdefs.h + +fi + if test -z $SPECMODE; then - ac_config_files="$ac_config_files Makefile user/Makefile scripts/Makefile documentation/Makefile" + ac_config_files="$ac_config_files Makefile user/Makefile user/legacy/Makefile scripts/Makefile documentation/Makefile" - ac_config_headers="$ac_config_headers user/config.h" + ac_config_headers="$ac_config_headers user/config.h user/legacy/config.h" else if test "$WITH_UTILS" = "yes"; then @@ -3394,7 +3412,7 @@ fi if test "$WITH_KM" = "yes"; then - ac_config_files="$ac_config_files drbd-km.spec" + ac_config_files="$ac_config_files drbd-km.spec drbd-kernel.spec" fi fi @@ -3904,8 +3922,8 @@ # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. 
ac_log=" -This file was extended by DRBD $as_me 8.3.7, which was -generated by GNU Autoconf 2.64. Invocation command line was +This file was extended by DRBD $as_me 8.4.1, which was +generated by GNU Autoconf 2.65. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS @@ -3944,6 +3962,7 @@ -h, --help print this help, then exit -V, --version print version number and configuration settings, then exit + --config print configuration, then exit -q, --quiet, --silent do not print progress messages -d, --debug don't remove temporary files @@ -3963,10 +3982,11 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -DRBD config.status 8.3.7 -configured by $0, generated by GNU Autoconf 2.64, - with options \\"`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" +DRBD config.status 8.4.1 +configured by $0, generated by GNU Autoconf 2.65, + with options \\"\$ac_cs_config\\" Copyright (C) 2009 Free Software Foundation, Inc. 
This config.status script is free software; the Free Software Foundation @@ -4001,6 +4021,8 @@ ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) @@ -4079,11 +4101,14 @@ case $ac_config_target in "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; "user/Makefile") CONFIG_FILES="$CONFIG_FILES user/Makefile" ;; + "user/legacy/Makefile") CONFIG_FILES="$CONFIG_FILES user/legacy/Makefile" ;; "scripts/Makefile") CONFIG_FILES="$CONFIG_FILES scripts/Makefile" ;; "documentation/Makefile") CONFIG_FILES="$CONFIG_FILES documentation/Makefile" ;; "user/config.h") CONFIG_HEADERS="$CONFIG_HEADERS user/config.h" ;; + "user/legacy/config.h") CONFIG_HEADERS="$CONFIG_HEADERS user/legacy/config.h" ;; "drbd.spec") CONFIG_FILES="$CONFIG_FILES drbd.spec" ;; "drbd-km.spec") CONFIG_FILES="$CONFIG_FILES drbd-km.spec" ;; + "drbd-kernel.spec") CONFIG_FILES="$CONFIG_FILES drbd-kernel.spec" ;; *) as_fn_error "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac @@ -4186,7 +4211,7 @@ t delim :nl h -s/\(.\{148\}\).*/\1/ +s/\(.\{148\}\)..*/\1/ t more1 s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ p @@ -4200,7 +4225,7 @@ t nl :delim h -s/\(.\{148\}\).*/\1/ +s/\(.\{148\}\)..*/\1/ t more2 s/["\\]/\\&/g; s/^/"/; s/$/"/ p diff -Nru drbd8-8.3.7/configure.ac drbd8-8.4.1+git55a81dc~cmd1/configure.ac --- drbd8-8.3.7/configure.ac 2010-01-13 16:14:13.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/configure.ac 2012-02-02 14:09:14.000000000 +0000 @@ -7,7 +7,7 @@ AC_PREREQ(2.53) dnl What we are, our version, who to bug in case of problems -AC_INIT(DRBD, 8.3.7, [drbd-dev@lists.linbit.com]) +AC_INIT(DRBD, 8.4.1, [drbd-dev@lists.linbit.com]) dnl Sanitize $prefix. 
Autoconf does this by itself, but so late in the dnl generated configure script that the expansion does not occur until @@ -41,6 +41,7 @@ dnl "--with-" options (all except km enabled by default, pass --without- to disable) WITH_UTILS=yes +WITH_LEGACY_UTILS=yes WITH_KM=no WITH_UDEV=yes WITH_XEN=yes @@ -53,6 +54,10 @@ [AS_HELP_STRING([--with-utils], [Enable management utilities])], [WITH_UTILS=$withval]) +AC_ARG_WITH([legacy_utils], + [AS_HELP_STRING([--without-legacy_utils], + [Do not include legacy <= 8.3 drbdsetup/drbdadm])], + [WITH_LEGACY_UTILS=$withval]) AC_ARG_WITH([km], [AS_HELP_STRING([--with-km], [Enable kernel module])], @@ -101,6 +106,7 @@ AC_SUBST(WITH_UTILS) +AC_SUBST(WITH_LEGACY_UTILS) AC_SUBST(WITH_KM) AC_SUBST(WITH_UDEV) AC_SUBST(WITH_XEN) @@ -121,10 +127,10 @@ AC_PATH_PROG(TAR, tar) AC_PATH_PROG(GIT, git) AC_PATH_PROG(DPKG_BUILDPACKAGE, dpkg-buildpackage) -AC_PATH_PROG(UDEVADM, udevadm) -AC_PATH_PROG(UDEVINFO, udevinfo) +AC_PATH_PROG(UDEVADM, udevadm, [false], [/sbin$PATH_SEPARATOR$PATH]) +AC_PATH_PROG(UDEVINFO, udevinfo, [false], [/sbin$PATH_SEPARATOR$PATH]) -if test -z $CC; then +if test -z "$CC"; then if test "$WITH_UTILS" = "yes"; then AC_MSG_ERROR([Cannot build utils without a C compiler, either install a compiler or pass the --without-utils option.]) fi @@ -149,13 +155,17 @@ if test -z $XSLTPROC; then AC_MSG_WARN([Cannot build man pages without xsltproc. You may safely ignore this warning when building from a tarball.]) + dnl default to some sane value at least, + dnl so the error message about command not found makes sense + dnl otherwise you get "--xinclude ... command not found" :-/ + XSLTPROC=xsltproc fi if test -z $GIT; then AC_MSG_WARN(Cannot update buildtag without git. You may safely ignore this warning when building from a tarball.) 
fi -if test -z $UDEVADM && test -z $UDEVINFO; then +if test $UDEVADM = false && test $UDEVINFO = false; then if test "$WITH_UDEV" = "yes"; then AC_MSG_WARN([udev support enabled, but neither udevadm nor udevinfo found on this system.]) fi @@ -164,17 +174,6 @@ dnl Checks for system services -dnl figure out the kernel versin and kernel headers directory -if test "$WITH_KM" = "yes"; then - AC_CHECK_FILE($KDIR/Makefile, - , - AC_MSG_WARN([Unable to find a kernel Makefile in $KDIR. You will have to set KDIR correctly when invoking make.])) - KVER="`uname -r`" - KDIR="/lib/modules/$KVER/build" -fi -AC_SUBST(KVER) -AC_SUBST(KDIR) - BASH_COMPLETION_SUFFIX="" UDEV_RULE_SUFFIX="" RPM_DIST_TAG="" @@ -262,8 +261,8 @@ dnl Our udev rules file is known to work only with udev >= 85 if test "$WITH_UDEV" = "yes"; then - udev_version=`$UDEVADM version 2>/dev/null` || udev_version=`$UDEVINFO -V | cut -d " " -f 3` || udev_version=0 - if test $udev_version -lt 85; then + udev_version=`$UDEVADM version 2>/dev/null` || udev_version=`$UDEVINFO -V | cut -d " " -f 3` + if test -z $udev_version || test $udev_version -lt 85; then UDEV_RULE_SUFFIX=".disabled" AC_MSG_WARN([Obsolete or unknown udev version. Installing disabled udev rules.]) fi @@ -293,27 +292,35 @@ AH_TEMPLATE(DRBD_LIB_DIR, [Local state directory. Commonly /var/lib/drbd or /usr/local/var/lib/drbd]) +AH_TEMPLATE(DRBD_RUN_DIR, [Runtime state directory. Commonly + /var/run/drbd or /usr/local/var/run/drbd]) AH_TEMPLATE(DRBD_LOCK_DIR, [Local lock directory. Commonly /var/lock or /usr/local/var/lock]) AH_TEMPLATE(DRBD_CONFIG_DIR, [Local configuration directory. 
Commonly /etc or /usr/local/etc]) +AH_TEMPLATE(DRBD_LEGACY_83, [Include support for drbd-8.3 kernel code]) AC_DEFINE_UNQUOTED(DRBD_LIB_DIR, ["$localstatedir/lib/$PACKAGE_TARNAME"]) +AC_DEFINE_UNQUOTED(DRBD_RUN_DIR, ["$localstatedir/run/$PACKAGE_TARNAME"]) AC_DEFINE_UNQUOTED(DRBD_LOCK_DIR, ["$localstatedir/lock"]) AC_DEFINE_UNQUOTED(DRBD_CONFIG_DIR, ["$sysconfdir"]) +if test "$WITH_LEGACY_UTILS" = "yes"; then + AC_DEFINE(DRBD_LEGACY_83, [1]) +fi + dnl The configuration files we create (from their .in template) if test -z $SPECMODE; then - AC_CONFIG_FILES(Makefile user/Makefile scripts/Makefile documentation/Makefile) - AC_CONFIG_HEADERS(user/config.h) + AC_CONFIG_FILES(Makefile user/Makefile user/legacy/Makefile scripts/Makefile documentation/Makefile) + AC_CONFIG_HEADERS(user/config.h user/legacy/config.h) else if test "$WITH_UTILS" = "yes"; then AC_CONFIG_FILES(drbd.spec) fi if test "$WITH_KM" = "yes"; then - AC_CONFIG_FILES(drbd-km.spec) + AC_CONFIG_FILES(drbd-km.spec drbd-kernel.spec) fi fi diff -Nru drbd8-8.3.7/debian/README.Debian drbd8-8.4.1+git55a81dc~cmd1/debian/README.Debian --- drbd8-8.3.7/debian/README.Debian 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/README.Debian 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,10 @@ +drbd for Debian +--------------- + +To make sure the default installation of drbd is non-interactive, I +have set the default value of the inittimeout parameter to be a +negative number. This may not be what you would like for a production +setup. See the drbd.conf man page and pay special attention to the +inittimeout, skip-wait, and load-only options. + + -- David Krovich , Tue May 25 12:47:11 2004 diff -Nru drbd8-8.3.7/debian/TODO drbd8-8.4.1+git55a81dc~cmd1/debian/TODO --- drbd8-8.3.7/debian/TODO 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/TODO 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,4 @@ +Create lintian overrides to deal with bash not handling extglob +syntax. 
I'll also contact lintian package maintainers to make sure +this is the correct thing to do. Refer to #247605 in the BTS for +background on this. diff -Nru drbd8-8.3.7/debian/changelog drbd8-8.4.1+git55a81dc~cmd1/debian/changelog --- drbd8-8.3.7/debian/changelog 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/changelog 2012-09-03 22:50:38.000000000 +0000 @@ -1,951 +1,221 @@ -drbd8 (2:8.3.7-1ubuntu2) lucid; urgency=low +drbd8 (2:8.4.1+git55a81dc~cmd1-1~lucid1) lucid; urgency=low - * Drop recommends on kernel-package, we use dkms instead. + * Add git commit 55a81dcc13bf199cfa1bb6695267deec5adc92ca, for compatibility with older kernels. - -- Steve Langasek Thu, 15 Apr 2010 13:24:49 +0000 + -- Jose Ildefonso Camargo Tolosa Mon, 03 Sep 2012 18:04:49 -0430 -drbd8 (2:8.3.7-1ubuntu1) lucid; urgency=low - - * Merge from Debian, remaining Ubuntu changes: - - switch to DKMS: - + debian/control: drbd8-utils depends on drbd8-source - + debian/control: drbd8-source depends on dkms and kernel headers - + debian/dkms.conf - + debian/drbd8-source.postinst - + debian/drbd8-source.prerm - + debian/rules - + removed 10_different-kernels.dpatch; not needed with DKMS - + debian/drbd8-source.dirs: removed modass directory - * Use /usr/lib/dkms/common.postinst in debian/drbd8-source.postinst - (LP: #497149) - - -- Ante Karamatic Wed, 27 Jan 2010 17:57:54 +0000 - -drbd8 (2:8.3.7-1) unstable; urgency=low +drbd8 (2:8.4.1-0) unstable; urgency=low * New upstream release. - + Correct dependencies in init.d script. (closes: 547566, #563783) - * Acknowledge NMU of 2:8.3.4-1.1, thanks Iustin for taking care of this! - (closes: #499516) - * Ship scripts/adjust_drbd_config_h.sh and run it before building the kernel - module. (closes: #551479, #552439) - - -- Norbert Tretkowski Mon, 18 Jan 2010 16:29:43 +0100 - -drbd8 (2:8.3.4-1.1) unstable; urgency=low - * Non-maintainer upload. 
- * Fix watch file - * No longer stops the drbd resources on upgrades of the drbd8-utils - package, since this is not needed and shutdowns must be done by the - administrator. (closes: #499516) + -- Philipp Reisner Tue, 20 Dec 2011 13:00:00 +0200 - -- Iustin Pop Fri, 13 Nov 2009 19:58:29 +0100 - -drbd8 (2:8.3.4-1) unstable; urgency=low +drbd8 (2:8.4.0-0) unstable; urgency=low * New upstream release. - -- Norbert Tretkowski Fri, 16 Oct 2009 09:18:11 +0200 - -drbd8 (2:8.3.3-0ubuntu1) karmic; urgency=low - - * Merge from Debian, remaining Ubuntu changes: - - switch to DKMS: - + debian/control: drbd8-utils depends on drbd8-source - + debian/control: drbd8-source depends on dkms and kernel headers - + debian/dkms.conf - + debian/drbd8-source.postinst - + debian/drbd8-source.prerm - + debian/rules - - removed 10_different-kernels.dpatch; not needed with DKMS - * Source update to final 8.3.3 version - - -- Ante Karamatic Wed, 07 Oct 2009 09:47:00 +0200 - -drbd8 (2:8.3.3~rc3-1) experimental; urgency=low - - * New upstream release candidate. - - -- Norbert Tretkowski Tue, 29 Sep 2009 21:43:01 +0200 - -drbd8 (2:8.3.3~rc2-1) experimental; urgency=low - - * New upstream release candidate. - - -- Norbert Tretkowski Wed, 16 Sep 2009 19:17:52 +0200 - -drbd8 (2:8.3.3~rc1-1) experimental; urgency=low - - * New upstream release candidate. + -- Philipp Reisner Mon, 18 Jul 2011 15:05:06 +0200 - -- Norbert Tretkowski Mon, 31 Aug 2009 11:44:25 +0200 - -drbd8 (2:8.3.2-3) unstable; urgency=low - - * Drop DKMS support for now, to get the package back into testing. - (closes: #537986, #539218, #539219) - - -- Norbert Tretkowski Sun, 16 Aug 2009 12:23:59 +0200 - -drbd8 (2:8.3.2-2) unstable; urgency=low - - * Switch to DKMS, patch from Ante Karamatić (Ubuntu). - - -- Norbert Tretkowski Tue, 21 Jul 2009 20:17:47 +0200 - -drbd8 (2:8.3.2-1) unstable; urgency=low +drbd8 (2:8.3.10-0) unstable; urgency=low * New upstream release. - * Section of drbd8-source is kernel. 
- - -- Norbert Tretkowski Mon, 06 Jul 2009 21:17:28 +0200 - -drbd8 (2:8.3.2~rc2-1) unstable; urgency=low - - * New upstream release candidate. - - -- Norbert Tretkowski Sat, 27 Jun 2009 16:28:20 +0200 - -drbd8 (2:8.3.2~rc1-1) unstable; urgency=low - - * New upstream release candidate. - + Make it compile on Linux 2.6.30. (closes: #533261, #533654) - * Update Standards-Version to 3.8.2, no changes required. - * Fix maintainer-script-ignores-errors lintian warning. - - -- Norbert Tretkowski Mon, 22 Jun 2009 20:52:22 +0200 - -drbd8 (2:8.3.1-2) unstable; urgency=medium - - * Added a new patch from Michael Prokop to fix build with kernel 2.6.28 and - older. (closes: #522891) - -- Norbert Tretkowski Fri, 03 Apr 2009 19:54:44 +0200 + -- Philipp Reisner Fri, 28 Jan 2011 11:44:13 +0200 -drbd8 (2:8.3.1-1) unstable; urgency=low +drbd8 (2:8.3.9-0) unstable; urgency=low * New upstream release. - -- Norbert Tretkowski Fri, 27 Mar 2009 14:16:36 +0100 + -- Philipp Reisner Fri, 22 Oct 2010 15:01:16 +0200 -drbd8 (2:8.3.1~rc2-1) experimental; urgency=low - - * New upstream release candidate. - - -- Norbert Tretkowski Thu, 26 Mar 2009 10:52:06 +0100 - -drbd8 (2:8.3.1~rc1-1) experimental; urgency=low - - * New upstream release candidate. - * Drop patch 10_lsb-init-script.dpatch, merged upstream. - - -- Norbert Tretkowski Tue, 17 Mar 2009 16:38:52 +0100 - -drbd8 (2:8.3.0-3) unstable; urgency=low - - [ Martin G. Loschwitz ] - * Sigh. Remove SVN-Files from debian-diff file. - - -- Martin Loschwitz Wed, 11 Mar 2009 17:46:00 +0100 - -drbd8 (2:8.3.0-2) unstable; urgency=low - - [ Norbert Tretkowski ] - * Package is now team-maintained. - * Use dpatch for patch management. - * New patch 10_lsb-init-script.dpatch to make init-script a bit more LSB - compliant. - - [ Martin G. Loschwitz ] - * Add myself to the Uploaders:-Field. - - -- Martin Loschwitz Wed, 11 Mar 2009 16:45:00 +0100 - -drbd8 (2:8.3.0-1) unstable; urgency=low +drbd8 (2:8.3.8-0) unstable; urgency=low * New upstream release. 
- -- Norbert Tretkowski Wed, 24 Dec 2008 15:05:27 +0100 - -drbd8 (2:8.3.0~rc3-1) experimental; urgency=low - - * New upstream release candidate. - - -- Norbert Tretkowski Mon, 15 Dec 2008 10:14:37 +0100 + -- Philipp Reisner Wed, 2 Jun 2010 10:44:46 +0200 -drbd8 (2:8.3.0~rc2-1) experimental; urgency=low - - * New upstream release candidate. - - -- Norbert Tretkowski Sun, 07 Dec 2008 15:09:01 +0100 - -drbd8 (2:8.3.0~rc1-1) experimental; urgency=low - - * New upstream release candidate. - - -- Norbert Tretkowski Mon, 01 Dec 2008 16:19:08 +0100 - -drbd8 (2:8.2.7-2) experimental; urgency=low - - * Merge 2:8.0.14-2. - - -- Norbert Tretkowski Thu, 13 Nov 2008 12:47:22 +0100 - -drbd8 (2:8.2.7-1) experimental; urgency=low +drbd8 (2:8.3.7-0) unstable; urgency=low * New upstream release. - * Merge 2:8.0.14~rc1-1 and 2:8.0.14-1. - - -- Norbert Tretkowski Thu, 13 Nov 2008 10:55:02 +0100 - -drbd8 (2:8.2.7~rc2-1) experimental; urgency=low - - * New upstream release candidate. - - -- Norbert Tretkowski Wed, 29 Oct 2008 17:34:12 +0100 - -drbd8 (2:8.2.6-4) experimental; urgency=low - - * Fix kernel panic during verify. - - -- Norbert Tretkowski Thu, 23 Oct 2008 08:51:09 +0200 -drbd8 (2:8.2.6-3) experimental; urgency=low + -- Philipp Reisner Thu, 13 Jan 2010 13:00:00 +0200 - * Merge 2:8.0.13-1 and 2:8.0.13-2. - - -- Norbert Tretkowski Thu, 09 Oct 2008 16:26:00 +0200 - -drbd8 (2:8.2.6-2) experimental; urgency=low - - * Fix build on Linux 2.6.26. - - -- Norbert Tretkowski Wed, 27 Aug 2008 11:40:53 +0200 - -drbd8 (2:8.2.6-1) experimental; urgency=low +drbd8 (2:8.3.6-0) unstable; urgency=low * New upstream release. - * Merge changes from 2:8.0.10-1, 2:8.0.11-1 and 2:8.0.12-1. - - -- Norbert Tretkowski Tue, 17 Jun 2008 14:28:39 +0200 - -drbd8 (2:8.2.4-1) experimental; urgency=low - - * New upstream release - * Upload for experimental - - -- Philipp Hug Mon, 21 Jan 2008 21:35:06 +0100 - -drbd8 (2:8.0.14-2) unstable; urgency=low - - * Drop dpatch build-dependency. 
- * Drop homepage from description. - * Don't ignore make clean errors. - -- Norbert Tretkowski Thu, 13 Nov 2008 11:46:25 +0100 + -- Philipp Reisner Sun, 8 Nov 2009 10:04:24 +0200 -drbd8 (2:8.0.14-1) unstable; urgency=low +drbd8 (2:8.3.5-0) unstable; urgency=low * New upstream release. - -- Norbert Tretkowski Thu, 13 Nov 2008 10:50:27 +0100 + -- Philipp Reisner Tue, 27 Oct 2009 12:30:41 +0200 -drbd8 (2:8.0.14~rc1-1) unstable; urgency=low +drbd8 (2:8.3.4-12) unstable; urgency=low - * New upstream release candidate. - * New maintainer. (closes: #500353) + * Packaging makeover. - -- Norbert Tretkowski Wed, 29 Oct 2008 17:43:24 +0100 + -- Florian Haas Wed, 21 Oct 2009 13:55:45 +0200 -drbd8 (2:8.0.13-2) unstable; urgency=low - - * Run depmod from generated module package using dh_installmodules. - (closes: #496882) - - -- Norbert Tretkowski Wed, 03 Sep 2008 12:07:22 +0200 - -drbd8 (2:8.0.13-1) unstable; urgency=low +drbd8 (2:8.3.4-0) unstable; urgency=low * New upstream release. - + Make it compile on Linux 2.6.26. (closes: #493145) - -- Norbert Tretkowski Mon, 04 Aug 2008 17:56:03 +0200 + -- Philipp Reisner Tue, 6 Oct 2009 14:32:15 +0200 -drbd8 (2:8.0.12-1) unstable; urgency=low +drbd8 (2:8.3.3-0) unstable; urgency=low * New upstream release. - + Make it compile on Linux 2.6.25. (closes: #480418, #481992, #483676) - * Add myself as co-maintainer. 
- - -- Norbert Tretkowski Tue, 17 Jun 2008 14:18:36 +0200 -drbd8 (2:8.0.11-1) unstable; urgency=low + -- Philipp Reisner Mon, 5 Oct 2009 11:25:55 +0200 - * New upstream release - - -- Philipp Hug Wed, 13 Feb 2008 19:12:53 +0100 - -drbd8 (2:8.0.10-1) unstable; urgency=low - - * New upstream release - - -- Philipp Hug Tue, 12 Feb 2008 22:10:04 +0100 - -drbd8 (2:8.0.8-1) unstable; urgency=low - - * New upstream release - * Provide drbd8-module-source for compatibility - * Use EXTRA_CFLAGS (Closes: #461750) - - -- Philipp Hug Mon, 21 Jan 2008 21:23:02 +0100 - -drbd8 (2:8.0.7-1) unstable; urgency=low - - * New upstream release (Closes: #449241) - * Integrated NMU changes (Closes: #448876) +drbd8 (2:8.3.2-0) unstable; urgency=low - -- Philipp Hug Sun, 04 Nov 2007 13:46:53 +0100 + * New upstream release. -drbd8 (2:8.0.6-0.1) unstable; urgency=low + -- Philipp Reisner Fri, 3 Jul 2009 15:33:12 +0200 - * Non-Maintainer upload with permission of Philipp Hug. - * New upstream release (Closes: #438167) - * Switch to debhelper 5. - * Rename kernel module package from drbd8-module-source to - drbd8-source. - * Compress the module source tarball with bzip2. - * Provide modules/drbd8 to allow parallel installation with - drbd0.7-module-source, and remove obsolete Conflict header. - * Provide own Makefiles in the module tarball for automated building - within linux-modules-extra-2.6 (closes: #431771) +drbd8 (2:8.3.1-0) unstable; urgency=low - -- Frederik Schüler Thu, 01 Nov 2007 15:13:29 +0100 + * New upstream release. -drbd8 (2:8.0.4-1) unstable; urgency=low + -- Philipp Reisner Fri, 27 Mar 2009 12:16:00 +0200 - * New upstream release (Closes: #432104) +drbd8 (2:8.3.0-0) unstable; urgency=low - -- Philipp Hug Sun, 08 Jul 2007 12:45:33 +0200 + * New upstream release. 
-drbd8 (2:8.0.3-2) unstable; urgency=low + -- Philipp Reisner Thu, 18 Dec 2008 14:03:03 +0200 - * Updated Maintainer in control.modules.in - * Added documentation about how to install source package +drbd8 (8.2.7-0) unstable; urgency=low - -- Philipp Hug Thu, 21 Jun 2007 18:05:16 +0100 + * New upstream release. -drbd8 (2:8.0.3-1) unstable; urgency=low + -- Philipp Reisner Thu, 12 Nov 2008 10:01:00 +0200 - * New upstream release - * Added watch file +drbd8 (8.2.6-0) unstable; urgency=low - -- Philipp Hug Tue, 22 May 2007 21:59:01 +0200 + * New upstream release. -drbd8 (2:8.0.2-1) unstable; urgency=low + -- Philipp Reisner Fri, 30 May 2008 09:51:15 +0200 - * New upstream release +drbd8 (8.2.5-0) unstable; urgency=low - -- Philipp Hug Sat, 14 Apr 2007 19:49:41 +0200 + * New upstream release. -drbd8 (2:8.0.1-1) unstable; urgency=low + -- Philipp Reisner Tue, 12 Feb 2008 13:37:59 +0200 - * New upstream release +drbd8 (8.2.4-0) unstable; urgency=low + + * New upstream release. - -- Philipp Hug Thu, 8 Mar 2007 11:50:35 +0100 + -- Philipp Reisner Fri, 11 Jan 2008 13:37:50 +0200 -drbd8 (2:8.0.0-1) unstable; urgency=low +drbd8 (8.2.3-0) unstable; urgency=low - * New upstream release - * debian/control: updated Maintainer and Uploaders fields to match - reality. + * New upstream release. - -- Philipp Hug Sun, 18 Feb 2007 18:50:04 +0100 + -- Philipp Reisner Wed, 9 Jan 2008 15:27:53 +0200 -drbd8 (2:8.0pre5-1) unstable; urgency=low +drbd8 (8.2.1-0) unstable; urgency=low - * New upstream release - * scripts/drbd: patch for LSB compliance, submitted the patch upstream. - * debian/rules: the documentation/Makefile 'clean' target as been - renamed to 'doc-clean' + * New upstream release. - -- Cyril Bouthors Mon, 2 Oct 2006 13:48:33 +0300 + -- Philipp Reisner Fri, 2 Nov 2007 13:10:27 +0200 -drbd8 (8.0-pre4-3) unstable; urgency=low +drbd8 (8.2.0-0) unstable; urgency=low - * debian/rules: applyied patch to fix building for x86_64 on i386 from - Guido Guenther and Philipp Hug . 
+ * New upstream release. - -- Cyril Bouthors Wed, 23 Aug 2006 00:13:22 +0300 + -- Philipp Reisner Fri, 28 Sep 2007 12:15:07 +0200 -drbd8 (8.0-pre4-2) unstable; urgency=low +drbd8 (8.0.6-0) unstable; urgency=low - * debian/drbd8-module-_KVERS_.postinst.modules.in: take care of chroot - environments when calling depmod (closes: 381767). + * New upstream release. - -- Cyril Bouthors Sun, 20 Aug 2006 22:13:01 +0300 + -- Philipp Reisner Mon, 3 Sep 2007 10:00:00 +0200 -drbd8 (8.0-pre4-1) unstable; urgency=low +drbd8 (8.0.5-0) unstable; urgency=low - * New upstream release - * debian/control: updated standards version from 3.6.2.1 to 3.7.2 - * debian/drbd8-utils.prerm: use invoke-rc.d + * New upstream release. - -- Cyril Bouthors Mon, 31 Jul 2006 18:01:22 +0300 + -- Philipp Reisner Fri, 3 Aug 2007 09:34:49 +0200 -drbd8 (8.0-pre3-1) unstable; urgency=low +drbd8 (8.0.4-0) unstable; urgency=low - * New upstream release - * debian/control.modules.in: fixed "Source" field thanks to Guido - Guenther (closes #361957). + * New upstream release. - -- Cyril Bouthors Wed, 26 Apr 2006 11:04:54 +0200 + -- Philipp Reisner Wed, 27 Jun 2007 10:00:00 +0200 -drbd8 (8.0-pre2-1) unstable; urgency=low +drbd8 (8.0.3-0) unstable; urgency=low - * New upstream release + * New upstream release. - -- Cyril Bouthors Thu, 6 Apr 2006 19:08:52 +0200 + -- Philipp Reisner Fri, 7 May 2007 17:10:14 +0200 -drbd8 (8.0-pre1-2) unstable; urgency=low +drbd8 (8.0.2-0) unstable; urgency=low - * Renamed source from drbd to drbd8 + * New upstream release. - -- Cyril Bouthors Mon, 27 Mar 2006 00:14:03 +0200 + -- Philipp Reisner Fri, 6 Apr 2007 21:32:39 +0200 -drbd (8.0-pre1-1) unstable; urgency=low +drbd8 (8.0.1-0) unstable; urgency=low - * New upstream release + * New upstream release. - -- Cyril Bouthors Wed, 22 Mar 2006 12:15:03 +0300 + -- Philipp Reisner Mon, 3 Mar 2007 10:10:26 +0200 -drbd (0.7.17-1) unstable; urgency=low +drbd8 (8.0.0-0) unstable; urgency=low - * New upstream release + * New upstream release. 
- -- Cyril Bouthors Wed, 8 Mar 2006 17:26:36 +0300 + -- Philipp Reisner Wed, 24 Jan 2007 16:10:09 +0200 -drbd (0.7.16-1) unstable; urgency=low +drbd8 (8.0rc2-0) unstable; urgency=low - * New upstream release + * New upstream release. - -- Cyril Bouthors Tue, 14 Feb 2006 15:13:49 +0300 + -- Philipp Reisner Wed, 17 Jan 2007 17:30:23 +0200 -drbd (0.7.15-2) unstable; urgency=low +drbd8 (8.0rc1-0) unstable; urgency=low - * debian/control: removed hard-coded dependency on libc6 thanks to - Adeodato Simó (closes: #349927). + * New upstream release. - -- Cyril Bouthors Tue, 31 Jan 2006 09:13:32 +0300 + -- Philipp Reisner Fri, 22 Dec 2006 15:19:10 +0200 -drbd (0.7.15-1) unstable; urgency=low +drbd8 (8.0pre6-0) unstable; urgency=low - * New upstream release + * New upstream release. - -- Cyril Bouthors Tue, 20 Dec 2005 17:55:40 +0300 + -- Philipp Reisner Fri, 3 Nov 2006 15:20:54 +0200 -drbd (0.7.14-3) unstable; urgency=low +drbd8 (8.0pre4-0) unstable; urgency=low - * debian/control: added explicit dependency on libc6 >= 2.3.5 + * New upstream release. - -- Cyril Bouthors Sun, 18 Dec 2005 10:15:14 +0300 + -- Philipp Reisner Mon, 31 Jul 2006 12:04:41 +0200 -drbd (0.7.14-2) unstable; urgency=low +drbd8 (8.0pre3-0) unstable; urgency=low - * debian/control: depends on debconf or debconf-2.0 (closes: #331806). + * New upstream release. - -- Cyril Bouthors Sat, 17 Dec 2005 10:43:22 +0300 + -- Philipp Reisner Thu, 20 Apr 2006 13:46:18 +0200 -drbd (0.7.14-1) unstable; urgency=low +drbd8 (8.0-pre2-0) unstable; urgency=low - * New upstream release (closes: #310993, #338994). - * debian/control: added dependency to dpatch (closes: #338994). + * New upstream release. - -- Cyril Bouthors Fri, 16 Dec 2005 13:10:25 +0300 + -- Philipp Reisner Thu, 6 Apr 2006 17:53:56 +0200 -drbd (0.7.12-1) unstable; urgency=low +drbd8 (8.0_pre1-0) unstable; urgency=low - * New upstream release + * New major release. 
- -- Cyril Bouthors Sat, 27 Aug 2005 18:25:47 +0300 + -- Philipp Reisner Thu, 14 Mar 2006 11:37:56 +0200 -drbd (0.7.11-1) unstable; urgency=low +drbd (0.7.13-0) unstable; urgency=low * New upstream release - -- Cyril Bouthors Mon, 20 Jun 2005 15:49:40 +0300 - -drbd (0.7.10-4) unstable; urgency=low - - * debian/control: added missing dependency to dpatch for - drbd0.7-module-source (closes: #308295). - * debian/control: updated Maintainer and Uploaders fields to match - reality. - - -- Cyril Bouthors Mon, 30 May 2005 11:22:46 +0300 - -drbd (0.7.10-3) unstable; urgency=low - - * (Cyril Bouthors) - - scripts/drbd: explicit modprobe and rmmod pathnames - (initscript_explicit_pathname.patch) (closes: #303060, #302556). - - -- Cyril Bouthors Sun, 17 Apr 2005 18:08:30 +0300 - -drbd (0.7.10-2) unstable; urgency=low - - * (Cyril Bouthors) - - debian/drbd0.7-utils.prerm: silently ignore the initscript return - code if we remove or deconfigure the package or carefully pay - attention to it if we upgrade the package. (closes: #295533). - - debian/control: fixed drbd0.7-module-source description. - - -- Cyril Bouthors Wed, 16 Feb 2005 21:05:51 +0100 - -drbd (0.7.10-1) unstable; urgency=low - - * (Cyril Bouthors) - - New upstream release - - -- Cyril Bouthors Mon, 31 Jan 2005 17:29:27 +0300 - -drbd (0.7.9-2) unstable; urgency=low - - * (Cyril Bouthors) - - Applied patch from Lars Marowsky-Bree that fixes a - "severe [...] memory corruption bug [...]". 
- - -- Cyril Bouthors Thu, 27 Jan 2005 13:55:00 +0300 - -drbd (0.7.9-1) unstable; urgency=low - - * (Cyril Bouthors) - - New upstream release - - -- Cyril Bouthors Thu, 27 Jan 2005 11:35:19 +0300 - -drbd (0.7.8-1) unstable; urgency=low - - * (Cyril Bouthors) - - New upstream release - - -- Cyril Bouthors Mon, 17 Jan 2005 18:50:49 +0400 - -drbd (0.7.7-1) unstable; urgency=low - - * (Cyril Bouthors) - - New upstream release - - -- Cyril Bouthors Wed, 15 Dec 2004 17:15:35 +0300 - -drbd (0.7.6-2) unstable; urgency=low - - * (Cyril Bouthors) - - drbd0.7-module-source: Moved debhelper from Recommends to Depends - - -- Cyril Bouthors Thu, 9 Dec 2004 20:37:41 +0300 - -drbd (0.7.6-1) unstable; urgency=low - - * (Cyril Bouthors) - - New upstream release - - debian/control.modules.in: updated description - - debian/TODO: removed - - -- Cyril Bouthors Tue, 30 Nov 2004 19:43:27 +0300 - -drbd (0.7.5-2) unstable; urgency=low - - * (Philipp Hug) - - debian/control: Added Conflict with drbd-util and drbd-module-source - - debian/control.in.modules: Fixed description for kernel module - - debian/control.in.modules: Added Conflict line in kernel module package - - debian/control.in.modules: Change depends to drbd0.7-util - - debian/rules: don't use top-level makefile, to prevent re-creation of - drbd_buildtag.c - - call depmod in postinst of kernel module - - debian/rules: remove obsolete upstream ./debian/ files in clean target - - added module-assistant override file - - this version is ready for sarge (Closes: #277669) - - -- Philipp Hug Tue, 19 Oct 2004 20:50:49 +0200 - -drbd (0.7.5-1) unstable; urgency=low - - * (Cyril Bouthors) - - New upstream release (closes: #276640). - - debian/drbd0.7.dirs: removed usr/bin (closes: #276643). - - debian/control: changed Section from misc to admin. - - scripts/drbd: prevent the "stop" target from failing if drbd is not running - - debian/rules: call upstream Makefile targets instead of doing hardcoded stuff, clean. 
- - debian/drbd0.7.docs: added upgrade_0.6.x_to_0.7.0.txt and upgrade_0.7.0_to_0.7.1.txt. - - The package drbd as been renamed to drbd0.7-utils and drbd-source as - been renamed to drbd0.7-module-source. We'll introduce drbd0.6-* - and drbd*-module soon. - - Added myself as Uploader. - - * (Philipp Hug) - - Conflict with drbd and drbd-source - - Fixed description for kernel modules - - Added Conflict line in kernel-module package - - Added bison, flex to Build-Depends - - Call 'make clean' in drbd directory - - Upgraded to debian-policy 3.6.1 - - Rewritten debian/rules using module-assistant - - Change binary package name to drbd0.7 - - Added myself as Uploader - - -- Cyril Bouthors Sat, 16 Oct 2004 23:43:27 +0200 - -drbd (0.7.4-1) unstable; urgency=low - - * Fixed a critical bug with Linux-2.4.x and HIGHMEM! - * Fixed a bug that only showed up with the HIGHMEM problem on - Linux-2.4.x -> It caused the resync process to starve. - * The drbd.spec file now creates /dev/drbd in the post-install stage. - * Fixed support for more than 2TB storage. Now DRBD supports up to - 3.99TB storage. It will also tell you, that it is not supported if - you try to set up a bigger device. - * Debian's build rules file now knows about the adjust_drbd_config_h.sh - file. - * DRBD_DISABLE_SENDPAGE available in drbd_config.h - - -- Philipp Reisner Thu, 9 Sep 2004 19:50:00 +0200 - -drbd (0.7.3-2) unstable; urgency=low - - * Fixed debian/rules: Include adjust_drbd_config_h.sh in drbd-source - - -- Philipp Hug Tue, 31 Aug 2004 15:37:38 +0000 - -drbd (0.7.3-1) unstable; urgency=low - - * Fixed minor bugs in the handling of the generation counters. - * prevent possible in-kernel buffer overflow in drbd_proc.c - * Fixed debian's postinst script to create /dev/drbd? instead of /dev/nb? - * drbd status: - be nice to heartbeat, include "OK" in output. 
- * added FullSync meta data flag to read/write gc.pl - * make the RHEL3 happy (page_count no longer in mm.h, but in mm_inline.h) - * [Patch by Pavel Semerad]. Also use the drbd_devfs_name on Linux-2.4.x - * fix missing dependencies on drbd_config.h - - -- Philipp Reisner Fri, 27 Aug 2004 15:02:00 +0200 - -drbd (0.7.2-1) unstable; urgency=low - - * Proper handling of backing storage devices that occasionally fail - READA (=read ahead) requests. (E.g. LVM and MD) - * DRBD now fails READA requests itself, if a resynchronisation is running - and it would need to fetch the block from its peer. - * "drbdadm adjust" had a race, which caused random errors. ( Missing - waitpid() ). Fixed now. - * Proper subtract SyncPause times from the syncer performance numbers. - * Fix to the syncer progress bar in /proc/drbd. - * Fix to debian build rules. - - -- Philipp Reisner Fri, 6 Aug 2004 14:44:31 +0200 - -drbd (0.7.1-1) unstable; urgency=low - - * Upgrade instructions for 0.6.x -> 0.7.0 and 0.7.0 -> 0.7.1 - * Workaround for XFS' IO requests with page count of zero. - * Handle the human and the timeout count correctly in the new init script. - * The implementation of the incon-degr-cmd was missing, added. - * Fix for integer overflow in /proc/drbd syncer progress display - * Longer timeouts in drbdadm for drbdsetup commands witch operate on - meta data. - * New major number 147 (officially registered at lanana.org). - * Added a missing w_resume_next_wg() in case we stop syncing because - of connection loss. - * Fixed a Linux-2.2-ismus in recieve_data_tail(). Should considerably - speed up protocols A and B. 
- * Some work on vendor kernel compatibility - - -- Philipp Reisner Fri, 30 Jul 2004 13:50:33 +0200 - -drbd (0.7.0-1) unstable; urgency=low - - * s/WriteHint/UnplugRemote/g - * new module parameter major_nr to allow "arbitrary" major numbers - * adjusted CTH to cope with that - * fix copy'n'paste and conversion errors in initial bitmap handshake - * warning "please upgrade me" if peer speaks (PRO_VERSION+1) - * drbd_set_in_sync and drbd_set_out_of_sync are now macros - calling to __*, giving file and line information, - to be able to easily track causes of "strange state"s there. - * rs_total is now != 0 only if we actually ARE syncing. - it is reset - * when sync is done - * when connection is lost - * when storage is lost on either node - this way we can optimize and call drbd_set_in_sync only if rs_total != 0 - (and it feels somewhat more clean, too) - * makefile adjusted to recognize svn revision and date tags - * updates and fixes to the test helpers and bash test cases - - -- Philipp Reisner Fri, 16 Jul 2004 10:13:33 +0200 - -drbd (0.7_pre10-1) unstable; urgency=low - - * A fix to a generic bug in the bitmap code introduced with the -pre9 - release (with the 64 bit work) - * A fix to a bug in the bitmap code only relevant for 64 bit platforms. - * Better 2.4.x compatibility and compatibility to 2.4.x vendor kernels. - * Improvements in the way to deal with incompatible protocol releases. - * Added the "dialog-refresh" config option. - - changes up to -pre9: - * Re-enabled zero copy IO for protocols B and C. (Zero copy IO is not - used with protocol A) - * Implemented the unpopular user dialog in the boot process. - * Some fixes for Linux-2.4.x compatibility. - * drbd.conf man page updated - * Bugfixes for 64bit architectures - * Ensured protocol compatibility between hosts of different word sizes - (Tested with i386 and alpha) - * Support for meta-data on block devices with hardsect size != 512 Byte - (e.g. 
dasd on s390x) - * New debian subdir - - -- Lars Ellenberg Fri, 09 Jul 2004 20:00:19 +0200 - -drbd (0.7_pre8-2) unstable; urgency=low - - * fix up the modules source package - - -- Bernd Schubert Mon, 05 Jul 2004 00:57:38 -0100 - -drbd (0.7_pre8-1) unstable; urgency=low - - * initial 0.7 debian package - - -- Bernd Schubert Mon, 21 Jun 2004 19:57:38 -0400 - -drbd (0.6.12-5) unstable; urgency=low - - * Changed default drbd.conf file to set a negative inittimeout value and - updated the README.Debian file to reflect this change. - (Closes Bug#221751) - - -- David Krovich Tue, 25 May 2004 12:51:15 -0400 - -drbd (0.6.12-4) unstable; urgency=low - - * Refactored rules file in an attempt to use binary-arch and binary-indep - targets more wisely. This is an attempt to fix Bug#244392. - * Listed /etc/ha.d/resource.d/drbd in debian/conffiles. (Closes Bug#247606) - * Moved drbdsetup from /usr/bin/ to /usr/sbin. I think I introduced this - when I overhauled the debian directory in the 0.6.12-1 release. - (Closes Bug#247607) - - -- David Krovich Sun, 16 May 2004 15:20:59 -0400 - -drbd (0.6.12-3) unstable; urgency=low - - * After discussing with upstream, tweak /etc/init.d/drbd script so the - stop target works if the module is not loaded. (Closes: Bug#243417) - * Put the drbd script in the /etc/ha.d/resource.d directory. (Closes: Bug#245219) - - -- David Krovich Thu, 22 Apr 2004 18:12:47 -0400 - -drbd (0.6.12-2) unstable; urgency=low - - * Create /dev/nb[0-7] devices in postinst script. (Closes: Bug#221545) - - -- David Krovich Sat, 17 Apr 2004 15:18:29 -0400 - -drbd (0.6.12-1) unstable; urgency=low - - * new upstream release. (Closes: Bug#239804) - * Completely overhauled the debian/ directory. - * Changed sequence number in the runlevel to start at 70 and stop - at 08. drbd should start after things like ssh, but before - heartbeat. 
- - -- David Krovich Mon, 22 Mar 2004 00:04:35 -0500 - -drbd (0.6.10-3) unstable; urgency=low - * Added back the drbd.postinst, drbd.postrm, and drbd.prerm scripts until - I figure out why they aren't being handled by dh_installinit. - * As of drbd-0.6.9, The drbd module no longer builds against just the - kernel-headers package and now needs a full kernel-source tree. - - -- David Krovich Mon, 26 Jan 2004 00:32:49 -0500 - -drbd (0.6.10-2) unstable; urgency=low - - * noel: fixed lintian warning: - W: drbd: package-contains-CVS-dir usr/share/doc/drbd/HOWTO/CVS/ - W: drbd: script-in-etc-init.d-not-registered-via-update-rc.d /etc/init.d/drbd - - * Lintian/Linda fixes. - - * Tweaked the drbd-0.6.10.orig.tar.gz to not have a debian/ directory in it. - * Stopped tweaking the copyright notice on drbd_fs.c and drbd_receiver.c. - I'm not sure how that got there in the first place. - * Removed mystery report_to_html.pl.debdiff file. - * Put the datadisk in the correct location. (Closes: Bug#221544) - - * Removed drbd.postinst, drbd.postrm, and drbd.prerm as they are - being generated by dh_installinit during the build process and do not - need to part of the source package. - * removed dependancy on automake and autoconf - * Changed control.modules to require debhelper >= 4. - * Stop settting $KSRC in the rules file. - - * Removed conffiles, files, kernel-patch-wup.substvars as they are - unneccessary. - * Tightened the build dependancy on debhelper. >=4 - * Updated Debian packages up to newest upstream version. (Closes: Bug#197906) - * Updated Package descriptions. (Closes: Bug#209462) - * Verified support for devfs. (Closes: Bug#203552) - * I'd like to become a Debian Developer and take over maintenance for - this package. I'm working with Debian Devolpers on making this happen. - - -- David Krovich Tue, 20 Jan 2004 01:36:58 -0500 - -drbd (0.6.10-1) unstable; urgency=low - - * With 0.6.9 there was a bug introduced which prevented the sending - of ACK packets during resync. 
Fixed. - * A fix to drbdsetup's wait_connect command. - * Replaced all invocations of the sleep_on() family functions with the - invocations of the wait_event() macros. This removes lost wakup events - and race conditions. - * New implementation of drbd_wait_ee(). This makes the - "(BUG?) Moving bh=%p to done_ee" go away. - * Handle the case if vmalloc() of the bitmap fails. - - -- Philipp Reisner Thu, 12 Dec 2003 15:10:44 +0200 - -drbd (0.6.9-1) unstable; urgency=low - - * New module build system (using kernel source tree build system) - * New net section option 'ko-count'. It allows you to kick out a - secondary node which does no longer process data in acceptable time. - Its default value is 0 which disables this feature. - * Changing syncgroups while resync runs has shows now the correct behaviour. - * In case thread creations fails DRBD would deadlock on its own - semaphore. Fixed now. - * BKL is no longer used on Linux-2.4.x. - * Now you can stack mapping block devices like LVM2 (and maybe md) on - top of drbd (a one character fix). - * drbdsetup wait_connect on a StandAlone node looked like a timeout and - forced primary. fixed. - * if drbdsetup wait_* in fact did timeout this looked like a failed ioctl. - this bug was newly introduced in 0.6.8. fixed. - * A fix to a race in _drbd_alloc_ee(). You could trigger this race if - your filesystem uses a blocksize < 4K and your machine has multiple CPUs. - By Eric W. Biederman. - * A maybe bugfix regarding calls to free_page() by Eric W. Biederman. - * A cleanup patch to drbd_process_done_ee() by Eric W. Biederman. - - -- Philipp Reisner Thu, 27 Nov 2003 08:21:34 +0200 - -drbd (0.6.8-1) unstable; urgency=low - - * Two fixes to the sync-group functionality. - - -- Philipp Reisner Mon, 20 Oct 2003 11:45:33 +0200 - -drbd (0.6.7-1) unstable; urgency=low - - * A fix to a bug that could cause data corruption if you use a - other blocksize than 4k to access the DRBD device. - * A fix to a SMP race in the syncer code. 
The problem was tirggered - when using DRBD on QLogic fiber channel adapters. - * Replaced various calls to sleep_on() variants with the wait_event() - macros. -- This removes potential (, non-critical) SMP races. - * This release includes the sync-group option. - - -- Philipp Reisner Thu, 13 Oct 2003 11:17:27 +0200 - -drbd (0.6.6-1) unstable; urgency=low - - * In the 0.6.5 release the secondary_remote command was badly broken, - it succeeded when it should fail silently. This is fixed now. - * Probabely in all previous releases, the resyncer thread did not - exit properly if the secondary node goes away during resync. - This was not fatal sind the resyncher thread did exit at soon - as it gets a network error. This is fixed now. - * Some new switches to the drbd script. - - -- Philipp Reisner Mon, 28 Jul 2003 14:40:43 +0200 - -drbd (0.6.5-1) unstable; urgency=low - - * Improvements to the build system - * Now it is possible to tune the socket send buffer size via drbdsetup/ - drbd.conf. This is especially usefull for WAN mirroring / using - protocol A. - * Compatibility code to compile DRBD under RedHat 9.0 (RH's version of - Linux-2.4.20) - * Improved sample drbd.conf file - - -- Philipp Reisner Sun, 06 Jul 2003 13:35:00 +0100 - -drbd (0.6.4-1) unstable; urgency=low - - * Reworked build system (i.e. better Makefiles) - * SyncAll works forward instead of backwards. Improves performance on - some storage controlers. - * Reworked /etc/init.d/drbd script (i.e. better support of - different bash releases) - - -- Philipp Reisner Thu, 01 May 2003 21:00:00 +0100 - -drbd (0.6.3-1) unstable; urgency=low - - * Lockup of primary if secondary fails during resync. Fixed. (Stupid!) - * Probabely SMP only deadlock in the drop-conection code path. - * Improved connect code. (The old code could trap into a distributed - deadlock, resulting in an endless connect/disconnect loop.) - * The 'BitMap too small bug' was actually caused by a patch in - SuSE's distribution kernel. 
This patch makes DRBD 'more' compatible - with SuSE's kernel. - * Improved code to allocate buffers for the rsynchronisation process. - The old code allocated physical adjacent pages although the syncer - does not need them! The old code could fail under high memory pressure. - - -- Philipp Reisner Thu, 20 Mar 2003 20:23:40 +0100 - -drbd (0.6.2-1) unstable; urgency=low - - * SMP fix in drbd_dio_end_sec() - * /etc/init.d/drbd knows about returncodes of fsck - * SUSE style rcdrbd - * Fixes for uninstall Target of the Makefiles. - - -- Philipp Reisner Tue, 11 Feb 2003 15:58:49 +0100 - -drbd (0.6.1-1) unstable; urgency=low - - * Stable release - - -- Philipp Reisner Mon, 25 Nov 2002 14:51:39 +0100 - -drbd (0.6-1.pre16-0cvs20020909.1) unstable; urgency=low - - * changed the maintainer to jan@debian.org in agreement with - Ard who currently doesn't work on drbd. - * changed name of generated drbd-module-... package to include - the full version number of the kernel package - * place generated drbd-module-... package in $(KSRC)/.. - - -- Jan Niehusmann Fri, 13 Sep 2002 15:57:01 +0200 - -drbd (0.6-1.pre16-0cvs20020909) unstable; urgency=low - - * updated version - * strange version number because debian versioning doesn't handle - -pre versions sanely - * uploading to unstable. 
(Closes: Bug#130031) - - -- Jan Niehusmann Wed, 11 Sep 2002 13:10:03 +0200 - -drbd (cvs20010511-1) unstable; urgency=low - - * First deb-anized version - - -- Ard van Breemen Fri, 11 May 2001 11:59:53 +0200 + -- Philipp Reisner Thu, 1 Sep 2005 10:00:00 +0200 diff -Nru drbd8-8.3.7/debian/compat drbd8-8.4.1+git55a81dc~cmd1/debian/compat --- drbd8-8.3.7/debian/compat 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/compat 2012-02-02 14:09:14.000000000 +0000 @@ -1 +1 @@ -5 +4 diff -Nru drbd8-8.3.7/debian/control drbd8-8.4.1+git55a81dc~cmd1/debian/control --- drbd8-8.3.7/debian/control 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/control 2012-02-02 14:09:14.000000000 +0000 @@ -1,45 +1,47 @@ Source: drbd8 Section: admin Priority: extra -Maintainer: Ubuntu Developers -XSBC-Original-Maintainer: Debian DRBD Maintainers -Uploaders: Norbert Tretkowski , Martin Loschwitz , Philipp Hug -Build-Depends: debhelper (>= 5), debconf-utils, sp, docbook-utils, bison, flex, dpatch, bzip2, dpatch -Standards-Version: 3.8.3 -Homepage: http://www.drbd.org/ -Vcs-Browser: http://svn.debian.org/wsvn/debian-ha/drbd8/ -Vcs-Svn: svn://svn.debian.org/svn/debian-ha/drbd8/ +Maintainer: DRBD dev +Uploaders: Philipp Reisner , Lars Ellenberg +Build-Depends: debhelper (>= 4), debconf-utils, docbook-xml, docbook-xsl, dpatch, flex, xsltproc +Standards-Version: 3.6.2.1 Package: drbd8-utils Architecture: any Section: admin -Depends: debconf | debconf-2.0, ${shlibs:Depends}, drbd8-source +Depends: debconf | debconf-2.0, ${shlibs:Depends} Conflicts: drbd-utils Provides: drbd-utils Replaces: drbd-utils, drbd Suggests: heartbeat Description: RAID 1 over tcp/ip for Linux utilities - Drbd is a block device which is designed to build high availability + DRBD is a block device which is designed to build high availability clusters by providing a virtual shared device which keeps disks in nodes synchronised using TCP/IP. 
This simulates RAID 1 but avoiding the use of uncommon hardware (shared SCSI buses or Fibre Channel). It is currently limited to fail-over HA clusters. . - This package contains the programs that will control the drbd kernel + This package contains the programs that will control the DRBD kernel module provided in drbd-source. You will need a clustering service (such as heartbeat) to fully implement it. + . + Homepage: http://www.drbd.org -Package: drbd8-source +Package: drbd8-module-source Architecture: all -Section: kernel -Depends: debhelper (>= 5), dpatch, bzip2, dkms, linux-headers-server | linux-headers-generic | linux-headers -Provides: drbd-module-source, drbd-source, drbd8-module-source -Recommends: dpkg-dev, debconf-utils +Section: admin +Depends: module-assistant, debhelper (>= 4), dpatch +Conflicts: drbd-module-source, drbd-source +Provides: drbd-module-source +Replaces: drbd-module-source, drbd-source +Recommends: dpkg-dev, kernel-package, debconf-utils Description: RAID 1 over tcp/ip for Linux module source - Drbd is a block device which is designed to build high availability + DRBD is a block device which is designed to build high availability clusters by providing a virtual shared device which keeps disks in nodes synchronised using TCP/IP. This simulates RAID 1 but avoiding the use of uncommon hardware (shared SCSI buses or Fibre Channel). It is currently limited to fail-over HA clusters. . - This package contains the source code for the drbd kernel module. + This package contains the source code for the DRBD kernel module. + . 
+ Homepage: http://www.drbd.org diff -Nru drbd8-8.3.7/debian/control.modules.in drbd8-8.4.1+git55a81dc~cmd1/debian/control.modules.in --- drbd8-8.3.7/debian/control.modules.in 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/control.modules.in 2012-02-02 14:09:14.000000000 +0000 @@ -1,16 +1,17 @@ Source: drbd8 Section: misc Priority: extra -Maintainer: Philipp Hug -Build-Depends: debhelper (>= 4), drbd8-source, bzip2 +Maintainer: DRBD dev +Uploaders: Philipp Reisner , Lars Ellenberg +Build-Depends: debhelper (>= 4), drbd8-source Standards-Version: 3.6.1 -Package: drbd8-_KVERS_ +Package: drbd8-module-_KVERS_ Architecture: any Depends: drbd8-utils -Conflicts: drbd-_KVERS_ -Provides: drbd-_KVERS_ -Replaces: drbd-_KVERS_ +Conflicts: drbd-module-_KVERS_ +Provides: drbd-module-_KVERS_ +Replaces: drbd-module-_KVERS_ Section: misc Recommends: kernel-image-_KVERS_ Description: RAID 1 over tcp/ip for Linux kernel module diff -Nru drbd8-8.3.7/debian/copyright drbd8-8.4.1+git55a81dc~cmd1/debian/copyright --- drbd8-8.3.7/debian/copyright 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/copyright 2012-02-02 14:09:14.000000000 +0000 @@ -3,18 +3,19 @@ upstream. Debianization of this package was started by Ard van Breemen . -Later, Jan Niehusmann finished the packaging and made the +Later, Jan Niehusmann finished the packaging and made the initial upload. 
-It was downloaded from http://www.linbit.com/en/article/articleview/34/1/11/ +It was downloaded from http://oss.linbit.com/drbd/ More information can be found at http://www.drbd.org/ -Drbd is free software; you can redistribute them and/or modify them under -the terms of the GNU General Public License as published by the Free Software -Foundation; either version 2 of the License, or (at your option) any later +DRBD was written by Philipp Reisner and Lars Ellenberg +for LINBIT Information Technologies, http://www.linbit.com + +Drbd is free software; you can redistribute them and/or modify them under +the terms of the GNU General Public License as published by the Free Software +Foundation; either version 2 of the License, or (at your option) any later version. On Debian systems, the complete text of the GNU General Public License can be found in /usr/share/common-licenses/GPL file. - - diff -Nru drbd8-8.3.7/debian/dkms.conf drbd8-8.4.1+git55a81dc~cmd1/debian/dkms.conf --- drbd8-8.3.7/debian/dkms.conf 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/dkms.conf 1970-01-01 00:00:00.000000000 +0000 @@ -1,9 +0,0 @@ -PACKAGE_NAME="drbd" -PACKAGE_VERSION="xxxVERSIONxxx" -AUTOINSTALL=yes -CLEAN="make -C drbd clean KERNELDIR=$kernel_source_dir" -MAKE="make -C drbd KERNELDIR=$kernel_source_dir" -BUILT_MODULE_NAME[0]="drbd" -BUILT_MODULE_LOCATION[0]="drbd" -DEST_MODULE_LOCATION[0]="/kernel/updates" -MODULES_CONF[0]="options drbd cn_idx=7" diff -Nru drbd8-8.3.7/debian/drbd8-module-_KVERS_.postrm.modules.in drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-module-_KVERS_.postrm.modules.in --- drbd8-8.3.7/debian/drbd8-module-_KVERS_.postrm.modules.in 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-module-_KVERS_.postrm.modules.in 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,14 @@ +#!/bin/sh +set -e + +SYSTEMMAP=/boot/System.map-_KVERS_ + +if [ -f $SYSTEMMAP ] +then + depmod -ae -F $SYSTEMMAP _KVERS_ +elif [ "`uname -r`" = 
"_KVERS_" ] +then + depmod -a & +fi + +#DEBHELPER# diff -Nru drbd8-8.3.7/debian/drbd8-module-source.dirs drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-module-source.dirs --- drbd8-8.3.7/debian/drbd8-module-source.dirs 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-module-source.dirs 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,3 @@ +usr/src/modules/drbd/debian +usr/src/modules/drbd/drbd +usr/share/modass/overrides diff -Nru drbd8-8.3.7/debian/drbd8-source.Makefile drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-source.Makefile --- drbd8-8.3.7/debian/drbd8-source.Makefile 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-source.Makefile 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -obj-m = drbd/ diff -Nru drbd8-8.3.7/debian/drbd8-source.README.Debian drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-source.README.Debian --- drbd8-8.3.7/debian/drbd8-source.README.Debian 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-source.README.Debian 1970-01-01 00:00:00.000000000 +0000 @@ -1,17 +0,0 @@ -drbd for Debian ---------------- - -The Debian drbd-source package can be used in several ways, - - - Using module-assistant(1) commands provided by the module-assistant Debian - package: - - # module-assistant auto-install drbd8 - - - Using the make-kpkg(1) command provided by the kernel-package Debian - package. See the "modules_image" section of the make-kpkg(1) man page. - - - Unpacking /usr/src/drbd*.tar.bz2 and installing the module on your own. 
- - -- Philipp Hug Thu, 21 Jun 2007 18:08:00 +0000 - diff -Nru drbd8-8.3.7/debian/drbd8-source.dirs drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-source.dirs --- drbd8-8.3.7/debian/drbd8-source.dirs 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-source.dirs 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ -usr/src/modules/drbd8/debian -usr/src/modules/drbd8/drbd -usr/src/modules/drbd8/scripts/ diff -Nru drbd8-8.3.7/debian/drbd8-source.drbd-Makefile drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-source.drbd-Makefile --- drbd8-8.3.7/debian/drbd8-source.drbd-Makefile 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-source.drbd-Makefile 1970-01-01 00:00:00.000000000 +0000 @@ -1,14 +0,0 @@ -#CFLAGS_drbd_sizeof_sanity_check.o = -Wpadded # -Werror - -EXTRA_CFLAGS += -I$(src) - -obj-m := drbd.o - -drbd-objs := drbd_buildtag.o drbd_bitmap.o drbd_proc.o \ - drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o \ - lru_cache.o drbd_main.o drbd_strings.o drbd_nl.o - -ifndef CONFIG_CONNECTOR - drbd-objs += connector.o cn_queue.o -endif - diff -Nru drbd8-8.3.7/debian/drbd8-source.postinst drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-source.postinst --- drbd8-8.3.7/debian/drbd8-source.postinst 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-source.postinst 1970-01-01 00:00:00.000000000 +0000 @@ -1,48 +0,0 @@ -#!/bin/sh -# Copyright (C) 2002-2005 Flavio Stanchina -# Copyright (C) 2005-2006 Aric Cyr -# Copyright (C) 2007 Mario Limonciello -# Copyright (C) 2009 Alberto Milone - -set -e - -NAME=drbd8 -PACKAGE_NAME=$NAME-source -CVERSION=`dpkg-query -W -f='${Version}' $PACKAGE_NAME | awk -F "-" '{print $1}' | cut -d\: -f2` -ARCH=`dpkg --print-architecture` - -dkms_configure () { - for POSTINST in /usr/lib/dkms/common.postinst "/usr/share/$PACKAGE_NAME/postinst"; do - if [ -f "$POSTINST" ]; then - "$POSTINST" "$NAME" "$CVERSION" "/usr/share/$PACKAGE_NAME" "$ARCH" "$2" - return $? 
- fi - echo "WARNING: $POSTINST does not exist." >&2 - done - echo "ERROR: DKMS version is too old and $PACKAGE_NAME was not" >&2 - echo "built with legacy DKMS support." >&2 - echo "You must either rebuild $PACKAGE_NAME with legacy postinst" >&2 - echo "support or upgrade DKMS to a more current version." >&2 - return 1 -} - -case "$1" in - configure) - dkms_configure - ;; - - abort-upgrade|abort-remove|abort-deconfigure) - ;; - - *) - echo "postinst called with unknown argument \`$1'" >&2 - exit 1 - ;; -esac - -# dh_installdeb will replace this with shell code automatically -# generated by other debhelper scripts. - -#DEBHELPER# - -exit 0 diff -Nru drbd8-8.3.7/debian/drbd8-source.prerm drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-source.prerm --- drbd8-8.3.7/debian/drbd8-source.prerm 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-source.prerm 1970-01-01 00:00:00.000000000 +0000 @@ -1,16 +0,0 @@ -#!/bin/sh - -set -e - -PKG="drbd8" -PKGVER=`dpkg-query -W -f='${Version}' "$PKG"-source | cut -d: -f2 | cut -f1 -d-` - -case "$1" in - remove|upgrade) - echo "Removing all [$PKG-$PKGVER] DKMS Modules" - dkms remove -m $PKG -v $PKGVER --all >/dev/null || true - echo "Done." 
- ;; -esac - -#DEBHELPER# diff -Nru drbd8-8.3.7/debian/drbd8-utils.dirs drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-utils.dirs --- drbd8-8.3.7/debian/drbd8-utils.dirs 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-utils.dirs 2012-02-02 14:09:14.000000000 +0000 @@ -1,4 +1,3 @@ etc etc/init.d etc/ha.d/resource.d -etc/udev/rules.d diff -Nru drbd8-8.3.7/debian/drbd8-utils.postinst drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-utils.postinst --- drbd8-8.3.7/debian/drbd8-utils.postinst 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-utils.postinst 2012-02-02 14:09:14.000000000 +0000 @@ -1,7 +1,5 @@ #!/bin/sh -set -e - if [ -x "/etc/init.d/drbd" ]; then update-rc.d drbd defaults 70 8 >/dev/null fi @@ -9,8 +7,12 @@ # Make sure /dev/nb[0-7] devices exist # cd /dev; for a in `seq 0 7`; do MAKEDEV nb$a; done -for i in `seq 0 15` ; do - test -b /dev/drbd$i || mknod -m 0660 /dev/drbd$i b 147 $i; -done +if [ -d /etc/udev/rules.d ]; then + echo "Udev found. Not creating device nodes." 
+else + for i in `seq 0 15` ; do + test -b /dev/drbd$i || mknod -m 0660 /dev/drbd$i b 147 $i; + done +fi #DEBHELPER# diff -Nru drbd8-8.3.7/debian/drbd8-utils.postrm drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-utils.postrm --- drbd8-8.3.7/debian/drbd8-utils.postrm 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-utils.postrm 2012-02-02 14:09:14.000000000 +0000 @@ -1,7 +1,5 @@ #!/bin/sh -set -e - if [ "$1" = "purge" ] ; then update-rc.d drbd remove >/dev/null fi diff -Nru drbd8-8.3.7/debian/drbd8-utils.prerm drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-utils.prerm --- drbd8-8.3.7/debian/drbd8-utils.prerm 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/drbd8-utils.prerm 2012-02-02 14:09:14.000000000 +0000 @@ -13,9 +13,11 @@ then case "$1" in remove|deconfigure) - # we don't stop the drbd resources as it's not absolutely needed during - # a utils upgrade and it would mean that unattended upgrades cause - # outages + /usr/sbin/invoke-rc.d drbd stop || true + ;; + + upgrade|failed-upgrade) + /usr/sbin/invoke-rc.d drbd stop ;; *) diff -Nru drbd8-8.3.7/debian/modass.drbd8-module-source drbd8-8.4.1+git55a81dc~cmd1/debian/modass.drbd8-module-source --- drbd8-8.3.7/debian/modass.drbd8-module-source 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/modass.drbd8-module-source 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,16 @@ +#!/bin/sh +# +# (c) Eduard Bloch , 2003 +# generic maintainer script for module-assistant controled packages +# to be sourced or copied as example code + +# autodetecting values. They may be overriden by the caller. + +MA_DIR=${MA_DIR:-/usr/share/modass} + +TARBALL=/usr/src/drbd8.tar.gz +BUILDDIR=${MODULE_LOC:-/usr/src/modules}/drbd + +. 
$MA_DIR/packages/generic.sh + +$1 "$@" diff -Nru drbd8-8.3.7/debian/modass.drbd8-source drbd8-8.4.1+git55a81dc~cmd1/debian/modass.drbd8-source --- drbd8-8.3.7/debian/modass.drbd8-source 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/modass.drbd8-source 1970-01-01 00:00:00.000000000 +0000 @@ -1,247 +0,0 @@ -#!/bin/sh -# -# (c) Eduard Bloch , 2003 -# generic maintainer script for module-assistant controled packages -# to be sourced or copied as example code - -# autodetecting values. They may be overriden by the caller. - -MA_DIR=${MA_DIR:-/usr/share/modass} - -TARBALL=/usr/src/drbd8.tar.bz2 -BUILDDIR=${MODULE_LOC:-/usr/src/modules}/drbd8 - -guess_source=${MA_SOURCE_PKG:-`basename $0`} -export guess_source - -guess_package=${guess_source%-src} -guess_package=${guess_package%-source} -guess_package=${guess_package%-modules} -guess_package=${guess_package%-driver} -guess_package=${guess_package%-kernel} -export guess_package - -topdir=${MOD_TOPDIR:-/usr/src} - -if test -n "$MA_DEBUG" ; then - set -x -fi - -if [ "$TARBALL" ] ; then - tarball="$TARBALL" -else - for suf in .tar.bz2 .tar.gz .tgz ; do - for presuf in "" -module -modules -driver -drivers -source -src -kernel-source -kernel-src ; do - if [ -r "$MOD_SRCDIR" -a -e "$MOD_SRCDIR/$guess_package$presuf$suf" ] ; then - tarball=$MOD_SRCDIR/$guess_package$presuf$suf - break 2; - fi - if [ -e /usr/src/$guess_package$presuf$suf ] ; then - tarball=/usr/src/$guess_package$presuf$suf - break 2; - fi - done - done -fi - -MODULE_LOC=${MODULE_LOC:-/usr/src/modules} -builddir_base=${BUILDDIR:-$MODULE_LOC/$guess_package} -pkgprefix=${PKGPREFIX:-$guess_package} # target base name something like sl-modules -sourcepkg=${SOURCEPKG:-$guess_source} # installed package that provides the source - -MA_VARDIR=${MA_VARDIR:-/var/cache/modass} - -if [ `id -u` != 0 ] ; then - if test -n "$ROOT_CMD" ; then - : - else - if which fakeroot >/dev/null 2>&1 ; then - ROOT_CMD=fakeroot - else - clear - echo - echo 
Warning, you are not root and fakeroot is not installed - sleep 3 - fi - fi -fi - -# and better not export ROOT_CMD, the targets in debian/rules do not -# need to run fakeroot inside fakeroot - -action () { - if [ "$VERBOSE" ] ; then - echo " $@" >&2 - "$@" - elif [ "$DRYRUN" ] ; then - echo " $@" >&2 - else - "$@" - fi -} - -locate_dir () { - for suf in "" -module -modules -driver -drivers -source -src -kernel-source -kernel-src -module-source -module-src -kernel; do - if [ -d "$builddir_base$suf/" ] ; then - builddir=$builddir_base$suf - return 0; - fi - done - return 1; -} - -locate_dir - -update () { - export sourcepkg - # action $dpkg -s $sourcepkg 2>/dev/null | grep ^Version: | cut -f2 -d\ > \ - # $MA_VARDIR/cache/$pkgprefix.cur_version|| rm $MA_VARDIR/cache/$pkgprefix.cur_version - - if test -e $MA_VARDIR/$sourcepkg.apt_policy ; then - newinfo=`cat $MA_VARDIR/$sourcepkg.apt_policy` - else - newinfo=`LANG=C apt-cache policy $sourcepkg 2>/dev/null` - fi - IFS='' - if test "$newinfo" ; then - export newinfo - echo -n $newinfo |tr -s " " | grep Candidate: | cut -f3 -d\ | tr -d '\n' > \ - $MA_VARDIR/$sourcepkg.avail_version - instvers=$(echo -n $newinfo |tr -s " " | grep Installed: | cut -f3 -d\ | tr -d '\n') - if [ "$instvers" = "(none)" ] ; then - rm -f $MA_VARDIR/$sourcepkg.cur_version - else - echo -n $instvers > $MA_VARDIR/$sourcepkg.cur_version - fi - else - rm -f $MA_VARDIR/$sourcepkg.avail_version $MA_VARDIR/$sourcepkg.cur_version - fi -} - -cur_version() { - cat $MA_VARDIR/$sourcepkg.cur_version 2>/dev/null -} - -avail_version() { - cat $MA_VARDIR/$sourcepkg.avail_version -} - -build() { - shift - eval `echo "$@" | tr ' ' '\n' | grep "KVERS\|KSRC\|KDREV"` 2>&1 - logfile=$MA_VARDIR/$sourcepkg.buildlog.$KVERS.`date +%s` - flag=$MA_VARDIR/$sourcepkg.flag.$KVERS.`date +%s` - export KVERS KDREV KSRC MA_VARDIR logfile flag - - ( echo Build log starting, file: $logfile ; - echo Date: `date -R` ; - echo ; - ) > $logfile - - if test -z "$builddir" || ! 
test -d $builddir ; then - if ! unpack || ! locate_dir ; then - echo "Source not found. Run: module-assistant auto-install" | tee $logfile - exit 1; - fi - fi - cd $builddir || exit 1 - - action $ROOT_CMD debian/rules kdist_clean | tee $logfile || true - - # bash cannot evaluate the return codes of the command in pipe, so - # make this groovy workaround. I have tried flag process and tail - # constructs, they all suck - - ( touch $flag && action $ROOT_CMD debian/rules "$@" 2>&1 || rm $flag - ) | tee -a $logfile - - # if flag has survived, okay, otherwise sth. failed - if test -f $flag ; then - file=`action $ROOT_CMD debian/rules echo-debfile 2>/dev/null` - if test -n "$file" && test -r "$file" ; then - echo "$file" >> $MA_VARDIR/$sourcepkg.buildstate.$KVERS - else - perl -mCwd -pe 'if (/^dpkg-deb/) { s,\.deb.*\n,.deb,; s,.*\p{Zs}[^\w./]+,,; s,//,/,g; $_=Cwd::abs_path($_)."\n";} else {undef $_}' $logfile >> $MA_VARDIR/$sourcepkg.buildstate.$KVERS - fi - # extra stuff - begin=`ls -l --time-style=+%s $flag | tr -s ' ' | cut -f6 -d\ ` - echo Build time: $(expr $(date +%s) - $begin) seconds >> $logfile - rm -f $flag - else - tput smso ; echo BUILD FAILED! ; - tput rmso ; echo See $logfile for details. - exit 1; - fi -} - -lastpkg() { -# assume that KVERS is in the environment - action tail -n1 $MA_VARDIR/$sourcepkg.buildstate.$KVERS 2>/dev/null -} - -unpack() { - - if test -n "$MA_NOTUNP" ; then return 0 ; fi - - cd $topdir || exit 1 -# test -r $target || return 1 - if test -e "$tarball" ; then - if ! test -r "$tarball" ; then - echo "Could not read $tarball!" - exit 1 - fi - else - echo "The source tarball could not be found!" - echo "Package $sourcepkg not installed?" - echo "Running \"m-a -f get $sourcepkg\" may help." - exit 1 - fi - echo Extracting the package tarball, $tarball, please wait... 
- if [ ${tarball%gz} != $tarball ] ; then - action tar --gzip -x -f $tarball - elif [ ${tarball%bz2} != $tarball ] ; then - action action tar --bzip2 -x -f $tarball - else - echo Unknown compression method, $tarball - exit 1 - fi - - cd /usr/src/modules/drbd8/drbd/ - ../scripts/adjust_drbd_config_h.sh - -} - -download() { - action $ROOT_CMD apt-get $REINSTALL install $sourcepkg -} - -# wipes the builddir -clean() { - rm -rf $builddir -} - -purge() { - action rm -f `cat $MA_VARDIR/$sourcepkg.buildstate.*` - action rm -rf $builddir $MA_VARDIR/$sourcepkg.* -} - -installed() { -# action test "`cat $MA_VARDIR/$pkgprefix.cur_version` 2>/dev/null" -# exit $? - test -s $MA_VARDIR/$sourcepkg.cur_version && test -e $tarball - exit $? -} - -prefix() { - echo $pkgprefix -} - -echodebfile() { - eval `echo "$@" | tr ' ' '\n' | grep "KVERS\|KDREV\|KSRC"` - logfile=$MA_VARDIR/$sourcepkg.buildlog.$KVERS.`date +%s` - export KVERS KDREV KSRC MA_VARDIR - cd $builddir 2>/dev/null || exit 1 - $ROOT_CMD debian/rules echo-debfile 2>/dev/null -} - -$1 "$@" diff -Nru drbd8-8.3.7/debian/patches/00list drbd8-8.4.1+git55a81dc~cmd1/debian/patches/00list --- drbd8-8.3.7/debian/patches/00list 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/patches/00list 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -#10_different-kernels.dpatch diff -Nru drbd8-8.3.7/debian/patches/10_different-kernels.dpatch drbd8-8.4.1+git55a81dc~cmd1/debian/patches/10_different-kernels.dpatch --- drbd8-8.3.7/debian/patches/10_different-kernels.dpatch 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/patches/10_different-kernels.dpatch 1970-01-01 00:00:00.000000000 +0000 @@ -1,20 +0,0 @@ -#! /bin/sh /usr/share/dpatch/dpatch-run -## 10_different-kernels.dpatch by Michael Prokop -## -## All lines beginning with `## DP:' are a description of the patch. 
-## DP: Fix build with older kernels - -@DPATCH@ ---- drbd8-8.3.2~rc2.orig/drbd/linux/drbd_config.h 2009-06-25 15:13:04.000000000 +0200 -+++ drbd8-8.3.2~rc2/drbd/linux/drbd_config.h 2009-06-26 09:59:46.000000000 +0200 -@@ -79,6 +79,10 @@ - - /* 2.6.29 and up no longer have swabb.h */ - //#define HAVE_LINUX_BYTEORDER_SWABB_H -+#include -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) -+ #define HAVE_LINUX_BYTEORDER_SWABB_H -+#endif - - /* Some vendor kernels < 2.6.7 might define msleep in one or - * another way .. */ diff -Nru drbd8-8.3.7/debian/rules drbd8-8.4.1+git55a81dc~cmd1/debian/rules --- drbd8-8.3.7/debian/rules 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/rules 2012-02-02 14:09:14.000000000 +0000 @@ -14,7 +14,7 @@ # This has to be exported to make some magic below work. export DH_OPTIONS -PACKAGE=drbd8 +PACKAGE=drbd8-module MA_DIR ?= /usr/share/modass -include $(MA_DIR)/include/generic.make -include $(MA_DIR)/include/common-rules.make @@ -30,26 +30,9 @@ INSTALL_PROGRAM += -s endif -DEB_VERSION ?= $(shell dpkg-parsechangelog | egrep '^Version:' | cut -f 2 -d ' ') -DEB_NOEPOCH_VERSION ?= $(shell echo $(DEB_VERSION) | cut -d: -f2-) -DEB_UPSTREAM_VERSION ?= $(shell echo $(DEB_NOEPOCH_VERSION) | cut -d- -f1) - -# module-assistant stuff -MAJOR=$(shell echo $(KVERS) | sed -e 's/\(...\).*/\1/') -ifeq ($(MAJOR),2.6) -KO=k -endif - -ifeq ($(DEB_BUILD_ARCH),i386) -ifeq ($(KPKG_ARCH),amd64) -KBUILD_PARAMS := "CROSS_COMPILE=amd64-linux- ARCH=x86_64" -CC=amd64-linux-gcc -endif -endif - -kdist_clean: +kdist_clean: unpatch dh_clean - $(MAKE) -C $(KSRC) M=$(CURDIR)/drbd/ clean + -$(MAKE) -C drbd clean # prep-deb-files rewrites the debian/ files as needed. 
See RATIONALE for # details @@ -62,31 +45,30 @@ dh_testdir dh_testroot dh_clean -k - make -C $(KSRC) M=$(CURDIR)/drbd/ modules - #install -m644 -b -D drbd/drbd.$(KO)o $(CURDIR)/debian/$(PKGNAME)/lib/modules/$(KVERS)/kernel/extra/drbd.$(KO)o - # this is broken, dunno why: - #make -C $(KSRC) M=$(CURDIR)/drbd/ modules_install INSTALL_MOD_PATH=$(CURDIR)/debian/$(PKGNAME) INSTALL_MOD_DIR=extra/ + $(MAKE) -C drbd KERNEL_SOURCES=$(KSRC) MODVERSIONS=detect KERNEL=linux-$(KVERS) KDIR=$(KSRC) + install -m644 -b -D drbd/drbd.ko $(CURDIR)/debian/$(PKGNAME)/lib/modules/$(KVERS)/updates/drbd.ko dh_installdocs dh_installchangelogs dh_compress dh_fixperms - dh_installmodules dh_installdeb dh_gencontrol -- -v$(VERSION) dh_md5sums dh_builddeb --destdir=$(DEB_DESTDIR) #Architecture -configure: patch - ./configure --prefix=/usr --localstatedir=/var --sysconfdir=/etc --with-utils --with-udev --with-xen --with-pacemaker --with-rgmanager --with-bashcompletion - -build: configure build-arch build-indep - +build: patch build-arch build-indep build-arch: build-arch-stamp -build-arch-stamp: patch +build-arch-stamp: dh_testdir - make +# build this first, so user/drbd_buildtag.c does not use the stale thing from the tgz + [ -f configure ] || ( autoheader && autoconf ) + ./configure --prefix=/usr --localstatedir=/var --sysconfdir=/etc + $(MAKE) drbd/drbd_buildtag.c + $(MAKE) -C user + $(MAKE) -C scripts + $(MAKE) -C documentation doc touch build-arch-stamp build-indep: build-indep-stamp @@ -99,11 +81,10 @@ rm -f build-arch-stamp build-indep-stamp #CONFIGURE-STAMP# # remove these files from upstream tgz rm -f debian/drbd8.* - #rm -f debian/drbd8-source.dirs + rm -f debian/drbd8-source.dirs rm -f debian/kernel-patch-wup.kpatches debian/kernel-patch-wup.README.Debian debian/kernel-export-wup.patch -$(MAKE) -C user clean -$(MAKE) -C scripts clean - -$(MAKE) -C benchmark clean -$(MAKE) -C documentation clean -$(MAKE) -C drbd clean dh_clean @@ -114,28 +95,18 @@ dh_testroot dh_clean -k -i 
dh_installdirs -i - cp -a drbd/* debian/$(PACKAGE)-source/usr/src/modules/drbd8/drbd/ - rm debian/$(PACKAGE)-source/usr/src/modules/drbd8/drbd/Makefile \ - debian/$(PACKAGE)-source/usr/src/modules/drbd8/drbd/Makefile-2.6 - cp debian/drbd8-source.Makefile debian/$(PACKAGE)-source/usr/src/modules/drbd8/Makefile - cp debian/drbd8-source.drbd-Makefile debian/$(PACKAGE)-source/usr/src/modules/drbd8/drbd/Makefile - cp drbd/linux/drbd_config.h debian/$(PACKAGE)-source/usr/src/modules/drbd8 - cp -a scripts/adjust_drbd_config_h.sh debian/$(PACKAGE)-source/usr/src/modules/drbd8/scripts/ - + cp -a drbd/* debian/$(PACKAGE)-source/usr/src/modules/drbd/drbd + cp Makefile debian/$(PACKAGE)-source/usr/src/modules/drbd + cp drbd/linux/drbd_config.h debian/$(PACKAGE)-source/usr/src/modules/drbd + mkdir debian/$(PACKAGE)-source/usr/src/modules/drbd/scripts/ + # install debian/ files - mkdir -p debian/$(PACKAGE)-source/usr/src/modules/drbd8/debian/ - cd debian ; cp changelog control compat *.modules.in rules copyright $(PACKAGE)-source/usr/src/modules/drbd8/debian - + cd debian ; cp changelog control compat *.modules.in rules copyright $(PACKAGE)-source/usr/src/modules/drbd/debian + # tar the stuff - cd debian/$(PACKAGE)-source/usr/src/ ; tar cjpvf drbd8.tar.bz2 modules ; rm -rf modules - - #install -m 755 $(CURDIR)/debian/modass.drbd8-source $(CURDIR)/debian/$(PACKAGE)-source/usr/share/modass/overrides/drbd8-source - - rm debian/$(PACKAGE)-source/usr/src/drbd8.tar.bz2 - mkdir -p debian/$(PACKAGE)-source/usr/src/$(PACKAGE)-$(DEB_UPSTREAM_VERSION)/ - cp -a drbd debian/$(PACKAGE)-source/usr/src/$(PACKAGE)-$(DEB_UPSTREAM_VERSION)/ - cp debian/dkms.conf debian/$(PACKAGE)-source/usr/src/$(PACKAGE)-$(DEB_UPSTREAM_VERSION)/ - sed -i -e 's/xxxVERSIONxxx/$(DEB_UPSTREAM_VERSION)/g' debian/$(PACKAGE)-source/usr/src/$(PACKAGE)-$(DEB_UPSTREAM_VERSION)/dkms.conf + cd debian/$(PACKAGE)-source/usr/src/ ; tar pzfvc drbd8.tar.gz modules ; rm -rf modules + + install -m 755 
$(CURDIR)/debian/modass.drbd8-module-source $(CURDIR)/debian/$(PACKAGE)-source/usr/share/modass/overrides/drbd8-module-source dh_install -i install-arch: @@ -143,10 +114,11 @@ dh_testroot dh_clean -k -s dh_installdirs -s + $(MAKE) DESTDIR=$(CURDIR)/debian/drbd8-utils/ -C user install + $(MAKE) DESTDIR=$(CURDIR)/debian/drbd8-utils/ -C scripts install + $(MAKE) DESTDIR=$(CURDIR)/debian/drbd8-utils/ -C documentation install - $(MAKE) DESTDIR=$(CURDIR)/debian/drbd8-utils/ install - dh_install --source=debian/drbd8-utils --list-missing -s - + dh_install -s # Must not depend on anything. This is to be called by # binary-arch/binary-indep # in another 'make' thread. @@ -156,22 +128,12 @@ dh_installchangelogs ChangeLog dh_installdocs dh_installexamples -# dh_installmenu -# dh_installdebconf -# dh_installlogrotate -# dh_installemacsen -# dh_installpam -# dh_installmime # dh_installinit -# dh_installcron -# dh_installinfo dh_installman dh_link dh_strip dh_compress dh_fixperms -# dh_perl -# dh_python dh_makeshlibs dh_installdeb dh_shlibdeps @@ -187,4 +149,4 @@ $(MAKE) -f debian/rules DH_OPTIONS=-a binary-common binary: binary-arch binary-indep -.PHONY: build clean binary-indep binary-arch binary install install-indep install-arch unpatch +.PHONY: build clean binary-indep binary-arch binary install install-indep install-arch patch unpatch diff -Nru drbd8-8.3.7/debian/substvars drbd8-8.4.1+git55a81dc~cmd1/debian/substvars --- drbd8-8.3.7/debian/substvars 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/substvars 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1 @@ +shlibs:Depends=libc6 (>= 2.3.2.ds1-4) diff -Nru drbd8-8.3.7/debian/watch drbd8-8.4.1+git55a81dc~cmd1/debian/watch --- drbd8-8.3.7/debian/watch 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/debian/watch 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -version=3 -http://oss.linbit.com/drbd/ 8.3/drbd-(.*).tar.gz diff -Nru drbd8-8.3.7/documentation/Makefile 
drbd8-8.4.1+git55a81dc~cmd1/documentation/Makefile --- drbd8-8.3.7/documentation/Makefile 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/Makefile 2012-09-03 22:37:14.000000000 +0000 @@ -29,7 +29,6 @@ BASH_COMPLETION_SUFFIX = UDEV_RULE_SUFFIX = INITDIR = /etc/init.d -KDIR ?= LIBDIR = /usr/lib/drbd CC = gcc CFLAGS = -Wall -g -O2 @@ -42,7 +41,7 @@ WITH_XEN = yes WITH_PACEMAKER = yes WITH_HEARTBEAT = yes -WITH_RGMANAGER = yes +WITH_RGMANAGER = no WITH_BASHCOMPLETION = yes # variables meant to be overridden from the make command line @@ -66,6 +65,15 @@ XSLTPROC_HTML_OPTIONS ?= $(XSLTPROC_OPTIONS) XSLTPROC_FO_OPTIONS ?= $(XSLTPROC_OPTIONS) +DRBDSETUP_CMDS = new-resource new-minor del-resource del-minor +DRBDSETUP_CMDS += attach connect disk-options net-options resource-options +DRBDSETUP_CMDS += disconnect detach primary secondary verify invalidate invalidate-remote +DRBDSETUP_CMDS += down wait-connect wait-sync role cstate dstate +DRBDSETUP_CMDS += resize check-resize pause-sync resume-sync +DRBDSETUP_CMDS += outdate show-gi get-gi show events +DRBDSETUP_CMDS += suspend-io resume-io new-current-uuid + + all: @echo "To (re)make the documentation: make doc" @@ -102,9 +110,19 @@ $(FO_STYLESHEET) $< endif +../user/drbdsetup: + (cd ../user; make drbdsetup) + +drbdsetup_xml-help_%.xml: ../user/drbdsetup + ../user/drbdsetup xml-help $* > $@ + +drbdsetup_%.xml: drbdsetup_xml-help_%.xml xml-usage-to-docbook.xsl + $(XSLTPROC) -o $@ xml-usage-to-docbook.xsl $< + distclean: rm -f *.[58] manpage.links manpage.refs *~ manpage.log rm -f *.ps.gz *.pdf *.ps *.html pod2htm* + rm -f drbdsetup_*.xml ####### @@ -131,5 +149,4 @@ ps: $(SOURCES:.xml=.ps) - - +drbdsetup.8: drbdsetup.xml $(patsubst %,drbdsetup_%.xml,$(DRBDSETUP_CMDS)) diff -Nru drbd8-8.3.7/documentation/Makefile.in drbd8-8.4.1+git55a81dc~cmd1/documentation/Makefile.in --- drbd8-8.3.7/documentation/Makefile.in 2010-01-07 09:09:33.000000000 +0000 +++ 
drbd8-8.4.1+git55a81dc~cmd1/documentation/Makefile.in 2012-02-02 14:09:14.000000000 +0000 @@ -29,7 +29,6 @@ BASH_COMPLETION_SUFFIX = @BASH_COMPLETION_SUFFIX@ UDEV_RULE_SUFFIX = @UDEV_RULE_SUFFIX@ INITDIR = @INITDIR@ -KDIR ?= @KDIR@ LIBDIR = @prefix@/lib/@PACKAGE_TARNAME@ CC = @CC@ CFLAGS = @CFLAGS@ @@ -66,6 +65,15 @@ XSLTPROC_HTML_OPTIONS ?= $(XSLTPROC_OPTIONS) XSLTPROC_FO_OPTIONS ?= $(XSLTPROC_OPTIONS) +DRBDSETUP_CMDS = new-resource new-minor del-resource del-minor +DRBDSETUP_CMDS += attach connect disk-options net-options resource-options +DRBDSETUP_CMDS += disconnect detach primary secondary verify invalidate invalidate-remote +DRBDSETUP_CMDS += down wait-connect wait-sync role cstate dstate +DRBDSETUP_CMDS += resize check-resize pause-sync resume-sync +DRBDSETUP_CMDS += outdate show-gi get-gi show events +DRBDSETUP_CMDS += suspend-io resume-io new-current-uuid + + all: @echo "To (re)make the documentation: make doc" @@ -102,9 +110,19 @@ $(FO_STYLESHEET) $< endif +../user/drbdsetup: + (cd ../user; make drbdsetup) + +drbdsetup_xml-help_%.xml: ../user/drbdsetup + ../user/drbdsetup xml-help $* > $@ + +drbdsetup_%.xml: drbdsetup_xml-help_%.xml xml-usage-to-docbook.xsl + $(XSLTPROC) -o $@ xml-usage-to-docbook.xsl $< + distclean: rm -f *.[58] manpage.links manpage.refs *~ manpage.log rm -f *.ps.gz *.pdf *.ps *.html pod2htm* + rm -f drbdsetup_*.xml ####### @@ -131,5 +149,4 @@ ps: $(SOURCES:.xml=.ps) - - +drbdsetup.8: drbdsetup.xml $(patsubst %,drbdsetup_%.xml,$(DRBDSETUP_CMDS)) diff -Nru drbd8-8.3.7/documentation/drbd.8 drbd8-8.4.1+git55a81dc~cmd1/documentation/drbd.8 --- drbd8-8.3.7/documentation/drbd.8 2010-01-13 16:17:24.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbd.8 2012-02-02 14:09:57.000000000 +0000 @@ -1,7 +1,7 @@ '\" t .\" Title: drbd .\" Author: [see the "Author" section] -.\" Generator: DocBook XSL Stylesheets v1.75.1 +.\" Generator: DocBook XSL Stylesheets v1.75.2 .\" Date: 15 Oct 2008 .\" Manual: System Administration .\" Source: DRBD 
8.3.2 @@ -9,6 +9,15 @@ .\" .TH "DRBD" "8" "15 Oct 2008" "DRBD 8.3.2" "System Administration" .\" ----------------------------------------------------------------- +.\" * Define some portability stuff +.\" ----------------------------------------------------------------- +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.\" http://bugs.debian.org/507673 +.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- .\" disable hyphenation diff -Nru drbd8-8.3.7/documentation/drbd.conf.5 drbd8-8.4.1+git55a81dc~cmd1/documentation/drbd.conf.5 --- drbd8-8.3.7/documentation/drbd.conf.5 2010-01-13 16:17:23.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbd.conf.5 2012-02-02 14:09:56.000000000 +0000 @@ -1,13 +1,22 @@ '\" t .\" Title: drbd.conf .\" Author: [see the "Author" section] -.\" Generator: DocBook XSL Stylesheets v1.75.1 -.\" Date: 5 Dec 2008 +.\" Generator: DocBook XSL Stylesheets v1.75.2 +.\" Date: 6 May 2011 .\" Manual: Configuration Files -.\" Source: DRBD 8.3.2 +.\" Source: DRBD 8.4.0 .\" Language: English .\" -.TH "DRBD\&.CONF" "5" "5 Dec 2008" "DRBD 8.3.2" "Configuration Files" +.TH "DRBD\&.CONF" "5" "6 May 2011" "DRBD 8.4.0" "Configuration Files" +.\" ----------------------------------------------------------------- +.\" * Define some portability stuff +.\" ----------------------------------------------------------------- +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.\" http://bugs.debian.org/507673 +.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' .\" 
----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- @@ -19,7 +28,7 @@ .\" * MAIN CONTENT STARTS HERE * .\" ----------------------------------------------------------------- .SH "NAME" -drbd.conf \- Configuration file for DRBD\'s devices .\" drbd.conf +drbd.conf \- Configuration file for DRBD\*(Aqs devices .\" drbd.conf .SH "INTRODUCTION" .PP The file @@ -32,24 +41,29 @@ should be the same on both nodes of the cluster\&. Changes to \fB/etc/drbd\&.conf\fR do not apply immediately\&. -.PP \fBExample\ \&1.\ \&A small drbd.conf file\fR .sp .if n \{\ .RS 4 .\} .nf global { usage\-count yes; } common { syncer { rate 10M; } } resource r0 { protocol C; net { cram\-hmac\-alg sha1; shared\-secret "FooFunFactory"; } on alice { device minor 1; disk /dev/sda7; address 10\&.1\&.1\&.31:7789; meta\-disk internal; } on bob { device minor 1; disk /dev/sda7; address 10\&.1\&.1\&.32:7789; meta\-disk internal; } } .fi .if n \{\ .RE .\} -In this example, there is a single DRBD resource (called r0) which uses protocol C for the connection between its devices\&. The device which runs on host +.PP +By convention the main config contains two include statements\&. The first one includes the file +\fB/etc/drbd\&.d/global_common\&.conf\fR, the second one all file with a +\fB\&.res\fR +suffix\&. +.PP +.PP \fBExample\ \&1.\ \&A small example.res file\fR .sp .if n \{\ .RS 4 .\} .nf resource r0 { net { protocol C; cram\-hmac\-alg sha1; shared\-secret "FooFunFactory"; } disk { resync\-rate 10M; } on alice { volume 0 { device minor 1; disk /dev/sda7; meta\-disk internal; } address 10\&.1\&.1\&.31:7789; } on bob { volume 0 { device minor 1; disk /dev/sda7; meta\-disk internal; } address 10\&.1\&.1\&.32:7789; } } .fi .if n \{\ .RE .\}In this example, there is a single DRBD resource (called r0) which uses protocol C for the connection between its devices\&. 
It contains a single volume which runs on host \fIalice\fR uses \fI/dev/drbd1\fR as devices for its application, and \fI/dev/sda7\fR -as low\-level storage for the data\&. The IP addresses are used to specify the networking interfaces to be used\&. An eventually running resync process should use about 10MByte/second of IO bandwidth\&. +as low\-level storage for the data\&. The IP addresses are used to specify the networking interfaces to be used\&. An eventually running resync process should use about 10MByte/second of IO bandwidth\&. This sync\-rate statement is valid for volume 0, but would also be valid for further volumes\&. In this example it assigns full 10MByte/second to each volume\&. .PP There may be multiple resource sections in a single drbd\&.conf file\&. For more examples, please have a look at the -\m[blue]\fBDRBD User\'s Guide\fR\m[]\&\s-2\u[1]\d\s+2\&. +\m[blue]\fBDRBD User\*(Aqs Guide\fR\m[]\&\s-2\u[1]\d\s+2\&. .SH "FILE FORMAT" .PP The file consists of sections and parameters\&. A section begins with a keyword, sometimes an additional name, and an opening brace (\(lq{\(rq)\&. A section ends with a closing brace (\(lq}\(rq\&. The braces enclose the parameters\&. .PP section [name] { parameter value; [\&.\&.\&.] } .PP -A parameter starts with the identifier of the parameter followed by whitespace\&. Every subsequent character is considered as part of the parameter\'s value\&. A special case are Boolean parameters which only consist of the identifier\&. Parameters are terminated by a semicolon (\(lq;\(rq)\&. +A parameter starts with the identifier of the parameter followed by whitespace\&. Every subsequent character is considered as part of the parameter\*(Aqs value\&. A special case are Boolean parameters which consist only of the identifier\&. Parameters are terminated by a semicolon (\(lq;\(rq)\&. .PP Some parameter values have default units which might be overruled by K, M or G\&. 
These units are defined in the usual way (K = 2^10 = 1024, M = 1024 K, G = 1024 M)\&. .PP @@ -61,7 +75,7 @@ .\" drbd.conf: skip Comments out chunks of text, even spanning more than one line\&. Characters between the keyword \fBskip\fR -and the opening brace (\(lq{\(rq) are ignored\&. Everything enclosed by the braces is skipped\&. This comes in handy, if you just want to comment out some \'resource [name] {\&.\&.\&.}\' section: just precede it with \'skip\'\&. +and the opening brace (\(lq{\(rq) are ignored\&. Everything enclosed by the braces is skipped\&. This comes in handy, if you just want to comment out some \*(Aq\fBresource [name] {\&.\&.\&.}\fR\*(Aq section: just precede it with \*(Aq\(lqskip\(rq\*(Aq\&. .RE .PP \fBglobal\fR @@ -81,7 +95,7 @@ .\" drbd.conf: common All resources inherit the options set in this section\&. The common section might have a \fBstartup\fR, a -\fBsyncer\fR, a +\fBoptions\fR, a \fBhandlers\fR, a \fBnet\fR and a @@ -96,13 +110,13 @@ \fBon \fR\fB\fIhost\fR\fR sections and may have a \fBstartup\fR, a -\fBsyncer\fR, a +\fBoptions\fR, a \fBhandlers\fR, a \fBnet\fR and a \fBdisk\fR -section\&. Required parameter in this section: -\fBprotocol\fR\&. +section\&. It might contain +\fBvolume\fRs sections\&. .RE .PP \fBon \fR\fB\fIhost\-name\fR\fR @@ -110,7 +124,7 @@ .\" drbd.conf: on Carries the necessary configuration parameters for a DRBD device of the enclosing resource\&. \fIhost\-name\fR -is mandatory and must match the Linux host name (uname \-n) of one of the nodes\&. You may list more than one host name here, in case you want to use the same parameters on several hosts (you\'d have to move the IP around usually)\&. Or you may list more than two such sections\&. +is mandatory and must match the Linux host name (uname \-n) of one of the nodes\&. You may list more than one host name here, in case you want to use the same parameters on several hosts (you\*(Aqd have to move the IP around usually)\&. 
Or you may list more than two such sections\&. .sp .if n \{\ .RS 4 @@ -142,12 +156,19 @@ .sp See also the \fBfloating\fR -section keyword\&. Required parameters in this section: +section keyword\&. Required statements in this section: +\fBaddress\fR +and +\fBvolume\fR\&. Note for backward compatibility and convenience it is valid to embed the statements of a single volume directly into the host section\&. +.RE +.PP +\fBvolume \fR\fB\fIvnr\fR\fR +.RS 4 +.\" drbd.conf: volume +Defines a volume within a connection\&. The minor numbers of a replicated volume might be different on different hosts, the volume number (\fIvnr\fR) is what groups them together\&. Required parameters in this section: \fBdevice\fR, \fBdisk\fR, -\fBaddress\fR, -\fBmeta\-disk\fR, -\fBflexible\-meta\-disk\fR\&. +\fBmeta\-disk\fR\&. .RE .PP \fBstacked\-on\-top\-of \fR\fB\fIresource\fR\fR @@ -173,8 +194,7 @@ section is that the matching of the host sections to machines is done by the IP\-address instead of the node name\&. Required parameters in this section: \fBdevice\fR, \fBdisk\fR, -\fBmeta\-disk\fR, -\fBflexible\-meta\-disk\fR, all of which +\fBmeta\-disk\fR, all of which \fImay\fR be inherited from the resource section, in which case you may shorten this section down to just the address identifier\&. .sp @@ -202,32 +222,41 @@ .if n \{\ .RE .\} -.sp .RE .PP \fBdisk\fR .RS 4 .\" drbd.conf: disk -This section is used to fine tune DRBD\'s properties in respect to the low level storage\&. Please refer to +This section is used to fine tune DRBD\*(Aqs properties in respect to the low level storage\&. Please refer to \fBdrbdsetup\fR(8) -for detailed description of the parameters\&. Optional parameter: +for detailed description of the parameters\&. Optional parameters: \fBon\-io\-error\fR, \fBsize\fR, \fBfencing\fR, -\fBuse\-bmbv\fR, -\fBno\-disk\-barrier\fR, -\fBno\-disk\-flushes\fR, -\fBno\-disk\-drain\fR, -\fBno\-md\-flushes\fR, -\fBmax\-bio\-bvecs\fR\&. 
+\fBdisk\-barrier\fR, +\fBdisk\-flushes\fR, +\fBdisk\-drain\fR, +\fBmd\-flushes\fR, +\fBmax\-bio\-bvecs\fR, +\fBresync\-rate\fR, +\fBresync\-after\fR, +\fBal\-extents\fR, +\fBc\-plan\-ahead\fR, +\fBc\-fill\-target\fR, +\fBc\-delay\-target\fR, +\fBc\-max\-rate\fR, +\fBc\-min\-rate\fR, +\fBdisk\-timeout\fR, +\fBread\-balancing\fR\&. .RE .PP \fBnet\fR .RS 4 .\" drbd.conf: net -This section is used to fine tune DRBD\'s properties\&. Please refer to +This section is used to fine tune DRBD\*(Aqs properties\&. Please refer to \fBdrbdsetup\fR(8) -for a detailed description of this section\'s parameters\&. Optional parameters: +for a detailed description of this section\*(Aqs parameters\&. Optional parameters: +\fBprotocol\fR, \fBsndbuf\-size\fR, \fBrcvbuf\-size\fR, \fBtimeout\fR, @@ -244,15 +273,21 @@ \fBafter\-sb\-1pri\fR, \fBafter\-sb\-2pri\fR, \fBdata\-integrity\-alg\fR, -\fBno\-tcp\-cork\fR +\fBno\-tcp\-cork\fR, +\fBon\-congestion\fR, +\fBcongestion\-fill\fR, +\fBcongestion\-extents\fR, +\fBverify\-alg\fR, +\fBuse\-rle\fR, +\fBcsums\-alg\fR\&. .RE .PP \fBstartup\fR .RS 4 .\" drbd.conf: startup -This section is used to fine tune DRBD\'s properties\&. Please refer to +This section is used to fine tune DRBD\*(Aqs properties\&. Please refer to \fBdrbdsetup\fR(8) -for a detailed description of this section\'s parameters\&. Optional parameters: +for a detailed description of this section\*(Aqs parameters\&. Optional parameters: \fBwfc\-timeout\fR, \fBdegr\-wfc\-timeout\fR, \fBoutdated\-wfc\-timeout\fR, @@ -262,46 +297,104 @@ \fBbecome\-primary\-on\fR\&. .RE .PP -\fBsyncer\fR +\fBoptions\fR .RS 4 -.\" drbd.conf: syncer -This section is used to fine tune the synchronization daemon for the device\&. Please refer to +.\" drbd.conf: options +This section is used to fine tune the behaviour of the resource object\&. Please refer to \fBdrbdsetup\fR(8) -for a detailed description of this section\'s parameters\&. 
Optional parameters: -\fBrate\fR, -\fBafter\fR, -\fBal\-extents\fR, -\fBuse\-rle\fR, -\fBcpu\-mask\fR, -\fBverify\-alg\fR -and -\fBcsums\-alg\fR\&. +for a detailed description of this section\*(Aqs parameters\&. Optional parameters: +\fBcpu\-mask\fR, and +\fBon\-no\-data\-accessible\fR\&. .RE .PP \fBhandlers\fR .RS 4 .\" drbd.conf: handlers -In this section you can define handlers (executables) that are executed by the DRBD system in response to certain events\&. Optional parameters: +In this section you can define handlers (executables) that are started by the DRBD system in response to certain events\&. Optional parameters: \fBpri\-on\-incon\-degr\fR, \fBpri\-lost\-after\-sb\fR, \fBpri\-lost\fR, \fBfence\-peer\fR (formerly oudate\-peer), \fBlocal\-io\-error\fR, +\fBinitial\-split\-brain\fR, \fBsplit\-brain\fR, \fBbefore\-resync\-target\fR, \fBafter\-resync\-target\fR\&. +.sp +The interface is done via environment variables: +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +\fBDRBD_RESOURCE\fR +is the name of the resource +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +\fBDRBD_MINOR\fR +is the minor number of the DRBD device, in decimal\&. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +\fBDRBD_CONF\fR +is the path to the primary configuration file; if you split your configuration into multiple files (e\&.g\&. in +\fB/etc/drbd\&.conf\&.d/\fR), this will not be helpful\&. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +.sp -1 +.IP \(bu 2.3 +.\} +\fBDRBD_PEER_AF\fR +, +\fBDRBD_PEER_ADDRESS\fR +, +\fBDRBD_PEERS\fR +are the address family (e\&.g\&. +\fBipv6\fR), the peer\*(Aqs address and hostnames\&. +.RE +.sp + +\fBDRBD_PEER\fR +is deprecated\&. +.sp +Please note that not all of these might be set for all handlers, and that some values might not be useable for a +\fBfloating\fR +definition\&. 
.RE .SS "Parameters" .PP \fBminor\-count \fR\fB\fIcount\fR\fR .RS 4 .\" drbd.conf: minor-count\fIcount\fR -may be a number from 1 to 255\&. +may be a number from 1 to FIXME\&. .sp -Use -\fIminor\-count\fR -if you want to define massively more resources later without reloading the DRBD kernel module\&. Per default the module loads with 11 more resources than you have currently in your config but at least 32\&. +\fIMinor\-count\fR +is a sizing hint for DRBD\&. It helps to right\-size various memory pools\&. It should be set in the in the same order of magnitude than the actual number of minors you use\&. Per default the module loads with 11 more resources than you have currently in your config but at least 32\&. .RE .PP \fBdialog\-refresh \fR\fB\fItime\fR\fR @@ -321,14 +414,18 @@ .\" drbd.conf: disable-ip-verification Use \fIdisable\-ip\-verification\fR -if, for some obscure reasons, drbdadm can/might not use ip or ifconfig to do a sanity check for the IP address\&. You can disable the IP verification with this option\&. +if, for some obscure reasons, drbdadm can/might not use +\fBip\fR +or +\fBifconfig\fR +to do a sanity check for the IP address\&. You can disable the IP verification with this option\&. .RE .PP \fBusage\-count \fR\fB\fIval\fR\fR .RS 4 .\" drbd.conf: usage-count Please participate in -\m[blue]\fBDRBD\'s online usage counter\fR\m[]\&\s-2\u[2]\d\s+2\&. The most convenient way to do so is to set this option to +\m[blue]\fBDRBD\*(Aqs online usage counter\fR\m[]\&\s-2\u[2]\d\s+2\&. The most convenient way to do so is to set this option to \fByes\fR\&. Valid options are: \fByes\fR, \fBno\fR @@ -357,12 +454,12 @@ \fBdisk\fR parameter\&. .sp -One can ether ommit the +One can ether omit the \fIname\fR or \fBminor\fR and the -\fIminor number\fR\&. If you ommit the +\fIminor number\fR\&. If you omit the \fIname\fR a default of /dev/drbd\fIminor\fR will be used\&. 
@@ -373,7 +470,7 @@ \fBdisk \fR\fB\fIname\fR\fR .RS 4 .\" drbd.conf: disk -DRBD uses this block device to actually store and retrieve the data\&. Never access such a device while DRBD is running on top of it\&. This holds also true for +DRBD uses this block device to actually store and retrieve the data\&. Never access such a device while DRBD is running on top of it\&. This also holds true for \fBdumpe2fs\fR(8) and similar commands\&. .RE @@ -390,61 +487,62 @@ \fBipv6\fR, \fBssocks\fR or -\fBsdp\fR\&. (For compatibility reasons +\fBsdp\fR +(for compatibility reasons \fBsci\fR is an alias for -\fBssocks\fR) It may be ommited for IPv4 addresses\&. The actual IPv6 address that follows the +\fBssocks\fR)\&. It may be omited for IPv4 addresses\&. The actual IPv6 address that follows the \fBipv6\fR keyword must be placed inside brackets: ipv6 [fd01:2345:6789:abcd::1]:7800\&. .sp Each DRBD resource needs a TCP \fIport\fR -which is used to connect to the node\'s partner device\&. Two different DRBD resources may not use the same +which is used to connect to the node\*(Aqs partner device\&. Two different DRBD resources may not use the same \fIaddr:port\fR combination on the same node\&. .RE .PP -\fBmeta\-disk \fR\fB\fIinternal\fR\fR, \fBflexible\-meta\-disk \fR\fB\fIinternal\fR\fR, \fBmeta\-disk \fR\fB\fIdevice [index]\fR\fR, \fBflexible\-meta\-disk \fR\fB\fIdevice \fR\fR +\fBmeta\-disk internal\fR, \fBmeta\-disk \fR\fB\fIdevice\fR\fR, \fBmeta\-disk \fR\fB\fIdevice\fR\fR\fB [\fR\fB\fIindex\fR\fR\fB]\fR .RS 4 -.\" drbd.conf: meta-disk.\" drbd.conf: flexible-meta-disk -Internal means that the last part of the backing device is used to store the meta\-data\&. You must not use -\fI[index]\fR -with internal\&. Note: Regardless of whether you use the -\fBmeta\-disk\fR -or the -\fBflexible\-meta\-disk\fR -keyword, it will always be of the size needed for the remaining storage size\&. 
+.\" drbd.conf: meta-disk +Internal means that the last part of the backing device is used to store the meta\-data\&. The size of the meta\-data is computed based on the size of the device\&. .sp -You can use a single block +When a \fIdevice\fR -to store meta\-data of multiple DRBD devices\&. E\&.g\&. use meta\-disk /dev/sde6[0]; and meta\-disk /dev/sde6[1]; for two different resources\&. In this case the meta\-disk would need to be at least 256 MB in size\&. -.sp -With the -\fBflexible\-meta\-disk\fR -keyword you specify a block device as meta\-data storage\&. You usually use this with LVM, which allows you to have many variable sized block devices\&. The required size of the meta\-disk block device is 36kB + Backing\-Storage\-size / 32k\&. Round this number to the next 4kb boundary up and you have the exact size\&. Rule of the thumb: 32kByte per 1GByte of storage, round up to the next MB\&. +is specified, either with or without an +\fIindex\fR, DRBD stores the meta\-data on this device\&. Without +\fIindex\fR, the size of the meta\-data is determined by the size of the data device\&. This is usually used with LVM, which allows to have many variable sized block devices\&. The meta\-data size is 36kB + Backing\-Storage\-size / 32k, rounded up to the next 4kb boundary\&. (Rule of the thumb: 32kByte per 1GByte of storage, rounded up to the next MB\&.) +.sp +When an +\fIindex\fR +is specified, each index number refers to a fixed slot of meta\-data of 128 MB, which allows a maximum data size of 4 GB\&. This way, multiple DBRD devices can share the same meta\-data device\&. For example, if /dev/sde6[0] and /dev/sde6[1] are used, /dev/sde6 must be at least 256 MB big\&. Because of the hard size limit, use of meta\-disk indexes is discouraged\&. .RE .PP \fBon\-io\-error \fR\fB\fIhandler\fR\fR .RS 4 .\" drbd.conf: on-io-error\fIhandler\fR -is taken, if the lower level device reports io\-error to the upper layers\&. 
+is taken, if the lower level device reports io\-errors to the upper layers\&. .sp \fIhandler\fR -may be pass_on, call\-local\-io\-error or detach\&. +may be +\fBpass_on\fR, +\fBcall\-local\-io\-error\fR +or +\fBdetach\&.\fR .sp -pass_on: Report the io\-error to the upper layers\&. On Primary report it to the mounted file system\&. On Secondary ignore it\&. +\fBpass_on\fR: The node downgrades the disk status to inconsistent, marks the erroneous block as inconsistent in the bitmap and retries the IO on the remote node\&. .sp -call\-local\-io\-error: Call the handler script +\fBcall\-local\-io\-error\fR: Call the handler script \fBlocal\-io\-error\fR\&. .sp -detach: The node drops its low level device, and continues in diskless mode\&. +\fBdetach\fR: The node drops its low level device, and continues in diskless mode\&. .RE .PP \fBfencing \fR\fB\fIfencing_policy\fR\fR .RS 4 .\" drbd.conf: fencing -Under +By \fBfencing\fR we understand preventive measures to avoid situations where both nodes are primary and disconnected (AKA split brain)\&. .sp @@ -452,94 +550,135 @@ .PP \fBdont\-care\fR .RS 4 -This is the default policy\&. No fencing actions are undertaken\&. +This is the default policy\&. No fencing actions are taken\&. .RE .PP \fBresource\-only\fR .RS 4 -If a node becomes a disconnected primary, it tries to fence the peer\'s disk\&. This is done by calling the fence\-peer handler\&. The handler is supposed to reach the other node over alternative communication paths and call \'drbdadm outdate res\' there\&. +If a node becomes a disconnected primary, it tries to fence the peer\*(Aqs disk\&. This is done by calling the +\fBfence\-peer\fR +handler\&. The handler is supposed to reach the other node over alternative communication paths and call \*(Aq\fBdrbdadm outdate res\fR\*(Aq there\&. .RE .PP \fBresource\-and\-stonith\fR .RS 4 -If a node becomes a disconnected primary, it freezes all its IO operations and calls its fence\-peer handler\&. 
The fence\-peer handler is supposed to reach the peer over alternative communication paths and call \'drbdadm outdate res\' there\&. In case it cannot reach the peer it should stonith the peer\&. IO is resumed as soon as the situation is resolved\&. In case your handler fails, you can resume IO with the +If a node becomes a disconnected primary, it freezes all its IO operations and calls its fence\-peer handler\&. The fence\-peer handler is supposed to reach the peer over alternative communication paths and call \*(Aqdrbdadm outdate res\*(Aq there\&. In case it cannot reach the peer it should stonith the peer\&. IO is resumed as soon as the situation is resolved\&. In case your handler fails, you can resume IO with the \fBresume\-io\fR command\&. .RE .RE .PP -\fBuse\-bmbv\fR -.RS 4 -.\" drbd.conf: use-bmbv -In case the backing storage\'s driver has a merge_bvec_fn() function, DRBD has to pretend that it can only process IO requests in units not lager than 4kByte\&. (At time of writing the only known drivers which have such a function are: md (software raid driver), dm (device mapper \- LVM) and DRBD itself) -.sp -To get best performance out of DRBD on top of software RAID (or any other driver with a merge_bvec_fn() function) you might enable this function, if you know for sure that the merge_bvec_fn() function will deliver the same results on all nodes of your cluster\&. I\&.e\&. the physical disks of the software RAID are of exactly the same type\&. -\fIUse this option only if you know what you are doing\&.\fR -.RE -.PP -\fBno\-disk\-barrier\fR, \fBno\-disk\-flushes\fR, \fBno\-disk\-drain\fR +\fBdisk\-barrier\fR, \fBdisk\-flushes\fR, \fBdisk\-drain\fR .RS 4 -.\" drbd.conf: no-disk-flushes -.\" drbd.conf: no-disk-flushes -.\" drbd.conf: no-disk-flushes -DRBD has four implementations to express write\-after\-write dependencies to its backing storage device\&. 
DRBD will use the first method that is supported by the backing storage device and that is not disabled by the user\&. -.sp -When selecting the method you should not only base your decision on the measurable performance\&. In case your backing storage device has a volatile write cache (plain disks, RAID of plain disks) you should use one of the first two\&. In case your backing storage device has battery\-backed write cache you may go with option 3 or 4\&. Option 4 will deliver the best performance such devices\&. -.sp -Unfortunately device mapper (LVM) does not support barriers\&. -.sp -The letter after "wo:" in /proc/drbd indicates with method is currently in use for a device: b, f, d, n\&. The implementations: +.\" drbd.conf: disk-barrier +.\" drbd.conf: disk-flushes +.\" drbd.conf: disk-drain +DRBD has four implementations to express write\-after\-write dependencies to its backing storage device\&. DRBD will use the first method that is supported by the backing storage device and that is not disabled by the user\&. By default all three methods are enabled\&. +.sp +When selecting the method you should not only base your decision on the measurable performance\&. In case your backing storage device has a volatile write cache (plain disks, RAID of plain disks) you should use one of the first two\&. In case your backing storage device has battery\-backed write cache you may go with option 3\&. Option 4 (disable everything, use "none") +\fIis dangerous\fR +on most IO stacks, may result in write\-reordering, and if so, can theoretically be the reason for data corruption, or disturb the DRBD protocol, causing spurious disconnect/reconnect cycles\&. +\fIDo not use\fR +\fBno\-disk\-drain\fR\&. +.sp +Unfortunately device mapper (LVM) might not support barriers\&. +.sp +The letter after "wo:" in /proc/drbd indicates with method is currently in use for a device: +\fBb\fR, +\fBf\fR, +\fBd\fR, +\fBn\fR\&. 
The implementations are: .PP barrier .RS 4 -The first requirs that the driver of the backing storage device support barriers (called \'tagged command queuing\' in SCSI and \'native command queuing\' in SATA speak)\&. The use of this method can be disabled by the we -\fBno\-disk\-barrier\fR -option\&. +The first requires that the driver of the backing storage device support barriers (called \*(Aqtagged command queuing\*(Aq in SCSI and \*(Aqnative command queuing\*(Aq in SATA speak)\&. The use of this method can be disabled by setting the +\fBdisk\-barrier\fR +options to +\fBno\fR\&. .RE .PP flush .RS 4 -The second requires that the backing device support disk flushes (called \'force unit access\' in the drive vendors speak)\&. The use of this method can be disabled using the -\fBno\-disk\-flushes\fR -option\&. +The second requires that the backing device support disk flushes (called \*(Aqforce unit access\*(Aq in the drive vendors speak)\&. The use of this method can be disabled setting +\fBdisk\-flushes\fR +to +\fBno\fR\&. .RE .PP drain .RS 4 -The third method is simply to let write requests drain before write requests of a new reordering domain are issued\&. That was the only implementation before 8\&.0\&.9\&. You can prevent to use of this method by using the -\fBno\-disk\-drain\fR -option\&. +The third method is simply to let write requests drain before write requests of a new reordering domain are issued\&. This was the only implementation before 8\&.0\&.9\&. .RE .PP none .RS 4 -The fourth method is to not express write\-after\-write dependencies to the backing store at all\&. +The fourth method is to not express write\-after\-write dependencies to the backing store at all, by also specifying +\fBno\-disk\-drain\fR\&. This +\fIis dangerous\fR +on most IO stacks, may result in write\-reordering, and if so, can theoretically be the reason for data corruption, or disturb the DRBD protocol, causing spurious disconnect/reconnect cycles\&. 
+\fIDo not use\fR +\fBno\-disk\-drain\fR\&. .RE .RE .PP -\fBno\-md\-flushes\fR +\fBmd\-flushes\fR .RS 4 -.\" drbd.conf: no-md-flushes +.\" drbd.conf: md-flushes Disables the use of disk flushes and barrier BIOs when accessing the meta data device\&. See the notes on -\fBno\-disk\-flushes\fR\&. +\fBdisk\-flushes\fR\&. .RE .PP \fBmax\-bio\-bvecs\fR .RS 4 .\" drbd.conf: max-bio-bvecs -In some special circumstances the device mapper stack manages to pass BIOs to DRBD that violate the constraints that are set forth by DRBD\'s merge_bvec() function and which have more than one bvec\&. A known example is: phys\-disk \-> DRBD \-> LVM \-> Xen \-> missaligned partition (63) \-> DomU FS\&. Then you might see "bio would need to, but cannot, be split:" in the Dom0\'s kernel log\&. +In some special circumstances the device mapper stack manages to pass BIOs to DRBD that violate the constraints that are set forth by DRBD\*(Aqs merge_bvec() function and which have more than one bvec\&. A known example is: phys\-disk \-> DRBD \-> LVM \-> Xen \-> misaligned partition (63) \-> DomU FS\&. Then you might see "bio would need to, but cannot, be split:" in the Dom0\*(Aqs kernel log\&. .sp -The best workaround is to proper align the partition within the VM (E\&.g\&. start it at sector 1024)\&. Costs 480 KiByte of storage\&. Unfortunately the default of most Linux partitioning tools is to start the first partition at an odd number (63)\&. Therefore most distribution\'s install helpers for virtual linux machines will end up with missaligned partitions\&. The second best workaround is to limit DRBD\'s max bvecs per BIO (= max\-bio\-bvecs) to 1\&. Might cost performance\&. +The best workaround is to proper align the partition within the VM (E\&.g\&. start it at sector 1024)\&. This costs 480 KiB of storage\&. Unfortunately the default of most Linux partitioning tools is to start the first partition at an odd number (63)\&. 
Therefore most distribution\*(Aqs install helpers for virtual linux machines will end up with misaligned partitions\&. The second best workaround is to limit DRBD\*(Aqs max bvecs per BIO (= +\fBmax\-bio\-bvecs\fR) to 1, but that might cost performance\&. .sp The default value of \fBmax\-bio\-bvecs\fR is 0, which means that there is no user imposed limitation\&. .RE .PP +\fBdisk\-timeout\fR +.RS 4 +.\" drbd.conf: disk-timeout +If the driver of the +\fIlower_device\fR +does not finish an IO request within +\fIdisk_timeout\fR, DRBD considers the disk as failed\&. If DRBD is connected to a remote host, it will reissue local pending IO requests to the peer, and ship all new IO requests to the peer only\&. The disk state advances to diskless, as soon as the backing block device has finished all IO requests\&. +.sp +The default value of is 0, which means that no timeout is enforced\&. The default unit is 100ms\&. This option is available since 8\&.3\&.12\&. +.RE +.PP +\fBread\-balancing \fR\fB\fImethod\fR\fR +.RS 4 +.\" drbd.conf: read-balancing +The supported +\fImethods\fR +for load balancing of read requests are +\fBprefer\-local\fR, +\fBprefer\-remote\fR, +\fBround\-robin\fR, +\fBleast\-pending\fR +\fBwhen\-congested\-remote\fR, +\fB32K\-striping\fR, +\fB64K\-striping\fR, +\fB128K\-striping\fR, +\fB256K\-striping\fR, +\fB512K\-striping\fR +and +\fB1M\-striping\fR\&. +.sp +The default value of is +\fBprefer\-local\fR\&. This option is available since 8\&.4\&.1\&. +.RE +.PP \fBsndbuf\-size \fR\fB\fIsize\fR\fR .RS 4 .\" drbd.conf: sndbuf-size\fIsize\fR -is the size of the TCP socket send buffer\&. The default value is 0, i\&.e\&. autotune\&. You can specify smaller or larger values\&. Larger values are appropriate for reasonable write throughput with protocol A over high latency networks\&. Very large values like 1M may cause problems\&. Also values below 32K do not make much sense\&. Since 8\&.0\&.13 resp\&. 
8\&.2\&.7, setting the +is the size of the TCP socket send buffer\&. The default value is 0, i\&.e\&. autotune\&. You can specify smaller or larger values\&. Larger values are appropriate for reasonable write throughput with protocol A over high latency networks\&. Values below 32K do not make sense\&. Since 8\&.0\&.13 resp\&. 8\&.2\&.7, setting the \fIsize\fR value to 0 means that the kernel should autotune this\&. .RE @@ -557,8 +696,7 @@ .\" drbd.conf: timeout If the partner node fails to send an expected response packet within \fItime\fR -10ths -of a second, the partner node is considered dead and therefore the TCP/IP connection is abandoned\&. This must be lower than +tenths of a second, the partner node is considered dead and therefore the TCP/IP connection is abandoned\&. This must be lower than \fIconnect\-int\fR and \fIping\-int\fR\&. The default value is 60 = 6 seconds, the unit 0\&.1 seconds\&. @@ -567,7 +705,7 @@ \fBconnect\-int \fR\fB\fItime\fR\fR .RS 4 .\" drbd.conf: connect-int -In case it is not possible to connect to the remote DRBD device immediately, DRBD keeps on trying to connect\&. With this option you can set the time between two tries\&. The default value is 10 seconds, the unit is 1 second\&. +In case it is not possible to connect to the remote DRBD device immediately, DRBD keeps on trying to connect\&. With this option you can set the time between two retries\&. The default value is 10 seconds, the unit is 1 second\&. .RE .PP \fBping\-int \fR\fB\fItime\fR\fR @@ -581,13 +719,13 @@ \fBping\-timeout \fR\fB\fItime\fR\fR .RS 4 .\" drbd.conf: ping-timeout -The time the peer has time to answer to a keep\-alive packet\&. In case the peer\'s reply is not received within this time period, it is considered as dead\&. The default value is 500ms, the default unit is 100ms\&. +The time the peer has time to answer to a keep\-alive packet\&. In case the peer\*(Aqs reply is not received within this time period, it is considered as dead\&. 
The default value is 500ms, the default unit are tenths of a second\&. .RE .PP \fBmax\-buffers \fR\fB\fInumber\fR\fR .RS 4 .\" drbd.conf: max-buffers -Maximum number of requests to be allocated by DRBD\&. Unit is PAGE_SIZE, which is 4 KB on most systems\&. The minimum is hard coded to 32 (=128 KB)\&. For high\-performance installations it might help, if you increase that number\&. These buffers are used to hold data blocks while they are written to disk\&. +Maximum number of requests to be allocated by DRBD\&. Unit is PAGE_SIZE, which is 4 KiB on most systems\&. The minimum is hard coded to 32 (=128 KiB)\&. For high\-performance installations it might help if you increase that number\&. These buffers are used to hold data blocks while they are written to disk\&. .RE .PP \fBko\-count \fR\fB\fInumber\fR\fR @@ -596,7 +734,9 @@ In case the secondary node fails to complete a single write request for \fIcount\fR times the -\fItimeout\fR, it is expelled from the cluster\&. (I\&.e\&. the primary node goes into StandAlone mode\&.) The default value is 0, which disables this feature\&. +\fItimeout\fR, it is expelled from the cluster\&. (I\&.e\&. the primary node goes into +\fBStandAlone\fR +mode\&.) The default value is 0, which disables this feature\&. .RE .PP \fBmax\-epoch\-size \fR\fB\fInumber\fR\fR @@ -608,25 +748,29 @@ \fBallow\-two\-primaries\fR .RS 4 .\" drbd.conf: allow-two-primaries -With this option set you may assign primary role to both nodes\&. You only should use this option if you use a shared storage file system on top of DRBD\&. At the time of writing the only ones are: OCFS2 and GFS\&. If you use this option with any other file system, you are going to crash your nodes and to corrupt your data! +With this option set you may assign the primary role to both nodes\&. You only should use this option if you use a shared storage file system on top of DRBD\&. At the time of writing the only ones are: OCFS2 and GFS\&. 
If you use this option with any other file system, you are going to crash your nodes and to corrupt your data! .RE .PP \fBunplug\-watermark \fR\fB\fInumber\fR\fR .RS 4 .\" drbd.conf: unplug-watermark -When the number of pending write requests on the standby (secondary) node exceeds the unplug\-watermark, we trigger the request processing of our backing storage device\&. Some storage controllers deliver better performance with small values, others deliver best performance when the value is set to the same value as max\-buffers\&. Minimum 16, default 128, maximum 131072\&. +When the number of pending write requests on the standby (secondary) node exceeds the +\fBunplug\-watermark\fR, we trigger the request processing of our backing storage device\&. Some storage controllers deliver better performance with small values, others deliver best performance when the value is set to the same value as max\-buffers\&. Minimum 16, default 128, maximum 131072\&. .RE .PP \fBcram\-hmac\-alg\fR .RS 4 .\" drbd.conf: cram-hmac-alg -You need to specify the HMAC algorithm to enable peer authentication at all\&. You are strongly encouraged to use peer authentication\&. The HMAC algorithm will be used for the challenge response authentication of the peer\&. You may specify any digest algorithm that is named in /proc/crypto\&. +You need to specify the HMAC algorithm to enable peer authentication at all\&. You are strongly encouraged to use peer authentication\&. The HMAC algorithm will be used for the challenge response authentication of the peer\&. You may specify any digest algorithm that is named in +\fB/proc/crypto\fR\&. .RE .PP \fBshared\-secret\fR .RS 4 .\" drbd.conf: shared-secret -The shared secret used in peer authentication\&. May be up to 64 characters\&. Note that peer authentication is disabled as long as no cram\-hmac\-alg (see above) is specified\&. +The shared secret used in peer authentication\&. May be up to 64 characters\&. 
Note that peer authentication is disabled as long as no +\fBcram\-hmac\-alg\fR +(see above) is specified\&. .RE .PP \fBafter\-sb\-0pri \fR \fIpolicy\fR @@ -679,19 +823,25 @@ .RS 4 Discard the version of the secondary if the outcome of the \fBafter\-sb\-0pri\fR -algorithm would also destroy the current secondary\'s data\&. Otherwise disconnect\&. +algorithm would also destroy the current secondary\*(Aqs data\&. Otherwise disconnect\&. .RE .PP \fBviolently\-as0p\fR .RS 4 Always take the decision of the \fBafter\-sb\-0pri\fR -algorithm\&. Even if that causes an erratic change of the primary\'s view of the data\&. This is only useful if you use a 1node FS (i\&.e\&. not OCFS2 or GFS) with the allow\-two\-primaries flag, _AND_ if you really know what you are doing\&. This is DANGEROUS and MAY CRASH YOUR MACHINE if you have an FS mounted on the primary node\&. +algorithm, even if that causes an erratic change of the primary\*(Aqs view of the data\&. This is only useful if you use a one\-node FS (i\&.e\&. not OCFS2 or GFS) with the +\fBallow\-two\-primaries\fR +flag, +\fIAND\fR +if you really know what you are doing\&. This is +\fIDANGEROUS and MAY CRASH YOUR MACHINE\fR +if you have an FS mounted on the primary node\&. .RE .PP \fBdiscard\-secondary\fR .RS 4 -Discard the secondary\'s version\&. +Discard the secondary\*(Aqs version\&. .RE .PP \fBcall\-pri\-lost\-after\-sb\fR @@ -716,7 +866,13 @@ .RS 4 Always take the decision of the \fBafter\-sb\-0pri\fR -algorithm\&. Even if that causes an erratic change of the primary\'s view of the data\&. This is only useful if you use a 1node FS (i\&.e\&. not OCFS2 or GFS) with the allow\-two\-primaries flag, _AND_ if you really know what you are doing\&. This is DANGEROUS and MAY CRASH YOUR MACHINE if you have an FS mounted on the primary node\&. +algorithm, even if that causes an erratic change of the primary\*(Aqs view of the data\&. This is only useful if you use a one\-node FS (i\&.e\&. 
not OCFS2 or GFS) with the +\fBallow\-two\-primaries\fR +flag, +\fIAND\fR +if you really know what you are doing\&. This is +\fIDANGEROUS and MAY CRASH YOUR MACHINE\fR +if you have an FS mounted on the primary node\&. .RE .PP \fBcall\-pri\-lost\-after\-sb\fR @@ -735,7 +891,7 @@ \fBrr\-conflict \fR \fIpolicy\fR .RS 4 .\" drbd.conf: rr-conflict -To solve the cases when the outcome of the resync decision is incompatible with the current role assignment in the cluster\&. +This option helps to solve the cases when the outcome of the resync decision is incompatible with the current role assignment in the cluster\&. .PP \fBdisconnect\fR .RS 4 @@ -757,9 +913,9 @@ \fBdata\-integrity\-alg \fR \fIalg\fR .RS 4 .\" drbd.conf: data-integrity-alg -DRBD can ensure the data integrity of the user\'s data on the network by comparing hash values\&. Normally this is ensured by the 16 bit checksums in the headers of TCP/IP packets\&. +DRBD can ensure the data integrity of the user\*(Aqs data on the network by comparing hash values\&. Normally this is ensured by the 16 bit checksums in the headers of TCP/IP packets\&. .sp -This option can be set to any of the kernel\'s data digest algorithms\&. In a typical kernel configuration you should have at least one of +This option can be set to any of the kernel\*(Aqs data digest algorithms\&. In a typical kernel configuration you should have at least one of \fBmd5\fR, \fBsha1\fR, and \fBcrc32c\fR @@ -768,10 +924,37 @@ See also the notes on data integrity\&. .RE .PP -\fBno\-tcp\-cork\fR +\fBtcp\-cork\fR .RS 4 -.\" drbd.conf: no-tcp-cork -DRBD usually uses the TCP socket option TCP_CORK to hint to the network stack when it can expect more data, and when it should flush out what it has in its send queue\&. It turned out that there is at lease one network stack that performs worse when one uses this hinting method\&. Therefore we introducted this option, which disable the setting and clearing of the TCP_CORK socket option by DRBD\&. 
+.\" drbd.conf: tcp-cork +DRBD usually uses the TCP socket option TCP_CORK to hint to the network stack when it can expect more data, and when it should flush out what it has in its send queue\&. It turned out that there is at least one network stack that performs worse when one uses this hinting method\&. Therefore we introducted this option\&. By setting +\fBtcp\-cork\fR +to +\fBno\fR +you can disable the setting and clearing of the TCP_CORK socket option by DRBD\&. +.RE +.PP +\fBon\-congestion \fR\fB\fIcongestion_policy\fR\fR, \fBcongestion\-fill \fR\fB\fIfill_threshold\fR\fR, \fBcongestion\-extents \fR\fB\fIactive_extents_threshold\fR\fR +.RS 4 +By default DRBD blocks when the available TCP send queue becomes full\&. That means it will slow down the application that generates the write requests that cause DRBD to send more data down that TCP connection\&. +.sp +When DRBD is deployed with DRBD\-proxy it might be more desirable that DRBD goes into AHEAD/BEHIND mode shortly before the send queue becomes full\&. In AHEAD/BEHIND mode DRBD does no longer replicate data, but still keeps the connection open\&. +.sp +The advantage of the AHEAD/BEHIND mode is that the application is not slowed down, even if DRBD\-proxy\*(Aqs buffer is not sufficient to buffer all write requests\&. The downside is that the peer node falls behind, and that a resync will be necessary to bring it back into sync\&. During that resync the peer node will have an inconsistent disk\&. +.sp +Available +\fIcongestion_policy\fRs are +\fBblock\fR +and +\fBpull\-ahead\fR\&. The default is +\fBblock\fR\&. +\fIFill_threshold\fR +might be in the range of 0 to 10GiBytes\&. The default is 0 which disables the check\&. +\fIActive_extents_threshold\fR +has the same limits as +\fBal\-extents\fR\&. +.sp +The AHEAD/BEHIND mode and its settings are available since DRBD 8\&.3\&.10\&. 
.RE .PP \fBwfc\-timeout \fR\fB\fItime\fR\fR @@ -804,7 +987,7 @@ .RS 4 Sets on which node the device should be promoted to primary role by the init script\&. The \fInode\-name\fR -might either be a host name or the key word +might either be a host name or the keyword \fBboth\fR\&. When this option is not set the devices stay in secondary role on both nodes\&. Usually one delegates the role assignment to a cluster manager (e\&.g\&. heartbeat)\&. .RE .PP @@ -822,21 +1005,31 @@ \fBwfc\-timeout\fR and \fBdegr\-wfc\-timeout\fR -statements\&. Only do that if the peer of the stacked resource is usually not available or will not become primary usually\&. By using this option incorrectly, you run the risk of causing unexpected split brain\&. +statements\&. Only do that if the peer of the stacked resource is usually not available or will usually not become primary\&. By using this option incorrectly, you run the risk of causing unexpected split brain\&. .RE .PP -\fBrate \fR\fB\fIrate\fR\fR +\fBresync\-rate \fR\fB\fIrate\fR\fR .RS 4 -.\" drbd.conf: rate +.\" drbd.conf: resync-rate To ensure a smooth operation of the application on top of DRBD, it is possible to limit the bandwidth which may be used by background synchronizations\&. The default is 250 KB/sec, the default unit is KB/sec\&. Optional suffixes K, M, G are allowed\&. .RE .PP -\fBafter \fR\fB\fIres\-name\fR\fR +\fBuse\-rle\fR +.RS 4 +.\" drbd.conf: use-rle +During resync\-handshake, the dirty\-bitmaps of the nodes are exchanged and merged (using bit\-or), so the nodes will have the same understanding of which blocks are dirty\&. On large devices, the fine grained dirty\-bitmap can become large as well, and the bitmap exchange can take quite some time on low\-bandwidth links\&. +.sp +Because the bitmap typically contains compact areas where all bits are unset (clean) or set (dirty), a simple run\-length encoding scheme can considerably reduce the network traffic necessary for the bitmap exchange\&. 
+.sp +For backward compatibilty reasons, and because on fast links this possibly does not improve transfer time but consumes cpu cycles, this defaults to off\&. +.RE +.PP +\fBresync\-after \fR\fB\fIres\-name\fR\fR .RS 4 -.\" drbd.conf: after -By default, resynchronization of all devices would run in parallel\&. By defining a sync\-after dependency, the resynchronization of this resource will start only if the resource +.\" drbd.conf: resync-after +By default, resynchronization of all devices would run in parallel\&. By defining a resync\-after dependency, the resynchronization of this resource will start only if the resource \fIres\-name\fR -is already in connected state (= finished its resynchronization)\&. +is already in connected state (i\&.e\&., has finished its resynchronization)\&. .RE .PP \fBal\-extents \fR\fB\fIextents\fR\fR @@ -851,7 +1044,7 @@ .RS 4 During online verification (as initiated by the \fBverify\fR -sub\-command), rather than doing a bit\-wise comparison, DRBD applies a hash function to the contents of every block being verified, and compares that hash with the peer\&. This option defines the hash algorithm being used for that purpose\&. It can be set to any of the kernel\'s data digest algorithms\&. In a typical kernel configuration you should have at least one of +sub\-command), rather than doing a bit\-wise comparison, DRBD applies a hash function to the contents of every block being verified, and compares that hash with the peer\&. This option defines the hash algorithm being used for that purpose\&. It can be set to any of the kernel\*(Aqs data digest algorithms\&. 
In a typical kernel configuration you should have at least one of \fBmd5\fR, \fBsha1\fR, and \fBcrc32c\fR @@ -862,21 +1055,92 @@ .PP \fBcsums\-alg \fR\fB\fIhash\-alg\fR\fR .RS 4 -A resync process sends all marked data blocks form the source to the destination node, as long as no +A resync process sends all marked data blocks from the source to the destination node, as long as no \fBcsums\-alg\fR -is given\&. When one is specified the resync process exchanges hash values of all marked blocks first, and sends only those data blocks over, that have different hash values\&. +is given\&. When one is specified the resync process exchanges hash values of all marked blocks first, and sends only those data blocks that have different hash values\&. .sp This setting is useful for DRBD setups with low bandwidth links\&. During the restart of a crashed primary node, all blocks covered by the activity log are marked for resync\&. But a large part of those will actually be still in sync, therefore using \fBcsums\-alg\fR will lower the required bandwidth in exchange for CPU cycles\&. .RE .PP +\fBc\-plan\-ahead \fR\fB\fIplan_time\fR\fR, \fBc\-fill\-target \fR\fB\fIfill_target\fR\fR, \fBc\-delay\-target \fR\fB\fIdelay_target\fR\fR, \fBc\-max\-rate \fR\fB\fImax_rate\fR\fR +.RS 4 +The dynamic resync speed controller gets enabled with setting +\fIplan_time\fR +to a positive value\&. It aims to fill the buffers along the data path with either a constant amount of data +\fIfill_target\fR, or aims to have a constant delay time of +\fIdelay_target\fR +along the path\&. The controller has an upper bound of +\fImax_rate\fR\&. +.sp +By +\fIplan_time\fR +the agility of the controller is configured\&. Higher values yield for slower/lower responses of the controller to deviation from the target value\&. It should be at least 5 times RTT\&. For regular data paths a +\fIfill_target\fR +in the area of 4k to 100k is appropriate\&. 
For a setup that contains drbd\-proxy it is advisable to use +\fIdelay_target\fR +instead\&. Only when +\fIfill_target\fR +is set to 0 the controller will use +\fIdelay_target\fR\&. 5 times RTT is a reasonable starting value\&. +\fIMax_rate\fR +should be set to the bandwidth available between the DRBD\-hosts and the machines hosting DRBD\-proxy, or to the available disk\-bandwidth\&. +.sp +The default value of +\fIplan_time\fR +is 0, the default unit is 0\&.1 seconds\&. +\fIFill_target\fR +has 0 and sectors as default unit\&. +\fIDelay_target\fR +has 1 (100ms) and 0\&.1 as default unit\&. +\fIMax_rate\fR +has 10240 (100MiB/s) and KiB/s as default unit\&. +.sp +The dynamic resync speed controller and its settings are available since DRBD 8\&.3\&.9\&. +.RE +.PP +\fBc\-min\-rate \fR\fB\fImin_rate\fR\fR +.RS 4 +A node that is primary and sync\-source has to schedule application IO requests and resync IO requests\&. The +\fImin_rate\fR +tells DRBD use only up to min_rate for resync IO and to dedicate all other available IO bandwidth to application requests\&. +.sp +Note: The value 0 has a special meaning\&. It disables the limitation of resync IO completely, which might slow down application IO considerably\&. Set it to a value of 1, if you prefer that resync IO never slows down application IO\&. +.sp +Note: Although the name might suggest that it is a lower bound for the dynamic resync speed controller, it is not\&. If the DRBD\-proxy buffer is full, the dynamic resync speed controller is free to lower the resync speed down to 0, completely independent of the +\fBc\-min\-rate\fR +setting\&. +.sp +\fIMin_rate\fR +has 4096 (4MiB/s) and KiB/s as default unit\&. +.RE +.PP +\fBon\-no\-data\-accessible \fR\fB\fIond\-policy\fR\fR +.RS 4 +This setting controls what happens to IO requests on a degraded, disk less node (I\&.e\&. no data store is reachable)\&. The available policies are +\fBio\-error\fR +and +\fBsuspend\-io\fR\&. 
+.sp +If +\fIond\-policy\fR +is set to +\fBsuspend\-io\fR +you can either resume IO by attaching/connecting the last lost data storage, or by the +\fBdrbdadm resume\-io \fR\fB\fIres\fR\fR +command\&. The latter will result in IO errors of course\&. +.sp +The default is +\fBio\-error\fR\&. This setting is available since DRBD 8\&.3\&.9\&. +.RE +.PP \fBcpu\-mask \fR\fB\fIcpu\-mask\fR\fR .RS 4 .\" drbd.conf: cpu-mask -Sets the cpu\-affinity\-mask for DRBD\'s kernel threads of this device\&. The default value of +Sets the cpu\-affinity\-mask for DRBD\*(Aqs kernel threads of this device\&. The default value of \fIcpu\-mask\fR -is 0, which means that DRBD\'s kernel threads should be spread over all CPUs of the machine\&. This value must be given in hexadecimal notation\&. If it is too big it will be truncated\&. +is 0, which means that DRBD\*(Aqs kernel threads should be spread over all CPUs of the machine\&. This value must be given in hexadecimal notation\&. If it is too big it will be truncated\&. .RE .PP \fBpri\-on\-incon\-degr \fR\fB\fIcmd\fR\fR @@ -888,13 +1152,13 @@ \fBpri\-lost\-after\-sb \fR\fB\fIcmd\fR\fR .RS 4 .\" drbd.conf: pri-lost-after-sb -The node is currently primary, but lost the after split brain auto recovery procedure\&. As as consequence, it should be abandoned\&. +The node is currently primary, but lost the after\-split\-brain auto recovery procedure\&. As as consequence, it should be abandoned\&. .RE .PP \fBpri\-lost \fR\fB\fIcmd\fR\fR .RS 4 .\" drbd.conf: pri-lost -The node is currently primary, but DRBD\'s algorithm thinks that it should become sync target\&. As a consequence it should give up its primary role\&. +The node is currently primary, but DRBD\*(Aqs algorithm thinks that it should become sync target\&. As a consequence it should give up its primary role\&. .RE .PP \fBfence\-peer \fR\fB\fIcmd\fR\fR @@ -902,7 +1166,7 @@ .\" drbd.conf: fence-peer The handler is part of the \fBfencing\fR -mechanism\&. 
This handler is called in case the node needs to fence the peer\'s disk\&. It should use other communication paths than DRBD\'s network link\&. +mechanism\&. This handler is called in case the node needs to fence the peer\*(Aqs disk\&. It should use other communication paths than DRBD\*(Aqs network link\&. .RE .PP \fBlocal\-io\-error \fR\fB\fIcmd\fR\fR @@ -911,22 +1175,28 @@ DRBD got an IO error from the local IO subsystem\&. .RE .PP +\fBinitial\-split\-brain \fR\fB\fIcmd\fR\fR +.RS 4 +.\" drbd.conf: initial-split-brain +DRBD has connected and detected a split brain situation\&. This handler can alert someone in all cases of split brain, not just those that go unresolved\&. +.RE +.PP \fBsplit\-brain \fR\fB\fIcmd\fR\fR .RS 4 .\" drbd.conf: split-brain -DRBD detected a split brain situation\&. Manual recovery is necessary\&. This handler should alert someone on duty\&. +DRBD detected a split brain situation but remains unresolved\&. Manual recovery is necessary\&. This handler should alert someone on duty\&. .RE .PP \fBbefore\-resync\-target \fR\fB\fIcmd\fR\fR .RS 4 .\" drbd.conf: before-resync-target -DRBD calls this handler just before a resync beginns on the node that becomes resync target\&. It might be used to take a snapshot of the backing block device\&. +DRBD calls this handler just before a resync begins on the node that becomes resync target\&. It might be used to take a snapshot of the backing block device\&. .RE .PP \fBafter\-resync\-target \fR\fB\fIcmd\fR\fR .RS 4 .\" drbd.conf: after-resync-target -DRBD calls this handler just after a resync operation finished on the node which\'s disk just became consistent after beeing inconsistent for the duration of the resync\&. It might be used to remove a snapshot of the backing device that was created by the +DRBD calls this handler just after a resync operation finished on the node whose disk just became consistent after being inconsistent for the duration of the resync\&. 
It might be used to remove a snapshot of the backing device that was created by the \fBbefore\-resync\-target\fR handler\&. .RE @@ -948,20 +1218,25 @@ \fBnetwork\fR section\&. .PP -Both mechanisms might deliver false positives if the user of DRBD modifies the data which gets written to disk while the transfer goes on\&. Currently the swap code and ReiserFS are known to do so\&. In both cases this is not a problem, because when the initiator of the data transfer does this it already knows that that data block will not be part of an on disk data structure\&. +Both mechanisms might deliver false positives if the user of DRBD modifies the data which gets written to disk while the transfer goes on\&. This may happen for swap, or for certain append while global sync, or truncate/rewrite workloads, and not necessarily poses a problem for the integrity of the data\&. Usually when the initiator of the data transfer does this, it already knows that that data block will not be part of an on disk data structure, or will be resubmitted with correct data soon enough\&. .PP -The most recent (2007) example of systematically corruption was an issue with the TCP offloading engine and the driver of a certain type of GBit NIC\&. The actual corruption happened on the DMA transfer from core memory to the card\&. Since the TCP checksum gets calculated on the card this type of corruption stays undetected as long as you do not use either the online +The +\fBdata\-integrity\-alg\fR +causes the receiving side to log an error about "Digest integrity check FAILED: Ns +x\en", where N is the sector offset, and x is the size of the request in bytes\&. It will then disconnect, and reconnect, thus causing a quick resync\&. If the sending side at the same time detected a modification, it warns about "Digest mismatch, buffer modified by upper layers during write: Ns +x\en", which shows that this was a false positive\&. 
The sending side may detect these buffer modifications immediately after the unmodified data has been copied to the tcp buffers, in which case the receiving side won\*(Aqt notice it\&. +.PP +The most recent (2007) example of systematic corruption was an issue with the TCP offloading engine and the driver of a certain type of GBit NIC\&. The actual corruption happened on the DMA transfer from core memory to the card\&. Since the TCP checksum gets calculated on the card, this type of corruption stays undetected as long as you do not use either the online \fBverify\fR -or the data\-integrity\-alg\&. +or the +\fBdata\-integrity\-alg\fR\&. .PP We suggest to use the \fBdata\-integrity\-alg\fR only during a pre\-production phase due to its CPU costs\&. Further we suggest to do online \fBverify\fR -runs regularly e\&.g\&. once a month during low load period\&. +runs regularly e\&.g\&. once a month during a low load period\&. .SH "VERSION" .sp -This document was revised for version 8\&.3\&.2 of the DRBD distribution\&. +This document was revised for version 8\&.4\&.0 of the DRBD distribution\&. .SH "AUTHOR" .sp Written by Philipp Reisner philipp\&.reisner@linbit\&.com and Lars Ellenberg lars\&.ellenberg@linbit\&.com\&. @@ -977,7 +1252,7 @@ \fBdrbddisk\fR(8), \fBdrbdsetup\fR(8), \fBdrbdadm\fR(8), -\m[blue]\fBDRBD User\'s Guide\fR\m[]\&\s-2\u[1]\d\s+2, +\m[blue]\fBDRBD User\*(Aqs Guide\fR\m[]\&\s-2\u[1]\d\s+2, \m[blue]\fBDRBD web site\fR\m[]\&\s-2\u[3]\d\s+2 .SH "NOTES" .IP " 1." 
4 diff -Nru drbd8-8.3.7/documentation/drbd.conf.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbd.conf.xml --- drbd8-8.3.7/documentation/drbd.conf.xml 2010-01-07 09:09:33.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbd.conf.xml 2012-02-02 14:09:14.000000000 +0000 @@ -1,174 +1,189 @@ - - + + - 5 Dec 2008 + 6 May 2011 + DRBD - 8.3.2 + + 8.4.0 + drbd.conf + 5 + Configuration Files + drbd.conf - Configuration file for DRBD's devices - - drbd.conf - - + + Configuration file for DRBD's devices + drbd.conf + + Introduction - The file is read by - . - - The file format was designed as to allow to have - a verbatim copy of the file on both nodes of the cluster. - It is highly recommended to do so in order to keep your configuration - manageable. The file should be the same on both nodes of the cluster. Changes to do not apply - immediately. -A small drbd.conf fileglobal { usage-count yes; } -common { syncer { rate 10M; } } -resource r0 { - protocol C; + + The file is read by . + + The file format was designed as to allow to have a verbatim copy of the file on both + nodes of the cluster. It is highly recommended to do so in order to keep your configuration + manageable. The file should be the same on both nodes of the + cluster. Changes to do not apply immediately. + + By convention the main config contains two include statements. The first one includes + the file , the second one all file with a + suffix. 
+ + + A small example.res file + + resource r0 { net { + protocol C; cram-hmac-alg sha1; shared-secret "FooFunFactory"; } + disk { + resync-rate 10M; + } on alice { - device minor 1; - disk /dev/sda7; + volume 0 { + device minor 1; + disk /dev/sda7; + meta-disk internal; + } address 10.1.1.31:7789; - meta-disk internal; } on bob { - device minor 1; - disk /dev/sda7; + volume 0 { + device minor 1; + disk /dev/sda7; + meta-disk internal; + } address 10.1.1.32:7789; - meta-disk internal; } -} - In this example, there is a single DRBD resource (called r0) which uses - protocol C for the connection between its devices. - The device which runs - on host alice uses - /dev/drbd1 as devices for its application, and - /dev/sda7 as low-level storage for the data. - The IP addresses are used to specify the networking interfaces to be used. - An eventually running resync process should use about 10MByte/second of IO - bandwidth. - - There may be multiple resource sections in a single drbd.conf file. - For more examples, please have a look at the - DRBD User's Guide. - +} + In this example, there is a single DRBD resource (called r0) which uses protocol C + for the connection between its devices. It contains a single volume which runs on host + alice uses /dev/drbd1 as devices for its + application, and /dev/sda7 as low-level storage for the data. The + IP addresses are used to specify the networking interfaces to be used. An eventually running + resync process should use about 10MByte/second of IO bandwidth. This sync-rate statement is + valid for volume 0, but would also be valid for further volumes. In this example it assigns + full 10MByte/second to each volume. + + There may be multiple resource sections in a single drbd.conf file. For more examples, + please have a look at the + DRBD User's Guide. + + File Format - The file consists of sections and parameters. - A section begins with a keyword, sometimes an additional name, and an - opening brace ({). 
- A section ends with a closing brace (}. - The braces enclose the parameters. - - section [name] { parameter value; [...] } - - A parameter starts with the identifier of the parameter followed - by whitespace. Every subsequent character - is considered - as part of the parameter's value. A special case are Boolean - parameters which only consist of the identifier. - Parameters are terminated by a semicolon (;). - - Some parameter values have default units which might be overruled - by K, M or G. These units are defined in the usual way (K = 2^10 = 1024, - M = 1024 K, G = 1024 M). - - Comments may be placed into the configuration file and must - begin with a hash sign (#). Subsequent characters are ignored - until the end of the line. - + + The file consists of sections and parameters. A section begins with a keyword, sometimes + an additional name, and an opening brace ({). A section ends with a closing + brace (}. The braces enclose the parameters. + + section [name] { parameter value; [...] } + + A parameter starts with the identifier of the parameter followed by whitespace. Every + subsequent character is considered as part of the parameter's value. A special case are + Boolean parameters which consist only of the identifier. Parameters are terminated by a + semicolon (;). + + Some parameter values have default units which might be overruled by K, M or G. These + units are defined in the usual way (K = 2^10 = 1024, M = 1024 K, G = 1024 M). + + Comments may be placed into the configuration file and must begin with a hash sign + (#). Subsequent characters are ignored until the end of the line. + Sections + - - - + + - drbd.confskip - Comments out chunks of text, even spanning more than one line. - Characters between the keyword and the opening - brace ({) are ignored. Everything enclosed by the braces - is skipped. - This comes in handy, if you just want to comment out - some 'resource [name] {...}' section: just precede it with 'skip'. 
- + + drbd.conf + + skip + Comments out chunks of text, even spanning more than one line. + Characters between the keyword and the opening brace + ({) are ignored. Everything enclosed by the braces is skipped. This + comes in handy, if you just want to comment out some '' section: just precede it with 'skip'. + - - - + + - drbd.confglobal - Configures some global parameters. Currently only - , , - and - are allowed here. You may only have one global section, preferably - as the first section. - + + drbd.conf + + global + Configures some global parameters. Currently only + , , + and are allowed + here. You may only have one global section, preferably as the first section. + - - - + + - drbd.confcommon - All resources inherit the options set in this section. - The common section might have - a , - a , - a , - a and a section. - + + drbd.conf + + common + All resources inherit the options set in this section. The common + section might have a , a , a + , a and a + section. + - - - + + - drbd.confresource - Configures a DRBD resource. - Each resource section needs to have two (or more) - sections - and may have - a , - a , - a , - a and a section. - Required parameter in this section: . - + + drbd.conf + + resource + Configures a DRBD resource. Each resource section needs to have two (or + more) sections and may have a + , a , a , a + and a section. It might contain + s sections. + - - - + + - drbd.confon - Carries the necessary configuration parameters for a DRBD - device of the enclosing resource. - host-name is mandatory and must match the - Linux host name (uname -n) of one of the nodes. - You may list more than one host name here, in case you want to use the same - parameters on several hosts (you'd have to move the IP around usually). - Or you may list more than two such sections. - resource r1 { + + drbd.conf + + on + Carries the necessary configuration parameters for a DRBD device of the + enclosing resource. 
host-name is mandatory and must match + the Linux host name (uname -n) of one of the nodes. You may list more than one host + name here, in case you want to use the same parameters on several hosts (you'd have to + move the IP around usually). Or you may list more than two such sections. + resource r1 { protocol C; device minor 1; meta-disk internal; @@ -186,45 +201,61 @@ disk /dev/mapper/other-san-as-seen-from-daisy; } } - - See also the section keyword. - Required parameters in this section: , - , , , - . - + See also the section keyword. Required statements in + this section: and . Note for backward + compatibility and convenience it is valid to embed the statements of a single volume + directly into the host section. + - - - + + - drbd.confstacked-on-top-of - For a stacked DRBD setup (3 or 4 nodes), a is used - instead of an section. - Required parameters in this section: and - . - + + drbd.conf + + volume + Defines a volume within a connection. The minor numbers of a replicated + volume might be different on different hosts, the volume number + (vnr) is what groups them together. Required parameters in + this section: , , + . + - - - + + - drbd.confon - Carries the necessary configuration parameters for a DRBD - device of the enclosing resource. - This section is very similar to the section. - The difference to the section is that - the matching of the host sections to machines is done by the IP-address - instead of the node name. - Required parameters in this section: , - , , - , all of which may be - inherited from the resource section, in which case you may shorten this section - down to just the address identifier. - resource r2 { + + drbd.conf + + stacked-on-top-of + For a stacked DRBD setup (3 or 4 nodes), a + is used instead of an section. + Required parameters in this section: and + . + + + + + + + + + drbd.conf + + on + Carries the necessary configuration parameters for a DRBD device of the + enclosing resource. 
This section is very similar to the section. + The difference to the section is that the matching of the host + sections to machines is done by the IP-address instead of the node name. Required + parameters in this section: , , + , all of which + may be inherited from the resource section, in which case you may + shorten this section down to just the address identifier. resource r2 { protocol C; device minor 2; disk /dev/sda7; @@ -239,1308 +270,1628 @@ meta-disk /dev/sdc8; } } - - + + - - - + + - drbd.confdisk - This section is used to fine tune DRBD's properties - in respect to the low level storage. Please - refer to drbdsetup8 for detailed description of - the parameters. - Optional parameter: , - , , , - , , - , , - . - + + drbd.conf + + disk + This section is used to fine tune DRBD's properties in respect to the + low level storage. Please refer to + drbdsetup + + 8 + for detailed description of the parameters. Optional parameters: + , , , + , , + , , + , , + , , + , , + , , + , , + . + - - - + + - drbd.confnet - This section is used to fine tune DRBD's properties. Please - refer to drbdsetup8 for a detailed description - of this section's parameters. - Optional parameters: - , , - , - , , - , - , , - , , - , , - , , - , , - - + + drbd.conf + + net + This section is used to fine tune DRBD's properties. Please refer to + + drbdsetup + + 8 + for a detailed description of this section's parameters. Optional + parameters: , , + , , , + , , + , , + , , + , , + , , + , , + , , + , , + , , + . + - - - + + - drbd.confstartup - This section is used to fine tune DRBD's properties. Please - refer to drbdsetup8 for a detailed description - of this section's parameters. - Optional parameters: - , , - , - , - and . - + + drbd.conf + + startup + This section is used to fine tune DRBD's properties. Please refer to + + drbdsetup + + 8 + for a detailed description of this section's parameters. Optional + parameters: , , + , , + and . 
+ - - - + + - drbd.confsyncer - This section is used to fine tune the synchronization daemon - for the device. Please - refer to drbdsetup8 for a detailed description - of this section's parameters. - Optional parameters: - , , , - , - , and . - + + drbd.conf + + options + This section is used to fine tune the behaviour of the resource object. + Please refer to + drbdsetup + + 8 + for a detailed description of this section's parameters. Optional + parameters: , and + . + - - - + + - drbd.confhandlers - In this section you can define handlers (executables) that are executed - by the DRBD system in response to certain events. - Optional parameters: - , , - , (formerly oudate-peer), - , , - , . - + + drbd.conf + + handlers + In this section you can define handlers (executables) that are started + by the DRBD system in response to certain events. Optional parameters: + , , + , (formerly oudate-peer), + , , + , , + . + + The interface is done via environment variables: + + is the name of the resource + + + + is the minor number of the DRBD device, in + decimal. + + + + is the path to the primary configuration file; + if you split your configuration into multiple files (e.g. in + ), this will not be helpful. + + + + , , + are the address family (e.g. ), + the peer's address and hostnames. + + is deprecated. + + Please note that not all of these might be set for all handlers, and that some + values might not be useable for a definition. + Parameters + - - - + + - drbd.confminor-count count may be a number from 1 to 255. - - Use minor-count - if you want to define massively more resources later without reloading - the DRBD kernel - module. Per default the module loads with 11 more resources than you have currently - in your config but at least 32. + + drbd.conf + + minor-count + count may be a number from 1 to FIXME. + + Minor-count is a sizing hint for DRBD. It helps to + right-size various memory pools. 
It should be set in the in the same order of + magnitude than the actual number of minors you use. Per default the module loads with + 11 more resources than you have currently in your config but at least 32. + - - - + + - drbd.confdialog-refresh time may be 0 or a positive number. - - The user dialog redraws the second count every - time seconds (or does no redraws if - time is 0). The default value is 1. + + drbd.conf + + dialog-refresh + time may be 0 or a positive number. + + The user dialog redraws the second count every time + seconds (or does no redraws if time is 0). The default + value is 1. + - - - + + drbd.conf + disable-ip-verification - Use disable-ip-verification - if, for some obscure reasons, drbdadm can/might not use ip or ifconfig - to do a sanity check for the IP address. You can disable the IP verification with - this option. - + + Use disable-ip-verification if, for some obscure + reasons, drbdadm can/might not use or to + do a sanity check for the IP address. You can disable the IP verification with this + option. + - - - + + drbd.conf - usage-count + + usage-count + Please participate in - DRBD's online usage counter. - The most convenient way to do so - is to set this option to . Valid options are: - , and . - + DRBD's online usage counter. + The most convenient way to do so is to set + this option to . Valid options are: , + and . + - - - + + drbd.conf + protocol - On the TCP/IP link the specified protocol - is used. Valid protocol specifiers are A, B, and C. - Protocol A: write IO is reported as completed, if it has - reached local disk and local TCP send buffer. - Protocol B: write IO is reported as completed, if it has reached - local disk and remote buffer cache. - Protocol C: write IO is reported as completed, if it has - reached both local and remote disk. + + On the TCP/IP link the specified protocol is used. + Valid protocol specifiers are A, B, and C. 
+ + Protocol A: write IO is reported as completed, if it has reached local disk and + local TCP send buffer. + + Protocol B: write IO is reported as completed, if it has reached local disk and + remote buffer cache. + + Protocol C: write IO is reported as completed, if it has reached both local and + remote disk. + - - - + + - drbd.confdevice - The name of the block device node of the resource being described. - You must use this device with your application (file system) and - you must not use the low level block device which is specified with the - parameter. - - One can ether ommit the name or - and the minor number. If you ommit the name - a default of /dev/drbdminor will be used. - - Udev will create additional symlinks in /dev/drbd/by-res and /dev/drbd/by-disk. - + + drbd.conf + + device + The name of the block device node of the resource being described. You + must use this device with your application (file system) and you must not use the low + level block device which is specified with the parameter. + + One can ether omit the name or + and the minor number. If you omit the + name a default of /dev/drbdminor + will be used. + + Udev will create additional symlinks in /dev/drbd/by-res and + /dev/drbd/by-disk. + - - - + + - drbd.confdisk - DRBD uses this block device to actually store and retrieve the data. - Never access such a device while DRBD is running on top of it. This - holds also true for dumpe2fs8 and similar commands. - + + drbd.conf + + disk + DRBD uses this block device to actually store and retrieve the data. + Never access such a device while DRBD is running on top of it. This also holds true + for + dumpe2fs + + 8 + and similar commands. + - - - + + - drbd.confaddress - A resource needs one IP address per device, - which is used to wait for incoming connections from the partner device - respectively to reach the partner device. AF - must be one of , , - or . 
- (For compatibility reasons is an alias for ) - It may be ommited for IPv4 addresses. The actual IPv6 address that follows - the keyword must be placed inside brackets: - ipv6 [fd01:2345:6789:abcd::1]:7800. - - Each DRBD resource needs a TCP port - which is used to connect to the node's partner device. - Two different DRBD resources may not use the same - addr:port combination on the same node. - + + drbd.conf + + address + A resource needs one IP address per device, + which is used to wait for incoming connections from the partner device respectively to + reach the partner device. AF must be one of + , , or + (for compatibility reasons is an alias for + ). It may be omited for IPv4 addresses. The actual IPv6 address + that follows the keyword must be placed inside brackets: + ipv6 [fd01:2345:6789:abcd::1]:7800. + + Each DRBD resource needs a TCP port which is used to + connect to the node's partner device. Two different DRBD resources may not use the + same addr:port combination on the same node. + - - - - - - - - - - - - + + + + + + - drbd.confmeta-diskdrbd.confflexible-meta-disk - Internal means that the last part of the backing device is used to store - the meta-data. You must not use [index] with - internal. Note: Regardless of whether you use the or - the keyword, it will always be of - the size needed for the remaining storage size. - - You can use a single block device to store - meta-data of multiple DRBD devices. - E.g. use meta-disk /dev/sde6[0]; and meta-disk /dev/sde6[1]; - for two different resources. In this case the meta-disk - would need to be at least 256 MB in size. - - With the keyword you specify - a block device as meta-data storage. You usually use this with LVM, - which allows you to have many variable sized block devices. - The required size of the meta-disk block device is - 36kB + Backing-Storage-size / 32k. Round this number to the next 4kb - boundary up and you have the exact size. 
- Rule of the thumb: 32kByte per 1GByte of storage, round up to the next - MB. + + drbd.conf + + meta-disk + Internal means that the last part of the backing device is used to + store the meta-data. The size of the meta-data is computed based on the size of the + device. + + When a device is specified, either with or without an + index, DRBD stores the meta-data on this device. Without + index, the size of the meta-data is determined by the size + of the data device. This is usually used with LVM, which allows to have many variable + sized block devices. The meta-data size is 36kB + Backing-Storage-size / 32k, rounded up + to the next 4kb boundary. (Rule of the thumb: 32kByte per 1GByte of storage, rounded up + to the next MB.) + + When an index is specified, each index number refers to + a fixed slot of meta-data of 128 MB, which allows a maximum data size of 4 GB. This way, + multiple DBRD devices can share the same meta-data device. For example, if /dev/sde6[0] + and /dev/sde6[1] are used, /dev/sde6 must be at least 256 MB big. Because of the hard size + limit, use of meta-disk indexes is discouraged. + - - - + + - drbd.confon-io-errorhandler is taken, if the lower level - device reports io-error to the upper layers. - - handler may be pass_on, call-local-io-error - or detach. - - pass_on: Report the io-error to the upper layers. On Primary report - it to the mounted file system. On Secondary ignore it. - call-local-io-error: Call the handler script - . - detach: The node drops its low level device, and continues in diskless mode. + + drbd.conf + + on-io-error + handler is taken, if the lower level device + reports io-errors to the upper layers. + + handler may be , + or + + : The node downgrades the disk status to inconsistent, marks the + erroneous block as inconsistent in the bitmap and retries the IO on the remote node. + + : Call the handler script + . + + : The node drops its low level device, and continues in + diskless mode. 
+ - - - + + - drbd.conffencing - Under we understand preventive - measures to avoid situations where both nodes are primary - and disconnected (AKA split brain). - + + drbd.conf + + fencing + By we understand preventive measures to avoid + situations where both nodes are primary and disconnected (AKA split brain). + Valid fencing policies are: + - - - + + - This is the default policy. No fencing actions are undertaken. - + This is the default policy. No fencing actions are taken. + - - - + + - If a node becomes a disconnected primary, it tries to fence - the peer's disk. This is done by calling the fence-peer - handler. The handler is supposed to reach the other node over - alternative communication paths and call 'drbdadm outdate - res' there. - + If a node becomes a disconnected primary, it tries to fence the peer's + disk. This is done by calling the handler. The + handler is supposed to reach the other node over alternative communication paths + and call '' there. + - - - + + - If a node becomes a disconnected primary, it freezes all - its IO operations and calls its fence-peer handler. The - fence-peer handler is supposed to reach the peer over - alternative communication paths and call 'drbdadm outdate - res' there. In case it cannot reach the peer it should - stonith the peer. IO is resumed as soon as the situation - is resolved. In case your handler fails, you can resume - IO with the command. - + If a node becomes a disconnected primary, it freezes all its IO operations + and calls its fence-peer handler. The fence-peer handler is supposed to reach + the peer over alternative communication paths and call 'drbdadm outdate res' + there. In case it cannot reach the peer it should stonith the peer. IO is + resumed as soon as the situation is resolved. In case your handler fails, you + can resume IO with the command. 
+ - - - - - - drbd.conf - use-bmbv - - In case the backing storage's driver has a merge_bvec_fn() function, - DRBD has to pretend that it can only process IO requests in - units not lager than 4kByte. (At time of writing the only known drivers which have such a function - are: md (software raid driver), dm (device mapper - LVM) and DRBD - itself) - To get best performance out of DRBD on top of software RAID (or any - other driver with a merge_bvec_fn() function) you might enable this - function, if you know for sure that the merge_bvec_fn() function will - deliver the same results on all nodes of your cluster. I.e. the - physical disks of the software RAID are of exactly the same - type. Use this option only if you know what you are - doing. - - - - - - - - - - - - - + + + + + + drbd.conf - no-disk-flushes + disk-barrier + drbd.conf - no-disk-flushes + + disk-flushes + drbd.conf - no-disk-flushes + disk-drain - DRBD has four implementations to express write-after-write dependencies to - its backing storage device. DRBD will use the first method that is - supported by the backing storage device and that is not disabled by the user. - - When selecting the method you should not only base your decision on the - measurable performance. In case your backing storage device has a volatile - write cache (plain disks, RAID of plain disks) you should use one - of the first two. In case your backing storage device has battery-backed - write cache you may go with option 3 or 4. Option 4 will deliver the - best performance such devices. - - Unfortunately device mapper (LVM) does not support barriers. - - The letter after "wo:" in /proc/drbd indicates with method is currently in - use for a device: b, f, d, n. The implementations: - + + DRBD has four implementations to express write-after-write dependencies to its + backing storage device. DRBD will use the first method that is supported by the + backing storage device and that is not disabled by the user. 
By default all three + methods are enabled. + + When selecting the method you should not only base your decision on the + measurable performance. In case your backing storage device has a volatile write cache + (plain disks, RAID of plain disks) you should use one of the first two. In case your + backing storage device has battery-backed write cache you may go with option 3. + Option 4 (disable everything, use "none") is dangerous + on most IO stacks, may result in write-reordering, and if so, + can theoretically be the reason for data corruption, or disturb + the DRBD protocol, causing spurious disconnect/reconnect cycles. + Do not use . + + Unfortunately device mapper (LVM) might not support barriers. + + The letter after "wo:" in /proc/drbd indicates with method is currently in use + for a device: , , , + . The implementations are: + barrier + - The first requirs that the driver of the - backing storage device support barriers (called 'tagged command queuing' in - SCSI and 'native command queuing' in SATA speak). The use of this - method can be disabled by the we option. - + The first requires that the driver of the backing storage device support + barriers (called 'tagged command queuing' in SCSI and 'native command queuing' + in SATA speak). The use of this method can be disabled by setting the + options to . + flush + - The second requires that the backing device support disk flushes (called - 'force unit access' in the drive vendors speak). The use of this method - can be disabled using the option. - + The second requires that the backing device support disk flushes (called + 'force unit access' in the drive vendors speak). The use of this method can be + disabled setting to . + drain + - The third method is simply to let write requests drain before - write requests of a new reordering domain are issued. That was the - only implementation before 8.0.9. You can prevent to use of this - method by using the option. 
- + The third method is simply to let write requests drain before write + requests of a new reordering domain are issued. This was the only implementation + before 8.0.9. + none + - The fourth method is to not express write-after-write dependencies to - the backing store at all. - + The fourth method is to not express write-after-write dependencies to + the backing store at all, by also specifying . + This is dangerous + on most IO stacks, may result in write-reordering, and if so, + can theoretically be the reason for data corruption, or disturb + the DRBD protocol, causing spurious disconnect/reconnect cycles. + Do not use . + - - - + + drbd.conf - no-md-flushes + + md-flushes - Disables the use of disk flushes and barrier BIOs when accessing - the meta data device. See the notes on . - + + Disables the use of disk flushes and barrier BIOs when accessing the meta data + device. See the notes on . + - - - + + drbd.conf + max-bio-bvecs - In some special circumstances the device mapper stack manages to - pass BIOs to DRBD that violate the constraints that are set forth - by DRBD's merge_bvec() function and which have more than one bvec. - A known example is: - phys-disk -> DRBD -> LVM -> Xen -> missaligned partition (63) -> DomU FS. - Then you might see "bio would need to, but cannot, be split:" in - the Dom0's kernel log. - The best workaround is to proper align the partition within - the VM (E.g. start it at sector 1024). Costs 480 KiByte of storage. - Unfortunately the default of most Linux partitioning tools is - to start the first partition at an odd number (63). Therefore - most distribution's install helpers for virtual linux machines will - end up with missaligned partitions. - The second best workaround is to limit DRBD's max bvecs per BIO - (= max-bio-bvecs) to 1. Might cost performance. - The default value of is 0, which means that - there is no user imposed limitation. 
- + + In some special circumstances the device mapper stack manages to pass BIOs to + DRBD that violate the constraints that are set forth by DRBD's merge_bvec() function + and which have more than one bvec. A known example is: phys-disk -> DRBD -> LVM + -> Xen -> misaligned partition (63) -> DomU FS. Then you might see "bio would + need to, but cannot, be split:" in the Dom0's kernel log. + + The best workaround is to proper align the partition within the VM (E.g. start + it at sector 1024). This costs 480 KiB of storage. Unfortunately the default of most + Linux partitioning tools is to start the first partition at an odd number (63). + Therefore most distribution's install helpers for virtual linux machines will end up + with misaligned partitions. The second best workaround is to limit DRBD's max bvecs + per BIO (= ) to 1, but that might cost + performance. + + The default value of is 0, which means that there + is no user imposed limitation. + - + - drbd.confsndbuf-size size is the size of the TCP socket send buffer. - The default value is 0, i.e. autotune. You can specify smaller or larger values. Larger values - are appropriate for reasonable write throughput with protocol A over high - latency networks. Very large values like 1M may cause problems. Also values - below 32K do not make much sense. Since 8.0.13 resp. 8.2.7, setting the size - value to 0 means that the kernel should autotune this. - + + drbd.conf + disk-timeout + + + If the driver of the lower_device + does not finish an IO request within disk_timeout, + DRBD considers the disk as failed. If DRBD is connected to a remote host, + it will reissue local pending IO requests to the peer, and ship all new + IO requests to the peer only. The disk state advances to diskless, as soon + as the backing block device has finished all IO requests. + The default value of is 0, which means that no timeout is enforced. + The default unit is 100ms. This option is available since 8.3.12. 
+ + - + - drbd.confrcvbuf-size size is the size of the TCP socket receive buffer. - The default value is 0, i.e. autotune. You can specify smaller or larger values. - Usually this should be left at its default. Setting the size - value to 0 means that the kernel should autotune this. - + + drbd.conf + read-balancing + + + The supported methods for load balancing of + read requests are , , + , + , , + , , + , + and . + The default value of is . + This option is available since 8.4.1. + + - - - + + - drbd.conftimeout -If the partner node fails to send an expected response packet within -time 10ths -of a second, the partner node -is considered dead and therefore the TCP/IP connection is abandoned. This must be lower than connect-int and ping-int. -The default value is 60 = 6 seconds, the unit 0.1 seconds. - + + drbd.conf + + sndbuf-size + size is the size of the TCP socket send + buffer. The default value is 0, i.e. autotune. You can specify smaller or larger + values. Larger values are appropriate for reasonable write throughput with protocol A + over high latency networks. Values below 32K do not make sense. Since 8.0.13 resp. + 8.2.7, setting the size value to 0 means that the kernel + should autotune this. + - - - + + - drbd.confconnect-int -In case it is not possible to connect to the remote DRBD device immediately, -DRBD keeps on trying to connect. With this option you can set the time -between two tries. The default value is 10 seconds, the unit is 1 second. - + + drbd.conf + + rcvbuf-size + size is the size of the TCP socket receive + buffer. The default value is 0, i.e. autotune. You can specify smaller or larger + values. Usually this should be left at its default. Setting the + size value to 0 means that the kernel should autotune + this. + - - - + + - drbd.confping-int -If the TCP/IP connection linking a DRBD device pair is idle for more than -time seconds, DRBD will generate a keep-alive -packet to check if its partner is still alive. 
The default is 10 seconds, -the unit is 1 second. - + + drbd.conf + + timeout + If the partner node fails to send an expected response packet within + time tenths of a second, the partner node is considered + dead and therefore the TCP/IP connection is abandoned. This must be lower than + connect-int and ping-int. The + default value is 60 = 6 seconds, the unit 0.1 seconds. + - - - + + - drbd.confping-timeout - The time the peer has time to answer to a keep-alive packet. In case - the peer's reply is not received within this time period, it is - considered as dead. The default value is 500ms, the default unit is 100ms. - + + drbd.conf + + connect-int + In case it is not possible to connect to the remote DRBD device + immediately, DRBD keeps on trying to connect. With this option you can set the time + between two retries. The default value is 10 seconds, the unit is 1 second. + - - - + + - drbd.confmax-buffers - Maximum number of requests to be allocated by DRBD. Unit is PAGE_SIZE, - which is 4 KB on most systems. - The minimum is hard coded to 32 (=128 KB). - For high-performance installations it might help, if you - increase that number. These buffers are used to hold - data blocks while they are written to disk. - + + drbd.conf + + ping-int + If the TCP/IP connection linking a DRBD device pair is idle for more + than time seconds, DRBD will generate a keep-alive packet + to check if its partner is still alive. The default is 10 seconds, the unit is 1 + second. + - - - + + - drbd.confko-count - In case the secondary node fails to complete a single write - request for count times the - timeout, it is expelled from the - cluster. (I.e. the primary node goes into StandAlone mode.) - The default value is 0, which disables this feature. - + + drbd.conf + + ping-timeout + The time the peer has time to answer to a keep-alive packet. In case + the peer's reply is not received within this time period, it is considered as dead. 
+ The default value is 500ms, the default unit are tenths of a second. + - - - + + - drbd.confmax-epoch-size - The highest number of data blocks between two write barriers. - If you set this smaller than 10, you might decrease your performance. - + + drbd.conf + + max-buffers + Maximum number of requests to be allocated by DRBD. Unit is PAGE_SIZE, + which is 4 KiB on most systems. The minimum is hard coded to 32 (=128 KiB). For + high-performance installations it might help if you increase that number. These + buffers are used to hold data blocks while they are written to disk. + - - - + + - drbd.confallow-two-primaries - With this option set you may assign primary role to both nodes. You only should - use this option if you use a shared storage file system on top of - DRBD. At the time of writing the only ones are: OCFS2 and GFS. If you - use this option with any other file system, you are going to crash your - nodes and to corrupt your data! - + + drbd.conf + + ko-count + In case the secondary node fails to complete a single write request for + count times the timeout, it is + expelled from the cluster. (I.e. the primary node goes into + mode.) The default value is 0, which disables this + feature. + - - - + + + + + drbd.conf + + max-epoch-size + The highest number of data blocks between two write barriers. If you + set this smaller than 10, you might decrease your performance. + + + + + + + + + drbd.conf + + allow-two-primaries + With this option set you may assign the primary role to both nodes. You + only should use this option if you use a shared storage file system on top of DRBD. At + the time of writing the only ones are: OCFS2 and GFS. If you use this option with any + other file system, you are going to crash your nodes and to corrupt your data! 
+ + + + + + drbd.conf - unplug-watermark + + unplug-watermark - When the number of pending write requests on the standby - (secondary) node exceeds the unplug-watermark, we trigger - the request processing of our backing storage device. - Some storage controllers deliver better performance with small - values, others deliver best performance when the value is set to - the same value as max-buffers. Minimum 16, default 128, maximum - 131072. - + + When the number of pending write requests on the standby (secondary) node + exceeds the , we trigger the request processing of + our backing storage device. Some storage controllers deliver better performance with + small values, others deliver best performance when the value is set to the same value + as max-buffers. Minimum 16, default 128, maximum 131072. + - - - + + - drbd.confcram-hmac-alg - You need to specify the HMAC algorithm to enable peer authentication - at all. You are strongly encouraged to use peer authentication. The HMAC - algorithm will be used for the challenge response authentication - of the peer. You may specify any digest algorithm that is named in - /proc/crypto. - + + drbd.conf + + cram-hmac-alg + You need to specify the HMAC algorithm to enable peer authentication at + all. You are strongly encouraged to use peer authentication. The HMAC algorithm will + be used for the challenge response authentication of the peer. You may specify any + digest algorithm that is named in . + - - - + + - drbd.confshared-secret - The shared secret used in peer authentication. May be up to 64 characters. - Note that peer authentication is disabled as long as no cram-hmac-alg - (see above) is specified. - + + drbd.conf + + shared-secret + The shared secret used in peer authentication. May be up to 64 + characters. Note that peer authentication is disabled as long as no + (see above) is specified. 
+ - - - policy - + policy + drbd.conf - after-sb-0pri + + after-sb-0pri - possible policies are: - + + possible policies are: + - - - + + - No automatic resynchronization, simply disconnect. - + No automatic resynchronization, simply disconnect. + - - - + + - Auto sync from the node that was primary before the split-brain situation happened. - + Auto sync from the node that was primary before the split-brain situation + happened. + - - - + + - Auto sync from the node that became primary as second during - the split-brain situation. - + Auto sync from the node that became primary as second during the + split-brain situation. + - - - + + - In case one node did not write anything since the split - brain became evident, sync from the node that wrote something - to the node that did not write anything. In case none wrote - anything this policy uses a random decision to perform - a "resync" of 0 blocks. In case both have written something - this policy disconnects the nodes. - + In case one node did not write anything since the split brain became + evident, sync from the node that wrote something to the node that did not write + anything. In case none wrote anything this policy uses a random decision to + perform a "resync" of 0 blocks. In case both have written something this policy + disconnects the nodes. + - - - + + - Auto sync from the node that touched more blocks during the - split brain situation. - + Auto sync from the node that touched more blocks during the split brain + situation. + - - - + + - Auto sync to the named node. - + Auto sync to the named node. + - - - policy - + policy + drbd.conf - after-sb-1pri + + after-sb-1pri - possible policies are: - + + possible policies are: + - - - + + - No automatic resynchronization, simply disconnect. - + No automatic resynchronization, simply disconnect. + - - - + + - Discard the version of the secondary if the outcome - of the algorithm would also - destroy the current secondary's data. Otherwise disconnect. 
- + Discard the version of the secondary if the outcome of the + algorithm would also destroy the current + secondary's data. Otherwise disconnect. + - - - + + - Always take the decision of the - algorithm. Even if that causes an erratic change of - the primary's view of the data. This is only useful if - you use a 1node FS (i.e. not OCFS2 or GFS) with the - allow-two-primaries flag, _AND_ if you really know what you - are doing. This is DANGEROUS and MAY CRASH YOUR MACHINE - if you have an FS mounted on the primary node. - + Always take the decision of the algorithm, + even if that causes an erratic change of the primary's view of the data. This is + only useful if you use a one-node FS (i.e. not OCFS2 or GFS) with the + flag, AND if you + really know what you are doing. This is DANGEROUS and MAY CRASH YOUR + MACHINE if you have an FS mounted on the primary node. + - - - + + - Discard the secondary's version. - + Discard the secondary's version. + - - - + + - Always honor the outcome of the algorithm. In case it decides the current - secondary has the right data, it calls the "pri-lost-after-sb" - handler on the current primary. - + Always honor the outcome of the algorithm. + In case it decides the current secondary has the right data, it calls the + "pri-lost-after-sb" handler on the current primary. + - - - policy - + policy + drbd.conf - after-sb-2pri + + after-sb-2pri - possible policies are: - + + possible policies are: + - - - + + - No automatic resynchronization, simply disconnect. - + No automatic resynchronization, simply disconnect. + - - - + + - Always take the decision of the - algorithm. Even if that causes an erratic change of - the primary's view of the data. This is only useful if - you use a 1node FS (i.e. not OCFS2 or GFS) with the - allow-two-primaries flag, _AND_ if you really know what you - are doing. This is DANGEROUS and MAY CRASH YOUR MACHINE - if you have an FS mounted on the primary node. 
- + Always take the decision of the algorithm, + even if that causes an erratic change of the primary's view of the data. This is + only useful if you use a one-node FS (i.e. not OCFS2 or GFS) with the + flag, AND if you + really know what you are doing. This is DANGEROUS and MAY CRASH YOUR + MACHINE if you have an FS mounted on the primary node. + - - - + + - Call the "pri-lost-after-sb" helper program on one of the - machines. This program is expected to reboot the - machine, i.e. make it secondary. - + Call the "pri-lost-after-sb" helper program on one of the machines. This + program is expected to reboot the machine, i.e. make it secondary. + - - - + + - Normally the automatic after-split-brain policies are only - used if current states of the UUIDs do not indicate the - presence of a third node. - - With this option you request that the automatic - after-split-brain policies are used as long as the data - sets of the nodes are somehow related. This might cause - a full sync, if the UUIDs indicate the presence of a third - node. (Or double faults led to strange UUID sets.) - + Normally the automatic after-split-brain policies are only used if current + states of the UUIDs do not indicate the presence of a third node. + + With this option you request that the automatic after-split-brain policies are + used as long as the data sets of the nodes are somehow related. This might cause a + full sync, if the UUIDs indicate the presence of a third node. (Or double faults led + to strange UUID sets.) + - - - policy - + policy + drbd.conf - rr-conflict + + rr-conflict - To solve the cases when the outcome of the resync decision is - incompatible with the current role assignment in the cluster. - + + This option helps to solve the cases when the outcome of the resync decision is + incompatible with the current role assignment in the cluster. + - - - + + - No automatic resynchronization, simply disconnect. - + No automatic resynchronization, simply disconnect. 
+ - - - + + - Sync to the primary node is allowed, violating the - assumption that data on a block device are stable for one - of the nodes. Dangerous, do not use. - + Sync to the primary node is allowed, violating the assumption that data on + a block device are stable for one of the nodes. Dangerous, do not + use. + - - - + + - Call the "pri-lost" helper program on one of the - machines. This program is expected to reboot the - machine, i.e. make it secondary. - + Call the "pri-lost" helper program on one of the machines. This program is + expected to reboot the machine, i.e. make it secondary. + - - - alg - + alg + drbd.conf + data-integrity-alg - DRBD can ensure the data integrity of the user's data on the network - by comparing hash values. Normally this is ensured by the 16 bit checksums - in the headers of TCP/IP packets. - This option can be set to any of the kernel's data digest algorithms. - In a typical kernel configuration you should have - at least one of , , and - available. By default this is not enabled. + + DRBD can ensure the data integrity of the user's data on the network by + comparing hash values. Normally this is ensured by the 16 bit checksums in the headers + of TCP/IP packets. + + This option can be set to any of the kernel's data digest algorithms. In a + typical kernel configuration you should have at least one of , + , and available. By default this is not + enabled. + See also the notes on data integrity. + - - - + + drbd.conf - no-tcp-cork + + tcp-cork - DRBD usually uses the TCP socket option TCP_CORK to hint to the network - stack when it can expect more data, and when it should flush out what it - has in its send queue. It turned out that there is at lease one network - stack that performs worse when one uses this hinting method. Therefore - we introducted this option, which disable the setting and clearing of - the TCP_CORK socket option by DRBD. 
+ + DRBD usually uses the TCP socket option TCP_CORK to hint to the network stack + when it can expect more data, and when it should flush out what it has in its send + queue. It turned out that there is at least one network stack that performs worse when + one uses this hinting method. Therefore we introducted this option. By setting + to you can disable the setting and + clearing of the TCP_CORK socket option by DRBD. + - - - + + + + + + - Wait for connection timeout. - drbd.confwfc-timeout - The init script drbd8 blocks the boot process - until the DRBD resources are connected. - When the cluster manager starts later, - it does not see a resource with internal split-brain. - In case you want to limit the wait time, do it here. - Default is 0, which means unlimited. The unit is seconds. - + By default DRBD blocks when the available TCP send queue becomes full. That + means it will slow down the application that generates the write requests that cause + DRBD to send more data down that TCP connection. + + When DRBD is deployed with DRBD-proxy it might be more desirable that DRBD goes + into AHEAD/BEHIND mode shortly before the send queue becomes full. In AHEAD/BEHIND + mode DRBD does no longer replicate data, but still keeps the connection open. + + The advantage of the AHEAD/BEHIND mode is that the application is not slowed + down, even if DRBD-proxy's buffer is not sufficient to buffer all write requests. The + downside is that the peer node falls behind, and that a resync will be necessary to + bring it back into sync. During that resync the peer node will have an inconsistent + disk. + + Available congestion_policys are + and . The default is + . Fill_threshold might be in the + range of 0 to 10GiBytes. The default is 0 which disables the check. + Active_extents_threshold has the same limits as + . + + The AHEAD/BEHIND mode and its settings are available since DRBD 8.3.10. + - - - + + + + Wait for connection timeout. 
+ drbd.conf + + wfc-timeout + The init script + drbd + + 8 + blocks the boot process until the DRBD resources are connected. When + the cluster manager starts later, it does not see a resource with internal + split-brain. In case you want to limit the wait time, do it here. Default is 0, which + means unlimited. The unit is seconds. + + + + + + - drbd.confdegr-wfc-timeout - Wait for connection timeout, if this node was a degraded cluster. - In case a degraded cluster (= cluster with only one node left) - is rebooted, this timeout value is used instead of wfc-timeout, - because the peer is less likely to show up in time, - if it had been dead before. Value 0 means unlimited. - + + drbd.conf + + degr-wfc-timeout + Wait for connection timeout, if this node was a degraded cluster. In + case a degraded cluster (= cluster with only one node left) is rebooted, this timeout + value is used instead of wfc-timeout, because the peer is less likely to show up in + time, if it had been dead before. Value 0 means unlimited. + - - - + + - drbd.confoutdated-wfc-timeout - Wait for connection timeout, if the peer was outdated. - In case a degraded cluster (= cluster with only one node left) - with an outdated peer disk is rebooted, this timeout value is used instead of wfc-timeout, - because the peer is not allowed to become primary in the meantime. - Value 0 means unlimited. - + + drbd.conf + + outdated-wfc-timeout + Wait for connection timeout, if the peer was outdated. In case a + degraded cluster (= cluster with only one node left) with an outdated peer disk is + rebooted, this timeout value is used instead of wfc-timeout, because the peer is not + allowed to become primary in the meantime. Value 0 means unlimited. + - - - + + - By setting this option you can make the init script to continue - to wait even if the device pair had a split brain situation - and therefore refuses to connect. 
- + By setting this option you can make the init script to continue to wait even if + the device pair had a split brain situation and therefore refuses to connect. + - - - + + - Sets on which node the device should be promoted to primary role by - the init script. The node-name might either - be a host name or the key word . When this option is - not set the devices stay in secondary role on both nodes. Usually - one delegates the role assignment to a cluster manager (e.g. heartbeat). - + Sets on which node the device should be promoted to primary role by the init + script. The node-name might either be a host name or the + keyword . When this option is not set the devices stay in + secondary role on both nodes. Usually one delegates the role assignment to a cluster + manager (e.g. heartbeat). + - - - + + - Usually and are - ignored for stacked devices, instead twice the amount of - is used for the connection timeouts. - With the keyword you disable this, and force - DRBD to mind the and - statements. Only do that if the peer of the stacked resource is usually not - available or will not become primary usually. - By using this option incorrectly, you run the risk of causing unexpected split brain. - + Usually and are + ignored for stacked devices, instead twice the amount of + is used for the connection timeouts. With the + keyword you disable this, and force DRBD to mind the and + statements. Only do that if the peer of the stacked + resource is usually not available or will usually not become primary. By using this + option incorrectly, you run the risk of causing unexpected split brain. + - - - + + - drbd.confrate - To ensure a smooth operation of the application on top of DRBD, - it is possible to limit the bandwidth which may be used by - background synchronizations. The default is 250 KB/sec, the - default unit is KB/sec. Optional suffixes K, M, G are allowed. 
- + + drbd.conf + + resync-rate + To ensure a smooth operation of the application on top of DRBD, it is + possible to limit the bandwidth which may be used by background synchronizations. The + default is 250 KB/sec, the default unit is KB/sec. Optional suffixes K, M, G are + allowed. + - - - + + - drbd.confafter - By default, resynchronization of all devices would run in parallel. - By defining a sync-after dependency, the resynchronization of this - resource will start only if the resource res-name - is already in connected state (= finished its resynchronization). - + + drbd.conf + + use-rle + During resync-handshake, the dirty-bitmaps of the nodes are exchanged + and merged (using bit-or), so the nodes will have the same understanding of which + blocks are dirty. On large devices, the fine grained dirty-bitmap can become large as + well, and the bitmap exchange can take quite some time on low-bandwidth links. + + Because the bitmap typically contains compact areas where all bits are unset + (clean) or set (dirty), a simple run-length encoding scheme can considerably reduce + the network traffic necessary for the bitmap exchange. + + For backward compatibilty reasons, and because on fast links this possibly does + not improve transfer time but consumes cpu cycles, this defaults to off. + - - - + + - drbd.confal-extents - DRBD automatically performs hot area detection. With this - parameter you control how big the hot area (= active set) can - get. Each extent marks 4M of the backing storage (= low-level device). - In case a primary node leaves the cluster unexpectedly, the areas covered - by the active set must be resynced upon rejoining of the failed - node. The data structure is stored in the meta-data area, therefore each - change of the active set is a write operation - to the meta-data device. A higher number of extents gives - longer resync times but less updates to the meta-data. The - default number of extents is - 127. 
(Minimum: 7, Maximum: 3843) - + + drbd.conf + + resync-after + By default, resynchronization of all devices would run in parallel. By + defining a resync-after dependency, the resynchronization of this resource will start + only if the resource res-name is already in connected state + (i.e., has finished its resynchronization). + - - - + + - During online verification (as initiated by the - verify sub-command), - rather than doing a bit-wise comparison, DRBD applies a hash function - to the contents of every block being verified, and compares that - hash with the peer. This option defines the hash algorithm being - used for that purpose. It can be set to any of the kernel's data - digest algorithms. In a typical kernel configuration you should have - at least one of , , and - available. By default this is not enabled; you must set this - option explicitly in order to be able to use on-line device verification. + + drbd.conf + + al-extents + DRBD automatically performs hot area detection. With this parameter you + control how big the hot area (= active set) can get. Each extent marks 4M of the + backing storage (= low-level device). In case a primary node leaves the cluster + unexpectedly, the areas covered by the active set must be resynced upon rejoining of + the failed node. The data structure is stored in the meta-data area, therefore each + change of the active set is a write operation to the meta-data device. A higher number + of extents gives longer resync times but less updates to the meta-data. The default + number of extents is 127. (Minimum: 7, Maximum: + 3843) + + + + + + + + During online verification (as initiated by the verify sub-command), rather than doing a bit-wise + comparison, DRBD applies a hash function to the contents of every block being + verified, and compares that hash with the peer. This option defines the hash algorithm + being used for that purpose. It can be set to any of the kernel's data digest + algorithms. 
In a typical kernel configuration you should have at least one of + , , and available. By + default this is not enabled; you must set this option explicitly in order to be able + to use on-line device verification. + See also the notes on data integrity. + - - - + + - A resync process sends all marked data blocks form the source to - the destination node, as long as no is - given. When one is specified the resync process exchanges hash values of all - marked blocks first, and sends only those data blocks over, that have different - hash values. - This setting is useful for DRBD setups with low bandwidth links. - During the restart of a crashed primary node, all blocks covered by the - activity log are marked for resync. But a large part of those will actually - be still in sync, therefore using will lower - the required bandwidth in exchange for CPU cycles. + A resync process sends all marked data blocks from the source to the destination + node, as long as no is given. When one is specified the + resync process exchanges hash values of all marked blocks first, and sends only those + data blocks that have different hash values. + + This setting is useful for DRBD setups with low bandwidth links. During the + restart of a crashed primary node, all blocks covered by the activity log are marked + for resync. But a large part of those will actually be still in sync, therefore using + will lower the required bandwidth in exchange for CPU + cycles. + - - - + + + + + + + + - drbd.confcpu-mask - Sets the cpu-affinity-mask for DRBD's kernel threads of this device. The - default value of cpu-mask is 0, which means - that DRBD's kernel threads should be spread over all CPUs of the machine. - This value must be given in hexadecimal notation. If it is too big it will - be truncated. - + The dynamic resync speed controller gets enabled with setting + plan_time to a positive value. 
It aims to fill the buffers + along the data path with either a constant amount of data + fill_target, or aims to have a constant delay time of + delay_target along the path. The controller has an upper + bound of max_rate. + + By plan_time the agility of the controller is + configured. Higher values yield for slower/lower responses of the controller to + deviation from the target value. It should be at least 5 times RTT. For regular data + paths a fill_target in the area of 4k to 100k is + appropriate. For a setup that contains drbd-proxy it is advisable to use + delay_target instead. Only when + fill_target is set to 0 the controller will use + delay_target. 5 times RTT is a reasonable starting value. + Max_rate should be set to the bandwidth available between + the DRBD-hosts and the machines hosting DRBD-proxy, or to the available + disk-bandwidth. + + The default value of plan_time is 0, the default unit + is 0.1 seconds. Fill_target has 0 and sectors as default + unit. Delay_target has 1 (100ms) and 0.1 as default unit. + Max_rate has 10240 (100MiB/s) and KiB/s as default + unit. + + The dynamic resync speed controller and its settings are available since DRBD + 8.3.9. + - - - + + - drbd.confpri-on-incon-degr - This handler is called if the node is primary, degraded - and if the local copy of the data is inconsistent. + A node that is primary and sync-source has to schedule application IO requests + and resync IO requests. The min_rate tells DRBD use only up + to min_rate for resync IO and to dedicate all other available IO bandwidth to + application requests. + + Note: The value 0 has a special meaning. It disables the limitation of resync IO + completely, which might slow down application IO considerably. Set it to a value of 1, + if you prefer that resync IO never slows down application IO. + + Note: Although the name might suggest that it is a lower bound for the dynamic + resync speed controller, it is not. 
If the DRBD-proxy buffer is full, the dynamic + resync speed controller is free to lower the resync speed down to 0, completely + independent of the setting. + + Min_rate has 4096 (4MiB/s) and KiB/s as default + unit. + - - - + + - drbd.confpri-lost-after-sb - The node is currently primary, but lost the after split - brain auto recovery procedure. As as consequence, it should be abandoned. - + This setting controls what happens to IO requests on a degraded, disk less node + (I.e. no data store is reachable). The available policies are + and . + + If ond-policy is set to + you can either resume IO by attaching/connecting the last lost data storage, or by the + drbdadm resume-io res + command. The latter will result in IO errors of course. + + The default is . This setting is available since DRBD + 8.3.9. + - - - + + - drbd.confpri-lost - The node is currently primary, but DRBD's algorithm - thinks that it should become sync target. As a consequence it should - give up its primary role. - + + drbd.conf + + cpu-mask + Sets the cpu-affinity-mask for DRBD's kernel threads of this device. + The default value of cpu-mask is 0, which means that DRBD's + kernel threads should be spread over all CPUs of the machine. This value must be given + in hexadecimal notation. If it is too big it will be truncated. + - - - + + - drbd.conffence-peer - The handler is part of the - mechanism. This handler is called in case the node needs to fence the - peer's disk. It should use other communication paths than DRBD's network - link. + + drbd.conf + + pri-on-incon-degr + This handler is called if the node is primary, degraded and if the + local copy of the data is inconsistent. + - - - + + - drbd.conflocal-io-error - DRBD got an IO error from the local IO subsystem. - + + drbd.conf + + pri-lost-after-sb + The node is currently primary, but lost the after-split-brain auto + recovery procedure. As as consequence, it should be abandoned. 
+ - - - + + - drbd.confsplit-brain - DRBD detected a split brain situation. Manual recovery is necessary. - This handler should alert someone on duty. - + + drbd.conf + + pri-lost + The node is currently primary, but DRBD's algorithm thinks that it + should become sync target. As a consequence it should give up its primary role. + - - - + + - drbd.confbefore-resync-target - DRBD calls this handler just before a resync beginns on the node - that becomes resync target. It might be used to take a snapshot of the - backing block device. - + + drbd.conf + + fence-peer + The handler is part of the mechanism. This + handler is called in case the node needs to fence the peer's disk. It should use other + communication paths than DRBD's network link. + - - - + + - drbd.confafter-resync-target - DRBD calls this handler just after a resync operation finished on the - node which's disk just became consistent after beeing inconsistent for the - duration of the resync. It might be used to remove a snapshot of the backing device - that was created by the handler. - + + drbd.conf + + local-io-error + DRBD got an IO error from the local IO subsystem. + + + + + + + + + drbd.conf + + initial-split-brain + DRBD has connected and detected a split brain situation. This handler + can alert someone in all cases of split brain, not just those that go + unresolved. + + + + + + + + + drbd.conf + + split-brain + DRBD detected a split brain situation but remains unresolved. Manual + recovery is necessary. This handler should alert someone on duty. + + + + + + + + + drbd.conf + + before-resync-target + DRBD calls this handler just before a resync begins on the node that + becomes resync target. It might be used to take a snapshot of the backing block + device. + + + + + + + + + drbd.conf + + after-resync-target + DRBD calls this handler just after a resync operation finished on the + node whose disk just became consistent after being inconsistent for the duration of + the resync. 
It might be used to remove a snapshot of the backing device that was + created by the handler. + Other Keywords + - - - + + - drbd.confinclude - Include all files matching the wildcard pattern file-pattern. - The statement - is only allowed on the top level, i.e. it is not allowed inside any section. - + + drbd.conf + + include + Include all files matching the wildcard pattern + file-pattern. The statement is + only allowed on the top level, i.e. it is not allowed inside any section. + Notes on data integrity - There are two independent methods in DRBD to ensure the integrity of -the mirrored data. The online-verify mechanism and the -of the section. - Both mechanisms might deliver false positives if the user of DRBD modifies the -data which gets written to disk while the transfer goes on. Currently the swap code and -ReiserFS are known to do so. In both cases this is not a problem, because when the -initiator of the data transfer does this it already knows that that data block will -not be part of an on disk data structure. - The most recent (2007) example of systematically corruption was an -issue with the TCP offloading engine and the driver of a certain type -of GBit NIC. The actual corruption happened on the DMA transfer from -core memory to the card. Since the TCP checksum gets calculated on the card -this type of corruption stays undetected as long as you do not use -either the online or the data-integrity-alg. - We suggest to use the only during a -pre-production phase due to its CPU costs. Further we suggest to do online - runs regularly e.g. once a month during low load period. + + There are two independent methods in DRBD to ensure the integrity of the mirrored data. + The online-verify mechanism and the of the + section. + + Both mechanisms might deliver false positives if the user of DRBD modifies the data + which gets written to disk while the transfer goes on. 
This may happen for swap, or for + certain append while global sync, or truncate/rewrite workloads, and not necessarily poses a + problem for the integrity of the data. Usually when the initiator of the data transfer does + this, it already knows that that data block will not be part of an on disk data structure, or + will be resubmitted with correct data soon enough. + + The causes the receiving side to log an error about + "Digest integrity check FAILED: Ns +x\n", where N is the sector offset, and x is the size of + the request in bytes. It will then disconnect, and reconnect, thus causing a quick resync. If + the sending side at the same time detected a modification, it warns about "Digest mismatch, + buffer modified by upper layers during write: Ns +x\n", which shows that this was a false + positive. The sending side may detect these buffer modifications immediately after the + unmodified data has been copied to the tcp buffers, in which case the receiving side won't + notice it. + + The most recent (2007) example of systematic corruption was an issue with the TCP + offloading engine and the driver of a certain type of GBit NIC. The actual corruption happened + on the DMA transfer from core memory to the card. Since the TCP checksum gets calculated on + the card, this type of corruption stays undetected as long as you do not use either the online + or the . + + We suggest to use the only during a pre-production + phase due to its CPU costs. Further we suggest to do online runs + regularly e.g. once a month during a low load period. + Version - This document was revised for version 8.3.2 of the DRBD distribution. + + This document was revised for version 8.4.0 of the DRBD distribution. + Author - Written by Philipp Reisner philipp.reisner@linbit.com - and Lars Ellenberg lars.ellenberg@linbit.com. + + Written by Philipp Reisner philipp.reisner@linbit.com and Lars + Ellenberg lars.ellenberg@linbit.com. + Reporting Bugs + Report bugs to drbd-user@lists.linbit.com. 
+ Copyright - Copyright 2001-2008 LINBIT Information Technologies, -Philipp Reisner, Lars Ellenberg. This is free software; -see the source for copying conditions. There is NO warranty; -not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + Copyright 2001-2008 LINBIT Information Technologies, Philipp Reisner, Lars Ellenberg. + This is free software; see the source for copying conditions. There is NO warranty; not even + for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See Also - drbd8, - drbddisk8, - drbdsetup8, - drbdadm8, - DRBD User's Guide, - DRBD web site + + + drbd + + 8 + , + drbddisk + + 8 + , + drbdsetup + + 8 + , + drbdadm + + 8 + , + DRBD User's Guide, + DRBD web site + diff -Nru drbd8-8.3.7/documentation/drbdadm.8 drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdadm.8 --- drbd8-8.3.7/documentation/drbdadm.8 2010-01-13 16:17:24.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdadm.8 2012-02-02 14:09:58.000000000 +0000 @@ -1,13 +1,22 @@ '\" t .\" Title: drbdadm .\" Author: [see the "Author" section] -.\" Generator: DocBook XSL Stylesheets v1.75.1 -.\" Date: 5 Dec 2008 +.\" Generator: DocBook XSL Stylesheets v1.75.2 +.\" Date: 6 May 2011 .\" Manual: System Administration -.\" Source: DRBD 8.3.2 +.\" Source: DRBD 8.4.0 .\" Language: English .\" -.TH "DRBDADM" "8" "5 Dec 2008" "DRBD 8.3.2" "System Administration" +.TH "DRBDADM" "8" "6 May 2011" "DRBD 8.4.0" "System Administration" +.\" ----------------------------------------------------------------- +.\" * Define some portability stuff +.\" ----------------------------------------------------------------- +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.\" http://bugs.debian.org/507673 +.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' .\" ----------------------------------------------------------------- .\" * set default 
formatting .\" ----------------------------------------------------------------- @@ -22,42 +31,97 @@ drbdadm \- Administration tool for DRBD .\" drbdadm .SH "SYNOPSIS" .HP \w'\fBdrbdadm\fR\ 'u -\fBdrbdadm\fR [\-d] [\-c\ {\fIfile\fR}] [\-t\ {\fIfile\fR}] [\-s\ {\fIcmd\fR}] [\-m\ {\fIcmd\fR}] [\-S] [\-h\ {\fIhost\fR}] {\fIcommand\fR} [all | \fIresource\fR...] +\fBdrbdadm\fR [\-d] [\-c\ {\fIfile\fR}] [\-t\ {\fIfile\fR}] [\-s\ {\fIcmd\fR}] [\-m\ {\fIcmd\fR}] [\-S] [\-h\ {\fIhost\fR}] [\-\-\ {\fIbackend\-options\fR}] {\fIcommand\fR} [{all} | {\fIresource\fR\fI[/volume>]\fR...}] .SH "DESCRIPTION" .PP -Drbdadm is the high level tool of the DRBD program suite\&. Drbdadm is to drbdsetup and drbdmeta what ifup/ifdown is to ifconfig\&. Drbdadm reads its configuration file and performs the specified commands by calling the drbdsetup and/or the drbdmeta program\&. +\fBDrbdadm\fR +is the high level tool of the DRBD program suite\&. +\fBDrbdadm\fR +is to +\fBdrbdsetup\fR +and +\fBdrbdmeta\fR +what +\fBifup\fR/\fBifdown\fR +is to +\fBifconfig\fR\&. +\fBDrbdadm\fR +reads its configuration file and performs the specified commands by calling the +\fBdrbdsetup\fR +and/or the +\fBdrbdmeta\fR +program\&. +.PP +\fBDrbdadm\fR +can operate on whole resources or on individual volumes in a resource\&. The sub commands: +\fBattach\fR, +\fBdetach\fR, +\fBprimary\fR, +\fBsecondary\fR, +\fBinvalidate\fR, +\fBinvalidate\-remote\fR, +\fBoutdate\fR, +\fBresize\fR, +\fBverify\fR, +\fBpause\-sync\fR, +\fBresume\-sync\fR, +\fBrole\fR, +\fBcsytate\fR, +\fBdstate\fR, +\fBcreate\-md\fR, +\fBshow\-gi\fR, +\fBget\-gi\fR, +\fBdump\-md\fR, +\fBwipe\-md\fR +work on whole resources and on individual volumes\&. +.PP +Resource level only commands are: +\fBconnect\fR, +\fBdisconnect\fR, +\fBup\fR, +\fBdown\fR, +\fBwait\-connect\fR +and +\fBdump\fR\&. .SH "OPTIONS" .PP \fB\-d\fR, \fB\-\-dry\-run\fR .RS 4 -Just prints the calls of drbdsetup to stdout, but does not run the commands\&. 
+Just prints the calls of +\fBdrbdsetup\fR +to stdout, but does not run the commands\&. .RE .PP -\fB\-c\fR, \fB\-\-config\-file\fR\fIfile\fR +\fB\-c\fR, \fB\-\-config\-file\fR \fIfile\fR .RS 4 Specifies the configuration file drbdadm will use\&. If this parameter is not specified, drbdadm will look for +\fB/etc/drbd\-84\&.conf\fR, \fB/etc/drbd\-83\&.conf\fR, \fB/etc/drbd\-08\&.conf\fR and \fB/etc/drbd\&.conf\fR\&. .RE .PP -\fB\-t\fR, \fB\-\-config\-to\-test\fR\fIfile\fR +\fB\-t\fR, \fB\-\-config\-to\-test\fR \fIfile\fR .RS 4 Specifies an additional configuration file drbdadm to check\&. This option is only allowed with the dump and the sh\-nop commands\&. .RE .PP -\fB\-s\fR, \fB\-\-drbdsetup\fR\fIfile\fR +\fB\-s\fR, \fB\-\-drbdsetup\fR \fIfile\fR .RS 4 -Specifies the full path to the drbdsetup program\&. If this option is omitted, drbdadm will look for +Specifies the full path to the +\fBdrbdsetup\fR +program\&. If this option is omitted, drbdadm will look for \fB/sbin/drbdsetup\fR and \fB\&./drbdsetup\fR\&. .RE .PP -\fB\-m\fR, \fB\-\-drbdmeta\fR\fIfile\fR +\fB\-m\fR, \fB\-\-drbdmeta\fR \fIfile\fR .RS 4 -Specifies the full path to the drbdmeta program\&. If this option is omitted, drbdadm will look for +Specifies the full path to the +\fBdrbdmeta\fR +program\&. If this option is omitted, drbdadm will look for \fB/sbin/drbdmeta\fR and \fB\&./drbdmeta\fR\&. @@ -72,23 +136,33 @@ .RS 4 Specifies to which peer node to connect\&. Only necessary if there are more than two host sections in the resource you are working on\&. .RE +.PP +\fB\-\-\fR \fIbackend\-options\fR +.RS 4 +All options following the double hyphen are considered +\fIbackend\-options\fR\&. These are passed through to the backend command\&. I\&.e\&. to +\fBdrbdsetup\fR, +\fBdrbdmeta\fR +or +\fBdrbd\-proxy\-ctl\fR\&. +.RE .SH "COMMANDS" .PP attach .RS 4 -Attaches a local backing block device to the DRBD resource\'s device\&. +Attaches a local backing block device to the DRBD resource\*(Aqs device\&.
.RE .PP detach .RS 4 .\" drbdadm: detach -Removes the backing storage device from a DRBD resource\'s device\&. +Removes the backing storage device from a DRBD resource\*(Aqs device\&. .RE .PP connect .RS 4 .\" drbdadm: connect -Sets up the network configuration of the resource\'s device\&. If the peer device is already configured, the two DRBD devices will connect\&. If there are more than two host sections in the resource you need to use the +Sets up the network configuration of the resource\*(Aqs device\&. If the peer device is already configured, the two DRBD devices will connect\&. If there are more than two host sections in the resource you need to use the \fB\-\-peer\fR option to select the peer you want to connect to\&. .RE @@ -120,13 +194,15 @@ primary .RS 4 .\" drbdadm: primary -Promote the resource\'s device into primary role\&. You need to do this before any access to the device, such as creating or mounting a file system\&. +Promote the resource\*(Aqs device into primary role\&. You need to do this before any access to the device, such as creating or mounting a file system\&. .RE .PP secondary .RS 4 .\" drbdadm: secondary -Brings the device back into secondary role\&. This is needed since in a connected DRBD device pair, only one of the two peers may have primary role (except if allow\-two\-primaries is explicitly set in the configuration file)\&. +Brings the device back into secondary role\&. This is needed since in a connected DRBD device pair, only one of the two peers may have primary role (except if +\fBallow\-two\-primaries\fR +is explicitly set in the configuration file)\&. .RE .PP invalidate @@ -134,31 +210,40 @@ .\" drbdadm: invalidate Forces DRBD to consider the data on the \fIlocal\fR -backing storage device as out\-of\-sync\&. Therefore DRBD will copy each and every block over from its peer, to bring the local storage device back in sync\&. +backing storage device as out\-of\-sync\&. 
Therefore DRBD will copy each and every block from its peer, to bring the local storage device back in sync\&. .RE .PP invalidate\-remote .RS 4 .\" drbdadm: invalidate-remote This command is similar to the invalidate command, however, the -\fIpeer\'s\fR +\fIpeer\*(Aqs\fR backing storage is invalidated and hence rewritten with the data of the local node\&. .RE .PP resize .RS 4 .\" drbdadm: resize -Causes DRBD to re\-examine all sizing constraints, and resize the resource\'s device accordingly\&. For example in case you increased the size of your backing storage devices (on both nodes of course), then DRBD will adopt to the new size after you called this command on one of your nodes\&. Since new storage space must be synchronised this command only works if there is at least one primary node present\&. +Causes DRBD to re\-examine all sizing constraints, and resize the resource\*(Aqs device accordingly\&. For example, if you increased the size of your backing storage devices (on both nodes, of course), then DRBD will adopt the new size after you called this command on one of your nodes\&. Since new storage space must be synchronised this command only works if there is at least one primary node present\&. .sp The \fB\-\-assume\-peer\-has\-space\fR -allows you to resize a device which is currently not connected to the peer\&. Use with care, since if you do not resize the peer\'s disk as well, further connect attempts of the two will fail\&. +allows you to resize a device which is currently not connected to the peer\&. Use with care, since if you do not resize the peer\*(Aqs disk as well, further connect attempts of the two will fail\&. +.RE +.PP +check\-resize +.RS 4 +.\" drbdadm: check-resize +Calls drbdmeta to eventually move internal meta data\&. If the backing device was resized, while DRBD was not running, meta data has to be moved to the end of the device, so that the next +\fBattach\fR +command can succeed\&. 
.RE .PP create\-md .RS 4 .\" drbdadm: create-md -Initializes the meta data storage\&. This needs to be done before a DRBD resource can be taken online for the first time\&. +Initializes the meta data storage\&. This needs to be done before a DRBD resource can be taken online for the first time\&. In case of issues with that command have a look at +\fBdrbdmeta\fR(8) .RE .PP get\-gi @@ -215,48 +300,22 @@ Shows the current connection state of the devices\&. .RE .PP -status -.RS 4 -.\" drbdadm: status -Shows the current status of all devices defined in the current config file, in xml\-like format\&. Example output: -.sp -.if n \{\ -.RS 4 -.\} -.nf - - - - - - - - -.fi -.if n \{\ -.RE -.\} -.sp -.RE -.PP dump .RS 4 .\" drbdadm: dump -Just parse the configuration file and dump it to stdout\&. May be used to check the configuration file for syntactical correctness\&. +Just parse the configuration file and dump it to stdout\&. May be used to check the configuration file for syntactic correctness\&. .RE .PP outdate .RS 4 .\" drbdadm: outdate -Used to mark the node\'s data as outdated\&. Usually used by the peer\'s fence\-peer handler\&. +Used to mark the node\*(Aqs data as outdated\&. Usually used by the peer\*(Aqs fence\-peer handler\&. .RE .PP verify .RS 4 .\" drbdadm: verify -Starts online verify\&. During online verify, data on both nodes is compared for inconsistency\&. See +Starts online verify\&. During online verify, data on both nodes is compared for equality\&. See /proc/drbd for online verify progress\&. If out\-of\-sync blocks are found, they are \fInot\fR @@ -272,7 +331,7 @@ pause\-sync .RS 4 .\" drbdadm: pause-sync -Temporarily suspend an ongoing resynchronization by setting the local pause flag\&. Resync only progresses if neither the local nor the remote pause flag is set\&. It might be desirable to postpone DRBD\'s resynchronization until after any resynchronization of the backing storage\'s RAID setup\&. 
+Temporarily suspend an ongoing resynchronization by setting the local pause flag\&. Resync only progresses if neither the local nor the remote pause flag is set\&. It might be desirable to postpone DRBD\*(Aqs resynchronization until after any resynchronization of the backing storage\*(Aqs RAID setup\&. .RE .PP resume\-sync @@ -286,7 +345,9 @@ .\" drbdadm: new-current-uuid Generates a new currend UUID and rotates all other UUID values\&. .sp -This can be used to shorten the initial resync of a cluster\&. See the drbdsetup manpage for a more details\&. +This can be used to shorten the initial resync of a cluster\&. See the +\fBdrbdsetup\fR +manpage for a more details\&. .RE .PP dstate @@ -301,7 +362,7 @@ .RE .SH "VERSION" .sp -This document was revised for version 8\&.3\&.2 of the DRBD distribution\&. +This document was revised for version 8\&.4\&.0 of the DRBD distribution\&. .SH "AUTHOR" .sp Written by Philipp Reisner philipp\&.reisner@linbit\&.com and Lars Ellenberg lars\&.ellenberg@linbit\&.com @@ -310,12 +371,16 @@ Report bugs to drbd\-user@lists\&.linbit\&.com\&. .SH "COPYRIGHT" .sp -Copyright 2001\-2008 LINBIT Information Technologies, Philipp Reisner, Lars Ellenberg\&. This is free software; see the source for copying conditions\&. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE\&. +Copyright 2001\-2011 LINBIT Information Technologies, Philipp Reisner, Lars Ellenberg\&. This is free software; see the source for copying conditions\&. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE\&. .SH "SEE ALSO" .PP \fBdrbd.conf\fR(5), \fBdrbd\fR(8), -\fBdrbddisk\fR(8)\fBdrbdsetup\fR(8)\fBdrbdmeta\fR(8)\m[blue]\fBDRBD project web site\fR\m[]\&\s-2\u[1]\d\s+2 +\fBdrbddisk\fR(8), +\fBdrbdsetup\fR(8), +\fBdrbdmeta\fR(8) +and the +\m[blue]\fBDRBD project web site\fR\m[]\&\s-2\u[1]\d\s+2 .SH "NOTES" .IP " 1." 
4 DRBD project web site diff -Nru drbd8-8.3.7/documentation/drbdadm.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdadm.xml --- drbd8-8.3.7/documentation/drbdadm.xml 2010-01-07 09:09:58.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdadm.xml 2012-02-02 14:09:14.000000000 +0000 @@ -1,454 +1,651 @@ - - + + - 5 Dec 2008 + 6 May 2011 + DRBD - 8.3.2 + + 8.4.0 + drbdadm + 8 + System Administration + drbdadm - Administration tool for DRBD - - drbdadm - - + + Administration tool for DRBD + drbdadm + + drbdadm + -d - -cfile - -tfile - -scmd - -mcmd + + -cfile + + -tfile + + -scmd + + -mcmd + -S - -hhost - - command - + + -hhost + + --backend-options + + command + - all - - resource - + all + + resource/volume> + Description - Drbdadm is the high level tool of the DRBD program suite. Drbdadm is to - drbdsetup and drbdmeta what ifup/ifdown is to ifconfig. Drbdadm reads its - configuration file and performs the specified commands by calling the - drbdsetup and/or the drbdmeta program. - + + is the high level tool of the DRBD program suite. + is to and what + / is to . + reads its configuration file and performs the specified commands by + calling the and/or the program. + + can operate on whole resources or on individual volumes in a + resource. The sub commands: , , + , , , + , , , + , , , + , , , + , , , + , work on whole resources and on + individual volumes. + + Resource level only commands are: , , + , , and + . + Options + , + - Just prints the calls of drbdsetup to stdout, but does not run - the commands. - + Just prints the calls of to stdout, but does not run + the commands. + - , file + , + file + - Specifies the configuration file drbdadm will use. If this parameter - is not specified, drbdadm will look for , - and . - + Specifies the configuration file drbdadm will use. If this parameter is not + specified, drbdadm will look for , + , and + . - , file + , + file + - Specifies an additional configuration file drbdadm to check. 
This option - is only allowed with the dump and the sh-nop commands. - + Specifies an additional configuration file drbdadm to check. This option is only + allowed with the dump and the sh-nop commands. - , file + , + file + - Specifies the full path to the drbdsetup program. If this option is - omitted, drbdadm will look for and . - + Specifies the full path to the program. If this option + is omitted, drbdadm will look for and + . + - , file + , + file + - Specifies the full path to the drbdmeta program. If this option is - omitted, drbdadm will look for and - . - + Specifies the full path to the program. If this option + is omitted, drbdadm will look for and + . + , + - Specifies that this command should be performed on a stacked resource. - + Specifies that this command should be performed on a stacked resource. + , + - Specifies to which peer node to connect. Only necessary if there are - more than two host sections in the resource you are working on. - + Specifies to which peer node to connect. Only necessary if there are more than two + host sections in the resource you are working on. + + + + + backend-options + + + All options following the doubly hyphen are considered + backend-options. These are passed through to the backend + command. I.e. to , or + . + Commands + attach + - Attaches a local backing block device to the DRBD resource's device. - + Attaches a local backing block device to the DRBD resource's device. + detach + - drbdadmdetach - Removes the backing storage device from a DRBD resource's device. - + + drbdadm + + detach + Removes the backing storage device from a DRBD resource's device. + connect + - drbdadmconnect - Sets up the network configuration of the resource's device. If the - peer device is already configured, the two DRBD devices will connect. - If there are more than two host sections in the resource you need - to use the option to select the peer you want to - connect to. 
- + + drbdadm + + connect + Sets up the network configuration of the resource's device. If the peer + device is already configured, the two DRBD devices will connect. If there are more than + two host sections in the resource you need to use the option to + select the peer you want to connect to. + disconnect + - drbdadmdisconnect - Removes the network configuration from the resource. The device - will then go into StandAlone state. - + + drbdadm + + disconnect + Removes the network configuration from the resource. The device will then + go into StandAlone state. + syncer + - drbdadmsyncer - Loads the resynchronization parameters into the device. - + + drbdadm + + syncer + Loads the resynchronization parameters into the device. + up + - drbdadmup - Is a shortcut for attach and connect. - + + drbdadm + + up + Is a shortcut for attach and connect. + down + - drbdadmdown - Is a shortcut for disconnect and detach. - + + drbdadm + + down + Is a shortcut for disconnect and detach. + primary + - drbdadmprimary - Promote the resource's device into primary role. You need to do - this before any access to the device, such as creating or mounting a file system. - + + drbdadm + + primary + Promote the resource's device into primary role. You need to do this + before any access to the device, such as creating or mounting a file system. + secondary + - drbdadmsecondary - Brings the device back into secondary role. This is needed since in - a connected DRBD device pair, only one of the two peers may have - primary role (except if allow-two-primaries is explicitly set in - the configuration file). - + + drbdadm + + secondary + Brings the device back into secondary role. This is needed since in a + connected DRBD device pair, only one of the two peers may have primary role (except if + is explicitly set in the configuration + file). + invalidate + - drbdadminvalidate - Forces DRBD to consider the data on the local backing - storage device as out-of-sync. 
Therefore DRBD will copy each - and every block over from its peer, to bring the local storage - device back in sync. - + + drbdadm + + invalidate + Forces DRBD to consider the data on the local + backing storage device as out-of-sync. Therefore DRBD will copy each and every block + from its peer, to bring the local storage device back in sync. + invalidate-remote + - drbdadminvalidate-remote - This command is similar to the invalidate command, however, the - peer's backing storage is invalidated and hence rewritten - with the data of the local node. - + + drbdadm + + invalidate-remote + This command is similar to the invalidate command, however, the + peer's backing storage is invalidated and hence rewritten with the + data of the local node. + resize + - drbdadmresize - Causes DRBD to re-examine all sizing constraints, and resize the - resource's device accordingly. For example in case you increased the - size of your backing storage devices (on both nodes of course), - then DRBD will adopt to the new size after you called - this command on one of your nodes. Since new storage space must be - synchronised this command only works if there is at least one primary - node present. - - The allows you to - resize a device which is currently not connected to the peer. - Use with care, since if you do not resize the peer's disk as well, - further connect attempts of the two will fail. - + + drbdadm + + resize + Causes DRBD to re-examine all sizing constraints, and resize the + resource's device accordingly. For example, if you increased the size of your backing + storage devices (on both nodes, of course), then DRBD will adopt the new size after you + called this command on one of your nodes. Since new storage space must be synchronised + this command only works if there is at least one primary node present. + + The allows you to resize a device which + is currently not connected to the peer. 
Use with care, since if you do not resize the + peer's disk as well, further connect attempts of the two will fail. + + + check-resize + + + + drbdadm + + check-resize + Calls drbdmeta to eventually move internal meta data. If the backing + device was resized, while DRBD was not running, meta data has to be moved to the end of + the device, so that the next command can succeed. + + + create-md + - drbdadmcreate-md - Initializes the meta data storage. This needs to be - done before a DRBD resource can be taken online for the first - time. - + + drbdadm + + create-md + Initializes the meta data storage. This needs to be done before a DRBD + resource can be taken online for the first time. In case of issues with that command + have a look at + drbdmeta + + 8 + + get-gi + - drbdadmget-gi - Shows a short textual representation of the data generation - identifiers. - + + drbdadm + + get-gi + Shows a short textual representation of the data generation + identifiers. + show-gi + - drbdadmshow-gi - Prints a textual representation of the data generation - identifiers including explanatory information. - + + drbdadm + + show-gi + Prints a textual representation of the data generation identifiers + including explanatory information. + dump-md + - drbdadmdump-md - Dumps the whole contents of the meta data storage, including - the stored bit-map and activity-log, in a textual representation. - + + drbdadm + + dump-md + Dumps the whole contents of the meta data storage, including the stored + bit-map and activity-log, in a textual representation. + outdate + - drbdadmoutdate - Sets the outdated flag in the meta data. - + + drbdadm + + outdate + Sets the outdated flag in the meta data. + adjust + - drbdadmadjust - Synchronizes the configuration of the device with your configuration - file. You should always examine the output of the dry-run - mode before actually executing this command. 
- + + drbdadm + + adjust + Synchronizes the configuration of the device with your configuration + file. You should always examine the output of the dry-run mode before actually executing + this command. + wait-connect + - drbdadmwait-connect - Waits until the device is connected to its peer device. - + + drbdadm + + wait-connect + Waits until the device is connected to its peer device. + role + - drbdadmrole - Shows the current roles of the devices (local/peer). - E.g. Primary/Secondary - + + drbdadm + + role + Shows the current roles of the devices (local/peer). E.g. + Primary/Secondary + state + - drbdadmstate - Deprecated alias for "role", see above. - + + drbdadm + + state + Deprecated alias for "role", see above. + cstate + - drbdadmcstate - Shows the current connection state of the devices. - - - - - status - - drbdadmstatus - Shows the current status of all devices defined in the current config file, - in xml-like format. Example output: -<drbd-status version="8.3.2" api="88"> -<resources config_file="/etc/drbd.conf"> -<resource minor="0" name="s0" cs="SyncTarget" st1="Secondary" st2="Secondary" - ds1="Inconsistent" ds2="UpToDate" resynced_precent ="5.9" /> -<resource minor="1" name="s1" cs="WFConnection" st1="Secondary" - st2="Unknown" ds1="Inconsistent" ds2="Outdated" /> -<resource minor="3" name="dummy" cs="Unconfigured" /> -<!-- resource minor="4" name="scratch" not available or not yet created --> -</resources> -</drbd-status> - + + drbdadm + + cstate + Shows the current connection state of the devices. + dump + - drbdadmdump - Just parse the configuration file and dump it to stdout. May - be used to check the configuration file for syntactical correctness. - + + drbdadm + + dump + Just parse the configuration file and dump it to stdout. May be used to + check the configuration file for syntactic correctness. + outdate + - drbdadmoutdate - Used to mark the node's data as outdated. Usually used by the - peer's fence-peer handler. 
- + + drbdadm + + outdate + Used to mark the node's data as outdated. Usually used by the peer's + fence-peer handler. + verify + - drbdadmverify - Starts online verify. During online verify, data on - both nodes is compared for inconsistency. See - /proc/drbd for online verify progress. If out-of-sync - blocks are found, they are not resynchronized - automatically. To do that, disconnect and - connect the resource when verification has completed. - + + drbdadm + + verify + Starts online verify. During online verify, data on both nodes is + compared for equality. See /proc/drbd for online + verify progress. If out-of-sync blocks are found, they are not + resynchronized automatically. To do that, disconnect + and connect the resource when verification has + completed. + See also the notes on data integrity on the drbd.conf manpage. + pause-sync + - drbdadmpause-sync - Temporarily suspend an ongoing resynchronization by setting the - local pause flag. Resync only progresses if neither the local - nor the remote pause flag is set. It might be desirable to - postpone DRBD's resynchronization until after any - resynchronization of the backing storage's RAID setup. - + + drbdadm + + pause-sync + Temporarily suspend an ongoing resynchronization by setting the local + pause flag. Resync only progresses if neither the local nor the remote pause flag is + set. It might be desirable to postpone DRBD's resynchronization until after any + resynchronization of the backing storage's RAID setup. + resume-sync + - drbdadmresume-sync - Unset the local sync pause flag. - + + drbdadm + + resume-sync + Unset the local sync pause flag. + new-current-uuid + - drbdadmnew-current-uuid - Generates a new currend UUID and rotates all other UUID values. - - This can be used to shorten the initial resync of a cluster. - See the drbdsetup manpage for a more details. - + + drbdadm + + new-current-uuid + Generates a new currend UUID and rotates all other UUID values. 
+ + This can be used to shorten the initial resync of a cluster. See the + manpage for a more details. + dstate + - drbdadmdstate - Show the current state of the backing storage devices. (local/peer) - + + drbdadm + + dstate + Show the current state of the backing storage devices. + (local/peer) + hidden-commands + - Shows all commands undocumented on purpose. - + Shows all commands undocumented on purpose. + Version - This document was revised for version 8.3.2 of the DRBD distribution. + + This document was revised for version 8.4.0 of the DRBD distribution. + Author - Written by Philipp Reisner philipp.reisner@linbit.com - and Lars Ellenberg lars.ellenberg@linbit.com - + + Written by Philipp Reisner philipp.reisner@linbit.com and Lars + Ellenberg lars.ellenberg@linbit.com + Reporting Bugs - Report bugs to drbd-user@lists.linbit.com. - + + Report bugs to drbd-user@lists.linbit.com. + Copyright - Copyright 2001-2008 LINBIT Information Technologies, -Philipp Reisner, Lars Ellenberg. This is free software; -see the source for copying conditions. There is NO warranty; -not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - + + Copyright 2001-2011 LINBIT Information Technologies, Philipp Reisner, Lars Ellenberg. + This is free software; see the source for copying conditions. There is NO warranty; not even + for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ See Also - drbd.conf5, - drbd8, - drbddisk8drbdsetup8drbdmeta8DRBD project web site + + + drbd.conf + + 5 + , + drbd + + 8 + , + drbddisk + + 8 + , + drbdsetup + + 8 + , + drbdmeta + + 8 + and the + DRBD project web site + diff -Nru drbd8-8.3.7/documentation/drbddisk.8 drbd8-8.4.1+git55a81dc~cmd1/documentation/drbddisk.8 --- drbd8-8.3.7/documentation/drbddisk.8 2010-01-13 16:17:26.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbddisk.8 2012-02-02 14:09:59.000000000 +0000 @@ -1,7 +1,7 @@ '\" t .\" Title: drbddisk .\" Author: [see the "Author" section] -.\" Generator: DocBook XSL Stylesheets v1.75.1 +.\" Generator: DocBook XSL Stylesheets v1.75.2 .\" Date: 15 Oct 2008 .\" Manual: System Administration .\" Source: DRBD 8.3.2 @@ -9,6 +9,15 @@ .\" .TH "DRBDDISK" "8" "15 Oct 2008" "DRBD 8.3.2" "System Administration" .\" ----------------------------------------------------------------- +.\" * Define some portability stuff +.\" ----------------------------------------------------------------- +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.\" http://bugs.debian.org/507673 +.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- .\" disable hyphenation diff -Nru drbd8-8.3.7/documentation/drbdmeta.8 drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdmeta.8 --- drbd8-8.3.7/documentation/drbdmeta.8 2010-01-13 16:17:25.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdmeta.8 2012-02-02 14:09:59.000000000 +0000 @@ -1,7 +1,7 @@ '\" t .\" Title: drbdmeta .\" Author: [see the "Author" section] -.\" Generator: DocBook XSL Stylesheets v1.75.1 +.\" Generator: DocBook XSL Stylesheets v1.75.2 .\" Date: 15 Oct 2008 .\" Manual: System Administration 
.\" Source: DRBD 8.3.2 @@ -9,6 +9,15 @@ .\" .TH "DRBDMETA" "8" "15 Oct 2008" "DRBD 8.3.2" "System Administration" .\" ----------------------------------------------------------------- +.\" * Define some portability stuff +.\" ----------------------------------------------------------------- +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.\" http://bugs.debian.org/507673 +.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- .\" disable hyphenation @@ -19,15 +28,29 @@ .\" * MAIN CONTENT STARTS HERE * .\" ----------------------------------------------------------------- .SH "NAME" -drbdmeta \- DRBD\'s meta data management tool .\" drbdmeta +drbdmeta \- DRBD\*(Aqs meta data management tool .\" drbdmeta .SH "SYNOPSIS" .HP \w'\fBdrbdmeta\fR\ 'u -\fBdrbdmeta\fR {\fIdevice\fR} [v06\ \fIminor\fR | v07\ \fImeta_dev\ index\fR | v08\ \fImeta_dev\ index\fR] {\fIcommand\fR} [\fIcmd\ args\fR...] +\fBdrbdmeta\fR [\-\-force] [\-\-ignore\-sanity\-checks] {\fIdevice\fR} {v06\ \fIminor\fR | v07\ \fImeta_dev\ index\fR | v08\ \fImeta_dev\ index\fR} {\fIcommand\fR} [\fIcmd\ args\fR...] .SH "DESCRIPTION" .PP -Drbdmeta is used to create, display and modify the contents of DRBD\'s meta data storage\&. Usually you do not want to use this command directly, but use it through the front end drbdadm\&. +Drbdmeta is used to create, display and modify the contents of DRBD\*(Aqs meta data storage\&. Usually you do not want to use this command directly, but start it via the frontend +\fBdrbdadm\fR(8)\&. +.PP +This command only works if the DRBD resource is currently down, or at least detached from its backing storage\&. The first parameter is the device node associated to the resource\&. 
With the second parameter you can select the version of the meta data\&. Currently all major DRBD releases (0\&.6, 0\&.7 and 8) are supported\&. +.SH "OPTIONS" +.PP +\-\-force +.RS 4 +.\" drbdmeta: --force +All questions that get asked by drbdmeta are treated as if the user answered \*(Aqyes\*(Aq\&. +.RE .PP -This command only works if the DRBD resource is currently down, or at least detached from its backing storage\&. The first parameter is the device node associated to the resource\&. With the second parameter you select which the version of the meta data\&. Currently all major DRBD releases (0\&.6, 0\&.7 and 8) are supported\&. +\-\-ignore\-sanity\-checks +.RS 4 +.\" drbdmeta: --ignore-sanity-checks +Some sanity checks cause drbdmeta to terminate\&. E\&.g\&. if a file system image would get destroyed by creating the meta data\&. By using that option you can force drbdmeta to ignore these checks\&. +.RE .SH "COMMANDS" .PP create\-md @@ -51,7 +74,7 @@ dump\-md .RS 4 .\" drbdmeta: dump-md -Dumps the whole contents of the meta data storage including the stored bit\-map and activity\-log, in a textual representation\&. +Dumps the whole contents of the meta data storage including the stored bit\-map and activity\-log in a textual representation\&. .RE .PP outdate @@ -63,11 +86,17 @@ dstate .RS 4 .\" drbdmeta: dstate -Prints the state of the data on the backing storage\&. The output is always followed by \'/DUnknown\' since drbdmeta only looks at the local meta data\&. +Prints the state of the data on the backing storage\&. The output is always followed by \*(Aq/DUnknown\*(Aq since drbdmeta only looks at the local meta data\&. +.RE +.PP +check\-resize +.RS 4 +.\" drbdmeta: check-resize +Examines the device size of a backing device, and it\*(Aqs last known device size, recorded in a file /var/lib/drbd/drbd\-minor\-??\&.lkbd\&. 
In case the size of the backing device changed, and the meta data can be found at the old position, it moves the meta data to the right position at the end of the block device\&. .RE .SH "EXPERT'S COMMANDS" .PP -Drbdmeta allows you to modify the meta data as well\&. This is intentionally omitted for the command\'s usage output, since you should only use it if you really know what you are doing\&. By setting the generation identifiers to wrong values, you risk to overwrite your up\-to\-data data with an older version of your data\&. +Drbdmeta allows you to modify the meta data as well\&. This is intentionally omitted for the command\*(Aqs usage output, since you should only use it if you really know what you are doing\&. By setting the generation identifiers to wrong values, you risk to overwrite your up\-to\-data data with an older version of your data\&. .PP set\-gi \fIgi\fR .RS 4 diff -Nru drbd8-8.3.7/documentation/drbdmeta.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdmeta.xml --- drbd8-8.3.7/documentation/drbdmeta.xml 2010-01-07 09:09:33.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdmeta.xml 2012-02-02 14:09:14.000000000 +0000 @@ -22,10 +22,12 @@ drbdmeta + --force + --ignore-sanity-checks device - + v06 minor v07 meta_dev index v08 meta_dev index @@ -42,16 +44,39 @@ Description Drbdmeta is used to create, display and modify the contents of DRBD's meta data storage. Usually you do not want to use this - command directly, but use it through the front end drbdadm. + command directly, but start it via the frontend + drbdadm8. This command only works if the DRBD resource is currently down, or at least detached from its backing storage. The first parameter is the device node associated to the resource. With the second - parameter you select which the version of the meta data. Currently + parameter you can select the version of the meta data. Currently all major DRBD releases (0.6, 0.7 and 8) are supported. 
+ Options + + + --force + + drbdmeta--force + All questions that get asked by drbdmeta are treated as if + the user answered 'yes'. + + + + --ignore-sanity-checks + + drbdmeta--ignore-sanity-checks + Some sanity checks cause drbdmeta to terminate. E.g. if a file system image would get + destroyed by creating the meta data. By using that option you can force drbdmeta + to ignore these checks. + + + + + Commands @@ -90,7 +115,7 @@ drbdmetadump-md Dumps the whole contents of the meta data storage including - the stored bit-map and activity-log, in a textual representation. + the stored bit-map and activity-log in a textual representation. @@ -114,6 +139,19 @@ + + + check-resize + + drbdmetacheck-resize + Examines the device size of a backing device, and it's last known device size, + recorded in a file /var/lib/drbd/drbd-minor-??.lkbd. In case the size of the + backing device changed, and the meta data can be found at the old position, + it moves the meta data to the right position at the end of the block device. 
+ + + + diff -Nru drbd8-8.3.7/documentation/drbdsetup.8 drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup.8 --- drbd8-8.3.7/documentation/drbdsetup.8 2010-01-13 16:17:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup.8 2012-09-03 22:37:27.000000000 +0000 @@ -1,13 +1,22 @@ '\" t .\" Title: drbdsetup .\" Author: [see the "Author" section] -.\" Generator: DocBook XSL Stylesheets v1.75.1 -.\" Date: 5 Dec 2008 +.\" Generator: DocBook XSL Stylesheets v1.76.1 +.\" Date: 6 May 2011 .\" Manual: System Administration -.\" Source: DRBD 8.3.2 +.\" Source: DRBD 8.4.0 .\" Language: English .\" -.TH "DRBDSETUP" "8" "5 Dec 2008" "DRBD 8.3.2" "System Administration" +.TH "DRBDSETUP" "8" "6 May 2011" "DRBD 8.4.0" "System Administration" +.\" ----------------------------------------------------------------- +.\" * Define some portability stuff +.\" ----------------------------------------------------------------- +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.\" http://bugs.debian.org/507673 +.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html +.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- @@ -22,61 +31,73 @@ drbdsetup \- Setup tool for DRBD .\" drbdsetup .SH "SYNOPSIS" .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {disk} {\fIlower_dev\fR} {\fImeta_data_dev\fR} {\fImeta_data_index\fR} [\-d\ {\fIsize\fR}] [\-e\ {\fIerr_handler\fR}] [\-f\ {\fIfencing_policy\fR}] [\-b] +\fBdrbdsetup\fR new\-resource \fIresource\fR [\-\-cpu\-mask\ {\fIval\fR}] [\-\-on\-no\-data\-accessible\ {io\-error\ |\ suspend\-io}] +.HP \w'\fBdrbdsetup\fR\ 'u +\fBdrbdsetup\fR new\-minor \fIresource\fR \fIminor\fR \fIvolume\fR +.HP \w'\fBdrbdsetup\fR\ 'u +\fBdrbdsetup\fR del\-resource \fIresource\fR +.HP 
\w'\fBdrbdsetup\fR\ 'u +\fBdrbdsetup\fR del\-minor \fIminor\fR .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {net} [\fIaf:\fR] {\fIlocal_addr\fR} [\fI:port\fR] [\fIaf:\fR] {\fIremote_addr\fR} [\fI:port\fR] {\fIprotocol\fR} [\-c\ {\fItime\fR}] [\-i\ {\fItime\fR}] [\-t\ {\fIval\fR}] [\-S\ {\fIsize\fR}] [\-r\ {\fIsize\fR}] [\-k\ {\fIcount\fR}] [\-e\ {\fImax_epoch_size\fR}] [\-b\ {\fImax_buffers\fR}] [\-m] [\-a\ {\fIhash_alg\fR}] [\-x\ {\fIshared_secret\fR}] [\-A\ {\fIasb\-0p\-policy\fR}] [\-B\ {\fIasb\-1p\-policy\fR}] [\-C\ {\fIasb\-2p\-policy\fR}] [\-D] [\-R\ {\fIrole\-resync\-conflict\-policy\fR}] [\-p\ {\fIping_timeout\fR}] [\-u\ {\fIval\fR}] [\-d\ {\fIhash_alg\fR}] [\-o] +\fBdrbdsetup\fR attach \fIminor\fR \fIlower_dev\fR \fImeta_data_dev\fR \fImeta_data_index\fR [\-\-size\ {\fIval\fR}] [\-\-max\-bio\-bvecs\ {\fIval\fR}] [\-\-on\-io\-error\ {pass_on\ |\ call\-local\-io\-error\ |\ detach}] [\-\-fencing\ {dont\-care\ |\ resource\-only\ |\ resource\-and\-stonith}] [\-\-disk\-barrier] [\-\-disk\-flushes] [\-\-disk\-drain] [\-\-md\-flushes] [\-\-resync\-rate\ {\fIval\fR}] [\-\-resync\-after\ {\fIval\fR}] [\-\-al\-extents\ {\fIval\fR}] [\-\-c\-plan\-ahead\ {\fIval\fR}] [\-\-c\-delay\-target\ {\fIval\fR}] [\-\-c\-fill\-target\ {\fIval\fR}] [\-\-c\-max\-rate\ {\fIval\fR}] [\-\-c\-min\-rate\ {\fIval\fR}] [\-\-disk\-timeout\ {\fIval\fR}] [\-\-read\-balancing\ {prefer\-local\ |\ prefer\-remote\ |\ round\-robin\ |\ least\-pending\ |\ when\-congested\-remote\ |\ 32K\-striping\ |\ 64K\-striping\ |\ 128K\-striping\ |\ 256K\-striping\ |\ 512K\-striping\ |\ 1M\-striping}] .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {syncer} [\-a\ {\fIdev_minor\fR}] [\-r\ {\fIrate\fR}] [\-e\ {\fIextents\fR}] [\-v\ {\fIverify\-hash\-alg\fR}] [\-c\ {\fIcpu\-mask\fR}] [\-C\ {\fIcsums\-hash\-alg\fR}] [\-R\ {\fIuse\-rle\fR}] +\fBdrbdsetup\fR connect \fIresource\fR \fIlocal_addr\fR \fIremote_addr\fR [\-\-tentative] [\-\-discard\-my\-data] [\-\-protocol\ {A\ |\ B\ |\ C}] 
[\-\-timeout\ {\fIval\fR}] [\-\-max\-epoch\-size\ {\fIval\fR}] [\-\-max\-buffers\ {\fIval\fR}] [\-\-unplug\-watermark\ {\fIval\fR}] [\-\-connect\-int\ {\fIval\fR}] [\-\-ping\-int\ {\fIval\fR}] [\-\-sndbuf\-size\ {\fIval\fR}] [\-\-rcvbuf\-size\ {\fIval\fR}] [\-\-ko\-count\ {\fIval\fR}] [\-\-allow\-two\-primaries] [\-\-cram\-hmac\-alg\ {\fIval\fR}] [\-\-shared\-secret\ {\fIval\fR}] [\-\-after\-sb\-0pri\ {disconnect\ |\ discard\-younger\-primary\ |\ discard\-older\-primary\ |\ discard\-zero\-changes\ |\ discard\-least\-changes\ |\ discard\-local\ |\ discard\-remote}] [\-\-after\-sb\-1pri\ {disconnect\ |\ consensus\ |\ discard\-secondary\ |\ call\-pri\-lost\-after\-sb\ |\ violently\-as0p}] [\-\-after\-sb\-2pri\ {disconnect\ |\ call\-pri\-lost\-after\-sb\ |\ violently\-as0p}] [\-\-always\-asbp] [\-\-rr\-conflict\ {disconnect\ |\ call\-pri\-lost\ |\ violently}] [\-\-ping\-timeout\ {\fIval\fR}] [\-\-data\-integrity\-alg\ {\fIval\fR}] [\-\-tcp\-cork] [\-\-on\-congestion\ {block\ |\ pull\-ahead\ |\ disconnect}] [\-\-congestion\-fill\ {\fIval\fR}] [\-\-congestion\-extents\ {\fIval\fR}] [\-\-csums\-alg\ {\fIval\fR}] [\-\-verify\-alg\ {\fIval\fR}] [\-\-use\-rle] .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {disconnect} +\fBdrbdsetup\fR disk\-options \fIminor\fR [\-\-on\-io\-error\ {pass_on\ |\ call\-local\-io\-error\ |\ detach}] [\-\-fencing\ {dont\-care\ |\ resource\-only\ |\ resource\-and\-stonith}] [\-\-disk\-barrier] [\-\-disk\-flushes] [\-\-disk\-drain] [\-\-md\-flushes] [\-\-resync\-rate\ {\fIval\fR}] [\-\-resync\-after\ {\fIval\fR}] [\-\-al\-extents\ {\fIval\fR}] [\-\-c\-plan\-ahead\ {\fIval\fR}] [\-\-c\-delay\-target\ {\fIval\fR}] [\-\-c\-fill\-target\ {\fIval\fR}] [\-\-c\-max\-rate\ {\fIval\fR}] [\-\-c\-min\-rate\ {\fIval\fR}] [\-\-disk\-timeout\ {\fIval\fR}] [\-\-read\-balancing\ {prefer\-local\ |\ prefer\-remote\ |\ round\-robin\ |\ least\-pending\ |\ when\-congested\-remote\ |\ 32K\-striping\ |\ 64K\-striping\ |\ 128K\-striping\ |\ 256K\-striping\ |\ 
512K\-striping\ |\ 1M\-striping}] .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {detach} +\fBdrbdsetup\fR net\-options \fIlocal_addr\fR \fIremote_addr\fR [\-\-protocol\ {A\ |\ B\ |\ C}] [\-\-timeout\ {\fIval\fR}] [\-\-max\-epoch\-size\ {\fIval\fR}] [\-\-max\-buffers\ {\fIval\fR}] [\-\-unplug\-watermark\ {\fIval\fR}] [\-\-connect\-int\ {\fIval\fR}] [\-\-ping\-int\ {\fIval\fR}] [\-\-sndbuf\-size\ {\fIval\fR}] [\-\-rcvbuf\-size\ {\fIval\fR}] [\-\-ko\-count\ {\fIval\fR}] [\-\-allow\-two\-primaries] [\-\-cram\-hmac\-alg\ {\fIval\fR}] [\-\-shared\-secret\ {\fIval\fR}] [\-\-after\-sb\-0pri\ {disconnect\ |\ discard\-younger\-primary\ |\ discard\-older\-primary\ |\ discard\-zero\-changes\ |\ discard\-least\-changes\ |\ discard\-local\ |\ discard\-remote}] [\-\-after\-sb\-1pri\ {disconnect\ |\ consensus\ |\ discard\-secondary\ |\ call\-pri\-lost\-after\-sb\ |\ violently\-as0p}] [\-\-after\-sb\-2pri\ {disconnect\ |\ call\-pri\-lost\-after\-sb\ |\ violently\-as0p}] [\-\-always\-asbp] [\-\-rr\-conflict\ {disconnect\ |\ call\-pri\-lost\ |\ violently}] [\-\-ping\-timeout\ {\fIval\fR}] [\-\-data\-integrity\-alg\ {\fIval\fR}] [\-\-tcp\-cork] [\-\-on\-congestion\ {block\ |\ pull\-ahead\ |\ disconnect}] [\-\-congestion\-fill\ {\fIval\fR}] [\-\-congestion\-extents\ {\fIval\fR}] [\-\-csums\-alg\ {\fIval\fR}] [\-\-verify\-alg\ {\fIval\fR}] [\-\-use\-rle] .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {down} +\fBdrbdsetup\fR resource\-options \fIresource\fR [\-\-cpu\-mask\ {\fIval\fR}] [\-\-on\-no\-data\-accessible\ {io\-error\ |\ suspend\-io}] .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {primary} [\-o] +\fBdrbdsetup\fR disconnect \fIlocal_addr\fR \fIremote_addr\fR [\-\-force] .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {secondary} +\fBdrbdsetup\fR detach \fIminor\fR [\-\-force] .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {verify} [\-s\ {\fIstart\-position\fR}] +\fBdrbdsetup\fR primary \fIminor\fR [\-\-force] .HP 
\w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {invalidate} +\fBdrbdsetup\fR secondary \fIminor\fR .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {invalidate\-remote} +\fBdrbdsetup\fR down \fIresource\fR .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {wait\-connect} [\-t\ {\fIwfc_timeout\fR}] [\-d\ {\fIdegr_wfc_timeout\fR}] [\-o\ {\fIoutdated_wfc_timeout\fR}] [\-w] +\fBdrbdsetup\fR verify \fIminor\fR [\-\-start\ {\fIval\fR}] .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {wait\-sync} [\-t\ {\fIwfc_timeout\fR}] [\-d\ {\fIdegr_wfc_timeout\fR}] [\-o\ {\fIoutdated_wfc_timeout\fR}] [\-w] +\fBdrbdsetup\fR invalidate \fIminor\fR .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {role} +\fBdrbdsetup\fR invalidate\-remote \fIminor\fR .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {cstate} +\fBdrbdsetup\fR wait\-connect \fIminor\fR [\-\-wfc\-timeout\ {\fIval\fR}] [\-\-degr\-wfc\-timeout\ {\fIval\fR}] [\-\-outdated\-wfc\-timeout\ {\fIval\fR}] .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {dstate} +\fBdrbdsetup\fR wait\-sync \fIminor\fR [\-\-wfc\-timeout\ {\fIval\fR}] [\-\-degr\-wfc\-timeout\ {\fIval\fR}] [\-\-outdated\-wfc\-timeout\ {\fIval\fR}] .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {status} +\fBdrbdsetup\fR role \fIminor\fR .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {resize} [\-d\ {\fIsize\fR}] [\-f\ {\fIassume\-peer\-has\-space\fR}] +\fBdrbdsetup\fR cstate \fIminor\fR .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {pause\-sync} +\fBdrbdsetup\fR dstate \fIminor\fR .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {resume\-sync} +\fBdrbdsetup\fR resize \fIminor\fR [\-\-size\ {\fIval\fR}] [\-\-assume\-peer\-has\-space] [\-\-assume\-clean] .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {outdate} +\fBdrbdsetup\fR check\-resize \fIminor\fR .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {show\-gi} +\fBdrbdsetup\fR 
pause\-sync \fIminor\fR .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {get\-gi} +\fBdrbdsetup\fR resume\-sync \fIminor\fR .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {show} +\fBdrbdsetup\fR outdate \fIminor\fR .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {suspend\-io} +\fBdrbdsetup\fR show\-gi \fIminor\fR .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {resume\-io} +\fBdrbdsetup\fR get\-gi \fIminor\fR .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {events} [\-u] [\-a] +\fBdrbdsetup\fR show {\fIresource\fR | \fIminor\fR | \fIall\fR} .HP \w'\fBdrbdsetup\fR\ 'u -\fBdrbdsetup\fR {\fIdevice\fR} {new\-current\-uuid} [\-c] +\fBdrbdsetup\fR suspend\-io \fIminor\fR +.HP \w'\fBdrbdsetup\fR\ 'u +\fBdrbdsetup\fR resume\-io \fIminor\fR +.HP \w'\fBdrbdsetup\fR\ 'u +\fBdrbdsetup\fR events {\fIresource\fR | \fIminor\fR | \fIall\fR} +.HP \w'\fBdrbdsetup\fR\ 'u +\fBdrbdsetup\fR new\-current\-uuid \fIminor\fR [\-\-clear\-bitmap] .SH "DESCRIPTION" .PP drbdsetup is used to associate DRBD devices with their backing block devices, to set up DRBD device pairs to mirror their backing block devices, and to inspect the configuration of running DRBD devices\&. @@ -94,17 +115,29 @@ .RS 4 In case the specified DRBD device (minor number) does not exist yet, create it implicitly\&. .RE +.SS "new\-resource" .PP -\fB\-\-set\-defaults\fR -.RS 4 -When -\fB\-\-set\-defaults\fR -is given on the command line, all options of the invoked sub\-command that are not explicitly set are reset to their default values\&. -.RE -.SS "disk" +Resources are the primary objects of any DRBD configuration\&. A resource must be created with the +\fBnew\-resource\fR +command before any volumes or minor devices can be created\&. Connections are referenced by name\&. +.SS "new\-minor" +.PP +A +\fIminor\fR +is used as a synonym for replicated block device\&. It is represented in the /dev/ directory by a block device\&. 
It is the application\*(Aqs interface to the DRBD\-replicated block devices\&. These block devices get addressed by their minor numbers on the drbdsetup commandline\&. +.PP +A pair of replicated block devices may have different minor numbers on the two machines\&. They are associated by a common +\fIvolume\-number\fR\&. Volume numbers are local to each connection\&. Minor numbers are global on one node\&. +.SS "del\-resource" +.PP +Destroys a resource object\&. This is only possible if the resource has no volumes\&. +.SS "del\-minor" +.PP +Minors can only be destroyed if its disk is detached\&. +.SS "attach, disk\-options" .\" drbdsetup: disk .PP -Associates +Attach associates \fIdevice\fR with \fIlower_device\fR @@ -118,9 +151,11 @@ \fBnet\fR command\&.) .PP -\fB\-d\fR, \fB\-\-disk\-size \fR\fB\fIsize\fR\fR +With the disk\-options command it is possible to change the options of a minor while it is attached\&. +.PP +\fB\-\-disk\-size \fR\fB\fIsize\fR\fR .RS 4 -You can override DRBD\'s size determination method with this option\&. If you need to use the device before it was ever connected to its peer, use this option to pass the +You can override DRBD\*(Aqs size determination method with this option\&. If you need to use the device before it was ever connected to its peer, use this option to pass the \fIsize\fR of the DRBD device to the driver\&. Default unit is sectors (1s = 512 bytes)\&. .sp @@ -129,11 +164,11 @@ parameter in drbd\&.conf, we strongly recommend to add an explicit unit postfix\&. drbdadm and drbdsetup used to have mismatching default units\&. .RE .PP -\fB\-e\fR, \fB\-\-on\-io\-error \fR\fB\fIerr_handler\fR\fR +\fB\-\-on\-io\-error \fR\fB\fIerr_handler\fR\fR .RS 4 If the driver of the \fIlower_device\fR -reports an error to DRBD, DRBD will either pass the error to the upper layers of the operating system, call a helper program, or detach the device from its backing storage and perform all further IO by requesting it from the peer\&. 
The valid +reports an error to DRBD, DRBD will mark the disk as inconsistent, call a helper program, or detach the device from its backing storage and perform all further IO by requesting it from the peer\&. The valid \fIerr_handlers\fR are: \fBpass_on\fR, @@ -142,96 +177,196 @@ \fBdetach\fR\&. .RE .PP -\fB\-f\fR, \fB\-\-fencing \fR\fB\fIfencing_policy\fR\fR +\fB\-\-fencing \fR\fB\fIfencing_policy\fR\fR .RS 4 Under \fBfencing\fR -we understand preventative measures to avoid situations where both nodes are primary and disconnected (AKA split brain)\&. +we understand preventive measures to avoid situations where both nodes are primary and disconnected (AKA split brain)\&. .sp Valid fencing policies are: .PP \fBdont\-care\fR .RS 4 -This is the default policy\&. No fencing actions are undertaken\&. +This is the default policy\&. No fencing actions are done\&. .RE .PP \fBresource\-only\fR .RS 4 -If a node becomes a disconnected primary\&. it tries to outdate the peer\'s disk\&. This is done by calling the fence\-peer handler\&. The handler is supposed to reach the other node over alternative communication paths and call \'drbdadm outdate res\' there\&. +If a node becomes a disconnected primary, it tries to outdate the peer\*(Aqs disk\&. This is done by calling the fence\-peer handler\&. The handler is supposed to reach the other node over alternative communication paths and call \*(Aqdrbdadm outdate res\*(Aq there\&. .RE .PP \fBresource\-and\-stonith\fR .RS 4 -If a node becomes a disconnected primary, it freezes all its IO operations and calls its fence\-peer handler\&. The fence\-peer handler is supposed to reach the peer over alternative communication paths and call \'drbdadm outdate res\' there\&. In case it cannot reach the peer, it should stonith the peer\&. IO is resumed as soon as the situation is resolved\&. 
In case your handler fails, you can resume IO with the +If a node becomes a disconnected primary, it freezes all its IO operations and calls its fence\-peer handler\&. The fence\-peer handler is supposed to reach the peer over alternative communication paths and call \*(Aqdrbdadm outdate res\*(Aq there\&. In case it cannot reach the peer, it should stonith the peer\&. IO is resumed as soon as the situation is resolved\&. In case your handler fails, you can resume IO with the \fBresume\-io\fR command\&. .RE .RE .PP -\fB\-b\fR, \fB\-\-use\-bmbv\fR +\fB\-\-disk\-barrier\fR, \fB\-\-disk\-flushes\fR, \fB\-\-disk\-drain\fR .RS 4 -In case the backing storage\'s driver has a merge_bvec_fn() function, DRBD has to pretend that it can only process IO requests in units not lager than 4kByte\&. (At time of writing the only known drivers which have such a function are: md (software raid driver), dm (device mapper \- LVM) and DRBD itself) +DRBD has four implementations to express write\-after\-write dependencies to its backing storage device\&. DRBD will use the first method that is supported by the backing storage device and that is not disabled by the user\&. By default all three options are enabled\&. .sp -To get best performance out of DRBD on top of software raid (or any other driver with a merge_bvec_fn() function) you might enable this function, if you know for sure that the merge_bvec_fn() function will deliver the same results on all nodes of your cluster\&. I\&.e\&. the physical disks of the software raid are exactly of the same type\&. USE THIS OPTION ONLY IF YOU KNOW WHAT YOU ARE DOING\&. -.RE -.PP -\fB\-a\fR, \fB\-\-no\-disk\-barrier\fR, \fB\-i\fR, \fB\-\-no\-disk\-flushes\fR, \fB\-D\fR, \fB\-\-no\-disk\-drain\fR -.RS 4 -DRBD has four implementations to express write\-after\-write dependencies to its backing storage device\&. DRBD will use the first method that is supported by the backing storage device and that is not disabled by the user\&. 
+When selecting the method you should not only base your decision on the measurable performance\&. In case your backing storage device has a volatile write cache (plain disks, RAID of plain disks) you should use one of the first two\&. In case your backing storage device has battery\-backed write cache you may go with option 3\&. Option 4 (disable everything, use "none") +\fIis dangerous\fR +on most IO stacks, may result in write\-reordering, and if so, can theoretically be the reason for data corruption, or disturb the DRBD protocol, causing spurious disconnect/reconnect cycles\&. +\fIDo not use\fR +\fBno\-disk\-drain\fR\&. .sp -When selecting the method you should not only base your decision on the measurable performance\&. In case your backing storage device has a volatile write cache (plain disks, RAID of plain disks) you should use one of the first two\&. In case your backing storage device has battery\-backed write cache you may go with option 3 or 4\&. Option 4 will deliver the best performance such devices\&. -.sp -Unfortunately device mapper (LVM) does not support barriers\&. +Unfortunately device mapper (LVM) might not support barriers\&. .sp The letter after "wo:" in /proc/drbd indicates with method is currently in use for a device: b, f, d, n\&. The implementations: .PP barrier .RS 4 -The first requirs that the driver of the backing storage device support barriers (called \'tagged command queuing\' in SCSI and \'native command queuing\' in SATA speak)\&. The use of this method can be disabled by the we -\fB\-\-no\-disk\-barrier\fR -option\&. +The first requires that the driver of the backing storage device support barriers (called \*(Aqtagged command queuing\*(Aq in SCSI and \*(Aqnative command queuing\*(Aq in SATA speak)\&. The use of this method can be disabled by setting the +\fBdisk\-barrier\fR +options to +\fBno\fR\&. 
.RE .PP flush .RS 4 -The second requires that the backing device support disk flushes (called \'force unit access\' in the drive vendors speak)\&. The use of this method can be disabled using the -\fB\-\-no\-disk\-flushes\fR -option\&. +The second requires that the backing device support disk flushes (called \*(Aqforce unit access\*(Aq in the drive vendors speak)\&. The use of this method can be disabled setting +\fBdisk\-flushes\fR +to +\fBno\fR\&. .RE .PP drain .RS 4 -The third method is simply to let write requests drain before write requests of a new reordering domain are issued\&. That was the only implementation before 8\&.0\&.9\&. You can prevent to use of this method by using the -\fB\-\-no\-disk\-drain\fR -option\&. +The third method is simply to let write requests drain before write requests of a new reordering domain are issued\&. That was the only implementation before 8\&.0\&.9\&. .RE .PP none .RS 4 -The fourth method is to not express write\-after\-write dependencies to the backing store at all\&. +The fourth method is to not express write\-after\-write dependencies to the backing store at all, by also specifying +\fB\-\-no\-disk\-drain\fR\&. This +\fIis dangerous\fR +on most IO stacks, may result in write\-reordering, and if so, can theoretically be the reason for data corruption, or disturb the DRBD protocol, causing spurious disconnect/reconnect cycles\&. +\fIDo not use\fR +\fB\-\-no\-disk\-drain\fR\&. .RE .RE .PP -\fB\-m\fR, \fB\-\-no\-md\-flushes\fR +\fB\-\-md\-flushes\fR .RS 4 Disables the use of disk flushes and barrier BIOs when accessing the meta data device\&. See the notes on -\fB\-\-no\-disk\-flushes\fR\&. +\fB\-\-disk\-flushes\fR\&. .RE .PP -\fB\-s\fR, \fB\-\-max\-bio\-bvecs\fR +\fB\-\-max\-bio\-bvecs\fR .RS 4 -In some special circumstances the device mapper stack manages to pass BIOs to DRBD that violate the constraints that are set forth by DRBD\'s merge_bvec() function and which have more than one bvec\&. 
A known example is: phys\-disk \-> DRBD \-> LVM \-> Xen \-> missaligned partition (63) \-> DomU FS\&. Then you might see "bio would need to, but cannot, be split:" in the Dom0\'s kernel log\&. +In some special circumstances the device mapper stack manages to pass BIOs to DRBD that violate the constraints that are set forth by DRBD\*(Aqs merge_bvec() function and which have more than one bvec\&. A known example is: phys\-disk \-> DRBD \-> LVM \-> Xen \-> missaligned partition (63) \-> DomU FS\&. Then you might see "bio would need to, but cannot, be split:" in the Dom0\*(Aqs kernel log\&. .sp -The best workaround is to proper align the partition within the VM (E\&.g\&. start it at sector 1024)\&. Costs 480 KiByte of storage\&. Unfortunately the default of most Linux partitioning tools is to start the first partition at an odd number (63)\&. Therefore most distribution\'s install helpers for virtual linux machines will end up with missaligned partitions\&. The second best workaround is to limit DRBD\'s max bvecs per BIO (= max\-bio\-bvecs) to 1\&. Might cost performance\&. +The best workaround is to proper align the partition within the VM (E\&.g\&. start it at sector 1024)\&. That costs 480 KiB of storage\&. Unfortunately the default of most Linux partitioning tools is to start the first partition at an odd number (63)\&. Therefore most distributions install helpers for virtual linux machines will end up with missaligned partitions\&. The second best workaround is to limit DRBD\*(Aqs max bvecs per BIO (i\&.e\&., the +\fBmax\-bio\-bvecs\fR +option) to 1, but that might cost performance\&. .sp The default value of \fBmax\-bio\-bvecs\fR is 0, which means that there is no user imposed limitation\&. .RE -.SS "net" +.PP +\fB\-\-resync\-rate \fR\fB\fIrate\fR\fR +.RS 4 +To ensure smooth operation of the application on top of DRBD, it is possible to limit the bandwidth that may be used by background synchronization\&. The default is 250 KiB/sec, the default unit is KiB/sec\&. 
+.RE +.PP +\fB\-\-resync\-after \fR\fB\fIminor\fR\fR +.RS 4 +Start resync on this device only if the device with +\fIminor\fR +is already in connected state\&. Otherwise this device waits in SyncPause state\&. +.RE +.PP +\fB\-\-al\-extents \fR\fB\fIextents\fR\fR +.RS 4 +DRBD automatically performs hot area detection\&. With this parameter you control how big the hot area (=active set) can get\&. Each extent marks 4M of the backing storage\&. In case a primary node leaves the cluster unexpectedly, the areas covered by the active set must be resynced upon rejoining of the failed node\&. The data structure is stored in the meta\-data area, therefore each change of the active set is a write operation to the meta\-data device\&. A higher number of extents gives longer resync times but less updates to the meta\-data\&. The default number of +\fIextents\fR +is 127\&. (Minimum: 7, Maximum: 3843) +.RE +.PP +\fB\-\-c\-plan\-ahead \fR\fB\fIplan_time\fR\fR, \fB\-\-c\-fill\-target \fR\fB\fIfill_target\fR\fR, \fB\-\-c\-delay\-target \fR\fB\fIdelay_target\fR\fR, \fB\-\-c\-max\-rate \fR\fB\fImax_rate\fR\fR +.RS 4 +The dynamic resync speed controller gets enabled with setting +\fIplan_time\fR +to a positive value\&. It aims to fill the buffers along the data path with either a constant amount of data +\fIfill_target\fR, or aims to have a constant delay time of +\fIdelay_target\fR +along the path\&. The controller has an upper bound of +\fImax_rate\fR\&. +.sp +By +\fIplan_time\fR +the agility of the controller is configured\&. Higher values yield for slower/lower responses of the controller to deviation from the target value\&. It should be at least 5 times RTT\&. For regular data paths a +\fIfill_target\fR +in the area of 4k to 100k is appropriate\&. For a setup that contains drbd\-proxy it is advisable to use +\fIdelay_target\fR +instead\&. Only when +\fIfill_target\fR +is set to 0 the controller will use +\fIdelay_target\fR\&. 5 times RTT is a reasonable starting value\&. 
+\fIMax_rate\fR +should be set to the bandwidth available between the DRBD\-hosts and the machines hosting DRBD\-proxy, or to the available disk\-bandwidth\&. +.sp +The default value of +\fIplan_time\fR +is 0, the default unit is 0\&.1 seconds\&. +\fIFill_target\fR +has 0 and sectors as default unit\&. +\fIDelay_target\fR +has 1 (100ms) and 0\&.1 as default unit\&. +\fIMax_rate\fR +has 10240 (100MiB/s) and KiB/s as default unit\&. +.RE +.PP +\fB\-\-c\-min\-rate \fR\fB\fImin_rate\fR\fR +.RS 4 +We track the disk IO rate caused by the resync, so we can detect non\-resync IO on the lower level device\&. If the lower level device seems to be busy, and the current resync rate is above +\fImin_rate\fR, we throttle the resync\&. +.sp +The default value of +\fImin_rate\fR +is 4M, the default unit is k\&. If you want to not throttle at all, set it to zero, if you want to throttle always, set it to one\&. +.RE +.PP +\fB\-t\fR, \fB\-\-disk\-timeout \fR\fB\fIdisk_timeout\fR\fR +.RS 4 +If the driver of the +\fIlower_device\fR +does not finish an IO request within +\fIdisk_timeout\fR, DRBD considers the disk as failed\&. If DRBD is connected to a remote host, it will reissue local pending IO requests to the peer, and ship all new IO requests to the peer only\&. The disk state advances to diskless, as soon as the backing block device has finished all IO requests\&. +.sp +The default value is 0, which means that no timeout is enforced\&. The default unit is 100ms\&. This option is available since 8\&.3\&.12\&. +.RE +.PP +\fB\-\-read\-balancing \fR\fB\fImethod\fR\fR +.RS 4 +The supported +\fImethods\fR +for load balancing of read requests are +\fBprefer\-local\fR, +\fBprefer\-remote\fR, +\fBround\-robin\fR, +\fBleast\-pending\fR +and +\fBwhen\-congested\-remote\fR, +\fB32K\-striping\fR, +\fB64K\-striping\fR, +\fB128K\-striping\fR, +\fB256K\-striping\fR, +\fB512K\-striping\fR +and +\fB1M\-striping\fR\&. +.sp +The default value is +\fBprefer\-local\fR\&. 
This option is available since 8\&.4\&.1\&. +.RE +.SS "connect, net\-options" .\" drbdsetup: net .PP -Sets up the +Connect sets up the \fIdevice\fR to listen on \fIaf:local_addr:port\fR @@ -245,41 +380,45 @@ gets used\&. Other supported address families are \fBipv6\fR, \fBssocks\fR -for Dolphin Interconnect Solutions\' "super sockets" and +for Dolphin Interconnect Solutions\*(Aq "super sockets" and \fBsdp\fR for Sockets Direct Protocol (Infiniband)\&. .PP +The net\-options command allows you to change options while the connection is established\&. +.PP +\fB\-\-protocol \fR\fB\fIprotocol\fR\fR +.RS 4 On the TCP/IP link the specified \fIprotocol\fR is used\&. Valid protocol specifiers are A, B, and C\&. -.PP +.sp Protocol A: write IO is reported as completed, if it has reached local disk and local TCP send buffer\&. -.PP +.sp Protocol B: write IO is reported as completed, if it has reached local disk and remote buffer cache\&. -.PP +.sp Protocol C: write IO is reported as completed, if it has reached both local and remote disk\&. +.RE .PP -\fB\-c\fR, \fB\-\-connect\-int \fR\fB\fItime\fR\fR +\fB\-\-connect\-int \fR\fB\fItime\fR\fR .RS 4 -In case it is not possible to connect to the remote DRBD device immediately, DRBD keeps on trying to connect\&. With this option you can set the time between two tries\&. The default value is 10 seconds, the unit is 1 second\&. +In case it is not possible to connect to the remote DRBD device immediately, DRBD keeps on trying to connect\&. With this option you can set the time between two retries\&. The default value is 10\&. The unit is seconds\&. .RE .PP -\fB\-i\fR, \fB\-\-ping\-int \fR\fB\fItime\fR\fR +\fB\-\-ping\-int \fR\fB\fItime\fR\fR .RS 4 If the TCP/IP connection linking a DRBD device pair is idle for more than \fItime\fR -seconds, DRBD will generate a keep\-alive packet to check if its partner is still alive\&. The default value is 10 seconds, the unit is 1 second\&. 
+seconds, DRBD will generate a keep\-alive packet to check if its partner is still alive\&. The default value is 10\&. The unit is seconds\&. .RE .PP -\fB\-t\fR, \fB\-\-timeout \fR\fB\fIval\fR\fR +\fB\-\-timeout \fR\fB\fIval\fR\fR .RS 4 If the partner node fails to send an expected response packet within \fIval\fR -10ths -of a second, the partner node is considered dead and therefore the TCP/IP connection is abandoned\&. The default value is 60 (= 6 seconds)\&. +tenths of a second, the partner node is considered dead and therefore the TCP/IP connection is abandoned\&. The default value is 60 (= 6 seconds)\&. .RE .PP -\fB\-S\fR, \fB\-\-sndbuf\-size \fR\fB\fIsize\fR\fR +\fB\-\-sndbuf\-size \fR\fB\fIsize\fR\fR .RS 4 The socket send buffer is used to store packets sent to the secondary node, which are not yet acknowledged (from a network point of view) by the secondary node\&. When using protocol A, it might be necessary to increase the size of this data structure in order to increase asynchronicity between primary and secondary nodes\&. But keep in mind that more asynchronicity is synonymous with more data loss in the case of a primary node failure\&. Since 8\&.0\&.13 resp\&. 8\&.2\&.7 setting the \fIsize\fR @@ -288,54 +427,54 @@ is 0, i\&.e\&. autotune\&. .RE .PP -\fB\-r\fR, \fB\-\-rcvbuf\-size \fR\fB\fIsize\fR\fR +\fB\-\-rcvbuf\-size \fR\fB\fIsize\fR\fR .RS 4 -Packets received from the network are stored in the socket receive buffer first\&. From there they are consumed by DRBD\&. Before 8\&.3\&.2 the receive buffer\'s size was always set to the size of the socket send buffer\&. Since 8\&.3\&.2 they can be tuned independently\&. A value of 0 means that the kernel should autotune this\&. The default +Packets received from the network are stored in the socket receive buffer first\&. From there they are consumed by DRBD\&. Before 8\&.3\&.2 the receive buffer\*(Aqs size was always set to the size of the socket send buffer\&. 
Since 8\&.3\&.2 they can be tuned independently\&. A value of 0 means that the kernel should autotune this\&. The default \fIsize\fR is 0, i\&.e\&. autotune\&. .RE .PP -\fB\-k\fR, \fB\-\-ko\-count \fR\fB\fIcount\fR\fR +\fB\-\-ko\-count \fR\fB\fIcount\fR\fR .RS 4 In case the secondary node fails to complete a single write request for \fIcount\fR times the -\fItimeout\fR, it is expelled from the cluster\&. (I\&.e\&. the primary node goes into StandAlone mode\&.) The default is 0, which disables this feature\&. +\fItimeout\fR, it is expelled from the cluster, i\&.e\&. the primary node goes into StandAlone mode\&. The default is 0, which disables this feature\&. .RE .PP -\fB\-e\fR, \fB\-\-max\-epoch\-size \fR\fB\fIval\fR\fR +\fB\-\-max\-epoch\-size \fR\fB\fIval\fR\fR .RS 4 With this option the maximal number of write requests between two barriers is limited\&. Should be set to the same as -\fB\-\-max\-buffers \fR\&. Values smaller than 100 can lead to degraded performance\&. The default value is 2048\&. +\fB\-\-max\-buffers\fR\&. Values smaller than 10 can lead to degraded performance\&. The default value is 2048\&. .RE .PP -\fB\-b\fR, \fB\-\-max\-buffers \fR\fB\fIval\fR\fR +\fB\-\-max\-buffers \fR\fB\fIval\fR\fR .RS 4 -With this option the maximal number of buffer pages allocated by DRBD\'s receiver thread is limited\&. Should be set to the same as -\fB\-\-max\-epoch\-size \fR\&. Small values could lead to degraded performance\&. (Minimum 32) The default value is 2048\&. +With this option the maximal number of buffer pages allocated by DRBD\*(Aqs receiver thread is limited\&. Should be set to the same as +\fB\-\-max\-epoch\-size\fR\&. Small values could lead to degraded performance\&. The default value is 2048, the minimum 32\&. 
.RE .PP -\fB\-u\fR, \fB\-\-unplug\-watermark \fR\fB\fIval\fR\fR +\fB\-\-unplug\-watermark \fR\fB\fIval\fR\fR .RS 4 When the number of pending write requests on the standby (secondary) node exceeds the unplug\-watermark, we trigger the request processing of our backing storage device\&. Some storage controllers deliver better performance with small values, others deliver best performance when the value is set to the same value as max\-buffers\&. Minimum 16, default 128, maximum 131072\&. .RE .PP -\fB\-m\fR, \fB\-\-allow\-two\-primaries \fR +\fB\-\-allow\-two\-primaries \fR .RS 4 With this option set you may assign primary role to both nodes\&. You only should use this option if you use a shared storage file system on top of DRBD\&. At the time of writing the only ones are: OCFS2 and GFS\&. If you use this option with any other file system, you are going to crash your nodes and to corrupt your data! .RE .PP -\fB\-a\fR, \fB\-\-cram\-hmac\-alg \fR\fIalg\fR +\fB\-\-cram\-hmac\-alg \fR\fB\fIalg\fR\fR .RS 4 You need to specify the HMAC algorithm to enable peer authentication at all\&. You are strongly encouraged to use peer authentication\&. The HMAC algorithm will be used for the challenge response authentication of the peer\&. You may specify any digest algorithm that is named in /proc/crypto\&. .RE .PP -\fB\-x\fR, \fB\-\-shared\-secret \fR\fIsecret\fR +\fB\-\-shared\-secret \fR\fB\fIsecret\fR\fR .RS 4 The shared secret used in peer authentication\&. May be up to 64 characters\&. 
.RE .PP -\fB\-A\fR, \fB\-\-after\-sb\-0pri \fR\fIasb\-0p\-policy\fR +\fB\-\-after\-sb\-0pri \fR\fB\fIasb\-0p\-policy\fR\fR .RS 4 possible policies are: .PP @@ -370,7 +509,7 @@ .RE .RE .PP -\fB\-B\fR, \fB\-\-after\-sb\-1pri \fR\fIasb\-1p\-policy\fR +\fB\-\-after\-sb\-1pri \fR\fB\fIasb\-1p\-policy\fR\fR .RS 4 possible policies are: .PP @@ -383,19 +522,19 @@ .RS 4 Discard the version of the secondary if the outcome of the \fBafter\-sb\-0pri\fR -algorithm would also destroy the current secondary\'s data\&. Otherwise disconnect\&. +algorithm would also destroy the current secondary\*(Aqs data\&. Otherwise disconnect\&. .RE .PP \fBdiscard\-secondary\fR .RS 4 -Discard the secondary\'s version\&. +Discard the secondary\*(Aqs version\&. .RE .PP \fBcall\-pri\-lost\-after\-sb\fR .RS 4 Always honor the outcome of the \fBafter\-sb\-0pri \fR -algorithm\&. In case it decides the current secondary has the right data, call the +algorithm\&. In case it decides the current secondary has the correct data, call the \fBpri\-lost\-after\-sb\fR on the current primary\&. .RE @@ -404,11 +543,11 @@ .RS 4 Always honor the outcome of the \fBafter\-sb\-0pri \fR -algorithm\&. In case it decides the current secondary has the right data, accept a possible instantaneous change of the primary\'s data\&. +algorithm\&. In case it decides the current secondary has the correct data, accept a possible instantaneous change of the primary\*(Aqs data\&. .RE .RE .PP -\fB\-C\fR, \fB\-\-after\-sb\-2pri \fR\fIasb\-2p\-policy\fR +\fB\-\-after\-sb\-2pri \fR\fB\fIasb\-2p\-policy\fR\fR .RS 4 possible policies are: .PP @@ -430,20 +569,20 @@ .RS 4 Always honor the outcome of the \fBafter\-sb\-0pri \fR -algorithm\&. In case it decides the current secondary has the right data, accept a possible instantaneous change of the primary\'s data\&. +algorithm\&. In case it decides the current secondary has the right data, accept a possible instantaneous change of the primary\*(Aqs data\&. 
.RE .RE .PP -\fB\-P\fR, \fB\-\-always\-asbp\fR +\fB\-\-always\-asbp\fR .RS 4 Normally the automatic after\-split\-brain policies are only used if current states of the UUIDs do not indicate the presence of a third node\&. .sp With this option you request that the automatic after\-split\-brain policies are used as long as the data sets of the nodes are somehow related\&. This might cause a full sync, if the UUIDs indicate the presence of a third node\&. (Or double faults have led to strange UUID sets\&.) .RE .PP -\fB\-R\fR, \fB\-\-rr\-conflict \fR\fIrole\-resync\-conflict\-policy\fR +\fB\-\-rr\-conflict \fR\fB\fIrole\-resync\-conflict\-policy\fR\fR .RS 4 -This option sets DRBD\'s behavior when DRBD deduces from its meta data that a resynchronization is needed, and the SyncTarget node is already primary\&. The possible settings are: +This option sets DRBD\*(Aqs behavior when DRBD deduces from its meta data that a resynchronization is needed, and the SyncTarget node is already primary\&. The possible settings are: \fBdisconnect\fR, \fBcall\-pri\-lost\fR and @@ -458,12 +597,12 @@ .sp With the \fBviolently\fR -setting you allow DRBD to force a primary node into SyncTarget state\&. This means that with that action the data exposed by DRBD change to the SyncSource\'s version of the data instantaneously\&. USE THIS OPTION ONLY IF YOU KNOW WHAT YOU ARE DOING\&. +setting you allow DRBD to force a primary node into SyncTarget state\&. This means that the data exposed by DRBD changes to the SyncSource\*(Aqs version of the data instantaneously\&. USE THIS OPTION ONLY IF YOU KNOW WHAT YOU ARE DOING\&. .RE .PP -\fB\-d\fR, \fB\-\-data\-integrity\-alg \fR\fIhash_alg\fR +\fB\-\-data\-integrity\-alg \fR\fB\fIhash_alg\fR\fR .RS 4 -DRBD can ensure the data integrity of the user\'s data on the network by comparing hash values\&. Normally this is ensured by the 16 bit checksums in the headers of TCP/IP packets\&. This option can be set to any of the kernel\'s data digest algorithms\&. 
In a typical kernel configuration you should have at least one of +DRBD can ensure the data integrity of the user\*(Aqs data on the network by comparing hash values\&. Normally this is ensured by the 16 bit checksums in the headers of TCP/IP packets\&. This option can be set to any of the kernel\*(Aqs data digest algorithms\&. In a typical kernel configuration you should have at least one of \fBmd5\fR, \fBsha1\fR, and \fBcrc32c\fR @@ -472,51 +611,54 @@ See also the notes on data integrity on the drbd\&.conf manpage\&. .RE .PP -\fB\-o\fR, \fB\-\-no\-tcp\-cork \fR +\fB\-\-no\-tcp\-cork\fR .RS 4 -DRBD usually uses the TCP socket option TCP_CORK to hint to the network stack when it can expect more data, and when it should flush out what it has in its send queue\&. It turned out that there is at lease one network stack that performs worse when one uses this hinting method\&. Therefore we introducted this option, which disable the setting and clearing of the TCP_CORK socket option by DRBD\&. +DRBD usually uses the TCP socket option TCP_CORK to hint to the network stack when it can expect more data, and when it should flush out what it has in its send queue\&. There is at least one network stack that performs worse when one uses this hinting method\&. Therefore we introduced this option, which disables the setting and clearing of the TCP_CORK socket option by DRBD\&. .RE .PP -\fB\-p\fR, \fB\-\-ping\-timeout \fR\fIping_timeout\fR +\fB\-\-ping\-timeout \fR\fB\fIping_timeout\fR\fR .RS 4 -The time the peer has to answer to a keep\-alive packet\&. In case the peer\'s reply is not received within this time period, it is considered as dead\&. The default value is 500ms, the default unit is 100ms\&. +The time the peer has to answer to a keep\-alive packet\&. In case the peer\*(Aqs reply is not received within this time period, it is considered dead\&. The default unit is tenths of a second, the default value is 5 (for half a second)\&. 
.RE .PP -\fB\-D\fR, \fB\-\-discard\-my\-data \fR +\fB\-\-discard\-my\-data\fR .RS 4 -Use this option to manually recover from a split\-brain situation\&. In case you do not have any automatic after\-split\-brain policies selected, the nodes refuse to connect\&. By passing this option you make a node to sync target immediately after successful connect\&. +Use this option to manually recover from a split\-brain situation\&. In case you do not have any automatic after\-split\-brain policies selected, the nodes refuse to connect\&. By passing this option you make this node a sync target immediately after successful connect\&. .RE -.SS "syncer" -.\" drbdsetup: syncer .PP -Changes the synchronization daemon parameters of -\fIdevice\fR -at runtime\&. -.PP -\fB\-r\fR, \fB\-\-rate \fR\fB\fIrate\fR\fR +\fB\-\-tentative\fR .RS 4 -To ensure smooth operation of the application on top of DRBD, it is possible to limit the bandwidth that may be used by background synchronization\&. The default is 250 KB/sec, the default unit is KB/sec\&. +Causes DRBD to abort the connection process after the resync handshake, i\&.e\&. no resync gets performed\&. You can find out which resync DRBD would perform by looking at the kernel\*(Aqs log file\&. .RE .PP -\fB\-a\fR, \fB\-\-after \fR\fB\fIminor\fR\fR +\fB\-\-on\-congestion \fR\fB\fIcongestion_policy\fR\fR, \fB\-\-congestion\-fill \fR\fB\fIfill_threshold\fR\fR, \fB\-\-congestion\-extents \fR\fB\fIactive_extents_threshold\fR\fR .RS 4 -Start resync on this device only if the device with -\fIminor\fR -is already in connected state\&. Otherwise this device waits in SyncPause state\&. -.RE -.PP -\fB\-e\fR, \fB\-\-al\-extents \fR\fB\fIextents\fR\fR -.RS 4 -DRBD automatically performs hot area detection\&. With this parameter you control how big the hot area (=active set) can get\&. Each extent marks 4M of the backing storage\&. 
In case a primary node leaves the cluster unexpectedly, the areas covered by the active set must be resynced upon rejoining of the failed node\&. The data structure is stored in the meta\-data area, therefore each change of the active set is a write operation to the meta\-data device\&. A higher number of extents gives longer resync times but less updates to the meta\-data\&. The default number of -\fIextents\fR -is 127\&. (Minimum: 7, Maximum: 3843) +By default DRBD blocks when the available TCP send queue becomes full\&. That means it will slow down the application that generates the write requests that cause DRBD to send more data down that TCP connection\&. +.sp +When DRBD is deployed with DRBD\-proxy it might be more desirable that DRBD goes into AHEAD/BEHIND mode shortly before the send queue becomes full\&. In AHEAD/BEHIND mode DRBD does no longer replicate data, but still keeps the connection open\&. +.sp +The advantage of the AHEAD/BEHIND mode is that the application is not slowed down, even if DRBD\-proxy\*(Aqs buffer is not sufficient to buffer all write requests\&. The downside is that the peer node falls behind, and that a resync will be necessary to bring it back into sync\&. During that resync the peer node will have an inconsistent disk\&. +.sp +Available +\fIcongestion_policy\fRs are +\fBblock\fR +and +\fBpull\-ahead\fR\&. The default is +\fBblock\fR\&. +\fIFill_threshold\fR +might be in the range of 0 to 10GiBytes\&. The default is 0 which disables the check\&. +\fIActive_extents_threshold\fR +has the same limits as +\fBal\-extents\fR\&. +.sp +The AHEAD/BEHIND mode and its settings are available since DRBD 8\&.3\&.10\&. 
.RE .PP -\fB\-v\fR, \fB\-\-verify\-alg \fR\fB\fIhash\-alg\fR\fR +\fB\-\-verify\-alg \fR\fB\fIhash\-alg\fR\fR .RS 4 During online verification (as initiated by the \fBverify\fR -sub\-command), rather than doing a bit\-wise comparison, DRBD applies a hash function to the contents of every block being verified, and compares that hash with the peer\&. This option defines the hash algorithm being used for that purpose\&. It can be set to any of the kernel\'s data digest algorithms\&. In a typical kernel configuration you should have at least one of +sub\-command), rather than doing a bit\-wise comparison, DRBD applies a hash function to the contents of every block being verified, and compares that hash with the peer\&. This option defines the hash algorithm being used for that purpose\&. It can be set to any of the kernel\*(Aqs data digest algorithms\&. In a typical kernel configuration you should have at least one of \fBmd5\fR, \fBsha1\fR, and \fBcrc32c\fR @@ -525,14 +667,7 @@ See also the notes on data integrity on the drbd\&.conf manpage\&. .RE .PP -\fB\-c\fR, \fB\-\-cpu\-mask \fR\fB\fIcpu\-mask\fR\fR -.RS 4 -Sets the cpu\-affinity\-mask for DRBD\'s kernel threads of this device\&. The default value of -\fIcpu\-mask\fR -is 0, which means that DRBD\'s kernel threads should be spread over all CPUs of the machine\&. This value must be given in hexadecimal notation\&. If it is too big it will be truncated\&. -.RE -.PP -\fB\-C\fR, \fB\-\-csums\-alg \fR\fB\fIhash\-alg\fR\fR +\fB\-\-csums\-alg \fR\fB\fIhash\-alg\fR\fR .RS 4 A resync process sends all marked data blocks form the source to the destination node, as long as no \fBcsums\-alg\fR @@ -543,16 +678,46 @@ will lower the required bandwidth in exchange for CPU cycles\&. .RE .PP -\fB\-R\fR, \fB\-\-use\-rle\fR +\fB\-\-use\-rle\fR .RS 4 During resync\-handshake, the dirty\-bitmaps of the nodes are exchanged and merged (using bit\-or), so the nodes will have the same understanding of which blocks are dirty\&. 
On large devices, the fine grained dirty\-bitmap can become large as well, and the bitmap exchange can take quite some time on low\-bandwidth links\&. .sp Because the bitmap typically contains compact areas where all bits are unset (clean) or set (dirty), a simple run\-length encoding scheme can considerably reduce the network traffic necessary for the bitmap exchange\&. .sp -For backward compatibilty reasons, and because on fast links this possibly does not improve transfer time but consumes cpu cycles, this defaults to off\&. +For backward compatibility reasons, and because on fast links this possibly does not improve transfer time but consumes cpu cycles, this defaults to off\&. .sp Introduced in 8\&.3\&.2\&. .RE +.SS "resource\-options" +.\" drbdsetup: resource-options +.PP +Changes the options of the resource at runtime\&. +.PP +\fB\-\-cpu\-mask \fR\fB\fIcpu\-mask\fR\fR +.RS 4 +Sets the cpu\-affinity\-mask for DRBD\*(Aqs kernel threads of this device\&. The default value of +\fIcpu\-mask\fR +is 0, which means that DRBD\*(Aqs kernel threads should be spread over all CPUs of the machine\&. This value must be given in hexadecimal notation\&. If it is too big it will be truncated\&. +.RE +.PP +\fB\-\-on\-no\-data\-accessible \fR\fB\fIond\-policy\fR\fR +.RS 4 +This setting controls what happens to IO requests on a degraded, disk less node (I\&.e\&. no data store is reachable)\&. The available policies are +\fBio\-error\fR +and +\fBsuspend\-io\fR\&. +.sp +If +\fIond\-policy\fR +is set to +\fBsuspend\-io\fR +you can either resume IO by attaching/connecting the last lost data storage, or by the +\fBdrbdadm resume\-io \fR\fB\fIres\fR\fR +command\&. The latter will result in IO errors of course\&. +.sp +The default is +\fBio\-error\fR\&. This setting is available since DRBD 8\&.3\&.9\&. +.RE .SS "primary" .\" drbdsetup: primary .PP @@ -568,9 +733,14 @@ \fB\-\-allow\-two\-primaries\fR option, you override this behavior and instruct DRBD to allow two primaries\&. 
.PP -\fB\-o\fR, \fB\-\-overwrite\-data\-of\-peer\fR +\fB\-\-overwrite\-data\-of\-peer\fR .RS 4 -Becoming primary fails if the local replica is inconsistent\&. By using this option you can force it into primary role anyway\&. USE THIS OPTION ONLY IF YOU KNOW WHAT YOU ARE DOING\&. +Alias for \-\-force\&. +.RE +.PP +\fB\-\-force\fR +.RS 4 +Becoming primary fails if the local replica is not up\-to\-date\&. I\&.e\&. when it is inconsistent, outdated or consistent\&. By using this option you can force it into primary role anyway\&. USE THIS OPTION ONLY IF YOU KNOW WHAT YOU ARE DOING\&. .RE .SS "secondary" .\" drbdsetup: secondary @@ -584,7 +754,7 @@ .\" drbdsetup: verify .PP This initiates on\-line device verification\&. During on\-line verification, the contents of every block on the local node are compared to those on the peer node\&. Device verification progress can be monitored via -/proc/drbd\&. Any blocks whose content differs from that of the corresponding block on the peer node will be marked out\-of\-sync in DRBD\'s on\-disk bitmap; they are +/proc/drbd\&. Any blocks whose content differs from that of the corresponding block on the peer node will be marked out\-of\-sync in DRBD\*(Aqs on\-disk bitmap; they are \fInot\fR brought back in sync automatically\&. To do that, simply disconnect and reconnect the resource\&. .PP @@ -596,11 +766,13 @@ .PP See also the notes on data integrity on the drbd\&.conf manpage\&. .PP -\fB\-s\fR, \fB\-\-start \fR\fB\fIstart\-sector\fR\fR +\fB\-\-start \fR\fB\fIstart\-sector\fR\fR .RS 4 Since version 8\&.3\&.2, on\-line verification should resume from the last position after connection loss\&. It may also be started from an arbitrary position by setting this option\&. .sp -Default unit is sectors\&. You may also specify a unit explicitly\&. The start\-sector will be rounded down to a multiple of 8 sectors (4kB)\&. +Default unit is sectors\&. You may also specify a unit explicitly\&. 
The +\fBstart\-sector\fR +will be rounded down to a multiple of 8 sectors (4kB)\&. .RE .SS "invalidate" .\" drbdsetup: invalidate @@ -614,6 +786,8 @@ .\" drbdsetup: invalidate-remote .PP This forces the local device of a pair of connected DRBD devices into SyncSource state, which means that all data blocks of the device are copied to the peer\&. +.PP +On a disconnected device, this will set all bits in the out of sync bitmap\&. As a side effect, this suspends updates to the on disk activity log\&. Updates to the on disk activity log will be resumed automatically when necessary\&. .SS "wait\-connect" .\" drbdsetup: wait-connect .PP @@ -621,7 +795,7 @@ \fIdevice\fR can communicate with its partner device\&. .PP -\fB\-t\fR, \fB\-\-wfc\-timeout \fR\fB\fIwfc_timeout\fR\fR, \fB\-d\fR, \fB\-\-degr\-wfc\-timeout \fR\fB\fIdegr_wfc_timeout\fR\fR, \fB\-o\fR, \fB\-\-outdated\-wfc\-timeout \fR\fB\fIoutdated_wfc_timeout\fR\fR, \fB\-w\fR, \fB\-\-wait\-after\-sb\fR +\fB\-\-wfc\-timeout \fR\fB\fIwfc_timeout\fR\fR, \fB\-\-degr\-wfc\-timeout \fR\fB\fIdegr_wfc_timeout\fR\fR, \fB\-\-outdated\-wfc\-timeout \fR\fB\fIoutdated_wfc_timeout\fR\fR, \fB\-\-wait\-after\-sb\fR .RS 4 This command will fail if the \fIdevice\fR @@ -631,9 +805,9 @@ \fIwfc_timeout\fR is used\&. If the peer was already down before this node was rebooted, the \fIdegr_wfc_timeout\fR -is used\&. If the peer was sucessfully outdated before this node was rebooted the +is used\&. If the peer was successfully outdated before this node was rebooted the \fIoutdated_wfc_timeout\fR -is used\&. The default value for all those timeout values is 0 which means to wait forever\&. In case the connection status goes down to StandAlone because the peer appeared but the devices had a split brain situation, the default for the command is to terminate\&. You can change this behavior with the +is used\&. The default value for all those timeout values is 0 which means to wait forever\&. The unit is seconds\&. 
In case the connection status goes down to StandAlone because the peer appeared but the devices had a split brain situation, the default for the command is to terminate\&. You can change this behavior with the \fB\-\-wait\-after\-sb\fR option\&. .RE @@ -663,6 +837,13 @@ \fIdevice\fR\&. This means that the \fIdevice\fR is detached from its backing storage device\&. +.PP +\fB\-f\fR, \fB\-\-force\fR +.RS 4 +A regular detach returns after the disk state finally reached diskless\&. As a consequence detaching from a frozen backing block device never terminates\&. +.sp +On the other hand, a forced detach returns immediately\&. It allows you to detach DRBD from a frozen backing block device\&. Please note that the disk will be marked as failed until all pending IO requests were finished by the backing block device\&. +.RE .SS "down" .\" drbdsetup: down .PP @@ -674,7 +855,8 @@ .PP Shows the current roles of the \fIdevice\fR -and its peer\&. (local/peer)\&. +and its peer, as +\fIlocal\fR/\fIpeer\fR\&. .SS "state" .\" drbdsetup: state .PP @@ -687,39 +869,37 @@ .SS "dstate" .\" drbdsetup: dstate .PP -Shows the current states of the backing storage devices\&. (local/peer) -.SS "status" -.\" drbdsetup: status -.PP -Shows the current status of the device in xml\-like format\&. Example output: -.sp -.if n \{\ -.RS 4 -.\} -.nf - - -.fi -.if n \{\ -.RE -.\} -.sp +Shows the current states of the backing storage devices, as +\fIlocal\fR/\fIpeer\fR\&. .SS "resize" .\" drbdsetup: resize .PP This causes DRBD to reexamine the size of the -\fIdevice\fR\'s backing storage device\&. To actually do online growing you need to extend the backing storages on both devices and call the +\fIdevice\fR\*(Aqs backing storage device\&. To actually do online growing you need to extend the backing storages on both devices and call the \fBresize\fR -command one of your nodes\&. +command on one of your nodes\&. 
.PP The \fB\-\-assume\-peer\-has\-space\fR -allows you to resize a device which is currently not connected to the peer\&. Use with care, since if you do not resize the peer\'s disk as well, further connect attempts of the two will fail\&. +allows you to resize a device which is currently not connected to the peer\&. Use with care, since if you do not resize the peer\*(Aqs disk as well, further connect attempts of the two will fail\&. +.PP +When the +\fB\-\-assume\-clean\fR +option is given DRBD will skip the resync of the new storage\&. Only do this if you know that the new storage was initialized to the same content by other means\&. +.SS "check\-resize" +.\" drbdsetup: check-resize +.PP +To enable DRBD to detect offline resizing of backing devices this command may be used to record the current size of backing devices\&. The size is stored in files in /var/lib/drbd/ named drbd\-minor\-??\&.lkbd +.PP +This command is called by +\fBdrbdadm resize \fR\fB\fIres\fR\fR +after +\fBdrbdsetup \fR\fB\fIdevice\fR\fR\fB resize\fR +returned\&. .SS "pause\-sync" .\" drbdsetup: pause-sync .PP -Temporarily suspend an ongoing resynchronization by setting the local pause flag\&. Resync only progresses if neither the local nor the remote pause flag is set\&. It might be desirable to postpone DRBD\'s resynchronization after eventual resynchronization of the backing storage\'s RAID setup\&. +Temporarily suspend an ongoing resynchronization by setting the local pause flag\&. Resync only progresses if neither the local nor the remote pause flag is set\&. It might be desirable to postpone DRBD\*(Aqs resynchronization after eventual resynchronization of the backing storage\*(Aqs RAID setup\&. .SS "resume\-sync" .\" drbdsetup: resume-sync .PP @@ -729,15 +909,17 @@ .PP Mark the data on the local backing storage as outdated\&. An outdated device refuses to become primary\&. This is used in conjunction with \fBfencing\fR -and by the peer\'s fence\-peer handler\&. 
+and by the peer\*(Aqs +\fBfence\-peer\fR +handler\&. .SS "show\-gi" .\" drbdsetup: show-gi .PP -Displays the device\'s data generation identifiers verbosely\&. +Displays the device\*(Aqs data generation identifiers verbosely\&. .SS "get\-gi" .\" drbdsetup: get-gi .PP -Displays the device\'s data generation identifiers\&. +Displays the device\*(Aqs data generation identifiers\&. .SS "show" .\" drbdsetup: show .PP @@ -756,25 +938,25 @@ .SS "events" .\" drbdsetup: events .PP -Displays every state change of DRBD and all calls to helper programs\&. This might be used to get notified of DRBD\'s state changes by piping the output to another program\&. +Displays every state change of DRBD and all calls to helper programs\&. This might be used to get notified of DRBD\*(Aqs state changes by piping the output to another program\&. .PP -\fB\-a\fR, \fB\-\-all\-devices\fR +\fB\-\-all\-devices\fR .RS 4 Display the events of all DRBD minors\&. .RE .PP -\fB\-u\fR, \fB\-\-unfiltered\fR +\fB\-\-unfiltered\fR .RS 4 This is a debugging aid that displays the content of all received netlink messages\&. .RE .SS "new\-current\-uuid" .\" drbdsetup: new-current-uuid .PP -Generates a new currend UUID and rotates all other UUID values\&. This has at least two use cases, namely to skip the initial sync, and to reduce network bandwidth when starting in a single node configuration and then later (re\-)integrating a remote site\&. +Generates a new current UUID and rotates all other UUID values\&. This has at least two use cases, namely to skip the initial sync, and to reduce network bandwidth when starting in a single node configuration and then later (re\-)integrating a remote site\&. .PP Available option: .PP -\fB\-c\fR, \fB\-\-clear\-bitmap\fR +\fB\-\-clear\-bitmap\fR .RS 4 Clears the sync bitmap in addition to generating a new current UUID\&. .RE @@ -793,7 +975,7 @@ \fIboth\fR nodes, initialize meta data and configure the device\&. 
.sp -\fBdrbdadm \-\- \-\-force create\-md \fR\fB\fIres\fR\fR +\fBdrbdadm create\-md \-\-force \fR\fB\fIres\fR\fR .RE .sp .RS 4 @@ -819,7 +1001,7 @@ .\} They are now Connected Secondary/Secondary Inconsistent/Inconsistent\&. Generate a new current\-uuid and clear the dirty bitmap\&. .sp -\fBdrbdadm \-\- \-\-clear\-bitmap new\-current\-uuid \fR\fB\fIres\fR\fR +\fBdrbdadm new\-current\-uuid \-\-clear\-bitmap \fR\fB\fIres\fR\fR .RE .sp .RS 4 @@ -836,17 +1018,17 @@ .sp \fBmkfs \-t \fR\fB\fIfs\-type\fR\fR\fB $(drbdadm sh\-dev \fR\fB\fIres\fR\fR\fB)\fR .RE -.sp -.RE .PP -One obvious side\-effect is that the replica are full of old garbage (unless you made them identical using other means), so any online\-verify is expected to find any number of out\-of\-sync blocks\&. +One obvious side\-effect is that the replica is full of old garbage (unless you made them identical using other means), so any online\-verify is expected to find any number of out\-of\-sync blocks\&. .PP \fIYou must not use this on pre\-existing data!\fR -Even though it may appear to work at first glance, once you switch to the other node, your data is toast, as it never got replicated\&. So do +Even though it may appear to work at first glance, once you switch to the other node, your data is toast, as it never got replicated\&. So \fIdo not leave out the mkfs\fR (or equivalent)\&. .PP -This can also be used to shorten the initial resync of a cluster where the second node is added after the first node is gone into production, by means of disk shipping\&. This use\-case works on disconnected devices only, the device may be in primary or secondary role\&. The necessary steps are: +This can also be used to shorten the initial resync of a cluster where the second node is added after the first node is gone into production, by means of disk shipping\&. This use\-case works on disconnected devices only, the device may be in primary or secondary role\&. 
+.PP +The necessary steps on the current active server are: .sp .RS 4 .ie n \{\ @@ -856,7 +1038,7 @@ .sp -1 .IP " 1." 4.2 .\} -\fBdrbdsetup \fR\fB\fIdevice\fR\fR\fB new\-current\-uuid \-\-clear\-bitmap\fR +\fBdrbdsetup new\-current\-uuid \-\-clear\-bitmap \fR\fB\fIminor\fR\fR\fB \fR .RE .sp .RS 4 @@ -878,27 +1060,16 @@ .sp -1 .IP " 3." 4.2 .\} -\fBdrbdsetup \fR\fB\fIdevice\fR\fR\fB new\-current\-uuid\fR +\fBdrbdsetup new\-current\-uuid \fR\fB\fIminor\fR\fR\fB \fR .RE .sp -.RS 4 -.ie n \{\ -\h'-04' 4.\h'+01'\c -.\} -.el \{\ -.sp -1 -.IP " 4." 4.2 -.\} -Add the disk to the new secondary node, and join it to the cluster\&. You will get a resync of that parts that where changed since the first call to +Now add the disk to the new secondary node, and join it to the cluster\&. You will get a resync of that parts that were changed since the first call to \fBdrbdsetup\fR in step 1\&. -.RE -.sp -.RE .SH "EXAMPLES" .PP For examples, please have a look at the -\m[blue]\fBDRBD User\'s Guide\fR\m[]\&\s-2\u[1]\d\s+2\&. +\m[blue]\fBDRBD User\*(Aqs Guide\fR\m[]\&\s-2\u[1]\d\s+2\&. .SH "VERSION" .sp This document was revised for version 8\&.3\&.2 of the DRBD distribution\&. @@ -915,7 +1086,9 @@ .PP \fBdrbd.conf\fR(5), \fBdrbd\fR(8), -\fBdrbddisk\fR(8)\fBdrbdadm\fR(8)\m[blue]\fBDRBD User\'s Guide\fR\m[]\&\s-2\u[1]\d\s+2, +\fBdrbddisk\fR(8), +\fBdrbdadm\fR(8), +\m[blue]\fBDRBD User\*(Aqs Guide\fR\m[]\&\s-2\u[1]\d\s+2, \m[blue]\fBDRBD web site\fR\m[]\&\s-2\u[2]\d\s+2 .SH "NOTES" .IP " 1." 
4 diff -Nru drbd8-8.3.7/documentation/drbdsetup.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup.xml --- drbd8-8.3.7/documentation/drbdsetup.xml 2010-01-07 09:09:33.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup.xml 2012-02-02 14:09:14.000000000 +0000 @@ -1,1558 +1,1716 @@ - - + + - 5 Dec 2008 + 6 May 2011 + DRBD - 8.3.2 + + 8.4.0 + drbdsetup + 8 + System Administration + drbdsetup - Setup tool for DRBD - - drbdsetup - - + + Setup tool for DRBD + drbdsetup + + - - drbdsetup - - device - - disk - - lower_dev - - - meta_data_dev - - - meta_data_index - - -dsize - -eerr_handler - -ffencing_policy - -b - - - drbdsetup - - device - - net - - af: - - - local_addr - - - :port - - - af: - - - remote_addr - - - :port - - - protocol - - -ctime - -itime - -tval - -Ssize - -rsize - -kcount - -emax_epoch_size - -bmax_buffers - -m - -ahash_alg - -xshared_secret - -Aasb-0p-policy - -Basb-1p-policy - -Casb-2p-policy - -D - -Rrole-resync-conflict-policy - -pping_timeout - -uval - -dhash_alg - -o - - - drbdsetup - - device - - syncer - -adev_minor - -rrate - -eextents - -vverify-hash-alg - -ccpu-mask - -Ccsums-hash-alg - -Ruse-rle - - - drbdsetup - - device - - disconnect - - - drbdsetup - - device - - detach - - - drbdsetup - - device - - down - - - drbdsetup - - device - - primary - -o - - - drbdsetup - - device - - secondary - - - drbdsetup - - device - - verify - -sstart-position - - - drbdsetup - - device - - invalidate - - - drbdsetup - - device - - invalidate-remote - - - drbdsetup - - device - - wait-connect - -twfc_timeout - -ddegr_wfc_timeout - -ooutdated_wfc_timeout - -w - - - drbdsetup - - device - - wait-sync - -twfc_timeout - -ddegr_wfc_timeout - -ooutdated_wfc_timeout - -w - - - drbdsetup - - device - - role - - - drbdsetup - - device - - cstate - - - drbdsetup - - device - - dstate - - - drbdsetup - - device - - status - - - drbdsetup - - device - - resize - -dsize - -fassume-peer-has-space - - - drbdsetup - - device - - pause-sync - - 
- drbdsetup - - device - - resume-sync - - - drbdsetup - - device - - outdate - - - drbdsetup - - device - - show-gi - - - drbdsetup - - device - - get-gi - - - drbdsetup - - device - - show - - - drbdsetup - - device - - suspend-io - - - drbdsetup - - device - - resume-io - - - drbdsetup - - device - - events - -u - -a - - - drbdsetup - - device - - new-current-uuid - -c - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Description - drbdsetup is used to associate DRBD devices with their backing - block devices, to set up DRBD device pairs to mirror their - backing block devices, and to inspect the configuration of - running DRBD devices. - + + drbdsetup is used to associate DRBD devices with their backing block devices, to set up + DRBD device pairs to mirror their backing block devices, and to inspect the configuration of + running DRBD devices. + Note - drbdsetup is a low level tool of the DRBD program suite. It is - used by the data disk and drbd scripts to communicate with - the device driver. - + + drbdsetup is a low level tool of the DRBD program suite. It is used by the data disk and + drbd scripts to communicate with the device driver. + Commands - Each drbdsetup sub-command might require arguments and bring its own - set of options. All values have default units which might be overruled - by K, M or G. These units are defined in the usual way (e.g. K = 2^10 = 1024). - + + Each drbdsetup sub-command might require arguments and bring its own set of options. All + values have default units which might be overruled by K, M or G. These units are defined in + the usual way (e.g. K = 2^10 = 1024). + Common options - All drbdsetup sub-commands accept these two options - In case the specified DRBD device (minor number) does not - exist yet, create it implicitly. 
- When is given on the - command line, all options of the invoked sub-command that - are not explicitly set are reset to their default values. - + All drbdsetup sub-commands accept these two options + + - + + In case the specified DRBD device (minor number) does not exist yet, create it + implicitly. + + + + + + + new-resource + + Resources are the primary objects of any DRBD configuration. A resource must be created + with the command before any volumes or minor devices can be created. + Connections are referenced by name. + - disk + new-minor + + A minor is used as a synonym for replicated block device. It is + represented in the /dev/ directory by a block device. It is the application's interface to + the DRBD-replicated block devices. These block devices get addressed by their minor numbers + on the drbdsetup commandline. + + A pair of replicated block devices may have different minor numbers on the two + machines. They are associated by a common volume-number. Volume numbers + are local to each connection. Minor numbers are global on one node. + + + + del-resource + + Destroys a resource object. This is only possible if the resource has no + volumes. + + + + del-minor + + Minors can only be destroyed if its disk is detached. + + + + attach, disk-options + drbdsetup + disk - Associates device with - lower_device to store its data blocks on. - The (or ) should - only be used if you wish not to use as much as possible from the - backing block devices. - If you do not use , the device - is only ready for use as soon as it was connected to its peer once. - (See the command.) - + + Attach associates device with + lower_device to store its data blocks on. The + (or ) should only be used if you wish not to use as much as + possible from the backing block devices. If you do not use , the + device is only ready for use as soon as it was connected to its + peer once. (See the command.) 
+ + With the disk-options command it is possible to change the options of a minor while it + is attached. + - , - + + - You can override DRBD's size determination method with this - option. If you need to use the device before it was ever - connected to its peer, use this option to pass the - size of the DRBD device to the - driver. Default unit is sectors (1s = 512 bytes). - - If you use the size parameter in drbd.conf, - we strongly recommend to add an explicit unit postfix. - drbdadm and drbdsetup used to have mismatching default units. - + You can override DRBD's size determination method with this option. If you need + to use the device before it was ever connected to its peer, use this option to pass + the size of the DRBD device to the driver. Default unit is + sectors (1s = 512 bytes). + + If you use the size parameter in drbd.conf, we + strongly recommend to add an explicit unit postfix. drbdadm and drbdsetup used to have + mismatching default units. + - , - + + - If the driver of the lower_device - reports an error to DRBD, DRBD will either pass the error - to the upper layers of the operating system, call a helper - program, or detach the device from its backing storage and - perform all further IO by requesting it from the peer. The - valid err_handlers are: - , - and . - + If the driver of the lower_device + reports an error to DRBD, DRBD will mark the disk as inconsistent, + call a helper program, or detach the device from its backing storage and perform all + further IO by requesting it from the peer. The valid + err_handlers are: , + and . + - , - + + - Under we understand preventative - measures to avoid situations where both nodes are primary - and disconnected (AKA split brain). - - Valid fencing policies are: - + Under we understand preventive measures to avoid + situations where both nodes are primary and disconnected (AKA split brain). + + Valid fencing policies are: + - - - + + - This is the default policy. No fencing actions are undertaken. 
- + This is the default policy. No fencing actions are done. + - - - + + - If a node becomes a disconnected primary. it tries to outdate - the peer's disk. This is done by calling the fence-peer - handler. The handler is supposed to reach the other node over - alternative communication paths and call 'drbdadm outdate - res' there. - + If a node becomes a disconnected primary, it tries to outdate the peer's + disk. This is done by calling the fence-peer handler. The handler is supposed to + reach the other node over alternative communication paths and call 'drbdadm + outdate res' there. + - - - + + - If a node becomes a disconnected primary, it freezes all - its IO operations and calls its fence-peer handler. The - fence-peer handler is supposed to reach the peer over - alternative communication paths and call 'drbdadm outdate - res' there. In case it cannot reach the peer, it should - stonith the peer. IO is resumed as soon as the situation - is resolved. In case your handler fails, you can resume - IO with the command. - + If a node becomes a disconnected primary, it freezes all its IO operations + and calls its fence-peer handler. The fence-peer handler is supposed to reach + the peer over alternative communication paths and call 'drbdadm outdate res' + there. In case it cannot reach the peer, it should stonith the peer. IO is + resumed as soon as the situation is resolved. In case your handler fails, you + can resume IO with the command. + - , - + + + + + + - In case the backing storage's driver has a merge_bvec_fn() - function, DRBD has to - pretend that it can only process IO requests in units - not lager than 4kByte. 
(At time of writing the only known - drivers which - have such a function are: md (software raid driver), - dm (device mapper - LVM) and DRBD itself) - To get best performance out of DRBD on top of software - raid (or any other driver with a merge_bvec_fn() function) - you might enable this function, if you know for sure - that the merge_bvec_fn() function will deliver the same - results on all nodes of your cluster. I.e. the physical - disks of the software raid are exactly of the same type. - USE THIS OPTION ONLY IF YOU KNOW WHAT YOU ARE DOING. - - - - , - , - , - - DRBD has four implementations to express write-after-write dependencies to - its backing storage device. DRBD will use the first method that is - supported by the backing storage device and that is not disabled by the user. - - When selecting the method you should not only base your decision on the - measurable performance. In case your backing storage device has a volatile - write cache (plain disks, RAID of plain disks) you should use one - of the first two. In case your backing storage device has battery-backed - write cache you may go with option 3 or 4. Option 4 will deliver the - best performance such devices. - - Unfortunately device mapper (LVM) does not support barriers. - - The letter after "wo:" in /proc/drbd indicates with method is currently in - use for a device: b, f, d, n. The implementations: - + DRBD has four implementations to express write-after-write dependencies to its + backing storage device. DRBD will use the first method that is supported by the + backing storage device and that is not disabled by the user. By default all three + options are enabled. + + When selecting the method you should not only base your decision on the + measurable performance. In case your backing storage device has a volatile write cache + (plain disks, RAID of plain disks) you should use one of the first two. 
In case your + backing storage device has battery-backed write cache you may go with option 3. + Option 4 (disable everything, use "none") is dangerous + on most IO stacks, may result in write-reordering, and if so, + can theoretically be the reason for data corruption, or disturb + the DRBD protocol, causing spurious disconnect/reconnect cycles. + Do not use . + + Unfortunately device mapper (LVM) might not support barriers. + + The letter after "wo:" in /proc/drbd indicates with method is currently in use + for a device: b, f, d, n. The implementations: barrier + - The first requirs that the driver of the - backing storage device support barriers (called 'tagged command queuing' in - SCSI and 'native command queuing' in SATA speak). The use of this - method can be disabled by the we option. - + The first requires that the driver of the backing storage device support + barriers (called 'tagged command queuing' in SCSI and 'native command queuing' + in SATA speak). The use of this method can be disabled by setting the + options to . + flush + - The second requires that the backing device support disk flushes (called - 'force unit access' in the drive vendors speak). The use of this method - can be disabled using the option. - + The second requires that the backing device support disk flushes (called + 'force unit access' in the drive vendors speak). The use of this method can be + disabled setting to . + drain + - The third method is simply to let write requests drain before - write requests of a new reordering domain are issued. That was the - only implementation before 8.0.9. You can prevent to use of this - method by using the option. - + The third method is simply to let write requests drain before write + requests of a new reordering domain are issued. That was the only implementation + before 8.0.9. + none + - The fourth method is to not express write-after-write dependencies to - the backing store at all. 
- + The fourth method is to not express write-after-write dependencies to + the backing store at all, by also specifying . + This is dangerous + on most IO stacks, may result in write-reordering, and if so, + can theoretically be the reason for data corruption, or disturb + the DRBD protocol, causing spurious disconnect/reconnect cycles. + Do not use . + - , - + + - Disables the use of disk flushes and barrier BIOs when - accessing the meta data device. See the notes - on . - + Disables the use of disk flushes and barrier BIOs when accessing the meta data + device. See the notes on . + - , - - - In some special circumstances the device mapper stack manages to - pass BIOs to DRBD that violate the constraints that are set forth - by DRBD's merge_bvec() function and which have more than one bvec. - A known example is: - phys-disk -> DRBD -> LVM -> Xen -> missaligned partition (63) -> DomU FS. - Then you might see "bio would need to, but cannot, be split:" in - the Dom0's kernel log. - The best workaround is to proper align the partition within - the VM (E.g. start it at sector 1024). Costs 480 KiByte of storage. - Unfortunately the default of most Linux partitioning tools is - to start the first partition at an odd number (63). Therefore - most distribution's install helpers for virtual linux machines will - end up with missaligned partitions. - The second best workaround is to limit DRBD's max bvecs per BIO - (= max-bio-bvecs) to 1. Might cost performance. - The default value of is 0, which means that - there is no user imposed limitation. - + + + + In some special circumstances the device mapper stack manages to pass BIOs to + DRBD that violate the constraints that are set forth by DRBD's merge_bvec() function + and which have more than one bvec. A known example is: phys-disk -> DRBD -> LVM + -> Xen -> missaligned partition (63) -> DomU FS. Then you might see "bio + would need to, but cannot, be split:" in the Dom0's kernel log. 
+ + The best workaround is to proper align the partition within the VM (E.g. start + it at sector 1024). That costs 480 KiB of storage. Unfortunately the default of most + Linux partitioning tools is to start the first partition at an odd number (63). + Therefore most distributions install helpers for virtual linux machines will end up + with missaligned partitions. The second best workaround is to limit DRBD's max bvecs + per BIO (i.e., the option) to 1, but that might cost + performance. + + The default value of is 0, which means that there + is no user imposed limitation. + + + + + + + + To ensure smooth operation of the application on top of DRBD, it is possible to + limit the bandwidth that may be used by background synchronization. The default is 250 + KiB/sec, the default unit is KiB/sec. + + + + + + + + Start resync on this device only if the device with + minor is already in connected state. Otherwise this device + waits in SyncPause state. + + + + + + + + DRBD automatically performs hot area detection. With this parameter you control + how big the hot area (=active set) can get. Each extent marks 4M of the backing + storage. In case a primary node leaves the cluster unexpectedly, the areas covered by + the active set must be resynced upon rejoining of the failed node. The data structure + is stored in the meta-data area, therefore each change of the active set is a write + operation to the meta-data device. A higher number of extents gives longer resync + times but less updates to the meta-data. The default number of + extents is 127. (Minimum: 7, Maximum: 3843) + + + + + + + + + + + + The dynamic resync speed controller gets enabled with setting + plan_time to a positive value. It aims to fill the buffers + along the data path with either a constant amount of data + fill_target, or aims to have a constant delay time of + delay_target along the path. The controller has an upper + bound of max_rate. 
+ + By plan_time the agility of the controller is + configured. Higher values yield for slower/lower responses of the controller to + deviation from the target value. It should be at least 5 times RTT. For regular data + paths a fill_target in the area of 4k to 100k is + appropriate. For a setup that contains drbd-proxy it is advisable to use + delay_target instead. Only when + fill_target is set to 0 the controller will use + delay_target. 5 times RTT is a reasonable starting value. + Max_rate should be set to the bandwidth available between + the DRBD-hosts and the machines hosting DRBD-proxy, or to the available + disk-bandwidth. + + The default value of plan_time is 0, the default unit + is 0.1 seconds. Fill_target has 0 and sectors as default + unit. Delay_target has 1 (100ms) and 0.1 as default unit. + Max_rate has 10240 (100MiB/s) and KiB/s as default + unit. + + + + + + + + We track the disk IO rate caused by the resync, so we can detect non-resync IO + on the lower level device. If the lower level device seems to be busy, and the current + resync rate is above min_rate, we throttle the + resync. + + The default value of min_rate is 4M, the default unit + is k. If you want to not throttle at all, set it to zero, if you want to throttle + always, set it to one. + + + + , + + + If the driver of the lower_device + does not finish an IO request within disk_timeout, + DRBD considers the disk as failed. If DRBD is connected to a remote host, + it will reissue local pending IO requests to the peer, and ship all new + IO requests to the peer only. The disk state advances to diskless, as soon + as the backing block device has finished all IO requests. + The default value of is 0, which means that no timeout is enforced. + The default unit is 100ms. This option is available since 8.3.12. + + + + + + + + + + + The supported methods for load balancing of + read requests are , , + , and + , , + , , + , + and . + The default value of is . 
+ This option is available since 8.4.1. + + + + + - net + connect, net-options + drbdsetup + net - Sets up the device to listen on - af:local_addr:port for incoming connections - and to try to connect to af:remote_addr:port. - If port is omitted, 7788 is used as default. - If af is omitted gets - used. Other supported address families are , - for Dolphin Interconnect Solutions' "super sockets" - and for Sockets Direct Protocol (Infiniband). - - On the TCP/IP link the specified protocol - is used. Valid protocol specifiers are A, B, and C. - Protocol A: write IO is reported as completed, if it has reached - local disk and local TCP send buffer. - Protocol B: write IO is reported as completed, if it has reached - local disk and remote buffer cache. - Protocol C: write IO is reported as completed, if it has - reached both local and remote disk. + + Connect sets up the device to listen on + af:local_addr:port for incoming connections and to try to connect + to af:remote_addr:port. If port is + omitted, 7788 is used as default. If af is omitted + gets used. Other supported address families are , + for Dolphin Interconnect Solutions' "super sockets" and + for Sockets Direct Protocol (Infiniband). + + The net-options command allows you to change options while the connection is + established. + - , - + + - In case it is not possible to connect to the remote DRBD - device immediately, DRBD keeps on trying to connect. With - this option you can set the time between two tries. The - default value is 10 seconds, the unit is 1 second. - + On the TCP/IP link the specified protocol is used. + Valid protocol specifiers are A, B, and C. + + Protocol A: write IO is reported as completed, if it has reached local disk and + local TCP send buffer. + + Protocol B: write IO is reported as completed, if it has reached local disk and + remote buffer cache. + + Protocol C: write IO is reported as completed, if it has reached both local and + remote disk. 
+ - , - + + - If the TCP/IP connection linking a DRBD device pair is idle - for more than time seconds, DRBD - will generate a keep-alive packet to check if its partner is - still alive. The default value is 10 seconds, the unit is 1 second. - + In case it is not possible to connect to the remote DRBD device immediately, + DRBD keeps on trying to connect. With this option you can set the time between two + retries. The default value is 10. The unit is seconds. + - , - + + - If the partner node fails to send an expected response packet - within val - 10ths of a second, the partner node - is considered dead and therefore the TCP/IP connection is - abandoned. The default value is 60 (= 6 seconds). - + If the TCP/IP connection linking a DRBD device pair is idle for more than + time seconds, DRBD will generate a keep-alive packet to + check if its partner is still alive. The default value is 10. The unit is + seconds. + - , - + + - The socket send buffer is used to store packets sent to the - secondary node, which are not yet acknowledged (from a network - point of view) by the secondary node. When using protocol A, - it might be necessary to increase the size of this data - structure in order to increase asynchronicity between primary - and secondary nodes. But keep in mind that more asynchronicity - is synonymous with more data loss in the case of a primary - node failure. Since 8.0.13 resp. 8.2.7 setting the size - value to 0 means that the kernel should autotune this. - The default size is - 0, i.e. autotune. - + If the partner node fails to send an expected response packet within + val tenths of a second, the partner node is considered dead + and therefore the TCP/IP connection is abandoned. The default value is 60 (= 6 + seconds). + - , - + + - Packets received from the network are stored in the socket receive - buffer first. From there they are consumed by DRBD. Before 8.3.2 the - receive buffer's size was always set to the size of the socket - send buffer. 
Since 8.3.2 they can be tuned independently. - A value of 0 means that the kernel should autotune this. - The default size is - 0, i.e. autotune. - + The socket send buffer is used to store packets sent to the secondary node, + which are not yet acknowledged (from a network point of view) by the secondary node. + When using protocol A, it might be necessary to increase the size of this data + structure in order to increase asynchronicity between primary and secondary nodes. But + keep in mind that more asynchronicity is synonymous with more data loss in the case of + a primary node failure. Since 8.0.13 resp. 8.2.7 setting the + size value to 0 means that the kernel should autotune this. + The default size is 0, i.e. autotune. + - , - + + - In case the secondary node fails to complete a single write - request for count times the - timeout, it is expelled from the - cluster. (I.e. the primary node goes into StandAlone mode.) - The default is 0, which disables this feature. - + Packets received from the network are stored in the socket receive buffer first. + From there they are consumed by DRBD. Before 8.3.2 the receive buffer's size was + always set to the size of the socket send buffer. Since 8.3.2 they can be tuned + independently. A value of 0 means that the kernel should autotune this. The default + size is 0, i.e. autotune. + - , + + - With this option the maximal number of write requests between - two barriers is limited. Should be set to the same as - . Values smaller than 100 can - lead to degraded performance. The default value is 2048. - + In case the secondary node fails to complete a single write request for + count times the timeout, it is + expelled from the cluster, i.e. the primary node goes into StandAlone mode. The + default is 0, which disables this feature. + - , - + + - With this option the maximal number of buffer pages allocated - by DRBD's receiver thread is limited. Should be set to the - same as . 
Small values - could lead to degraded performance. (Minimum 32) The default value is - 2048. - + With this option the maximal number of write requests between two barriers is + limited. Should be set to the same as . Values smaller + than 10 can lead to degraded performance. The default value is 2048. + - , - + + - When the number of pending write requests on the standby - (secondary) node exceeds the unplug-watermark, we trigger - the request processing of our backing storage device. - Some storage controllers deliver better performance with small - values, others deliver best performance when the value is set to - the same value as max-buffers. Minimum 16, default 128, maximum - 131072. - + With this option the maximal number of buffer pages allocated by DRBD's receiver + thread is limited. Should be set to the same as . + Small values could lead to degraded performance. The default value is 2048, the + minimum 32. + - , - + + - With this option set you may assign primary role to both nodes. You - only should use this option if you use a shared storage - file system on top of DRBD. At the time of writing the only - ones are: OCFS2 and GFS. If you use this option with any - other file system, you are going to crash your nodes and to - corrupt your data! - + When the number of pending write requests on the standby (secondary) node + exceeds the unplug-watermark, we trigger the request processing of our backing storage + device. Some storage controllers deliver better performance with small values, others + deliver best performance when the value is set to the same value as max-buffers. + Minimum 16, default 128, maximum 131072. + - , - alg + + - You need to specify the HMAC algorithm to enable peer - authentication at all. You are strongly encouraged to use - peer authentication. - The HMAC algorithm will be used for the challenge - response authentication of the peer. You may specify any - digest algorithm that is named in /proc/crypto. 
- + With this option set you may assign primary role to both nodes. You only should + use this option if you use a shared storage file system on top of DRBD. At the time of + writing the only ones are: OCFS2 and GFS. If you use this option with any other file + system, you are going to crash your nodes and to corrupt your data! + - , - secret + + - The shared secret used in peer authentication. May be up to - 64 characters. - + You need to specify the HMAC algorithm to enable peer authentication at all. You + are strongly encouraged to use peer authentication. The HMAC algorithm will be used + for the challenge response authentication of the peer. You may specify any digest + algorithm that is named in /proc/crypto. + - , - asb-0p-policy + + - possible policies are: - + The shared secret used in peer authentication. May be up to 64 + characters. + + + + + + + + possible policies are: + - - - + + - No automatic resynchronization, simply disconnect. - + No automatic resynchronization, simply disconnect. + - - - + + - Auto sync from the node that was primary before the split-brain situation occurred. - + Auto sync from the node that was primary before the split-brain situation + occurred. + - - - + + - Auto sync from the node that became primary as second during - the split-brain situation. - + Auto sync from the node that became primary as second during the + split-brain situation. + - - - + + - In case one node did not write anything since the split - brain became evident, sync from the node that wrote something - to the node that did not write anything. In case none wrote - anything this policy uses a random decision to perform - a "resync" of 0 blocks. In case both have written something - this policy disconnects the nodes. - + In case one node did not write anything since the split brain became + evident, sync from the node that wrote something to the node that did not write + anything. 
In case none wrote anything this policy uses a random decision to + perform a "resync" of 0 blocks. In case both have written something this policy + disconnects the nodes. + - - - + + - Auto sync from the node that touched more blocks during the - split brain situation. - + Auto sync from the node that touched more blocks during the split brain + situation. + - - - + + - Auto sync to the named node. - + Auto sync to the named node. + - , - asb-1p-policy + + - possible policies are: - + possible policies are: + - - - + + - No automatic resynchronization, simply disconnect. - + No automatic resynchronization, simply disconnect. + - - - + + - Discard the version of the secondary if the outcome - of the algorithm would also - destroy the current secondary's data. Otherwise disconnect. - + Discard the version of the secondary if the outcome of the + algorithm would also destroy the current + secondary's data. Otherwise disconnect. + - - - + + - Discard the secondary's version. - + Discard the secondary's version. + - - - + + - Always honor the outcome of the algorithm. In case it decides the current - secondary has the right data, call the - on the current primary. - + Always honor the outcome of the algorithm. + In case it decides the current secondary has the correct data, call the + on the current primary. + - - - + + - Always honor the outcome of the algorithm. In case it decides the current - secondary has the right data, accept a possible instantaneous - change of the primary's data. - + Always honor the outcome of the algorithm. + In case it decides the current secondary has the correct data, accept a possible + instantaneous change of the primary's data. + - , - asb-2p-policy + + - possible policies are: - + possible policies are: + - - - + + - No automatic resynchronization, simply disconnect. - + No automatic resynchronization, simply disconnect. + - - - + + - Always honor the outcome of the algorithm. 
In case it decides the current - secondary has the right data, call the - on the current primary. - + Always honor the outcome of the algorithm. + In case it decides the current secondary has the right data, call the + on the current primary. + - - - + + - Always honor the outcome of the algorithm. In case it decides the current - secondary has the right data, accept a possible instantaneous - change of the primary's data. - + Always honor the outcome of the algorithm. + In case it decides the current secondary has the right data, accept a possible + instantaneous change of the primary's data. + - , - + + - Normally the automatic after-split-brain policies are only - used if current states of the UUIDs do not indicate the - presence of a third node. - - With this option you request that the automatic - after-split-brain policies are used as long as the data - sets of the nodes are somehow related. This might cause - a full sync, if the UUIDs indicate the presence of a third - node. (Or double faults have led to strange UUID sets.) - + Normally the automatic after-split-brain policies are only used if current + states of the UUIDs do not indicate the presence of a third node. + + With this option you request that the automatic after-split-brain policies are + used as long as the data sets of the nodes are somehow related. This might cause a + full sync, if the UUIDs indicate the presence of a third node. (Or double faults have + led to strange UUID sets.) + - , - role-resync-conflict-policy + + - This option sets DRBD's behavior when DRBD deduces from its - meta data that a resynchronization is needed, and the SyncTarget - node is already primary. The possible settings are: - , - and - . While - speaks for itself, with the - setting the handler is called - which is expected to either change the role of the node to - secondary, or remove the node from the cluster. - The default is . - With the setting you allow DRBD - to force a primary node into SyncTarget state. 
This means - that with that action the data exposed by DRBD change to - the SyncSource's version of the data instantaneously. - USE THIS OPTION ONLY IF YOU KNOW WHAT YOU ARE DOING. - + This option sets DRBD's behavior when DRBD deduces from its meta data that a + resynchronization is needed, and the SyncTarget node is already primary. The possible + settings are: , and + . While speaks for itself, with + the setting the handler is + called which is expected to either change the role of the node to secondary, or remove + the node from the cluster. The default is . + + With the setting you allow DRBD to force a primary + node into SyncTarget state. This means that the data exposed by DRBD changes to the + SyncSource's version of the data instantaneously. USE THIS OPTION ONLY IF YOU KNOW + WHAT YOU ARE DOING. + - , - hash_alg + + - DRBD can ensure the data integrity of the user's data on the network - by comparing hash values. Normally this is ensured by the 16 bit checksums - in the headers of TCP/IP packets. This option - can be set to any of the kernel's data digest algorithms. - In a typical kernel configuration you should have - at least one of , , and - available. By default this is not enabled. - + DRBD can ensure the data integrity of the user's data on the network by + comparing hash values. Normally this is ensured by the 16 bit checksums in the headers + of TCP/IP packets. This option can be set to any of the kernel's data digest + algorithms. In a typical kernel configuration you should have at least one of + , , and available. By + default this is not enabled. + See also the notes on data integrity on the drbd.conf manpage. + - , - + + - DRBD usually uses the TCP socket option TCP_CORK to hint to the network - stack when it can expect more data, and when it should flush out what it - has in its send queue. It turned out that there is at lease one network - stack that performs worse when one uses this hinting method. 
Therefore - we introducted this option, which disable the setting and clearing of - the TCP_CORK socket option by DRBD. - + DRBD usually uses the TCP socket option TCP_CORK to hint to the network stack + when it can expect more data, and when it should flush out what it has in its send + queue. There is at least one network stack that performs worse when one uses this + hinting method. Therefore we introduced this option, which disable the setting and + clearing of the TCP_CORK socket option by DRBD. + - , - ping_timeout + + - The time the peer has to answer to a keep-alive packet. In case the peer's reply is not received within this - time period, it is considered as dead. The default value is 500ms, - the default unit is 100ms. - + The time the peer has to answer to a keep-alive packet. In case the peer's reply + is not received within this time period, it is considered dead. The default unit is + tenths of a second, the default value is 5 (for half a second). + - , - + + - Use this option to manually recover from a split-brain - situation. In case you do not have any automatic after-split-brain policies selected, the nodes refuse to - connect. By passing this option you make a node to - sync target immediately after successful connect. - + Use this option to manually recover from a split-brain situation. In case you do + not have any automatic after-split-brain policies selected, the nodes refuse to + connect. By passing this option you make this node a sync target immediately after + successful connect. - - - - syncer - - drbdsetup - syncer - - Changes the synchronization daemon parameters of - device at runtime. - - + - , - + + - To ensure smooth operation of the application on top of DRBD, - it is possible to limit the bandwidth that may be used by - background synchronization. The default is 250 KB/sec, the - default unit is KB/sec. - + Causes DRBD to abort the connection process after the resync handshake, i.e. no + resync gets performed. 
You can find out which resync DRBD would perform by looking at + the kernel's log file. + - , - + + + + + + - Start resync on this device only if the device with - minor is already in connected - state. Otherwise this device waits in SyncPause state. - + By default DRBD blocks when the available TCP send queue becomes full. That + means it will slow down the application that generates the write requests that cause + DRBD to send more data down that TCP connection. + + When DRBD is deployed with DRBD-proxy it might be more desirable that DRBD goes + into AHEAD/BEHIND mode shortly before the send queue becomes full. In AHEAD/BEHIND + mode DRBD does no longer replicate data, but still keeps the connection open. + + The advantage of the AHEAD/BEHIND mode is that the application is not slowed + down, even if DRBD-proxy's buffer is not sufficient to buffer all write requests. The + downside is that the peer node falls behind, and that a resync will be necessary to + bring it back into sync. During that resync the peer node will have an inconsistent + disk. + + Available congestion_policys are + and . The default is + . Fill_threshold might be in the + range of 0 to 10GiBytes. The default is 0 which disables the check. + Active_extents_threshold has the same limits as + . + + The AHEAD/BEHIND mode and its settings are available since DRBD 8.3.10. + - , - - - DRBD automatically performs hot area detection. With this - parameter you control how big the hot area (=active set) can - get. Each extent marks 4M of the backing storage. In case a - primary node leaves the cluster unexpectedly, the areas covered - by the active set must be resynced upon rejoining of the failed - node. The data structure is stored in the meta-data area, - therefore each change of the active set is a write operation - to the meta-data device. A higher number of extents gives - longer resync times but less updates to the meta-data. The - default number of extents is - 127. 
(Minimum: 7, Maximum: 3843) - + + + + During online verification (as initiated by the verify sub-command), rather than doing a bit-wise + comparison, DRBD applies a hash function to the contents of every block being + verified, and compares that hash with the peer. This option defines the hash algorithm + being used for that purpose. It can be set to any of the kernel's data digest + algorithms. In a typical kernel configuration you should have at least one of + , , and available. By + default this is not enabled; you must set this option explicitly in order to be able + to use on-line device verification. + + See also the notes on data integrity on the drbd.conf manpage. + - , - + + - During online verification (as initiated by the - verify sub-command), - rather than doing a bit-wise comparison, DRBD applies a hash function - to the contents of every block being verified, and compares that - hash with the peer. This option defines the hash algorithm being - used for that purpose. It can be set to any of the kernel's data - digest algorithms. In a typical kernel configuration you should have - at least one of , , and - available. By default this is not enabled; you must set this - option explicitly in order to be able to use on-line device verification. - - See also the notes on data integrity on the drbd.conf manpage. + A resync process sends all marked data blocks form the source to the destination + node, as long as no is given. When one is specified the + resync process exchanges hash values of all marked blocks first, and sends only those + data blocks over, that have different hash values. + + This setting is useful for DRBD setups with low bandwidth links. During the + restart of a crashed primary node, all blocks covered by the activity log are marked + for resync. But a large part of those will actually be still in sync, therefore using + will lower the required bandwidth in exchange for CPU + cycles. 
+ - , - + + - Sets the cpu-affinity-mask for DRBD's kernel threads of this - device. The default value of cpu-mask is - 0, which means that DRBD's kernel threads should be spread over - all CPUs of the machine. This value must be given in hexadecimal - notation. If it is too big it will be truncated. - + During resync-handshake, the dirty-bitmaps of the nodes are exchanged and merged + (using bit-or), so the nodes will have the same understanding of which blocks are + dirty. On large devices, the fine grained dirty-bitmap can become large as well, and + the bitmap exchange can take quite some time on low-bandwidth links. + + Because the bitmap typically contains compact areas where all bits are unset + (clean) or set (dirty), a simple run-length encoding scheme can considerably reduce + the network traffic necessary for the bitmap exchange. + + For backward compatibility reasons, and because on fast links this possibly does + not improve transfer time but consumes cpu cycles, this defaults to off. + + Introduced in 8.3.2. + + + + + resource-options + + + drbdsetup + + resource-options + + + Changes the options of the resource at runtime. + + - , - + + - A resync process sends all marked data blocks form the source to - the destination node, as long as no is - given. When one is specified the resync process exchanges hash values of all - marked blocks first, and sends only those data blocks over, that have different - hash values. - This setting is useful for DRBD setups with low bandwidth links. - During the restart of a crashed primary node, all blocks covered by the - activity log are marked for resync. But a large part of those will actually - be still in sync, therefore using will lower - the required bandwidth in exchange for CPU cycles. - + Sets the cpu-affinity-mask for DRBD's kernel threads of this device. The default + value of cpu-mask is 0, which means that DRBD's kernel + threads should be spread over all CPUs of the machine. 
This value must be given in + hexadecimal notation. If it is too big it will be truncated. + - , - + + - During resync-handshake, the dirty-bitmaps of the nodes are - exchanged and merged (using bit-or), so the nodes will have the - same understanding of which blocks are dirty. On large devices, - the fine grained dirty-bitmap can become large as well, and the - bitmap exchange can take quite some time on low-bandwidth links. - - Because the bitmap typically contains compact areas where all - bits are unset (clean) or set (dirty), a simple run-length - encoding scheme can considerably reduce the network traffic - necessary for the bitmap exchange. - - For backward compatibilty reasons, and because on fast links this - possibly does not improve transfer time but consumes cpu cycles, - this defaults to off. - - Introduced in 8.3.2. - + This setting controls what happens to IO requests on a degraded, disk less node + (I.e. no data store is reachable). The available policies are + and . + + If ond-policy is set to + you can either resume IO by attaching/connecting the last lost data storage, or by the + drbdadm resume-io res + command. The latter will result in IO errors of course. + + The default is . This setting is available since DRBD + 8.3.9. + primary + drbdsetup + primary - Sets the device into primary role. This - means that applications (e.g. a file system) may open the - device for read and write access. Data - written to the device in primary role are - mirrored to the device in secondary role. - - Normally it is not possible to set both devices of a connected DRBD device - pair to primary role. By using the - option, you override this behavior and instruct DRBD to allow two - primaries. - + + Sets the device into primary role. This means that + applications (e.g. a file system) may open the device for read + and write access. Data written to the device in primary role are + mirrored to the device in secondary role. 
+ + Normally it is not possible to set both devices of a connected DRBD device pair to + primary role. By using the option, you override this + behavior and instruct DRBD to allow two primaries. + - , - + + - Becoming primary fails if the local replica is - inconsistent. By using this option you can force it into - primary role anyway. USE THIS OPTION ONLY IF YOU KNOW WHAT - YOU ARE DOING. - + Alias for --force. + + + + + + + + + + Becoming primary fails if the local replica is not up-to-date. I.e. when it is + inconsistent, outdated of consistent. By using this option you can force it into + primary role anyway. USE THIS OPTION ONLY IF YOU KNOW WHAT YOU ARE DOING. + secondary + drbdsetup + secondary - Brings the device into secondary role. - This operation fails as long as at least one application (or file - system) has opened the device. - - It is possible that both devices of a connected DRBD device pair are secondary. - + + Brings the device into secondary role. This operation fails + as long as at least one application (or file system) has opened the device. + + It is possible that both devices of a connected DRBD device pair are secondary. + verify + drbdsetup + verify - This initiates on-line device verification. During on-line verification, - the contents of every block on the local node are compared to those on - the peer node. Device verification progress can be monitored via - /proc/drbd. - Any blocks whose content differs from that of the corresponding block - on the peer node will be marked out-of-sync in DRBD's on-disk bitmap; they - are not brought back in sync automatically. To - do that, simply disconnect and reconnect the resource. - - If on-line verification is already in progress, this command - silently does nothing. - - This command will fail if the device is - not part of a connected device pair. - + + This initiates on-line device verification. 
During on-line verification, the contents + of every block on the local node are compared to those on the peer node. Device verification + progress can be monitored via /proc/drbd. Any blocks + whose content differs from that of the corresponding block on the peer node will be marked + out-of-sync in DRBD's on-disk bitmap; they are not brought back in sync + automatically. To do that, simply disconnect and reconnect the resource. + + If on-line verification is already in progress, this command silently does + nothing. + + This command will fail if the device is not part of a + connected device pair. + See also the notes on data integrity on the drbd.conf manpage. + - , - + + - Since version 8.3.2, on-line verification should resume from the - last position after connection loss. It may also be started from - an arbitrary position by setting this option. - - Default unit is sectors. You may also specify a unit explicitly. - The start-sector will be rounded down to a multiple of 8 sectors (4kB). - + Since version 8.3.2, on-line verification should resume from the last position + after connection loss. It may also be started from an arbitrary position by setting + this option. + + Default unit is sectors. You may also specify a unit explicitly. The + will be rounded down to a multiple of 8 sectors + (4kB). + invalidate + drbdsetup + invalidate - This forces the local device of a pair of connected DRBD devices - into SyncTarget state, which means that all data blocks of the - device are copied over from the peer. - - This command will fail if the device is - not part of a connected device pair. - + + This forces the local device of a pair of connected DRBD devices into SyncTarget + state, which means that all data blocks of the device are copied over from the peer. + + This command will fail if the device is not part of a + connected device pair. 
+ invalidate-remote + drbdsetup + invalidate-remote - This forces the local device of a pair of connected DRBD devices - into SyncSource state, which means that all data blocks of the - device are copied to the peer. - + + This forces the local device of a pair of connected DRBD devices into SyncSource + state, which means that all data blocks of the device are copied to the peer. + + On a disconnected device, this will set all bits in the out of sync bitmap. As a side + effect, this suspends updates to the on disk activity log. Updates to the on disk activity log + will be resumed automatically when necessary. + wait-connect + drbdsetup + wait-connect - Returns as soon as the device can - communicate with its partner device. - + + Returns as soon as the device can communicate with its + partner device. + - , - - , - - , - - , - - This command will fail if the - device cannot communicate with its - partner for timeout - seconds. If the peer was working before this node was - rebooted, the wfc_timeout is used. If the peer was already - down before this node was rebooted, the degr_wfc_timeout - is used. If the peer was sucessfully outdated before this - node was rebooted the outdated_wfc_timeout is used. - The default value for all those timeout values - is 0 which means to wait forever. - In case the connection status goes down to StandAlone because - the peer appeared but the devices had a split brain situation, - the default for the command is to terminate. You can change this - behavior with the option. - + + + + + + + + + + This command will fail if the device cannot + communicate with its partner for timeout seconds. If the + peer was working before this node was rebooted, the + wfc_timeout is used. If the peer was already down before + this node was rebooted, the degr_wfc_timeout is used. If + the peer was successfully outdated before this node was rebooted the + outdated_wfc_timeout is used. 
The default value for all + those timeout values is 0 which means to wait forever. The unit is seconds. In case + the connection status goes down to StandAlone because the peer appeared but the + devices had a split brain situation, the default for the command is to terminate. You + can change this behavior with the option. + wait-sync + drbdsetup + wait-sync - Returns as soon as the device leaves any - synchronization into connected state. The options - are the same as with the wait-connect - command. - + + Returns as soon as the device leaves any synchronization + into connected state. The options are the same as with the + wait-connect command. + disconnect + drbdsetup + disconnect - Removes the information set by the command - from the device. This means - that the device goes into unconnected - state and will no longer listen for incoming connections. - + + Removes the information set by the command from the + device. This means that the device + goes into unconnected state and will no longer listen for incoming connections. + detach + drbdsetup + detach - Removes the information set by the command - from the device. This means - that the device is detached from its - backing storage device. + Removes the information set by the command from the + device. This means that the device is + detached from its backing storage device. + + + , + + + A regular detach returns after the disk state finally reached + diskless. As a consequence detaching from a frozen backing block device + never terminates. + On the other hand A forced detach returns immediately. It allows + you to detach DRBD from a frozen backing block device. Please note that + the disk will be marked as failed until all pending IO requests where + finished by the backing block device. + + + + + down + drbdsetup + down - Removes all configuration information from the - device and forces it back to - unconfigured state. 
- + + Removes all configuration information from the device and + forces it back to unconfigured state. + role + drbdsetup + role - Shows the current roles of the device and - its peer. (local/peer). - + + Shows the current roles of the device and its peer, as + local/peer. + state + drbdsetup + state + Deprecated alias for "role" + cstate + drbdsetup + cstate - Shows the current connection state of the - device. - + + Shows the current connection state of the device. + dstate + drbdsetup + dstate - Shows the current states of the backing storage devices. (local/peer) - + + Shows the current states of the backing storage devices, as + local/peer. + - status + resize + drbdsetup - status + + resize - Shows the current status of the device in xml-like format. Example output: - <resource minor="0" name="s0" cs="SyncTarget" st1="Secondary" st2="Secondary" - ds1="Inconsistent" ds2="UpToDate" resynced_precent="5.9" /> - - + + This causes DRBD to reexamine the size of the device's + backing storage device. To actually do online growing you need to extend the backing + storages on both devices and call the command on one of your + nodes. + + The allows you to resize a device which is + currently not connected to the peer. Use with care, since if you do not resize the peer's + disk as well, further connect attempts of the two will fail. + + When the option is given DRBD will skip the resync of + the new storage. Only do this if you know that the new storage was initialized to the same + content by other means. + - resize + check-resize + drbdsetup - resize + + check-resize - This causes DRBD to reexamine the size of the - device's backing storage device. To - actually do online growing you need to extend the backing storages - on both devices and call the command one of - your nodes. - - The allows you to - resize a device which is currently not connected to the peer. 
- Use with care, since if you do not resize the peer's disk as well, - further connect attempts of the two will fail. - + + To enable DRBD to detect offline resizing of backing devices this command may be used + to record the current size of backing devices. The size is stored in files in /var/lib/drbd/ + named drbd-minor-??.lkbd + + This command is called by drbdadm resize + res after drbdsetup + device resize returned. + pause-sync + drbdsetup + pause-sync - Temporarily suspend an ongoing resynchronization by setting the local - pause flag. Resync only progresses if neither the local nor the - remote pause flag is set. It might be desirable to postpone DRBD's - resynchronization after eventual resynchronization of the backing - storage's RAID setup. - + + Temporarily suspend an ongoing resynchronization by setting the local pause flag. + Resync only progresses if neither the local nor the remote pause flag is set. It might be + desirable to postpone DRBD's resynchronization after eventual resynchronization of the + backing storage's RAID setup. + resume-sync + drbdsetup + resume-sync - Unset the local sync pause flag. - + + Unset the local sync pause flag. + outdate + drbdsetup + outdate - Mark the data on the local backing storage as outdated. An outdated - device refuses to become primary. This is used in conjunction with - and by the peer's fence-peer handler. - + + Mark the data on the local backing storage as outdated. An outdated device refuses to + become primary. This is used in conjunction with and by the peer's + handler. + show-gi + drbdsetup + show-gi - Displays the device's data generation identifiers verbosely. - + + Displays the device's data generation identifiers verbosely. + get-gi + drbdsetup + get-gi - Displays the device's data generation identifiers. - + + Displays the device's data generation identifiers. + show + drbdsetup + show - Shows all available configuration information of the - device. 
- + + Shows all available configuration information of the + device. + suspend-io + drbdsetup + suspend-io - This command is of no apparent use and just provided for the sake - of completeness. - + + This command is of no apparent use and just provided for the sake of + completeness. + resume-io + drbdsetup + resume-io - If the fence-peer handler fails to stonith the peer node, - and your policy is set to - resource-and-stonith, you can unfreeze IO operations with this - command. - + + If the fence-peer handler fails to stonith the peer node, and your + policy is set to resource-and-stonith, you can unfreeze IO + operations with this command. + events + drbdsetup + events - Displays every state change of DRBD and all calls to helper - programs. This might be used to get notified of DRBD's state - changes by piping the output to another program. - - , - Display the events of all DRBD minors. - , - This is a debugging aid that displays the content of - all received netlink messages. - - + + Displays every state change of DRBD and all calls to helper programs. This might be + used to get notified of DRBD's state changes by piping the output to another program. + + + + + + Display the events of all DRBD minors. + + + + + + + + This is a debugging aid that displays the content of all received netlink + messages. + + + + new-current-uuid + drbdsetup + new-current-uuid - Generates a new currend UUID and rotates all other UUID values. This - has at least two use cases, namely to skip the initial sync, and to - reduce network bandwidth when starting in a single node configuration - and then later (re-)integrating a remote site. - - Available option: - , - Clears the sync bitmap in addition to generating a new current UUID. - - - This can be used to skip the initial sync, if you want to start from scratch. - This use-case does only work on "Just Created" meta data. - Necessary steps: - On both nodes, initialize meta data and configure the device. 
- drbdadm -- --force create-md res They need to do the initial handshake, so they know their sizes. - drbdadm up res They are now Connected Secondary/Secondary Inconsistent/Inconsistent. - Generate a new current-uuid and clear the dirty bitmap. - drbdadm -- --clear-bitmap new-current-uuid res They are now Connected Secondary/Secondary UpToDate/UpToDate. - Make one side primary and create a file system. - drbdadm primary resmkfs -t fs-type $(drbdadm sh-dev res) - - One obvious side-effect is that the replica are full of old garbage - (unless you made them identical using other means), so any - online-verify is expected to find any number of out-of-sync blocks. - - You must not use this on pre-existing data! - Even though it may appear to work at first glance, once you switch to - the other node, your data is toast, as it never got replicated. - So do do not leave out the mkfs (or equivalent). - - This can also be used to shorten the initial resync of a cluster where the second node - is added after the first node is gone into production, by means of disk shipping. - This use-case works on disconnected devices only, the device may be in - primary or secondary role. - The necessary steps are: - drbdsetup device new-current-uuid --clear-bitmap Take the copy of the current active server. E.g. by pulling a disk out of - the RAID1 controller, or by copying with dd. You need to copy the actual - data, and the meta data. - drbdsetup device new-current-uuid Add the disk to the new secondary node, and join it to the cluster. You will - get a resync of that parts that where changed since the first call to - drbdsetup in step 1. - - + Generates a new current UUID and rotates all other UUID values. This has at least two + use cases, namely to skip the initial sync, and to reduce network bandwidth when starting in + a single node configuration and then later (re-)integrating a remote site. 
+ + Available option: + + + + + Clears the sync bitmap in addition to generating a new current UUID. + + + + + This can be used to skip the initial sync, if you want to start from scratch. This + use-case does only work on "Just Created" meta data. Necessary steps: + + On both nodes, initialize meta data and configure the + device. + + drbdadm create-md --force + res + + + + They need to do the initial handshake, so they know their sizes. + + drbdadm up + res + + + + They are now Connected Secondary/Secondary Inconsistent/Inconsistent. + Generate a new current-uuid and clear the dirty bitmap. + + drbdadm new-current-uuid --clear-bitmap + res + + + + They are now Connected Secondary/Secondary UpToDate/UpToDate. Make one side + primary and create a file system. + + drbdadm primary + res + + mkfs -t fs-type $(drbdadm + sh-dev res) + + + + One obvious side-effect is that the replica is full of old garbage (unless you made + them identical using other means), so any online-verify is expected to find any number of + out-of-sync blocks. + + You must not use this on pre-existing data! Even though it may + appear to work at first glance, once you switch to the other node, your data is toast, as it + never got replicated. So do not leave out the mkfs (or + equivalent). + + This can also be used to shorten the initial resync of a cluster where the second node + is added after the first node is gone into production, by means of disk shipping. This + use-case works on disconnected devices only, the device may be in primary or secondary + role. + + The necessary steps on the current active server are: + + drbdsetup new-current-uuid --clear-bitmap minor + + + + + Take the copy of the current active server. E.g. by pulling a disk out of the + RAID1 controller, or by copying with dd. You need to copy the actual data, and the + meta data. + + + + drbdsetup new-current-uuid minor + + + Now add the disk to the new secondary node, and join it to the cluster. 
You + will get a resync of that parts that were changed since the first call to drbdsetup in step 1. + Examples - For examples, please have a look at the - DRBD User's Guide. - + + For examples, please have a look at the + DRBD User's Guide. + + Version - This document was revised for version 8.3.2 of the DRBD distribution. - + + This document was revised for version 8.3.2 of the DRBD distribution. + Author - Written by Philipp Reisner philipp.reisner@linbit.com - and Lars Ellenberg lars.ellenberg@linbit.com - + + Written by Philipp Reisner philipp.reisner@linbit.com and Lars + Ellenberg lars.ellenberg@linbit.com + Reporting Bugs - Report bugs to drbd-user@lists.linbit.com. - + + Report bugs to drbd-user@lists.linbit.com. + Copyright - Copyright 2001-2008 LINBIT Information Technologies, -Philipp Reisner, Lars Ellenberg. This is free software; -see the source for copying conditions. There is NO warranty; -not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - + + Copyright 2001-2008 LINBIT Information Technologies, Philipp Reisner, Lars Ellenberg. + This is free software; see the source for copying conditions. There is NO warranty; not even + for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ See Also - drbd.conf5, - drbd8, - drbddisk8drbdadm8DRBD User's Guide, - DRBD web site + + + drbd.conf + + 5 + , + drbd + + 8 + , + drbddisk + + 8 + , + drbdadm + + 8 + , + DRBD User's Guide, + DRBD web site + diff -Nru drbd8-8.3.7/documentation/drbdsetup_attach.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_attach.xml --- drbd8-8.3.7/documentation/drbdsetup_attach.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_attach.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupattachminorlower_devmeta_data_devmeta_data_index--size val--max-bio-bvecs val--on-io-errorpass_oncall-local-io-errordetach--fencingdont-careresource-onlyresource-and-stonith--disk-barrier--disk-flushes--disk-drain--md-flushes--resync-rate val--resync-after val--al-extents val--c-plan-ahead val--c-delay-target val--c-fill-target val--c-max-rate val--c-min-rate val--disk-timeout val--read-balancingprefer-localprefer-remoteround-robinleast-pendingwhen-congested-remote32K-striping64K-striping128K-striping256K-striping512K-striping1M-striping diff -Nru drbd8-8.3.7/documentation/drbdsetup_check-resize.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_check-resize.xml --- drbd8-8.3.7/documentation/drbdsetup_check-resize.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_check-resize.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupcheck-resizeminor diff -Nru drbd8-8.3.7/documentation/drbdsetup_connect.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_connect.xml --- drbd8-8.3.7/documentation/drbdsetup_connect.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_connect.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupconnectresourcelocal_addrremote_addr--tentative--discard-my-data--protocolABC--timeout val--max-epoch-size val--max-buffers val--unplug-watermark val--connect-int val--ping-int 
val--sndbuf-size val--rcvbuf-size val--ko-count val--allow-two-primaries--cram-hmac-alg val--shared-secret val--after-sb-0pridisconnectdiscard-younger-primarydiscard-older-primarydiscard-zero-changesdiscard-least-changesdiscard-localdiscard-remote--after-sb-1pridisconnectconsensusdiscard-secondarycall-pri-lost-after-sbviolently-as0p--after-sb-2pridisconnectcall-pri-lost-after-sbviolently-as0p--always-asbp--rr-conflictdisconnectcall-pri-lostviolently--ping-timeout val--data-integrity-alg val--tcp-cork--on-congestionblockpull-aheaddisconnect--congestion-fill val--congestion-extents val--csums-alg val--verify-alg val--use-rle diff -Nru drbd8-8.3.7/documentation/drbdsetup_cstate.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_cstate.xml --- drbd8-8.3.7/documentation/drbdsetup_cstate.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_cstate.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupcstateminor diff -Nru drbd8-8.3.7/documentation/drbdsetup_del-minor.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_del-minor.xml --- drbd8-8.3.7/documentation/drbdsetup_del-minor.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_del-minor.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupdel-minorminor diff -Nru drbd8-8.3.7/documentation/drbdsetup_del-resource.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_del-resource.xml --- drbd8-8.3.7/documentation/drbdsetup_del-resource.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_del-resource.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupdel-resourceresource diff -Nru drbd8-8.3.7/documentation/drbdsetup_detach.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_detach.xml --- drbd8-8.3.7/documentation/drbdsetup_detach.xml 1970-01-01 00:00:00.000000000 +0000 +++ 
drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_detach.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupdetachminor--force diff -Nru drbd8-8.3.7/documentation/drbdsetup_disconnect.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_disconnect.xml --- drbd8-8.3.7/documentation/drbdsetup_disconnect.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_disconnect.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupdisconnectlocal_addrremote_addr--force diff -Nru drbd8-8.3.7/documentation/drbdsetup_disk-options.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_disk-options.xml --- drbd8-8.3.7/documentation/drbdsetup_disk-options.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_disk-options.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,3 @@ + +drbdsetupdisk-optionsminor + --on-io-errorpass_oncall-local-io-errordetach--fencingdont-careresource-onlyresource-and-stonith--disk-barrier--disk-flushes--disk-drain--md-flushes--resync-rate val--resync-after val--al-extents val--c-plan-ahead val--c-delay-target val--c-fill-target val--c-max-rate val--c-min-rate val--disk-timeout val--read-balancingprefer-localprefer-remoteround-robinleast-pendingwhen-congested-remote32K-striping64K-striping128K-striping256K-striping512K-striping1M-striping diff -Nru drbd8-8.3.7/documentation/drbdsetup_down.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_down.xml --- drbd8-8.3.7/documentation/drbdsetup_down.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_down.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupdownresource diff -Nru drbd8-8.3.7/documentation/drbdsetup_dstate.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_dstate.xml --- drbd8-8.3.7/documentation/drbdsetup_dstate.xml 1970-01-01 00:00:00.000000000 +0000 +++ 
drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_dstate.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupdstateminor diff -Nru drbd8-8.3.7/documentation/drbdsetup_events.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_events.xml --- drbd8-8.3.7/documentation/drbdsetup_events.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_events.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,6 @@ + +drbdsetupevents + resource + minor + all + diff -Nru drbd8-8.3.7/documentation/drbdsetup_get-gi.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_get-gi.xml --- drbd8-8.3.7/documentation/drbdsetup_get-gi.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_get-gi.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupget-giminor diff -Nru drbd8-8.3.7/documentation/drbdsetup_invalidate-remote.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_invalidate-remote.xml --- drbd8-8.3.7/documentation/drbdsetup_invalidate-remote.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_invalidate-remote.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupinvalidate-remoteminor diff -Nru drbd8-8.3.7/documentation/drbdsetup_invalidate.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_invalidate.xml --- drbd8-8.3.7/documentation/drbdsetup_invalidate.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_invalidate.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupinvalidateminor diff -Nru drbd8-8.3.7/documentation/drbdsetup_net-options.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_net-options.xml --- drbd8-8.3.7/documentation/drbdsetup_net-options.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_net-options.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,3 @@ + 
+drbdsetupnet-optionslocal_addrremote_addr + --protocolABC--timeout val--max-epoch-size val--max-buffers val--unplug-watermark val--connect-int val--ping-int val--sndbuf-size val--rcvbuf-size val--ko-count val--allow-two-primaries--cram-hmac-alg val--shared-secret val--after-sb-0pridisconnectdiscard-younger-primarydiscard-older-primarydiscard-zero-changesdiscard-least-changesdiscard-localdiscard-remote--after-sb-1pridisconnectconsensusdiscard-secondarycall-pri-lost-after-sbviolently-as0p--after-sb-2pridisconnectcall-pri-lost-after-sbviolently-as0p--always-asbp--rr-conflictdisconnectcall-pri-lostviolently--ping-timeout val--data-integrity-alg val--tcp-cork--on-congestionblockpull-aheaddisconnect--congestion-fill val--congestion-extents val--csums-alg val--verify-alg val--use-rle diff -Nru drbd8-8.3.7/documentation/drbdsetup_new-current-uuid.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_new-current-uuid.xml --- drbd8-8.3.7/documentation/drbdsetup_new-current-uuid.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_new-current-uuid.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupnew-current-uuidminor--clear-bitmap diff -Nru drbd8-8.3.7/documentation/drbdsetup_new-minor.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_new-minor.xml --- drbd8-8.3.7/documentation/drbdsetup_new-minor.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_new-minor.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupnew-minorresourceminorvolume diff -Nru drbd8-8.3.7/documentation/drbdsetup_new-resource.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_new-resource.xml --- drbd8-8.3.7/documentation/drbdsetup_new-resource.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_new-resource.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupnew-resourceresource--cpu-mask 
val--on-no-data-accessibleio-errorsuspend-io diff -Nru drbd8-8.3.7/documentation/drbdsetup_outdate.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_outdate.xml --- drbd8-8.3.7/documentation/drbdsetup_outdate.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_outdate.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupoutdateminor diff -Nru drbd8-8.3.7/documentation/drbdsetup_pause-sync.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_pause-sync.xml --- drbd8-8.3.7/documentation/drbdsetup_pause-sync.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_pause-sync.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetuppause-syncminor diff -Nru drbd8-8.3.7/documentation/drbdsetup_primary.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_primary.xml --- drbd8-8.3.7/documentation/drbdsetup_primary.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_primary.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupprimaryminor--force diff -Nru drbd8-8.3.7/documentation/drbdsetup_resize.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_resize.xml --- drbd8-8.3.7/documentation/drbdsetup_resize.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_resize.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupresizeminor--size val--assume-peer-has-space--assume-clean diff -Nru drbd8-8.3.7/documentation/drbdsetup_resource-options.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_resource-options.xml --- drbd8-8.3.7/documentation/drbdsetup_resource-options.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_resource-options.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,3 @@ + +drbdsetupresource-optionsresource + --cpu-mask val--on-no-data-accessibleio-errorsuspend-io diff -Nru 
drbd8-8.3.7/documentation/drbdsetup_resume-io.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_resume-io.xml --- drbd8-8.3.7/documentation/drbdsetup_resume-io.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_resume-io.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupresume-iominor diff -Nru drbd8-8.3.7/documentation/drbdsetup_resume-sync.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_resume-sync.xml --- drbd8-8.3.7/documentation/drbdsetup_resume-sync.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_resume-sync.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupresume-syncminor diff -Nru drbd8-8.3.7/documentation/drbdsetup_role.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_role.xml --- drbd8-8.3.7/documentation/drbdsetup_role.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_role.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetuproleminor diff -Nru drbd8-8.3.7/documentation/drbdsetup_secondary.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_secondary.xml --- drbd8-8.3.7/documentation/drbdsetup_secondary.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_secondary.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupsecondaryminor diff -Nru drbd8-8.3.7/documentation/drbdsetup_show-gi.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_show-gi.xml --- drbd8-8.3.7/documentation/drbdsetup_show-gi.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_show-gi.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupshow-giminor diff -Nru drbd8-8.3.7/documentation/drbdsetup_show.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_show.xml --- drbd8-8.3.7/documentation/drbdsetup_show.xml 1970-01-01 00:00:00.000000000 +0000 +++ 
drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_show.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,7 @@ + +drbdsetupshow + resource + minor + all + + diff -Nru drbd8-8.3.7/documentation/drbdsetup_suspend-io.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_suspend-io.xml --- drbd8-8.3.7/documentation/drbdsetup_suspend-io.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_suspend-io.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupsuspend-iominor diff -Nru drbd8-8.3.7/documentation/drbdsetup_verify.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_verify.xml --- drbd8-8.3.7/documentation/drbdsetup_verify.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_verify.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,2 @@ + +drbdsetupverifyminor--start val diff -Nru drbd8-8.3.7/documentation/drbdsetup_wait-connect.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_wait-connect.xml --- drbd8-8.3.7/documentation/drbdsetup_wait-connect.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_wait-connect.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,3 @@ + +drbdsetupwait-connectminor--wfc-timeout val--degr-wfc-timeout val--outdated-wfc-timeout val + diff -Nru drbd8-8.3.7/documentation/drbdsetup_wait-sync.xml drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_wait-sync.xml --- drbd8-8.3.7/documentation/drbdsetup_wait-sync.xml 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/documentation/drbdsetup_wait-sync.xml 2012-09-03 22:37:26.000000000 +0000 @@ -0,0 +1,3 @@ + +drbdsetupwait-syncminor--wfc-timeout val--degr-wfc-timeout val--outdated-wfc-timeout val + diff -Nru drbd8-8.3.7/documentation/xml-usage-to-docbook.xsl drbd8-8.4.1+git55a81dc~cmd1/documentation/xml-usage-to-docbook.xsl --- drbd8-8.3.7/documentation/xml-usage-to-docbook.xsl 1970-01-01 00:00:00.000000000 +0000 +++ 
drbd8-8.4.1+git55a81dc~cmd1/documentation/xml-usage-to-docbook.xsl 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,58 @@ + + + + + + drbdsetup + + + + + + + + + + + + + + + + + + + + + + + + + + + -- + + + val + + + + + -- + + + + + + + + -- + + + + + + + + + diff -Nru drbd8-8.3.7/drbd/Kbuild drbd8-8.4.1+git55a81dc~cmd1/drbd/Kbuild --- drbd8-8.3.7/drbd/Kbuild 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/Kbuild 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,94 @@ +obj-m := drbd.o + +clean-files := compat.h .config.timestamp + +LINUXINCLUDE := -I$(src) $(LINUXINCLUDE) + +# Files in the standard include directories take precendence over files +# in the compat directory. +# +# Add -I$(src) to EXTRA_CFLAGS again: some (rhel5, maybe other) kbuild does not +# yet use LINUXINCLUDE like we expect it to ;( fortunately it does not contain +# in-tree drbd either yet, so precedence of include files is not important. +# +# override: we absolutely need this, even if EXTRA_CFLAGS originates from make +# command line or environment +override EXTRA_CFLAGS += -I$(src) -I$(src)/compat + +# The augmented rbtree helper functions are not exported at least until kernel +# version 2.6.38-rc2. 
+ifeq ($(shell grep -e '\' \ + -e '\' \ + -e '\' \ + $(objtree)/Module.symvers | wc -l),3) +EXTRA_CFLAGS += -DAUGMENTED_RBTREE_SYMBOLS_EXPORTED +endif + +ifeq ($(shell grep -e '\' \ + $(objtree)/Module.symvers | wc -l),1) +EXTRA_CFLAGS += -DIDR_GET_NEXT_EXPORTED +else +compat_objs += compat/idr.o +endif + +drbd-y := drbd_buildtag.o drbd_bitmap.o drbd_proc.o +drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o +drbd-y += lru_cache.o drbd_main.o drbd_strings.o drbd_nl.o +drbd-y += drbd_interval.o drbd_state.o $(compat_objs) +drbd-y += drbd_nla.o + +ifndef CONFIG_CONNECTOR + drbd-y += connector.o cn_queue.o +endif + +$(patsubst %,$(obj)/%,$(drbd-y)): $(obj)/compat.h + +obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o + +# ====================================================================== + +# remember KERNELRELEASE for install target +# .kernelversion can be included in Makefile as well as +# sourced from shell +$(shell echo -e "VERSION=$(VERSION)\n" \ + "PATCHLEVEL=$(PATCHLEVEL)\n" \ + "SUBLEVEL=$(SUBLEVEL)\n" \ + "EXTRAVERSION=$(EXTRAVERSION)\n" \ + "LOCALVERSION=$(LOCALVERSION)\n" \ + "KERNELRELEASE=$(KERNELRELEASE)\n" \ + "KERNELVERSION=$(KERNELVERSION)" \ + > $(src)/.drbd_kernelrelease.new \ +) + +# Are we in stage 2 of the build (modpost)? 
+ +KBUILD_STAGE ?= $(if $(filter $(srctree)/scripts/Makefile.modpost,$(MAKEFILE_LIST)),modpost) + +ifneq ($(shell date -r $(objtree)/.config),$(shell date -r $(obj)/.config.timestamp 2> /dev/null)) +COMPAT_FORCE := FORCE +endif + +ifneq ($(KBUILD_STAGE),modpost) +$(obj)/compat.h: $(wildcard $(src)/compat/tests/*.c) $(COMPAT_FORCE) + $(call filechk,compat.h) + $(Q)touch $@ + $(Q)touch -r $(objtree)/.config $(obj)/.config.timestamp +endif + +filechk_compat.h = \ + for cfg in $(sort $(filter-out FORCE,$^)); do \ + var=`echo $$cfg | \ + sed -e "s,.*/,COMPAT_," -e "s,\.c,," | \ + tr -- -a-z _A-Z | \ + tr -dc A-Z0-9_`; \ + if $(CC) $(c_flags) $(COMPAT_CFLAGS) -c -o $(obj)/dummy.o $$cfg \ + > /dev/null $(if $(quiet),2>&1); then \ + echo "\#define $$var"; \ + rm -f $(obj)/dummy.{o,gcda,gcno}; \ + else \ + echo "/* \#undef $$var */"; \ + fi; \ + done + + + diff -Nru drbd8-8.3.7/drbd/Kconfig drbd8-8.4.1+git55a81dc~cmd1/drbd/Kconfig --- drbd8-8.3.7/drbd/Kconfig 2010-01-07 09:09:33.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/Kconfig 2012-02-02 14:09:14.000000000 +0000 @@ -36,13 +36,3 @@ See also: http://www.drbd.org/, http://www.linux-ha.org If unsure, say N. - -config DRBD_TRACE - tristate "DRBD tracing" - depends on BLK_DEV_DRBD - select TRACEPOINTS - help - - Say Y here if you want to be able to trace various events in DRBD. - - If unsure, say N. diff -Nru drbd8-8.3.7/drbd/Makefile drbd8-8.4.1+git55a81dc~cmd1/drbd/Makefile --- drbd8-8.3.7/drbd/Makefile 2010-01-07 09:09:33.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/Makefile 2012-02-02 14:09:14.000000000 +0000 @@ -36,31 +36,19 @@ # Lets test on PATCHLEVEL, that won't change too soon... 
ifneq ($(PATCHLEVEL),) + ifneq ($(VERSION),3) ifneq ($(VERSION),2) $(error "won't compile with this kernel version") endif ifneq ($(PATCHLEVEL),6) $(error "won't compile with this kernel version") endif + endif CONFIG_BLK_DEV_DRBD := m - CONFIG_DRBD_TRACE := $(shell test $${SUBLEVEL} -ge 30 && echo m || echo n) - include $(DRBDSRC)/Makefile-2.6 + include $(src)/Kbuild - override EXTRA_CFLAGS += -I$(DRBDSRC) - # remember KERNELRELEASE for install target - # .kernelversion can be included in Makefile as well as - # sourced from shell - $(shell echo -e "VERSION=$(VERSION)\n" \ - "PATCHLEVEL=$(PATCHLEVEL)\n" \ - "SUBLEVEL=$(SUBLEVEL)\n" \ - "EXTRAVERSION=$(EXTRAVERSION)\n" \ - "LOCALVERSION=$(LOCALVERSION)\n" \ - "KERNELRELEASE=$(KERNELRELEASE)\n" \ - "KERNELVERSION=$(KERNELVERSION)" \ - > $(DRBDSRC)/.drbd_kernelrelease.new \ - ) else # called from command line in current directory @@ -69,7 +57,6 @@ SHELL=/bin/bash DRBDSRC := $(shell pwd) - export DRBDSRC # to be overridden on command line: PREFIX := / @@ -90,11 +77,6 @@ endif endif - KDIR_Makefile_PATCHLEVEL = $(shell test -e $(KDIR)/Makefile && grep "^PATCHLEVEL = " $(KDIR)/Makefile | cut -d " " -f 3) - ifneq ($(findstring $(KDIR_Makefile_PATCHLEVEL),12345),) - $(error "won't compile with this kernel version") - endif - .PHONY: drbd.o default all greeting clean kbuild install dep tags drbd.o: greeting kbuild @@ -114,30 +96,31 @@ .PHONY: drbd_buildtag.c drbd_buildtag.c: - @is_tarball=`test -e ../.git/. && echo false || echo true`;\ - set -e; exec > $@.new; \ + @set -e; exec > $@.new; \ echo -e "/* automatically generated. DO NOT EDIT. */"; \ echo -e "#include "; \ echo -e "const char *drbd_buildtag(void)\n{"; \ - if $$is_tarball; then \ - if ! test -e $@ ; then \ - echo >&2 "your DRBD source tree is broken. unpack again."; \ + if test -e ../.git && GITHEAD=$$(git rev-parse HEAD); then \ + GITDIFF=$$(cd .. 
&& git diff --name-only HEAD | \ + tr -s '\t\n' ' ' | \ + sed -e 's/^/ /;s/ *$$//'); \ + echo -e "\treturn \"GIT-hash: $$GITHEAD$$GITDIFF\""; \ + elif ! test -e $@ ; then \ + echo >&2 "$@ not found."; \ + test -e ../.git && \ + >&2 printf "%s\n" \ + "git did not work, but this looks like a git checkout?" \ + "Install git and try again." || \ + echo >&2 "Your DRBD source tree is broken. Unpack again."; \ exit 1; \ - fi; \ - grep return $@ ; \ else \ - GITHEAD=$$(git rev-parse HEAD); \ - GITDIFF=$$(cd .. && git diff --name-only HEAD | tr -s '\t\n' ' ' | \ - sed -e 's/^/ /;s/ *$$//'); \ - echo -e "\treturn \"GIT-hash: $$GITHEAD$$GITDIFF\""; \ + grep return $@ ; \ fi ; \ echo -e "\t\t\" build by $$USER@$$HOSTNAME, `date "+%F %T"`\";\n}"; \ mv --force $@.new $@ kbuild: drbd_buildtag.c @rm -f .drbd_kernelrelease* - -test -f ../scripts/adjust_drbd_config_h.sh && \ - KDIR=$(KDIR) O=$(O) $(SHELL) ../scripts/adjust_drbd_config_h.sh # previous to 2.6.6 (suse: 2.6.5-dunno), this should be: $(MAKE) -C $(KDIR) $(if $(O),O=$(O),) SUBDIRS=$(DRBDSRC) $(ARCH_UM) modules # $(MAKE) -C $(KDIR) M=$(DRBDSRC) $(ARCH_UM) modules @@ -159,8 +142,9 @@ @echo "done." 
clean: - rm -rf .tmp_versions + rm -rf .tmp_versions Module.markers Module.symvers modules.order rm -f *.[oas] *.ko .*.cmd .*.d .*.tmp *.mod.c .*.flags .depend .kernel* + rm -f compat/*.[oas] compat/.*.cmd distclean: clean @if git show HEAD:drbd/linux/drbd_config.h > linux/drbd_config.h.tmp \ @@ -179,7 +163,7 @@ # for VERSION, PATCHLEVEL, SUBLEVEL, EXTRAVERSION, KERNELRELEASE include .drbd_kernelrelease MODOBJ := drbd.ko - MODSUBDIR := kernel/drivers/block + MODSUBDIR := updates LINUX := $(wildcard /lib/modules/$(KERNELRELEASE)/build) install: diff -Nru drbd8-8.3.7/drbd/Makefile-2.6 drbd8-8.4.1+git55a81dc~cmd1/drbd/Makefile-2.6 --- drbd8-8.3.7/drbd/Makefile-2.6 2009-11-25 09:06:43.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/Makefile-2.6 1970-01-01 00:00:00.000000000 +0000 @@ -1,12 +0,0 @@ -drbd-y := drbd_buildtag.o drbd_bitmap.o drbd_proc.o -drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o -drbd-y += lru_cache.o drbd_main.o drbd_strings.o drbd_nl.o - -ifndef CONFIG_CONNECTOR - drbd-y += connector.o cn_queue.o -endif - -drbd_trace-y := drbd_tracing.o - -obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o -obj-$(CONFIG_DRBD_TRACE) += drbd_trace.o diff -Nru drbd8-8.3.7/drbd/cn_queue.c drbd8-8.4.1+git55a81dc~cmd1/drbd/cn_queue.c --- drbd8-8.3.7/drbd/cn_queue.c 2009-07-27 08:47:43.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/cn_queue.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,212 +0,0 @@ -/* - * cn_queue.c - * - * 2004-2005 Copyright (c) Evgeniy Polyakov - * All rights reserved. - * - * Modified by Philipp Reiser to work on older 2.6.x kernels. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include /* In case kzalloc() is missing. */ - -#ifdef NEED_BACKPORT_OF_KZALLOC -static inline void *kzalloc(size_t size, int flags) -{ - void *rv = kmalloc(size, flags); - if (rv) - memset(rv, 0, size); - - return rv; -} -#endif - - -#ifndef KERNEL_HAS_MSLEEP -/** - * msleep - sleep safely even with waitqueue interruptions - * @msecs: Time in milliseconds to sleep for - */ -static inline void msleep(unsigned int msecs) -{ - unsigned long timeout = (msecs * HZ + 999) / 1000; - - while (timeout) { - set_current_state(TASK_UNINTERRUPTIBLE); - timeout = schedule_timeout(timeout); - } -} - -#endif - -void cn_queue_wrapper(void *data) -{ - struct cn_callback_data *d = data; - - d->callback(d->callback_priv); - - d->destruct_data(d->ddata); - d->ddata = NULL; - - kfree(d->free); -} - -static struct cn_callback_entry *cn_queue_alloc_callback_entry(char *name, struct cb_id *id, void (*callback)(void *)) -{ - struct cn_callback_entry *cbq; - - cbq = kzalloc(sizeof(*cbq), GFP_KERNEL); - if (!cbq) { - printk(KERN_ERR "Failed to create new callback queue.\n"); - return NULL; - } - - snprintf(cbq->id.name, sizeof(cbq->id.name), "%s", name); - memcpy(&cbq->id.id, id, sizeof(struct cb_id)); - cbq->data.callback = callback; - - INIT_WORK(&cbq->work, &cn_queue_wrapper, &cbq->data); - return cbq; -} - -static void cn_queue_free_callback(struct cn_callback_entry *cbq) -{ - cancel_delayed_work(&cbq->work); - 
flush_workqueue(cbq->pdev->cn_queue); - - kfree(cbq); -} - -int cn_cb_equal(struct cb_id *i1, struct cb_id *i2) -{ - return ((i1->idx == i2->idx) && (i1->val == i2->val)); -} - -int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)) -{ - struct cn_callback_entry *cbq, *__cbq; - int found = 0; - - cbq = cn_queue_alloc_callback_entry(name, id, callback); - if (!cbq) - return -ENOMEM; - - atomic_inc(&dev->refcnt); - cbq->pdev = dev; - - spin_lock_bh(&dev->queue_lock); - list_for_each_entry(__cbq, &dev->queue_list, callback_entry) { - if (cn_cb_equal(&__cbq->id.id, id)) { - found = 1; - break; - } - } - if (!found) - list_add_tail(&cbq->callback_entry, &dev->queue_list); - spin_unlock_bh(&dev->queue_lock); - - if (found) { - atomic_dec(&dev->refcnt); - cn_queue_free_callback(cbq); - return -EINVAL; - } - - cbq->nls = dev->nls; - cbq->seq = 0; - cbq->group = cbq->id.id.idx; - - return 0; -} - -void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id) -{ - struct cn_callback_entry *cbq, *n; - int found = 0; - - spin_lock_bh(&dev->queue_lock); - list_for_each_entry_safe(cbq, n, &dev->queue_list, callback_entry) { - if (cn_cb_equal(&cbq->id.id, id)) { - list_del(&cbq->callback_entry); - found = 1; - break; - } - } - spin_unlock_bh(&dev->queue_lock); - - if (found) { - cn_queue_free_callback(cbq); - atomic_dec_and_test(&dev->refcnt); - } -} - -struct cn_queue_dev *cn_queue_alloc_dev(char *name, struct sock *nls) -{ - struct cn_queue_dev *dev; - - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) - return NULL; - - snprintf(dev->name, sizeof(dev->name), "%s", name); - atomic_set(&dev->refcnt, 0); - INIT_LIST_HEAD(&dev->queue_list); - spin_lock_init(&dev->queue_lock); - - dev->nls = nls; - dev->netlink_groups = 0; - - dev->cn_queue = create_workqueue(dev->name); - if (!dev->cn_queue) { - kfree(dev); - return NULL; - } - - return dev; -} - -void cn_queue_free_dev(struct cn_queue_dev *dev) -{ - struct 
cn_callback_entry *cbq, *n; - - flush_workqueue(dev->cn_queue); - destroy_workqueue(dev->cn_queue); - - spin_lock_bh(&dev->queue_lock); - list_for_each_entry_safe(cbq, n, &dev->queue_list, callback_entry) - list_del(&cbq->callback_entry); - spin_unlock_bh(&dev->queue_lock); - - while (atomic_read(&dev->refcnt)) { - printk(KERN_INFO "Waiting for %s to become free: refcnt=%d.\n", - dev->name, atomic_read(&dev->refcnt)); - msleep(1000); - } - - kfree(dev); - dev = NULL; -} diff -Nru drbd8-8.3.7/drbd/compat/asm-generic/bitops/le.h drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/asm-generic/bitops/le.h --- drbd8-8.3.7/drbd/compat/asm-generic/bitops/le.h 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/asm-generic/bitops/le.h 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,80 @@ +#ifndef _ASM_GENERIC_BITOPS_LE_H_ +#define _ASM_GENERIC_BITOPS_LE_H_ + +#include +#include + +#if defined(__LITTLE_ENDIAN) + +#define BITOP_LE_SWIZZLE 0 + +static inline unsigned long find_next_zero_bit_le(const void *addr, + unsigned long size, unsigned long offset) +{ + return find_next_zero_bit(addr, size, offset); +} + +static inline unsigned long find_next_bit_le(const void *addr, + unsigned long size, unsigned long offset) +{ + return find_next_bit(addr, size, offset); +} + +static inline unsigned long find_first_zero_bit_le(const void *addr, + unsigned long size) +{ + return find_first_zero_bit(addr, size); +} + +#elif defined(__BIG_ENDIAN) + +#define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) + +extern unsigned long find_next_zero_bit_le(const void *addr, + unsigned long size, unsigned long offset); +extern unsigned long find_next_bit_le(const void *addr, + unsigned long size, unsigned long offset); + +#define find_first_zero_bit_le(addr, size) \ + find_next_zero_bit_le((addr), (size), 0) + +#else +#error "Please fix " +#endif + +static inline int test_bit_le(int nr, const void *addr) +{ + return test_bit(nr ^ BITOP_LE_SWIZZLE, addr); +} + +static inline void 
__set_bit_le(int nr, void *addr) +{ + __set_bit(nr ^ BITOP_LE_SWIZZLE, addr); +} + +static inline void __clear_bit_le(int nr, void *addr) +{ + __clear_bit(nr ^ BITOP_LE_SWIZZLE, addr); +} + +static inline int test_and_set_bit_le(int nr, void *addr) +{ + return test_and_set_bit(nr ^ BITOP_LE_SWIZZLE, addr); +} + +static inline int test_and_clear_bit_le(int nr, void *addr) +{ + return test_and_clear_bit(nr ^ BITOP_LE_SWIZZLE, addr); +} + +static inline int __test_and_set_bit_le(int nr, void *addr) +{ + return __test_and_set_bit(nr ^ BITOP_LE_SWIZZLE, addr); +} + +static inline int __test_and_clear_bit_le(int nr, void *addr) +{ + return __test_and_clear_bit(nr ^ BITOP_LE_SWIZZLE, addr); +} + +#endif /* _ASM_GENERIC_BITOPS_LE_H_ */ diff -Nru drbd8-8.3.7/drbd/compat/bitops.h drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/bitops.h --- drbd8-8.3.7/drbd/compat/bitops.h 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/bitops.h 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,81 @@ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) +/* did not yet include generic_find_next_le_bit() {{{ */ + +#if defined(__LITTLE_ENDIAN) + +#define generic_find_next_le_bit(addr, size, offset) \ + find_next_bit(addr, size, offset) + +#elif defined(__BIG_ENDIAN) +/* from 2.6.33 lib/find_bit.c */ + +/* include/linux/byteorder does not support "unsigned long" type */ +static inline unsigned long ext2_swabp(const unsigned long * x) +{ +#if BITS_PER_LONG == 64 + return (unsigned long) __swab64p((u64 *) x); +#elif BITS_PER_LONG == 32 + return (unsigned long) __swab32p((u32 *) x); +#else +#error BITS_PER_LONG not defined +#endif +} + +/* include/linux/byteorder doesn't support "unsigned long" type */ +static inline unsigned long ext2_swab(const unsigned long y) +{ +#if BITS_PER_LONG == 64 + return (unsigned long) __swab64((u64) y); +#elif BITS_PER_LONG == 32 + return (unsigned long) __swab32((u32) y); +#else +#error BITS_PER_LONG not defined +#endif +} + +unsigned long 
generic_find_next_le_bit(const unsigned long *addr, unsigned + long size, unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG - 1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset &= (BITS_PER_LONG - 1UL); + if (offset) { + tmp = ext2_swabp(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + + while (size & ~(BITS_PER_LONG - 1)) { + tmp = *(p++); + if (tmp) + goto found_middle_swap; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = ext2_swabp(p); +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + __ffs(tmp); + +found_middle_swap: + return result + __ffs(ext2_swab(tmp)); +} +#else +#error "unknown byte order" +#endif +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) */ diff -Nru drbd8-8.3.7/drbd/compat/idr.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/idr.c --- drbd8-8.3.7/drbd/compat/idr.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/idr.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,48 @@ +#include +#include +#include +#include +#include + +/* The idr_get_next() function exists since 2009-04-02 Linux-2.6.29 (commit 38460b48) + but is exported for use in modules since 2010-01-29 Linux-2.6.35 (commit 4d1ee80f) */ +#ifndef IDR_GET_NEXT_EXPORTED +#ifndef rcu_dereference_raw +/* see c26d34a rcu: Add lockdep-enabled variants of rcu_dereference() */ +#define rcu_dereference_raw(p) rcu_dereference(p) +#endif +void *idr_get_next(struct idr *idp, int *nextidp) +{ + struct idr_layer *p, *pa[MAX_LEVEL]; + struct idr_layer **paa = &pa[0]; + int id = *nextidp; + int n, max; + + /* find first ent */ + n = idp->layers * IDR_BITS; + max = 1 << n; + p = 
rcu_dereference_raw(idp->top); + if (!p) + return NULL; + + while (id < max) { + while (n > 0 && p) { + n -= IDR_BITS; + *paa++ = p; + p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); + } + + if (p) { + *nextidp = id; + return p; + } + + id += 1 << n; + while (n < fls(id)) { + n += IDR_BITS; + p = *--paa; + } + } + return NULL; +} +#endif diff -Nru drbd8-8.3.7/drbd/compat/linux/autoconf.h drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/linux/autoconf.h --- drbd8-8.3.7/drbd/compat/linux/autoconf.h 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/linux/autoconf.h 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1 @@ +/* empty file, for compat reasons */ diff -Nru drbd8-8.3.7/drbd/compat/linux/dynamic_debug.h drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/linux/dynamic_debug.h --- drbd8-8.3.7/drbd/compat/linux/dynamic_debug.h 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/linux/dynamic_debug.h 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,8 @@ +#ifndef _DYNAMIC_DEBUG_H +#define _DYNAMIC_DEBUG_H + +#ifndef dynamic_dev_dbg +#define dynamic_dev_dbg(dev, fmt, ...) +#endif + +#endif diff -Nru drbd8-8.3.7/drbd/compat/linux/hardirq.h drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/linux/hardirq.h --- drbd8-8.3.7/drbd/compat/linux/hardirq.h 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/linux/hardirq.h 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1 @@ +/* Just an empty file. */ diff -Nru drbd8-8.3.7/drbd/compat/linux/memcontrol.h drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/linux/memcontrol.h --- drbd8-8.3.7/drbd/compat/linux/memcontrol.h 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/linux/memcontrol.h 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,3 @@ +/* just an empty file + * memcontrol.h did not exist prior to 2.6.25. + * but it needs more recent kernels for mm_inline.h to work. 
*/ diff -Nru drbd8-8.3.7/drbd/compat/linux/mutex.h drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/linux/mutex.h --- drbd8-8.3.7/drbd/compat/linux/mutex.h 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/linux/mutex.h 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,41 @@ +/* "Backport" of the mutex to older Linux-2.6.x kernels. + */ +#ifndef __LINUX_MUTEX_H +#define __LINUX_MUTEX_H + +#include + +struct mutex { + struct semaphore sem; +}; + +static inline void mutex_init(struct mutex *m) +{ + sema_init(&m->sem, 1); +} + +static inline void mutex_lock(struct mutex *m) +{ + down(&m->sem); +} + +static inline int mutex_lock_interruptible(struct mutex *m) +{ + return down_interruptible(&m->sem); +} + +static inline void mutex_unlock(struct mutex *m) +{ + up(&m->sem); +} + +static inline int mutex_is_locked(struct mutex *lock) +{ + return atomic_read(&lock->sem.count) != 1; +} + +static inline int mutex_trylock(struct mutex *lock) +{ + return !down_trylock(&lock->sem); +} +#endif diff -Nru drbd8-8.3.7/drbd/compat/linux/tracepoint.h drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/linux/tracepoint.h --- drbd8-8.3.7/drbd/compat/linux/tracepoint.h 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/linux/tracepoint.h 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1 @@ +struct tracepoint; diff -Nru drbd8-8.3.7/drbd/compat/tests/bio_split_has_bio_split_pool_parameter.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/bio_split_has_bio_split_pool_parameter.c --- drbd8-8.3.7/drbd/compat/tests/bio_split_has_bio_split_pool_parameter.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/bio_split_has_bio_split_pool_parameter.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,12 @@ +#include + +/* + * bio_split() had a memory pool parameter until commit 6feef53 (2.6.28-rc1). 
+ */ +void test(void) +{ + struct bio *bio = NULL; + struct bio_pair *bio_pair; + + bio_pair = bio_split(bio, bio_split_pool, 0); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/bioset_create_has_three_parameters.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/bioset_create_has_three_parameters.c --- drbd8-8.3.7/drbd/compat/tests/bioset_create_has_three_parameters.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/bioset_create_has_three_parameters.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,10 @@ +#include + +/* + * Note that up until 2.6.21 inclusive, it was + * struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size, int scale) + */ +void dummy(void) +{ + bioset_create(16, 16, 4); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_IS_ERR_OR_NULL.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_IS_ERR_OR_NULL.c --- drbd8-8.3.7/drbd/compat/tests/have_IS_ERR_OR_NULL.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_IS_ERR_OR_NULL.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,8 @@ +#include + +int foo() +{ + void *x = 0; + + return IS_ERR_OR_NULL(x); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_bioset_create_front_pad.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_bioset_create_front_pad.c --- drbd8-8.3.7/drbd/compat/tests/have_bioset_create_front_pad.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_bioset_create_front_pad.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,30 @@ +#include + +/* + * upstream commit (included in 2.6.29) + * commit bb799ca0202a360fa74d5f17039b9100caebdde7 + * Author: Jens Axboe + * Date: Wed Dec 10 15:35:05 2008 +0100 + * + * bio: allow individual slabs in the bio_set + * + * does + * -struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size) + * +struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) + * + * Note that up until 2.6.21 inclusive, it was + * 
struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size, int scale) + * so if we want to support old kernels (RHEL5), we will need an additional compat check. + * + * This also means that we must not use the front_pad trick as long as we want + * to keep compatibility with < 2.6.29. + */ +extern struct bio_set *compat_check_bioset_create(unsigned int, unsigned int); + +#ifndef __same_type +# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) +#endif +void dummy(void) +{ + BUILD_BUG_ON(!__same_type(&compat_check_bioset_create, &bioset_create)); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_blk_queue_max_hw_sectors.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_blk_queue_max_hw_sectors.c --- drbd8-8.3.7/drbd/compat/tests/have_blk_queue_max_hw_sectors.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_blk_queue_max_hw_sectors.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,5 @@ +#include + +#ifndef blk_queue_max_hw_sectors +void *p = blk_queue_max_hw_sectors; +#endif diff -Nru drbd8-8.3.7/drbd/compat/tests/have_blk_queue_max_segments.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_blk_queue_max_segments.c --- drbd8-8.3.7/drbd/compat/tests/have_blk_queue_max_segments.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_blk_queue_max_segments.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,5 @@ +#include + +#ifndef blk_queue_max_segments +void *p = blk_queue_max_segments; +#endif diff -Nru drbd8-8.3.7/drbd/compat/tests/have_blkdev_get_by_path.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_blkdev_get_by_path.c --- drbd8-8.3.7/drbd/compat/tests/have_blkdev_get_by_path.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_blkdev_get_by_path.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,11 @@ +#include + +/* + * In kernel version 2.6.38-rc1, open_bdev_exclusive() was replaced by + * 
blkdev_get_by_path(); see commits e525fd89 and d4d77629. + */ +void foo(void) { + struct block_device *blkdev; + + blkdev = blkdev_get_by_path("", (fmode_t) 0, (void *) 0); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_bool_type.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_bool_type.c --- drbd8-8.3.7/drbd/compat/tests/have_bool_type.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_bool_type.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,3 @@ +#include + +bool x; diff -Nru drbd8-8.3.7/drbd/compat/tests/have_clear_bit_unlock.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_clear_bit_unlock.c --- drbd8-8.3.7/drbd/compat/tests/have_clear_bit_unlock.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_clear_bit_unlock.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,18 @@ +#include +/* Including asm/system.h is necessary for s390. + + They define smp_mb__before_clear_bit() in asm/system.h + From asm/bitops.h they include asm-generic/bitops/lock.h + The macro defining clear_bit_unlock() in + asm-generic/bitops/lock.h needs smp_mb__before_clear_bit(). 
+ + They fail to include asm/system.h from asm/bitops.h +*/ +#include + +void foo() +{ + unsigned long bar; + + clear_bit_unlock(0, &bar); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_cn_netlink_skb_parms.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_cn_netlink_skb_parms.c --- drbd8-8.3.7/drbd/compat/tests/have_cn_netlink_skb_parms.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_cn_netlink_skb_parms.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,13 @@ +#include +#include + +#ifndef __same_type +# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) +#endif + +void dummy(void) +{ + void (*cb) (struct cn_msg *, struct netlink_skb_parms *) = NULL; + struct cn_callback_data ccb; + BUILD_BUG_ON(!(__same_type(ccb.callback, cb))); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_ctrl_attr_mcast_groups.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_ctrl_attr_mcast_groups.c --- drbd8-8.3.7/drbd/compat/tests/have_ctrl_attr_mcast_groups.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_ctrl_attr_mcast_groups.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,6 @@ +#include + +void f(void) +{ + int i = CTRL_ATTR_MCAST_GROUPS; +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_dst_groups.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_dst_groups.c --- drbd8-8.3.7/drbd/compat/tests/have_dst_groups.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_dst_groups.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,8 @@ +#include +#include + +void dummy(void) +{ + static struct netlink_skb_parms p; + p.dst_groups = 0; +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_find_next_zero_bit_le.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_find_next_zero_bit_le.c --- drbd8-8.3.7/drbd/compat/tests/have_find_next_zero_bit_le.c 1970-01-01 00:00:00.000000000 +0000 +++ 
drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_find_next_zero_bit_le.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,13 @@ +#include +#include + +unsigned long func(void) +{ + void *addr; + unsigned long size, offset; + + addr = NULL; + size = 0; + offset = 0; + return find_next_zero_bit_le(addr, size, offset); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_fmode_t.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_fmode_t.c --- drbd8-8.3.7/drbd/compat/tests/have_fmode_t.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_fmode_t.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,6 @@ +#include + +void foo() +{ + fmode_t mode; +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_genlmsg_msg_size.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_genlmsg_msg_size.c --- drbd8-8.3.7/drbd/compat/tests/have_genlmsg_msg_size.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_genlmsg_msg_size.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,9 @@ +#include + +void f(void) +{ + int dummy; + + dummy = genlmsg_msg_size(0); + dummy = genlmsg_total_size(0); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_genlmsg_new.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_genlmsg_new.c --- drbd8-8.3.7/drbd/compat/tests/have_genlmsg_new.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_genlmsg_new.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,8 @@ +#include + +void f(void) +{ + struct sk_buff *skb; + + skb = genlmsg_new(123, GFP_KERNEL); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_genlmsg_put_reply.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_genlmsg_put_reply.c --- drbd8-8.3.7/drbd/compat/tests/have_genlmsg_put_reply.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_genlmsg_put_reply.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,11 @@ +#include + +void f(void) +{ + struct sk_buff *skb = NULL; + 
struct genl_info *info = NULL; + struct genl_family *family = NULL; + void *ret; + + ret = genlmsg_put_reply(skb, info, family, 0, 0); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_genlmsg_reply.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_genlmsg_reply.c --- drbd8-8.3.7/drbd/compat/tests/have_genlmsg_reply.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_genlmsg_reply.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,8 @@ +#include + +void f(void) +{ + struct sk_buff *skb = NULL; + struct genl_info *info = NULL; + int ret = genlmsg_reply(skb, info); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_linux_byteorder_swabb_h.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_linux_byteorder_swabb_h.c --- drbd8-8.3.7/drbd/compat/tests/have_linux_byteorder_swabb_h.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_linux_byteorder_swabb_h.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,2 @@ +#include +#include diff -Nru drbd8-8.3.7/drbd/compat/tests/have_nlmsg_hdr.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_nlmsg_hdr.c --- drbd8-8.3.7/drbd/compat/tests/have_nlmsg_hdr.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_nlmsg_hdr.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,8 @@ +#include +#include + +void f(void) +{ + struct sk_buff *skb = NULL; + struct nlmsghdr *hdr = nlmsg_hdr(skb); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_nr_cpu_ids.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_nr_cpu_ids.c --- drbd8-8.3.7/drbd/compat/tests/have_nr_cpu_ids.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_nr_cpu_ids.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,6 @@ +#include + +void foo() +{ + int x = nr_cpu_ids; +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_open_bdev_exclusive.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_open_bdev_exclusive.c --- 
drbd8-8.3.7/drbd/compat/tests/have_open_bdev_exclusive.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_open_bdev_exclusive.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,13 @@ +#include +#include + +/* + * In kernel version v2.6.28-rc1, open_bdev_excl() was replaced by + * open_bdev_exclusive(); see commit 30c40d2. + */ +void foo(void) +{ + struct block_device *blkdev; + + blkdev = open_bdev_exclusive("", (fmode_t) 0, (void *) 0); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_proc_create_data.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_proc_create_data.c --- drbd8-8.3.7/drbd/compat/tests/have_proc_create_data.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_proc_create_data.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,5 @@ +#include + +#ifndef proc_create_data +void *p = proc_create_data; +#endif diff -Nru drbd8-8.3.7/drbd/compat/tests/have_rb_augment_functions.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_rb_augment_functions.c --- drbd8-8.3.7/drbd/compat/tests/have_rb_augment_functions.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_rb_augment_functions.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,14 @@ +#include + +/* introduced with commit b945d6b2, Linux 2.6.35-rc5 */ + +void foo(void) { + struct rb_node *n; + + rb_augment_insert((struct rb_node *) NULL, + (rb_augment_f) NULL, + NULL); + + n = rb_augment_erase_begin((struct rb_node *)NULL); + rb_augment_erase_end((struct rb_node *) NULL, (rb_augment_f) NULL, NULL); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/have_sock_shutdown.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_sock_shutdown.c --- drbd8-8.3.7/drbd/compat/tests/have_sock_shutdown.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_sock_shutdown.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,5 @@ +#include + +#ifndef kernel_sock_shutdown +void 
*p = kernel_sock_shutdown; +#endif diff -Nru drbd8-8.3.7/drbd/compat/tests/have_void_make_request.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_void_make_request.c --- drbd8-8.3.7/drbd/compat/tests/have_void_make_request.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/have_void_make_request.c 2012-09-03 21:31:23.000000000 +0000 @@ -0,0 +1,19 @@ +#include + +/* hm. sometimes this pragma is ignored :( + * use BUILD_BUG_ON instead. +#pragma GCC diagnostic warning "-Werror" + */ + +/* in Commit 5a7bbad27a410350e64a2d7f5ec18fc73836c14f (between Linux-3.1 and 3.2) + make_request() becomes type void. Before it had type int. + */ + +void drbd_make_request(struct request_queue *q, struct bio *bio) +{ +} + +void foo(void) +{ + BUILD_BUG_ON(!(__same_type(&drbd_make_request, make_request_fn))); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/need_genlmsg_multicast_wrapper.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/need_genlmsg_multicast_wrapper.c --- drbd8-8.3.7/drbd/compat/tests/need_genlmsg_multicast_wrapper.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/need_genlmsg_multicast_wrapper.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,9 @@ +#include + +void f(void) +{ + struct sk_buff *skb = NULL; + int ret; + + ret = genlmsg_multicast(skb, 0, 0); +} diff -Nru drbd8-8.3.7/drbd/compat/tests/use_blk_queue_max_sectors_anyways.c drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/use_blk_queue_max_sectors_anyways.c --- drbd8-8.3.7/drbd/compat/tests/use_blk_queue_max_sectors_anyways.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/compat/tests/use_blk_queue_max_sectors_anyways.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,15 @@ +#include + +#ifndef blk_queue_max_hw_sectors +void *p = blk_queue_max_hw_sectors; +#endif + +/* For kernel versions 2.6.31 to 2.6.33 inclusive, even though + * blk_queue_max_hw_sectors is present, we actually need to use + * blk_queue_max_sectors 
to set max_hw_sectors. :-( + * RHEL6 2.6.32 chose to be different and already has eliminated + * blk_queue_max_sectors as upstream 2.6.34 did. + */ +#ifndef blk_queue_max_sectors +void *q = blk_queue_max_sectors; +#endif diff -Nru drbd8-8.3.7/drbd/connector.c drbd8-8.4.1+git55a81dc~cmd1/drbd/connector.c --- drbd8-8.3.7/drbd/connector.c 2009-10-06 11:32:41.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/connector.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,513 +0,0 @@ -/* - * connector.c - * - * 2004-2005 Copyright (c) Evgeniy Polyakov - * All rights reserved. - * - * Modified by Philipp Reiser to work on older 2.6.x kernels. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef DRBD_CONNECTOR_BACKPORT_HEADER -#error "drbd backported connector.c compiled against kernel connector.h will not work" -#error "enable CONFIG_CONNECTOR in your kernel and try again" -#endif - -#include - -#ifdef DRBD_NL_DST_GROUPS - /* pre 2.6.16 */ -# define NETLINK_GROUP(skb) NETLINK_CB(skb).dst_groups -#else -# define NETLINK_GROUP(skb) NETLINK_CB(skb).dst_group -#endif - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Evgeniy Polyakov "); -MODULE_DESCRIPTION("Generic userspace <-> kernelspace connector."); - -static u32 cn_idx = CN_IDX_CONNECTOR; -static u32 cn_val = CN_VAL_CONNECTOR; - -module_param(cn_idx, uint, 0); -module_param(cn_val, uint, 0); -MODULE_PARM_DESC(cn_idx, "Connector's main device idx."); -MODULE_PARM_DESC(cn_val, "Connector's main device val."); - -static DECLARE_MUTEX(notify_lock); -static LIST_HEAD(notify_list); - -static struct cn_dev cdev; - -int cn_already_initialized = 0; - -/* - * msg->seq and msg->ack are used to determine message genealogy. - * When someone sends message it puts there locally unique sequence - * and random acknowledge numbers. Sequence number may be copied into - * nlmsghdr->nlmsg_seq too. - * - * Sequence number is incremented with each message to be sent. - * - * If we expect reply to our message then the sequence number in - * received message MUST be the same as in original message, and - * acknowledge number MUST be the same + 1. - * - * If we receive a message and its sequence number is not equal to the - * one we are expecting then it is a new message. 
- * - * If we receive a message and its sequence number is the same as one - * we are expecting but it's acknowledgement number is not equal to - * the acknowledgement number in the original message + 1, then it is - * a new message. - * - */ -int cn_netlink_send(struct cn_msg *msg, u32 __group, gfp_t gfp_mask) -{ - struct cn_callback_entry *__cbq; - unsigned int size; - struct sk_buff *skb; - struct nlmsghdr *nlh; - struct cn_msg *data; - struct cn_dev *dev = &cdev; - u32 group = 0; - int found = 0; - - if (!__group) { - spin_lock_bh(&dev->cbdev->queue_lock); - list_for_each_entry(__cbq, &dev->cbdev->queue_list, - callback_entry) { - if (cn_cb_equal(&__cbq->id.id, &msg->id)) { - found = 1; - group = __cbq->group; - } - } - spin_unlock_bh(&dev->cbdev->queue_lock); - - if (!found) - return -ENODEV; - } else { - group = __group; - } - - size = NLMSG_SPACE(sizeof(*msg) + msg->len); - - skb = alloc_skb(size, gfp_mask); - if (!skb) - return -ENOMEM; - - nlh = NLMSG_PUT(skb, 0, msg->seq, NLMSG_DONE, size - sizeof(*nlh)); - - data = NLMSG_DATA(nlh); - - memcpy(data, msg, sizeof(*data) + msg->len); - - NETLINK_GROUP(skb) = group; - - netlink_broadcast(dev->nls, skb, 0, group, gfp_mask); - - return 0; - -nlmsg_failure: - kfree_skb(skb); - return -EINVAL; -} - -/* - * Callback helper - queues work and setup destructor for given data. 
- */ -static int cn_call_callback(struct cn_msg *msg, void (*destruct_data)(void *), void *data) -{ - struct cn_callback_entry *__cbq; - struct cn_dev *dev = &cdev; - int err = -ENODEV; - - spin_lock_bh(&dev->cbdev->queue_lock); - list_for_each_entry(__cbq, &dev->cbdev->queue_list, callback_entry) { - if (cn_cb_equal(&__cbq->id.id, &msg->id)) { - if (likely(!test_bit(0, &__cbq->work.pending) && - __cbq->data.ddata == NULL)) { - __cbq->data.callback_priv = msg; - - __cbq->data.ddata = data; - __cbq->data.destruct_data = destruct_data; - - if (queue_work(dev->cbdev->cn_queue, - &__cbq->work)) - err = 0; - } else { - struct work_struct *w; - struct cn_callback_data *d; - - w = kmalloc(sizeof(*w) + sizeof(*d), GFP_ATOMIC); - if (w) { - memset(w,0,sizeof(*w) + sizeof(*d)); - d = (struct cn_callback_data *)(w+1); - - d->callback_priv = msg; - d->callback = __cbq->data.callback; - d->ddata = data; - d->destruct_data = destruct_data; - d->free = w; - - INIT_LIST_HEAD(&w->entry); - w->pending = 0; - w->func = &cn_queue_wrapper; - w->data = d; - init_timer(&w->timer); - - if (queue_work(dev->cbdev->cn_queue, w)) - err = 0; - else { - kfree(w); - err = -EINVAL; - } - } else - err = -ENOMEM; - } - break; - } - } - spin_unlock_bh(&dev->cbdev->queue_lock); - - return err; -} - -/* - * Skb receive helper - checks skb and msg size and calls callback - * helper. - */ -static int __cn_rx_skb(struct sk_buff *skb, struct nlmsghdr *nlh) -{ - u32 pid, uid, seq, group; - struct cn_msg *msg; - - pid = NETLINK_CREDS(skb)->pid; - uid = NETLINK_CREDS(skb)->uid; - seq = nlh->nlmsg_seq; - group = NETLINK_GROUP(skb); - msg = NLMSG_DATA(nlh); - - /* DRBD specific change: Only allow packets from ROOT */ - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - return cn_call_callback(msg, (void (*)(void *))kfree_skb, skb); -} - -/* - * Main netlink receiving function. - * - * It checks skb and netlink header sizes and calls the skb receive - * helper with a shared skb. 
- */ -static void cn_rx_skb(struct sk_buff *__skb) -{ - struct nlmsghdr *nlh; - u32 len; - int err; - struct sk_buff *skb; - - skb = skb_get(__skb); - - if (skb->len >= NLMSG_SPACE(0)) { - nlh = (struct nlmsghdr *)skb->data; - - if (nlh->nlmsg_len < sizeof(struct cn_msg) || - skb->len < nlh->nlmsg_len || - nlh->nlmsg_len > CONNECTOR_MAX_MSG_SIZE) { - kfree_skb(skb); - goto out; - } - - len = NLMSG_ALIGN(nlh->nlmsg_len); - if (len > skb->len) - len = skb->len; - - err = __cn_rx_skb(skb, nlh); - if (err < 0) - kfree_skb(skb); - } - -out: - kfree_skb(__skb); -} - -/* - * Netlink socket input callback - dequeues the skbs and calls the - * main netlink receiving function. - */ -static void cn_input(struct sock *sk, int len) -{ - struct sk_buff *skb; - - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) - cn_rx_skb(skb); -} - -/* - * Notification routing. - * - * Gets id and checks if there are notification request for it's idx - * and val. If there are such requests notify the listeners with the - * given notify event. - * - */ -static void cn_notify(struct cb_id *id, u32 notify_event) -{ - struct cn_ctl_entry *ent; - - down(¬ify_lock); - list_for_each_entry(ent, ¬ify_list, notify_entry) { - int i; - struct cn_notify_req *req; - struct cn_ctl_msg *ctl = ent->msg; - int idx_found, val_found; - - idx_found = val_found = 0; - - req = (struct cn_notify_req *)ctl->data; - for (i = 0; i < ctl->idx_notify_num; ++i, ++req) { - if (id->idx >= req->first && - id->idx < req->first + req->range) { - idx_found = 1; - break; - } - } - - for (i = 0; i < ctl->val_notify_num; ++i, ++req) { - if (id->val >= req->first && - id->val < req->first + req->range) { - val_found = 1; - break; - } - } - - if (idx_found && val_found) { - struct cn_msg m = { .ack = notify_event, }; - - memcpy(&m.id, id, sizeof(m.id)); - cn_netlink_send(&m, ctl->group, GFP_KERNEL); - } - } - up(¬ify_lock); -} - -/* - * Callback add routing - adds callback with given ID and name. 
- * If there is registered callback with the same ID it will not be added. - * - * May sleep. - */ -int cn_add_callback(struct cb_id *id, char *name, void (*callback)(void *)) -{ - int err; - struct cn_dev *dev = &cdev; - - err = cn_queue_add_callback(dev->cbdev, name, id, callback); - if (err) - return err; - - cn_notify(id, 0); - - return 0; -} - -/* - * Callback remove routing - removes callback - * with given ID. - * If there is no registered callback with given - * ID nothing happens. - * - * May sleep while waiting for reference counter to become zero. - */ -void cn_del_callback(struct cb_id *id) -{ - struct cn_dev *dev = &cdev; - - cn_queue_del_callback(dev->cbdev, id); - cn_notify(id, 1); -} - -/* - * Checks two connector's control messages to be the same. - * Returns 1 if they are the same or if the first one is corrupted. - */ -static int cn_ctl_msg_equals(struct cn_ctl_msg *m1, struct cn_ctl_msg *m2) -{ - int i; - struct cn_notify_req *req1, *req2; - - if (m1->idx_notify_num != m2->idx_notify_num) - return 0; - - if (m1->val_notify_num != m2->val_notify_num) - return 0; - - if (m1->len != m2->len) - return 0; - - if ((m1->idx_notify_num + m1->val_notify_num) * sizeof(*req1) != - m1->len) - return 1; - - req1 = (struct cn_notify_req *)m1->data; - req2 = (struct cn_notify_req *)m2->data; - - for (i = 0; i < m1->idx_notify_num; ++i) { - if (req1->first != req2->first || req1->range != req2->range) - return 0; - req1++; - req2++; - } - - for (i = 0; i < m1->val_notify_num; ++i) { - if (req1->first != req2->first || req1->range != req2->range) - return 0; - req1++; - req2++; - } - - return 1; -} - -/* - * Main connector device's callback. - * - * Used for notification of a request's processing. 
- */ -static void cn_callback(void *data) -{ - struct cn_msg *msg = data; - struct cn_ctl_msg *ctl; - struct cn_ctl_entry *ent; - u32 size; - - if (msg->len < sizeof(*ctl)) - return; - - ctl = (struct cn_ctl_msg *)msg->data; - - size = (sizeof(*ctl) + ((ctl->idx_notify_num + - ctl->val_notify_num) * - sizeof(struct cn_notify_req))); - - if (msg->len != size) - return; - - if (ctl->len + sizeof(*ctl) != msg->len) - return; - - /* - * Remove notification. - */ - if (ctl->group == 0) { - struct cn_ctl_entry *n; - - down(¬ify_lock); - list_for_each_entry_safe(ent, n, ¬ify_list, notify_entry) { - if (cn_ctl_msg_equals(ent->msg, ctl)) { - list_del(&ent->notify_entry); - kfree(ent); - } - } - up(¬ify_lock); - - return; - } - - size += sizeof(*ent); - - ent = kmalloc(size, GFP_KERNEL); - if (!ent) - return; - - memset(ent,0,size); - ent->msg = (struct cn_ctl_msg *)(ent + 1); - - memcpy(ent->msg, ctl, size - sizeof(*ent)); - - down(¬ify_lock); - list_add(&ent->notify_entry, ¬ify_list); - up(¬ify_lock); -} - -int __init cn_init(void) -{ - struct cn_dev *dev = &cdev; - int err; - - dev->input = cn_input; - dev->id.idx = cn_idx; - dev->id.val = cn_val; - -#ifdef DRBD_NL_DST_GROUPS - /* history of upstream commits between kernel.org 2.6.13 and 2.6.14-rc1: - * 4fdb3bb723db469717c6d38fda667d8b0fa86ebd 2005-08-10 adds module parameter - * d629b836d151d43332492651dd841d32e57ebe3b 2005-08-15 renames dst_groups to dst_group - * 066286071d3542243baa68166acb779187c848b3 2005-08-15 adds groups parameter - * so it is not exactly correct to trigger on the rename dst_groups to dst_group, - * but sufficiently close. 
- */ - dev->nls = netlink_kernel_create(NETLINK_CONNECTOR,dev->input); -#else - dev->nls = netlink_kernel_create(NETLINK_CONNECTOR, - CN_NETLINK_USERS + 0xf, - dev->input, THIS_MODULE); -#endif - if (!dev->nls) - return -EIO; - - dev->cbdev = cn_queue_alloc_dev("cqueue", dev->nls); - if (!dev->cbdev) { - if (dev->nls->sk_socket) - sock_release(dev->nls->sk_socket); - return -EINVAL; - } - - err = cn_add_callback(&dev->id, "connector", &cn_callback); - if (err) { - cn_queue_free_dev(dev->cbdev); - if (dev->nls->sk_socket) - sock_release(dev->nls->sk_socket); - return -EINVAL; - } - - cn_already_initialized = 1; - - return 0; -} - -void __exit cn_fini(void) -{ - struct cn_dev *dev = &cdev; - - cn_already_initialized = 0; - - cn_del_callback(&dev->id); - cn_queue_free_dev(dev->cbdev); - if (dev->nls->sk_socket) - sock_release(dev->nls->sk_socket); -} diff -Nru drbd8-8.3.7/drbd/drbd_actlog.c drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_actlog.c --- drbd8-8.3.7/drbd/drbd_actlog.c 2009-11-25 09:06:43.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_actlog.c 2012-02-02 14:09:14.000000000 +0000 @@ -24,22 +24,73 @@ */ #include +#include #include +#include +#include #include "drbd_int.h" -#include "drbd_tracing.h" #include "drbd_wrappers.h" -/* We maintain a trivial check sum in our on disk activity log. - * With that we can ensure correct operation even when the storage - * device might do a partial (last) sector write while loosing power. 
- */ -struct __packed al_transaction { - u32 magic; - u32 tr_number; - struct __packed { - u32 pos; - u32 extent; } updates[1 + AL_EXTENTS_PT]; - u32 xor_sum; + +enum al_transaction_types { + AL_TR_UPDATE = 0, + AL_TR_INITIALIZED = 0xffff +}; +/* all fields on disc in big endian */ +struct __packed al_transaction_on_disk { + /* don't we all like magic */ + __be32 magic; + + /* to identify the most recent transaction block + * in the on disk ring buffer */ + __be32 tr_number; + + /* checksum on the full 4k block, with this field set to 0. */ + __be32 crc32c; + + /* type of transaction, special transaction types like: + * purge-all, set-all-idle, set-all-active, ... to-be-defined + * see also enum al_transaction_types */ + __be16 transaction_type; + + /* we currently allow only a few thousand extents, + * so 16bit will be enough for the slot number. */ + + /* how many updates in this transaction */ + __be16 n_updates; + + /* maximum slot number, "al-extents" in drbd.conf speak. + * Having this in each transaction should make reconfiguration + * of that parameter easier. */ + __be16 context_size; + + /* slot number the context starts with */ + __be16 context_start_slot_nr; + + /* Some reserved bytes. Expected usage is a 64bit counter of + * sectors-written since device creation, and other data generation tag + * supporting usage */ + __be32 __reserved[4]; + + /* --- 36 byte used --- */ + + /* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes + * in one transaction, then use the remaining byte in the 4k block for + * context information. "Flexible" number of updates per transaction + * does not help, as we have to account for the case when all update + * slots are used anyways, so it would only complicate code without + * additional benefit. 
+ */ + __be16 update_slot_nr[AL_UPDATES_PER_TRANSACTION]; + + /* but the extent number is 32bit, which at an extent size of 4 MiB + * allows to cover device sizes of up to 2**54 Byte (16 PiB) */ + __be32 update_extent_nr[AL_UPDATES_PER_TRANSACTION]; + + /* --- 420 bytes used (36 + 64*6) --- */ + + /* 4096 - 420 = 3676 = 919 * 4 */ + __be32 context[AL_CONTEXT_PER_TRANSACTION]; }; struct update_odbm_work { @@ -49,25 +100,39 @@ struct update_al_work { struct drbd_work w; - struct lc_element *al_ext; struct completion event; - unsigned int enr; - /* if old_enr != LC_FREE, write corresponding bitmap sector, too */ - unsigned int old_enr; + int err; }; -int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int); +static int al_write_transaction(struct drbd_conf *mdev); -/* The actual tracepoint needs to have constant number of known arguments... - */ -void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...) +void *drbd_md_get_buffer(struct drbd_conf *mdev) +{ + int r; + + wait_event(mdev->misc_wait, + (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 || + mdev->state.disk <= D_FAILED); + + return r ? 
NULL : page_address(mdev->md_io_page); +} + +void drbd_md_put_buffer(struct drbd_conf *mdev) +{ + if (atomic_dec_and_test(&mdev->md_io_in_use)) + wake_up(&mdev->misc_wait); +} + +static bool md_io_allowed(struct drbd_conf *mdev) { - va_list ap; + enum drbd_disk_state ds = mdev->state.disk; + return ds >= D_NEGOTIATING || ds == D_ATTACHING; +} - va_start(ap, fmt); - trace__drbd_resync(mdev, level, fmt, ap); - va_end(ap); +void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done) +{ + wait_event(mdev->misc_wait, *done || !md_io_allowed(mdev)); } STATIC int _drbd_md_sync_page_io(struct drbd_conf *mdev, @@ -76,138 +141,95 @@ int rw, int size) { struct bio *bio; - struct drbd_md_io md_io; - int ok; - - md_io.mdev = mdev; - init_completion(&md_io.event); - md_io.error = 0; + int err; if ((rw & WRITE) && !test_bit(MD_NO_BARRIER, &mdev->flags)) - rw |= (1UL << BIO_RW_BARRIER); -#ifdef BIO_RW_SYNC - rw |= (1<md_io.done = 0; + mdev->md_io.error = -ENODEV; + + bio = bio_alloc_drbd(GFP_NOIO); bio->bi_bdev = bdev->md_bdev; bio->bi_sector = sector; - ok = (bio_add_page(bio, page, size, 0) == size); - if (!ok) + err = -EIO; + if (bio_add_page(bio, page, size, 0) != size) goto out; - bio->bi_private = &md_io; + bio->bi_private = &mdev->md_io; bio->bi_end_io = drbd_md_io_complete; bio->bi_rw = rw; - trace_drbd_bio(mdev, "Md", bio, 0, NULL); + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n"); + err = -ENODEV; + goto out; + } - if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) + bio_get(bio); /* one bio_put() is in the completion handler */ + atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */ + if (drbd_insert_fault(mdev, (rw & WRITE) ? 
DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) bio_endio(bio, -EIO); else submit_bio(rw, bio); - wait_for_completion(&md_io.event); - ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0; + wait_until_done_or_disk_failure(mdev, &mdev->md_io.done); + if (bio_flagged(bio, BIO_UPTODATE)) + err = mdev->md_io.error; +#ifndef REQ_FLUSH /* check for unsupported barrier op. * would rather check on EOPNOTSUPP, but that is not reliable. * don't try again for ANY return value != 0 */ - if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && !ok)) { + if (err && mdev->md_io.done && (bio->bi_rw & DRBD_REQ_HARDBARRIER)) { /* Try again with no barrier */ dev_warn(DEV, "Barriers not supported on meta data device - disabling\n"); set_bit(MD_NO_BARRIER, &mdev->flags); - rw &= ~(1 << BIO_RW_BARRIER); + rw &= ~DRBD_REQ_HARDBARRIER; bio_put(bio); goto retry; } +#endif out: bio_put(bio); - return ok; + return err; } int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw) { - int logical_block_size, mask, ok; - int offset = 0; + int err; struct page *iop = mdev->md_io_page; - D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); + D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1); if (!bdev->md_bdev) { if (DRBD_ratelimit(5*HZ, 5)) { dev_err(DEV, "bdev->md_bdev==NULL\n"); dump_stack(); } - return 0; - } - - logical_block_size = bdev_logical_block_size(bdev->md_bdev); - if (logical_block_size == 0) - logical_block_size = MD_SECTOR_SIZE; - - /* in case logical_block_size != 512 [ s390 only? 
] */ - if (logical_block_size != MD_SECTOR_SIZE) { - mask = (logical_block_size / MD_SECTOR_SIZE) - 1; - D_ASSERT(mask == 1 || mask == 3 || mask == 7); - D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE); - offset = sector & mask; - sector = sector & ~mask; - iop = mdev->md_io_tmpp; - - if (rw & WRITE) { - /* these are GFP_KERNEL pages, pre-allocated - * on device initialization */ - void *p = page_address(mdev->md_io_page); - void *hp = page_address(mdev->md_io_tmpp); - - ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, - READ, logical_block_size); - - if (unlikely(!ok)) { - dev_err(DEV, "drbd_md_sync_page_io(,%llus," - "READ [logical_block_size!=512]) failed!\n", - (unsigned long long)sector); - return 0; - } - - memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE); - } + return -EIO; } -#if DUMP_MD >= 3 - dev_info(DEV, "%s [%d]:%s(,%llus,%s)\n", + dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n", current->comm, current->pid, __func__, (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); -#endif if (sector < drbd_md_first_sector(bdev) || - sector > drbd_md_last_sector(bdev)) + sector + 7 > drbd_md_last_sector(bdev)) dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n", current->comm, current->pid, __func__, (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); - ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size); - if (unlikely(!ok)) { - dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n", - (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); - return 0; - } - - if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) { - void *p = page_address(mdev->md_io_page); - void *hp = page_address(mdev->md_io_tmpp); - - memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE); + err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE); + if (err) { + dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n", + (unsigned long long)sector, (rw & WRITE) ? 
"WRITE" : "READ", err); } - - return ok; + return err; } static @@ -215,583 +237,263 @@ { struct lc_element *al_ext; struct lc_element *tmp; - unsigned long al_flags = 0; + int wake; spin_lock_irq(&mdev->al_lock); tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT); if (unlikely(tmp != NULL)) { struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); if (test_bit(BME_NO_WRITES, &bm_ext->flags)) { + wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags); spin_unlock_irq(&mdev->al_lock); + if (wake) + wake_up(&mdev->al_wait); return NULL; } } - al_ext = lc_get(mdev->act_log, enr); - al_flags = mdev->act_log->flags; + al_ext = lc_get(mdev->act_log, enr); spin_unlock_irq(&mdev->al_lock); - - /* - if (!al_ext) { - if (al_flags & LC_STARVING) - dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n"); - if (al_flags & LC_DIRTY) - dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n"); - } - */ - return al_ext; } -void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) +void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i) { - unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9)); - struct lc_element *al_ext; - struct update_al_work al_work; + /* for bios crossing activity log extent boundaries, + * we may need to activate two extents in one go */ + unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); + unsigned last = (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); + unsigned enr; + bool locked = false; + D_ASSERT(atomic_read(&mdev->local_cnt) > 0); - trace_drbd_actlog(mdev, sector, "al_begin_io"); + for (enr = first; enr <= last; enr++) + wait_event(mdev->al_wait, _al_get(mdev, enr) != NULL); - wait_event(mdev->al_wait, (al_ext = _al_get(mdev, enr))); + /* Serialize multiple transactions. + * This uses test_and_set_bit, memory barrier is implicit. 
+ */ + wait_event(mdev->al_wait, + mdev->act_log->pending_changes == 0 || + (locked = lc_try_lock_for_transaction(mdev->act_log))); - if (al_ext->lc_number != enr) { + if (locked) { /* drbd_al_write_transaction(mdev,al_ext,enr); * recurses into generic_make_request(), which * disallows recursion, bios being serialized on the * current->bio_tail list now. * we have to delegate updates to the activity log * to the worker thread. */ - init_completion(&al_work.event); - al_work.al_ext = al_ext; - al_work.enr = enr; - al_work.old_enr = al_ext->lc_number; - al_work.w.cb = w_al_write_transaction; - drbd_queue_work_front(&mdev->data.work, &al_work.w); - wait_for_completion(&al_work.event); - - mdev->al_writ_cnt++; - - /* - DUMPI(al_ext->lc_number); - DUMPI(mdev->act_log->new_number); - */ - spin_lock_irq(&mdev->al_lock); - lc_changed(mdev->act_log, al_ext); - spin_unlock_irq(&mdev->al_lock); + + /* Double check: it may have been committed by someone else, + * while we have been waiting for the lock. 
*/ + if (mdev->act_log->pending_changes) { + al_write_transaction(mdev); + mdev->al_writ_cnt++; + + spin_lock_irq(&mdev->al_lock); + /* FIXME + if (err) + we need an "lc_cancel" here; + */ + lc_committed(mdev->act_log); + spin_unlock_irq(&mdev->al_lock); + } + lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); } } -void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector) +void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i) { - unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9)); + /* for bios crossing activity log extent boundaries, + * we may need to activate two extents in one go */ + unsigned first = i->sector >> (AL_EXTENT_SHIFT-9); + unsigned last = (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9); + unsigned enr; struct lc_element *extent; unsigned long flags; - trace_drbd_actlog(mdev, sector, "al_complete_io"); - spin_lock_irqsave(&mdev->al_lock, flags); - extent = lc_find(mdev->act_log, enr); - - if (!extent) { - spin_unlock_irqrestore(&mdev->al_lock, flags); - dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr); - return; + for (enr = first; enr <= last; enr++) { + extent = lc_find(mdev->act_log, enr); + if (!extent) { + dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr); + continue; + } + lc_put(mdev->act_log, extent); } - - if (lc_put(mdev->act_log, extent) == 0) - wake_up(&mdev->al_wait); - spin_unlock_irqrestore(&mdev->al_lock, flags); + wake_up(&mdev->al_wait); } -int -w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) -{ - struct update_al_work *aw = container_of(w, struct update_al_work, w); - struct lc_element *updated = aw->al_ext; - const unsigned int new_enr = aw->enr; - const unsigned int evicted = aw->old_enr; - struct al_transaction *buffer; - sector_t sector; - int i, n, mx; - unsigned int extent_nr; - u32 xor_sum = 0; - - if (!get_ldev(mdev)) { - dev_err(DEV, "get_ldev() failed in w_al_write_transaction\n"); - complete(&((struct 
update_al_work *)w)->event); - return 1; - } - /* do we have to do a bitmap write, first? - * TODO reduce maximum latency: - * submit both bios, then wait for both, - * instead of doing two synchronous sector writes. */ - if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) - drbd_bm_write_sect(mdev, evicted/AL_EXT_PER_BM_SECT); - - mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ - buffer = (struct al_transaction *)page_address(mdev->md_io_page); - - buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); - buffer->tr_number = cpu_to_be32(mdev->al_tr_number); - - n = lc_index_of(mdev->act_log, updated); - - buffer->updates[0].pos = cpu_to_be32(n); - buffer->updates[0].extent = cpu_to_be32(new_enr); - - xor_sum ^= new_enr; - - mx = min_t(int, AL_EXTENTS_PT, - mdev->act_log->nr_elements - mdev->al_tr_cycle); - for (i = 0; i < mx; i++) { - unsigned idx = mdev->al_tr_cycle + i; - extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number; - buffer->updates[i+1].pos = cpu_to_be32(idx); - buffer->updates[i+1].extent = cpu_to_be32(extent_nr); - xor_sum ^= extent_nr; - } - for (; i < AL_EXTENTS_PT; i++) { - buffer->updates[i+1].pos = __constant_cpu_to_be32(-1); - buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE); - xor_sum ^= LC_FREE; - } - mdev->al_tr_cycle += AL_EXTENTS_PT; - if (mdev->al_tr_cycle >= mdev->act_log->nr_elements) - mdev->al_tr_cycle = 0; - - buffer->xor_sum = cpu_to_be32(xor_sum); - - sector = mdev->ldev->md.md_offset - + mdev->ldev->md.al_offset + mdev->al_tr_pos; - - if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) - drbd_chk_io_error(mdev, 1, TRUE); - - if (++mdev->al_tr_pos > - div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) - mdev->al_tr_pos = 0; - - D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE); - mdev->al_tr_number++; - - mutex_unlock(&mdev->md_io_mutex); - - complete(&((struct update_al_work *)w)->event); - put_ldev(mdev); +#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT) +/* 
Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT + * are still coupled, or assume too much about their relation. + * Code below will not work if this is violated. + * Will be cleaned up with some followup patch. + */ +# error FIXME +#endif - return 1; +static unsigned int al_extent_to_bm_page(unsigned int al_enr) +{ + return al_enr >> + /* bit to page */ + ((PAGE_SHIFT + 3) - + /* al extent number to bit */ + (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)); } -/** - * drbd_al_read_tr() - Read a single transaction from the on disk activity log - * @mdev: DRBD device. - * @bdev: Block device to read form. - * @b: pointer to an al_transaction. - * @index: On disk slot of the transaction to read. - * - * Returns -1 on IO error, 0 on checksum error and 1 upon success. - */ -STATIC int drbd_al_read_tr(struct drbd_conf *mdev, - struct drbd_backing_dev *bdev, - struct al_transaction *b, - int index) +static unsigned int rs_extent_to_bm_page(unsigned int rs_enr) { - sector_t sector; - int rv, i; - u32 xor_sum = 0; - - sector = bdev->md.md_offset + bdev->md.al_offset + index; - - /* Dont process error normally, - * as this is done before disk is attached! */ - if (!drbd_md_sync_page_io(mdev, bdev, sector, READ)) - return -1; - - rv = (be32_to_cpu(b->magic) == DRBD_MAGIC); - - for (i = 0; i < AL_EXTENTS_PT + 1; i++) - xor_sum ^= be32_to_cpu(b->updates[i].extent); - rv &= (xor_sum == be32_to_cpu(b->xor_sum)); - - return rv; + return rs_enr >> + /* bit to page */ + ((PAGE_SHIFT + 3) - + /* resync extent number to bit */ + (BM_EXT_SHIFT - BM_BLOCK_SHIFT)); } -/** - * drbd_al_read_log() - Restores the activity log from its on disk representation. - * @mdev: DRBD device. - * @bdev: Block device to read form. - * - * Returns 1 on success, returns 0 when reading the log failed due to IO errors. 
- */ -int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) +static int +_al_write_transaction(struct drbd_conf *mdev) { - struct al_transaction *buffer; - int i; - int rv; - int mx; - int active_extents = 0; - int transactions = 0; - int found_valid = 0; - int from = 0; - int to = 0; - u32 from_tnr = 0; - u32 to_tnr = 0; - u32 cnr; - - mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT); - - /* lock out all other meta data io for now, - * and make sure the page is mapped. - */ - mutex_lock(&mdev->md_io_mutex); - buffer = page_address(mdev->md_io_page); - - /* Find the valid transaction in the log */ - for (i = 0; i <= mx; i++) { - rv = drbd_al_read_tr(mdev, bdev, buffer, i); - if (rv == 0) - continue; - if (rv == -1) { - mutex_unlock(&mdev->md_io_mutex); - return 0; - } - cnr = be32_to_cpu(buffer->tr_number); + struct al_transaction_on_disk *buffer; + struct lc_element *e; + sector_t sector; + int i, mx; + unsigned extent_nr; + unsigned crc = 0; + int err = 0; - if (++found_valid == 1) { - from = i; - to = i; - from_tnr = cnr; - to_tnr = cnr; - continue; - } - if ((int)cnr - (int)from_tnr < 0) { - D_ASSERT(from_tnr - cnr + i - from == mx+1); - from = i; - from_tnr = cnr; - } - if ((int)cnr - (int)to_tnr > 0) { - D_ASSERT(cnr - to_tnr == i - to); - to = i; - to_tnr = cnr; - } + if (!get_ldev(mdev)) { + dev_err(DEV, "disk is %s, cannot start al transaction\n", + drbd_disk_str(mdev->state.disk)); + return -EIO; } - if (!found_valid) { - dev_warn(DEV, "No usable activity log found.\n"); - mutex_unlock(&mdev->md_io_mutex); - return 1; - } - - /* Read the valid transactions. 
- * dev_info(DEV, "Reading from %d to %d.\n",from,to); */ - i = from; - while (1) { - int j, pos; - unsigned int extent_nr; - unsigned int trn; - - rv = drbd_al_read_tr(mdev, bdev, buffer, i); - ERR_IF(rv == 0) goto cancel; - if (rv == -1) { - mutex_unlock(&mdev->md_io_mutex); - return 0; - } - - trn = be32_to_cpu(buffer->tr_number); - - spin_lock_irq(&mdev->al_lock); - - /* This loop runs backwards because in the cyclic - elements there might be an old version of the - updated element (in slot 0). So the element in slot 0 - can overwrite old versions. */ - for (j = AL_EXTENTS_PT; j >= 0; j--) { - pos = be32_to_cpu(buffer->updates[j].pos); - extent_nr = be32_to_cpu(buffer->updates[j].extent); + /* The bitmap write may have failed, causing a state change. */ + if (mdev->state.disk < D_INCONSISTENT) { + dev_err(DEV, + "disk is %s, cannot write al transaction\n", + drbd_disk_str(mdev->state.disk)); + put_ldev(mdev); + return -EIO; + } - if (extent_nr == LC_FREE) - continue; + buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */ + if (!buffer) { + dev_err(DEV, "disk failed while waiting for md_io buffer\n"); + put_ldev(mdev); + return -ENODEV; + } - lc_set(mdev->act_log, extent_nr, pos); - active_extents++; - } - spin_unlock_irq(&mdev->al_lock); + memset(buffer, 0, sizeof(*buffer)); + buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); + buffer->tr_number = cpu_to_be32(mdev->al_tr_number); - transactions++; + i = 0; -cancel: - if (i == to) + /* Even though no one can start to change this list + * once we set the LC_LOCKED -- from drbd_al_begin_io(), + * lc_try_lock_for_transaction() --, someone may still + * be in the process of changing it. 
*/ + spin_lock_irq(&mdev->al_lock); + list_for_each_entry(e, &mdev->act_log->to_be_changed, list) { + if (i == AL_UPDATES_PER_TRANSACTION) { + i++; break; + } + buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index); + buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number); + if (e->lc_number != LC_FREE) + drbd_bm_mark_for_writeout(mdev, + al_extent_to_bm_page(e->lc_number)); i++; - if (i > mx) - i = 0; } + spin_unlock_irq(&mdev->al_lock); + BUG_ON(i > AL_UPDATES_PER_TRANSACTION); - mdev->al_tr_number = to_tnr+1; - mdev->al_tr_pos = to; - if (++mdev->al_tr_pos > - div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) - mdev->al_tr_pos = 0; - - /* ok, we are done with it */ - mutex_unlock(&mdev->md_io_mutex); - - dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n", - transactions, active_extents); - - return 1; -} - -struct drbd_atodb_wait { - atomic_t count; - struct completion io_done; - struct drbd_conf *mdev; - int error; -}; + buffer->n_updates = cpu_to_be16(i); + for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) { + buffer->update_slot_nr[i] = cpu_to_be16(-1); + buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE); + } -STATIC BIO_ENDIO_TYPE atodb_endio BIO_ENDIO_ARGS(struct bio *bio, int error) -{ - struct drbd_atodb_wait *wc = bio->bi_private; - struct drbd_conf *mdev = wc->mdev; - struct page *page; - int uptodate = bio_flagged(bio, BIO_UPTODATE); - - BIO_ENDIO_FN_START; - /* strange behavior of some lower level drivers... - * fail the request by clearing the uptodate flag, - * but do not return any error?! 
*/ - if (!error && !uptodate) - error = -EIO; - - drbd_chk_io_error(mdev, error, TRUE); - if (error && wc->error == 0) - wc->error = error; + buffer->context_size = cpu_to_be16(mdev->act_log->nr_elements); + buffer->context_start_slot_nr = cpu_to_be16(mdev->al_tr_cycle); - if (atomic_dec_and_test(&wc->count)) - complete(&wc->io_done); + mx = min_t(int, AL_CONTEXT_PER_TRANSACTION, + mdev->act_log->nr_elements - mdev->al_tr_cycle); + for (i = 0; i < mx; i++) { + unsigned idx = mdev->al_tr_cycle + i; + extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number; + buffer->context[i] = cpu_to_be32(extent_nr); + } + for (; i < AL_CONTEXT_PER_TRANSACTION; i++) + buffer->context[i] = cpu_to_be32(LC_FREE); - page = bio->bi_io_vec[0].bv_page; - put_page(page); - bio_put(bio); - mdev->bm_writ_cnt++; - put_ldev(mdev); + mdev->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION; + if (mdev->al_tr_cycle >= mdev->act_log->nr_elements) + mdev->al_tr_cycle = 0; - BIO_ENDIO_FN_RETURN; -} + sector = mdev->ldev->md.md_offset + + mdev->ldev->md.al_offset + + mdev->al_tr_pos * (MD_BLOCK_SIZE>>9); -#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL)) -/* activity log to on disk bitmap -- prepare bio unless that sector - * is already covered by previously prepared bios */ -STATIC int atodb_prepare_unless_covered(struct drbd_conf *mdev, - struct bio **bios, - unsigned int enr, - struct drbd_atodb_wait *wc) __must_hold(local) -{ - struct bio *bio; - struct page *page; - sector_t on_disk_sector = enr + mdev->ldev->md.md_offset - + mdev->ldev->md.bm_offset; - unsigned int page_offset = PAGE_SIZE; - int offset; - int i = 0; - int err = -ENOMEM; - - /* Check if that enr is already covered by an already created bio. - * Caution, bios[] is not NULL terminated, - * but only initialized to all NULL. - * For completely scattered activity log, - * the last invocation iterates over all bios, - * and finds the last NULL entry. 
- */ - while ((bio = bios[i])) { - if (bio->bi_sector == on_disk_sector) - return 0; - i++; - } - /* bios[i] == NULL, the next not yet used slot */ + crc = crc32c(0, buffer, 4096); + buffer->crc32c = cpu_to_be32(crc); - /* GFP_KERNEL, we are not in the write-out path */ - bio = bio_alloc(GFP_KERNEL, 1); - if (bio == NULL) - return -ENOMEM; - - if (i > 0) { - const struct bio_vec *prev_bv = bios[i-1]->bi_io_vec; - page_offset = prev_bv->bv_offset + prev_bv->bv_len; - page = prev_bv->bv_page; - } - if (page_offset == PAGE_SIZE) { - page = alloc_page(__GFP_HIGHMEM); - if (page == NULL) - goto out_bio_put; - page_offset = 0; + if (drbd_bm_write_hinted(mdev)) + err = -EIO; + /* drbd_chk_io_error done already */ + else if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { + err = -EIO; + drbd_chk_io_error(mdev, 1, true); } else { - get_page(page); + /* advance ringbuffer position and transaction counter */ + mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); + mdev->al_tr_number++; } - offset = S2W(enr); - drbd_bm_get_lel(mdev, offset, - min_t(size_t, S2W(1), drbd_bm_words(mdev) - offset), - kmap(page) + page_offset); - kunmap(page); - - bio->bi_private = wc; - bio->bi_end_io = atodb_endio; - bio->bi_bdev = mdev->ldev->md_bdev; - bio->bi_sector = on_disk_sector; - - if (bio_add_page(bio, page, MD_SECTOR_SIZE, page_offset) != MD_SECTOR_SIZE) - goto out_put_page; - - atomic_inc(&wc->count); - /* we already know that we may do this... - * get_ldev_if_state(mdev,D_ATTACHING); - * just get the extra reference, so that the local_cnt reflects - * the number of pending IO requests DRBD at its backing device. - */ - atomic_inc(&mdev->local_cnt); - - bios[i] = bio; - - return 0; + drbd_md_put_buffer(mdev); + put_ldev(mdev); -out_put_page: - err = -EINVAL; - put_page(page); -out_bio_put: - bio_put(bio); return err; } -/** - * drbd_al_to_on_disk_bm() - * Writes bitmap parts covered by active AL extents - * @mdev: DRBD device. 
- * - * Called when we detach (unconfigure) local storage, - * or when we go from R_PRIMARY to R_SECONDARY role. - */ -void drbd_al_to_on_disk_bm(struct drbd_conf *mdev) -{ - int i, nr_elements; - unsigned int enr; - struct bio **bios; - struct drbd_atodb_wait wc; - - ERR_IF (!get_ldev_if_state(mdev, D_ATTACHING)) - return; /* sorry, I don't have any act_log etc... */ - - wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); - - nr_elements = mdev->act_log->nr_elements; - - /* GFP_KERNEL, we are not in anyone's write-out path */ - bios = kzalloc(sizeof(struct bio *) * nr_elements, GFP_KERNEL); - if (!bios) - goto submit_one_by_one; - - atomic_set(&wc.count, 0); - init_completion(&wc.io_done); - wc.mdev = mdev; - wc.error = 0; - - for (i = 0; i < nr_elements; i++) { - enr = lc_element_by_index(mdev->act_log, i)->lc_number; - if (enr == LC_FREE) - continue; - /* next statement also does atomic_inc wc.count and local_cnt */ - if (atodb_prepare_unless_covered(mdev, bios, - enr/AL_EXT_PER_BM_SECT, - &wc)) - goto free_bios_submit_one_by_one; - } - - /* unnecessary optimization? */ - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); - - /* all prepared, submit them */ - for (i = 0; i < nr_elements; i++) { - if (bios[i] == NULL) - break; - if (FAULT_ACTIVE(mdev, DRBD_FAULT_MD_WR)) { - bios[i]->bi_rw = WRITE; - bio_endio(bios[i], -EIO); - } else { - submit_bio(WRITE, bios[i]); - } - } - - drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev)); - - /* always (try to) flush bitmap to stable storage */ - drbd_md_flush(mdev); - - /* In case we did not submit a single IO do not wait for - * them to complete. ( Because we would wait forever here. ) - * - * In case we had IOs and they are already complete, there - * is not point in waiting anyways. - * Therefore this if () ... 
*/ - if (atomic_read(&wc.count)) - wait_for_completion(&wc.io_done); - put_ldev(mdev); - - kfree(bios); - return; - - free_bios_submit_one_by_one: - /* free everything by calling the endio callback directly. */ - for (i = 0; i < nr_elements && bios[i]; i++) - bio_endio(bios[i], 0); - - kfree(bios); +static int w_al_write_transaction(struct drbd_work *w, int unused) +{ + struct update_al_work *aw = container_of(w, struct update_al_work, w); + struct drbd_conf *mdev = w->mdev; + int err; - submit_one_by_one: - dev_warn(DEV, "Using the slow drbd_al_to_on_disk_bm()\n"); + err = _al_write_transaction(mdev); + aw->err = err; + complete(&aw->event); - for (i = 0; i < mdev->act_log->nr_elements; i++) { - enr = lc_element_by_index(mdev->act_log, i)->lc_number; - if (enr == LC_FREE) - continue; - /* Really slow: if we have al-extents 16..19 active, - * sector 4 will be written four times! Synchronous! */ - drbd_bm_write_sect(mdev, enr/AL_EXT_PER_BM_SECT); - } - - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); - put_ldev(mdev); + return err != -EIO ? err : 0; } -/** - * drbd_al_apply_to_bm() - Sets the bitmap to diry(1) where covered ba active AL extents - * @mdev: DRBD device. - */ -void drbd_al_apply_to_bm(struct drbd_conf *mdev) +/* Calls from worker context (see w_restart_disk_io()) need to write the + transaction directly. Others came through generic_make_request(), + those need to delegate it to the worker. 
*/ +static int al_write_transaction(struct drbd_conf *mdev) { - unsigned int enr; - unsigned long add = 0; - char ppb[10]; - int i; - - wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); + struct update_al_work al_work; - for (i = 0; i < mdev->act_log->nr_elements; i++) { - enr = lc_element_by_index(mdev->act_log, i)->lc_number; - if (enr == LC_FREE) - continue; - add += drbd_bm_ALe_set_all(mdev, enr); - } + if (current == mdev->tconn->worker.task) + return _al_write_transaction(mdev); - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); + init_completion(&al_work.event); + al_work.w.cb = w_al_write_transaction; + al_work.w.mdev = mdev; + drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); + wait_for_completion(&al_work.event); - dev_info(DEV, "Marked additional %s as out-of-sync based on AL.\n", - ppsize(ppb, Bit2KB(add))); + return al_work.err; } static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext) @@ -821,7 +523,7 @@ struct lc_element *al_ext; int i; - D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags)); + D_ASSERT(test_bit(__LC_LOCKED, &mdev->act_log->flags)); for (i = 0; i < mdev->act_log->nr_elements; i++) { al_ext = lc_element_by_index(mdev->act_log, i); @@ -833,18 +535,20 @@ wake_up(&mdev->al_wait); } -STATIC int w_update_odbm(struct drbd_conf *mdev, struct drbd_work *w, int unused) +STATIC int w_update_odbm(struct drbd_work *w, int unused) { struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); + struct drbd_conf *mdev = w->mdev; + struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; if (!get_ldev(mdev)) { if (DRBD_ratelimit(5*HZ, 5)) dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n"); kfree(udw); - return 1; + return 0; } - drbd_bm_write_sect(mdev, udw->enr); + drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr)); put_ldev(mdev); kfree(udw); @@ -859,9 +563,9 @@ break; } } - drbd_bcast_sync_progress(mdev); + drbd_bcast_event(mdev, &sib); - return 1; + return 0; } @@ 
-894,16 +598,20 @@ else ext->rs_failed += count; if (ext->rs_left < ext->rs_failed) { - dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d " - "rs_failed=%d count=%d\n", + dev_warn(DEV, "BAD! sector=%llus enr=%u rs_left=%d " + "rs_failed=%d count=%d cstate=%s\n", (unsigned long long)sector, ext->lce.lc_number, ext->rs_left, - ext->rs_failed, count); - dump_stack(); + ext->rs_failed, count, + drbd_conn_str(mdev->state.conn)); - lc_put(mdev->resync, &ext->lce); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return; + /* We don't expect to be able to clear more bits + * than have been set when we originally counted + * the set bits to cache that value in ext->rs_left. + * Whatever the reason (disconnect during resync, + * delayed local completion of an application write), + * try to fix it up by recounting here. */ + ext->rs_left = drbd_bm_e_weight(mdev, enr); } } else { /* Normally this element should be in the cache, @@ -924,11 +632,12 @@ dev_warn(DEV, "Kicking resync_lru element enr=%u " "out with rs_failed=%d\n", ext->lce.lc_number, ext->rs_failed); - set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); } ext->rs_left = rs_left; ext->rs_failed = success ? 0 : count; - lc_changed(mdev->resync, &ext->lce); + /* we don't keep a persistent log of the resync lru, + * we can commit any change right away. */ + lc_committed(mdev->resync); } lc_put(mdev->resync, &ext->lce); /* no race, we are within the al_lock! 
*/ @@ -940,10 +649,10 @@ if (udw) { udw->enr = ext->lce.lc_number; udw->w.cb = w_update_odbm; - drbd_queue_work_front(&mdev->data.work, &udw->w); + udw->w.mdev = mdev; + drbd_queue_work_front(&mdev->tconn->data.work, &udw->w); } else { dev_warn(DEV, "Could not kmalloc an udw\n"); - set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); } } } else { @@ -954,6 +663,22 @@ } } +void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go) +{ + unsigned long now = jiffies; + unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark]; + int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS; + if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) { + if (mdev->rs_mark_left[mdev->rs_last_mark] != still_to_go && + mdev->state.conn != C_PAUSED_SYNC_T && + mdev->state.conn != C_PAUSED_SYNC_S) { + mdev->rs_mark_time[next] = now; + mdev->rs_mark_left[next] = still_to_go; + mdev->rs_last_mark = next; + } + } +} + /* clear the bit corresponding to the piece of storage in question: * size byte of data starting from sector. Only clear a bits of the affected * one ore more _aligned_ BM_BLOCK_SIZE blocks. 
@@ -971,7 +696,7 @@ int wake_up = 0; unsigned long flags; - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", (unsigned long long)sector, size); return; @@ -979,8 +704,10 @@ nr_sectors = drbd_get_capacity(mdev->this_bdev); esector = sector + (size >> 9) - 1; - ERR_IF(sector >= nr_sectors) return; - ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1); + if (!expect(sector < nr_sectors)) + return; + if (!expect(esector < nr_sectors)) + esector = nr_sectors - 1; lbnr = BM_SECT_TO_BIT(nr_sectors-1); @@ -995,10 +722,6 @@ ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); - trace_drbd_resync(mdev, TRACE_LVL_METRICS, - "drbd_set_in_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", - (unsigned long long)sector, size, sbnr, ebnr); - if (sbnr > ebnr) return; @@ -1006,29 +729,18 @@ * ok, (capacity & 7) != 0 sometimes, but who cares... * we count rs_{total,left} in bits, not sectors. */ - spin_lock_irqsave(&mdev->al_lock, flags); count = drbd_bm_clear_bits(mdev, sbnr, ebnr); - if (count) { - /* we need the lock for drbd_try_clear_on_disk_bm */ - if (jiffies - mdev->rs_mark_time > HZ*10) { - /* should be rolling marks, - * but we estimate only anyways. 
*/ - if (mdev->rs_mark_left != drbd_bm_total_weight(mdev) && - mdev->state.conn != C_PAUSED_SYNC_T && - mdev->state.conn != C_PAUSED_SYNC_S) { - mdev->rs_mark_time = jiffies; - mdev->rs_mark_left = drbd_bm_total_weight(mdev); - } - } - if (get_ldev(mdev)) { - drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE); - put_ldev(mdev); - } + if (count && get_ldev(mdev)) { + drbd_advance_rs_marks(mdev, drbd_bm_total_weight(mdev)); + spin_lock_irqsave(&mdev->al_lock, flags); + drbd_try_clear_on_disk_bm(mdev, sector, count, true); + spin_unlock_irqrestore(&mdev->al_lock, flags); + /* just wake_up unconditional now, various lc_chaged(), * lc_put() in drbd_try_clear_on_disk_bm(). */ wake_up = 1; + put_ldev(mdev); } - spin_unlock_irqrestore(&mdev->al_lock, flags); if (wake_up) wake_up(&mdev->al_wait); } @@ -1036,47 +748,41 @@ /* * this is intended to set one request worth of data out of sync. * affects at least 1 bit, - * and at most 1+DRBD_MAX_SEGMENT_SIZE/BM_BLOCK_SIZE bits. + * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits. * * called by tl_clear and drbd_send_dblock (==drbd_make_request). * so this can be _any_ process. 
*/ -void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, +int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, const char *file, const unsigned int line) { - unsigned long sbnr, ebnr, lbnr, flags; + unsigned long sbnr, ebnr, flags; sector_t esector, nr_sectors; - unsigned int enr, count; + unsigned int enr, count = 0; struct lc_element *e; - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "sector: %llus, size: %d\n", (unsigned long long)sector, size); - return; + return 0; } if (!get_ldev(mdev)) - return; /* no disk, no metadata, no bitmap to set bits in */ + return 0; /* no disk, no metadata, no bitmap to set bits in */ nr_sectors = drbd_get_capacity(mdev->this_bdev); esector = sector + (size >> 9) - 1; - ERR_IF(sector >= nr_sectors) + if (!expect(sector < nr_sectors)) goto out; - ERR_IF(esector >= nr_sectors) - esector = (nr_sectors-1); - - lbnr = BM_SECT_TO_BIT(nr_sectors-1); + if (!expect(esector < nr_sectors)) + esector = nr_sectors - 1; /* we set it out of sync, * we do not need to round anything here */ sbnr = BM_SECT_TO_BIT(sector); ebnr = BM_SECT_TO_BIT(esector); - trace_drbd_resync(mdev, TRACE_LVL_METRICS, - "drbd_set_out_of_sync: sector=%llus size=%u sbnr=%lu ebnr=%lu\n", - (unsigned long long)sector, size, sbnr, ebnr); - /* ok, (capacity & 7) != 0 sometimes, but who cares... * we count rs_{total,left} in bits, not sectors. 
*/ spin_lock_irqsave(&mdev->al_lock, flags); @@ -1090,6 +796,8 @@ out: put_ldev(mdev); + + return count; } static @@ -1111,7 +819,7 @@ if (bm_ext->lce.lc_number != enr) { bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); bm_ext->rs_failed = 0; - lc_changed(mdev->resync, &bm_ext->lce); + lc_committed(mdev->resync); wakeup = 1; } if (bm_ext->lce.refcnt == 1) @@ -1127,7 +835,7 @@ if (rs_flags & LC_STARVING) dev_warn(DEV, "Have to wait for element" " (resync LRU too small?)\n"); - BUG_ON(rs_flags & LC_DIRTY); + BUG_ON(rs_flags & LC_LOCKED); } return bm_ext; @@ -1135,26 +843,12 @@ static int _is_in_al(struct drbd_conf *mdev, unsigned int enr) { - struct lc_element *al_ext; - int rv = 0; + int rv; spin_lock_irq(&mdev->al_lock); - if (unlikely(enr == mdev->act_log->new_number)) - rv = 1; - else { - al_ext = lc_find(mdev->act_log, enr); - if (al_ext) { - if (al_ext->refcnt) - rv = 1; - } - } + rv = lc_is_used(mdev->act_log, enr); spin_unlock_irq(&mdev->al_lock); - /* - if (unlikely(rv)) { - dev_info(DEV, "Delaying sync read until app's write is done\n"); - } - */ return rv; } @@ -1163,44 +857,50 @@ * @mdev: DRBD device. * @sector: The sector number. * - * This functions sleeps on al_wait. Returns 1 on success, 0 if interrupted. + * This functions sleeps on al_wait. Returns 0 on success, -EINTR if interrupted. */ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector) { unsigned int enr = BM_SECT_TO_EXT(sector); struct bm_extent *bm_ext; int i, sig; + int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait. + 200 times -> 20 seconds. 
*/ - trace_drbd_resync(mdev, TRACE_LVL_ALL, - "drbd_rs_begin_io: sector=%llus (rs_end=%d)\n", - (unsigned long long)sector, enr); - +retry: sig = wait_event_interruptible(mdev->al_wait, (bm_ext = _bme_get(mdev, enr))); if (sig) - return 0; + return -EINTR; if (test_bit(BME_LOCKED, &bm_ext->flags)) - return 1; + return 0; for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { sig = wait_event_interruptible(mdev->al_wait, - !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i)); - if (sig) { + !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i) || + test_bit(BME_PRIORITY, &bm_ext->flags)); + + if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) { spin_lock_irq(&mdev->al_lock); if (lc_put(mdev->resync, &bm_ext->lce) == 0) { - clear_bit(BME_NO_WRITES, &bm_ext->flags); + bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */ mdev->resync_locked--; wake_up(&mdev->al_wait); } spin_unlock_irq(&mdev->al_lock); - return 0; + if (sig) + return -EINTR; + if (schedule_timeout_interruptible(HZ/10)) + return -EINTR; + if (sa && --sa == 0) + dev_warn(DEV,"drbd_rs_begin_io() stepped aside for 20sec." + "Resync stalled?\n"); + goto retry; } } - set_bit(BME_LOCKED, &bm_ext->flags); - - return 1; + return 0; } /** @@ -1220,9 +920,6 @@ struct bm_extent *bm_ext; int i; - trace_drbd_resync(mdev, TRACE_LVL_ALL, "drbd_try_rs_begin_io: sector=%llus\n", - (unsigned long long)sector); - spin_lock_irq(&mdev->al_lock); if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) { /* in case you have very heavy scattered io, it may @@ -1239,10 +936,6 @@ * we also have to wake_up */ - trace_drbd_resync(mdev, TRACE_LVL_ALL, - "dropping %u, apparently got 'synced' by application io\n", - mdev->resync_wenr); - e = lc_find(mdev->resync, mdev->resync_wenr); bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; if (bm_ext) { @@ -1270,21 +963,14 @@ * but then could not set BME_LOCKED, * so we tried again. * drop the extra reference. 
*/ - trace_drbd_resync(mdev, TRACE_LVL_ALL, - "dropping extra reference on %u\n", enr); - bm_ext->lce.refcnt--; D_ASSERT(bm_ext->lce.refcnt > 0); } goto check_al; } else { /* do we rather want to try later? */ - if (mdev->resync_locked > mdev->resync->nr_elements-3) { - trace_drbd_resync(mdev, TRACE_LVL_ALL, - "resync_locked = %u!\n", mdev->resync_locked); - + if (mdev->resync_locked > mdev->resync->nr_elements-3) goto try_again; - } /* Do or do not. There is no try. -- Yoda */ e = lc_get(mdev->resync, enr); bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; @@ -1293,13 +979,13 @@ if (rs_flags & LC_STARVING) dev_warn(DEV, "Have to wait for element" " (resync LRU too small?)\n"); - BUG_ON(rs_flags & LC_DIRTY); + BUG_ON(rs_flags & LC_LOCKED); goto try_again; } if (bm_ext->lce.lc_number != enr) { bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); bm_ext->rs_failed = 0; - lc_changed(mdev->resync, &bm_ext->lce); + lc_committed(mdev->resync); wake_up(&mdev->al_wait); D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0); } @@ -1309,11 +995,7 @@ goto check_al; } check_al: - trace_drbd_resync(mdev, TRACE_LVL_ALL, "checking al for %u\n", enr); - for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { - if (unlikely(al_enr+i == mdev->act_log->new_number)) - goto try_again; if (lc_is_used(mdev->act_log, al_enr+i)) goto try_again; } @@ -1324,7 +1006,6 @@ return 0; try_again: - trace_drbd_resync(mdev, TRACE_LVL_ALL, "need to try again for %u\n", enr); if (bm_ext) mdev->resync_wenr = enr; spin_unlock_irq(&mdev->al_lock); @@ -1338,10 +1019,6 @@ struct bm_extent *bm_ext; unsigned long flags; - trace_drbd_resync(mdev, TRACE_LVL_ALL, - "drbd_rs_complete_io: sector=%llus (rs_enr=%d)\n", - (long long)sector, enr); - spin_lock_irqsave(&mdev->al_lock, flags); e = lc_find(mdev->resync, enr); bm_ext = e ? 
lc_entry(e, struct bm_extent, lce) : NULL; @@ -1361,8 +1038,7 @@ } if (lc_put(mdev->resync, &bm_ext->lce) == 0) { - clear_bit(BME_LOCKED, &bm_ext->flags); - clear_bit(BME_NO_WRITES, &bm_ext->flags); + bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */ mdev->resync_locked--; wake_up(&mdev->al_wait); } @@ -1376,8 +1052,6 @@ */ void drbd_rs_cancel_all(struct drbd_conf *mdev) { - trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_cancel_all\n"); - spin_lock_irq(&mdev->al_lock); if (get_ldev_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */ @@ -1403,15 +1077,13 @@ struct bm_extent *bm_ext; int i; - trace_drbd_resync(mdev, TRACE_LVL_METRICS, "drbd_rs_del_all\n"); - spin_lock_irq(&mdev->al_lock); if (get_ldev_if_state(mdev, D_FAILED)) { /* ok, ->resync is there. */ for (i = 0; i < mdev->resync->nr_elements; i++) { e = lc_element_by_index(mdev->resync, i); - bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL; + bm_ext = lc_entry(e, struct bm_extent, lce); if (bm_ext->lce.lc_number == LC_FREE) continue; if (bm_ext->lce.lc_number == mdev->resync_wenr) { @@ -1457,11 +1129,7 @@ sector_t esector, nr_sectors; int wake_up = 0; - trace_drbd_resync(mdev, TRACE_LVL_SUMMARY, - "drbd_rs_failed_io: sector=%llus, size=%u\n", - (unsigned long long)sector, size); - - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", (unsigned long long)sector, size); return; @@ -1469,8 +1137,10 @@ nr_sectors = drbd_get_capacity(mdev->this_bdev); esector = sector + (size >> 9) - 1; - ERR_IF(sector >= nr_sectors) return; - ERR_IF(esector >= nr_sectors) esector = (nr_sectors-1); + if (!expect(sector < nr_sectors)) + return; + if (!expect(esector < nr_sectors)) + esector = nr_sectors - 1; lbnr = BM_SECT_TO_BIT(nr_sectors-1); @@ -1498,7 +1168,7 @@ mdev->rs_failed += count; if (get_ldev(mdev)) { - 
drbd_try_clear_on_disk_bm(mdev, sector, count, FALSE); + drbd_try_clear_on_disk_bm(mdev, sector, count, false); put_ldev(mdev); } diff -Nru drbd8-8.3.7/drbd/drbd_bitmap.c drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_bitmap.c --- drbd8-8.3.7/drbd/drbd_bitmap.c 2009-11-25 09:06:43.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_bitmap.c 2012-02-02 14:09:14.000000000 +0000 @@ -26,19 +26,64 @@ #include #include #include +#include +#include #include + +#include + #include "drbd_int.h" +/* See the ifdefs and comments inside that header file. + * On recent kernels this is not needed. */ +#include "compat/bitops.h" + /* OPAQUE outside this file! * interface defined in drbd_int.h * convention: * function name drbd_bm_... => used elsewhere, "public". * function name bm_... => internal to implementation, "private". + */ - * Note that since find_first_bit returns int, at the current granularity of - * the bitmap (4KB per byte), this implementation "only" supports up to - * 1<<(32+12) == 16 TB... + +/* + * LIMITATIONS: + * We want to support >= peta byte of backend storage, while for now still using + * a granularity of one bit per 4KiB of storage. + * 1 << 50 bytes backend storage (1 PiB) + * 1 << (50 - 12) bits needed + * 38 --> we need u64 to index and count bits + * 1 << (38 - 3) bitmap bytes needed + * 35 --> we still need u64 to index and count bytes + * (that's 32 GiB of bitmap for 1 PiB storage) + * 1 << (35 - 2) 32bit longs needed + * 33 --> we'd even need u64 to index and count 32bit long words. + * 1 << (35 - 3) 64bit longs needed + * 32 --> we could get away with a 32bit unsigned int to index and count + * 64bit long words, but I rather stay with unsigned long for now. + * We probably should neither count nor point to bytes or long words + * directly, but either by bitnumber, or by page index and offset. + * 1 << (35 - 12) + * 22 --> we need that much 4KiB pages of bitmap. 
+ * 1 << (22 + 3) --> on a 64bit arch, + * we need 32 MiB to store the array of page pointers. + * + * Because I'm lazy, and because the resulting patch was too large, too ugly + * and still incomplete, on 32bit we still "only" support 16 TiB (minus some), + * (1 << 32) bits * 4k storage. + * + + * bitmap storage and IO: + * Bitmap is stored little endian on disk, and is kept little endian in + * core memory. Currently we still hold the full bitmap in core as long + * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage + * seems excessive. + * + * We plan to reduce the amount of in-core bitmap pages by paging them in + * and out against their on-disk location as necessary, but need to make + * sure we don't cause too much meta data IO, and must not deadlock in + * tight memory situations. This needs some more work. */ /* @@ -54,40 +99,25 @@ struct drbd_bitmap { struct page **bm_pages; spinlock_t bm_lock; - /* WARNING unsigned long bm_*: - * 32bit number of bit offset is just enough for 512 MB bitmap. - * it will blow up if we make the bitmap bigger... - * not that it makes much sense to have a bitmap that large, - * rather change the granularity to 16k or 64k or something. - * (that implies other problems, however...) - */ + + /* see LIMITATIONS: above */ + unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? 
*/ unsigned long bm_bits; size_t bm_words; size_t bm_number_of_pages; sector_t bm_dev_capacity; - struct semaphore bm_change; /* serializes resize operations */ + struct mutex bm_change; /* serializes resize operations */ - atomic_t bm_async_io; - wait_queue_head_t bm_io_wait; + wait_queue_head_t bm_io_wait; /* used to serialize IO of single pages */ - unsigned long bm_flags; + enum bm_flag bm_flags; /* debugging aid, in case we are still racy somewhere */ char *bm_why; struct task_struct *bm_task; }; -/* definition of bits in bm_flags */ -#define BM_LOCKED 0 -#define BM_MD_IO_ERROR 1 -#define BM_P_VMALLOCED 2 - -static int bm_is_locked(struct drbd_bitmap *b) -{ - return test_bit(BM_LOCKED, &b->bm_flags); -} - #define bm_print_lock_info(m) __bm_print_lock_info(m, __func__) static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) { @@ -95,16 +125,12 @@ if (!DRBD_ratelimit(5*HZ, 5)) return; dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n", - current == mdev->receiver.task ? "receiver" : - current == mdev->asender.task ? "asender" : - current == mdev->worker.task ? "worker" : current->comm, - func, b->bm_why ?: "?", - b->bm_task == mdev->receiver.task ? "receiver" : - b->bm_task == mdev->asender.task ? "asender" : - b->bm_task == mdev->worker.task ? "worker" : "?"); + drbd_task_to_thread_name(mdev->tconn, current), + func, b->bm_why ?: "?", + drbd_task_to_thread_name(mdev->tconn, b->bm_task)); } -void drbd_bm_lock(struct drbd_conf *mdev, char *why) +void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags) { struct drbd_bitmap *b = mdev->bitmap; int trylock_failed; @@ -114,21 +140,18 @@ return; } - trylock_failed = down_trylock(&b->bm_change); + trylock_failed = !mutex_trylock(&b->bm_change); if (trylock_failed) { dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n", - current == mdev->receiver.task ? "receiver" : - current == mdev->asender.task ? "asender" : - current == mdev->worker.task ? 
"worker" : current->comm, - why, b->bm_why ?: "?", - b->bm_task == mdev->receiver.task ? "receiver" : - b->bm_task == mdev->asender.task ? "asender" : - b->bm_task == mdev->worker.task ? "worker" : "?"); - down(&b->bm_change); + drbd_task_to_thread_name(mdev->tconn, current), + why, b->bm_why ?: "?", + drbd_task_to_thread_name(mdev->tconn, b->bm_task)); + mutex_lock(&b->bm_change); } - if (__test_and_set_bit(BM_LOCKED, &b->bm_flags)) + if (BM_LOCKED_MASK & b->bm_flags) dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); + b->bm_flags |= flags & BM_LOCKED_MASK; b->bm_why = why; b->bm_task = current; @@ -142,48 +165,160 @@ return; } - if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags)) + if (!(BM_LOCKED_MASK & mdev->bitmap->bm_flags)) dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n"); + b->bm_flags &= ~BM_LOCKED_MASK; b->bm_why = NULL; b->bm_task = NULL; - up(&b->bm_change); + mutex_unlock(&b->bm_change); +} + +/* we store some "meta" info about our pages in page->private */ +/* at a granularity of 4k storage per bitmap bit: + * one peta byte storage: 1<<50 byte, 1<<38 * 4k storage blocks + * 1<<38 bits, + * 1<<23 4k bitmap pages. + * Use 24 bits as page index, covers 2 peta byte storage + * at a granularity of 4k per bit. + * Used to report the failed page idx on io error from the endio handlers. + */ +#define BM_PAGE_IDX_MASK ((1UL<<24)-1) +/* this page is currently read in, or written back */ +#define BM_PAGE_IO_LOCK 31 +/* if there has been an IO error for this page */ +#define BM_PAGE_IO_ERROR 30 +/* this is to be able to intelligently skip disk IO, + * set if bits have been set since last IO. */ +#define BM_PAGE_NEED_WRITEOUT 29 +/* to mark for lazy writeout once syncer cleared all clearable bits, + * we if bits have been cleared since last IO. 
*/ +#define BM_PAGE_LAZY_WRITEOUT 28 +/* pages marked with this "HINT" will be considered for writeout + * on activity log transactions */ +#define BM_PAGE_HINT_WRITEOUT 27 + +/* store_page_idx uses non-atomic assignment. It is only used directly after + * allocating the page. All other bm_set_page_* and bm_clear_page_* need to + * use atomic bit manipulation, as set_out_of_sync (and therefore bitmap + * changes) may happen from various contexts, and wait_on_bit/wake_up_bit + * requires it all to be atomic as well. */ +static void bm_store_page_idx(struct page *page, unsigned long idx) +{ + BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK)); + page_private(page) |= idx; } -#define bm_end_info(ignored...) ((void)(0)) +static unsigned long bm_page_to_idx(struct page *page) +{ + return page_private(page) & BM_PAGE_IDX_MASK; +} -#if 0 -#define catch_oob_access_start() do { \ - do { \ - if ((bm-p_addr) >= PAGE_SIZE/sizeof(long)) { \ - printk(KERN_ALERT "drbd_bitmap.c:%u %s: p_addr:%p bm:%p %d\n", \ - __LINE__ , __func__ , p_addr, bm, (bm-p_addr)); \ - break; \ - } -#define catch_oob_access_end() \ - } while (0); } while (0) -#else -#define catch_oob_access_start() do { -#define catch_oob_access_end() } while (0) -#endif +/* As is very unlikely that the same page is under IO from more than one + * context, we can get away with a bit per page and one wait queue per bitmap. 
+ */ +static void bm_page_lock_io(struct drbd_conf *mdev, int page_nr) +{ + struct drbd_bitmap *b = mdev->bitmap; + void *addr = &page_private(b->bm_pages[page_nr]); + wait_event(b->bm_io_wait, !test_and_set_bit(BM_PAGE_IO_LOCK, addr)); +} -/* word offset to long pointer */ -STATIC unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km) +static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr) +{ + struct drbd_bitmap *b = mdev->bitmap; + void *addr = &page_private(b->bm_pages[page_nr]); + clear_bit_unlock(BM_PAGE_IO_LOCK, addr); + wake_up(&mdev->bitmap->bm_io_wait); +} + +/* set _before_ submit_io, so it may be reset due to being changed + * while this page is in flight... will get submitted later again */ +static void bm_set_page_unchanged(struct page *page) +{ + /* use cmpxchg? */ + clear_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); + clear_bit(BM_PAGE_LAZY_WRITEOUT, &page_private(page)); +} + +static void bm_set_page_need_writeout(struct page *page) +{ + set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page)); +} + +/** + * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout + * @mdev: DRBD device. + * @page_nr: the bitmap page to mark with the "hint" flag + * + * From within an activity log transaction, we mark a few pages with these + * hints, then call drbd_bm_write_hinted(), which will only write out changed + * pages which are flagged with this mark. 
+ */ +void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr) { struct page *page; - unsigned long page_nr; + if (page_nr >= mdev->bitmap->bm_number_of_pages) { + dev_warn(DEV, "BAD: page_nr: %u, number_of_pages: %u\n", + page_nr, (int)mdev->bitmap->bm_number_of_pages); + return; + } + page = mdev->bitmap->bm_pages[page_nr]; + set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page)); +} + +static int bm_test_page_unchanged(struct page *page) +{ + volatile const unsigned long *addr = &page_private(page); + return (*addr & ((1UL<> PAGE_SHIFT; */ - page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3); + unsigned int page_nr = long_nr >> (PAGE_SHIFT - LN2_BPL + 3); BUG_ON(page_nr >= b->bm_number_of_pages); - page = b->bm_pages[page_nr]; + return page_nr; +} +static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr) +{ + /* page_nr = (bitnr/8) >> PAGE_SHIFT; */ + unsigned int page_nr = bitnr >> (PAGE_SHIFT + 3); + BUG_ON(page_nr >= b->bm_number_of_pages); + return page_nr; +} + +static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km) +{ + struct page *page = b->bm_pages[idx]; return (unsigned long *) kmap_atomic(page, km); } -static unsigned long * bm_map_paddr(struct drbd_bitmap *b, unsigned long offset) +static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx) { - return __bm_map_paddr(b, offset, KM_IRQ1); + return __bm_map_pidx(b, idx, KM_IRQ1); } static void __bm_unmap(unsigned long *p_addr, const enum km_type km) @@ -201,7 +336,7 @@ /* word offset from start of bitmap to word number _in_page_ * modulo longs per page #define MLPP(X) ((X) % (PAGE_SIZE/sizeof(long)) - hm, well, Philipp thinks gcc might not optimze the % into & (... - 1) + hm, well, Philipp thinks gcc might not optimize the % into & (... - 1) so do it explicitly: */ #define MLPP(X) ((X) & ((PAGE_SIZE/sizeof(long))-1)) @@ -215,6 +350,7 @@ * to be able to report device specific. 
*/ + STATIC void bm_free_pages(struct page **pages, unsigned long number) { unsigned long i; @@ -259,8 +395,8 @@ /* Trying kmalloc first, falling back to vmalloc. * GFP_KERNEL is ok, as this is done when a lower level disk is - * "attached" to the drbd. Context is receiver thread or cqueue - * thread. As we have no disk yet, we are not in the IO path, + * "attached" to the drbd. Context is receiver thread or drbdsetup / + * netlink process. As we have no disk yet, we are not in the IO path, * not even the IO path of the peer. */ bytes = sizeof(struct page *)*want; new_pages = kmalloc(bytes, GFP_KERNEL); @@ -282,6 +418,9 @@ bm_vk_free(new_pages, vmalloced); return NULL; } + /* we want to know which page it is + * from the endio handlers */ + bm_store_page_idx(page, i); new_pages[i] = page; } } else { @@ -293,9 +432,9 @@ } if (vmalloced) - set_bit(BM_P_VMALLOCED, &b->bm_flags); + b->bm_flags |= BM_P_VMALLOCED; else - clear_bit(BM_P_VMALLOCED, &b->bm_flags); + b->bm_flags &= ~BM_P_VMALLOCED; return new_pages; } @@ -312,7 +451,7 @@ if (!b) return -ENOMEM; spin_lock_init(&b->bm_lock); - init_MUTEX(&b->bm_change); + mutex_init(&b->bm_change); init_waitqueue_head(&b->bm_io_wait); mdev->bitmap = b; @@ -322,7 +461,8 @@ sector_t drbd_bm_capacity(struct drbd_conf *mdev) { - ERR_IF(!mdev->bitmap) return 0; + if (!expect(mdev->bitmap)) + return 0; return mdev->bitmap->bm_dev_capacity; } @@ -330,9 +470,10 @@ */ void drbd_bm_cleanup(struct drbd_conf *mdev) { - ERR_IF (!mdev->bitmap) return; + if (!expect(mdev->bitmap)) + return; bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); - bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags)); + bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags)); kfree(mdev->bitmap); mdev->bitmap = NULL; } @@ -342,28 +483,41 @@ * this masks out the remaining bits. * Returns the number of bits cleared. 
*/ +#define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3)) +#define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1) +#define BITS_PER_LONG_MASK (BITS_PER_LONG - 1) STATIC int bm_clear_surplus(struct drbd_bitmap *b) { - const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; - size_t w = b->bm_bits >> LN2_BPL; - int cleared = 0; + unsigned long mask; unsigned long *p_addr, *bm; + int tmp; + int cleared = 0; - p_addr = bm_map_paddr(b, w); - bm = p_addr + MLPP(w); - if (w < b->bm_words) { - catch_oob_access_start(); + /* number of bits modulo bits per page */ + tmp = (b->bm_bits & BITS_PER_PAGE_MASK); + /* mask the used bits of the word containing the last bit */ + mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1; + /* bitmap is always stored little endian, + * on disk and in core memory alike */ + mask = cpu_to_lel(mask); + + p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1); + bm = p_addr + (tmp/BITS_PER_LONG); + if (mask) { + /* If mask != 0, we are not exactly aligned, so bm now points + * to the long containing the last bit. + * If mask == 0, bm already points to the word immediately + * after the last (long word aligned) bit. 
*/ cleared = hweight_long(*bm & ~mask); *bm &= mask; - catch_oob_access_end(); - w++; bm++; + bm++; } - if (w < b->bm_words) { - catch_oob_access_start(); + if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) { + /* on a 32bit arch, we may need to zero out + * a padding long to align with a 64bit remote */ cleared += hweight_long(*bm); *bm = 0; - catch_oob_access_end(); } bm_unmap(p_addr); return cleared; @@ -371,72 +525,75 @@ STATIC void bm_set_surplus(struct drbd_bitmap *b) { - const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; - size_t w = b->bm_bits >> LN2_BPL; + unsigned long mask; unsigned long *p_addr, *bm; + int tmp; - p_addr = bm_map_paddr(b, w); - bm = p_addr + MLPP(w); - if (w < b->bm_words) { - catch_oob_access_start(); + /* number of bits modulo bits per page */ + tmp = (b->bm_bits & BITS_PER_PAGE_MASK); + /* mask the used bits of the word containing the last bit */ + mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1; + /* bitmap is always stored little endian, + * on disk and in core memory alike */ + mask = cpu_to_lel(mask); + + p_addr = bm_map_pidx(b, b->bm_number_of_pages - 1); + bm = p_addr + (tmp/BITS_PER_LONG); + if (mask) { + /* If mask != 0, we are not exactly aligned, so bm now points + * to the long containing the last bit. + * If mask == 0, bm already points to the word immediately + * after the last (long word aligned) bit. 
*/ *bm |= ~mask; - bm++; w++; - catch_oob_access_end(); + bm++; } - if (w < b->bm_words) { - catch_oob_access_start(); - *bm = ~(0UL); - catch_oob_access_end(); + if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) { + /* on a 32bit arch, we may need to zero out + * a padding long to align with a 64bit remote */ + *bm = ~0UL; } bm_unmap(p_addr); } -STATIC unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian) +/* you better not modify the bitmap while this is running, + * or its results will be stale */ +STATIC unsigned long bm_count_bits(struct drbd_bitmap *b) { - unsigned long *p_addr, *bm, offset = 0; + unsigned long *p_addr; unsigned long bits = 0; - unsigned long i, do_now; + unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1; + int idx, i, last_word; - while (offset < b->bm_words) { - i = do_now = min_t(size_t, b->bm_words-offset, LWPP); - p_addr = __bm_map_paddr(b, offset, KM_USER0); - bm = p_addr + MLPP(offset); - while (i--) { - catch_oob_access_start(); -#ifndef __LITTLE_ENDIAN - if (swap_endian) - *bm = lel_to_cpu(*bm); -#endif - bits += hweight_long(*bm++); - catch_oob_access_end(); - } + /* all but last page */ + for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) { + p_addr = __bm_map_pidx(b, idx, KM_USER0); + for (i = 0; i < LWPP; i++) + bits += hweight_long(p_addr[i]); __bm_unmap(p_addr, KM_USER0); - offset += do_now; cond_resched(); } - + /* last (or only) page */ + last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL; + p_addr = __bm_map_pidx(b, idx, KM_USER0); + for (i = 0; i < last_word; i++) + bits += hweight_long(p_addr[i]); + p_addr[last_word] &= cpu_to_lel(mask); + bits += hweight_long(p_addr[last_word]); + /* 32bit arch, may have an unused padding long */ + if (BITS_PER_LONG == 32 && (last_word & 1) == 0) + p_addr[last_word+1] = 0; + __bm_unmap(p_addr, KM_USER0); return bits; } -static unsigned long bm_count_bits(struct drbd_bitmap *b) -{ - return __bm_count_bits(b, 0); -} - -static unsigned 
long bm_count_bits_swap_endian(struct drbd_bitmap *b) -{ - return __bm_count_bits(b, 1); -} - /* offset and len in long words.*/ STATIC void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) { unsigned long *p_addr, *bm; + unsigned int idx; size_t do_now, end; -#define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512) - end = offset + len; if (end > b->bm_words) { @@ -446,17 +603,16 @@ while (offset < end) { do_now = min_t(size_t, ALIGN(offset + 1, LWPP), end) - offset; - p_addr = bm_map_paddr(b, offset); + idx = bm_word_to_page_idx(b, offset); + p_addr = bm_map_pidx(b, idx); bm = p_addr + MLPP(offset); - catch_oob_access_start(); if (bm+do_now > p_addr + LWPP) { printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", p_addr, bm, (int)do_now); - break; /* breaks to after catch_oob_access_end() only! */ - } - memset(bm, c, do_now * sizeof(long)); - catch_oob_access_end(); + } else + memset(bm, c, do_now * sizeof(long)); bm_unmap(p_addr); + bm_set_page_need_writeout(b->bm_pages[idx]); offset += do_now; } } @@ -469,18 +625,19 @@ * In case this is actually a resize, we copy the old bitmap into the new one. * Otherwise, the bitmap is initialized to all bits set. 
*/ -int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity) +int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) { struct drbd_bitmap *b = mdev->bitmap; - unsigned long bits, words, owords, obits, *p_addr, *bm; + unsigned long bits, words, owords, obits; unsigned long want, have, onpages; /* number of pages */ struct page **npages, **opages = NULL; int err = 0, growing; int opages_vmalloced; - ERR_IF(!b) return -ENOMEM; + if (!expect(b)) + return -ENOMEM; - drbd_bm_lock(mdev, "resize"); + drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK); dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n", (unsigned long long)capacity); @@ -488,7 +645,7 @@ if (capacity == b->bm_dev_capacity) goto out; - opages_vmalloced = test_bit(BM_P_VMALLOCED, &b->bm_flags); + opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags); if (capacity == 0) { spin_lock_irq(&b->bm_lock); @@ -516,18 +673,23 @@ words = ALIGN(bits, 64) >> LN2_BPL; if (get_ldev(mdev)) { - D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12)); + u64 bits_on_disk = ((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12; put_ldev(mdev); + if (bits > bits_on_disk) { + dev_err(DEV, "Not enough space for bitmap: %lu > %lu\n", + (unsigned long)bits, (unsigned long)bits_on_disk); + err = -ENOSPC; + goto out; + } } - /* one extra long to catch off by one errors */ - want = ALIGN((words+1)*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT; + want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT; have = b->bm_number_of_pages; if (want == have) { D_ASSERT(b->bm_pages != NULL); npages = b->bm_pages; } else { - if (FAULT_ACTIVE(mdev, DRBD_FAULT_BM_ALLOC)) + if (drbd_insert_fault(mdev, DRBD_FAULT_BM_ALLOC)) npages = NULL; else npages = bm_realloc_pages(b, want); @@ -544,7 +706,7 @@ obits = b->bm_bits; growing = bits > obits; - if (opages) + if (opages && growing && set_new_bits) bm_set_surplus(b); b->bm_pages = npages; @@ -554,8 +716,12 @@ b->bm_dev_capacity = capacity; if (growing) { - 
bm_memset(b, owords, 0xff, words-owords); - b->bm_set += bits - obits; + if (set_new_bits) { + bm_memset(b, owords, 0xff, words-owords); + b->bm_set += bits - obits; + } else + bm_memset(b, owords, 0x00, words-owords); + } if (want < have) { @@ -563,22 +729,14 @@ bm_free_pages(opages + want, have - want); } - p_addr = bm_map_paddr(b, words); - bm = p_addr + MLPP(words); - catch_oob_access_start(); - *bm = DRBD_MAGIC; - catch_oob_access_end(); - bm_unmap(p_addr); - (void)bm_clear_surplus(b); - bm_end_info(mdev, __func__); spin_unlock_irq(&b->bm_lock); if (opages != npages) bm_vk_free(opages, opages_vmalloced); if (!growing) b->bm_set = bm_count_bits(b); - dev_info(DEV, "resync bitmap: bits=%lu words=%lu\n", bits, words); + dev_info(DEV, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want); out: drbd_bm_unlock(mdev); @@ -593,14 +751,16 @@ * * maybe bm_set should be atomic_t ? */ -static unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev) +unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; unsigned long s; unsigned long flags; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); s = b->bm_set; @@ -623,8 +783,10 @@ size_t drbd_bm_words(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; return b->bm_words; } @@ -632,7 +794,8 @@ unsigned long drbd_bm_bits(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return 0; + if (!expect(b)) + return 0; return b->bm_bits; } @@ -648,12 +811,15 @@ struct drbd_bitmap *b = mdev->bitmap; unsigned long *p_addr, *bm; unsigned long word, bits; + unsigned int idx; size_t end, do_now; end = offset + number; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if 
(!expect(b->bm_pages)) + return; if (number == 0) return; WARN_ON(offset >= b->bm_words); @@ -662,28 +828,26 @@ spin_lock_irq(&b->bm_lock); while (offset < end) { do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; - p_addr = bm_map_paddr(b, offset); + idx = bm_word_to_page_idx(b, offset); + p_addr = bm_map_pidx(b, idx); bm = p_addr + MLPP(offset); offset += do_now; while (do_now--) { - catch_oob_access_start(); bits = hweight_long(*bm); - word = *bm | lel_to_cpu(*buffer++); + word = *bm | *buffer++; *bm++ = word; b->bm_set += hweight_long(word) - bits; - catch_oob_access_end(); } bm_unmap(p_addr); + bm_set_page_need_writeout(b->bm_pages[idx]); } /* with 32bit <-> 64bit cross-platform connect * this is only correct for current usage, * where we _know_ that we are 64 bit aligned, * and know that this function is used in this way, too... */ - if (end == b->bm_words) { + if (end == b->bm_words) b->bm_set -= bm_clear_surplus(b); - bm_end_info(mdev, __func__); - } spin_unlock_irq(&b->bm_lock); } @@ -699,8 +863,10 @@ end = offset + number; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if (!expect(b->bm_pages)) + return; spin_lock_irq(&b->bm_lock); if ((offset >= b->bm_words) || @@ -713,14 +879,11 @@ else { while (offset < end) { do_now = min_t(size_t, ALIGN(offset+1, LWPP), end) - offset; - p_addr = bm_map_paddr(b, offset); + p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, offset)); bm = p_addr + MLPP(offset); offset += do_now; - while (do_now--) { - catch_oob_access_start(); - *buffer++ = cpu_to_lel(*bm++); - catch_oob_access_end(); - } + while (do_now--) + *buffer++ = *bm++; bm_unmap(p_addr); } } @@ -731,8 +894,10 @@ void drbd_bm_set_all(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if (!expect(b->bm_pages)) + return; spin_lock_irq(&b->bm_lock); bm_memset(b, 0, 0xff, b->bm_words); @@ -745,8 +910,10 @@ void 
drbd_bm_clear_all(struct drbd_conf *mdev) { struct drbd_bitmap *b = mdev->bitmap; - ERR_IF(!b) return; - ERR_IF(!b->bm_pages) return; + if (!expect(b)) + return; + if (!expect(b->bm_pages)) + return; spin_lock_irq(&b->bm_lock); bm_memset(b, 0, 0, b->bm_words); @@ -754,9 +921,32 @@ spin_unlock_irq(&b->bm_lock); } +struct bm_aio_ctx { + struct drbd_conf *mdev; + atomic_t in_flight; + unsigned int done; + unsigned flags; +#define BM_AIO_COPY_PAGES 1 +#define BM_AIO_WRITE_HINTED 2 + int error; + struct kref kref; +}; + +static void bm_aio_ctx_destroy(struct kref *kref) +{ + struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref); + + put_ldev(ctx->mdev); + kfree(ctx); +} + +/* bv_page may be a copy, or may be the original */ static BIO_ENDIO_TYPE bm_async_io_complete BIO_ENDIO_ARGS(struct bio *bio, int error) { - struct drbd_bitmap *b = bio->bi_private; + struct bm_aio_ctx *ctx = bio->bi_private; + struct drbd_conf *mdev = ctx->mdev; + struct drbd_bitmap *b = mdev->bitmap; + unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page); int uptodate = bio_flagged(bio, BIO_UPTODATE); BIO_ENDIO_FN_START; @@ -768,140 +958,215 @@ if (!error && !uptodate) error = -EIO; + if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 && + !bm_test_page_unchanged(b->bm_pages[idx])) + dev_warn(DEV, "bitmap page idx %u changed during IO!\n", idx); + if (error) { - /* doh. what now? - * for now, set all bits, and flag MD_IO_ERROR */ - __set_bit(BM_MD_IO_ERROR, &b->bm_flags); + /* ctx error will hold the completed-last non-zero error code, + * in case error codes differ. */ + ctx->error = error; + bm_set_page_io_err(b->bm_pages[idx]); + /* Not identical to on disk version of it. + * Is BM_PAGE_IO_ERROR enough? 
*/ + if (DRBD_ratelimit(5*HZ, 5)) + dev_err(DEV, "IO ERROR %d on bitmap page idx %u\n", + error, idx); + } else { + bm_clear_page_io_err(b->bm_pages[idx]); + dynamic_dev_dbg(DEV, "bitmap page idx %u completed\n", idx); } - if (atomic_dec_and_test(&b->bm_async_io)) - wake_up(&b->bm_io_wait); + + bm_page_unlock_io(mdev, idx); + + if (ctx->flags & BM_AIO_COPY_PAGES) + mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool); bio_put(bio); + if (atomic_dec_and_test(&ctx->in_flight)) { + ctx->done = 1; + wake_up(&mdev->misc_wait); + kref_put(&ctx->kref, &bm_aio_ctx_destroy); + } + BIO_ENDIO_FN_RETURN; } -STATIC void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int page_nr, int rw) __must_hold(local) +STATIC void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) { - /* we are process context. we always get a bio */ - struct bio *bio = bio_alloc(GFP_KERNEL, 1); + struct bio *bio = bio_alloc_drbd(GFP_NOIO); + struct drbd_conf *mdev = ctx->mdev; + struct drbd_bitmap *b = mdev->bitmap; + struct page *page; unsigned int len; + sector_t on_disk_sector = mdev->ldev->md.md_offset + mdev->ldev->md.bm_offset; on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9); /* this might happen with very small - * flexible external meta data device */ + * flexible external meta data device, + * or with PAGE_SIZE > 4k */ len = min_t(unsigned int, PAGE_SIZE, (drbd_md_last_sector(mdev->ldev) - on_disk_sector + 1)<<9); - D_DUMPLU(on_disk_sector); - D_DUMPI(len); + /* serialize IO on this page */ + bm_page_lock_io(mdev, page_nr); + /* before memcpy and submit, + * so it can be redirtied any time */ + bm_set_page_unchanged(b->bm_pages[page_nr]); + + if (ctx->flags & BM_AIO_COPY_PAGES) { + void *src, *dest; + page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT); + dest = kmap_atomic(page, KM_USER0); + src = kmap_atomic(b->bm_pages[page_nr], KM_USER1); + memcpy(dest, src, PAGE_SIZE); + kunmap_atomic(src, KM_USER1); + 
kunmap_atomic(dest, KM_USER0); + bm_store_page_idx(page, page_nr); + } else + page = b->bm_pages[page_nr]; bio->bi_bdev = mdev->ldev->md_bdev; bio->bi_sector = on_disk_sector; - bio_add_page(bio, b->bm_pages[page_nr], len, 0); - bio->bi_private = b; + /* bio_add_page of a single page to an empty bio will always succeed, + * according to api. Do we want to assert that? */ + bio_add_page(bio, page, len, 0); + bio->bi_private = ctx; bio->bi_end_io = bm_async_io_complete; - if (FAULT_ACTIVE(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { + if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { bio->bi_rw |= rw; bio_endio(bio, -EIO); } else { submit_bio(rw, bio); + /* this should not count as user activity and cause the + * resync to throttle -- see drbd_rs_should_slow_down(). */ + atomic_add(len >> 9, &mdev->rs_sect_ev); } } -# if defined(__LITTLE_ENDIAN) - /* nothing to do, on disk == in memory */ -# define bm_cpu_to_lel(x) ((void)0) -# else -void bm_cpu_to_lel(struct drbd_bitmap *b) -{ - /* need to cpu_to_lel all the pages ... - * this may be optimized by using - * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0; - * the following is still not optimal, but better than nothing */ - unsigned int i; - unsigned long *p_addr, *bm; - if (b->bm_set == 0) { - /* no page at all; avoid swap if all is 0 */ - i = b->bm_number_of_pages; - } else if (b->bm_set == b->bm_bits) { - /* only the last page */ - i = b->bm_number_of_pages - 1; - } else { - /* all pages */ - i = 0; - } - for (; i < b->bm_number_of_pages; i++) { - p_addr = kmap_atomic(b->bm_pages[i], KM_USER0); - for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++) - *bm = cpu_to_lel(*bm); - kunmap_atomic(p_addr, KM_USER0); - } -} -# endif -/* lel_to_cpu == cpu_to_lel */ -# define bm_lel_to_cpu(x) bm_cpu_to_lel(x) - /* * bm_rw: read/write the whole bitmap from/to its on disk location. 
*/ -STATIC int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) +STATIC int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) { + struct bm_aio_ctx *ctx; struct drbd_bitmap *b = mdev->bitmap; - /* sector_t sector; */ - int bm_words, num_pages, i; + int num_pages, i, count = 0; unsigned long now; char ppb[10]; int err = 0; - WARN_ON(!bm_is_locked(b)); + /* + * We are protected against bitmap disappearing/resizing by holding an + * ldev reference (caller must have called get_ldev()). + * For read/write, we are protected against changes to the bitmap by + * the bitmap lock (see drbd_bitmap_io). + * For lazy writeout, we don't care for ongoing changes to the bitmap, + * as we submit copies of pages anyways. + */ + + ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); + if (!ctx) + return -ENOMEM; - /* no spinlock here, the drbd_bm_lock should be enough! */ + *ctx = (struct bm_aio_ctx) { + .mdev = mdev, + .in_flight = ATOMIC_INIT(1), + .done = 0, + .flags = flags, + .error = 0, + .kref = { ATOMIC_INIT(2) }, + }; + + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); + err = -ENODEV; + goto out; + } - bm_words = drbd_bm_words(mdev); - num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT; + if (!ctx->flags) + WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); - /* on disk bitmap is little endian */ - if (rw == WRITE) - bm_cpu_to_lel(b); + num_pages = b->bm_number_of_pages; now = jiffies; - atomic_set(&b->bm_async_io, num_pages); - __clear_bit(BM_MD_IO_ERROR, &b->bm_flags); /* let the layers below us try to merge these bios... 
*/ - for (i = 0; i < num_pages; i++) - bm_page_io_async(mdev, b, i, rw); + for (i = 0; i < num_pages; i++) { + /* ignore completely unchanged pages */ + if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) + break; + if (rw & WRITE) { + if ((flags & BM_AIO_WRITE_HINTED) && + !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT, + &page_private(b->bm_pages[i]))) + continue; + if (bm_test_page_unchanged(b->bm_pages[i])) { + dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); + continue; + } + /* during lazy writeout, + * ignore those pages not marked for lazy writeout. */ + if (lazy_writeout_upper_idx && + !bm_test_page_lazy_writeout(b->bm_pages[i])) { + dynamic_dev_dbg(DEV, "skipped bm lazy write for idx %u\n", i); + continue; + } + } + atomic_inc(&ctx->in_flight); + bm_page_io_async(ctx, i, rw); + ++count; + cond_resched(); + } + + /* + * We initialize ctx->in_flight to one to make sure bm_async_io_complete + * will not set ctx->done early, and decrement / test it here. If there + * are still some bios in flight, we need to wait for them here. + * If all IO is done already (or nothing had been submitted), there is + * no need to wait. Still, we need to put the kref associated with the + * "in_flight reached zero, all done" event. + */ + if (!atomic_dec_and_test(&ctx->in_flight)) + wait_until_done_or_disk_failure(mdev, &ctx->done); + else + kref_put(&ctx->kref, &bm_aio_ctx_destroy); - drbd_blk_run_queue(bdev_get_queue(mdev->ldev->md_bdev)); - wait_event(b->bm_io_wait, atomic_read(&b->bm_async_io) == 0); + /* summary for global bitmap IO */ + if (flags == 0) + dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", + rw == WRITE ? "WRITE" : "READ", + count, jiffies - now); - if (test_bit(BM_MD_IO_ERROR, &b->bm_flags)) { + if (ctx->error) { dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); - drbd_chk_io_error(mdev, 1, TRUE); - err = -EIO; + drbd_chk_io_error(mdev, 1, true); + err = -EIO; /* ctx->error ? 
*/ } + if (atomic_read(&ctx->in_flight)) + err = -EIO; /* Disk failed during IO... */ + now = jiffies; if (rw == WRITE) { - /* swap back endianness */ - bm_lel_to_cpu(b); - /* flush bitmap to stable storage */ drbd_md_flush(mdev); } else /* rw == READ */ { - /* just read, if necessary adjust endianness */ - b->bm_set = bm_count_bits_swap_endian(b); + b->bm_set = bm_count_bits(b); dev_info(DEV, "recounting of set bits took additional %lu jiffies\n", jiffies - now); } now = b->bm_set; - dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", - ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); + if (flags == 0) + dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", + ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); +out: + kref_put(&ctx->kref, &bm_aio_ctx_destroy); return err; } @@ -911,116 +1176,156 @@ */ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) { - return bm_rw(mdev, READ); + return bm_rw(mdev, READ, 0, 0); } /** * drbd_bm_write() - Write the whole bitmap to its on disk location. * @mdev: DRBD device. + * + * Will only write pages that have changed since last IO. */ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) { - return bm_rw(mdev, WRITE); + return bm_rw(mdev, WRITE, 0, 0); } /** - * drbd_bm_write_sect: Writes a 512 (MD_SECTOR_SIZE) byte piece of the bitmap + * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed. * @mdev: DRBD device. - * @enr: Extent number in the resync lru (happens to be sector offset) + * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages + */ +int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local) +{ + return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx); +} + +/** + * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed. + * @mdev: DRBD device. 
+ */ +int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local) +{ + return bm_rw(mdev, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0); +} + +/** + * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap + * @mdev: DRBD device. + * @idx: bitmap page index * - * The BM_EXT_SIZE is on purpose exactly the amount of the bitmap covered - * by a single sector write. Therefore enr == sector offset from the - * start of the bitmap. - */ -int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local) -{ - sector_t on_disk_sector = enr + mdev->ldev->md.md_offset - + mdev->ldev->md.bm_offset; - int bm_words, num_words, offset; - int err = 0; + * We don't want to special case on logical_block_size of the backend device, + * so we submit PAGE_SIZE aligned pieces. + * Note that on "most" systems, PAGE_SIZE is 4k. + * + * In case this becomes an issue on systems with larger PAGE_SIZE, + * we may want to change this again to write 4k aligned 4k pieces. + */ +int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) +{ + struct bm_aio_ctx *ctx; + int err; - mutex_lock(&mdev->md_io_mutex); - bm_words = drbd_bm_words(mdev); - offset = S2W(enr); /* word offset into bitmap */ - num_words = min(S2W(1), bm_words - offset); -#if DUMP_MD >= 3 - dev_info(DEV, "write_sect: sector=%lu offset=%u num_words=%u\n", - enr, offset, num_words); -#endif - if (num_words < S2W(1)) - memset(page_address(mdev->md_io_page), 0, MD_SECTOR_SIZE); - drbd_bm_get_lel(mdev, offset, num_words, - page_address(mdev->md_io_page)); - if (!drbd_md_sync_page_io(mdev, mdev->ldev, on_disk_sector, WRITE)) { - int i; - err = -EIO; - dev_err(DEV, "IO ERROR writing bitmap sector %lu " - "(meta-disk sector %llus)\n", - enr, (unsigned long long)on_disk_sector); - drbd_chk_io_error(mdev, 1, TRUE); - for (i = 0; i < AL_EXT_PER_BM_SECT; i++) - drbd_bm_ALe_set_all(mdev, enr*AL_EXT_PER_BM_SECT+i); + if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { + 
dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); + return 0; + } + + ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); + if (!ctx) + return -ENOMEM; + + *ctx = (struct bm_aio_ctx) { + .mdev = mdev, + .in_flight = ATOMIC_INIT(1), + .done = 0, + .flags = BM_AIO_COPY_PAGES, + .error = 0, + .kref = { ATOMIC_INIT(2) }, + }; + + if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); + err = -ENODEV; + goto out; } + + bm_page_io_async(ctx, idx, WRITE_SYNC); + wait_until_done_or_disk_failure(mdev, &ctx->done); + + if (ctx->error) + drbd_chk_io_error(mdev, 1, true); + /* that should force detach, so the in memory bitmap will be + * gone in a moment as well. */ + mdev->bm_writ_cnt++; - mutex_unlock(&mdev->md_io_mutex); + err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error; + out: + kref_put(&ctx->kref, &bm_aio_ctx_destroy); return err; } /* NOTE * find_first_bit returns int, we return unsigned long. - * should not make much difference anyways, but ... + * For this to work on 32bit arch with bitnumbers > (1<<32), + * we'd need to return u64, and get a whole lot of other places + * fixed where we still use unsigned long. * * this returns a bit number, NOT a sector! */ -#define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1) static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, const int find_zero_bit, const enum km_type km) { struct drbd_bitmap *b = mdev->bitmap; - unsigned long i = -1UL; unsigned long *p_addr; - unsigned long bit_offset; /* bit offset of the mapped page. 
*/ + unsigned long bit_offset; + unsigned i; + if (bm_fo > b->bm_bits) { dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); + bm_fo = DRBD_END_OF_BITMAP; } else { while (bm_fo < b->bm_bits) { - unsigned long offset; - bit_offset = bm_fo & ~BPP_MASK; /* bit offset of the page */ - offset = bit_offset >> LN2_BPL; /* word offset of the page */ - p_addr = __bm_map_paddr(b, offset, km); + /* bit offset of the first bit in the page */ + bit_offset = bm_fo & ~BITS_PER_PAGE_MASK; + p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km); if (find_zero_bit) - i = find_next_zero_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); + i = find_next_zero_bit_le(p_addr, + PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); else - i = find_next_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); + i = find_next_bit_le(p_addr, + PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); __bm_unmap(p_addr, km); if (i < PAGE_SIZE*8) { - i = bit_offset + i; - if (i >= b->bm_bits) + bm_fo = bit_offset + i; + if (bm_fo >= b->bm_bits) break; goto found; } bm_fo = bit_offset + PAGE_SIZE*8; } - i = -1UL; + bm_fo = DRBD_END_OF_BITMAP; } found: - return i; + return bm_fo; } static unsigned long bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, const int find_zero_bit) { struct drbd_bitmap *b = mdev->bitmap; - unsigned long i = -1UL; + unsigned long i = DRBD_END_OF_BITMAP; - ERR_IF(!b) return i; - ERR_IF(!b->bm_pages) return i; + if (!expect(b)) + return i; + if (!expect(b->bm_pages)) + return i; spin_lock_irq(&b->bm_lock); - if (bm_is_locked(b)) + if (BM_DONT_TEST & b->bm_flags) bm_print_lock_info(mdev); i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1); @@ -1046,13 +1351,13 @@ * you must take drbd_bm_lock() first */ unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) { - /* WARN_ON(!bm_is_locked(mdev)); */ + /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ return __bm_find_next(mdev, bm_fo, 0, KM_USER1); } unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned 
long bm_fo) { - /* WARN_ON(!bm_is_locked(mdev)); */ + /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */ return __bm_find_next(mdev, bm_fo, 1, KM_USER1); } @@ -1062,14 +1367,15 @@ * wants bitnr, not sector. * expected to be called for only a few bits (e - s about BITS_PER_LONG). * Must hold bitmap lock already. */ -int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, - unsigned long e, int val, const enum km_type km) +STATIC int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, + unsigned long e, int val) { struct drbd_bitmap *b = mdev->bitmap; unsigned long *p_addr = NULL; unsigned long bitnr; - unsigned long last_page_nr = -1UL; + unsigned int last_page_nr = -1U; int c = 0; + int changed_total = 0; if (e >= b->bm_bits) { dev_err(DEV, "ASSERT FAILED: bit_s=%lu bit_e=%lu bm_bits=%lu\n", @@ -1077,44 +1383,56 @@ e = b->bm_bits ? b->bm_bits -1 : 0; } for (bitnr = s; bitnr <= e; bitnr++) { - unsigned long offset = bitnr>>LN2_BPL; - unsigned long page_nr = offset >> (PAGE_SHIFT - LN2_BPL + 3); + unsigned int page_nr = bm_bit_to_page_idx(b, bitnr); if (page_nr != last_page_nr) { if (p_addr) - __bm_unmap(p_addr, km); - p_addr = __bm_map_paddr(b, offset, km); + __bm_unmap(p_addr, KM_IRQ1); + if (c < 0) + bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); + else if (c > 0) + bm_set_page_need_writeout(b->bm_pages[last_page_nr]); + changed_total += c; + c = 0; + p_addr = __bm_map_pidx(b, page_nr, KM_IRQ1); last_page_nr = page_nr; } if (val) - c += (0 == __test_and_set_bit(bitnr & BPP_MASK, p_addr)); + c += (0 == __test_and_set_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr)); else - c -= (0 != __test_and_clear_bit(bitnr & BPP_MASK, p_addr)); + c -= (0 != __test_and_clear_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr)); } if (p_addr) - __bm_unmap(p_addr, km); - b->bm_set += c; - return c; + __bm_unmap(p_addr, KM_IRQ1); + if (c < 0) + bm_set_page_lazy_writeout(b->bm_pages[last_page_nr]); + else if (c > 0) + 
bm_set_page_need_writeout(b->bm_pages[last_page_nr]); + changed_total += c; + b->bm_set += changed_total; + return changed_total; } /* returns number of bits actually changed. * for val != 0, we change 0 -> 1, return code positive * for val == 0, we change 1 -> 0, return code negative * wants bitnr, not sector */ -int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, +STATIC int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, const unsigned long e, int val) { unsigned long flags; struct drbd_bitmap *b = mdev->bitmap; int c = 0; - ERR_IF(!b) return 1; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 1; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); - if (bm_is_locked(b)) + if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags) bm_print_lock_info(mdev); - c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1); + c = __bm_change_bits_to(mdev, s, e, val); spin_unlock_irqrestore(&b->bm_lock, flags); return c; @@ -1139,16 +1457,25 @@ { int i; int bits; - unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_USER0); + int changed = 0; + unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr], KM_IRQ1); for (i = first_word; i < last_word; i++) { bits = hweight_long(paddr[i]); paddr[i] = ~0UL; - b->bm_set += BITS_PER_LONG - bits; + changed += BITS_PER_LONG - bits; + } + kunmap_atomic(paddr, KM_IRQ1); + if (changed) { + /* We only need lazy writeout, the information is still in the + * remote bitmap as well, and is reconstructed during the next + * bitmap exchange, if lost locally due to a crash. */ + bm_set_page_lazy_writeout(b->bm_pages[page_nr]); + b->bm_set += changed; } - kunmap_atomic(paddr, KM_USER0); } -/* Same thing as drbd_bm_set_bits, but without taking the spin_lock_irqsave. +/* Same thing as drbd_bm_set_bits, + * but more efficient for a large bit range. * You must first drbd_bm_lock(). * Can be called to set the whole bitmap in one go. * Sets bits from s to e _inclusive_. 
*/ @@ -1162,6 +1489,7 @@ * Do not use memset, because we must account for changes, * so we need to loop over the words with hweight() anyways. */ + struct drbd_bitmap *b = mdev->bitmap; unsigned long sl = ALIGN(s,BITS_PER_LONG); unsigned long el = (e+1) & ~((unsigned long)BITS_PER_LONG-1); int first_page; @@ -1172,15 +1500,19 @@ if (e - s <= 3*BITS_PER_LONG) { /* don't bother; el and sl may even be wrong. */ - __bm_change_bits_to(mdev, s, e, 1, KM_USER0); + spin_lock_irq(&b->bm_lock); + __bm_change_bits_to(mdev, s, e, 1); + spin_unlock_irq(&b->bm_lock); return; } /* difference is large enough that we can trust sl and el */ + spin_lock_irq(&b->bm_lock); + /* bits filling the current long */ if (sl) - __bm_change_bits_to(mdev, s, sl-1, 1, KM_USER0); + __bm_change_bits_to(mdev, s, sl-1, 1); first_page = sl >> (3 + PAGE_SHIFT); last_page = el >> (3 + PAGE_SHIFT); @@ -1193,8 +1525,10 @@ /* first and full pages, unless first page == last page */ for (page_nr = first_page; page_nr < last_page; page_nr++) { bm_set_full_words_within_one_page(mdev->bitmap, page_nr, first_word, last_word); + spin_unlock_irq(&b->bm_lock); cond_resched(); first_word = 0; + spin_lock_irq(&b->bm_lock); } /* last page (respectively only page, for first page == last page) */ @@ -1207,7 +1541,8 @@ * it would trigger an assert in __bm_change_bits_to() */ if (el <= e) - __bm_change_bits_to(mdev, el, e, 1, KM_USER0); + __bm_change_bits_to(mdev, el, e, 1); + spin_unlock_irq(&b->bm_lock); } /* returns bit state @@ -1224,16 +1559,17 @@ unsigned long *p_addr; int i; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); - if (bm_is_locked(b)) + if (BM_DONT_TEST & b->bm_flags) bm_print_lock_info(mdev); if (bitnr < b->bm_bits) { - unsigned long offset = bitnr>>LN2_BPL; - p_addr = bm_map_paddr(b, offset); - i = test_bit(bitnr & BPP_MASK, p_addr) ? 
1 : 0; + p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr)); + i = test_bit_le(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 1 : 0; bm_unmap(p_addr); } else if (bitnr == b->bm_bits) { i = -1; @@ -1251,34 +1587,35 @@ { unsigned long flags; struct drbd_bitmap *b = mdev->bitmap; - unsigned long *p_addr = NULL, page_nr = -1; + unsigned long *p_addr = NULL; unsigned long bitnr; + unsigned int page_nr = -1U; int c = 0; - size_t w; /* If this is called without a bitmap, that is a bug. But just to be * robust in case we screwed up elsewhere, in that case pretend there * was one dirty bit in the requested area, so we won't try to do a * local read there (no bitmap probably implies no disk) */ - ERR_IF(!b) return 1; - ERR_IF(!b->bm_pages) return 1; + if (!expect(b)) + return 1; + if (!expect(b->bm_pages)) + return 1; spin_lock_irqsave(&b->bm_lock, flags); - if (bm_is_locked(b)) + if (BM_DONT_TEST & b->bm_flags) bm_print_lock_info(mdev); for (bitnr = s; bitnr <= e; bitnr++) { - w = bitnr >> LN2_BPL; - if (page_nr != w >> (PAGE_SHIFT - LN2_BPL + 3)) { - page_nr = w >> (PAGE_SHIFT - LN2_BPL + 3); + unsigned int idx = bm_bit_to_page_idx(b, bitnr); + if (page_nr != idx) { + page_nr = idx; if (p_addr) bm_unmap(p_addr); - p_addr = bm_map_paddr(b, w); + p_addr = bm_map_pidx(b, idx); } - ERR_IF (bitnr >= b->bm_bits) { + if (expect(bitnr < b->bm_bits)) + c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); + else dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); - } else { - c += (0 != test_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); - } } if (p_addr) bm_unmap(p_addr); @@ -1308,11 +1645,13 @@ unsigned long flags; unsigned long *p_addr, *bm; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irqsave(&b->bm_lock, flags); - if (bm_is_locked(b)) + if (BM_DONT_TEST & b->bm_flags) bm_print_lock_info(mdev); s = S2W(enr); @@ -1320,13 +1659,10 @@ count = 0; if (s < b->bm_words) { 
int n = e-s; - p_addr = bm_map_paddr(b, s); + p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s)); bm = p_addr + MLPP(s); - while (n--) { - catch_oob_access_start(); + while (n--) count += hweight_long(*bm++); - catch_oob_access_end(); - } bm_unmap(p_addr); } else { dev_err(DEV, "start offset (%d) too large in drbd_bm_e_weight\n", s); @@ -1338,18 +1674,22 @@ return count; } -/* set all bits covered by the AL-extent al_enr */ +/* Set all bits covered by the AL-extent al_enr. + * Returns number of bits changed. */ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) { struct drbd_bitmap *b = mdev->bitmap; unsigned long *p_addr, *bm; unsigned long weight; - int count, s, e, i, do_now; - ERR_IF(!b) return 0; - ERR_IF(!b->bm_pages) return 0; + unsigned long s, e; + int count, i, do_now; + if (!expect(b)) + return 0; + if (!expect(b->bm_pages)) + return 0; spin_lock_irq(&b->bm_lock); - if (bm_is_locked(b)) + if (BM_DONT_SET & b->bm_flags) bm_print_lock_info(mdev); weight = b->bm_set; @@ -1361,13 +1701,11 @@ count = 0; if (s < b->bm_words) { i = do_now = e-s; - p_addr = bm_map_paddr(b, s); + p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s)); bm = p_addr + MLPP(s); while (i--) { - catch_oob_access_start(); count += hweight_long(*bm); *bm = -1UL; - catch_oob_access_end(); bm++; } bm_unmap(p_addr); @@ -1375,7 +1713,7 @@ if (e == b->bm_words) b->bm_set -= bm_clear_surplus(b); } else { - dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s); + dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s); } weight = b->bm_set - weight; spin_unlock_irq(&b->bm_lock); diff -Nru drbd8-8.3.7/drbd/drbd_buildtag.c drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_buildtag.c --- drbd8-8.3.7/drbd/drbd_buildtag.c 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_buildtag.c 2012-09-03 22:37:15.000000000 +0000 @@ -2,6 +2,6 @@ #include const char *drbd_buildtag(void) { - return "GIT-hash: 
ea9e28dbff98e331a62bcbcc63a6135808fe2917" - " build by ivoks@lucid, 2010-02-19 17:53:12"; + return "GIT-hash: e3169387b068d825dd433287f7fd7ba48ed07919 debian/changelog" + " build by ildefonso@rexy, 2012-09-03 18:07:15"; } diff -Nru drbd8-8.3.7/drbd/drbd_int.h drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_int.h --- drbd8-8.3.7/drbd/drbd_int.h 2010-01-07 09:09:58.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_int.h 2012-02-02 14:09:14.000000000 +0000 @@ -37,8 +37,16 @@ #include #include #include +#include #include #include +#include +#include +#include +#include + +#include "compat.h" +#include "drbd_state.h" #ifdef __CHECKER__ # define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr"))) @@ -75,7 +83,6 @@ extern unsigned int minor_count; extern int disable_sendpage; extern int allow_oos; -extern unsigned int cn_idx; #ifdef DRBD_ENABLE_FAULTS extern int enable_faults; @@ -93,14 +100,6 @@ #include #include -/* XXX do we need this? */ -#ifndef TRUE -#define TRUE 1 -#endif -#ifndef FALSE -#define FALSE 0 -#endif - /* I don't remember why XCPU ... 
* This is used to wake the asender, * and to interrupt sending the sending task @@ -117,19 +116,14 @@ */ #define DRBD_SIGKILL SIGHUP -/* All EEs on the free list should have ID_VACANT (== 0) - * freshly allocated EEs get !ID_VACANT (== 1) - * so if it says "cannot dereference null pointer at adress 0x00000001", - * it is most likely one of these :( */ - #define ID_IN_SYNC (4711ULL) #define ID_OUT_OF_SYNC (4712ULL) - #define ID_SYNCER (-1ULL) -#define ID_VACANT 0 -#define is_syncer_block_id(id) ((id) == ID_SYNCER) + +#define UUID_NEW_BM_OFFSET ((u64)0x0001000000000000ULL) struct drbd_conf; +struct drbd_tconn; #ifdef DBG_ALL_SYMBOLS # define STATIC @@ -137,44 +131,11 @@ # define STATIC static #endif -#ifdef PARANOIA -# define PARANOIA_BUG_ON(x) BUG_ON(x) -#else -# define PARANOIA_BUG_ON(x) -#endif - -/* - * Some Message Macros - *************************/ - -/* handy macro: DUMPP(somepointer) */ -#define DUMPP(A) dev_err(DEV, #A " = %p in %s:%d\n", (A), __FILE__, __LINE__); -#define DUMPLU(A) dev_err(DEV, #A " = %lu in %s:%d\n", (unsigned long)(A), __FILE__, __LINE__); -#define DUMPLLU(A) dev_err(DEV, #A " = %llu in %s:%d\n", (unsigned long long)(A), __FILE__, __LINE__); -#define DUMPLX(A) dev_err(DEV, #A " = %lx in %s:%d\n", (A), __FILE__, __LINE__); -#define DUMPI(A) dev_err(DEV, #A " = %d in %s:%d\n", (int)(A), __FILE__, __LINE__); - -#define DUMPST(A) DUMPLLU((unsigned long long)(A)) - -#if 0 -#define D_DUMPP(A) DUMPP(A) -#define D_DUMPLU(A) DUMPLU(A) -#define D_DUMPLLU(A) DUMPLLU(A) -#define D_DUMPLX(A) DUMPLX(A) -#define D_DUMPI(A) DUMPI(A) -#else -#define D_DUMPP(A) -#define D_DUMPLU(A) -#define D_DUMPLLU(A) -#define D_DUMPLX(A) -#define D_DUMPI(A) -#endif - /* upstream kernel wants us to use dev_warn(), ... * dev_printk() expects to be presented a struct device *; * in older kernels, (<= 2.6.24), there is nothing suitable there. * "backport" hack: redefine dev_printk. 
- * Trigger is definition of dev_to_disk marcro, introduced with the + * Trigger is definition of dev_to_disk macro, introduced with the * commit edfaa7c36574f1bf09c65ad602412db9da5f96bf * Driver core: convert block from raw kobjects to core devices */ @@ -197,10 +158,18 @@ dev_printk(KERN_CRIT , dev , format , ## arg) #endif - +#define conn_printk(LEVEL, TCONN, FMT, ARGS...) \ + printk(LEVEL "d-con %s: " FMT, TCONN->name , ## ARGS) +#define conn_alert(TCONN, FMT, ARGS...) conn_printk(KERN_ALERT, TCONN, FMT, ## ARGS) +#define conn_crit(TCONN, FMT, ARGS...) conn_printk(KERN_CRIT, TCONN, FMT, ## ARGS) +#define conn_err(TCONN, FMT, ARGS...) conn_printk(KERN_ERR, TCONN, FMT, ## ARGS) +#define conn_warn(TCONN, FMT, ARGS...) conn_printk(KERN_WARNING, TCONN, FMT, ## ARGS) +#define conn_notice(TCONN, FMT, ARGS...) conn_printk(KERN_NOTICE, TCONN, FMT, ## ARGS) +#define conn_info(TCONN, FMT, ARGS...) conn_printk(KERN_INFO, TCONN, FMT, ## ARGS) +#define conn_dbg(TCONN, FMT, ARGS...) conn_printk(KERN_DEBUG, TCONN, FMT, ## ARGS) /* see kernel/printk.c:printk_ratelimit - * macro, so it is easy do have independend rate limits at different locations + * macro, so it is easy do have independent rate limits at different locations * "initializer element not constant ..." with kernel 2.4 :( * so I initialize toks to something large */ @@ -239,12 +208,19 @@ # define D_ASSERT(exp) if (!(exp)) \ dev_err(DEV, "ASSERT( " #exp " ) in %s:%d\n", __FILE__, __LINE__) #endif -#define ERR_IF(exp) if (({ \ - int _b = (exp) != 0; \ - if (_b) dev_err(DEV, "%s: (%s) in %s:%d\n", \ - __func__, #exp, __FILE__, __LINE__); \ - _b; \ - })) + +/** + * expect - Make an assertion + * + * Unlike the assert macro, this macro returns a boolean result. 
+ */ +#define expect(exp) ({ \ + bool _bool = (exp); \ + if (!_bool) \ + dev_err(DEV, "ASSERTION %s FAILED in %s\n", \ + #exp, __func__); \ + _bool; \ + }) /* Defines to control fault insertion */ enum { @@ -257,37 +233,24 @@ DRBD_FAULT_DT_RA = 6, /* data read ahead */ DRBD_FAULT_BM_ALLOC = 7, /* bitmap allocation */ DRBD_FAULT_AL_EE = 8, /* alloc ee */ + DRBD_FAULT_RECEIVE = 9, /* Changes some bytes upon receiving a [rs]data block */ DRBD_FAULT_MAX, }; -extern void trace_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, ...); - -#ifdef DRBD_ENABLE_FAULTS extern unsigned int _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type); + static inline int drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) { +#ifdef DRBD_ENABLE_FAULTS return fault_rate && (enable_faults & (1<= KERNEL_VERSION(2,6,8) -# define HAVE_KERNEL_SENDMSG 1 #else -# define HAVE_KERNEL_SENDMSG 0 + return 0; #endif +} /* * our structs @@ -300,14 +263,11 @@ (typecheck(struct drbd_conf*, x) && \ ((x) ? (((x)->magic ^ DRBD_MAGIC) == (long)(x)) : 0)) -/* drbd_meta-data.c (still in drbd_main.c) */ -/* 4th incarnation of the disk layout. */ -#define DRBD_MD_MAGIC (DRBD_MAGIC+4) - -extern struct drbd_conf **minor_table; +extern struct idr minors; /* RCU, updates: genl_lock() */ +extern struct list_head drbd_tconns; /* RCU, updates: genl_lock() */ /* on the wire */ -enum drbd_packets { +enum drbd_packet { /* receiver (data socket) */ P_DATA = 0x00, P_DATA_REPLY = 0x01, /* Response to P_DATA_REQUEST */ @@ -335,7 +295,7 @@ P_RECV_ACK = 0x15, /* Used in protocol B */ P_WRITE_ACK = 0x16, /* Used in protocol C */ P_RS_WRITE_ACK = 0x17, /* Is a P_WRITE_ACK, additionally call set_in_sync(). */ - P_DISCARD_ACK = 0x18, /* Used in proto C, two-primaries conflict detection */ + P_DISCARD_WRITE = 0x18, /* Used in proto C, two-primaries conflict detection */ P_NEG_ACK = 0x19, /* Sent if local disk is unusable */ P_NEG_DREPLY = 0x1a, /* Local disk is broken... 
*/ P_NEG_RS_DREPLY = 0x1b, /* Local disk is broken... */ @@ -351,72 +311,28 @@ P_RS_IS_IN_SYNC = 0x22, /* meta socket */ P_SYNC_PARAM89 = 0x23, /* data socket, protocol version 89 replacement for P_SYNC_PARAM */ P_COMPRESSED_BITMAP = 0x24, /* compressed or otherwise encoded bitmap transfer */ + /* P_CKPT_FENCE_REQ = 0x25, * currently reserved for protocol D */ + /* P_CKPT_DISABLE_REQ = 0x26, * currently reserved for protocol D */ + P_DELAY_PROBE = 0x27, /* is used on BOTH sockets */ + P_OUT_OF_SYNC = 0x28, /* Mark as out of sync (Outrunning), data socket */ + P_RS_CANCEL = 0x29, /* meta: Used to cancel RS_DATA_REQUEST packet by SyncSource */ + P_CONN_ST_CHG_REQ = 0x2a, /* data sock: Connection wide state request */ + P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */ + P_RETRY_WRITE = 0x2c, /* Protocol C: retry conflicting write request */ + P_PROTOCOL_UPDATE = 0x2d, /* data sock: is used in established connections */ - P_MAX_CMD = 0x25, P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ P_MAX_OPT_CMD = 0x101, /* special command ids for handshake */ - P_HAND_SHAKE_M = 0xfff1, /* First Packet on the MetaSock */ - P_HAND_SHAKE_S = 0xfff2, /* First Packet on the Socket */ + P_INITIAL_META = 0xfff1, /* First Packet on the MetaSock */ + P_INITIAL_DATA = 0xfff2, /* First Packet on the Socket */ - P_HAND_SHAKE = 0xfffe /* FIXED for the next century! */ + P_CONNECTION_FEATURES = 0xfffe /* FIXED for the next century! 
*/ }; -static inline const char *cmdname(enum drbd_packets cmd) -{ - /* THINK may need to become several global tables - * when we want to support more than - * one PRO_VERSION */ - static const char *cmdnames[] = { - [P_DATA] = "Data", - [P_DATA_REPLY] = "DataReply", - [P_RS_DATA_REPLY] = "RSDataReply", - [P_BARRIER] = "Barrier", - [P_BITMAP] = "ReportBitMap", - [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget", - [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource", - [P_UNPLUG_REMOTE] = "UnplugRemote", - [P_DATA_REQUEST] = "DataRequest", - [P_RS_DATA_REQUEST] = "RSDataRequest", - [P_SYNC_PARAM] = "SyncParam", - [P_SYNC_PARAM89] = "SyncParam89", - [P_PROTOCOL] = "ReportProtocol", - [P_UUIDS] = "ReportUUIDs", - [P_SIZES] = "ReportSizes", - [P_STATE] = "ReportState", - [P_SYNC_UUID] = "ReportSyncUUID", - [P_AUTH_CHALLENGE] = "AuthChallenge", - [P_AUTH_RESPONSE] = "AuthResponse", - [P_PING] = "Ping", - [P_PING_ACK] = "PingAck", - [P_RECV_ACK] = "RecvAck", - [P_WRITE_ACK] = "WriteAck", - [P_RS_WRITE_ACK] = "RSWriteAck", - [P_DISCARD_ACK] = "DiscardAck", - [P_NEG_ACK] = "NegAck", - [P_NEG_DREPLY] = "NegDReply", - [P_NEG_RS_DREPLY] = "NegRSDReply", - [P_BARRIER_ACK] = "BarrierAck", - [P_STATE_CHG_REQ] = "StateChgRequest", - [P_STATE_CHG_REPLY] = "StateChgReply", - [P_OV_REQUEST] = "OVRequest", - [P_OV_REPLY] = "OVReply", - [P_OV_RESULT] = "OVResult", - [P_MAX_CMD] = NULL, - }; - - if (cmd == P_HAND_SHAKE_M) - return "HandShakeM"; - if (cmd == P_HAND_SHAKE_S) - return "HandShakeS"; - if (cmd == P_HAND_SHAKE) - return "HandShake"; - if (cmd >= P_MAX_CMD) - return "Unknown"; - return cmdnames[cmd]; -} +extern const char *cmdname(enum drbd_packet cmd); /* for sending/receiving the bitmap, * possibly in some encoding scheme */ @@ -472,37 +388,41 @@ * NOTE that the payload starts at a long aligned offset, * regardless of 32 or 64 bit arch! 
*/ -struct p_header { +struct p_header80 { u32 magic; u16 command; u16 length; /* bytes of data after this header */ - u8 payload[0]; } __packed; -/* 8 bytes. packet FIXED for the next century! */ -/* - * short commands, packets without payload, plain p_header: - * P_PING - * P_PING_ACK - * P_BECOME_SYNC_TARGET - * P_BECOME_SYNC_SOURCE - * P_UNPLUG_REMOTE - */ +/* Header for big packets, Used for data packets exceeding 64kB */ +struct p_header95 { + u16 magic; /* use DRBD_MAGIC_BIG here */ + u16 command; + u32 length; +} __packed; -/* - * commands with out-of-struct payload: - * P_BITMAP (no additional fields) - * P_DATA, P_DATA_REPLY (see p_data) - * P_COMPRESSED_BITMAP (see receive_compressed_bitmap) - */ +struct p_header100 { + u32 magic; + u16 volume; + u16 command; + u32 length; + u32 pad; +} __packed; + +extern unsigned int drbd_header_size(struct drbd_tconn *tconn); /* these defines must not be changed without changing the protocol version */ -#define DP_HARDBARRIER 1 -#define DP_RW_SYNC 2 +#define DP_HARDBARRIER 1 /* no longer used */ +#define DP_RW_SYNC 2 /* equals REQ_SYNC */ #define DP_MAY_SET_IN_SYNC 4 +#define DP_UNPLUG 8 /* not used anymore */ +#define DP_FUA 16 /* equals REQ_FUA */ +#define DP_FLUSH 32 /* equals REQ_FLUSH */ +#define DP_DISCARD 64 /* equals REQ_DISCARD */ +#define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */ +#define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */ struct p_data { - struct p_header head; u64 sector; /* 64 bits sector number */ u64 block_id; /* to identify the request in protocol B&C */ u32 seq_num; @@ -513,21 +433,18 @@ * commands which share a struct: * p_block_ack: * P_RECV_ACK (proto B), P_WRITE_ACK (proto C), - * P_DISCARD_ACK (proto C, two-primaries conflict detection) + * P_DISCARD_WRITE (proto C, two-primaries conflict detection) * p_block_req: * P_DATA_REQUEST, P_RS_DATA_REQUEST */ struct p_block_ack { - struct p_header head; u64 sector; u64 block_id; u32 blksize; u32 seq_num; } 
__packed; - struct p_block_req { - struct p_header head; u64 sector; u64 block_id; u32 blksize; @@ -536,63 +453,71 @@ /* * commands with their own struct for additional fields: - * P_HAND_SHAKE + * P_CONNECTION_FEATURES * P_BARRIER * P_BARRIER_ACK * P_SYNC_PARAM * ReportParams */ -struct p_handshake { - struct p_header head; /* 8 bytes */ +struct p_connection_features { u32 protocol_min; u32 feature_flags; u32 protocol_max; /* should be more than enough for future enhancements - * for now, feature_flags and the reserverd array shall be zero. + * for now, feature_flags and the reserved array shall be zero. */ u32 _pad; - u64 reserverd[7]; + u64 reserved[7]; } __packed; -/* 80 bytes, FIXED for the next century */ struct p_barrier { - struct p_header head; u32 barrier; /* barrier number _handle_ only */ u32 pad; /* to multiple of 8 Byte */ } __packed; struct p_barrier_ack { - struct p_header head; u32 barrier; u32 set_size; } __packed; struct p_rs_param { - struct p_header head; - u32 rate; + u32 resync_rate; /* Since protocol version 88 and higher. */ char verify_alg[0]; } __packed; struct p_rs_param_89 { - struct p_header head; - u32 rate; + u32 resync_rate; /* protocol version 89: */ char verify_alg[SHARED_SECRET_MAX]; char csums_alg[SHARED_SECRET_MAX]; } __packed; +struct p_rs_param_95 { + u32 resync_rate; + char verify_alg[SHARED_SECRET_MAX]; + char csums_alg[SHARED_SECRET_MAX]; + u32 c_plan_ahead; + u32 c_delay_target; + u32 c_fill_target; + u32 c_max_rate; +} __packed; + +enum drbd_conn_flags { + CF_DISCARD_MY_DATA = 1, + CF_DRY_RUN = 2, +}; + struct p_protocol { - struct p_header head; u32 protocol; u32 after_sb_0p; u32 after_sb_1p; u32 after_sb_2p; - u32 want_lose; + u32 conn_flags; u32 two_primaries; /* Since protocol version 87 and higher. 
*/ @@ -601,37 +526,32 @@ } __packed; struct p_uuids { - struct p_header head; u64 uuid[UI_EXTENDED_SIZE]; } __packed; struct p_rs_uuid { - struct p_header head; u64 uuid; } __packed; struct p_sizes { - struct p_header head; u64 d_size; /* size of disk */ u64 u_size; /* user requested size */ u64 c_size; /* current exported size */ - u32 max_segment_size; /* Maximal size of a BIO */ - u32 queue_order_type; + u32 max_bio_size; /* Maximal size of a BIO */ + u16 queue_order_type; /* not yet implemented in DRBD*/ + u16 dds_flags; /* use enum dds_flags here. */ } __packed; struct p_state { - struct p_header head; u32 state; } __packed; struct p_req_state { - struct p_header head; u32 mask; u32 val; } __packed; struct p_req_state_reply { - struct p_header head; u32 retcode; } __packed; @@ -646,12 +566,17 @@ } __packed; struct p_discard { - struct p_header head; u64 block_id; u32 seq_num; u32 pad; } __packed; +struct p_block_desc { + u64 sector; + u32 blksize; + u32 pad; /* to multiple of 8 Byte */ +} __packed; + /* Valid values for the encoding field. * Bump proto version when changing this. */ enum drbd_bitmap_code { @@ -662,7 +587,6 @@ }; struct p_compressed_bm { - struct p_header head; /* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code * (encoding & 0x80): polarity (set/unset) of first runlength * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits @@ -673,81 +597,23 @@ u8 code[0]; } __packed; -/* DCBP: Drbd Compressed Bitmap Packet ... 
*/ -static inline enum drbd_bitmap_code -DCBP_get_code(struct p_compressed_bm *p) -{ - return (enum drbd_bitmap_code)(p->encoding & 0x0f); -} - -static inline void -DCBP_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code) -{ - BUG_ON(code & ~0xf); - p->encoding = (p->encoding & ~0xf) | code; -} - -static inline int -DCBP_get_start(struct p_compressed_bm *p) -{ - return (p->encoding & 0x80) != 0; -} - -static inline void -DCBP_set_start(struct p_compressed_bm *p, int set) -{ - p->encoding = (p->encoding & ~0x80) | (set ? 0x80 : 0); -} - -static inline int -DCBP_get_pad_bits(struct p_compressed_bm *p) -{ - return (p->encoding >> 4) & 0x7; -} - -static inline void -DCBP_set_pad_bits(struct p_compressed_bm *p, int n) -{ - BUG_ON(n & ~0x7); - p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4); -} - -/* one bitmap packet, including the p_header, - * should fit within one _architecture independend_ page. - * so we need to use the fixed size 4KiB page size - * most architechtures have used for a long time. 
- */ -#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header)) -#define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long)) -#define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm)) -#if (PAGE_SIZE < 4096) -/* drbd_send_bitmap / receive_bitmap would break horribly */ -#error "PAGE_SIZE too small" -#endif - -union p_polymorph { - struct p_header header; - struct p_handshake handshake; - struct p_data data; - struct p_block_ack block_ack; - struct p_barrier barrier; - struct p_barrier_ack barrier_ack; - struct p_rs_param_89 rs_param_89; - struct p_protocol protocol; - struct p_sizes sizes; - struct p_uuids uuids; - struct p_state state; - struct p_req_state req_state; - struct p_req_state_reply req_state_reply; - struct p_block_req block_req; +struct p_delay_probe93 { + u32 seq_num; /* sequence number to match the two probe packets */ + u32 offset; /* usecs the probe got sent after the reference time point */ } __packed; +/* + * Bitmap packets need to fit within a single page on the sender and receiver, + * so we are limited to 4 KiB (and not to PAGE_SIZE, which can be bigger). + */ +#define DRBD_SOCKET_BUFFER_SIZE 4096 + /**********************************************************************/ enum drbd_thread_state { - None, - Running, - Exiting, - Restarting + NONE, + RUNNING, + EXITING, + RESTARTING }; struct drbd_thread { @@ -756,8 +622,9 @@ struct completion startstop; enum drbd_thread_state t_state; int (*function) (struct drbd_thread *); - struct drbd_conf *mdev; + struct drbd_tconn *tconn; int reset_cpu_mask; + char name[9]; }; static inline enum drbd_thread_state get_t_state(struct drbd_thread *thi) @@ -770,34 +637,29 @@ return thi->t_state; } - -/* - * Having this as the first member of a struct provides sort of "inheritance". - * "derived" structs can be "drbd_queue_work()"ed. - * The callback should know and cast back to the descendant struct. - * drbd_request and drbd_epoch_entry are descendants of drbd_work. 
- */ -struct drbd_work; -typedef int (*drbd_work_cb)(struct drbd_conf *, struct drbd_work *, int cancel); struct drbd_work { struct list_head list; - drbd_work_cb cb; + int (*cb)(struct drbd_work *, int cancel); + union { + struct drbd_conf *mdev; + struct drbd_tconn *tconn; + }; }; -struct drbd_tl_epoch; +#include "drbd_interval.h" + +extern int drbd_wait_misc(struct drbd_conf *, struct drbd_interval *); + struct drbd_request { struct drbd_work w; - struct drbd_conf *mdev; /* if local IO is not allowed, will be NULL. * if local IO _is_ allowed, holds the locally submitted bio clone, * or, after local IO completion, the ERR_PTR(error). - * see drbd_endio_pri(). */ + * see drbd_request_endio(). */ struct bio *private_bio; - struct hlist_node colision; - sector_t sector; - unsigned int size; + struct drbd_interval i; unsigned int epoch; /* barrier_nr */ /* barrier_nr: used to check on "completion" whether this req was in @@ -805,9 +667,6 @@ * starting a new epoch... */ - /* up to here, the struct layout is identical to drbd_epoch_entry; - * we might be able to use that to our advantage... */ - struct list_head tl_requests; /* ring list in the transfer log */ struct bio *master_bio; /* master bio pointer */ unsigned long rq_state; /* see comments above _req_mod() */ @@ -820,19 +679,11 @@ struct list_head requests; /* requests before */ struct drbd_tl_epoch *next; /* pointer to the next barrier */ unsigned int br_number; /* the barriers identifier. */ - int n_req; /* number of requests attached before this barrier */ + int n_writes; /* number of requests attached before this barrier */ }; -struct drbd_request; - -/* These Tl_epoch_entries may be in one of 6 lists: - active_ee .. data packet being written - sync_ee .. syncer block being written - done_ee .. block written, need to send P_WRITE_ACK - read_ee .. 
[RS]P_DATA_REQUEST being read -*/ - struct drbd_epoch { + struct drbd_conf *mdev; struct list_head list; unsigned int barrier_nr; atomic_t epoch_size; /* increased on every request added. */ @@ -854,30 +705,9 @@ EV_GOT_BARRIER_NR, EV_BARRIER_DONE, EV_BECAME_LAST, - EV_TRACE_FLUSH, /* TRACE_ are not real events, only used for tracing */ - EV_TRACE_ADD_BARRIER, /* Doing the first write as a barrier write */ - EV_TRACE_SETTING_BI, /* Barrier is expressed with the first write of the next epoch */ - EV_TRACE_ALLOC, - EV_TRACE_FREE, EV_CLEANUP = 32, /* used as flag */ }; -struct drbd_epoch_entry { - struct drbd_work w; - struct drbd_conf *mdev; - struct bio *private_bio; - struct hlist_node colision; - sector_t sector; - unsigned int size; - struct drbd_epoch *epoch; - - /* up to here, the struct layout is identical to drbd_request; - * we might be able to use that to our advantage... */ - - unsigned int flags; - u64 block_id; -}; - struct drbd_wq_barrier { struct drbd_work w; struct completion done; @@ -888,37 +718,77 @@ void *digest; }; -/* ee flag bits */ +struct drbd_peer_request { + struct drbd_work w; + struct drbd_epoch *epoch; /* for writes */ + struct page *pages; + atomic_t pending_bios; + struct drbd_interval i; + /* see comments on ee flag bits below */ + unsigned long flags; + union { + u64 block_id; + struct digest_info *digest; + }; +}; + +/* ee flag bits. + * While corresponding bios are in flight, the only modification will be + * set_bit WAS_ERROR, which has to be atomic. + * If no bios are in flight yet, or all have been completed, + * non-atomic modification to ee->flags is ok. + */ enum { __EE_CALL_AL_COMPLETE_IO, - __EE_CONFLICT_PENDING, __EE_MAY_SET_IN_SYNC, + + /* This peer request closes an epoch using a barrier. + * On successful completion, the epoch is released, + * and the P_BARRIER_ACK send. */ __EE_IS_BARRIER, + + /* In case a barrier failed, + * we need to resubmit without the barrier flag. 
*/ + __EE_RESUBMITTED, + + /* we may have several bios per peer request. + * if any of those fail, we set this flag atomically + * from the endio callback */ + __EE_WAS_ERROR, + + /* This ee has a pointer to a digest instead of a block id */ + __EE_HAS_DIGEST, + + /* Conflicting local requests need to be restarted after this request */ + __EE_RESTART_REQUESTS, + + /* The peer wants a write ACK for this (wire proto C) */ + __EE_SEND_WRITE_ACK, + + /* Is set when net_conf had two_primaries set while creating this peer_req */ + __EE_IN_INTERVAL_TREE, }; #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) -#define EE_CONFLICT_PENDING (1<<__EE_CONFLICT_PENDING) #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) #define EE_IS_BARRIER (1<<__EE_IS_BARRIER) +#define EE_RESUBMITTED (1<<__EE_RESUBMITTED) +#define EE_WAS_ERROR (1<<__EE_WAS_ERROR) +#define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) +#define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS) +#define EE_SEND_WRITE_ACK (1<<__EE_SEND_WRITE_ACK) +#define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE) -/* global flag bits */ +/* flag bits per mdev */ enum { - CREATE_BARRIER, /* next P_DATA is preceeded by a P_BARRIER */ - SIGNAL_ASENDER, /* whether asender wants to be interrupted */ - SEND_PING, /* whether asender should send a ping asap */ - - STOP_SYNC_TIMER, /* tell timer to cancel itself */ UNPLUG_QUEUED, /* only relevant with kernel 2.4 */ UNPLUG_REMOTE, /* sending a "UnplugRemote" could help */ MD_DIRTY, /* current uuids and flags not yet on disk */ - DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ USE_DEGR_WFC_T, /* degr-wfc-timeout instead of wfc-timeout. */ - CLUSTER_ST_CHANGE, /* Cluster wide state change going on... */ CL_ST_CHG_SUCCESS, CL_ST_CHG_FAIL, CRASHED_PRIMARY, /* This node was a crashed primary. * Gets cleared when the state.conn * goes into C_CONNECTED state. 
*/ - WRITE_BM_AFTER_RESYNC, /* A kmalloc() during resync failed */ NO_BARRIER_SUPP, /* underlying block device doesn't implement barriers */ CONSIDER_RESYNC, @@ -928,30 +798,52 @@ BITMAP_IO, /* suspend application io; once no more io in flight, start bitmap io */ BITMAP_IO_QUEUED, /* Started bitmap IO */ + GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ + WAS_IO_ERROR, /* Local disk failed returned IO error */ RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ - NET_CONGESTED, /* The data socket is congested */ - - CONFIG_PENDING, /* serialization of (re)configuration requests. - * if set, also prevents the device from dying */ - DEVICE_DYING, /* device became unconfigured, - * but worker thread is still handling the cleanup. - * reconfiguring (nl_disk_conf, nl_net_conf) is dissalowed, - * while this is set. */ RESIZE_PENDING, /* Size change detected locally, waiting for the response from * the peer, if it changed there as well. */ + NEW_CUR_UUID, /* Create new current UUID when thawing IO */ + AL_SUSPENDED, /* Activity logging is currently suspended. */ + AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ + B_RS_H_DONE, /* Before resync handler done (already executed) */ + DISCARD_MY_DATA, /* discard_my_data flag per volume */ + READ_BALANCE_RR, }; struct drbd_bitmap; /* opaque for drbd_conf */ +/* definition of bits in bm_flags to be used in drbd_bm_lock + * and drbd_bitmap_io and friends. */ +enum bm_flag { + /* do we need to kfree, or vfree bm_pages? */ + BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */ + + /* currently locked for bulk operation */ + BM_LOCKED_MASK = 0x7, + + /* in detail, that is: */ + BM_DONT_CLEAR = 0x1, + BM_DONT_SET = 0x2, + BM_DONT_TEST = 0x4, + + /* (test bit, count bit) allowed (common case) */ + BM_LOCKED_TEST_ALLOWED = 0x3, + + /* testing bits, as well as setting new bits allowed, but clearing bits + * would be unexpected. Used during bitmap receive. 
Setting new bits + * requires sending of "out-of-sync" information, though. */ + BM_LOCKED_SET_ALLOWED = 0x1, + + /* clear is not expected while bitmap is locked for bulk operation */ +}; + + /* TODO sort members for performance * MAYBE group them further */ /* THINK maybe we actually want to use the default "event/%s" worker threads * or similar in linux 2.6, which uses per cpu data and threads. - * - * To be general, this might need a spin_lock member. - * For now, please use the mdev->req_lock to protect list_head, - * see drbd_queue_work below. */ struct drbd_work_queue { struct list_head q; @@ -970,8 +862,8 @@ struct socket *socket; /* this way we get our * send/receive buffers off the stack */ - union p_polymorph sbuf; - union p_polymorph rbuf; + void *sbuf; + void *rbuf; }; struct drbd_md { @@ -987,38 +879,28 @@ s32 bm_offset; /* signed relative sector offset to bitmap */ /* u32 al_nr_extents; important for restoring the AL - * is stored into sync_conf.al_extents, which in turn + * is stored into ldev->dc.al_extents, which in turn * gets applied to act_log->nr_elements */ }; -/* for sync_conf and other types... */ -#define NL_PACKET(name, number, fields) struct name { fields }; -#define NL_INTEGER(pn,pr,member) int member; -#define NL_INT64(pn,pr,member) __u64 member; -#define NL_BIT(pn,pr,member) unsigned member:1; -#define NL_STRING(pn,pr,member,len) unsigned char member[len]; int member ## _len; -#include "linux/drbd_nl.h" - struct drbd_backing_dev { struct block_device *backing_bdev; struct block_device *md_bdev; - struct file *lo_file; - struct file *md_file; struct drbd_md md; - struct disk_conf dc; /* The user provided config... 
*/ + struct disk_conf *disk_conf; /* RCU, for updates: mdev->tconn->conf_update */ sector_t known_size; /* last known size of that backing device */ }; struct drbd_md_io { - struct drbd_conf *mdev; - struct completion event; + unsigned int done; int error; }; struct bm_io_work { struct drbd_work w; char *why; + enum bm_flag flags; int (*io_fn)(struct drbd_conf *mdev); void (*done)(struct drbd_conf *mdev, int rv); }; @@ -1030,16 +912,97 @@ WO_bio_barrier }; +struct fifo_buffer { + unsigned int head_index; + unsigned int size; + int total; /* sum of all values */ + int values[0]; +}; +extern struct fifo_buffer *fifo_alloc(int fifo_size); + +/* flag bits per tconn */ +enum { + NET_CONGESTED, /* The data socket is congested */ + DISCARD_CONCURRENT, /* Set on one node, cleared on the peer! */ + SEND_PING, /* whether asender should send a ping asap */ + SIGNAL_ASENDER, /* whether asender wants to be interrupted */ + GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */ + CONN_WD_ST_CHG_OKAY, + CONN_WD_ST_CHG_FAIL, + CONN_DRY_RUN, /* Expect disconnect after resync handshake. 
*/ + CREATE_BARRIER, /* next P_DATA is preceded by a P_BARRIER */ +}; + +struct drbd_tconn { /* is a resource from the config file */ + char *name; /* Resource name */ + struct list_head all_tconn; /* linked on global drbd_tconns */ + struct kref kref; + struct idr volumes; /* to mdev mapping */ + enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ + unsigned susp:1; /* IO suspended by user */ + unsigned susp_nod:1; /* IO suspended because no data */ + unsigned susp_fen:1; /* IO suspended because fence peer handler runs */ + struct mutex cstate_mutex; /* Protects graceful disconnects */ + + unsigned long flags; + struct net_conf *net_conf; /* content protected by rcu */ + struct mutex conf_update; /* mutex for ready-copy-update of net_conf and disk_conf */ + wait_queue_head_t ping_wait; /* Woken upon reception of a ping, and a state change */ + struct res_opts res_opts; + + struct sockaddr_storage my_addr; + int my_addr_len; + struct sockaddr_storage peer_addr; + int peer_addr_len; + + struct drbd_socket data; /* data/barrier/cstate/parameter packets */ + struct drbd_socket meta; /* ping/ack (metadata) packets */ + int agreed_pro_version; /* actually used protocol version */ + unsigned long last_received; /* in jiffies, either socket */ + unsigned int ko_count; + + spinlock_t req_lock; + struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */ + struct drbd_tl_epoch *newest_tle; + struct drbd_tl_epoch *oldest_tle; + struct list_head out_of_sequence_requests; + struct list_head barrier_acked_requests; + + struct crypto_hash *cram_hmac_tfm; + struct crypto_hash *integrity_tfm; /* checksums we compute, updates protected by tconn->data->mutex */ + struct crypto_hash *peer_integrity_tfm; /* checksums we verify, only accessed from receiver thread */ + struct crypto_hash *csums_tfm; + struct crypto_hash *verify_tfm; + void *int_dig_in; + void *int_dig_vv; + + struct drbd_epoch *current_epoch; + spinlock_t epoch_lock; + unsigned int epochs; + enum 
write_ordering_e write_ordering; + + struct drbd_thread receiver; + struct drbd_thread worker; + struct drbd_thread asender; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) && !defined(cpumask_bits) + cpumask_t cpu_mask[1]; +#else + cpumask_var_t cpu_mask; +#endif +}; + struct drbd_conf { #ifdef PARANOIA long magic; #endif + struct drbd_tconn *tconn; + int vnr; /* volume number within the connection */ + struct kref kref; + /* things that are stored as / read from meta data on disk */ unsigned long flags; /* configured by drbdsetup */ - struct net_conf *net_conf; /* protected by get_net_conf() and put_net_conf() */ - struct syncer_conf sync_conf; struct drbd_backing_dev *ldev __protected_by(local); sector_t p_size; /* partner's disk size */ @@ -1047,21 +1010,26 @@ struct block_device *this_bdev; struct gendisk *vdisk; - struct drbd_socket data; /* data/barrier/cstate/parameter packets */ - struct drbd_socket meta; /* ping/ack (metadata) packets */ - int agreed_pro_version; /* actually used protocol version */ - unsigned long last_received; /* in jiffies, either socket */ - unsigned int ko_count; struct drbd_work resync_work, unplug_work, - md_sync_work; + go_diskless, + md_sync_work, + start_resync_work; struct timer_list resync_timer; struct timer_list md_sync_timer; + struct timer_list start_resync_timer; + struct timer_list request_timer; +#ifdef DRBD_DEBUG_MD_SYNC + struct { + unsigned int line; + const char* func; + } last_md_mark_dirty; +#endif /* Used after attach while negotiating new disk state. */ union drbd_state new_state_tmp; - union drbd_state state; + union drbd_dev_state state; wait_queue_head_t misc_wait; wait_queue_head_t state_wait; /* upon each state change. 
*/ unsigned int send_cnt; @@ -1073,31 +1041,31 @@ atomic_t ap_bio_cnt; /* Requests we need to complete */ atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */ atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ - atomic_t unacked_cnt; /* Need to send replys for */ + atomic_t unacked_cnt; /* Need to send replies for */ atomic_t local_cnt; /* Waiting for local completion */ - atomic_t net_cnt; /* Users of net_conf */ - spinlock_t req_lock; - struct drbd_tl_epoch *unused_spare_tle; /* for pre-allocation */ - struct drbd_tl_epoch *newest_tle; - struct drbd_tl_epoch *oldest_tle; - struct list_head out_of_sequence_requests; - struct hlist_head *tl_hash; - unsigned int tl_hash_s; - /* blocks to sync in this run [unit BM_BLOCK_SIZE] */ + /* Interval tree of pending local write requests */ + struct rb_root read_requests; + struct rb_root write_requests; + + /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ unsigned long rs_total; - /* number of sync IOs that failed in this run */ + /* number of resync blocks that failed in this run */ unsigned long rs_failed; /* Syncer's start time [unit jiffies] */ unsigned long rs_start; /* cumulated time in PausedSyncX state [unit jiffies] */ unsigned long rs_paused; + /* skipped because csum was equal [unit BM_BLOCK_SIZE] */ + unsigned long rs_same_csum; +#define DRBD_SYNC_MARKS 8 +#define DRBD_SYNC_MARK_STEP (3*HZ) /* block not up-to-date at mark [unit BM_BLOCK_SIZE] */ - unsigned long rs_mark_left; + unsigned long rs_mark_left[DRBD_SYNC_MARKS]; /* marks's time [unit jiffies] */ - unsigned long rs_mark_time; - /* skipped because csum was equeal [unit BM_BLOCK_SIZE] */ - unsigned long rs_same_csum; + unsigned long rs_mark_time[DRBD_SYNC_MARKS]; + /* current index into rs_mark_{left,time} */ + int rs_last_mark; /* where does the admin want us to start? (sector) */ sector_t ov_start_sector; @@ -1108,12 +1076,7 @@ /* size of out-of-sync range in sectors. 
*/ sector_t ov_last_oos_size; unsigned long ov_left; /* in bits */ - struct crypto_hash *csums_tfm; - struct crypto_hash *verify_tfm; - struct drbd_thread receiver; - struct drbd_thread worker; - struct drbd_thread asender; struct drbd_bitmap *bitmap; unsigned long bm_resync_fo; /* bit offset for drbd_bm_find_next */ @@ -1128,65 +1091,54 @@ u64 *p_uuid; /* FIXME clean comments, restructure so it is more obvious which * members are protected by what */ - struct drbd_epoch *current_epoch; - spinlock_t epoch_lock; - unsigned int epochs; - enum write_ordering_e write_ordering; - struct list_head active_ee; /* IO in progress */ - struct list_head sync_ee; /* IO in progress */ - struct list_head done_ee; /* send ack */ - struct list_head read_ee; /* IO in progress */ - struct list_head net_ee; /* zero-copy network send in progress */ - struct hlist_head *ee_hash; /* is proteced by req_lock! */ - unsigned int ee_hash_s; - /* this one is protected by ee_lock, single thread */ - struct drbd_epoch_entry *last_write_w_barrier; + struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */ + struct list_head sync_ee; /* IO in progress (P_RS_DATA_REPLY gets written to disk) */ + struct list_head done_ee; /* need to send P_WRITE_ACK */ + struct list_head read_ee; /* [RS]P_DATA_REQUEST being read */ + struct list_head net_ee; /* zero-copy network send in progress */ int next_barrier_nr; - struct hlist_head *app_reads_hash; /* is proteced by req_lock */ struct list_head resync_reads; - atomic_t pp_in_use; + atomic_t pp_in_use; /* allocated from page pool */ + atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ wait_queue_head_t ee_wait; struct page *md_io_page; /* one page buffer for md_io */ - struct page *md_io_tmpp; /* for logical_block_size != 512 */ - struct mutex md_io_mutex; /* protects the md_io_buffer */ + struct drbd_md_io md_io; + atomic_t md_io_in_use; /* protects the md_io, md_io_page and md_io_tmpp */ spinlock_t al_lock; 
wait_queue_head_t al_wait; struct lru_cache *act_log; /* activity log */ unsigned int al_tr_number; int al_tr_cycle; int al_tr_pos; /* position of the next transaction in the journal */ - struct crypto_hash *cram_hmac_tfm; - struct crypto_hash *integrity_w_tfm; /* to be used by the worker thread */ - struct crypto_hash *integrity_r_tfm; /* to be used by the receiver thread */ - void *int_dig_out; - void *int_dig_in; - void *int_dig_vv; wait_queue_head_t seq_wait; atomic_t packet_seq; unsigned int peer_seq; spinlock_t peer_seq_lock; unsigned int minor; unsigned long comm_bm_set; /* communicated number of set bits. */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) && !defined(cpumask_bits) - cpumask_t cpu_mask[1]; -#else - cpumask_var_t cpu_mask; -#endif struct bm_io_work bm_io_work; u64 ed_uuid; /* UUID of the exposed data */ - struct mutex state_mutex; + struct mutex own_state_mutex; + struct mutex *state_mutex; /* either own_state_mutex or mdev->tconn->cstate_mutex */ char congestion_reason; /* Why we where congested... */ + atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */ + atomic_t rs_sect_ev; /* for submitted resync data rate, both */ + int rs_last_sect_ev; /* counter to compare with */ + int rs_last_events; /* counter of read or write "events" (unit sectors) + * on the lower level device when we last looked. */ + int c_sync_rate; /* current resync rate after syncer throttle magic */ + struct fifo_buffer *rs_plan_s; /* correction values of resync planer (RCU, tconn->conn_update) */ + int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */ + atomic_t ap_in_flight; /* App sectors in flight (waiting for ack) */ + int peer_max_bio_size; + int local_max_bio_size; }; static inline struct drbd_conf *minor_to_mdev(unsigned int minor) { - struct drbd_conf *mdev; - - mdev = minor < minor_count ? 
minor_table[minor] : NULL; - - return mdev; + return (struct drbd_conf *)idr_find(&minors, minor); } static inline unsigned int mdev_to_minor(struct drbd_conf *mdev) @@ -1194,29 +1146,9 @@ return mdev->minor; } -/* returns 1 if it was successfull, - * returns 0 if there was no data socket. - * so wherever you are going to use the data.socket, e.g. do - * if (!drbd_get_data_sock(mdev)) - * return 0; - * CODE(); - * drbd_put_data_sock(mdev); - */ -static inline int drbd_get_data_sock(struct drbd_conf *mdev) -{ - mutex_lock(&mdev->data.mutex); - /* drbd_disconnect() could have called drbd_free_sock() - * while we were waiting in down()... */ - if (unlikely(mdev->data.socket == NULL)) { - mutex_unlock(&mdev->data.mutex); - return 0; - } - return 1; -} - -static inline void drbd_put_data_sock(struct drbd_conf *mdev) +static inline struct drbd_conf *vnr_to_mdev(struct drbd_tconn *tconn, int vnr) { - mutex_unlock(&mdev->data.mutex); + return (struct drbd_conf *)idr_find(&tconn->volumes, vnr); } /* @@ -1225,93 +1157,76 @@ /* drbd_main.c */ -enum chg_state_flags { - CS_HARD = 1, - CS_VERBOSE = 2, - CS_WAIT_COMPLETE = 4, - CS_SERIALIZE = 8, - CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, +enum dds_flags { + DDSF_FORCED = 1, + DDSF_NO_RESYNC = 2, /* Do not run a resync for the new space */ }; extern void drbd_init_set_defaults(struct drbd_conf *mdev); -extern int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, - union drbd_state mask, union drbd_state val); -extern void drbd_force_state(struct drbd_conf *, union drbd_state, - union drbd_state); -extern int _drbd_request_state(struct drbd_conf *, union drbd_state, - union drbd_state, enum chg_state_flags); -extern int __drbd_set_state(struct drbd_conf *, union drbd_state, - enum chg_state_flags, struct completion *done); -extern void print_st_err(struct drbd_conf *, union drbd_state, - union drbd_state, int); extern int drbd_thread_start(struct drbd_thread *thi); extern void _drbd_thread_stop(struct 
drbd_thread *thi, int restart, int wait); +extern char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task); #ifdef CONFIG_SMP -extern void drbd_thread_current_set_cpu(struct drbd_conf *mdev); -extern void drbd_calc_cpu_mask(struct drbd_conf *mdev); +extern void drbd_thread_current_set_cpu(struct drbd_thread *thi); +extern void drbd_calc_cpu_mask(struct drbd_tconn *tconn); #else #define drbd_thread_current_set_cpu(A) ({}) #define drbd_calc_cpu_mask(A) ({}) #endif -extern void drbd_free_resources(struct drbd_conf *mdev); -extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, +extern void tl_release(struct drbd_tconn *, unsigned int barrier_nr, unsigned int set_size); -extern void tl_clear(struct drbd_conf *mdev); -extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *); -extern void drbd_free_sock(struct drbd_conf *mdev); -extern int drbd_send(struct drbd_conf *mdev, struct socket *sock, - void *buf, size_t size, unsigned msg_flags); -extern int drbd_send_protocol(struct drbd_conf *mdev); +extern void tl_clear(struct drbd_tconn *); +extern void _tl_add_barrier(struct drbd_tconn *, struct drbd_tl_epoch *); +extern void drbd_free_sock(struct drbd_tconn *tconn); +extern int drbd_send(struct drbd_tconn *tconn, struct socket *sock, + void *buf, size_t size, unsigned msg_flags); +extern int drbd_send_all(struct drbd_tconn *, struct socket *, void *, size_t, + unsigned); + +extern int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd); +extern int drbd_send_protocol(struct drbd_tconn *tconn); extern int drbd_send_uuids(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); -extern int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val); -extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply); -extern int _drbd_send_state(struct drbd_conf *mdev); -extern int drbd_send_state(struct drbd_conf *mdev); -extern int _drbd_send_cmd(struct drbd_conf 
*mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header *h, - size_t size, unsigned msg_flags); -#define USE_DATA_SOCKET 1 -#define USE_META_SOCKET 0 -extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packets cmd, struct p_header *h, - size_t size); -extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, - char *data, size_t size); -extern int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc); -extern int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, - u32 set_size); -extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, - struct drbd_epoch_entry *e); -extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, - struct p_block_req *rp); -extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, - struct p_data *dp); -extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, +extern void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); +extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); +#define drbd_send_state(m, s) drbd_send_state_(m, s, __func__ , __LINE__ ) +#define drbd_send_current_state(m) drbd_send_current_state_(m, __func__ , __LINE__ ) +extern int drbd_send_state_(struct drbd_conf *mdev, + union drbd_state s, + const char *func, unsigned int line); +extern int drbd_send_current_state_(struct drbd_conf *mdev, + const char *func, unsigned int line); +extern int drbd_send_sync_param(struct drbd_conf *mdev); +extern void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, + u32 set_size); +extern int drbd_send_ack(struct drbd_conf *, enum drbd_packet, + struct drbd_peer_request *); +extern void drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, + struct p_block_req *rp); +extern void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, + struct p_data *dp, int data_size); +extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum 
drbd_packet cmd, sector_t sector, int blksize, u64 block_id); -extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, - struct drbd_epoch_entry *e); +extern int drbd_send_out_of_sync(struct drbd_conf *, struct drbd_request *); +extern int drbd_send_block(struct drbd_conf *, enum drbd_packet, + struct drbd_peer_request *); extern int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req); -extern int _drbd_send_barrier(struct drbd_conf *mdev, - struct drbd_tl_epoch *barrier); extern int drbd_send_drequest(struct drbd_conf *mdev, int cmd, sector_t sector, int size, u64 block_id); -extern int drbd_send_drequest_csum(struct drbd_conf *mdev, - sector_t sector,int size, - void *digest, int digest_size, - enum drbd_packets cmd); +extern int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, + int size, void *digest, int digest_size, + enum drbd_packet cmd); extern int drbd_send_ov_request(struct drbd_conf *mdev,sector_t sector,int size); extern int drbd_send_bitmap(struct drbd_conf *mdev); -extern int _drbd_send_bitmap(struct drbd_conf *mdev); -extern int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode); +extern void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode); +extern void conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode); extern void drbd_free_bc(struct drbd_backing_dev *ldev); extern void drbd_mdev_cleanup(struct drbd_conf *mdev); +void drbd_print_uuids(struct drbd_conf *mdev, const char *text); -/* drbd_meta-data.c (still in drbd_main.c) */ extern void drbd_md_sync(struct drbd_conf *mdev); extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); -/* maybe define them below as inline? 
*/ extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); extern void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); extern void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local); @@ -1320,37 +1235,63 @@ extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local); extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local); extern int drbd_md_test_flag(struct drbd_backing_dev *, int); +#ifndef DRBD_DEBUG_MD_SYNC extern void drbd_md_mark_dirty(struct drbd_conf *mdev); +#else +#define drbd_md_mark_dirty(m) drbd_md_mark_dirty_(m, __LINE__ , __func__ ) +extern void drbd_md_mark_dirty_(struct drbd_conf *mdev, + unsigned int line, const char *func); +#endif extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), void (*done)(struct drbd_conf *, int), - char *why); + char *why, enum bm_flag flags); +extern int drbd_bitmap_io(struct drbd_conf *mdev, + int (*io_fn)(struct drbd_conf *), + char *why, enum bm_flag flags); extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); -extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why); - +extern void drbd_go_diskless(struct drbd_conf *mdev); +extern void drbd_ldev_destroy(struct drbd_conf *mdev); /* Meta data layout We reserve a 128MB Block (4k aligned) * either at the end of the backing device - * or on a seperate meta data device. */ + * or on a separate meta data device. 
*/ -#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */ /* The following numbers are sectors */ -#define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */ -#define MD_AL_MAX_SIZE 64 /* = 32 kb LOG ~ 3776 extents ~ 14 GB Storage */ -/* Allows up to about 3.8TB */ -#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE) - -/* Since the smalles IO unit is usually 512 byte */ -#define MD_SECTOR_SHIFT 9 -#define MD_SECTOR_SIZE (1< local node thinks this block needs to be synced. */ -#define BM_BLOCK_SHIFT 12 /* 4k per bit */ +#define SLEEP_TIME (HZ/10) + +/* We do bitmap IO in units of 4k blocks. + * We also still have a hardcoded 4k per bit relation. */ +#define BM_BLOCK_SHIFT 12 /* 4k per bit */ #define BM_BLOCK_SIZE (1< BIO_MAX_SIZE +#error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE +#endif +#define DRBD_MAX_BIO_SIZE_SAFE (1 << 12) /* Works always = 4k */ -/* Number of elements in the app_reads_hash */ -#define APP_R_HSIZE 15 +#define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* Header 80 only allows packets up to 32KiB data */ +#define DRBD_MAX_BIO_SIZE_P95 (1 << 17) /* Protocol 95 to 99 allows bios up to 128KiB */ extern int drbd_bm_init(struct drbd_conf *mdev); -extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors); +extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new_bits); extern void drbd_bm_cleanup(struct drbd_conf *mdev); extern void drbd_bm_set_all(struct drbd_conf *mdev); extern void drbd_bm_clear_all(struct drbd_conf *mdev); +/* set/clear/test only a few bits at a time */ extern int drbd_bm_set_bits( struct drbd_conf *mdev, unsigned long s, unsigned long e); extern int drbd_bm_clear_bits( struct drbd_conf *mdev, unsigned long s, unsigned long e); -/* bm_set_bits variant for use while holding drbd_bm_lock */ +extern int drbd_bm_count_bits( + struct drbd_conf *mdev, const unsigned long s, const unsigned long e); +/* bm_set_bits variant for use while holding drbd_bm_lock, + * may process 
the whole bitmap in one go */ extern void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e); extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr); extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); -extern int drbd_bm_write_sect(struct drbd_conf *mdev, unsigned long enr) __must_hold(local); +extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local); extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); +extern void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr); extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); +extern int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local); extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr); extern size_t drbd_bm_words(struct drbd_conf *mdev); extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); + +#define DRBD_END_OF_BITMAP (~(unsigned long)0) extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); /* bm_find_next variants for use while you hold drbd_bm_lock() */ extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); extern unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo); +extern unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev); extern unsigned long drbd_bm_total_weight(struct drbd_conf *mdev); extern int drbd_bm_rs_done(struct drbd_conf *mdev); /* for receive_bitmap */ extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, unsigned long *buffer); -/* for _drbd_send_bitmap and drbd_bm_write_sect */ +/* for _drbd_send_bitmap */ extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, unsigned long *buffer); -extern void drbd_bm_lock(struct drbd_conf *mdev, char *why); +extern void drbd_bm_lock(struct 
drbd_conf *mdev, char *why, enum bm_flag flags); extern void drbd_bm_unlock(struct drbd_conf *mdev); - -extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e); /* drbd_main.c */ /* needs to be included here, @@ -1508,26 +1457,63 @@ #include "drbd_wrappers.h" extern struct kmem_cache *drbd_request_cache; -extern struct kmem_cache *drbd_ee_cache; /* epoch entries */ +extern struct kmem_cache *drbd_ee_cache; /* peer requests */ extern struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ extern mempool_t *drbd_request_mempool; extern mempool_t *drbd_ee_mempool; -extern struct page *drbd_pp_pool; /* drbd's page pool */ +/* drbd's page pool, used to buffer data received from the peer, + * or data requested by the peer. + * + * This does not have an emergency reserve. + * + * When allocating from this pool, it first takes pages from the pool. + * Only if the pool is depleted will try to allocate from the system. + * + * The assumption is that pages taken from this pool will be processed, + * and given back, "quickly", and then can be recycled, so we can avoid + * frequent calls to alloc_page(), and still will be able to make progress even + * under memory pressure. + */ +extern struct page *drbd_pp_pool; extern spinlock_t drbd_pp_lock; extern int drbd_pp_vacant; extern wait_queue_head_t drbd_pp_wait; +/* We also need a standard (emergency-reserve backed) page pool + * for meta data IO (activity log, bitmap). + * We can keep it global, as long as it is used as "N pages at a time". + * 128 should be plenty, currently we probably can get away with as few as 1. 
+ */ +#define DRBD_MIN_POOL_PAGES 128 +extern mempool_t *drbd_md_io_page_pool; + +/* We also need to make sure we get a bio + * when we need it for housekeeping purposes */ +extern struct bio_set *drbd_md_io_bio_set; +/* to allocate from that set */ +extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); + extern rwlock_t global_state_lock; -extern struct drbd_conf *drbd_new_device(unsigned int minor); -extern void drbd_free_mdev(struct drbd_conf *mdev); +extern int conn_lowest_minor(struct drbd_tconn *tconn); +enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr); +extern void drbd_minor_destroy(struct kref *kref); + +extern int set_resource_options(struct drbd_tconn *tconn, struct res_opts *res_opts); +extern struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts); +extern void conn_destroy(struct kref *kref); +struct drbd_tconn *conn_get_by_name(const char *name); +extern struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len, + void *peer_addr, int peer_addr_len); +extern void conn_free_crypto(struct drbd_tconn *tconn); extern int proc_details; /* drbd_req */ -extern int drbd_make_request_26(struct request_queue *q, struct bio *bio); +extern int __drbd_make_request(struct drbd_conf *, struct bio *, unsigned long); +extern MAKE_REQUEST_TYPE drbd_make_request(struct request_queue *q, struct bio *bio); extern int drbd_read_remote(struct drbd_conf *mdev, struct drbd_request *req); extern int drbd_merge_bvec(struct request_queue *q, #ifdef HAVE_bvec_merge_data @@ -1540,32 +1526,40 @@ /* drbd_nl.c */ +extern int drbd_msg_put_info(const char *info); extern void drbd_suspend_io(struct drbd_conf *mdev); extern void drbd_resume_io(struct drbd_conf *mdev); extern char *ppsize(char *buf, unsigned long long size); -extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int); +extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int); enum 
determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; -extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, int force) __must_hold(local); +extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local); extern void resync_after_online_grow(struct drbd_conf *); -extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); -extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, - int force); -enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev); +extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev); +extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev, + enum drbd_role new_role, + int force); +extern bool conn_try_outdate_peer(struct drbd_tconn *tconn); +extern void conn_try_outdate_peer_async(struct drbd_tconn *tconn); extern int drbd_khelper(struct drbd_conf *mdev, char *cmd); /* drbd_worker.c */ extern int drbd_worker(struct drbd_thread *thi); -extern int drbd_alter_sa(struct drbd_conf *mdev, int na); +enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor); +void drbd_resync_after_changed(struct drbd_conf *mdev); extern void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side); extern void resume_next_sg(struct drbd_conf *mdev); extern void suspend_other_sg(struct drbd_conf *mdev); extern int drbd_resync_finished(struct drbd_conf *mdev); /* maybe rather drbd_main.c ? 
*/ +extern void *drbd_md_get_buffer(struct drbd_conf *mdev); +extern void drbd_md_put_buffer(struct drbd_conf *mdev); extern int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw); -extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); +extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int); +extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done); +extern void drbd_rs_controller_reset(struct drbd_conf *mdev); -static inline void ov_oos_print(struct drbd_conf *mdev) +static inline void ov_out_of_sync_print(struct drbd_conf *mdev) { if (mdev->ov_last_oos_size) { dev_err(DEV, "Out of sync: start=%llu, size=%lu (sectors)\n", @@ -1576,88 +1570,105 @@ } -extern void drbd_csum(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); +extern void drbd_csum_bio(struct drbd_conf *, struct crypto_hash *, struct bio *, void *); +extern void drbd_csum_ee(struct drbd_conf *, struct crypto_hash *, + struct drbd_peer_request *, void *); /* worker callbacks */ -extern int w_req_cancel_conflict(struct drbd_conf *, struct drbd_work *, int); -extern int w_read_retry_remote(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_end_data_req(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_end_rsdata_req(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_end_csum_rs_req(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_end_ov_reply(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); -extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); -extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int); -extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); -extern int w_io_error(struct drbd_conf *, struct drbd_work *, int); -extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); -extern int 
w_make_resync_request(struct drbd_conf *, struct drbd_work *, int); -extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); -extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int); -extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int); -extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int); -extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int); +extern int w_read_retry_remote(struct drbd_work *, int); +extern int w_e_end_data_req(struct drbd_work *, int); +extern int w_e_end_rsdata_req(struct drbd_work *, int); +extern int w_e_end_csum_rs_req(struct drbd_work *, int); +extern int w_e_end_ov_reply(struct drbd_work *, int); +extern int w_e_end_ov_req(struct drbd_work *, int); +extern int w_ov_finished(struct drbd_work *, int); +extern int w_resync_timer(struct drbd_work *, int); +extern int w_send_write_hint(struct drbd_work *, int); +extern int w_make_resync_request(struct drbd_work *, int); +extern int w_send_dblock(struct drbd_work *, int); +extern int w_send_barrier(struct drbd_work *, int); +extern int w_send_read_req(struct drbd_work *, int); +extern int w_prev_work_done(struct drbd_work *, int); +extern int w_e_reissue(struct drbd_work *, int); +extern int w_restart_disk_io(struct drbd_work *, int); +extern int w_send_out_of_sync(struct drbd_work *, int); +extern int w_start_resync(struct drbd_work *, int); extern void resync_timer_fn(unsigned long data); +extern void start_resync_timer_fn(unsigned long data); /* drbd_receiver.c */ -extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list); -extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, - u64 id, - sector_t sector, - unsigned int data_size, - gfp_t gfp_mask) __must_hold(local); -extern void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e); -extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev, - struct list_head *head); -extern void _drbd_wait_ee_list_empty(struct 
drbd_conf *mdev, - struct list_head *head); +extern int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector); +extern int drbd_submit_peer_request(struct drbd_conf *, + struct drbd_peer_request *, const unsigned, + const int); +extern int drbd_free_peer_reqs(struct drbd_conf *, struct list_head *); +extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_conf *, u64, + sector_t, unsigned int, + gfp_t) __must_hold(local); +extern void __drbd_free_peer_req(struct drbd_conf *, struct drbd_peer_request *, + int); +#define drbd_free_peer_req(m,e) __drbd_free_peer_req(m, e, 0) +#define drbd_free_net_peer_req(m,e) __drbd_free_peer_req(m, e, 1) +extern struct page *drbd_alloc_pages(struct drbd_conf *, unsigned int, bool); extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled); extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed); -extern void drbd_flush_workqueue(struct drbd_conf *mdev); +extern void conn_flush_workqueue(struct drbd_tconn *tconn); +extern int drbd_connected(struct drbd_conf *mdev); +static inline void drbd_flush_workqueue(struct drbd_conf *mdev) +{ + conn_flush_workqueue(mdev->tconn); +} -/* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to - * mess with get_fs/set_fs, we know we are KERNEL_DS always. */ +/* Yes, there is kernel_setsockopt, but only since 2.6.18. + * So we have our own copy of it here. 
*/ static inline int drbd_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, int optlen) + char *optval, int optlen) { + mm_segment_t oldfs = get_fs(); + char __user *uoptval; int err; + + uoptval = (char __user __force *)optval; + + set_fs(KERNEL_DS); if (level == SOL_SOCKET) - err = sock_setsockopt(sock, level, optname, optval, optlen); + err = sock_setsockopt(sock, level, optname, uoptval, optlen); else - err = sock->ops->setsockopt(sock, level, optname, optval, + err = sock->ops->setsockopt(sock, level, optname, uoptval, optlen); + set_fs(oldfs); return err; } static inline void drbd_tcp_cork(struct socket *sock) { - int __user val = 1; + int val = 1; (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, - (char __user *)&val, sizeof(val)); + (char*)&val, sizeof(val)); } static inline void drbd_tcp_uncork(struct socket *sock) { - int __user val = 0; + int val = 0; (void) drbd_setsockopt(sock, SOL_TCP, TCP_CORK, - (char __user *)&val, sizeof(val)); + (char*)&val, sizeof(val)); } static inline void drbd_tcp_nodelay(struct socket *sock) { - int __user val = 1; + int val = 1; (void) drbd_setsockopt(sock, SOL_TCP, TCP_NODELAY, - (char __user *)&val, sizeof(val)); + (char*)&val, sizeof(val)); } static inline void drbd_tcp_quickack(struct socket *sock) { - int __user val = 1; + int val = 2; (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, - (char __user *)&val, sizeof(val)); + (char*)&val, sizeof(val)); } -void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo); +void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo); /* drbd_proc.c */ extern struct proc_dir_entry *drbd_proc; @@ -1666,8 +1677,8 @@ extern const char *drbd_role_str(enum drbd_role s); /* drbd_actlog.c */ -extern void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector); -extern void drbd_al_complete_io(struct drbd_conf *mdev, sector_t sector); +extern void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i); 
+extern void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i); extern void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector); extern int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector); extern int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector); @@ -1675,110 +1686,78 @@ extern int drbd_rs_del_all(struct drbd_conf *mdev); extern void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size); -extern int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *); +extern void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go); extern void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size, const char *file, const unsigned int line); #define drbd_set_in_sync(mdev, sector, size) \ __drbd_set_in_sync(mdev, sector, size, __FILE__, __LINE__) -extern void __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, +extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size, const char *file, const unsigned int line); #define drbd_set_out_of_sync(mdev, sector, size) \ __drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__) -extern void drbd_al_apply_to_bm(struct drbd_conf *mdev); -extern void drbd_al_to_on_disk_bm(struct drbd_conf *mdev); extern void drbd_al_shrink(struct drbd_conf *mdev); - /* drbd_nl.c */ +/* state info broadcast */ +struct sib_info { + enum drbd_state_info_bcast_reason sib_reason; + union { + struct { + char *helper_name; + unsigned helper_exit_code; + }; + struct { + union drbd_state os; + union drbd_state ns; + }; + }; +}; +void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib); -void drbd_nl_cleanup(void); -int __init drbd_nl_init(void); -void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state); -void drbd_bcast_sync_progress(struct drbd_conf *mdev); -void drbd_bcast_ee(struct drbd_conf *mdev, - const char *reason, const int dgs, - const char* seen_hash, const char* calc_hash, 
- const struct drbd_epoch_entry* e); - - -/** - * DOC: DRBD State macros - * - * These macros are used to express state changes in easily readable form. - * - * The NS macros expand to a mask and a value, that can be bit ored onto the - * current state as soon as the spinlock (req_lock) was taken. - * - * The _NS macros are used for state functions that get called with the - * spinlock. These macros expand directly to the new state value. - * - * Besides the basic forms NS() and _NS() additional _?NS[23] are defined - * to express state changes that affect more than one aspect of the state. - * - * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY) - * Means that the network connection was established and that the peer - * is in secondary role. - */ -#define role_MASK R_MASK -#define peer_MASK R_MASK -#define disk_MASK D_MASK -#define pdsk_MASK D_MASK -#define conn_MASK C_MASK -#define susp_MASK 1 -#define user_isp_MASK 1 -#define aftr_isp_MASK 1 - -/* drbd state debug */ -#if DRBD_DEBUG_STATE_CHANGES -#define DRBD_STATE_DEBUG_INIT_VAL(s) ({ (s).line = __LINE__; (s).func = __func__; }) -#else -#define DRBD_STATE_DEBUG_INIT_VAL(s) do { } while (0) -#endif - -#define NS(T, S) \ - ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \ - ({ union drbd_state val; DRBD_STATE_DEBUG_INIT_VAL(val); val.i = 0; val.T = (S); val; }) -#define NS2(T1, S1, T2, S2) \ - ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ - mask.T2 = T2##_MASK; mask; }), \ - ({ union drbd_state val; DRBD_STATE_DEBUG_INIT_VAL(val); val.i = 0; val.T1 = (S1); \ - val.T2 = (S2); val; }) -#define NS3(T1, S1, T2, S2, T3, S3) \ - ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ - mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \ - ({ union drbd_state val; DRBD_STATE_DEBUG_INIT_VAL(val); val.i = 0; val.T1 = (S1); \ - val.T2 = (S2); val.T3 = (S3); val; }) - -#define _NS(D, T, S) \ - D, ({ union drbd_state __ns; DRBD_STATE_DEBUG_INIT_VAL(__ns); __ns.i = D->state.i; __ns.T = 
(S); __ns; }) -#define _NS2(D, T1, S1, T2, S2) \ - D, ({ union drbd_state __ns; DRBD_STATE_DEBUG_INIT_VAL(__ns); __ns.i = D->state.i; __ns.T1 = (S1); \ - __ns.T2 = (S2); __ns; }) -#define _NS3(D, T1, S1, T2, S2, T3, S3) \ - D, ({ union drbd_state __ns; DRBD_STATE_DEBUG_INIT_VAL(__ns); __ns.i = D->state.i; __ns.T1 = (S1); \ - __ns.T2 = (S2); __ns.T3 = (S3); __ns; }) /* * inline helper functions *************************/ -static inline void drbd_state_lock(struct drbd_conf *mdev) +/* see also page_chain_add and friends in drbd_receiver.c */ +static inline struct page *page_chain_next(struct page *page) { - wait_event(mdev->misc_wait, - !test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags)); + return (struct page *)page_private(page); } +#define page_chain_for_each(page) \ + for (; page && ({ prefetch(page_chain_next(page)); 1; }); \ + page = page_chain_next(page)) +#define page_chain_for_each_safe(page, n) \ + for (; page && ({ n = page_chain_next(page); 1; }); page = n) -static inline void drbd_state_unlock(struct drbd_conf *mdev) +static inline int drbd_bio_has_active_page(struct bio *bio) { - clear_bit(CLUSTER_ST_CHANGE, &mdev->flags); - wake_up(&mdev->misc_wait); + struct bio_vec *bvec; + int i; + + __bio_for_each_segment(bvec, bio, i, 0) { + if (page_count(bvec->bv_page) > 1) + return 1; + } + + return 0; } -static inline int _drbd_set_state(struct drbd_conf *mdev, - union drbd_state ns, enum chg_state_flags flags, - struct completion *done) +static inline int drbd_peer_req_has_active_page(struct drbd_peer_request *peer_req) { - int rv; + struct page *page = peer_req->pages; + page_chain_for_each(page) { + if (page_count(page) > 1) + return 1; + } + return 0; +} + +static inline enum drbd_state_rv +_drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, + enum chg_state_flags flags, struct completion *done) +{ + enum drbd_state_rv rv; read_lock(&global_state_lock); rv = __drbd_set_state(mdev, ns, flags, done); @@ -1787,41 +1766,43 @@ return rv; } -/** - * 
drbd_request_state() - Reqest a state change - * @mdev: DRBD device. - * @mask: mask of state bits to change. - * @val: value of new state bits. - * - * This is the most graceful way of requesting a state change. It is verbose - * quite verbose in case the state change is not possible, and all those - * state changes are globally serialized. - */ -static inline int drbd_request_state(struct drbd_conf *mdev, - union drbd_state mask, - union drbd_state val) +static inline union drbd_state drbd_read_state(struct drbd_conf *mdev) { - return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); + union drbd_state rv; + + rv.i = mdev->state.i; + rv.susp = mdev->tconn->susp; + rv.susp_nod = mdev->tconn->susp_nod; + rv.susp_fen = mdev->tconn->susp_fen; + + return rv; } #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__) static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, const char *where) { - switch (mdev->ldev->dc.on_io_error) { + enum drbd_io_error_p ep; + + rcu_read_lock(); + ep = rcu_dereference(mdev->ldev->disk_conf)->on_io_error; + rcu_read_unlock(); + switch (ep) { case EP_PASS_ON: /* FIXME would this be better named "Ignore"? */ if (!forcedetach) { - if (printk_ratelimit()) - dev_err(DEV, "Local IO failed in %s." - "Passing error on...\n", where); + if (DRBD_ratelimit(5*HZ, 5)) + dev_err(DEV, "Local IO failed in %s.\n", where); + if (mdev->state.disk > D_INCONSISTENT) + _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL); break; } /* NOTE fall through to detach case if forcedetach set */ case EP_DETACH: case EP_CALL_HELPER: + set_bit(WAS_IO_ERROR, &mdev->flags); if (mdev->state.disk > D_FAILED) { _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); - dev_err(DEV, "Local IO failed in %s." - "Detaching...\n", where); + dev_err(DEV, + "Local IO failed in %s. 
Detaching...\n", where); } break; } @@ -1841,9 +1822,9 @@ { if (error) { unsigned long flags; - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); __drbd_chk_io_error_(mdev, forcedetach, where); - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); } } @@ -1855,9 +1836,9 @@ * BTW, for internal meta data, this happens to be the maximum capacity * we could agree upon with our peer node. */ -static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev) +static inline sector_t _drbd_md_first_sector(int meta_dev_idx, struct drbd_backing_dev *bdev) { - switch (bdev->dc.meta_dev_idx) { + switch (meta_dev_idx) { case DRBD_MD_INDEX_INTERNAL: case DRBD_MD_INDEX_FLEX_INT: return bdev->md.md_offset + bdev->md.bm_offset; @@ -1867,13 +1848,30 @@ } } +static inline sector_t drbd_md_first_sector(struct drbd_backing_dev *bdev) +{ + int meta_dev_idx; + + rcu_read_lock(); + meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; + rcu_read_unlock(); + + return _drbd_md_first_sector(meta_dev_idx, bdev); +} + /** * drbd_md_last_sector() - Return the last sector number of the meta data area * @bdev: Meta data block device. 
*/ static inline sector_t drbd_md_last_sector(struct drbd_backing_dev *bdev) { - switch (bdev->dc.meta_dev_idx) { + int meta_dev_idx; + + rcu_read_lock(); + meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; + rcu_read_unlock(); + + switch (meta_dev_idx) { case DRBD_MD_INDEX_INTERNAL: case DRBD_MD_INDEX_FLEX_INT: return bdev->md.md_offset + MD_AL_OFFSET - 1; @@ -1894,12 +1892,18 @@ static inline sector_t drbd_get_max_capacity(struct drbd_backing_dev *bdev) { sector_t s; - switch (bdev->dc.meta_dev_idx) { + int meta_dev_idx; + + rcu_read_lock(); + meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; + rcu_read_unlock(); + + switch (meta_dev_idx) { case DRBD_MD_INDEX_INTERNAL: case DRBD_MD_INDEX_FLEX_INT: s = drbd_get_capacity(bdev->backing_bdev) ? min_t(sector_t, DRBD_MAX_SECTORS_FLEX, - drbd_md_first_sector(bdev)) + _drbd_md_first_sector(meta_dev_idx, bdev)) : 0; break; case DRBD_MD_INDEX_FLEX_EXT: @@ -1925,9 +1929,15 @@ static inline sector_t drbd_md_ss__(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { - switch (bdev->dc.meta_dev_idx) { + int meta_dev_idx; + + rcu_read_lock(); + meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; + rcu_read_unlock(); + + switch (meta_dev_idx) { default: /* external, some index */ - return MD_RESERVED_SECT * bdev->dc.meta_dev_idx; + return MD_RESERVED_SECT * meta_dev_idx; case DRBD_MD_INDEX_INTERNAL: /* with drbd08, internal meta data is always "flexible" */ case DRBD_MD_INDEX_FLEX_INT: @@ -1948,13 +1958,6 @@ } static inline void -_drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) -{ - list_add_tail(&w->list, &q->q); - up(&q->s); -} - -static inline void drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w) { unsigned long flags; @@ -1976,50 +1979,45 @@ spin_unlock_irqrestore(&q->q_lock, flags); } -static inline void wake_asender(struct drbd_conf *mdev) -{ - if (test_bit(SIGNAL_ASENDER, &mdev->flags)) - force_sig(DRBD_SIG, mdev->asender.task); -} - -static 
inline void request_ping(struct drbd_conf *mdev) +static inline void wake_asender(struct drbd_tconn *tconn) { - set_bit(SEND_PING, &mdev->flags); - wake_asender(mdev); + if (test_bit(SIGNAL_ASENDER, &tconn->flags)) + force_sig(DRBD_SIG, tconn->asender.task); } -static inline int drbd_send_short_cmd(struct drbd_conf *mdev, - enum drbd_packets cmd) +static inline void request_ping(struct drbd_tconn *tconn) { - struct p_header h; - return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h)); + set_bit(SEND_PING, &tconn->flags); + wake_asender(tconn); } -static inline int drbd_send_ping(struct drbd_conf *mdev) -{ - struct p_header h; - return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h)); -} +extern void *conn_prepare_command(struct drbd_tconn *, struct drbd_socket *); +extern void *drbd_prepare_command(struct drbd_conf *, struct drbd_socket *); +extern int conn_send_command(struct drbd_tconn *, struct drbd_socket *, + enum drbd_packet, unsigned int, void *, + unsigned int); +extern int drbd_send_command(struct drbd_conf *, struct drbd_socket *, + enum drbd_packet, unsigned int, void *, + unsigned int); -static inline int drbd_send_ping_ack(struct drbd_conf *mdev) -{ - struct p_header h; - return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h)); -} +extern int drbd_send_ping(struct drbd_tconn *tconn); +extern int drbd_send_ping_ack(struct drbd_tconn *tconn); +extern int drbd_send_state_req(struct drbd_conf *, union drbd_state, union drbd_state); +extern int conn_send_state_req(struct drbd_tconn *, union drbd_state, union drbd_state); static inline void drbd_thread_stop(struct drbd_thread *thi) { - _drbd_thread_stop(thi, FALSE, TRUE); + _drbd_thread_stop(thi, false, true); } static inline void drbd_thread_stop_nowait(struct drbd_thread *thi) { - _drbd_thread_stop(thi, FALSE, FALSE); + _drbd_thread_stop(thi, false, false); } static inline void drbd_thread_restart_nowait(struct drbd_thread *thi) { - _drbd_thread_stop(thi, TRUE, FALSE); + 
_drbd_thread_stop(thi, true, false); } /* counts how many answer packets packets we expect from our peer, @@ -2027,22 +2025,22 @@ * or implicit barrier packets as necessary. * increased: * w_send_barrier - * _req_mod(req, queue_for_net_write or queue_for_net_read); + * _req_mod(req, QUEUE_FOR_NET_WRITE or QUEUE_FOR_NET_READ); * it is much easier and equally valid to count what we queue for the * worker, even before it actually was queued or send. * (drbd_make_request_common; recovery path on read io-error) * decreased: * got_BarrierAck (respective tl_clear, tl_clear_barrier) - * _req_mod(req, data_received) + * _req_mod(req, DATA_RECEIVED) * [from receive_DataReply] - * _req_mod(req, write_acked_by_peer or recv_acked_by_peer or neg_acked) + * _req_mod(req, WRITE_ACKED_BY_PEER or RECV_ACKED_BY_PEER or NEG_ACKED) * [from got_BlockAck (P_WRITE_ACK, P_RECV_ACK)] * FIXME * for some reason it is NOT decreased in got_NegAck, * but in the resulting cleanup code from report_params. * we should try to remember the reason for that... 
- * _req_mod(req, send_failed or send_canceled) - * _req_mod(req, connection_lost_while_pending) + * _req_mod(req, SEND_FAILED or SEND_CANCELED) + * _req_mod(req, CONNECTION_LOST_WHILE_PENDING) * [from tl_clear_barrier] */ static inline void inc_ap_pending(struct drbd_conf *mdev) @@ -2050,22 +2048,24 @@ atomic_inc(&mdev->ap_pending_cnt); } -#define ERR_IF_CNT_IS_NEGATIVE(which) \ - if (atomic_read(&mdev->which) < 0) \ +#define ERR_IF_CNT_IS_NEGATIVE(which, func, line) \ + if (atomic_read(&mdev->which) < 0) \ dev_err(DEV, "in %s:%d: " #which " = %d < 0 !\n", \ - __func__ , __LINE__ , \ - atomic_read(&mdev->which)) + func, line, \ + atomic_read(&mdev->which)) -#define dec_ap_pending(mdev) do { \ - typecheck(struct drbd_conf *, mdev); \ - if (atomic_dec_and_test(&mdev->ap_pending_cnt)) \ - wake_up(&mdev->misc_wait); \ - ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt); } while (0) +#define dec_ap_pending(mdev) _dec_ap_pending(mdev, __FUNCTION__, __LINE__) +static inline void _dec_ap_pending(struct drbd_conf *mdev, const char *func, int line) +{ + if (atomic_dec_and_test(&mdev->ap_pending_cnt)) + wake_up(&mdev->misc_wait); + ERR_IF_CNT_IS_NEGATIVE(ap_pending_cnt, func, line); +} /* counts how many resync-related answers we still expect from the peer * increase decrease * C_SYNC_TARGET sends P_RS_DATA_REQUEST (and expects P_RS_DATA_REPLY) - * C_SYNC_SOURCE sends P_RS_DATA_REPLY (and expects P_WRITE_ACK whith ID_SYNCER) + * C_SYNC_SOURCE sends P_RS_DATA_REPLY (and expects P_WRITE_ACK with ID_SYNCER) * (or P_NEG_ACK with ID_SYNCER) */ static inline void inc_rs_pending(struct drbd_conf *mdev) @@ -2073,10 +2073,12 @@ atomic_inc(&mdev->rs_pending_cnt); } -#define dec_rs_pending(mdev) do { \ - typecheck(struct drbd_conf *, mdev); \ - atomic_dec(&mdev->rs_pending_cnt); \ - ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt); } while (0) +#define dec_rs_pending(mdev) _dec_rs_pending(mdev, __FUNCTION__, __LINE__) +static inline void _dec_rs_pending(struct drbd_conf *mdev, const char *func, int line) 
+{ + atomic_dec(&mdev->rs_pending_cnt); + ERR_IF_CNT_IS_NEGATIVE(rs_pending_cnt, func, line); +} /* counts how many answers we still need to send to the peer. * increased on @@ -2092,38 +2094,18 @@ atomic_inc(&mdev->unacked_cnt); } -#define dec_unacked(mdev) do { \ - typecheck(struct drbd_conf *, mdev); \ - atomic_dec(&mdev->unacked_cnt); \ - ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) - -#define sub_unacked(mdev, n) do { \ - typecheck(struct drbd_conf *, mdev); \ - atomic_sub(n, &mdev->unacked_cnt); \ - ERR_IF_CNT_IS_NEGATIVE(unacked_cnt); } while (0) - - -static inline void put_net_conf(struct drbd_conf *mdev) +#define dec_unacked(mdev) _dec_unacked(mdev, __FUNCTION__, __LINE__) +static inline void _dec_unacked(struct drbd_conf *mdev, const char *func, int line) { - if (atomic_dec_and_test(&mdev->net_cnt)) - wake_up(&mdev->misc_wait); + atomic_dec(&mdev->unacked_cnt); + ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); } -/** - * get_net_conf() - Increase ref count on mdev->net_conf; Returns 0 if nothing there - * @mdev: DRBD device. - * - * You have to call put_net_conf() when finished working with mdev->net_conf. - */ -static inline int get_net_conf(struct drbd_conf *mdev) +#define sub_unacked(mdev, n) _sub_unacked(mdev, n, __FUNCTION__, __LINE__) +static inline void _sub_unacked(struct drbd_conf *mdev, int n, const char *func, int line) { - int have_net_conf; - - atomic_inc(&mdev->net_cnt); - have_net_conf = mdev->state.conn >= C_UNCONNECTED; - if (!have_net_conf) - put_net_conf(mdev); - return have_net_conf; + atomic_sub(n, &mdev->unacked_cnt); + ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); } /** @@ -2137,10 +2119,22 @@ static inline void put_ldev(struct drbd_conf *mdev) { + int i = atomic_dec_return(&mdev->local_cnt); + + /* This may be called from some endio handler, + * so we must not sleep here. 
*/ + __release(local); - if (atomic_dec_and_test(&mdev->local_cnt)) + D_ASSERT(i >= 0); + if (i == 0) { + if (mdev->state.disk == D_DISKLESS) + /* even internal references gone, safe to destroy */ + drbd_ldev_destroy(mdev); + if (mdev->state.disk == D_FAILED) + /* all application IO references gone. */ + drbd_go_diskless(mdev); wake_up(&mdev->misc_wait); - D_ASSERT(atomic_read(&mdev->local_cnt) >= 0); + } } #ifndef __CHECKER__ @@ -2148,6 +2142,10 @@ { int io_allowed; + /* never get a reference while D_DISKLESS */ + if (mdev->state.disk == D_DISKLESS) + return 0; + atomic_inc(&mdev->local_cnt); io_allowed = (mdev->state.disk >= mins); if (!io_allowed) @@ -2162,17 +2160,18 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, unsigned long *bits_left, unsigned int *per_mil_done) { - /* - * this is to break it at compile time when we change that - * (we may feel 4TB maximum storage per drbd is not enough) - */ + /* this is to break it at compile time when we change that, in case we + * want to support more than (1<<32) bits on a 32bit arch. */ typecheck(unsigned long, mdev->rs_total); /* note: both rs_total and rs_left are in bits, i.e. in * units of BM_BLOCK_SIZE. * for the percentage, we don't care. */ - *bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; + if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) + *bits_left = mdev->ov_left; + else + *bits_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; /* >> 10 to prevent overflow, * +1 to prevent division by zero */ if (*bits_left > mdev->rs_total) { @@ -2187,10 +2186,19 @@ *bits_left, mdev->rs_total, mdev->rs_failed); *per_mil_done = 0; } else { - /* make sure the calculation happens in long context */ - unsigned long tmp = 1000UL - - (*bits_left >> 10)*1000UL - / ((mdev->rs_total >> 10) + 1UL); + /* Make sure the division happens in long context. + * We allow up to one petabyte storage right now, + * at a granularity of 4k per bit that is 2**38 bits. 
+ * After shift right and multiplication by 1000, + * this should still fit easily into a 32bit long, + * so we don't need a 64bit division on 32bit arch. + * Note: currently we don't support such large bitmaps on 32bit + * arch anyways, but no harm done to be prepared for it here. + */ + unsigned int shift = mdev->rs_total > UINT_MAX ? 16 : 10; + unsigned long left = *bits_left >> shift; + unsigned long total = 1UL + (mdev->rs_total >> shift); + unsigned long tmp = 1000UL - left * 1000UL/total; *per_mil_done = tmp; } } @@ -2201,16 +2209,20 @@ * maybe re-implement using semaphores? */ static inline int drbd_get_max_buffers(struct drbd_conf *mdev) { - int mxb = 1000000; /* arbitrary limit on open requests */ - if (get_net_conf(mdev)) { - mxb = mdev->net_conf->max_buffers; - put_net_conf(mdev); - } + struct net_conf *nc; + int mxb; + + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + mxb = nc ? nc->max_buffers : 1000000; /* arbitrary limit on open requests */ + rcu_read_unlock(); + return mxb; } -static inline int drbd_state_is_stable(union drbd_state s) +static inline int drbd_state_is_stable(struct drbd_conf *mdev) { + union drbd_dev_state s = mdev->state; /* DO NOT add a default clause, we want the compiler to warn us * for any newly introduced state we may have forgotten to add here */ @@ -2227,11 +2239,9 @@ case C_VERIFY_T: case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T: - /* maybe stable, look at the disk state */ - break; - - /* no new io accepted during tansitional states - * like handshake or teardown */ + case C_AHEAD: + case C_BEHIND: + /* transitional states, IO allowed */ case C_DISCONNECTING: case C_UNCONNECTED: case C_TIMEOUT: @@ -2242,7 +2252,15 @@ case C_WF_REPORT_PARAMS: case C_STARTING_SYNC_S: case C_STARTING_SYNC_T: + break; + + /* Allow IO in BM exchange states with new protocols */ case C_WF_BITMAP_S: + if (mdev->tconn->agreed_pro_version < 96) + return 0; + break; + + /* no new io accepted in these states */ case C_WF_BITMAP_T: 
case C_WF_SYNC_UUID: case C_MASK: @@ -2256,12 +2274,12 @@ case D_OUTDATED: case D_CONSISTENT: case D_UP_TO_DATE: + case D_FAILED: /* disk state is stable as well. */ break; - /* no new io accepted during tansitional states */ + /* no new io accepted during transitional states */ case D_ATTACHING: - case D_FAILED: case D_NEGOTIATING: case D_UNKNOWN: case D_MASK: @@ -2272,59 +2290,63 @@ return 1; } -static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) +static inline int drbd_suspended(struct drbd_conf *mdev) +{ + struct drbd_tconn *tconn = mdev->tconn; + + return tconn->susp || tconn->susp_fen || tconn->susp_nod; +} + +static inline bool may_inc_ap_bio(struct drbd_conf *mdev) { int mxb = drbd_get_max_buffers(mdev); - if (mdev->state.susp) - return 0; + if (drbd_suspended(mdev)) + return false; if (test_bit(SUSPEND_IO, &mdev->flags)) - return 0; + return false; /* to avoid potential deadlock or bitmap corruption, * in various places, we only allow new application io * to start during "stable" states. */ /* no new io accepted when attaching or detaching the disk */ - if (!drbd_state_is_stable(mdev->state)) - return 0; + if (!drbd_state_is_stable(mdev)) + return false; /* since some older kernels don't have atomic_add_unless, * and we are within the spinlock anyways, we have this workaround. 
*/ if (atomic_read(&mdev->ap_bio_cnt) > mxb) - return 0; + return false; if (test_bit(BITMAP_IO, &mdev->flags)) - return 0; - return 1; + return false; + return true; } -/* I'd like to use wait_event_lock_irq, - * but I'm not sure when it got introduced, - * and not sure when it has 3 or 4 arguments */ -static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two) -{ - /* compare with after_state_ch, - * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */ - DEFINE_WAIT(wait); +static inline bool inc_ap_bio_cond(struct drbd_conf *mdev) +{ + bool rv = false; + + spin_lock_irq(&mdev->tconn->req_lock); + rv = may_inc_ap_bio(mdev); + if (rv) + atomic_inc(&mdev->ap_bio_cnt); + spin_unlock_irq(&mdev->tconn->req_lock); + + return rv; +} +static inline void inc_ap_bio(struct drbd_conf *mdev) +{ /* we wait here * as long as the device is suspended * until the bitmap is no longer on the fly during connection - * handshake as long as we would exeed the max_buffer limit. + * handshake as long as we would exceed the max_buffer limit. * * to avoid races with the reconnect code, * we need to atomic_inc within the spinlock. 
*/ - spin_lock_irq(&mdev->req_lock); - while (!__inc_ap_bio_cond(mdev)) { - prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&mdev->req_lock); - schedule(); - finish_wait(&mdev->misc_wait, &wait); - spin_lock_irq(&mdev->req_lock); - } - atomic_add(one_or_two, &mdev->ap_bio_cnt); - spin_unlock_irq(&mdev->req_lock); + wait_event(mdev->misc_wait, inc_ap_bio_cond(mdev)); } static inline void dec_ap_bio(struct drbd_conf *mdev) @@ -2340,47 +2362,15 @@ wake_up(&mdev->misc_wait); if (ap_bio == 0 && test_bit(BITMAP_IO, &mdev->flags)) { if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) - drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); + drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w); } } -static inline void drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) +static inline int drbd_set_ed_uuid(struct drbd_conf *mdev, u64 val) { + int changed = mdev->ed_uuid != val; mdev->ed_uuid = val; -} - -static inline int seq_cmp(u32 a, u32 b) -{ - /* we assume wrap around at 32bit. - * for wrap around at 24bit (old atomic_t), - * we'd have to - * a <<= 8; b <<= 8; - */ - return (s32)(a) - (s32)(b); -} -#define seq_lt(a, b) (seq_cmp((a), (b)) < 0) -#define seq_gt(a, b) (seq_cmp((a), (b)) > 0) -#define seq_ge(a, b) (seq_cmp((a), (b)) >= 0) -#define seq_le(a, b) (seq_cmp((a), (b)) <= 0) -/* CAUTION: please no side effects in arguments! */ -#define seq_max(a, b) ((u32)(seq_gt((a), (b)) ? 
(a) : (b))) - -static inline void update_peer_seq(struct drbd_conf *mdev, unsigned int new_seq) -{ - unsigned int m; - spin_lock(&mdev->peer_seq_lock); - m = seq_max(mdev->peer_seq, new_seq); - mdev->peer_seq = m; - spin_unlock(&mdev->peer_seq_lock); - if (m == new_seq) - wake_up(&mdev->seq_wait); -} - -static inline void drbd_update_congested(struct drbd_conf *mdev) -{ - struct sock *sk = mdev->data.socket->sk; - if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) - set_bit(NET_CONGESTED, &mdev->flags); + return changed; } static inline int drbd_queue_order_type(struct drbd_conf *mdev) @@ -2393,34 +2383,6 @@ return QUEUE_ORDERED_NONE; } -/* - * FIXME investigate what makes most sense: - * a) blk_run_queue(q); - * - * b) struct backing_dev_info *bdi; - * b1) bdi = &q->backing_dev_info; - * b2) bdi = mdev->ldev->backing_bdev->bd_inode->i_mapping->backing_dev_info; - * blk_run_backing_dev(bdi,NULL); - * - * c) generic_unplug(q) ? __generic_unplug(q) ? - * - * d) q->unplug_fn(q), which is what all the drivers/md/ stuff uses... - * - */ -static inline void drbd_blk_run_queue(struct request_queue *q) -{ - if (q && q->unplug_fn) - q->unplug_fn(q); -} - -static inline void drbd_kick_lo(struct drbd_conf *mdev) -{ - if (get_ldev(mdev)) { - drbd_blk_run_queue(bdev_get_queue(mdev->ldev->backing_bdev)); - put_ldev(mdev); - } -} - static inline void drbd_md_flush(struct drbd_conf *mdev) { int r; @@ -2428,11 +2390,36 @@ if (test_bit(MD_NO_BARRIER, &mdev->flags)) return; - r = blkdev_issue_flush(mdev->ldev->md_bdev, NULL); + r = blkdev_issue_flush(mdev->ldev->md_bdev, GFP_KERNEL, NULL); if (r) { set_bit(MD_NO_BARRIER, &mdev->flags); dev_err(DEV, "meta data flush failed with status %d, disabling md-flushes\n", r); } } +/* resync bitmap */ +/* 16MB sized 'bitmap extent' to track syncer usage */ +struct bm_extent { + int rs_left; /* number of bits set (out of sync) in this extent. */ + int rs_failed; /* number of failed resync requests in this extent. 
*/ + unsigned long flags; + struct lc_element lce; +}; + +#define BME_NO_WRITES 0 /* bm_extent.flags: no more requests on this one! */ +#define BME_LOCKED 1 /* bm_extent.flags: syncer active on this one. */ +#define BME_PRIORITY 2 /* finish resync IO on this extent ASAP! App IO waiting! */ + +/* should be moved to idr.h */ +/** + * idr_for_each_entry - iterate over an idr's elements of a given type + * @idp: idr handle + * @entry: the type * to use as cursor + * @id: id entry's key + */ +#define idr_for_each_entry(idp, entry, id) \ + for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ + entry != NULL; \ + ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) + #endif diff -Nru drbd8-8.3.7/drbd/drbd_interval.c drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_interval.c --- drbd8-8.3.7/drbd/drbd_interval.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_interval.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,178 @@ +#include "drbd_interval.h" +#include "drbd_wrappers.h" + +/** + * interval_end - return end of @node + */ +static inline +sector_t interval_end(struct rb_node *node) +{ + struct drbd_interval *this = rb_entry(node, struct drbd_interval, rb); + return this->end; +} + +/** + * update_interval_end - recompute end of @node + * + * The end of an interval is the highest (start + (size >> 9)) value of this + * node and of its children. Called for @node and its parents whenever the end + * may have changed. 
+ */ +static void +update_interval_end(struct rb_node *node, void *__unused) +{ + struct drbd_interval *this = rb_entry(node, struct drbd_interval, rb); + sector_t end; + + end = this->sector + (this->size >> 9); + if (node->rb_left) { + sector_t left = interval_end(node->rb_left); + if (left > end) + end = left; + } + if (node->rb_right) { + sector_t right = interval_end(node->rb_right); + if (right > end) + end = right; + } + this->end = end; +} + +/** + * drbd_insert_interval - insert a new interval into a tree + */ +bool +drbd_insert_interval(struct rb_root *root, struct drbd_interval *this) +{ + struct rb_node **new = &root->rb_node, *parent = NULL; + + BUG_ON(!IS_ALIGNED(this->size, 512)); + + while (*new) { + struct drbd_interval *here = + rb_entry(*new, struct drbd_interval, rb); + + parent = *new; + if (this->sector < here->sector) + new = &(*new)->rb_left; + else if (this->sector > here->sector) + new = &(*new)->rb_right; + else if (this < here) + new = &(*new)->rb_left; + else if (this > here) + new = &(*new)->rb_right; + else + return false; + } + + rb_link_node(&this->rb, parent, new); + rb_insert_color(&this->rb, root); + rb_augment_insert(&this->rb, update_interval_end, NULL); + return true; +} + +/** + * drbd_contains_interval - check if a tree contains a given interval + * @sector: start sector of @interval + * @interval: may not be a valid pointer + * + * Returns if the tree contains the node @interval with start sector @start. + * Does not dereference @interval until @interval is known to be a valid object + * in @tree. Returns %false if @interval is in the tree but with a different + * sector number. 
+ */ +bool +drbd_contains_interval(struct rb_root *root, sector_t sector, + struct drbd_interval *interval) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct drbd_interval *here = + rb_entry(node, struct drbd_interval, rb); + + if (sector < here->sector) + node = node->rb_left; + else if (sector > here->sector) + node = node->rb_right; + else if (interval < here) + node = node->rb_left; + else if (interval > here) + node = node->rb_right; + else + return true; + } + return false; +} + +/** + * drbd_remove_interval - remove an interval from a tree + */ +void +drbd_remove_interval(struct rb_root *root, struct drbd_interval *this) +{ + struct rb_node *deepest; + + deepest = rb_augment_erase_begin(&this->rb); + rb_erase(&this->rb, root); + rb_augment_erase_end(deepest, update_interval_end, NULL); +} + +/** + * drbd_find_overlap - search for an interval overlapping with [sector, sector + size) + * @sector: start sector + * @size: size, aligned to 512 bytes + * + * Returns an interval overlapping with [sector, sector + size), or NULL if + * there is none. When there is more than one overlapping interval in the + * tree, the interval with the lowest start sector is returned, and all other + * overlapping intervals will be on the right side of the tree, reachable with + * rb_next(). 
+ */ +struct drbd_interval * +drbd_find_overlap(struct rb_root *root, sector_t sector, unsigned int size) +{ + struct rb_node *node = root->rb_node; + struct drbd_interval *overlap = NULL; + sector_t end = sector + (size >> 9); + + BUG_ON(!IS_ALIGNED(size, 512)); + + while (node) { + struct drbd_interval *here = + rb_entry(node, struct drbd_interval, rb); + + if (node->rb_left && + sector < interval_end(node->rb_left)) { + /* Overlap if any must be on left side */ + node = node->rb_left; + } else if (here->sector < end && + sector < here->sector + (here->size >> 9)) { + overlap = here; + break; + } else if (sector >= here->sector) { + /* Overlap if any must be on right side */ + node = node->rb_right; + } else + break; + } + return overlap; +} + +struct drbd_interval * +drbd_next_overlap(struct drbd_interval *i, sector_t sector, unsigned int size) +{ + sector_t end = sector + (size >> 9); + struct rb_node *node; + + for (;;) { + node = rb_next(&i->rb); + if (!node) + return NULL; + i = rb_entry(node, struct drbd_interval, rb); + if (i->sector >= end) + return NULL; + if (sector < i->sector + (i->size >> 9)) + return i; + } +} diff -Nru drbd8-8.3.7/drbd/drbd_interval.h drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_interval.h --- drbd8-8.3.7/drbd/drbd_interval.h 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_interval.h 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,68 @@ +#ifndef __DRBD_INTERVAL_H +#define __DRBD_INTERVAL_H + +#include +#include +#include + +/* Compatibility code for 2.6.16 (SLES10) */ +#ifndef rb_parent +#define rb_parent(r) ((r)->rb_parent) +#endif + +/* + * Kernels between mainline commit dd67d051 (v2.6.18-rc1) and 10fd48f2 + * (v2.6.19-rc1) have a broken version of RB_EMPTY_NODE(). + * + * RHEL5 kernels until at least 2.6.18-238.12.1.el5 have the broken definition. 
+ */ +#if !defined(RB_EMPTY_NODE) || LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,19) + +#undef RB_EMPTY_NODE +#define RB_EMPTY_NODE(node) (rb_parent(node) == node) + +#endif + +#ifndef RB_CLEAR_NODE +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) +{ + rb->rb_parent = p; +} +#define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) +#endif +/* /Compatibility code */ + +struct drbd_interval { + struct rb_node rb; + sector_t sector; /* start sector of the interval */ + unsigned int size; /* size in bytes */ + sector_t end; /* highest interval end in subtree */ + int local:1 /* local or remote request? */; + int waiting:1; +}; + +static inline void drbd_clear_interval(struct drbd_interval *i) +{ + RB_CLEAR_NODE(&i->rb); +} + +static inline bool drbd_interval_empty(struct drbd_interval *i) +{ + return RB_EMPTY_NODE(&i->rb); +} + +extern bool drbd_insert_interval(struct rb_root *, struct drbd_interval *); +extern bool drbd_contains_interval(struct rb_root *, sector_t, + struct drbd_interval *); +extern void drbd_remove_interval(struct rb_root *, struct drbd_interval *); +extern struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t, + unsigned int); +extern struct drbd_interval *drbd_next_overlap(struct drbd_interval *, sector_t, + unsigned int); + +#define drbd_for_each_overlap(i, root, sector, size) \ + for (i = drbd_find_overlap(root, sector, size); \ + i; \ + i = drbd_next_overlap(i, sector, size)) + +#endif /* __DRBD_INTERVAL_H */ diff -Nru drbd8-8.3.7/drbd/drbd_main.c drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_main.c --- drbd8-8.3.7/drbd/drbd_main.c 2010-01-07 09:09:33.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_main.c 2012-02-02 14:09:14.000000000 +0000 @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -45,30 +44,22 @@ #include #include #include -#ifdef HAVE_LINUX_BYTEORDER_SWABB_H -#include -#else -#include -#endif - #define __KERNEL_SYSCALLS__ #include #include +#include +#include 
#include #include "drbd_int.h" -#include "drbd_tracing.h" #include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */ - #include "drbd_vli.h" -struct after_state_chg_work { - struct drbd_work w; - union drbd_state os; - union drbd_state ns; - enum chg_state_flags flags; - struct completion *done; -}; +#ifdef COMPAT_HAVE_LINUX_BYTEORDER_SWABB_H +#include +#else +#include +#endif int drbdd_init(struct drbd_thread *); int drbd_worker(struct drbd_thread *); @@ -82,31 +73,18 @@ static int drbd_open(struct inode *inode, struct file *file); static int drbd_release(struct inode *inode, struct file *file); #endif -STATIC int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused); -STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, enum chg_state_flags flags); -STATIC int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); +STATIC int w_md_sync(struct drbd_work *w, int unused); STATIC void md_sync_timer_fn(unsigned long data); -STATIC int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); - -DEFINE_TRACE(drbd_unplug); -DEFINE_TRACE(drbd_uuid); -DEFINE_TRACE(drbd_ee); -DEFINE_TRACE(drbd_packet); -DEFINE_TRACE(drbd_md_io); -DEFINE_TRACE(drbd_epoch); -DEFINE_TRACE(drbd_netlink); -DEFINE_TRACE(drbd_actlog); -DEFINE_TRACE(drbd_bio); -DEFINE_TRACE(_drbd_resync); -DEFINE_TRACE(drbd_req); +STATIC int w_bitmap_io(struct drbd_work *w, int unused); +STATIC int w_go_diskless(struct drbd_work *w, int unused); MODULE_AUTHOR("Philipp Reisner , " "Lars Ellenberg "); MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION); MODULE_VERSION(REL_VERSION); MODULE_LICENSE("GPL"); -MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)"); +MODULE_PARM_DESC(minor_count, "Approximate number of drbd devices (" + __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")"); MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR); #include @@ -117,7 +95,6 
@@ module_param(minor_count, uint, 0444); module_param(disable_sendpage, bool, 0644); module_param(allow_oos, bool, 0); -module_param(cn_idx, uint, 0444); module_param(proc_details, int, 0644); #ifdef DRBD_ENABLE_FAULTS @@ -136,10 +113,9 @@ #endif /* module parameter, defined */ -unsigned int minor_count = 32; +unsigned int minor_count = DRBD_MINOR_COUNT_DEF; int disable_sendpage; int allow_oos; -unsigned int cn_idx = CN_IDX_DRBD; int proc_details; /* Detail level in proc drbd*/ /* Module parameter for setting the user mode helper program @@ -151,14 +127,17 @@ /* in 2.6.x, our device mapping and config info contains our virtual gendisks * as member "struct gendisk *vdisk;" */ -struct drbd_conf **minor_table; +struct idr minors; +struct list_head drbd_tconns; /* list of struct drbd_tconn */ struct kmem_cache *drbd_request_cache; -struct kmem_cache *drbd_ee_cache; /* epoch entries */ +struct kmem_cache *drbd_ee_cache; /* peer requests */ struct kmem_cache *drbd_bm_ext_cache; /* bitmap extents */ struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ mempool_t *drbd_request_mempool; mempool_t *drbd_ee_mempool; +mempool_t *drbd_md_io_page_pool; +struct bio_set *drbd_md_io_bio_set; /* I do not use a standard mempool, because: 1) I want to hand out the pre-allocated objects first. @@ -177,7 +156,24 @@ .release = drbd_release, }; -#define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0])) +static void bio_destructor_drbd(struct bio *bio) +{ + bio_free(bio, drbd_md_io_bio_set); +} + +struct bio *bio_alloc_drbd(gfp_t gfp_mask) +{ + struct bio *bio; + + if (!drbd_md_io_bio_set) + return bio_alloc(gfp_mask, 1); + + bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); + if (!bio) + return NULL; + bio->bi_destructor = bio_destructor_drbd; + return bio; +} #ifdef __CHECKER__ /* When checking with sparse, and this is an inline function, sparse will @@ -202,13 +198,13 @@ * DOC: The transfer log * * The transfer log is a single linked list of &struct drbd_tl_epoch objects. 
- * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail + * mdev->tconn->newest_tle points to the head, mdev->tconn->oldest_tle points to the tail * of the list. There is always at least one &struct drbd_tl_epoch object. * * Each &struct drbd_tl_epoch has a circular double linked list of requests * attached. */ -STATIC int tl_init(struct drbd_conf *mdev) +STATIC int tl_init(struct drbd_tconn *tconn) { struct drbd_tl_epoch *b; @@ -220,30 +216,27 @@ INIT_LIST_HEAD(&b->w.list); b->next = NULL; b->br_number = 4711; - b->n_req = 0; + b->n_writes = 0; b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ - mdev->oldest_tle = b; - mdev->newest_tle = b; - INIT_LIST_HEAD(&mdev->out_of_sequence_requests); - - mdev->tl_hash = NULL; - mdev->tl_hash_s = 0; + tconn->oldest_tle = b; + tconn->newest_tle = b; + INIT_LIST_HEAD(&tconn->out_of_sequence_requests); + INIT_LIST_HEAD(&tconn->barrier_acked_requests); return 1; } -STATIC void tl_cleanup(struct drbd_conf *mdev) +STATIC void tl_cleanup(struct drbd_tconn *tconn) { - D_ASSERT(mdev->oldest_tle == mdev->newest_tle); - D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); - kfree(mdev->oldest_tle); - mdev->oldest_tle = NULL; - kfree(mdev->unused_spare_tle); - mdev->unused_spare_tle = NULL; - kfree(mdev->tl_hash); - mdev->tl_hash = NULL; - mdev->tl_hash_s = 0; + if (tconn->oldest_tle != tconn->newest_tle) + conn_err(tconn, "ASSERT FAILED: oldest_tle == newest_tle\n"); + if (!list_empty(&tconn->out_of_sequence_requests)) + conn_err(tconn, "ASSERT FAILED: list_empty(out_of_sequence_requests)\n"); + kfree(tconn->oldest_tle); + tconn->oldest_tle = NULL; + kfree(tconn->unused_spare_tle); + tconn->unused_spare_tle = NULL; } /** @@ -253,7 +246,7 @@ * * The caller must hold the req_lock. 
*/ -void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new) +void _tl_add_barrier(struct drbd_tconn *tconn, struct drbd_tl_epoch *new) { struct drbd_tl_epoch *newest_before; @@ -261,15 +254,15 @@ INIT_LIST_HEAD(&new->w.list); new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */ new->next = NULL; - new->n_req = 0; + new->n_writes = 0; - newest_before = mdev->newest_tle; + newest_before = tconn->newest_tle; /* never send a barrier number == 0, because that is special-cased * when using TCQ for our write ordering code */ new->br_number = (newest_before->br_number+1) ?: 1; - if (mdev->newest_tle != new) { - mdev->newest_tle->next = new; - mdev->newest_tle = new; + if (tconn->newest_tle != new) { + tconn->newest_tle->next = new; + tconn->newest_tle = new; } } @@ -283,38 +276,39 @@ * &struct drbd_tl_epoch objects this function will cause a termination * of the connection. */ -void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr, - unsigned int set_size) +void tl_release(struct drbd_tconn *tconn, unsigned int barrier_nr, + unsigned int set_size) { + struct drbd_conf *mdev; struct drbd_tl_epoch *b, *nob; /* next old barrier */ struct list_head *le, *tle; struct drbd_request *r; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&tconn->req_lock); - b = mdev->oldest_tle; + b = tconn->oldest_tle; /* first some paranoia code */ if (b == NULL) { - dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n", - barrier_nr); + conn_err(tconn, "BAD! BarrierAck #%u received, but no epoch in tl!?\n", + barrier_nr); goto bail; } if (b->br_number != barrier_nr) { - dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n", - barrier_nr, b->br_number); + conn_err(tconn, "BAD! BarrierAck #%u received, expected #%u!\n", + barrier_nr, b->br_number); goto bail; } - if (b->n_req != set_size) { - dev_err(DEV, "BAD! 
BarrierAck #%u received with n_req=%u, expected n_req=%u!\n", - barrier_nr, set_size, b->n_req); + if (b->n_writes != set_size) { + conn_err(tconn, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n", + barrier_nr, set_size, b->n_writes); goto bail; } /* Clean up list of requests processed during current epoch */ list_for_each_safe(le, tle, &b->requests) { r = list_entry(le, struct drbd_request, tl_requests); - _req_mod(r, barrier_acked); + _req_mod(r, BARRIER_ACKED); } /* There could be requests on the list waiting for completion of the write to the local disk. To avoid corruptions of @@ -324,1158 +318,219 @@ the write acks - which would be a bug and violating write ordering. To not deadlock in case we lose connection while such requests are still pending, we need some way to find them for the - _req_mode(connection_lost_while_pending). + _req_mode(CONNECTION_LOST_WHILE_PENDING). These have been list_move'd to the out_of_sequence_requests list in - _req_mod(, barrier_acked) above. + _req_mod(, BARRIER_ACKED) above. */ - list_del_init(&b->requests); + list_splice_init(&b->requests, &tconn->barrier_acked_requests); + mdev = b->w.mdev; nob = b->next; - if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { - _tl_add_barrier(mdev, b); + if (test_and_clear_bit(CREATE_BARRIER, &tconn->flags)) { + _tl_add_barrier(tconn, b); if (nob) - mdev->oldest_tle = nob; + tconn->oldest_tle = nob; /* if nob == NULL b was the only barrier, and becomes the new - barrier. Therefore mdev->oldest_tle points already to b */ + barrier. 
Therefore tconn->oldest_tle points already to b */ } else { D_ASSERT(nob != NULL); - mdev->oldest_tle = nob; + tconn->oldest_tle = nob; kfree(b); } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&tconn->req_lock); dec_ap_pending(mdev); return; bail: - spin_unlock_irq(&mdev->req_lock); - drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); + spin_unlock_irq(&tconn->req_lock); + conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD); } /** - * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL + * _tl_restart() - Walks the transfer log, and applies an action to all requests * @mdev: DRBD device. + * @what: The action/event to perform with all request objects * - * This is called after the connection to the peer was lost. The storage covered - * by the requests on the transfer gets marked as our of sync. Called from the - * receiver thread and the worker thread. + * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, + * RESTART_FROZEN_DISK_IO. */ -void tl_clear(struct drbd_conf *mdev) +void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) { - struct drbd_tl_epoch *b, *tmp; - struct list_head *le, *tle; - struct drbd_request *r; - int new_initial_bnr = net_random(); - - spin_lock_irq(&mdev->req_lock); + struct drbd_tl_epoch *b, *tmp, **pn; + struct list_head *le, *tle, carry_reads; + struct drbd_request *req; + int rv, n_writes, n_reads; - b = mdev->oldest_tle; + b = tconn->oldest_tle; + pn = &tconn->oldest_tle; while (b) { + n_writes = 0; + n_reads = 0; + INIT_LIST_HEAD(&carry_reads); list_for_each_safe(le, tle, &b->requests) { - r = list_entry(le, struct drbd_request, tl_requests); - /* It would be nice to complete outside of spinlock. - * But this is easier for now. 
*/ - _req_mod(r, connection_lost_while_pending); - } - tmp = b->next; - - /* there could still be requests on that ring list, - * in case local io is still pending */ - list_del(&b->requests); - - /* dec_ap_pending corresponding to queue_barrier. - * the newest barrier may not have been queued yet, - * in which case w.cb is still NULL. */ - if (b->w.cb != NULL) - dec_ap_pending(mdev); - - if (b == mdev->newest_tle) { - /* recycle, but reinit! */ - D_ASSERT(tmp == NULL); - INIT_LIST_HEAD(&b->requests); - INIT_LIST_HEAD(&b->w.list); - b->w.cb = NULL; - b->br_number = new_initial_bnr; - b->n_req = 0; + req = list_entry(le, struct drbd_request, tl_requests); + rv = _req_mod(req, what); - mdev->oldest_tle = b; - break; + if (rv & MR_WRITE) + n_writes++; + if (rv & MR_READ) + n_reads++; } - kfree(b); - b = tmp; - } - - /* we expect this list to be empty. */ - D_ASSERT(list_empty(&mdev->out_of_sequence_requests)); - - /* but just in case, clean it up anyways! */ - list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) { - r = list_entry(le, struct drbd_request, tl_requests); - /* It would be nice to complete outside of spinlock. - * But this is easier for now. */ - _req_mod(r, connection_lost_while_pending); - } - - /* ensure bit indicating barrier is required is clear */ - clear_bit(CREATE_BARRIER, &mdev->flags); - - spin_unlock_irq(&mdev->req_lock); -} - -#if DRBD_DEBUG_STATE_CHANGES -static void trace_st(struct drbd_conf *mdev, const unsigned long long seq, - const char *func, unsigned int line, - const char *name, union drbd_state s); -#endif - -/** - * cl_wide_st_chg() - TRUE if the state change is a cluster wide one - * @mdev: DRBD device. - * @os: old (current) state. - * @ns: new (wanted) state. 
- */ -STATIC int cl_wide_st_chg(struct drbd_conf *mdev, - union drbd_state os, union drbd_state ns) -{ - return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED && - ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || - (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || - (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || - (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || - (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || - (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S); -} - -int drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, - union drbd_state mask, union drbd_state val) -{ -#if DRBD_DEBUG_STATE_CHANGES - static unsigned long long sseq = 0xf0000000LLU; - unsigned long seq; - unsigned int line = val.line; - const char *func = val.func; -#endif - - unsigned long flags; - union drbd_state os, ns; - int rv; - - spin_lock_irqsave(&mdev->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; -#if DRBD_DEBUG_STATE_CHANGES - seq = ++sseq; - trace_st(mdev, seq, func, line, "!os", os); - trace_st(mdev, seq, func, line, "!ns", ns); - ns.func = NULL; -#endif - rv = _drbd_set_state(mdev, ns, f, NULL); - ns = mdev->state; -#if DRBD_DEBUG_STATE_CHANGES - trace_st(mdev, seq, func, line, "=ns", ns); -#endif - spin_unlock_irqrestore(&mdev->req_lock, flags); - - return rv; -} - -/** - * drbd_force_state() - Impose a change which happens outside our control on our state - * @mdev: DRBD device. - * @mask: mask of state bits to change. - * @val: value of new state bits. 
- */ -void drbd_force_state(struct drbd_conf *mdev, - union drbd_state mask, union drbd_state val) -{ - drbd_change_state(mdev, CS_HARD, mask, val); -} - -STATIC int is_valid_state(struct drbd_conf *mdev, union drbd_state ns); -STATIC int is_valid_state_transition(struct drbd_conf *, - union drbd_state, union drbd_state); -STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, int *warn_sync_abort); -int drbd_send_state_req(struct drbd_conf *, - union drbd_state, union drbd_state); - -STATIC enum drbd_state_ret_codes _req_st_cond(struct drbd_conf *mdev, - union drbd_state mask, union drbd_state val) -{ - union drbd_state os, ns; - unsigned long flags; - int rv; + tmp = b->next; - if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) - return SS_CW_SUCCESS; + if (n_writes) { + if (what == RESEND) { + b->n_writes = n_writes; + if (b->w.cb == NULL) { + b->w.cb = w_send_barrier; + inc_ap_pending(b->w.mdev); + set_bit(CREATE_BARRIER, &tconn->flags); + } - if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags)) - return SS_CW_FAILED_BY_PEER; + drbd_queue_work(&tconn->data.work, &b->w); + } + pn = &b->next; + } else { + if (n_reads) + list_add(&carry_reads, &b->requests); + /* there could still be requests on that ring list, + * in case local io is still pending */ + list_del(&b->requests); + + /* dec_ap_pending corresponding to queue_barrier. + * the newest barrier may not have been queued yet, + * in which case w.cb is still NULL. */ + if (b->w.cb != NULL) + dec_ap_pending(b->w.mdev); + + if (b == tconn->newest_tle) { + /* recycle, but reinit! 
*/ + if (tmp != NULL) + conn_err(tconn, "ASSERT FAILED tmp == NULL"); + INIT_LIST_HEAD(&b->requests); + list_splice(&carry_reads, &b->requests); + INIT_LIST_HEAD(&b->w.list); + b->w.cb = NULL; + b->br_number = net_random(); + b->n_writes = 0; - rv = 0; - spin_lock_irqsave(&mdev->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, os, ns, NULL); - - if (!cl_wide_st_chg(mdev, os, ns)) - rv = SS_CW_NO_NEED; - if (!rv) { - rv = is_valid_state(mdev, ns); - if (rv == SS_SUCCESS) { - rv = is_valid_state_transition(mdev, ns, os); - if (rv == SS_SUCCESS) - rv = 0; /* cont waiting, otherwise fail. */ + *pn = b; + break; + } + *pn = tmp; + kfree(b); } + b = tmp; + list_splice(&carry_reads, &b->requests); } - spin_unlock_irqrestore(&mdev->req_lock, flags); - return rv; -} - -/** - * drbd_req_state() - Perform an eventually cluster wide state change - * @mdev: DRBD device. - * @mask: mask of state bits to change. - * @val: value of new state bits. - * @f: flags - * - * Should not be called directly, use drbd_request_state() or - * _drbd_request_state(). 
- */ -STATIC int drbd_req_state(struct drbd_conf *mdev, - union drbd_state mask, union drbd_state val, - enum chg_state_flags f) -{ -#if DRBD_DEBUG_STATE_CHANGES - static unsigned long long sseq = 0; - unsigned long seq; - unsigned int line = val.line; - const char *func = val.func; -#endif - - struct completion done; - unsigned long flags; - union drbd_state os, ns; - int rv; - - init_completion(&done); - - if (f & CS_SERIALIZE) - mutex_lock(&mdev->state_mutex); - - spin_lock_irqsave(&mdev->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - ns = sanitize_state(mdev, os, ns, NULL); - -#if DRBD_DEBUG_STATE_CHANGES - seq = ++sseq; - trace_st(mdev, seq, func, line, "?os", os); - trace_st(mdev, seq, func, line, "?ns", ns); - ns.func = NULL; -#endif - - if (cl_wide_st_chg(mdev, os, ns)) { - rv = is_valid_state(mdev, ns); - if (rv == SS_SUCCESS) - rv = is_valid_state_transition(mdev, ns, os); - spin_unlock_irqrestore(&mdev->req_lock, flags); - - if (rv < SS_SUCCESS) { - if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - goto abort; - } - - drbd_state_lock(mdev); - if (!drbd_send_state_req(mdev, mask, val)) { - drbd_state_unlock(mdev); - rv = SS_CW_FAILED_BY_PEER; - if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - goto abort; + /* Actions operating on the disk state, also want to work on + requests that got barrier acked. 
*/ + switch (what) { + case FAIL_FROZEN_DISK_IO: + case RESTART_FROZEN_DISK_IO: + list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + _req_mod(req, what); } - - wait_event(mdev->state_wait, - (rv = _req_st_cond(mdev, mask, val))); - - if (rv < SS_SUCCESS) { - drbd_state_unlock(mdev); - if (f & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - goto abort; - } - spin_lock_irqsave(&mdev->req_lock, flags); - os = mdev->state; - ns.i = (os.i & ~mask.i) | val.i; - rv = _drbd_set_state(mdev, ns, f, &done); - drbd_state_unlock(mdev); - } else { - rv = _drbd_set_state(mdev, ns, f, &done); - } - - spin_unlock_irqrestore(&mdev->req_lock, flags); - - if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { - D_ASSERT(current != mdev->worker.task); - wait_for_completion(&done); - } - -abort: -#if DRBD_DEBUG_STATE_CHANGES - trace_st(mdev, seq, func, line, ":os", os); - trace_st(mdev, seq, func, line, ":ns", ns); -#endif - - if (f & CS_SERIALIZE) - mutex_unlock(&mdev->state_mutex); - - return rv; -} - -/** - * _drbd_request_state() - Request a state change (with flags) - * @mdev: DRBD device. - * @mask: mask of state bits to change. - * @val: value of new state bits. - * @f: flags - * - * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE - * flag, or when logging of failed state change requests is not desired. - */ -int _drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, - union drbd_state val, enum chg_state_flags f) -{ - int rv; - - wait_event(mdev->state_wait, - (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); - - return rv; -} - -#if DRBD_DEBUG_STATE_CHANGES -static void trace_st(struct drbd_conf *mdev, const unsigned long long seq, - const char *func, unsigned int line, - const char *name, union drbd_state s) -{ - - const struct task_struct *c = current; - const char *context = - c == mdev->worker.task ? "worker" : - c == mdev->receiver.task ? 
"receiver" : - c == mdev->asender.task ? "asender" : "other"; - - dev_info(DEV, " %8llx [%s] %s:%u %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n", - seq, context, func, line, - name, - drbd_conn_str(s.conn), - drbd_role_str(s.role), - drbd_role_str(s.peer), - drbd_disk_str(s.disk), - drbd_disk_str(s.pdsk), - s.susp ? 's' : 'r', - s.aftr_isp ? 'a' : '-', - s.peer_isp ? 'p' : '-', - s.user_isp ? 'u' : '-' - ); -} -#else -#define trace_st(...) do { } while (0) -#endif - -STATIC void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns) -{ - dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n", - name, - drbd_conn_str(ns.conn), - drbd_role_str(ns.role), - drbd_role_str(ns.peer), - drbd_disk_str(ns.disk), - drbd_disk_str(ns.pdsk), - ns.susp ? 's' : 'r', - ns.aftr_isp ? 'a' : '-', - ns.peer_isp ? 'p' : '-', - ns.user_isp ? 'u' : '-' - ); -} - -void print_st_err(struct drbd_conf *mdev, - union drbd_state os, union drbd_state ns, int err) -{ - if (err == SS_IN_TRANSIENT_STATE) - return; - dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err)); - print_st(mdev, " state", os); - print_st(mdev, "wanted", ns); -} - - -#define drbd_peer_str drbd_role_str -#define drbd_pdsk_str drbd_disk_str - -#define drbd_susp_str(A) ((A) ? "1" : "0") -#define drbd_aftr_isp_str(A) ((A) ? "1" : "0") -#define drbd_peer_isp_str(A) ((A) ? "1" : "0") -#define drbd_user_isp_str(A) ((A) ? "1" : "0") - -#define PSC(A) \ - ({ if (ns.A != os.A) { \ - pbp += sprintf(pbp, #A "( %s -> %s ) ", \ - drbd_##A##_str(os.A), \ - drbd_##A##_str(ns.A)); \ - } }) - -/** - * is_valid_state() - Returns an SS_ error code if ns is not valid - * @mdev: DRBD device. - * @ns: State to consider. 
- */ -STATIC int is_valid_state(struct drbd_conf *mdev, union drbd_state ns) -{ - /* See drbd_state_sw_errors in drbd_strings.c */ - - enum drbd_fencing_p fp; - int rv = SS_SUCCESS; - - fp = FP_DONT_CARE; - if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); + case CONNECTION_LOST_WHILE_PENDING: + case RESEND: + break; + default: + conn_err(tconn, "what = %d in _tl_restart()\n", what); } - - if (get_net_conf(mdev)) { - if (!mdev->net_conf->two_primaries && - ns.role == R_PRIMARY && ns.peer == R_PRIMARY) - rv = SS_TWO_PRIMARIES; - put_net_conf(mdev); - } - - if (rv <= 0) - /* already found a reason to abort */; - else if (ns.role == R_SECONDARY && mdev->open_cnt) - rv = SS_DEVICE_IN_USE; - - else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE) - rv = SS_NO_UP_TO_DATE_DISK; - - else if (fp >= FP_RESOURCE && - ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN) - rv = SS_PRIMARY_NOP; - - else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT) - rv = SS_NO_UP_TO_DATE_DISK; - - else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT) - rv = SS_NO_LOCAL_DISK; - - else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT) - rv = SS_NO_REMOTE_DISK; - - else if ((ns.conn == C_CONNECTED || - ns.conn == C_WF_BITMAP_S || - ns.conn == C_SYNC_SOURCE || - ns.conn == C_PAUSED_SYNC_S) && - ns.disk == D_OUTDATED) - rv = SS_CONNECTED_OUTDATES; - - else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - (mdev->sync_conf.verify_alg[0] == 0)) - rv = SS_NO_VERIFY_ALG; - - else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - mdev->agreed_pro_version < 88) - rv = SS_NOT_SUPPORTED; - - return rv; } -/** - * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible - * @mdev: DRBD device. - * @ns: new state. - * @os: old state. 
- */ -STATIC int is_valid_state_transition(struct drbd_conf *mdev, - union drbd_state ns, union drbd_state os) -{ - int rv = SS_SUCCESS; - - if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && - os.conn > C_CONNECTED) - rv = SS_RESYNC_RUNNING; - - if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE) - rv = SS_ALREADY_STANDALONE; - - if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS) - rv = SS_IS_DISKLESS; - - if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED) - rv = SS_NO_NET_CONFIG; - - if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING) - rv = SS_LOWER_THAN_OUTDATED; - - if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED) - rv = SS_IN_TRANSIENT_STATE; - - if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) - rv = SS_IN_TRANSIENT_STATE; - - if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) - rv = SS_NEED_CONNECTION; - - if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && - ns.conn != os.conn && os.conn > C_CONNECTED) - rv = SS_RESYNC_RUNNING; - - if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) && - os.conn < C_CONNECTED) - rv = SS_NEED_CONNECTION; - - return rv; -} /** - * sanitize_state() - Resolves implicitly necessary additional changes to a state transition + * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL * @mdev: DRBD device. - * @os: old state. - * @ns: new state. - * @warn_sync_abort: * - * When we loose connection, we have to set the state of the peers disk (pdsk) - * to D_UNKNOWN. This rule and many more along those lines are in this function. + * This is called after the connection to the peer was lost. The storage covered + * by the requests on the transfer gets marked as our of sync. Called from the + * receiver thread and the worker thread. 
*/ -STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, int *warn_sync_abort) +void tl_clear(struct drbd_tconn *tconn) { - enum drbd_fencing_p fp; - - fp = FP_DONT_CARE; - if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - } - - /* Disallow Network errors to configure a device's network part */ - if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) && - os.conn <= C_DISCONNECTING) - ns.conn = os.conn; - - /* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow */ - if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN && - ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING) - ns.conn = os.conn; - - /* After C_DISCONNECTING only C_STANDALONE may follow */ - if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE) - ns.conn = os.conn; - - if (ns.conn < C_CONNECTED) { - ns.peer_isp = 0; - ns.peer = R_UNKNOWN; - if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT) - ns.pdsk = D_UNKNOWN; - } - - /* Clear the aftr_isp when becoming unconfigured */ - if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY) - ns.aftr_isp = 0; - - if (ns.conn <= C_DISCONNECTING && ns.disk == D_DISKLESS) - ns.pdsk = D_UNKNOWN; - - /* Abort resync if a disk fails/detaches */ - if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED && - (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { - if (warn_sync_abort) - *warn_sync_abort = 1; - ns.conn = C_CONNECTED; - } - - if (ns.conn >= C_CONNECTED && - ((ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) || - (ns.disk == D_NEGOTIATING && ns.conn == C_WF_BITMAP_T))) { - switch (ns.conn) { - case C_WF_BITMAP_T: - case C_PAUSED_SYNC_T: - ns.disk = D_OUTDATED; - break; - case C_CONNECTED: - case C_WF_BITMAP_S: - case C_SYNC_SOURCE: - case C_PAUSED_SYNC_S: - ns.disk = D_UP_TO_DATE; - break; - case C_SYNC_TARGET: - ns.disk = D_INCONSISTENT; - dev_warn(DEV, "Implicitly set disk state Inconsistent!\n"); - break; - } - if (os.disk 
== D_OUTDATED && ns.disk == D_UP_TO_DATE) - dev_warn(DEV, "Implicitly set disk from Outdated to UpToDate\n"); - } - - if (ns.conn >= C_CONNECTED && - (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)) { - switch (ns.conn) { - case C_CONNECTED: - case C_WF_BITMAP_T: - case C_PAUSED_SYNC_T: - case C_SYNC_TARGET: - ns.pdsk = D_UP_TO_DATE; - break; - case C_WF_BITMAP_S: - case C_PAUSED_SYNC_S: - ns.pdsk = D_OUTDATED; - break; - case C_SYNC_SOURCE: - ns.pdsk = D_INCONSISTENT; - dev_warn(DEV, "Implicitly set pdsk Inconsistent!\n"); - break; - } - if (os.pdsk == D_OUTDATED && ns.pdsk == D_UP_TO_DATE) - dev_warn(DEV, "Implicitly set pdsk from Outdated to UpToDate\n"); - } - - /* Connection breaks down before we finished "Negotiating" */ - if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && - get_ldev_if_state(mdev, D_NEGOTIATING)) { - if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) { - ns.disk = mdev->new_state_tmp.disk; - ns.pdsk = mdev->new_state_tmp.pdsk; - } else { - dev_alert(DEV, "Connection lost while negotiating, no data!\n"); - ns.disk = D_DISKLESS; - ns.pdsk = D_UNKNOWN; - } - put_ldev(mdev); - } - - if (fp == FP_STONITH && - (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && - !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) - ns.susp = 1; - - if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { - if (ns.conn == C_SYNC_SOURCE) - ns.conn = C_PAUSED_SYNC_S; - if (ns.conn == C_SYNC_TARGET) - ns.conn = C_PAUSED_SYNC_T; - } else { - if (ns.conn == C_PAUSED_SYNC_S) - ns.conn = C_SYNC_SOURCE; - if (ns.conn == C_PAUSED_SYNC_T) - ns.conn = C_SYNC_TARGET; - } - - return ns; -} - -/* helper for __drbd_set_state */ -static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) -{ - if (cs == C_VERIFY_T) { - /* starting online verify from an arbitrary position - * does not fit well into the existing protocol. 
- * on C_VERIFY_T, we initialize ov_left and friends - * implicitly in receive_DataRequest once the - * first P_OV_REQUEST is received */ - mdev->ov_start_sector = ~(sector_t)0; - } else { - unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); - if (bit >= mdev->rs_total) - mdev->ov_start_sector = - BM_BIT_TO_SECT(mdev->rs_total - 1); - mdev->ov_position = mdev->ov_start_sector; - } -} - -/** - * __drbd_set_state() - Set a new DRBD state - * @mdev: DRBD device. - * @ns: new state. - * @flags: Flags - * @done: Optional completion, that will get completed after the after_state_ch() finished - * - * Caller needs to hold req_lock, and global_state_lock. Do not call directly. - */ -int __drbd_set_state(struct drbd_conf *mdev, - union drbd_state ns, enum chg_state_flags flags, - struct completion *done) -{ -#if DRBD_DEBUG_STATE_CHANGES - static unsigned long long sseq = 0xff000000LLU; - unsigned long long seq = 0; -#endif - union drbd_state os; - int rv = SS_SUCCESS; - int warn_sync_abort = 0; - struct after_state_chg_work *ascw; - - - os = mdev->state; - -#if DRBD_DEBUG_STATE_CHANGES - if (ns.func) { - seq = ++sseq; - trace_st(mdev, seq, ns.func, ns.line, "==os", os); - trace_st(mdev, seq, ns.func, ns.line, "==ns", ns); - } -#endif - - ns = sanitize_state(mdev, os, ns, &warn_sync_abort); - -#if DRBD_DEBUG_STATE_CHANGES - if (ns.func) - trace_st(mdev, seq, ns.func, ns.line, "==ns", ns); -#endif - - if (ns.i == os.i) - return SS_NOTHING_TO_DO; - - if (!(flags & CS_HARD)) { - /* pre-state-change checks ; only look at ns */ - /* See drbd_state_sw_errors in drbd_strings.c */ - - rv = is_valid_state(mdev, ns); - if (rv < SS_SUCCESS) { - /* If the old state was illegal as well, then let - this happen...*/ - - if (is_valid_state(mdev, os) == rv) { - dev_err(DEV, "Considering state change from bad state. 
" - "Error would be: '%s'\n", - drbd_set_st_err_str(rv)); - print_st(mdev, "old", os); - print_st(mdev, "new", ns); - rv = is_valid_state_transition(mdev, ns, os); - } - } else - rv = is_valid_state_transition(mdev, ns, os); - } - - if (rv < SS_SUCCESS) { - if (flags & CS_VERBOSE) - print_st_err(mdev, os, ns, rv); - return rv; - } - - if (warn_sync_abort) - dev_warn(DEV, "Resync aborted.\n"); - -#if DUMP_MD >= 2 - { - char *pbp, pb[300]; - pbp = pb; - *pbp = 0; - PSC(role); - PSC(peer); - PSC(conn); - PSC(disk); - PSC(pdsk); - PSC(susp); - PSC(aftr_isp); - PSC(peer_isp); - PSC(user_isp); - dev_info(DEV, "%s\n", pb); - } -#endif - -#if DRBD_DEBUG_STATE_CHANGES - if (ns.func) - trace_st(mdev, seq, ns.func, ns.line, ":=ns", ns); -#endif - - /* solve the race between becoming unconfigured, - * worker doing the cleanup, and - * admin reconfiguring us: - * on (re)configure, first set CONFIG_PENDING, - * then wait for a potentially exiting worker, - * start the worker, and schedule one no_op. - * then proceed with configuration. - */ - if (ns.disk == D_DISKLESS && - ns.conn == C_STANDALONE && - ns.role == R_SECONDARY && - !test_and_set_bit(CONFIG_PENDING, &mdev->flags)) - set_bit(DEVICE_DYING, &mdev->flags); - - mdev->state.i = ns.i; - wake_up(&mdev->misc_wait); - wake_up(&mdev->state_wait); - - /* post-state-change actions */ - if (os.conn >= C_SYNC_SOURCE && ns.conn <= C_CONNECTED) { - set_bit(STOP_SYNC_TIMER, &mdev->flags); - mod_timer(&mdev->resync_timer, jiffies); - } - - /* aborted verify run. 
log the last position */ - if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && - ns.conn < C_CONNECTED) { - mdev->ov_start_sector = - BM_BIT_TO_SECT(mdev->rs_total - mdev->ov_left); - dev_info(DEV, "Online Verify reached sector %llu\n", - (unsigned long long)mdev->ov_start_sector); - } - - if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && - (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) { - dev_info(DEV, "Syncer continues.\n"); - mdev->rs_paused += (long)jiffies-(long)mdev->rs_mark_time; - if (ns.conn == C_SYNC_TARGET) { - if (!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags)) - mod_timer(&mdev->resync_timer, jiffies); - /* This if (!test_bit) is only needed for the case - that a device that has ceased to used its timer, - i.e. it is already in drbd_resync_finished() gets - paused and resumed. */ - } - } + struct list_head *le, *tle; + struct drbd_request *r; - if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) && - (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) { - dev_info(DEV, "Resync suspended\n"); - mdev->rs_mark_time = jiffies; - if (ns.conn == C_PAUSED_SYNC_T) - set_bit(STOP_SYNC_TIMER, &mdev->flags); - } + spin_lock_irq(&tconn->req_lock); - if (os.conn == C_CONNECTED && - (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) { - mdev->ov_position = 0; - mdev->rs_total = - mdev->rs_mark_left = drbd_bm_bits(mdev); - if (mdev->agreed_pro_version >= 90) - set_ov_position(mdev, ns.conn); - else - mdev->ov_start_sector = 0; - mdev->ov_left = mdev->rs_total - - BM_SECT_TO_BIT(mdev->ov_position); - mdev->rs_start = - mdev->rs_mark_time = jiffies; - mdev->ov_last_oos_size = 0; - mdev->ov_last_oos_start = 0; - - if (ns.conn == C_VERIFY_S) { - dev_info(DEV, "Starting Online Verify from sector %llu\n", - (unsigned long long)mdev->ov_position); - mod_timer(&mdev->resync_timer, jiffies); - } - } + _tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING); - if (get_ldev(mdev)) { - u32 mdf = mdev->ldev->md.flags & 
~(MDF_CONSISTENT|MDF_PRIMARY_IND| - MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| - MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); - - if (test_bit(CRASHED_PRIMARY, &mdev->flags)) - mdf |= MDF_CRASHED_PRIMARY; - if (mdev->state.role == R_PRIMARY || - (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY)) - mdf |= MDF_PRIMARY_IND; - if (mdev->state.conn > C_WF_REPORT_PARAMS) - mdf |= MDF_CONNECTED_IND; - if (mdev->state.disk > D_INCONSISTENT) - mdf |= MDF_CONSISTENT; - if (mdev->state.disk > D_OUTDATED) - mdf |= MDF_WAS_UP_TO_DATE; - if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT) - mdf |= MDF_PEER_OUT_DATED; - if (mdf != mdev->ldev->md.flags) { - mdev->ldev->md.flags = mdf; - drbd_md_mark_dirty(mdev); - } - if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT) - drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]); - put_ldev(mdev); - } + /* we expect this list to be empty. */ + if (!list_empty(&tconn->out_of_sequence_requests)) + conn_err(tconn, "ASSERT FAILED list_empty(&out_of_sequence_requests)\n"); - /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */ - if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT && - os.peer == R_SECONDARY && ns.peer == R_PRIMARY) - set_bit(CONSIDER_RESYNC, &mdev->flags); - - /* Receiver should clean up itself */ - if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) - drbd_thread_stop_nowait(&mdev->receiver); - - /* Now the receiver finished cleaning up itself, it should die */ - if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) - drbd_thread_stop_nowait(&mdev->receiver); - - /* Upon network failure, we need to restart the receiver. 
*/ - if (os.conn > C_TEAR_DOWN && - ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) - drbd_thread_restart_nowait(&mdev->receiver); - - ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); - if (ascw) { - ascw->os = os; - ascw->ns = ns; - ascw->flags = flags; - ascw->w.cb = w_after_state_ch; - ascw->done = done; - drbd_queue_work(&mdev->data.work, &ascw->w); - } else { - dev_warn(DEV, "Could not kmalloc an ascw\n"); + /* but just in case, clean it up anyways! */ + list_for_each_safe(le, tle, &tconn->out_of_sequence_requests) { + r = list_entry(le, struct drbd_request, tl_requests); + /* It would be nice to complete outside of spinlock. + * But this is easier for now. */ + _req_mod(r, CONNECTION_LOST_WHILE_PENDING); } - return rv; -} - -STATIC int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused) -{ - struct after_state_chg_work *ascw = - container_of(w, struct after_state_chg_work, w); - after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags); - if (ascw->flags & CS_WAIT_COMPLETE) { - D_ASSERT(ascw->done != NULL); - complete(ascw->done); - } - kfree(ascw); + /* ensure bit indicating barrier is required is clear */ + clear_bit(CREATE_BARRIER, &tconn->flags); - return 1; + spin_unlock_irq(&tconn->req_lock); } -static void abw_start_sync(struct drbd_conf *mdev, int rv) +void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what) { - if (rv) { - dev_err(DEV, "Writing the bitmap failed not starting resync.\n"); - _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE); - return; - } - - switch (mdev->state.conn) { - case C_STARTING_SYNC_T: - _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); - break; - case C_STARTING_SYNC_S: - drbd_start_resync(mdev, C_SYNC_SOURCE); - break; - } + spin_lock_irq(&tconn->req_lock); + _tl_restart(tconn, what); + spin_unlock_irq(&tconn->req_lock); } /** - * after_state_ch() - Perform after state change actions that may sleep + * tl_abort_disk_io() - Abort disk I/O for all requests for a certain mdev in 
the TL * @mdev: DRBD device. - * @os: old state. - * @ns: new state. - * @flags: Flags */ -STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, - union drbd_state ns, enum chg_state_flags flags) +void tl_abort_disk_io(struct drbd_conf *mdev) { - enum drbd_fencing_p fp; - - if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { - clear_bit(CRASHED_PRIMARY, &mdev->flags); - if (mdev->p_uuid) - mdev->p_uuid[UI_FLAGS] &= ~((u64)2); - } - - fp = FP_DONT_CARE; - if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - } - - /* Inform userspace about the change... */ - drbd_bcast_state(mdev, ns); + struct drbd_tconn *tconn = mdev->tconn; + struct drbd_tl_epoch *b; + struct list_head *le, *tle; + struct drbd_request *req; - if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) && - (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) - drbd_khelper(mdev, "pri-on-incon-degr"); - - /* Here we have the actions that are performed after a - state change. This function might sleep */ - - if (fp == FP_STONITH && ns.susp) { - /* case1: The outdate peer handler is successful: - * case2: The connection was established again: */ - if ((os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) || - (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)) { - tl_clear(mdev); - spin_lock_irq(&mdev->req_lock); - _drbd_set_state(_NS(mdev, susp, 0), CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->req_lock); - } - } - /* Do not change the order of the if above and the two below... 
*/ - if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */ - drbd_send_uuids(mdev); - drbd_send_state(mdev); - } - if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S) - drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)"); - - /* Lost contact to peer's copy of the data */ - if ((os.pdsk >= D_INCONSISTENT && - os.pdsk != D_UNKNOWN && - os.pdsk != D_OUTDATED) - && (ns.pdsk < D_INCONSISTENT || - ns.pdsk == D_UNKNOWN || - ns.pdsk == D_OUTDATED)) { - kfree(mdev->p_uuid); - mdev->p_uuid = NULL; - if (get_ldev(mdev)) { - if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && - mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { - drbd_uuid_new_current(mdev); - drbd_send_uuids(mdev); - } - put_ldev(mdev); + spin_lock_irq(&tconn->req_lock); + b = tconn->oldest_tle; + while (b) { + list_for_each_safe(le, tle, &b->requests) { + req = list_entry(le, struct drbd_request, tl_requests); + if (!(req->rq_state & RQ_LOCAL_PENDING)) + continue; + if (req->w.mdev == mdev) + _req_mod(req, ABORT_DISK_IO); } + b = b->next; } - if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { - if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) - drbd_uuid_new_current(mdev); - - /* D_DISKLESS Peer becomes secondary */ - if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) - drbd_al_to_on_disk_bm(mdev); - put_ldev(mdev); - } - - /* Last part of the attaching process ... */ - if (ns.conn >= C_CONNECTED && - os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { - kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */ - mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */ - drbd_send_sizes(mdev, 0); /* to start sync... */ - drbd_send_uuids(mdev); - drbd_send_state(mdev); - } - - /* We want to pause/continue resync, tell peer. 
*/ - if (ns.conn >= C_CONNECTED && - ((os.aftr_isp != ns.aftr_isp) || - (os.user_isp != ns.user_isp))) - drbd_send_state(mdev); - - /* In case one of the isp bits got set, suspend other devices. */ - if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && - (ns.aftr_isp || ns.peer_isp || ns.user_isp)) - suspend_other_sg(mdev); - - /* Make sure the peer gets informed about eventual state - changes (ISP bits) while we were in WFReportParams. */ - if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) - drbd_send_state(mdev); - - /* We are in the progress to start a full sync... */ - if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || - (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) - drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync"); - - /* We are invalidating our self... */ - if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && - os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) - drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate"); - - if (os.disk > D_FAILED && ns.disk == D_FAILED) { - enum drbd_io_error_p eh; - - eh = EP_PASS_ON; - if (get_ldev_if_state(mdev, D_FAILED)) { - eh = mdev->ldev->dc.on_io_error; - put_ldev(mdev); - } - - drbd_rs_cancel_all(mdev); - /* since get_ldev() only works as long as disk>=D_INCONSISTENT, - and it is D_DISKLESS here, local_cnt can only go down, it can - not increase... 
It will reach zero */ - wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); - mdev->rs_total = 0; - mdev->rs_failed = 0; - atomic_set(&mdev->rs_pending_cnt, 0); - - spin_lock_irq(&mdev->req_lock); - _drbd_set_state(_NS(mdev, disk, D_DISKLESS), CS_HARD, NULL); - spin_unlock_irq(&mdev->req_lock); - - if (eh == EP_CALL_HELPER) - drbd_khelper(mdev, "local-io-error"); - } - - if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) { - - if (os.disk == D_FAILED) /* && ns.disk == D_DISKLESS*/ { - if (drbd_send_state(mdev)) - dev_warn(DEV, "Notified peer that my disk is broken.\n"); - else - dev_err(DEV, "Sending state in drbd_io_error() failed\n"); - } - - wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); - lc_destroy(mdev->resync); - mdev->resync = NULL; - lc_destroy(mdev->act_log); - mdev->act_log = NULL; - __no_warn(local, - drbd_free_bc(mdev->ldev); - mdev->ldev = NULL;); - - if (mdev->md_io_tmpp) - __free_page(mdev->md_io_tmpp); - } - - /* Disks got bigger while they were detached */ - if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && - test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) { - if (ns.conn == C_CONNECTED) - resync_after_online_grow(mdev); - } - - /* A resync finished or aborted, wake paused devices... */ - if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) || - (os.peer_isp && !ns.peer_isp) || - (os.user_isp && !ns.user_isp)) - resume_next_sg(mdev); - - /* Upon network connection, we need to start the receiver */ - if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED) - drbd_thread_start(&mdev->receiver); - - /* Terminate worker thread if we are unconfigured - it will be - restarted as needed... 
*/ - if (ns.disk == D_DISKLESS && - ns.conn == C_STANDALONE && - ns.role == R_SECONDARY) { - if (os.aftr_isp != ns.aftr_isp) - resume_next_sg(mdev); - /* set in __drbd_set_state, unless CONFIG_PENDING was set */ - if (test_bit(DEVICE_DYING, &mdev->flags)) - drbd_thread_stop_nowait(&mdev->worker); + list_for_each_safe(le, tle, &tconn->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + if (!(req->rq_state & RQ_LOCAL_PENDING)) + continue; + if (req->w.mdev == mdev) + _req_mod(req, ABORT_DISK_IO); } - drbd_md_sync(mdev); + spin_unlock_irq(&tconn->req_lock); } - STATIC int drbd_thread_setup(void *arg) { struct drbd_thread *thi = (struct drbd_thread *) arg; - struct drbd_conf *mdev = thi->mdev; + struct drbd_tconn *tconn = thi->tconn; unsigned long flags; - long timeout; int retval; - const char *me = - thi == &mdev->receiver ? "receiver" : - thi == &mdev->asender ? "asender" : - thi == &mdev->worker ? "worker" : "NONSENSE"; daemonize("drbd_thread"); - D_ASSERT(get_t_state(thi) == Running); - D_ASSERT(thi->task == NULL); /* state engine takes this lock (in drbd_thread_stop_nowait) * while holding the req_lock irqsave */ spin_lock_irqsave(&thi->t_lock, flags); @@ -1485,139 +540,137 @@ __set_current_state(TASK_UNINTERRUPTIBLE); complete(&thi->startstop); /* notify: thi->task is set. */ - timeout = schedule_timeout(10*HZ); - D_ASSERT(timeout != 0); + schedule_timeout(10*HZ); + snprintf(current->comm, sizeof(current->comm), "drbd_%c_%s", + thi->name[0], thi->tconn->name); restart: retval = thi->function(thi); spin_lock_irqsave(&thi->t_lock, flags); - /* if the receiver has been "Exiting", the last thing it did + /* if the receiver has been "EXITING", the last thing it did * was set the conn state to "StandAlone", * if now a re-connect request comes in, conn state goes C_UNCONNECTED, * and receiver thread will be "started". - * drbd_thread_start needs to set "Restarting" in that case. 
+ * drbd_thread_start needs to set "RESTARTING" in that case. * t_state check and assignment needs to be within the same spinlock, - * so either thread_start sees Exiting, and can remap to Restarting, - * or thread_start see None, and can proceed as normal. + * so either thread_start sees EXITING, and can remap to RESTARTING, + * or thread_start see NONE, and can proceed as normal. */ - if (thi->t_state == Restarting) { - dev_info(DEV, "Restarting %s thread\n", me); - thi->t_state = Running; + if (thi->t_state == RESTARTING) { + conn_info(tconn, "Restarting %s thread\n", thi->name); + thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); goto restart; } thi->task = NULL; - thi->t_state = None; + thi->t_state = NONE; smp_mb(); /* THINK maybe two different completions? */ - complete(&thi->startstop); /* notify: thi->task unset. */ - dev_info(DEV, "Terminating %s thread\n", me); + complete_all(&thi->startstop); /* notify: thi->task unset. */ + conn_info(tconn, "Terminating %s thread\n", thi->name); spin_unlock_irqrestore(&thi->t_lock, flags); /* Release mod reference taken when thread was started */ + + kref_put(&tconn->kref, &conn_destroy); module_put(THIS_MODULE); return retval; } -STATIC void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi, - int (*func) (struct drbd_thread *)) +STATIC void drbd_thread_init(struct drbd_tconn *tconn, struct drbd_thread *thi, + int (*func) (struct drbd_thread *), char *name) { spin_lock_init(&thi->t_lock); thi->task = NULL; - thi->t_state = None; + thi->t_state = NONE; thi->function = func; - thi->mdev = mdev; + thi->tconn = tconn; + strncpy(thi->name, name, ARRAY_SIZE(thi->name)); } int drbd_thread_start(struct drbd_thread *thi) { - int pid; - struct drbd_conf *mdev = thi->mdev; + struct drbd_tconn *tconn = thi->tconn; unsigned long flags; - const char *me = - thi == &mdev->receiver ? "receiver" : - thi == &mdev->asender ? "asender" : - thi == &mdev->worker ? 
"worker" : "NONSENSE"; + int pid; /* is used from state engine doing drbd_thread_stop_nowait, * while holding the req lock irqsave */ spin_lock_irqsave(&thi->t_lock, flags); switch (thi->t_state) { - case None: - dev_info(DEV, "Starting %s thread (from %s [%d])\n", - me, current->comm, current->pid); + case NONE: + conn_info(tconn, "Starting %s thread (from %s [%d])\n", + thi->name, current->comm, current->pid); /* Get ref on module for thread - this is released when thread exits */ if (!try_module_get(THIS_MODULE)) { - dev_err(DEV, "Failed to get module reference in drbd_thread_start\n"); + conn_err(tconn, "Failed to get module reference in drbd_thread_start\n"); spin_unlock_irqrestore(&thi->t_lock, flags); - return FALSE; + return false; } + kref_get(&thi->tconn->kref); + init_completion(&thi->startstop); - D_ASSERT(thi->task == NULL); thi->reset_cpu_mask = 1; - thi->t_state = Running; + thi->t_state = RUNNING; spin_unlock_irqrestore(&thi->t_lock, flags); flush_signals(current); /* otherw. 
may get -ERESTARTNOINTR */ pid = kernel_thread(drbd_thread_setup, (void *) thi, CLONE_FS); if (pid < 0) { - dev_err(DEV, "Couldn't start thread (%d)\n", pid); + conn_err(tconn, "Couldn't start thread (%d)\n", pid); + kref_put(&tconn->kref, &conn_destroy); module_put(THIS_MODULE); - return FALSE; + return false; } /* waits until thi->task is set */ wait_for_completion(&thi->startstop); - if (thi->t_state != Running) - dev_err(DEV, "ASSERT FAILED: %s t_state == %d expected %d.\n", - me, thi->t_state, Running); + if (thi->t_state != RUNNING) + conn_err(tconn, "ASSERT FAILED: %s t_state == %d expected %d.\n", + thi->name, thi->t_state, RUNNING); if (thi->task) wake_up_process(thi->task); else - dev_err(DEV, "ASSERT FAILED thi->task is NULL where it should be set!?\n"); + conn_err(tconn, "ASSERT FAILED thi->task is NULL where it should be set!?\n"); break; - case Exiting: - thi->t_state = Restarting; - dev_info(DEV, "Restarting %s thread (from %s [%d])\n", - me, current->comm, current->pid); + case EXITING: + thi->t_state = RESTARTING; + conn_info(tconn, "Restarting %s thread (from %s [%d])\n", + thi->name, current->comm, current->pid); /* fall through */ - case Running: - case Restarting: + case RUNNING: + case RESTARTING: default: spin_unlock_irqrestore(&thi->t_lock, flags); break; } - return TRUE; + return true; } void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait) { - struct drbd_conf *mdev = thi->mdev; + struct drbd_tconn *tconn = thi->tconn; unsigned long flags; - enum drbd_thread_state ns = restart ? Restarting : Exiting; - const char *me = - thi == &mdev->receiver ? "receiver" : - thi == &mdev->asender ? "asender" : - thi == &mdev->worker ? "worker" : "NONSENSE"; + enum drbd_thread_state ns = restart ? 
RESTARTING : EXITING; /* may be called from state engine, holding the req lock irqsave */ spin_lock_irqsave(&thi->t_lock, flags); - /* dev_info(DEV, "drbd_thread_stop: %s [%d]: %s %d -> %d; %d\n", + /* conn_err(tconn, "drbd_thread_stop: %s [%d]: %s %d -> %d; %d\n", current->comm, current->pid, thi->task ? thi->task->comm : "NULL", thi->t_state, ns, wait); */ - if (thi->t_state == None) { + if (thi->t_state == NONE) { spin_unlock_irqrestore(&thi->t_lock, flags); if (restart) drbd_thread_start(thi); @@ -1635,23 +688,54 @@ init_completion(&thi->startstop); if (thi->task != current) force_sig(DRBD_SIGKILL, thi->task); - else - D_ASSERT(!wait); + else if (wait) + conn_err(tconn, "ASSERT FAILED: wait=%d\n", wait); } spin_unlock_irqrestore(&thi->t_lock, flags); if (wait) { - D_ASSERT(thi->task != current); + if (thi->task == current) { + conn_err(tconn, "ASSERT FAILED: Trying to wait for current task!\n"); + return; + } wait_for_completion(&thi->startstop); spin_lock_irqsave(&thi->t_lock, flags); - D_ASSERT(thi->task == NULL); - if (thi->t_state != None) - dev_err(DEV, "ASSERT FAILED: %s t_state == %d expected %d.\n", - me, thi->t_state, None); + if (thi->t_state != NONE) + conn_err(tconn, "ASSERT FAILED: %s t_state == %d expected %d.\n", + thi->name, thi->t_state, NONE); spin_unlock_irqrestore(&thi->t_lock, flags); } } +static struct drbd_thread *drbd_task_to_thread(struct drbd_tconn *tconn, struct task_struct *task) +{ + struct drbd_thread *thi = + task == tconn->receiver.task ? &tconn->receiver : + task == tconn->asender.task ? &tconn->asender : + task == tconn->worker.task ? &tconn->worker : NULL; + + return thi; +} + +char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *task) +{ + struct drbd_thread *thi = drbd_task_to_thread(tconn, task); + return thi ? 
thi->name : task->comm; +} + +int conn_lowest_minor(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int vnr = 0, m; + + rcu_read_lock(); + mdev = idr_get_next(&tconn->volumes, &vnr); + m = mdev ? mdev_to_minor(mdev) : -1; + rcu_read_unlock(); + + return m; +} + #ifdef CONFIG_SMP /** * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs @@ -1660,223 +744,343 @@ * Forces all threads of a device onto the same CPU. This is beneficial for * DRBD's performance. May be overwritten by user's configuration. */ -void drbd_calc_cpu_mask(struct drbd_conf *mdev) +void drbd_calc_cpu_mask(struct drbd_tconn *tconn) { int ord, cpu; /* user override. */ - if (cpumask_weight(mdev->cpu_mask)) + if (cpumask_weight(tconn->cpu_mask)) return; - ord = mdev_to_minor(mdev) % cpumask_weight(cpu_online_mask); + ord = conn_lowest_minor(tconn) % cpumask_weight(cpu_online_mask); for_each_online_cpu(cpu) { if (ord-- == 0) { - cpumask_set_cpu(cpu, mdev->cpu_mask); + cpumask_set_cpu(cpu, tconn->cpu_mask); return; } } /* should not be reached */ - cpumask_setall(mdev->cpu_mask); + cpumask_setall(tconn->cpu_mask); } /** * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread * @mdev: DRBD device. + * @thi: drbd_thread object * * call in the "main loop" of _all_ threads, no need for any mutex, current won't die * prematurely. */ -void drbd_thread_current_set_cpu(struct drbd_conf *mdev) +void drbd_thread_current_set_cpu(struct drbd_thread *thi) { struct task_struct *p = current; - struct drbd_thread *thi = - p == mdev->asender.task ? &mdev->asender : - p == mdev->receiver.task ? &mdev->receiver : - p == mdev->worker.task ? 
&mdev->worker : - NULL; - ERR_IF(thi == NULL) - return; + if (!thi->reset_cpu_mask) return; thi->reset_cpu_mask = 0; - set_cpus_allowed_ptr(p, mdev->cpu_mask); + set_cpus_allowed_ptr(p, thi->tconn->cpu_mask); } #endif -/* the appropriate socket mutex must be held already */ -int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header *h, - size_t size, unsigned msg_flags) +/** + * drbd_header_size - size of a packet header + * + * The header size is a multiple of 8, so any payload following the header is + * word aligned on 64-bit architectures. (The bitmap send and receive code + * relies on this.) + */ +unsigned int drbd_header_size(struct drbd_tconn *tconn) { - int sent, ok; + if (tconn->agreed_pro_version >= 100) { + BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header100), 8)); + return sizeof(struct p_header100); + } else { + BUILD_BUG_ON(sizeof(struct p_header80) != + sizeof(struct p_header95)); + BUILD_BUG_ON(!IS_ALIGNED(sizeof(struct p_header80), 8)); + return sizeof(struct p_header80); + } +} - ERR_IF(!h) return FALSE; - ERR_IF(!size) return FALSE; +static unsigned int prepare_header80(struct p_header80 *h, enum drbd_packet cmd, int size) +{ + h->magic = cpu_to_be32(DRBD_MAGIC); + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be16(size); + return sizeof(struct p_header80); +} - h->magic = BE_DRBD_MAGIC; +static unsigned int prepare_header95(struct p_header95 *h, enum drbd_packet cmd, int size) +{ + h->magic = cpu_to_be16(DRBD_MAGIC_BIG); h->command = cpu_to_be16(cmd); - h->length = cpu_to_be16(size-sizeof(struct p_header)); + h->length = cpu_to_be32(size); + return sizeof(struct p_header95); +} - trace_drbd_packet(mdev, sock, 0, (void *)h, __FILE__, __LINE__); - sent = drbd_send(mdev, sock, h, size, msg_flags); +static unsigned int prepare_header100(struct p_header100 *h, enum drbd_packet cmd, + int size, int vnr) +{ + h->magic = cpu_to_be32(DRBD_MAGIC_100); + h->volume = cpu_to_be16(vnr); + h->command = 
cpu_to_be16(cmd); + h->length = cpu_to_be32(size); + h->pad = 0; + return sizeof(struct p_header100); +} - ok = (sent == size); - if (!ok) - dev_err(DEV, "short sent %s size=%d sent=%d\n", - cmdname(cmd), (int)size, sent); - return ok; +static unsigned int prepare_header(struct drbd_tconn *tconn, int vnr, + void *buffer, enum drbd_packet cmd, int size) +{ + if (tconn->agreed_pro_version >= 100) + return prepare_header100(buffer, cmd, size, vnr); + else if (tconn->agreed_pro_version >= 95 && + size > DRBD_MAX_SIZE_H80_PACKET) + return prepare_header95(buffer, cmd, size); + else + return prepare_header80(buffer, cmd, size); } -/* don't pass the socket. we may only look at it - * when we hold the appropriate socket mutex. - */ -int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, - enum drbd_packets cmd, struct p_header *h, size_t size) +static void *__conn_prepare_command(struct drbd_tconn *tconn, + struct drbd_socket *sock) { - int ok = 0; - struct socket *sock; + if (!sock->socket) + return NULL; + return sock->sbuf + drbd_header_size(tconn); +} - if (use_data_socket) { - mutex_lock(&mdev->data.mutex); - sock = mdev->data.socket; - } else { - mutex_lock(&mdev->meta.mutex); - sock = mdev->meta.socket; - } +void *conn_prepare_command(struct drbd_tconn *tconn, struct drbd_socket *sock) +{ + void *p; - /* drbd_disconnect() could have called drbd_free_sock() - * while we were waiting in down()... 
*/ - if (likely(sock != NULL)) - ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0); + mutex_lock(&sock->mutex); + p = __conn_prepare_command(tconn, sock); + if (!p) + mutex_unlock(&sock->mutex); - if (use_data_socket) - mutex_unlock(&mdev->data.mutex); - else - mutex_unlock(&mdev->meta.mutex); - return ok; + return p; } -int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, - size_t size) +void *drbd_prepare_command(struct drbd_conf *mdev, struct drbd_socket *sock) { - struct p_header h; - int ok; + return conn_prepare_command(mdev->tconn, sock); +} - h.magic = BE_DRBD_MAGIC; - h.command = cpu_to_be16(cmd); - h.length = cpu_to_be16(size); +static int __send_command(struct drbd_tconn *tconn, int vnr, + struct drbd_socket *sock, enum drbd_packet cmd, + unsigned int header_size, void *data, + unsigned int size) +{ + int msg_flags; + int err; - if (!drbd_get_data_sock(mdev)) - return 0; + /* + * Called with @data == NULL and the size of the data blocks in @size + * for commands that send data blocks. For those commands, omit the + * MSG_MORE flag: this will increase the likelihood that data blocks + * which are page aligned on the sender will end up page aligned on the + * receiver. + */ + msg_flags = data ? 
MSG_MORE : 0; - trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&h, __FILE__, __LINE__); + header_size += prepare_header(tconn, vnr, sock->sbuf, cmd, + header_size + size); + err = drbd_send_all(tconn, sock->socket, sock->sbuf, header_size, + msg_flags); + if (data && !err) + err = drbd_send_all(tconn, sock->socket, data, size, 0); + return err; +} - ok = (sizeof(h) == - drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0)); - ok = ok && (size == - drbd_send(mdev, mdev->data.socket, data, size, 0)); +static int __conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock, + enum drbd_packet cmd, unsigned int header_size, + void *data, unsigned int size) +{ + return __send_command(tconn, 0, sock, cmd, header_size, data, size); +} - drbd_put_data_sock(mdev); +int conn_send_command(struct drbd_tconn *tconn, struct drbd_socket *sock, + enum drbd_packet cmd, unsigned int header_size, + void *data, unsigned int size) +{ + int err; - return ok; + err = __conn_send_command(tconn, sock, cmd, header_size, data, size); + mutex_unlock(&sock->mutex); + return err; } -int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc) +int drbd_send_command(struct drbd_conf *mdev, struct drbd_socket *sock, + enum drbd_packet cmd, unsigned int header_size, + void *data, unsigned int size) { - struct p_rs_param_89 *p; - struct socket *sock; - int size, rv; - const int apv = mdev->agreed_pro_version; + int err; - size = apv <= 87 ? sizeof(struct p_rs_param) - : apv == 88 ? 
sizeof(struct p_rs_param) - + strlen(mdev->sync_conf.verify_alg) + 1 - : /* 89 */ sizeof(struct p_rs_param_89); + err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, header_size, + data, size); + mutex_unlock(&sock->mutex); + return err; +} + +int drbd_send_ping(struct drbd_tconn *tconn) +{ + struct drbd_socket *sock; + + sock = &tconn->meta; + if (!conn_prepare_command(tconn, sock)) + return -EIO; + return conn_send_command(tconn, sock, P_PING, 0, NULL, 0); +} + +int drbd_send_ping_ack(struct drbd_tconn *tconn) +{ + struct drbd_socket *sock; - /* used from admin command context and receiver/worker context. - * to avoid kmalloc, grab the socket right here, - * then use the pre-allocated sbuf there */ - mutex_lock(&mdev->data.mutex); - sock = mdev->data.socket; + sock = &tconn->meta; + if (!conn_prepare_command(tconn, sock)) + return -EIO; + return conn_send_command(tconn, sock, P_PING_ACK, 0, NULL, 0); +} - if (likely(sock != NULL)) { - enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM; +int drbd_send_sync_param(struct drbd_conf *mdev) +{ + struct drbd_socket *sock; + struct p_rs_param_95 *p; + int size; + const int apv = mdev->tconn->agreed_pro_version; + enum drbd_packet cmd; + struct net_conf *nc; + struct disk_conf *dc; - p = &mdev->data.sbuf.rs_param_89; + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; - /* initialize verify_alg and csums_alg */ - memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); - p->rate = cpu_to_be32(sc->rate); + size = apv <= 87 ? sizeof(struct p_rs_param) + : apv == 88 ? sizeof(struct p_rs_param) + + strlen(nc->verify_alg) + 1 + : apv <= 94 ? sizeof(struct p_rs_param_89) + : /* apv >= 95 */ sizeof(struct p_rs_param_95); - if (apv >= 88) - strcpy(p->verify_alg, mdev->sync_conf.verify_alg); - if (apv >= 89) - strcpy(p->csums_alg, mdev->sync_conf.csums_alg); + cmd = apv >= 89 ? 
P_SYNC_PARAM89 : P_SYNC_PARAM; - rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0); - } else - rv = 0; /* not ok */ + /* initialize verify_alg and csums_alg */ + memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); - mutex_unlock(&mdev->data.mutex); + if (get_ldev(mdev)) { + dc = rcu_dereference(mdev->ldev->disk_conf); + p->resync_rate = cpu_to_be32(dc->resync_rate); + p->c_plan_ahead = cpu_to_be32(dc->c_plan_ahead); + p->c_delay_target = cpu_to_be32(dc->c_delay_target); + p->c_fill_target = cpu_to_be32(dc->c_fill_target); + p->c_max_rate = cpu_to_be32(dc->c_max_rate); + put_ldev(mdev); + } else { + p->resync_rate = cpu_to_be32(DRBD_RESYNC_RATE_DEF); + p->c_plan_ahead = cpu_to_be32(DRBD_C_PLAN_AHEAD_DEF); + p->c_delay_target = cpu_to_be32(DRBD_C_DELAY_TARGET_DEF); + p->c_fill_target = cpu_to_be32(DRBD_C_FILL_TARGET_DEF); + p->c_max_rate = cpu_to_be32(DRBD_C_MAX_RATE_DEF); + } - return rv; + if (apv >= 88) + strcpy(p->verify_alg, nc->verify_alg); + if (apv >= 89) + strcpy(p->csums_alg, nc->csums_alg); + rcu_read_unlock(); + + return drbd_send_command(mdev, sock, cmd, size, NULL, 0); } -int drbd_send_protocol(struct drbd_conf *mdev) +int __drbd_send_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd) { + struct drbd_socket *sock; struct p_protocol *p; - int size, rv; + struct net_conf *nc; + int size, cf; - size = sizeof(struct p_protocol); + sock = &tconn->data; + p = __conn_prepare_command(tconn, sock); + if (!p) + return -EIO; + + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + + if (nc->tentative && tconn->agreed_pro_version < 92) { + rcu_read_unlock(); + mutex_unlock(&sock->mutex); + conn_err(tconn, "--dry-run is not supported by peer"); + return -EOPNOTSUPP; + } + + size = sizeof(*p); + if (tconn->agreed_pro_version >= 87) + size += strlen(nc->integrity_alg) + 1; + + p->protocol = cpu_to_be32(nc->wire_protocol); + p->after_sb_0p = cpu_to_be32(nc->after_sb_0p); + p->after_sb_1p = cpu_to_be32(nc->after_sb_1p); + p->after_sb_2p = 
cpu_to_be32(nc->after_sb_2p); + p->two_primaries = cpu_to_be32(nc->two_primaries); + cf = 0; + if (nc->discard_my_data) + cf |= CF_DISCARD_MY_DATA; + if (nc->tentative) + cf |= CF_DRY_RUN; + p->conn_flags = cpu_to_be32(cf); + + if (tconn->agreed_pro_version >= 87) + strcpy(p->integrity_alg, nc->integrity_alg); + rcu_read_unlock(); - if (mdev->agreed_pro_version >= 87) - size += strlen(mdev->net_conf->integrity_alg) + 1; + return __conn_send_command(tconn, sock, cmd, size, NULL, 0); +} - /* we must not recurse into our own queue, - * as that is blocked during handshake */ - p = kmalloc(size, GFP_NOIO); - if (p == NULL) - return 0; +int drbd_send_protocol(struct drbd_tconn *tconn) +{ + int err; - p->protocol = cpu_to_be32(mdev->net_conf->wire_protocol); - p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p); - p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p); - p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p); - p->want_lose = cpu_to_be32(mdev->net_conf->want_lose); - p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries); - - if (mdev->agreed_pro_version >= 87) - strcpy(p->integrity_alg, mdev->net_conf->integrity_alg); - - rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL, - (struct p_header *)p, size); - kfree(p); - return rv; + mutex_lock(&tconn->data.mutex); + err = __drbd_send_protocol(tconn, P_PROTOCOL); + mutex_unlock(&tconn->data.mutex); + + return err; } int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags) { - struct p_uuids p; + struct drbd_socket *sock; + struct p_uuids *p; int i; if (!get_ldev_if_state(mdev, D_NEGOTIATING)) - return 1; + return 0; + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) { + put_ldev(mdev); + return -EIO; + } for (i = UI_CURRENT; i < UI_SIZE; i++) - p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0; + p->uuid[i] = mdev->ldev ? 
cpu_to_be64(mdev->ldev->md.uuid[i]) : 0; mdev->comm_bm_set = drbd_bm_total_weight(mdev); - p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); - uuid_flags |= mdev->net_conf->want_lose ? 1 : 0; + p->uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set); + rcu_read_lock(); + uuid_flags |= rcu_dereference(mdev->tconn->net_conf)->discard_my_data ? 1 : 0; + rcu_read_unlock(); uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0; uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0; - p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags); + p->uuid[UI_FLAGS] = cpu_to_be64(uuid_flags); put_ldev(mdev); - - return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS, - (struct p_header *)&p, sizeof(p)); + return drbd_send_command(mdev, sock, P_UUIDS, sizeof(*p), NULL, 0); } int drbd_send_uuids(struct drbd_conf *mdev) @@ -1889,103 +1093,210 @@ return _drbd_send_uuids(mdev, 8); } +void drbd_print_uuids(struct drbd_conf *mdev, const char *text) +{ + if (get_ldev_if_state(mdev, D_NEGOTIATING)) { + u64 *uuid = mdev->ldev->md.uuid; + dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n", + text, + (unsigned long long)uuid[UI_CURRENT], + (unsigned long long)uuid[UI_BITMAP], + (unsigned long long)uuid[UI_HISTORY_START], + (unsigned long long)uuid[UI_HISTORY_END]); + put_ldev(mdev); + } else { + dev_info(DEV, "%s effective data uuid: %016llX\n", + text, + (unsigned long long)mdev->ed_uuid); + } +} -int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val) +void drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev) { - struct p_rs_uuid p; + struct drbd_socket *sock; + struct p_rs_uuid *p; + u64 uuid; + + D_ASSERT(mdev->state.disk == D_UP_TO_DATE); - p.uuid = cpu_to_be64(val); + uuid = mdev->ldev->md.uuid[UI_BITMAP]; + if (uuid && uuid != UUID_JUST_CREATED) + uuid = uuid + UUID_NEW_BM_OFFSET; + else + get_random_bytes(&uuid, sizeof(u64)); + drbd_uuid_set(mdev, UI_BITMAP, uuid); + drbd_print_uuids(mdev, "updated sync UUID"); + drbd_md_sync(mdev); - return drbd_send_cmd(mdev, USE_DATA_SOCKET, 
P_SYNC_UUID, - (struct p_header *)&p, sizeof(p)); + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (p) { + p->uuid = cpu_to_be64(uuid); + drbd_send_command(mdev, sock, P_SYNC_UUID, sizeof(*p), NULL, 0); + } } -int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply) +int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags) { - struct p_sizes p; + struct drbd_socket *sock; + struct p_sizes *p; sector_t d_size, u_size; - int q_order_type; - int ok; + int q_order_type, max_bio_size; if (get_ldev_if_state(mdev, D_NEGOTIATING)) { D_ASSERT(mdev->ldev->backing_bdev); d_size = drbd_get_max_capacity(mdev->ldev); - u_size = mdev->ldev->dc.disk_size; + rcu_read_lock(); + u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size; + rcu_read_unlock(); q_order_type = drbd_queue_order_type(mdev); - p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev)); + max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9; + max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE); put_ldev(mdev); } else { d_size = 0; u_size = 0; q_order_type = QUEUE_ORDERED_NONE; + max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */ } - p.d_size = cpu_to_be64(d_size); - p.u_size = cpu_to_be64(u_size); - p.c_size = cpu_to_be64(trigger_reply ? 
0 : drbd_get_capacity(mdev->this_bdev)); - p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue)); - p.queue_order_type = cpu_to_be32(q_order_type); + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; - ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES, - (struct p_header *)&p, sizeof(p)); - return ok; + if (mdev->tconn->agreed_pro_version <= 94) + max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET); + else if (mdev->tconn->agreed_pro_version < 100) + max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE_P95); + + p->d_size = cpu_to_be64(d_size); + p->u_size = cpu_to_be64(u_size); + p->c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev)); + p->max_bio_size = cpu_to_be32(max_bio_size); + p->queue_order_type = cpu_to_be16(q_order_type); + p->dds_flags = cpu_to_be16(flags); + return drbd_send_command(mdev, sock, P_SIZES, sizeof(*p), NULL, 0); } /** - * drbd_send_state() - Sends the drbd state to the peer + * drbd_send_current_state() - Sends the drbd state to the peer * @mdev: DRBD device. */ -int drbd_send_state(struct drbd_conf *mdev) +int drbd_send_current_state_(struct drbd_conf *mdev, const char *func, unsigned int line) { - struct socket *sock; - struct p_state p; - int ok = 0; + struct drbd_socket *sock; + struct p_state *p; - /* Grab state lock so we wont send state if we're in the middle - * of a cluster wide state change on another thread */ - drbd_state_lock(mdev); + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->state = cpu_to_be32(mdev->state.i); /* Within the send mutex */ + return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0); +} - mutex_lock(&mdev->data.mutex); +/** + * drbd_send_state() - After a state change, sends the new state to the peer + * @mdev: DRBD device. + * @state: the state to send, not necessarily the current state. 
+ * + * Each state change queues an "after_state_ch" work, which will eventually + * send the resulting new state to the peer. If more state changes happen + * between queuing and processing of the after_state_ch work, we still + * want to send each intermediary state in the order it occurred. + */ +int drbd_send_state_(struct drbd_conf *mdev, union drbd_state state, const char *func, unsigned int line) +{ + struct drbd_socket *sock; + struct p_state *p; - p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */ - sock = mdev->data.socket; + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->state = cpu_to_be32(state.i); /* Within the send mutex */ + return drbd_send_command(mdev, sock, P_STATE, sizeof(*p), NULL, 0); +} - if (likely(sock != NULL)) { - ok = _drbd_send_cmd(mdev, sock, P_STATE, - (struct p_header *)&p, sizeof(p), 0); - } +int drbd_send_state_req(struct drbd_conf *mdev, union drbd_state mask, union drbd_state val) +{ + struct drbd_socket *sock; + struct p_req_state *p; - mutex_unlock(&mdev->data.mutex); + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->mask = cpu_to_be32(mask.i); + p->val = cpu_to_be32(val.i); + return drbd_send_command(mdev, sock, P_STATE_CHG_REQ, sizeof(*p), NULL, 0); +} - drbd_state_unlock(mdev); - return ok; +int conn_send_state_req(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val) +{ + enum drbd_packet cmd; + struct drbd_socket *sock; + struct p_req_state *p; + + cmd = tconn->agreed_pro_version < 100 ? 
P_STATE_CHG_REQ : P_CONN_ST_CHG_REQ; + sock = &tconn->data; + p = conn_prepare_command(tconn, sock); + if (!p) + return -EIO; + p->mask = cpu_to_be32(mask.i); + p->val = cpu_to_be32(val.i); + return conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0); } -int drbd_send_state_req(struct drbd_conf *mdev, - union drbd_state mask, union drbd_state val) +void drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode) { - struct p_req_state p; + struct drbd_socket *sock; + struct p_req_state_reply *p; + + sock = &mdev->tconn->meta; + p = drbd_prepare_command(mdev, sock); + if (p) { + p->retcode = cpu_to_be32(retcode); + drbd_send_command(mdev, sock, P_STATE_CHG_REPLY, sizeof(*p), NULL, 0); + } +} - p.mask = cpu_to_be32(mask.i); - p.val = cpu_to_be32(val.i); +void conn_send_sr_reply(struct drbd_tconn *tconn, enum drbd_state_rv retcode) +{ + struct drbd_socket *sock; + struct p_req_state_reply *p; + enum drbd_packet cmd = tconn->agreed_pro_version < 100 ? P_STATE_CHG_REPLY : P_CONN_ST_CHG_REPLY; - return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ, - (struct p_header *)&p, sizeof(p)); + sock = &tconn->meta; + p = conn_prepare_command(tconn, sock); + if (p) { + p->retcode = cpu_to_be32(retcode); + conn_send_command(tconn, sock, cmd, sizeof(*p), NULL, 0); + } } -int drbd_send_sr_reply(struct drbd_conf *mdev, int retcode) +static void dcbp_set_code(struct p_compressed_bm *p, enum drbd_bitmap_code code) { - struct p_req_state_reply p; + BUG_ON(code & ~0xf); + p->encoding = (p->encoding & ~0xf) | code; +} - p.retcode = cpu_to_be32(retcode); +static void dcbp_set_start(struct p_compressed_bm *p, int set) +{ + p->encoding = (p->encoding & ~0x80) | (set ? 
0x80 : 0); +} - return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY, - (struct p_header *)&p, sizeof(p)); +static void dcbp_set_pad_bits(struct p_compressed_bm *p, int n) +{ + BUG_ON(n & ~0x7); + p->encoding = (p->encoding & (~0x7 << 4)) | (n << 4); } int fill_bitmap_rle_bits(struct drbd_conf *mdev, - struct p_compressed_bm *p, - struct bm_xfer_ctx *c) + struct p_compressed_bm *p, + unsigned int size, + struct bm_xfer_ctx *c) { struct bitstream bs; unsigned long plain_bits; @@ -1993,19 +1304,21 @@ unsigned long rl; unsigned len; unsigned toggle; - int bits; + int bits, use_rle; /* may we use this feature? */ - if ((mdev->sync_conf.use_rle == 0) || - (mdev->agreed_pro_version < 90)) - return 0; + rcu_read_lock(); + use_rle = rcu_dereference(mdev->tconn->net_conf)->use_rle; + rcu_read_unlock(); + if (!use_rle || mdev->tconn->agreed_pro_version < 90) + return 0; if (c->bit_offset >= c->bm_bits) return 0; /* nothing to do. */ /* use at most thus many bytes */ - bitstream_init(&bs, p->code, BM_PACKET_VLI_BYTES_MAX, 0); - memset(p->code, 0, BM_PACKET_VLI_BYTES_MAX); + bitstream_init(&bs, p->code, size, 0); + memset(p->code, 0, size); /* plain bits covered in this code string */ plain_bits = 0; @@ -2027,12 +1340,12 @@ if (rl == 0) { /* the first checked bit was set, * store start value, */ - DCBP_set_start(p, 1); + dcbp_set_start(p, 1); /* but skip encoding of zero run length */ toggle = !toggle; continue; } - DCBP_set_start(p, 0); + dcbp_set_start(p, 0); } /* paranoia: catch zero runlength. 
@@ -2072,76 +1385,81 @@ bm_xfer_ctx_bit_to_word_offset(c); /* store pad_bits */ - DCBP_set_pad_bits(p, (8 - bs.cur.bit) & 0x7); + dcbp_set_pad_bits(p, (8 - bs.cur.bit) & 0x7); return len; } -enum { OK, FAILED, DONE } -send_bitmap_rle_or_plain(struct drbd_conf *mdev, - struct p_header *h, struct bm_xfer_ctx *c) -{ - struct p_compressed_bm *p = (void*)h; - unsigned long num_words; - int len; - int ok; - - len = fill_bitmap_rle_bits(mdev, p, c); +/** + * send_bitmap_rle_or_plain + * + * Return 0 when done, 1 when another iteration is needed, and a negative error + * code upon failure. + */ +STATIC int +send_bitmap_rle_or_plain(struct drbd_conf *mdev, struct bm_xfer_ctx *c) +{ + struct drbd_socket *sock = &mdev->tconn->data; + unsigned int header_size = drbd_header_size(mdev->tconn); + struct p_compressed_bm *p = sock->sbuf + header_size; + int len, err; + len = fill_bitmap_rle_bits(mdev, p, + DRBD_SOCKET_BUFFER_SIZE - header_size - sizeof(*p), c); if (len < 0) - return FAILED; + return -EIO; if (len) { - DCBP_set_code(p, RLE_VLI_Bits); - ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h, - sizeof(*p) + len, 0); - + dcbp_set_code(p, RLE_VLI_Bits); + err = __send_command(mdev->tconn, mdev->vnr, sock, + P_COMPRESSED_BITMAP, sizeof(*p) + len, + NULL, 0); c->packets[0]++; - c->bytes[0] += sizeof(*p) + len; + c->bytes[0] += header_size + sizeof(*p) + len; if (c->bit_offset >= c->bm_bits) len = 0; /* DONE */ } else { /* was not compressible. * send a buffer full of plain text bits instead. 
*/ - num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); - len = num_words * sizeof(long); + unsigned int data_size; + unsigned long num_words; + unsigned long *p = sock->sbuf + header_size; + + data_size = DRBD_SOCKET_BUFFER_SIZE - header_size; + num_words = min_t(size_t, data_size / sizeof(*p), + c->bm_words - c->word_offset); + len = num_words * sizeof(*p); if (len) - drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload); - ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP, - h, sizeof(struct p_header) + len, 0); + drbd_bm_get_lel(mdev, c->word_offset, num_words, p); + err = __send_command(mdev->tconn, mdev->vnr, sock, P_BITMAP, len, NULL, 0); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; c->packets[1]++; - c->bytes[1] += sizeof(struct p_header) + len; + c->bytes[1] += header_size + len; if (c->bit_offset > c->bm_bits) c->bit_offset = c->bm_bits; } - ok = ok ? ((len == 0) ? DONE : OK) : FAILED; - - if (ok == DONE) - INFO_bm_xfer_stats(mdev, "send", c); - return ok; + if (!err) { + if (len == 0) { + INFO_bm_xfer_stats(mdev, "send", c); + return 0; + } else + return 1; + } + return -EIO; } /* See the comment at receive_bitmap() */ -int _drbd_send_bitmap(struct drbd_conf *mdev) +static int _drbd_send_bitmap(struct drbd_conf *mdev) { struct bm_xfer_ctx c; - struct p_header *p; - int ret; + int err; - ERR_IF(!mdev->bitmap) return FALSE; - - /* maybe we should use some per thread scratch page, - * and allocate that during initial device creation? 
*/ - p = (struct p_header *) __get_free_page(GFP_NOIO); - if (!p) { - dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); - return FALSE; - } + if (!expect(mdev->bitmap)) + return false; if (get_ldev(mdev)) { if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { @@ -2166,37 +1484,39 @@ }; do { - ret = send_bitmap_rle_or_plain(mdev, p, &c); - } while (ret == OK); + err = send_bitmap_rle_or_plain(mdev, &c); + } while (err > 0); - free_page((unsigned long) p); - return (ret == DONE); + return err == 0; } int drbd_send_bitmap(struct drbd_conf *mdev) { - int err; + struct drbd_socket *sock = &mdev->tconn->data; + int err = -1; - if (!drbd_get_data_sock(mdev)) - return -1; - err = !_drbd_send_bitmap(mdev); - drbd_put_data_sock(mdev); + mutex_lock(&sock->mutex); + if (sock->socket) + err = !_drbd_send_bitmap(mdev); + mutex_unlock(&sock->mutex); return err; } -int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) +void drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size) { - int ok; - struct p_barrier_ack p; - - p.barrier = barrier_nr; - p.set_size = cpu_to_be32(set_size); + struct drbd_socket *sock; + struct p_barrier_ack *p; if (mdev->state.conn < C_CONNECTED) - return FALSE; - ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK, - (struct p_header *)&p, sizeof(p)); - return ok; + return; + + sock = &mdev->tconn->meta; + p = drbd_prepare_command(mdev, sock); + if (!p) + return; + p->barrier = barrier_nr; + p->set_size = cpu_to_be32(set_size); + drbd_send_command(mdev, sock, P_BARRIER_ACK, sizeof(*p), NULL, 0); } /** @@ -2207,61 +1527,62 @@ * @blksize: size in byte, needs to be in big endian byte order * @block_id: Id, big endian byte order */ -STATIC int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd, - u64 sector, - u32 blksize, - u64 block_id) +STATIC int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, + u64 sector, u32 blksize, u64 block_id) { - int ok; - struct p_block_ack p; - - 
p.sector = sector; - p.block_id = block_id; - p.blksize = blksize; - p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq)); + struct drbd_socket *sock; + struct p_block_ack *p; - if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED) - return FALSE; - ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd, - (struct p_header *)&p, sizeof(p)); - return ok; -} - -int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd, - struct p_data *dp) -{ - const int header_size = sizeof(struct p_data) - - sizeof(struct p_header); - int data_size = ((struct p_header *)dp)->length - header_size; + if (mdev->state.conn < C_CONNECTED) + return -EIO; - return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), - dp->block_id); + sock = &mdev->tconn->meta; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->sector = sector; + p->block_id = block_id; + p->blksize = blksize; + p->seq_num = cpu_to_be32(atomic_inc_return(&mdev->packet_seq)); + return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0); +} + +/* dp->sector and dp->block_id already/still in network byte order, + * data_size is payload size according to dp->head, + * and may need to be corrected for digest size. */ +void drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packet cmd, + struct p_data *dp, int data_size) +{ + if (mdev->tconn->peer_integrity_tfm) + data_size -= crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm); + _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size), + dp->block_id); } -int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd, - struct p_block_req *rp) +void drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packet cmd, + struct p_block_req *rp) { - return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id); + _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id); } /** * drbd_send_ack() - Sends an ack packet - * @mdev: DRBD device. - * @cmd: Packet command code. - * @e: Epoch entry. 
+ * @mdev: DRBD device + * @cmd: packet command code + * @peer_req: peer request */ -int drbd_send_ack(struct drbd_conf *mdev, - enum drbd_packets cmd, struct drbd_epoch_entry *e) +int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packet cmd, + struct drbd_peer_request *peer_req) { return _drbd_send_ack(mdev, cmd, - cpu_to_be64(e->sector), - cpu_to_be32(e->size), - e->block_id); + cpu_to_be64(peer_req->i.sector), + cpu_to_be32(peer_req->i.size), + peer_req->block_id); } /* This function misuses the block_id field to signal if the blocks * are is sync or not. */ -int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd, +int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packet cmd, sector_t sector, int blksize, u64 block_id) { return _drbd_send_ack(mdev, cmd, @@ -2273,86 +1594,86 @@ int drbd_send_drequest(struct drbd_conf *mdev, int cmd, sector_t sector, int size, u64 block_id) { - int ok; - struct p_block_req p; + struct drbd_socket *sock; + struct p_block_req *p; - p.sector = cpu_to_be64(sector); - p.block_id = block_id; - p.blksize = cpu_to_be32(size); - - ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, - (struct p_header *)&p, sizeof(p)); - return ok; -} - -int drbd_send_drequest_csum(struct drbd_conf *mdev, - sector_t sector, int size, - void *digest, int digest_size, - enum drbd_packets cmd) -{ - int ok; - struct p_block_req p; - - p.sector = cpu_to_be64(sector); - p.block_id = BE_DRBD_MAGIC + 0xbeef; - p.blksize = cpu_to_be32(size); - - p.head.magic = BE_DRBD_MAGIC; - p.head.command = cpu_to_be16(cmd); - p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header) + digest_size); - - mutex_lock(&mdev->data.mutex); - - ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), 0)); - ok = ok && (digest_size == drbd_send(mdev, mdev->data.socket, digest, digest_size, 0)); - - mutex_unlock(&mdev->data.mutex); - - return ok; + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->sector = 
cpu_to_be64(sector); + p->block_id = block_id; + p->blksize = cpu_to_be32(size); + return drbd_send_command(mdev, sock, cmd, sizeof(*p), NULL, 0); +} + +int drbd_send_drequest_csum(struct drbd_conf *mdev, sector_t sector, int size, + void *digest, int digest_size, enum drbd_packet cmd) +{ + struct drbd_socket *sock; + struct p_block_req *p; + + /* FIXME: Put the digest into the preallocated socket buffer. */ + + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->sector = cpu_to_be64(sector); + p->block_id = ID_SYNCER /* unused */; + p->blksize = cpu_to_be32(size); + return drbd_send_command(mdev, sock, cmd, sizeof(*p), + digest, digest_size); } int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size) { - int ok; - struct p_block_req p; + struct drbd_socket *sock; + struct p_block_req *p; - p.sector = cpu_to_be64(sector); - p.block_id = BE_DRBD_MAGIC + 0xbabe; - p.blksize = cpu_to_be32(size); - - ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST, - (struct p_header *)&p, sizeof(p)); - return ok; + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->sector = cpu_to_be64(sector); + p->block_id = ID_SYNCER /* unused */; + p->blksize = cpu_to_be32(size); + return drbd_send_command(mdev, sock, P_OV_REQUEST, sizeof(*p), NULL, 0); } /* called on sndtimeo - * returns FALSE if we should retry, - * TRUE if we think connection is dead + * returns false if we should retry, + * true if we think connection is dead */ -STATIC int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock) +STATIC int we_should_drop_the_connection(struct drbd_tconn *tconn, struct socket *sock) { int drop_it; - /* long elapsed = (long)(jiffies - mdev->last_received); */ - /* DUMPLU(elapsed); // elapsed ignored for now. 
*/ - drop_it = mdev->meta.socket == sock - || !mdev->asender.task - || get_t_state(&mdev->asender) != Running - || mdev->state.conn < C_CONNECTED; + drop_it = tconn->meta.socket == sock + || !tconn->asender.task + || get_t_state(&tconn->asender) != RUNNING + || tconn->cstate < C_WF_REPORT_PARAMS; if (drop_it) - return TRUE; + return true; - drop_it = !--mdev->ko_count; + drop_it = !--tconn->ko_count; if (!drop_it) { - dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n", - current->comm, current->pid, mdev->ko_count); - request_ping(mdev); + conn_err(tconn, "[%s/%d] sock_sendmsg time expired, ko = %u\n", + current->comm, current->pid, tconn->ko_count); + request_ping(tconn); } return drop_it; /* && (mdev->state == R_PRIMARY) */; } +static void drbd_update_congested(struct drbd_tconn *tconn) +{ + struct sock *sk = tconn->data.socket->sk; + if (sk->sk_wmem_queued > sk->sk_sndbuf * 4 / 5) + set_bit(NET_CONGESTED, &tconn->flags); +} + /* The idea of sendpage seems to be to put some kind of reference * to the page into the skb, and to hand it over to the NIC. In * this process get_page() gets called. @@ -2375,21 +1696,28 @@ * with page_count == 0 or PageSlab. 
*/ STATIC int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, - int offset, size_t size) + int offset, size_t size, unsigned msg_flags) { - int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0); + struct socket *socket; + void *addr; + int err; + + socket = mdev->tconn->data.socket; + addr = kmap(page) + offset; + err = drbd_send_all(mdev->tconn, socket, addr, size, msg_flags); kunmap(page); - if (sent == size) - mdev->send_cnt += size>>9; - return sent == size; + if (!err) + mdev->send_cnt += size >> 9; + return err; } STATIC int _drbd_send_page(struct drbd_conf *mdev, struct page *page, - int offset, size_t size) + int offset, size_t size, unsigned msg_flags) { + struct socket *socket = mdev->tconn->data.socket; mm_segment_t oldfs = get_fs(); - int sent, ok; int len = size; + int err = -EIO; /* e.g. XFS meta- & log-data is in slab pages, which have a * page_count of 0 and/or have PageSlab() set. @@ -2398,61 +1726,107 @@ * __page_cache_release a page that would actually still be referenced * by someone, leading to some obscure delayed Oops somewhere else. 
*/ if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) - return _drbd_no_send_page(mdev, page, offset, size); + return _drbd_no_send_page(mdev, page, offset, size, msg_flags); - drbd_update_congested(mdev); + msg_flags |= MSG_NOSIGNAL; + drbd_update_congested(mdev->tconn); set_fs(KERNEL_DS); do { - sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, - offset, len, - MSG_NOSIGNAL); - if (sent == -EAGAIN) { - if (we_should_drop_the_connection(mdev, - mdev->data.socket)) - break; - else - continue; - } + int sent; + + sent = socket->ops->sendpage(socket, page, offset, len, msg_flags); if (sent <= 0) { + if (sent == -EAGAIN) { + if (we_should_drop_the_connection(mdev->tconn, socket)) + break; + continue; + } dev_warn(DEV, "%s: size=%d len=%d sent=%d\n", __func__, (int)size, len, sent); + if (sent < 0) + err = sent; break; } len -= sent; offset += sent; } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/); set_fs(oldfs); - clear_bit(NET_CONGESTED, &mdev->flags); + clear_bit(NET_CONGESTED, &mdev->tconn->flags); - ok = (len == 0); - if (likely(ok)) - mdev->send_cnt += size>>9; - return ok; + if (len == 0) { + err = 0; + mdev->send_cnt += size >> 9; + } + return err; } static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) { struct bio_vec *bvec; int i; + /* hint all but last page with MSG_MORE */ __bio_for_each_segment(bvec, bio, i, 0) { - if (!_drbd_no_send_page(mdev, bvec->bv_page, - bvec->bv_offset, bvec->bv_len)) - return 0; + int err; + + err = _drbd_no_send_page(mdev, bvec->bv_page, + bvec->bv_offset, bvec->bv_len, + i == bio->bi_vcnt - 1 ? 0 : MSG_MORE); + if (err) + return err; } - return 1; + return 0; +} + +static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) +{ + struct bio_vec *bvec; + int i; + /* hint all but last page with MSG_MORE */ + __bio_for_each_segment(bvec, bio, i, 0) { + int err; + + err = _drbd_send_page(mdev, bvec->bv_page, + bvec->bv_offset, bvec->bv_len, + i == bio->bi_vcnt - 1 ? 
0 : MSG_MORE); + if (err) + return err; + } + return 0; } -static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) +static int _drbd_send_zc_ee(struct drbd_conf *mdev, + struct drbd_peer_request *peer_req) { - struct bio_vec *bvec; - int i; - __bio_for_each_segment(bvec, bio, i, 0) { - if (!_drbd_send_page(mdev, bvec->bv_page, - bvec->bv_offset, bvec->bv_len)) - return 0; + struct page *page = peer_req->pages; + unsigned len = peer_req->i.size; + int err; + + /* hint all but last page with MSG_MORE */ + page_chain_for_each(page) { + unsigned l = min_t(unsigned, len, PAGE_SIZE); + + err = _drbd_send_page(mdev, page, 0, l, + page_chain_next(page) ? MSG_MORE : 0); + if (err) + return err; + len -= l; } + return 0; +} - return 1; +/* see also wire_flags_to_bio() + * DRBD_REQ_*, because we need to semantically map the flags to data packet + * flags and back. We may replicate to other kernel versions. */ +static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw) +{ + if (mdev->tconn->agreed_pro_version >= 95) + return (bi_rw & DRBD_REQ_SYNC ? DP_RW_SYNC : 0) | + (bi_rw & DRBD_REQ_FUA ? DP_FUA : 0) | + (bi_rw & DRBD_REQ_FLUSH ? DP_FLUSH : 0) | + (bi_rw & DRBD_REQ_DISCARD ? DP_DISCARD : 0); + + /* else: we used to communicate one bit only in older DRBD */ + return bi_rw & DRBD_REQ_SYNC ? DP_RW_SYNC : 0; } /* Used to send write requests @@ -2460,115 +1834,116 @@ */ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) { - int ok = 1; - struct p_data p; + struct drbd_socket *sock; + struct p_data *p; unsigned int dp_flags = 0; - void *dgb; int dgs; + int err; - if (!drbd_get_data_sock(mdev)) - return 0; - - dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? 
- crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; - - p.head.magic = BE_DRBD_MAGIC; - p.head.command = cpu_to_be16(P_DATA); - p.head.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->size); - - p.sector = cpu_to_be64(req->sector); - p.block_id = (unsigned long)req; - p.seq_num = cpu_to_be32(req->seq_num = - atomic_add_return(1, &mdev->packet_seq)); - dp_flags = 0; - - /* NOTE: no need to check if barriers supported here as we would - * not pass the test in make_request_common in that case - */ - if (bio_rw_flagged(req->master_bio, BIO_RW_BARRIER)) { - dev_err(DEV, "ASSERT FAILED would have set DP_HARDBARRIER\n"); - /* dp_flags |= DP_HARDBARRIER; */ - } -#ifdef BIO_RW_SYNC - if (bio_rw_flagged(req->master_bio, BIO_RW_SYNC)) - dp_flags |= DP_RW_SYNC; -#else - if (bio_rw_flagged(req->master_bio, BIO_RW_SYNCIO)) - dp_flags |= DP_RW_SYNC; - /* for now handle SYNCIO and UNPLUG - * as if they still were one and the same flag */ - if (bio_rw_flagged(req->master_bio, BIO_RW_UNPLUG)) - dp_flags |= DP_RW_SYNC; -#endif + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + dgs = mdev->tconn->integrity_tfm ? 
crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0; + + if (!p) + return -EIO; + p->sector = cpu_to_be64(req->i.sector); + p->block_id = (unsigned long)req; + p->seq_num = cpu_to_be32(req->seq_num = atomic_inc_return(&mdev->packet_seq)); + dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn <= C_PAUSED_SYNC_T) dp_flags |= DP_MAY_SET_IN_SYNC; - - p.dp_flags = cpu_to_be32(dp_flags); - trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__); - ok = (sizeof(p) == - drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); - if (ok && dgs) { - dgb = mdev->int_dig_out; - drbd_csum(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); - ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); - } - if (ok) { - if (mdev->net_conf->wire_protocol == DRBD_PROT_A) - ok = _drbd_send_bio(mdev, req->master_bio); + if (mdev->tconn->agreed_pro_version >= 100) { + if (req->rq_state & RQ_EXP_RECEIVE_ACK) + dp_flags |= DP_SEND_RECEIVE_ACK; + if (req->rq_state & RQ_EXP_WRITE_ACK) + dp_flags |= DP_SEND_WRITE_ACK; + } + p->dp_flags = cpu_to_be32(dp_flags); + if (dgs) + drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, p + 1); + err = __send_command(mdev->tconn, mdev->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size); + if (!err) { + /* For protocol A, we have to memcpy the payload into + * socket buffers, as we may complete right away + * as soon as we handed it over to tcp, at which point the data + * pages may become invalid. + * + * For data-integrity enabled, we copy it as well, so we can be + * sure that even if the bio pages may still be modified, it + * won't change the data on the wire, thus if the digest checks + * out ok after sending on this side, but does not fit on the + * receiving side, we sure have detected corruption elsewhere. 
+ */ + if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK)) || dgs) + err = _drbd_send_bio(mdev, req->master_bio); else - ok = _drbd_send_zc_bio(mdev, req->master_bio); + err = _drbd_send_zc_bio(mdev, req->master_bio); + + /* double check digest, sometimes buffers have been modified in flight. */ + if (dgs > 0 && dgs <= 64) { + /* 64 byte, 512 bit, is the largest digest size + * currently supported in kernel crypto. */ + unsigned char digest[64]; + drbd_csum_bio(mdev, mdev->tconn->integrity_tfm, req->master_bio, digest); + if (memcmp(p + 1, digest, dgs)) { + dev_warn(DEV, + "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n", + (unsigned long long)req->i.sector, req->i.size); + } + } /* else if (dgs > 64) { + ... Be noisy about digest too large ... + } */ } + mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */ - drbd_put_data_sock(mdev); - return ok; + return err; } /* answer packet, used to send data back for read requests: * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY) * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY) */ -int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, - struct drbd_epoch_entry *e) +int drbd_send_block(struct drbd_conf *mdev, enum drbd_packet cmd, + struct drbd_peer_request *peer_req) { - int ok; - struct p_data p; - void *dgb; + struct drbd_socket *sock; + struct p_data *p; + int err; int dgs; - dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ? - crypto_hash_digestsize(mdev->integrity_w_tfm) : 0; + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); - p.head.magic = BE_DRBD_MAGIC; - p.head.command = cpu_to_be16(cmd); - p.head.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + e->size); - - p.sector = cpu_to_be64(e->sector); - p.block_id = e->block_id; - /* p.seq_num = 0; No sequence numbers here.. */ - - /* Only called by our kernel thread. 
- * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL - * in response to admin command or module unload. - */ - if (!drbd_get_data_sock(mdev)) - return 0; + dgs = mdev->tconn->integrity_tfm ? crypto_hash_digestsize(mdev->tconn->integrity_tfm) : 0; - trace_drbd_packet(mdev, mdev->data.socket, 0, (void *)&p, __FILE__, __LINE__); - ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, - sizeof(p), MSG_MORE); - if (ok && dgs) { - dgb = mdev->int_dig_out; - drbd_csum(mdev, mdev->integrity_w_tfm, e->private_bio, dgb); - ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); - } - if (ok) - ok = _drbd_send_zc_bio(mdev, e->private_bio); + if (!p) + return -EIO; + p->sector = cpu_to_be64(peer_req->i.sector); + p->block_id = peer_req->block_id; + p->seq_num = 0; /* unused */ + if (dgs) + drbd_csum_ee(mdev, mdev->tconn->integrity_tfm, peer_req, p + 1); + err = __send_command(mdev->tconn, mdev->vnr, sock, cmd, sizeof(*p) + dgs, NULL, peer_req->i.size); + if (!err) + err = _drbd_send_zc_ee(mdev, peer_req); + mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */ + + return err; +} + +int drbd_send_out_of_sync(struct drbd_conf *mdev, struct drbd_request *req) +{ + struct drbd_socket *sock; + struct p_block_desc *p; - drbd_put_data_sock(mdev); - return ok; + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; + p->sector = cpu_to_be64(req->i.sector); + p->blksize = cpu_to_be32(req->i.size); + return drbd_send_command(mdev, sock, P_OUT_OF_SYNC, sizeof(*p), NULL, 0); } /* @@ -2587,20 +1962,15 @@ /* * you must have down()ed the appropriate [m]sock_mutex elsewhere! 
*/ -int drbd_send(struct drbd_conf *mdev, struct socket *sock, +int drbd_send(struct drbd_tconn *tconn, struct socket *sock, void *buf, size_t size, unsigned msg_flags) { -#if !HAVE_KERNEL_SENDMSG - mm_segment_t oldfs; - struct iovec iov; -#else struct kvec iov; -#endif struct msghdr msg; int rv, sent = 0; if (!sock) - return -1000; + return -EBADR; /* THINK if (signal_pending) return ... ? */ @@ -2609,22 +1979,15 @@ msg.msg_name = NULL; msg.msg_namelen = 0; -#if !HAVE_KERNEL_SENDMSG - msg.msg_iov = &iov; - msg.msg_iovlen = 1; -#endif msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = msg_flags | MSG_NOSIGNAL; -#if !HAVE_KERNEL_SENDMSG - oldfs = get_fs(); - set_fs(KERNEL_DS); -#endif - - if (sock == mdev->data.socket) { - mdev->ko_count = mdev->net_conf->ko_count; - drbd_update_congested(mdev); + if (sock == tconn->data.socket) { + rcu_read_lock(); + tconn->ko_count = rcu_dereference(tconn->net_conf)->ko_count; + rcu_read_unlock(); + drbd_update_congested(tconn); } do { /* STRANGE @@ -2636,30 +1999,14 @@ * do we need to block DRBD_SIG if sock == &meta.socket ?? * otherwise wake_asender() might interrupt some send_*Ack ! */ -#if !HAVE_KERNEL_SENDMSG - rv = sock_sendmsg(sock, &msg, iov.iov_len); -#else rv = kernel_sendmsg(sock, &msg, &iov, 1, size); -#endif if (rv == -EAGAIN) { - if (we_should_drop_the_connection(mdev, sock)) + if (we_should_drop_the_connection(tconn, sock)) break; else continue; } - D_ASSERT(rv != 0); if (rv == -EINTR) { -#if 0 - /* FIXME this happens all the time. - * we don't care for now! - * eventually this should be sorted out be the proper - * use of the SIGNAL_ASENDER bit... */ - if (DRBD_ratelimit(5*HZ, 5)) { - dev_dbg(DEV, "Got a signal in drbd_send(,%c,)!\n", - sock == mdev->meta.socket ? 
'm' : 's'); - /* dump_stack(); */ - } -#endif flush_signals(current); rv = 0; } @@ -2670,27 +2017,40 @@ iov.iov_len -= rv; } while (sent < size); - if (sock == mdev->data.socket) - clear_bit(NET_CONGESTED, &mdev->flags); - -#if !HAVE_KERNEL_SENDMSG - set_fs(oldfs); -#endif - + if (sock == tconn->data.socket) + clear_bit(NET_CONGESTED, &tconn->flags); if (rv <= 0) { if (rv != -EAGAIN) { - dev_err(DEV, "%s_sendmsg returned %d\n", - sock == mdev->meta.socket ? "msock" : "sock", - rv); - drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE)); + conn_err(tconn, "%s_sendmsg returned %d\n", + sock == tconn->meta.socket ? "msock" : "sock", + rv); + conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD); } else - drbd_force_state(mdev, NS(conn, C_TIMEOUT)); + conn_request_state(tconn, NS(conn, C_TIMEOUT), CS_HARD); } return sent; } +/** + * drbd_send_all - Send an entire buffer + * + * Returns 0 upon success and a negative error value otherwise. + */ +int drbd_send_all(struct drbd_tconn *tconn, struct socket *sock, void *buffer, + size_t size, unsigned msg_flags) +{ + int err; + + err = drbd_send(tconn, sock, buffer, size, msg_flags); + if (err < 0) + return err; + if (err != size) + return -EIO; + return 0; +} + #ifdef BD_OPS_USE_FMODE static int drbd_open(struct block_device *bdev, fmode_t mode) #else @@ -2706,7 +2066,7 @@ unsigned long flags; int rv = 0; - spin_lock_irqsave(&mdev->req_lock, flags); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); /* to have a stable mdev->state.role * and no race with updating open_cnt */ @@ -2719,7 +2079,7 @@ if (!rv) mdev->open_cnt++; - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); return rv; } @@ -2740,49 +2100,16 @@ } #endif -STATIC void drbd_unplug_fn(struct request_queue *q) -{ - struct drbd_conf *mdev = q->queuedata; - - trace_drbd_unplug(mdev, "got unplugged"); - - /* unplug FIRST */ - spin_lock_irq(q->queue_lock); - blk_remove_plug(q); - spin_unlock_irq(q->queue_lock); 
- - /* only if connected */ - spin_lock_irq(&mdev->req_lock); - if (mdev->state.pdsk >= D_INCONSISTENT && mdev->state.conn >= C_CONNECTED) { - D_ASSERT(mdev->state.role == R_PRIMARY); - if (test_and_clear_bit(UNPLUG_REMOTE, &mdev->flags)) { - /* add to the data.work queue, - * unless already queued. - * XXX this might be a good addition to drbd_queue_work - * anyways, to detect "double queuing" ... */ - if (list_empty(&mdev->unplug_work.list)) - drbd_queue_work(&mdev->data.work, - &mdev->unplug_work); - } - } - spin_unlock_irq(&mdev->req_lock); - - if (mdev->state.disk >= D_INCONSISTENT) - drbd_kick_lo(mdev); -} - STATIC void drbd_set_defaults(struct drbd_conf *mdev) { - mdev->sync_conf.after = DRBD_AFTER_DEF; - mdev->sync_conf.rate = DRBD_RATE_DEF; - mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_DEF; - mdev->state = (union drbd_state) { + /* Beware! The actual layout differs + * between big endian and little endian */ + mdev->state = (union drbd_dev_state) { { .role = R_SECONDARY, .peer = R_UNKNOWN, .conn = C_STANDALONE, .disk = D_DISKLESS, .pdsk = D_UNKNOWN, - .susp = 0 } }; } @@ -2807,24 +2134,17 @@ atomic_set(&mdev->rs_pending_cnt, 0); atomic_set(&mdev->unacked_cnt, 0); atomic_set(&mdev->local_cnt, 0); - atomic_set(&mdev->net_cnt, 0); - atomic_set(&mdev->packet_seq, 0); - atomic_set(&mdev->pp_in_use, 0); - - mutex_init(&mdev->md_io_mutex); - mutex_init(&mdev->data.mutex); - mutex_init(&mdev->meta.mutex); - sema_init(&mdev->data.work.s, 0); - sema_init(&mdev->meta.work.s, 0); - mutex_init(&mdev->state_mutex); + atomic_set(&mdev->pp_in_use_by_net, 0); + atomic_set(&mdev->rs_sect_in, 0); + atomic_set(&mdev->rs_sect_ev, 0); + atomic_set(&mdev->ap_in_flight, 0); + atomic_set(&mdev->md_io_in_use, 0); - spin_lock_init(&mdev->data.work.q_lock); - spin_lock_init(&mdev->meta.work.q_lock); + mutex_init(&mdev->own_state_mutex); + mdev->state_mutex = &mdev->own_state_mutex; spin_lock_init(&mdev->al_lock); - spin_lock_init(&mdev->req_lock); 
spin_lock_init(&mdev->peer_seq_lock); - spin_lock_init(&mdev->epoch_lock); INIT_LIST_HEAD(&mdev->active_ee); INIT_LIST_HEAD(&mdev->sync_ee); @@ -2832,22 +2152,39 @@ INIT_LIST_HEAD(&mdev->read_ee); INIT_LIST_HEAD(&mdev->net_ee); INIT_LIST_HEAD(&mdev->resync_reads); - INIT_LIST_HEAD(&mdev->data.work.q); - INIT_LIST_HEAD(&mdev->meta.work.q); INIT_LIST_HEAD(&mdev->resync_work.list); INIT_LIST_HEAD(&mdev->unplug_work.list); + INIT_LIST_HEAD(&mdev->go_diskless.list); INIT_LIST_HEAD(&mdev->md_sync_work.list); + INIT_LIST_HEAD(&mdev->start_resync_work.list); INIT_LIST_HEAD(&mdev->bm_io_work.w.list); - mdev->resync_work.cb = w_resync_inactive; + + mdev->resync_work.cb = w_resync_timer; mdev->unplug_work.cb = w_send_write_hint; + mdev->go_diskless.cb = w_go_diskless; mdev->md_sync_work.cb = w_md_sync; mdev->bm_io_work.w.cb = w_bitmap_io; + mdev->start_resync_work.cb = w_start_resync; + + mdev->resync_work.mdev = mdev; + mdev->unplug_work.mdev = mdev; + mdev->go_diskless.mdev = mdev; + mdev->md_sync_work.mdev = mdev; + mdev->bm_io_work.w.mdev = mdev; + mdev->start_resync_work.mdev = mdev; + init_timer(&mdev->resync_timer); init_timer(&mdev->md_sync_timer); + init_timer(&mdev->start_resync_timer); + init_timer(&mdev->request_timer); mdev->resync_timer.function = resync_timer_fn; mdev->resync_timer.data = (unsigned long) mdev; mdev->md_sync_timer.function = md_sync_timer_fn; mdev->md_sync_timer.data = (unsigned long) mdev; + mdev->start_resync_timer.function = start_resync_timer_fn; + mdev->start_resync_timer.data = (unsigned long) mdev; + mdev->request_timer.function = request_timer_fn; + mdev->request_timer.data = (unsigned long) mdev; init_waitqueue_head(&mdev->misc_wait); init_waitqueue_head(&mdev->state_wait); @@ -2855,24 +2192,18 @@ init_waitqueue_head(&mdev->al_wait); init_waitqueue_head(&mdev->seq_wait); - drbd_thread_init(mdev, &mdev->receiver, drbdd_init); - drbd_thread_init(mdev, &mdev->worker, drbd_worker); - drbd_thread_init(mdev, &mdev->asender, drbd_asender); - - 
mdev->agreed_pro_version = PRO_VERSION_MAX; - mdev->write_ordering = WO_bio_barrier; mdev->resync_wenr = LC_FREE; + mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; + mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; } void drbd_mdev_cleanup(struct drbd_conf *mdev) { - if (mdev->receiver.t_state != None) + int i; + if (mdev->tconn->receiver.t_state != NONE) dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n", - mdev->receiver.t_state); + mdev->tconn->receiver.t_state); - /* no need to lock it, I'm the only thread alive */ - if (atomic_read(&mdev->current_epoch->epoch_size) != 0) - dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size)); mdev->al_writ_cnt = mdev->bm_writ_cnt = mdev->read_cnt = @@ -2882,35 +2213,40 @@ mdev->p_size = mdev->rs_start = mdev->rs_total = - mdev->rs_failed = - mdev->rs_mark_left = - mdev->rs_mark_time = 0; - D_ASSERT(mdev->net_conf == NULL); + mdev->rs_failed = 0; + mdev->rs_last_events = 0; + mdev->rs_last_sect_ev = 0; + for (i = 0; i < DRBD_SYNC_MARKS; i++) { + mdev->rs_mark_left[i] = 0; + mdev->rs_mark_time[i] = 0; + } + D_ASSERT(mdev->tconn->net_conf == NULL); drbd_set_my_capacity(mdev, 0); if (mdev->bitmap) { /* maybe never allocated. */ - drbd_bm_resize(mdev, 0); + drbd_bm_resize(mdev, 0, 1); drbd_bm_cleanup(mdev); } - drbd_free_resources(mdev); + drbd_free_bc(mdev->ldev); + mdev->ldev = NULL; + + clear_bit(AL_SUSPENDED, &mdev->flags); - /* - * currently we drbd_init_ee only on module load, so - * we may do drbd_release_ee only on module unload! 
- */ D_ASSERT(list_empty(&mdev->active_ee)); D_ASSERT(list_empty(&mdev->sync_ee)); D_ASSERT(list_empty(&mdev->done_ee)); D_ASSERT(list_empty(&mdev->read_ee)); D_ASSERT(list_empty(&mdev->net_ee)); D_ASSERT(list_empty(&mdev->resync_reads)); - D_ASSERT(list_empty(&mdev->data.work.q)); - D_ASSERT(list_empty(&mdev->meta.work.q)); + D_ASSERT(list_empty(&mdev->tconn->data.work.q)); + D_ASSERT(list_empty(&mdev->tconn->meta.work.q)); D_ASSERT(list_empty(&mdev->resync_work.list)); D_ASSERT(list_empty(&mdev->unplug_work.list)); + D_ASSERT(list_empty(&mdev->go_diskless.list)); + drbd_set_defaults(mdev); } @@ -2927,6 +2263,10 @@ /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */ + if (drbd_md_io_bio_set) + bioset_free(drbd_md_io_bio_set); + if (drbd_md_io_page_pool) + mempool_destroy(drbd_md_io_page_pool); if (drbd_ee_mempool) mempool_destroy(drbd_ee_mempool); if (drbd_request_mempool) @@ -2940,6 +2280,8 @@ if (drbd_al_ext_cache) kmem_cache_destroy(drbd_al_ext_cache); + drbd_md_io_bio_set = NULL; + drbd_md_io_page_pool = NULL; drbd_ee_mempool = NULL; drbd_request_mempool = NULL; drbd_ee_cache = NULL; @@ -2953,7 +2295,7 @@ STATIC int drbd_create_mempools(void) { struct page *page; - const int number = (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE) * minor_count; + const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count; int i; /* prepare our caches and mempools */ @@ -2963,6 +2305,8 @@ drbd_bm_ext_cache = NULL; drbd_al_ext_cache = NULL; drbd_pp_pool = NULL; + drbd_md_io_page_pool = NULL; + drbd_md_io_bio_set = NULL; /* caches */ drbd_request_cache = kmem_cache_create( @@ -2971,7 +2315,7 @@ goto Enomem; drbd_ee_cache = kmem_cache_create( - "drbd_ee", sizeof(struct drbd_epoch_entry), 0, 0, NULL); + "drbd_ee", sizeof(struct drbd_peer_request), 0, 0, NULL); if (drbd_ee_cache == NULL) goto Enomem; @@ -2986,6 +2330,14 @@ goto Enomem; /* mempools */ + drbd_md_io_bio_set = bioset_create(DRBD_MIN_POOL_PAGES, 0); + if (drbd_md_io_bio_set == NULL) + goto Enomem; + + drbd_md_io_page_pool = 
mempool_create_page_pool(DRBD_MIN_POOL_PAGES, 0); + if (drbd_md_io_page_pool == NULL) + goto Enomem; + drbd_request_mempool = mempool_create(number, mempool_alloc_slab, mempool_free_slab, drbd_request_cache); if (drbd_request_mempool == NULL) @@ -2993,7 +2345,7 @@ drbd_ee_mempool = mempool_create(number, mempool_alloc_slab, mempool_free_slab, drbd_ee_cache); - if (drbd_request_mempool == NULL) + if (drbd_ee_mempool == NULL) goto Enomem; /* drbd's page pool */ @@ -3029,71 +2381,53 @@ .notifier_call = drbd_notify_sys, }; -static void drbd_release_ee_lists(struct drbd_conf *mdev) +static void drbd_release_all_peer_reqs(struct drbd_conf *mdev) { int rr; - rr = drbd_release_ee(mdev, &mdev->active_ee); + rr = drbd_free_peer_reqs(mdev, &mdev->active_ee); if (rr) dev_err(DEV, "%d EEs in active list found!\n", rr); - rr = drbd_release_ee(mdev, &mdev->sync_ee); + rr = drbd_free_peer_reqs(mdev, &mdev->sync_ee); if (rr) dev_err(DEV, "%d EEs in sync list found!\n", rr); - rr = drbd_release_ee(mdev, &mdev->read_ee); + rr = drbd_free_peer_reqs(mdev, &mdev->read_ee); if (rr) dev_err(DEV, "%d EEs in read list found!\n", rr); - rr = drbd_release_ee(mdev, &mdev->done_ee); + rr = drbd_free_peer_reqs(mdev, &mdev->done_ee); if (rr) dev_err(DEV, "%d EEs in done list found!\n", rr); - rr = drbd_release_ee(mdev, &mdev->net_ee); + rr = drbd_free_peer_reqs(mdev, &mdev->net_ee); if (rr) dev_err(DEV, "%d EEs in net list found!\n", rr); } -/* caution. no locking. - * currently only used from module cleanup code. */ -static void drbd_delete_device(unsigned int minor) +/* caution. no locking. 
*/ +void drbd_minor_destroy(struct kref *kref) { - struct drbd_conf *mdev = minor_to_mdev(minor); + struct drbd_conf *mdev = container_of(kref, struct drbd_conf, kref); + struct drbd_tconn *tconn = mdev->tconn; - if (!mdev) - return; + del_timer_sync(&mdev->request_timer); /* paranoia asserts */ - if (mdev->open_cnt != 0) - dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt, - __FILE__ , __LINE__); - - ERR_IF (!list_empty(&mdev->data.work.q)) { - struct list_head *lp; - list_for_each(lp, &mdev->data.work.q) { - DUMPP(lp); - } - }; + D_ASSERT(mdev->open_cnt == 0); /* end paranoia asserts */ - del_gendisk(mdev->vdisk); - /* cleanup stuff that may have been allocated during * device (re-)configuration or state changes */ if (mdev->this_bdev) bdput(mdev->this_bdev); - drbd_free_resources(mdev); - - drbd_release_ee_lists(mdev); + drbd_free_bc(mdev->ldev); + mdev->ldev = NULL; - /* should be free'd on disconnect? */ - kfree(mdev->ee_hash); - /* - mdev->ee_hash_s = 0; - mdev->ee_hash = NULL; - */ + drbd_release_all_peer_reqs(mdev); lc_destroy(mdev->act_log); lc_destroy(mdev->resync); @@ -3101,37 +2435,58 @@ kfree(mdev->p_uuid); /* mdev->p_uuid = NULL; */ - kfree(mdev->int_dig_out); - kfree(mdev->int_dig_in); - kfree(mdev->int_dig_vv); - - /* cleanup the rest that has been - * allocated from drbd_new_device - * and actually free the mdev itself */ - drbd_free_mdev(mdev); + if (mdev->bitmap) /* should no longer be there. */ + drbd_bm_cleanup(mdev); + __free_page(mdev->md_io_page); + put_disk(mdev->vdisk); + blk_cleanup_queue(mdev->rq_queue); + kfree(mdev->rs_plan_s); + kfree(mdev); + + kref_put(&tconn->kref, &conn_destroy); } STATIC void drbd_cleanup(void) { unsigned int i; + struct drbd_conf *mdev; + struct drbd_tconn *tconn, *tmp; unregister_reboot_notifier(&drbd_notifier); - drbd_nl_cleanup(); + /* first remove proc, + * drbdsetup uses it's presence to detect + * whether DRBD is loaded. 
+ * If we would get stuck in proc removal, + * but have netlink already deregistered, + * some drbdsetup commands may wait forever + * for an answer. + */ + if (drbd_proc) + remove_proc_entry("drbd", NULL); + + drbd_genl_unregister(); - if (minor_table) { - if (drbd_proc) - remove_proc_entry("drbd", NULL); - i = minor_count; - while (i--) - drbd_delete_device(i); - drbd_destroy_mempools(); + idr_for_each_entry(&minors, mdev, i) { + idr_remove(&minors, mdev_to_minor(mdev)); + idr_remove(&mdev->tconn->volumes, mdev->vnr); + del_gendisk(mdev->vdisk); + /* synchronize_rcu(); No other threads running at this point */ + kref_put(&mdev->kref, &drbd_minor_destroy); } - kfree(minor_table); + /* not _rcu since, no other updater anymore. Genl already unregistered */ + list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) { + list_del(&tconn->all_tconn); /* not _rcu no proc, not other threads */ + /* synchronize_rcu(); */ + kref_put(&tconn->kref, &conn_destroy); + } + drbd_destroy_mempools(); drbd_unregister_blkdev(DRBD_MAJOR, "drbd"); + idr_destroy(&minors); + printk(KERN_INFO "drbd: module cleanup done.\n"); } @@ -3149,7 +2504,7 @@ char reason = '-'; int r = 0; - if (!__inc_ap_bio_cond(mdev)) { + if (!may_inc_ap_bio(mdev)) { /* DRBD has frozen IO */ r = bdi_bits; reason = 'd'; @@ -3164,7 +2519,7 @@ reason = 'b'; } - if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->flags)) { + if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->tconn->flags)) { r |= (1 << BDI_async_congested); reason = reason == 'b' ? 
'a' : 'n'; } @@ -3174,20 +2529,242 @@ return r; } -struct drbd_conf *drbd_new_device(unsigned int minor) +static void drbd_init_workqueue(struct drbd_work_queue* wq) +{ + sema_init(&wq->s, 0); + spin_lock_init(&wq->q_lock); + INIT_LIST_HEAD(&wq->q); +} + +struct drbd_tconn *conn_get_by_name(const char *name) +{ + struct drbd_tconn *tconn; + + if (!name || !name[0]) + return NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) { + if (!strcmp(tconn->name, name)) { + kref_get(&tconn->kref); + goto found; + } + } + tconn = NULL; +found: + rcu_read_unlock(); + return tconn; +} + +struct drbd_tconn *conn_get_by_addrs(void *my_addr, int my_addr_len, + void *peer_addr, int peer_addr_len) +{ + struct drbd_tconn *tconn; + + rcu_read_lock(); + list_for_each_entry_rcu(tconn, &drbd_tconns, all_tconn) { + if (tconn->my_addr_len == my_addr_len && + tconn->peer_addr_len == peer_addr_len && + !memcmp(&tconn->my_addr, my_addr, my_addr_len) && + !memcmp(&tconn->peer_addr, peer_addr, peer_addr_len)) { + kref_get(&tconn->kref); + goto found; + } + } + tconn = NULL; +found: + rcu_read_unlock(); + return tconn; +} + +static int drbd_alloc_socket(struct drbd_socket *socket) +{ + socket->rbuf = (void *) __get_free_page(GFP_KERNEL); + if (!socket->rbuf) + return -ENOMEM; + socket->sbuf = (void *) __get_free_page(GFP_KERNEL); + if (!socket->sbuf) + return -ENOMEM; + return 0; +} + +static void drbd_free_socket(struct drbd_socket *socket) +{ + free_page((unsigned long) socket->sbuf); + free_page((unsigned long) socket->rbuf); +} + +void conn_free_crypto(struct drbd_tconn *tconn) +{ + drbd_free_sock(tconn); + + crypto_free_hash(tconn->csums_tfm); + crypto_free_hash(tconn->verify_tfm); + crypto_free_hash(tconn->cram_hmac_tfm); + crypto_free_hash(tconn->integrity_tfm); + crypto_free_hash(tconn->peer_integrity_tfm); + kfree(tconn->int_dig_in); + kfree(tconn->int_dig_vv); + + tconn->csums_tfm = NULL; + tconn->verify_tfm = NULL; + tconn->cram_hmac_tfm = NULL; + 
tconn->integrity_tfm = NULL; + tconn->peer_integrity_tfm = NULL; + tconn->int_dig_in = NULL; + tconn->int_dig_vv = NULL; +} + +int set_resource_options(struct drbd_tconn *tconn, struct res_opts *res_opts) +{ + cpumask_var_t new_cpu_mask; + int err; + + if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) + return -ENOMEM; + /* + retcode = ERR_NOMEM; + drbd_msg_put_info("unable to allocate cpumask"); + */ + + /* silently ignore cpu mask on UP kernel */ + if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) { + /* FIXME: Get rid of constant 32 here */ + err = __bitmap_parse(res_opts->cpu_mask, 32, 0, + cpumask_bits(new_cpu_mask), nr_cpu_ids); + if (err) { + conn_warn(tconn, "__bitmap_parse() failed with %d\n", err); + /* retcode = ERR_CPU_MASK_PARSE; */ + goto fail; + } + } + tconn->res_opts = *res_opts; + if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) { + cpumask_copy(tconn->cpu_mask, new_cpu_mask); + drbd_calc_cpu_mask(tconn); + tconn->receiver.reset_cpu_mask = 1; + tconn->asender.reset_cpu_mask = 1; + tconn->worker.reset_cpu_mask = 1; + } + err = 0; + +fail: + free_cpumask_var(new_cpu_mask); + return err; + +} + +/* caller must be under genl_lock() */ +struct drbd_tconn *conn_create(const char *name, struct res_opts *res_opts) +{ + struct drbd_tconn *tconn; + + tconn = kzalloc(sizeof(struct drbd_tconn), GFP_KERNEL); + if (!tconn) + return NULL; + + tconn->name = kstrdup(name, GFP_KERNEL); + if (!tconn->name) + goto fail; + + if (drbd_alloc_socket(&tconn->data)) + goto fail; + if (drbd_alloc_socket(&tconn->meta)) + goto fail; + + if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL)) + goto fail; + + if (set_resource_options(tconn, res_opts)) + goto fail; + + if (!tl_init(tconn)) + goto fail; + + tconn->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL); + if (!tconn->current_epoch) + goto fail; + INIT_LIST_HEAD(&tconn->current_epoch->list); + tconn->epochs = 1; + spin_lock_init(&tconn->epoch_lock); + tconn->write_ordering = WO_bio_barrier; + + 
tconn->cstate = C_STANDALONE; + mutex_init(&tconn->cstate_mutex); + spin_lock_init(&tconn->req_lock); + mutex_init(&tconn->conf_update); + init_waitqueue_head(&tconn->ping_wait); + idr_init(&tconn->volumes); + + drbd_init_workqueue(&tconn->data.work); + mutex_init(&tconn->data.mutex); + + drbd_init_workqueue(&tconn->meta.work); + mutex_init(&tconn->meta.mutex); + + drbd_thread_init(tconn, &tconn->receiver, drbdd_init, "receiver"); + drbd_thread_init(tconn, &tconn->worker, drbd_worker, "worker"); + drbd_thread_init(tconn, &tconn->asender, drbd_asender, "asender"); + + kref_init(&tconn->kref); + list_add_tail_rcu(&tconn->all_tconn, &drbd_tconns); + + return tconn; + +fail: + kfree(tconn->current_epoch); + tl_cleanup(tconn); + free_cpumask_var(tconn->cpu_mask); + drbd_free_socket(&tconn->meta); + drbd_free_socket(&tconn->data); + kfree(tconn->name); + kfree(tconn); + + return NULL; +} + +void conn_destroy(struct kref *kref) +{ + struct drbd_tconn *tconn = container_of(kref, struct drbd_tconn, kref); + + if (atomic_read(&tconn->current_epoch->epoch_size) != 0) + conn_err(tconn, "epoch_size:%d\n", atomic_read(&tconn->current_epoch->epoch_size)); + kfree(tconn->current_epoch); + + idr_destroy(&tconn->volumes); + + free_cpumask_var(tconn->cpu_mask); + drbd_free_socket(&tconn->meta); + drbd_free_socket(&tconn->data); + kfree(tconn->name); + kfree(tconn->int_dig_in); + kfree(tconn->int_dig_vv); + kfree(tconn); +} + +enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr) { struct drbd_conf *mdev; struct gendisk *disk; struct request_queue *q; + int vnr_got = vnr; + int minor_got = minor; + enum drbd_ret_code err = ERR_NOMEM; + + mdev = minor_to_mdev(minor); + if (mdev) + return ERR_MINOR_EXISTS; /* GFP_KERNEL, we are outside of all write-out paths */ mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL); if (!mdev) - return NULL; - if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL)) - goto out_no_cpumask; + return ERR_NOMEM; + + 
kref_get(&tconn->kref); + mdev->tconn = tconn; mdev->minor = minor; + mdev->vnr = vnr; drbd_init_set_defaults(mdev); @@ -3196,14 +2773,13 @@ goto out_no_q; mdev->rq_queue = q; q->queuedata = mdev; - blk_queue_max_segment_size(q, DRBD_MAX_SEGMENT_SIZE); disk = alloc_disk(1); if (!disk) goto out_no_disk; mdev->vdisk = disk; - set_disk_ro(disk, TRUE); + set_disk_ro(disk, true); disk->queue = q; disk->major = DRBD_MAJOR; @@ -3219,12 +2795,13 @@ q->backing_dev_info.congested_fn = drbd_congested; q->backing_dev_info.congested_data = mdev; - blk_queue_make_request(q, drbd_make_request_26); + blk_queue_make_request(q, drbd_make_request); + /* Setting the max_hw_sectors to an odd value of 8kibyte here + This triggers a max_bio_size message upon first attach or connect */ + blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8); blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); blk_queue_merge_bvec(q, drbd_merge_bvec); - q->queue_lock = &mdev->req_lock; /* needed since we use */ - /* plugging on a queue, that actually has no requests! */ - q->unplug_fn = drbd_unplug_fn; + q->queue_lock = &mdev->tconn->req_lock; mdev->md_io_page = alloc_page(GFP_KERNEL); if (!mdev->md_io_page) @@ -3232,30 +2809,44 @@ if (drbd_bm_init(mdev)) goto out_no_bitmap; - /* no need to lock access, we are still initializing this minor device. 
*/ - if (!tl_init(mdev)) - goto out_no_tl; - - mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL); - if (!mdev->app_reads_hash) - goto out_no_app_reads; - - mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL); - if (!mdev->current_epoch) - goto out_no_epoch; - - INIT_LIST_HEAD(&mdev->current_epoch->list); - mdev->epochs = 1; - - return mdev; - -/* out_whatever_else: - kfree(mdev->current_epoch); */ -out_no_epoch: - kfree(mdev->app_reads_hash); -out_no_app_reads: - tl_cleanup(mdev); -out_no_tl: + mdev->read_requests = RB_ROOT; + mdev->write_requests = RB_ROOT; + + if (!idr_pre_get(&minors, GFP_KERNEL)) + goto out_no_minor_idr; + if (idr_get_new_above(&minors, mdev, minor, &minor_got)) + goto out_no_minor_idr; + if (minor_got != minor) { + err = ERR_MINOR_EXISTS; + drbd_msg_put_info("requested minor exists already"); + goto out_idr_remove_minor; + } + + if (!idr_pre_get(&tconn->volumes, GFP_KERNEL)) + goto out_idr_remove_minor; + if (idr_get_new_above(&tconn->volumes, mdev, vnr, &vnr_got)) + goto out_idr_remove_minor; + if (vnr_got != vnr) { + err = ERR_INVALID_REQUEST; + drbd_msg_put_info("requested volume exists already"); + goto out_idr_remove_vol; + } + add_disk(disk); + kref_init(&mdev->kref); /* one ref for both idrs and the the add_disk */ + + /* inherit the connection state */ + mdev->state.conn = tconn->cstate; + if (mdev->state.conn == C_WF_REPORT_PARAMS) + drbd_connected(mdev); + + return NO_ERROR; + +out_idr_remove_vol: + idr_remove(&tconn->volumes, vnr_got); +out_idr_remove_minor: + idr_remove(&minors, minor_got); + synchronize_rcu(); +out_no_minor_idr: drbd_bm_cleanup(mdev); out_no_bitmap: __free_page(mdev->md_io_page); @@ -3264,54 +2855,25 @@ out_no_disk: blk_cleanup_queue(q); out_no_q: - free_cpumask_var(mdev->cpu_mask); -out_no_cpumask: - kfree(mdev); - return NULL; -} - -/* counterpart of drbd_new_device. - * last part of drbd_delete_device. 
*/ -void drbd_free_mdev(struct drbd_conf *mdev) -{ - kfree(mdev->current_epoch); - kfree(mdev->app_reads_hash); - tl_cleanup(mdev); - if (mdev->bitmap) /* should no longer be there. */ - drbd_bm_cleanup(mdev); - __free_page(mdev->md_io_page); - put_disk(mdev->vdisk); - blk_cleanup_queue(mdev->rq_queue); - free_cpumask_var(mdev->cpu_mask); kfree(mdev); + kref_put(&tconn->kref, &conn_destroy); + return err; } - int __init drbd_init(void) { int err; - if (sizeof(struct p_handshake) != 80) { - printk(KERN_ERR - "drbd: never change the size or layout " - "of the HandShake packet.\n"); - return -EINVAL; - } - - if (1 > minor_count || minor_count > 255) { + if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { printk(KERN_ERR - "drbd: invalid minor_count (%d)\n", minor_count); + "drbd: invalid minor_count (%d)\n", minor_count); #ifdef MODULE return -EINVAL; #else - minor_count = 8; + minor_count = DRBD_MINOR_COUNT_DEF; #endif } - err = drbd_nl_init(); - if (err) - return err; - err = register_blkdev(DRBD_MAJOR, "drbd"); if (err) { printk(KERN_ERR @@ -3320,6 +2882,13 @@ return err; } + err = drbd_genl_register(); + if (err) { + printk(KERN_ERR "drbd: unable to register generic netlink family\n"); + goto fail; + } + + register_reboot_notifier(&drbd_notifier); /* @@ -3330,22 +2899,20 @@ init_waitqueue_head(&drbd_pp_wait); drbd_proc = NULL; /* play safe for drbd_cleanup */ - minor_table = kzalloc(sizeof(struct drbd_conf *)*minor_count, - GFP_KERNEL); - if (!minor_table) - goto Enomem; + idr_init(&minors); err = drbd_create_mempools(); if (err) - goto Enomem; + goto fail; - drbd_proc = proc_create("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops); + drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL); if (!drbd_proc) { printk(KERN_ERR "drbd: unable to register proc file\n"); - goto Enomem; + goto fail; } rwlock_init(&global_state_lock); + INIT_LIST_HEAD(&drbd_tconns); printk(KERN_INFO "drbd: initialized. 
" "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n", @@ -3353,11 +2920,10 @@ printk(KERN_INFO "drbd: %s\n", drbd_buildtag()); printk(KERN_INFO "drbd: registered as block device major %d\n", DRBD_MAJOR); - printk(KERN_INFO "drbd: minor_table @ 0x%p\n", minor_table); return 0; /* Success! */ -Enomem: +fail: drbd_cleanup(); if (err == -ENOMEM) /* currently always the case */ @@ -3372,48 +2938,29 @@ if (ldev == NULL) return; - bd_release(ldev->backing_bdev); - bd_release(ldev->md_bdev); - - fput(ldev->lo_file); - fput(ldev->md_file); + blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); kfree(ldev); } -void drbd_free_sock(struct drbd_conf *mdev) -{ - if (mdev->data.socket) { - kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR); - sock_release(mdev->data.socket); - mdev->data.socket = NULL; - } - if (mdev->meta.socket) { - kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR); - sock_release(mdev->meta.socket); - mdev->meta.socket = NULL; - } -} - -void drbd_free_resources(struct drbd_conf *mdev) +void drbd_free_sock(struct drbd_tconn *tconn) { - crypto_free_hash(mdev->csums_tfm); - mdev->csums_tfm = NULL; - crypto_free_hash(mdev->verify_tfm); - mdev->verify_tfm = NULL; - crypto_free_hash(mdev->cram_hmac_tfm); - mdev->cram_hmac_tfm = NULL; - crypto_free_hash(mdev->integrity_w_tfm); - mdev->integrity_w_tfm = NULL; - crypto_free_hash(mdev->integrity_r_tfm); - mdev->integrity_r_tfm = NULL; - - drbd_free_sock(mdev); - - __no_warn(local, - drbd_free_bc(mdev->ldev); - mdev->ldev = NULL;); + if (tconn->data.socket) { + mutex_lock(&tconn->data.mutex); + kernel_sock_shutdown(tconn->data.socket, SHUT_RDWR); + sock_release(tconn->data.socket); + tconn->data.socket = NULL; + mutex_unlock(&tconn->data.mutex); + } + if (tconn->meta.socket) { + mutex_lock(&tconn->meta.mutex); + kernel_sock_shutdown(tconn->meta.socket, SHUT_RDWR); + sock_release(tconn->meta.socket); + tconn->meta.socket = NULL; + 
mutex_unlock(&tconn->meta.mutex); + } } /* meta data management */ @@ -3428,10 +2975,11 @@ u32 md_size_sect; u32 al_offset; /* offset to this block */ u32 al_nr_extents; /* important for restoring the AL */ - /* `-- act_log->nr_elements <-- sync_conf.al_extents */ + /* `-- act_log->nr_elements <-- ldev->dc.al_extents */ u32 bm_offset; /* offset to the bitmap, from here */ u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */ - u32 reserved_u32[4]; + u32 la_peer_max_bio_size; /* last peer max_bio_size */ + u32 reserved_u32[3]; } __packed; @@ -3445,26 +2993,27 @@ sector_t sector; int i; + del_timer(&mdev->md_sync_timer); + /* timer may be rearmed by drbd_md_mark_dirty() now. */ if (!test_and_clear_bit(MD_DIRTY, &mdev->flags)) return; - del_timer(&mdev->md_sync_timer); /* We use here D_FAILED and not D_ATTACHING because we try to write * metadata even if we detach due to a disk failure! */ if (!get_ldev_if_state(mdev, D_FAILED)) return; - trace_drbd_md_io(mdev, WRITE, mdev->ldev); + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; - mutex_lock(&mdev->md_io_mutex); - buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); memset(buffer, 0, 512); buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev)); for (i = UI_CURRENT; i < UI_SIZE; i++) buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]); buffer->flags = cpu_to_be32(mdev->ldev->md.flags); - buffer->magic = cpu_to_be32(DRBD_MD_MAGIC); + buffer->magic = cpu_to_be32(DRBD_MD_MAGIC_84_UNCLEAN); buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect); buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset); @@ -3473,24 +3022,23 @@ buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid); buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset); + buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size); D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset); sector = mdev->ldev->md.md_offset; if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { - 
clear_bit(MD_DIRTY, &mdev->flags); - } else { /* this was a try anyways ... */ dev_err(DEV, "meta data update failed!\n"); - - drbd_chk_io_error(mdev, 1, TRUE); + drbd_chk_io_error(mdev, 1, true); } /* Update mdev->ldev->md.la_size_sect, * since we updated it on metadata. */ mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev); - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); +out: put_ldev(mdev); } @@ -3499,32 +3047,44 @@ * @mdev: DRBD device. * @bdev: Device from which the meta data should be read in. * - * Return 0 (NO_ERROR) on success, and an enum drbd_ret_codes in case - * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID. + * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case + * something goes wrong. */ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { struct meta_data_on_disk *buffer; + u32 magic, flags; int i, rv = NO_ERROR; if (!get_ldev_if_state(mdev, D_ATTACHING)) return ERR_IO_MD_DISK; - trace_drbd_md_io(mdev, READ, bdev); - - mutex_lock(&mdev->md_io_mutex); - buffer = (struct meta_data_on_disk *)page_address(mdev->md_io_page); + buffer = drbd_md_get_buffer(mdev); + if (!buffer) + goto out; - if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { - /* NOTE: cant do normal error processing here as this is + if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) { + /* NOTE: can't do normal error processing here as this is called BEFORE disk is attached */ dev_err(DEV, "Error while reading metadata.\n"); rv = ERR_IO_MD_DISK; goto err; } - if (be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) { - dev_err(DEV, "Error while reading metadata, magic not found.\n"); + magic = be32_to_cpu(buffer->magic); + flags = be32_to_cpu(buffer->flags); + if (magic == DRBD_MD_MAGIC_84_UNCLEAN || + (magic == DRBD_MD_MAGIC_08 && !(flags & MDF_AL_CLEAN))) { + /* btw: that's Activity Log clean, not "all" clean. */ + dev_err(DEV, "Found unclean meta data. 
Did you \"drbdadm apply-al\"?\n"); + rv = ERR_MD_UNCLEAN; + goto err; + } + if (magic != DRBD_MD_MAGIC_08) { + if (magic == DRBD_MD_MAGIC_07) + dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n"); + else + dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n"); rv = ERR_MD_INVALID; goto err; } @@ -3558,14 +3118,20 @@ for (i = UI_CURRENT; i < UI_SIZE; i++) bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]); bdev->md.flags = be32_to_cpu(buffer->flags); - mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents); bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid); - if (mdev->sync_conf.al_extents < 7) - mdev->sync_conf.al_extents = 127; + spin_lock_irq(&mdev->tconn->req_lock); + if (mdev->state.conn < C_CONNECTED) { + int peer; + peer = be32_to_cpu(buffer->la_peer_max_bio_size); + peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE); + mdev->peer_max_bio_size = peer; + } + spin_unlock_irq(&mdev->tconn->req_lock); err: - mutex_unlock(&mdev->md_io_mutex); + drbd_md_put_buffer(mdev); + out: put_ldev(mdev); return rv; @@ -3579,22 +3145,29 @@ * the meta-data super block. This function sets MD_DIRTY, and starts a * timer that ensures that within five seconds you have to call drbd_md_sync(). 
*/ +#ifdef DRBD_DEBUG_MD_SYNC +void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char *func) +{ + if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) { + mod_timer(&mdev->md_sync_timer, jiffies + HZ); + mdev->last_md_mark_dirty.line = line; + mdev->last_md_mark_dirty.func = func; + } +} +#else void drbd_md_mark_dirty(struct drbd_conf *mdev) { - set_bit(MD_DIRTY, &mdev->flags); - mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ); + if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) + mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ); } +#endif - -STATIC void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) +static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local) { int i; - for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) { + for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i]; - - trace_drbd_uuid(mdev, i+1); - } } void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local) @@ -3609,7 +3182,6 @@ } mdev->ldev->md.uuid[idx] = val; - trace_drbd_uuid(mdev, idx); drbd_md_mark_dirty(mdev); } @@ -3619,7 +3191,6 @@ if (mdev->ldev->md.uuid[idx]) { drbd_uuid_move_history(mdev); mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx]; - trace_drbd_uuid(mdev, UI_HISTORY_START); } _drbd_uuid_set(mdev, idx, val); } @@ -3634,14 +3205,18 @@ void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local) { u64 val; + unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; + + if (bm_uuid) + dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid); - dev_info(DEV, "Creating new current UUID\n"); - D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0); mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT]; - trace_drbd_uuid(mdev, UI_BITMAP); get_random_bytes(&val, sizeof(u64)); _drbd_uuid_set(mdev, UI_CURRENT, val); + drbd_print_uuids(mdev, "new current UUID"); + /* get it to stable storage _now_ */ + drbd_md_sync(mdev); } void 
drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local) @@ -3653,16 +3228,12 @@ drbd_uuid_move_history(mdev); mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP]; mdev->ldev->md.uuid[UI_BITMAP] = 0; - trace_drbd_uuid(mdev, UI_HISTORY_START); - trace_drbd_uuid(mdev, UI_BITMAP); } else { - if (mdev->ldev->md.uuid[UI_BITMAP]) - dev_warn(DEV, "bm UUID already set"); + unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP]; + if (bm_uuid) + dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid); - mdev->ldev->md.uuid[UI_BITMAP] = val; - mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1); - - trace_drbd_uuid(mdev, UI_BITMAP); + mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1); } drbd_md_mark_dirty(mdev); } @@ -3705,6 +3276,7 @@ { int rv = -EIO; + drbd_resume_al(mdev); if (get_ldev_if_state(mdev, D_ATTACHING)) { drbd_bm_clear_all(mdev); rv = drbd_bm_write(mdev); @@ -3714,18 +3286,22 @@ return rv; } -STATIC int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) +STATIC int w_bitmap_io(struct drbd_work *w, int unused) { struct bm_io_work *work = container_of(w, struct bm_io_work, w); - int rv; + struct drbd_conf *mdev = w->mdev; + int rv = -EIO; D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0); - drbd_bm_lock(mdev, work->why); - rv = work->io_fn(mdev); - drbd_bm_unlock(mdev); + if (get_ldev(mdev)) { + drbd_bm_lock(mdev, work->why, work->flags); + rv = work->io_fn(mdev); + drbd_bm_unlock(mdev); + put_ldev(mdev); + } - clear_bit(BITMAP_IO, &mdev->flags); + clear_bit_unlock(BITMAP_IO, &mdev->flags); wake_up(&mdev->misc_wait); if (work->done) @@ -3733,8 +3309,42 @@ clear_bit(BITMAP_IO_QUEUED, &mdev->flags); work->why = NULL; + work->flags = 0; - return 1; + return 0; +} + +void drbd_ldev_destroy(struct drbd_conf *mdev) +{ + lc_destroy(mdev->resync); + mdev->resync = NULL; + lc_destroy(mdev->act_log); + mdev->act_log = NULL; + __no_warn(local, + drbd_free_bc(mdev->ldev); + mdev->ldev = NULL;); + + clear_bit(GO_DISKLESS, 
&mdev->flags); +} + +STATIC int w_go_diskless(struct drbd_work *w, int unused) +{ + struct drbd_conf *mdev = w->mdev; + + D_ASSERT(mdev->state.disk == D_FAILED); + /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will + * inc/dec it frequently. Once we are D_DISKLESS, no one will touch + * the protected members anymore, though, so once put_ldev reaches zero + * again, it will be safe to free them. */ + drbd_force_state(mdev, NS(disk, D_DISKLESS)); + return 0; +} + +void drbd_go_diskless(struct drbd_conf *mdev) +{ + D_ASSERT(mdev->state.disk == D_FAILED); + if (!test_and_set_bit(GO_DISKLESS, &mdev->flags)) + drbd_queue_work(&mdev->tconn->data.work, &mdev->go_diskless); } /** @@ -3752,9 +3362,9 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), void (*done)(struct drbd_conf *, int), - char *why) + char *why, enum bm_flag flags) { - D_ASSERT(current == mdev->worker.task); + D_ASSERT(current == mdev->tconn->worker.task); D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags)); D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags)); @@ -3766,15 +3376,15 @@ mdev->bm_io_work.io_fn = io_fn; mdev->bm_io_work.done = done; mdev->bm_io_work.why = why; + mdev->bm_io_work.flags = flags; + spin_lock_irq(&mdev->tconn->req_lock); set_bit(BITMAP_IO, &mdev->flags); if (atomic_read(&mdev->ap_bio_cnt) == 0) { - if (list_empty(&mdev->bm_io_work.w.list)) { - set_bit(BITMAP_IO_QUEUED, &mdev->flags); - drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w); - } else - dev_err(DEV, "FIXME avoided double queuing bm_io_work\n"); + if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags)) + drbd_queue_work(&mdev->tconn->data.work, &mdev->bm_io_work.w); } + spin_unlock_irq(&mdev->tconn->req_lock); } /** @@ -3786,19 +3396,22 @@ * freezes application IO while that the actual IO operations runs. This * functions MAY NOT be called from worker context. 
*/ -int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why) +int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), + char *why, enum bm_flag flags) { int rv; - D_ASSERT(current != mdev->worker.task); + D_ASSERT(current != mdev->tconn->worker.task); - drbd_suspend_io(mdev); + if ((flags & BM_LOCKED_SET_ALLOWED) == 0) + drbd_suspend_io(mdev); - drbd_bm_lock(mdev, why); + drbd_bm_lock(mdev, why, flags); rv = io_fn(mdev); drbd_bm_unlock(mdev); - drbd_resume_io(mdev); + if ((flags & BM_LOCKED_SET_ALLOWED) == 0) + drbd_resume_io(mdev); return rv; } @@ -3827,15 +3440,125 @@ { struct drbd_conf *mdev = (struct drbd_conf *) data; - drbd_queue_work_front(&mdev->data.work, &mdev->md_sync_work); + drbd_queue_work_front(&mdev->tconn->data.work, &mdev->md_sync_work); } -STATIC int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused) +STATIC int w_md_sync(struct drbd_work *w, int unused) { + struct drbd_conf *mdev = w->mdev; + dev_warn(DEV, "md_sync_timer expired! 
Worker calls drbd_md_sync().\n"); +#ifdef DRBD_DEBUG_MD_SYNC + dev_warn(DEV, "last md_mark_dirty: %s:%u\n", + mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line); +#endif drbd_md_sync(mdev); + return 0; +} - return 1; +const char *cmdname(enum drbd_packet cmd) +{ + /* THINK may need to become several global tables + * when we want to support more than + * one PRO_VERSION */ + static const char *cmdnames[] = { + [P_DATA] = "Data", + [P_DATA_REPLY] = "DataReply", + [P_RS_DATA_REPLY] = "RSDataReply", + [P_BARRIER] = "Barrier", + [P_BITMAP] = "ReportBitMap", + [P_BECOME_SYNC_TARGET] = "BecomeSyncTarget", + [P_BECOME_SYNC_SOURCE] = "BecomeSyncSource", + [P_UNPLUG_REMOTE] = "UnplugRemote", + [P_DATA_REQUEST] = "DataRequest", + [P_RS_DATA_REQUEST] = "RSDataRequest", + [P_SYNC_PARAM] = "SyncParam", + [P_SYNC_PARAM89] = "SyncParam89", + [P_PROTOCOL] = "ReportProtocol", + [P_UUIDS] = "ReportUUIDs", + [P_SIZES] = "ReportSizes", + [P_STATE] = "ReportState", + [P_SYNC_UUID] = "ReportSyncUUID", + [P_AUTH_CHALLENGE] = "AuthChallenge", + [P_AUTH_RESPONSE] = "AuthResponse", + [P_PING] = "Ping", + [P_PING_ACK] = "PingAck", + [P_RECV_ACK] = "RecvAck", + [P_WRITE_ACK] = "WriteAck", + [P_RS_WRITE_ACK] = "RSWriteAck", + [P_DISCARD_WRITE] = "DiscardWrite", + [P_NEG_ACK] = "NegAck", + [P_NEG_DREPLY] = "NegDReply", + [P_NEG_RS_DREPLY] = "NegRSDReply", + [P_BARRIER_ACK] = "BarrierAck", + [P_STATE_CHG_REQ] = "StateChgRequest", + [P_STATE_CHG_REPLY] = "StateChgReply", + [P_OV_REQUEST] = "OVRequest", + [P_OV_REPLY] = "OVReply", + [P_OV_RESULT] = "OVResult", + [P_CSUM_RS_REQUEST] = "CsumRSRequest", + [P_RS_IS_IN_SYNC] = "CsumRSIsInSync", + [P_COMPRESSED_BITMAP] = "CBitmap", + [P_DELAY_PROBE] = "DelayProbe", + [P_OUT_OF_SYNC] = "OutOfSync", + [P_RETRY_WRITE] = "RetryWrite", + [P_RS_CANCEL] = "RSCancel", + [P_CONN_ST_CHG_REQ] = "conn_st_chg_req", + [P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply", + [P_RETRY_WRITE] = "retry_write", + [P_PROTOCOL_UPDATE] = "protocol_update", + + /* enum 
drbd_packet, but not commands - obsoleted flags: + * P_MAY_IGNORE + * P_MAX_OPT_CMD + */ + }; + + /* too big for the array: 0xfffX */ + if (cmd == P_INITIAL_META) + return "InitialMeta"; + if (cmd == P_INITIAL_DATA) + return "InitialData"; + if (cmd == P_CONNECTION_FEATURES) + return "ConnectionFeatures"; + if (cmd >= ARRAY_SIZE(cmdnames)) + return "Unknown"; + return cmdnames[cmd]; +} + +/** + * drbd_wait_misc - wait for a request to make progress + * @mdev: device associated with the request + * @i: the struct drbd_interval embedded in struct drbd_request or + * struct drbd_peer_request + */ +int drbd_wait_misc(struct drbd_conf *mdev, struct drbd_interval *i) +{ + struct net_conf *nc; + DEFINE_WAIT(wait); + long timeout; + + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + if (!nc) { + rcu_read_unlock(); + return -ETIMEDOUT; + } + timeout = nc->ko_count ? nc->timeout * HZ / 10 * nc->ko_count : MAX_SCHEDULE_TIMEOUT; + rcu_read_unlock(); + + /* Indicate to wake up mdev->misc_wait on progress. */ + i->waiting = true; + prepare_to_wait(&mdev->misc_wait, &wait, TASK_INTERRUPTIBLE); + spin_unlock_irq(&mdev->tconn->req_lock); + timeout = schedule_timeout(timeout); + finish_wait(&mdev->misc_wait, &wait); + spin_lock_irq(&mdev->tconn->req_lock); + if (!timeout || mdev->state.conn < C_CONNECTED) + return -ETIMEDOUT; + if (signal_pending(current)) + return -ERESTARTSYS; + return 0; } #ifdef DRBD_ENABLE_FAULTS @@ -3879,7 +3602,8 @@ [DRBD_FAULT_DT_RD] = "Data read", [DRBD_FAULT_DT_RA] = "Data read ahead", [DRBD_FAULT_BM_ALLOC] = "BM allocation", - [DRBD_FAULT_AL_EE] = "EE allocation" + [DRBD_FAULT_AL_EE] = "EE allocation", + [DRBD_FAULT_RECEIVE] = "receive data corruption", }; return (type < DRBD_FAULT_MAX) ? 
_faults[type] : "**Unknown**"; @@ -3898,7 +3622,7 @@ if (ret) { fault_count++; - if (printk_ratelimit()) + if (DRBD_ratelimit(5*HZ, 5)) dev_warn(DEV, "***Simulating %s failure\n", _drbd_fault_str(type)); } diff -Nru drbd8-8.3.7/drbd/drbd_nl.c drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_nl.c --- drbd8-8.3.7/drbd/drbd_nl.c 2010-01-07 09:09:34.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_nl.c 2012-02-02 14:09:14.000000000 +0000 @@ -30,151 +30,337 @@ #include #include #include -#include #include #include #include "drbd_int.h" -#include "drbd_tracing.h" -#include "drbd_wrappers.h" +#include "drbd_req.h" #include -#include #include -#include +#include -static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int); -static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *); -static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *); +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) +/* + * copied from more recent kernel source + */ +int genl_register_family_with_ops(struct genl_family *family, + struct genl_ops *ops, size_t n_ops) +{ + int err, i; + + err = genl_register_family(family); + if (err) + return err; + + for (i = 0; i < n_ops; ++i, ++ops) { + err = genl_register_ops(family, ops); + if (err) + goto err_out; + } + return 0; +err_out: + genl_unregister_family(family); + return err; +} +#endif -/* see get_sb_bdev and bd_claim */ +/* .doit */ +// int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info); +// int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info); + +int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info); + +int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_down(struct sk_buff *skb, struct genl_info *info); + +int 
drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info); +int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info); +/* .dumpit */ +int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb); + +#include +#include "drbd_nla.h" +#include + +/* used blkdev_get_by_path, to claim our meta data device(s) */ static char *drbd_m_holder = "Hands off! 
this is DRBD's meta data device."; -/* Generate the tag_list to struct functions */ -#define NL_PACKET(name, number, fields) \ -STATIC int name ## _from_tags(struct drbd_conf *mdev, \ - unsigned short *tags, struct name *arg) __attribute__ ((unused)); \ -STATIC int name ## _from_tags(struct drbd_conf *mdev, \ - unsigned short *tags, struct name *arg) \ -{ \ - int tag; \ - int dlen; \ - \ - while ((tag = get_unaligned(tags++)) != TT_END) { \ - dlen = get_unaligned(tags++); \ - switch (tag_number(tag)) { \ - fields \ - default: \ - if (tag & T_MANDATORY) { \ - dev_err(DEV, "Unknown tag: %d\n", tag_number(tag)); \ - return 0; \ - } \ - } \ - tags = (unsigned short *)((char *)tags + dlen); \ - } \ - return 1; \ -} -#define NL_INTEGER(pn, pr, member) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \ - arg->member = get_unaligned((int *)(tags)); \ - break; -#define NL_INT64(pn, pr, member) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \ - arg->member = get_unaligned((u64 *)(tags)); \ +/* Configuration is strictly serialized, because generic netlink message + * processing is strictly serialized by the genl_lock(). + * Which means we can use one static global drbd_config_context struct. + */ +static struct drbd_config_context { + /* assigned from drbd_genlmsghdr */ + unsigned int minor; + /* assigned from request attributes, if present */ + unsigned int volume; +#define VOLUME_UNSPECIFIED (-1U) + /* pointer into the request skb, + * limited lifetime! 
*/ + char *resource_name; + struct nlattr *my_addr; + struct nlattr *peer_addr; + + /* reply buffer */ + struct sk_buff *reply_skb; + /* pointer into reply buffer */ + struct drbd_genlmsghdr *reply_dh; + /* resolved from attributes, if possible */ + struct drbd_conf *mdev; + struct drbd_tconn *tconn; +} adm_ctx; + +static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info) +{ + genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb)))); + if (genlmsg_reply(skb, info)) + printk(KERN_ERR "drbd: error sending genl reply\n"); +} + +/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only + * reason it could fail was no space in skb, and there are 4k available. */ +int drbd_msg_put_info(const char *info) +{ + struct sk_buff *skb = adm_ctx.reply_skb; + struct nlattr *nla; + int err = -EMSGSIZE; + + if (!info || !info[0]) + return 0; + + nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY); + if (!nla) + return err; + + err = nla_put_string(skb, T_info_text, info); + if (err) { + nla_nest_cancel(skb, nla); + return err; + } else + nla_nest_end(skb, nla); + return 0; +} + +/* This would be a good candidate for a "pre_doit" hook, + * and per-family private info->pointers. + * But we need to stay compatible with older kernels. + * If it returns successfully, adm_ctx members are valid. 
+ */ +#define DRBD_ADM_NEED_MINOR 1 +#define DRBD_ADM_NEED_RESOURCE 2 +#define DRBD_ADM_NEED_CONNECTION 4 +static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, + unsigned flags) +{ + struct drbd_genlmsghdr *d_in = info->userhdr; + const u8 cmd = info->genlhdr->cmd; + int err; + + memset(&adm_ctx, 0, sizeof(adm_ctx)); + + /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */ + if (cmd != DRBD_ADM_GET_STATUS + && security_netlink_recv(skb, CAP_SYS_ADMIN)) + return -EPERM; + + adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!adm_ctx.reply_skb) { + err = -ENOMEM; + goto fail; + } + + adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb, + info, &drbd_genl_family, 0, cmd); + /* put of a few bytes into a fresh skb of >= 4k will always succeed. + * but anyways */ + if (!adm_ctx.reply_dh) { + err = -ENOMEM; + goto fail; + } + + adm_ctx.reply_dh->minor = d_in->minor; + adm_ctx.reply_dh->ret_code = NO_ERROR; + + adm_ctx.volume = VOLUME_UNSPECIFIED; + if (info->attrs[DRBD_NLA_CFG_CONTEXT]) { + struct nlattr *nla; + /* parse and validate only */ + err = drbd_cfg_context_from_attrs(NULL, info); + if (err) + goto fail; + + /* It was present, and valid, + * copy it over to the reply skb. 
*/ + err = nla_put_nohdr(adm_ctx.reply_skb, + info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len, + info->attrs[DRBD_NLA_CFG_CONTEXT]); + if (err) + goto fail; + + /* and assign stuff to the global adm_ctx */ + nla = nested_attr_tb[__nla_type(T_ctx_volume)]; + if (nla) + adm_ctx.volume = nla_get_u32(nla); + nla = nested_attr_tb[__nla_type(T_ctx_resource_name)]; + if (nla) + adm_ctx.resource_name = nla_data(nla); + adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)]; + adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)]; + if ((adm_ctx.my_addr && + nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.tconn->my_addr)) || + (adm_ctx.peer_addr && + nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.tconn->peer_addr))) { + err = -EINVAL; + goto fail; + } + } + + adm_ctx.minor = d_in->minor; + adm_ctx.mdev = minor_to_mdev(d_in->minor); + adm_ctx.tconn = conn_get_by_name(adm_ctx.resource_name); + + if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) { + drbd_msg_put_info("unknown minor"); + return ERR_MINOR_INVALID; + } + if (!adm_ctx.tconn && (flags & DRBD_ADM_NEED_RESOURCE)) { + drbd_msg_put_info("unknown resource"); + return ERR_INVALID_REQUEST; + } + + if (flags & DRBD_ADM_NEED_CONNECTION) { + if (adm_ctx.tconn && !(flags & DRBD_ADM_NEED_RESOURCE)) { + drbd_msg_put_info("no resource name expected"); + return ERR_INVALID_REQUEST; + } + if (adm_ctx.mdev) { + drbd_msg_put_info("no minor number expected"); + return ERR_INVALID_REQUEST; + } + if (adm_ctx.my_addr && adm_ctx.peer_addr) + adm_ctx.tconn = conn_get_by_addrs(nla_data(adm_ctx.my_addr), + nla_len(adm_ctx.my_addr), + nla_data(adm_ctx.peer_addr), + nla_len(adm_ctx.peer_addr)); + if (!adm_ctx.tconn) { + drbd_msg_put_info("unknown connection"); + return ERR_INVALID_REQUEST; + } + } + + /* some more paranoia, if the request was over-determined */ + if (adm_ctx.mdev && adm_ctx.tconn && + adm_ctx.mdev->tconn != adm_ctx.tconn) { + pr_warning("request: minor=%u, resource=%s; but that minor belongs to connection %s\n", + 
adm_ctx.minor, adm_ctx.resource_name, + adm_ctx.mdev->tconn->name); + drbd_msg_put_info("minor exists in different resource"); + return ERR_INVALID_REQUEST; + } + if (adm_ctx.mdev && + adm_ctx.volume != VOLUME_UNSPECIFIED && + adm_ctx.volume != adm_ctx.mdev->vnr) { + pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n", + adm_ctx.minor, adm_ctx.volume, + adm_ctx.mdev->vnr, adm_ctx.mdev->tconn->name); + drbd_msg_put_info("minor exists as different volume"); + return ERR_INVALID_REQUEST; + } + + return NO_ERROR; + +fail: + nlmsg_free(adm_ctx.reply_skb); + adm_ctx.reply_skb = NULL; + return err; +} + +static int drbd_adm_finish(struct genl_info *info, int retcode) +{ + if (adm_ctx.tconn) { + kref_put(&adm_ctx.tconn->kref, &conn_destroy); + adm_ctx.tconn = NULL; + } + + if (!adm_ctx.reply_skb) + return -ENOMEM; + + adm_ctx.reply_dh->ret_code = retcode; + drbd_adm_send_reply(adm_ctx.reply_skb, info); + return 0; +} + +static void setup_khelper_env(struct drbd_tconn *tconn, char **envp) +{ + char *afs; + + /* FIXME: A future version will not allow this case. */ + if (tconn->my_addr_len == 0 || tconn->peer_addr_len == 0) + return; + + switch (((struct sockaddr *)&tconn->peer_addr)->sa_family) { + case AF_INET6: + afs = "ipv6"; + snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6", + &((struct sockaddr_in6 *)&tconn->peer_addr)->sin6_addr); break; -#define NL_BIT(pn, pr, member) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \ - arg->member = *(char *)(tags) ? 
1 : 0; \ + case AF_INET: + afs = "ipv4"; + snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", + &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr); break; -#define NL_STRING(pn, pr, member, len) \ - case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \ - if (dlen > len) { \ - dev_err(DEV, "arg too long: %s (%u wanted, max len: %u bytes)\n", \ - #member, dlen, (unsigned int)len); \ - return 0; \ - } \ - arg->member ## _len = dlen; \ - memcpy(arg->member, tags, min_t(size_t, dlen, len)); \ - break; -#include "linux/drbd_nl.h" - -/* Generate the struct to tag_list functions */ -#define NL_PACKET(name, number, fields) \ -STATIC unsigned short* \ -name ## _to_tags(struct drbd_conf *mdev, \ - struct name *arg, unsigned short *tags) __attribute__ ((unused)); \ -STATIC unsigned short* \ -name ## _to_tags(struct drbd_conf *mdev, \ - struct name *arg, unsigned short *tags) \ -{ \ - fields \ - return tags; \ -} - -#define NL_INTEGER(pn, pr, member) \ - put_unaligned(pn | pr | TT_INTEGER, tags++); \ - put_unaligned(sizeof(int), tags++); \ - put_unaligned(arg->member, (int *)tags); \ - tags = (unsigned short *)((char *)tags+sizeof(int)); -#define NL_INT64(pn, pr, member) \ - put_unaligned(pn | pr | TT_INT64, tags++); \ - put_unaligned(sizeof(u64), tags++); \ - put_unaligned(arg->member, (u64 *)tags); \ - tags = (unsigned short *)((char *)tags+sizeof(u64)); -#define NL_BIT(pn, pr, member) \ - put_unaligned(pn | pr | TT_BIT, tags++); \ - put_unaligned(sizeof(char), tags++); \ - *(char *)tags = arg->member; \ - tags = (unsigned short *)((char *)tags+sizeof(char)); -#define NL_STRING(pn, pr, member, len) \ - put_unaligned(pn | pr | TT_STRING, tags++); \ - put_unaligned(arg->member ## _len, tags++); \ - memcpy(tags, arg->member, arg->member ## _len); \ - tags = (unsigned short *)((char *)tags + arg->member ## _len); -#include "linux/drbd_nl.h" - -void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name); -void drbd_nl_send_reply(struct cn_msg *, int); + default: + afs 
= "ssocks"; + snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4", + &((struct sockaddr_in *)&tconn->peer_addr)->sin_addr); + } + snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs); +} int drbd_khelper(struct drbd_conf *mdev, char *cmd) { char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", - NULL, /* Will be set to address family */ - NULL, /* Will be set to address */ + (char[20]) { }, /* address family */ + (char[60]) { }, /* address */ NULL }; - - char mb[12], af[20], ad[60], *afs; + char mb[12]; char *argv[] = {usermode_helper, cmd, mb, NULL }; + struct sib_info sib; int ret; snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev)); + setup_khelper_env(mdev->tconn, envp); - if (get_net_conf(mdev)) { - switch (((struct sockaddr *)mdev->net_conf->peer_addr)->sa_family) { - case AF_INET6: - afs = "ipv6"; - snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI6", - &((struct sockaddr_in6 *)mdev->net_conf->peer_addr)->sin6_addr); - break; - case AF_INET: - afs = "ipv4"; - snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr); - break; - default: - afs = "ssocks"; - snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4", - &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr); - } - snprintf(af, 20, "DRBD_PEER_AF=%s", afs); - envp[3]=af; - envp[4]=ad; - put_net_conf(mdev); - } + /* The helper may take some time. 
+ * write out any unsynced meta data changes now */ + drbd_md_sync(mdev); dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb); - - drbd_bcast_ev_helper(mdev, cmd); + sib.sib_reason = SIB_HELPER_PRE; + sib.helper_name = cmd; + drbd_bcast_event(mdev, &sib); ret = call_usermodehelper(usermode_helper, argv, envp, 1); if (ret) dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", @@ -184,6 +370,59 @@ dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n", usermode_helper, cmd, mb, (ret >> 8) & 0xff, ret); + sib.sib_reason = SIB_HELPER_POST; + sib.helper_exit_code = ret; + drbd_bcast_event(mdev, &sib); + + if (ret < 0) /* Ignore any ERRNOs we got. */ + ret = 0; + + return ret; +} + +static void conn_md_sync(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + kref_get(&mdev->kref); + rcu_read_unlock(); + drbd_md_sync(mdev); + kref_put(&mdev->kref, &drbd_minor_destroy); + rcu_read_lock(); + } + rcu_read_unlock(); +} + +int conn_khelper(struct drbd_tconn *tconn, char *cmd) +{ + char *envp[] = { "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + (char[20]) { }, /* address family */ + (char[60]) { }, /* address */ + NULL }; + char *argv[] = {usermode_helper, cmd, tconn->name, NULL }; + int ret; + + setup_khelper_env(tconn, envp); + conn_md_sync(tconn); + + conn_info(tconn, "helper command: %s %s %s\n", usermode_helper, cmd, tconn->name); + /* TODO: conn_bcast_event() ?? */ + + ret = call_usermodehelper(usermode_helper, argv, envp, 1); + if (ret) + conn_warn(tconn, "helper command: %s %s %s exit code %u (0x%x)\n", + usermode_helper, cmd, tconn->name, + (ret >> 8) & 0xff, ret); + else + conn_info(tconn, "helper command: %s %s %s exit code %u (0x%x)\n", + usermode_helper, cmd, tconn->name, + (ret >> 8) & 0xff, ret); + /* TODO: conn_bcast_event() ?? */ if (ret < 0) /* Ignore any ERRNOs we got. 
*/ ret = 0; @@ -191,166 +430,217 @@ return ret; } -enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev) +static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn) +{ + enum drbd_fencing_p fp = FP_NOT_AVAIL; + struct drbd_conf *mdev; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (get_ldev_if_state(mdev, D_CONSISTENT)) { + fp = max_t(enum drbd_fencing_p, fp, + rcu_dereference(mdev->ldev->disk_conf)->fencing); + put_ldev(mdev); + } + } + rcu_read_unlock(); + + return fp; +} + +bool conn_try_outdate_peer(struct drbd_tconn *tconn) { + union drbd_state mask = { }; + union drbd_state val = { }; + enum drbd_fencing_p fp; char *ex_to_string; int r; - enum drbd_disk_state nps; - enum drbd_fencing_p fp; - - D_ASSERT(mdev->state.pdsk == D_UNKNOWN); - if (get_ldev_if_state(mdev, D_CONSISTENT)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - } else { - dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n"); - return mdev->state.pdsk; + if (tconn->cstate >= C_WF_REPORT_PARAMS) { + conn_err(tconn, "Expected cstate < C_WF_REPORT_PARAMS\n"); + return false; } - if (fp == FP_STONITH) - _drbd_request_state(mdev, NS(susp, 1), CS_WAIT_COMPLETE); + fp = highest_fencing_policy(tconn); + switch (fp) { + case FP_NOT_AVAIL: + conn_warn(tconn, "Not fencing peer, I'm not even Consistent myself.\n"); + goto out; + case FP_DONT_CARE: + return true; + default: ; + } - r = drbd_khelper(mdev, "fence-peer"); + r = conn_khelper(tconn, "fence-peer"); switch ((r>>8) & 0xff) { case 3: /* peer is inconsistent */ ex_to_string = "peer is inconsistent or worse"; - nps = D_INCONSISTENT; + mask.pdsk = D_MASK; + val.pdsk = D_INCONSISTENT; break; case 4: /* peer got outdated, or was already outdated */ ex_to_string = "peer was fenced"; - nps = D_OUTDATED; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; break; case 5: /* peer was down */ - if (mdev->state.disk == D_UP_TO_DATE) { + if (conn_highest_disk(tconn) == 
D_UP_TO_DATE) { /* we will(have) create(d) a new UUID anyways... */ ex_to_string = "peer is unreachable, assumed to be dead"; - nps = D_OUTDATED; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; } else { ex_to_string = "peer unreachable, doing nothing since disk != UpToDate"; - nps = mdev->state.pdsk; } break; case 6: /* Peer is primary, voluntarily outdate myself. * This is useful when an unconnected R_SECONDARY is asked to * become R_PRIMARY, but finds the other peer being active. */ ex_to_string = "peer is active"; - dev_warn(DEV, "Peer is primary, outdating myself.\n"); - nps = D_UNKNOWN; - _drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE); + conn_warn(tconn, "Peer is primary, outdating myself.\n"); + mask.disk = D_MASK; + val.disk = D_OUTDATED; break; case 7: /* THINK: do we need to handle this * like case 4, or more like case 5? */ if (fp != FP_STONITH) - dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n"); + conn_err(tconn, "fence-peer() = 7 && fencing != Stonith !!!\n"); ex_to_string = "peer was stonithed"; - nps = D_OUTDATED; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; break; default: /* The script is broken ... */ - nps = D_UNKNOWN; - dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff); - return nps; + conn_err(tconn, "fence-peer helper broken, returned %d\n", (r>>8)&0xff); + return false; /* Eventually leave IO frozen */ } - dev_info(DEV, "fence-peer helper returned %d (%s)\n", - (r>>8) & 0xff, ex_to_string); - return nps; + conn_info(tconn, "fence-peer helper returned %d (%s)\n", + (r>>8) & 0xff, ex_to_string); + + out: + + /* Not using + conn_request_state(tconn, mask, val, CS_VERBOSE); + here, because we might were able to re-establish the connection in the + meantime. 
*/ + spin_lock_irq(&tconn->req_lock); + if (tconn->cstate < C_WF_REPORT_PARAMS) + _conn_request_state(tconn, mask, val, CS_VERBOSE); + spin_unlock_irq(&tconn->req_lock); + + return conn_highest_pdsk(tconn) <= D_OUTDATED; } +static int _try_outdate_peer_async(void *data) +{ + struct drbd_tconn *tconn = (struct drbd_tconn *)data; + + conn_try_outdate_peer(tconn); + + kref_put(&tconn->kref, &conn_destroy); + return 0; +} + +void conn_try_outdate_peer_async(struct drbd_tconn *tconn) +{ + struct task_struct *opa; + + kref_get(&tconn->kref); + opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h"); + if (IS_ERR(opa)) { + conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n"); + kref_put(&tconn->kref, &conn_destroy); + } +} -int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) +enum drbd_state_rv +drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) { const int max_tries = 4; - int r = 0; + enum drbd_state_rv rv = SS_UNKNOWN_ERROR; + struct net_conf *nc; int try = 0; int forced = 0; union drbd_state mask, val; - enum drbd_disk_state nps; if (new_role == R_PRIMARY) - request_ping(mdev); /* Detect a dead peer ASAP */ + request_ping(mdev->tconn); /* Detect a dead peer ASAP */ - mutex_lock(&mdev->state_mutex); + mutex_lock(mdev->state_mutex); mask.i = 0; mask.role = R_MASK; val.i = 0; val.role = new_role; while (try++ < max_tries) { - DRBD_STATE_DEBUG_INIT_VAL(val); - r = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE); + rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE); /* in case we first succeeded to outdate, * but now suddenly could establish a connection */ - if (r == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) { + if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) { val.pdsk = 0; mask.pdsk = 0; continue; } - if (r == SS_NO_UP_TO_DATE_DISK && force && - (mdev->state.disk == D_INCONSISTENT || - mdev->state.disk == D_OUTDATED)) { + if (rv == SS_NO_UP_TO_DATE_DISK && force && + 
(mdev->state.disk < D_UP_TO_DATE && + mdev->state.disk >= D_INCONSISTENT)) { mask.disk = D_MASK; val.disk = D_UP_TO_DATE; forced = 1; continue; } - if (r == SS_NO_UP_TO_DATE_DISK && + if (rv == SS_NO_UP_TO_DATE_DISK && mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) { D_ASSERT(mdev->state.pdsk == D_UNKNOWN); - nps = drbd_try_outdate_peer(mdev); - if (nps == D_OUTDATED || nps == D_INCONSISTENT) { + if (conn_try_outdate_peer(mdev->tconn)) { val.disk = D_UP_TO_DATE; mask.disk = D_MASK; } - - val.pdsk = nps; - mask.pdsk = D_MASK; - continue; } - if (r == SS_NOTHING_TO_DO) - goto fail; - if (r == SS_PRIMARY_NOP && mask.pdsk == 0) { - nps = drbd_try_outdate_peer(mdev); - - if (force && nps > D_OUTDATED) { + if (rv == SS_NOTHING_TO_DO) + goto out; + if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) { + if (!conn_try_outdate_peer(mdev->tconn) && force) { dev_warn(DEV, "Forced into split brain situation!\n"); - nps = D_OUTDATED; - } - - mask.pdsk = D_MASK; - val.pdsk = nps; + mask.pdsk = D_MASK; + val.pdsk = D_OUTDATED; + } continue; } - if (r == SS_TWO_PRIMARIES) { + if (rv == SS_TWO_PRIMARIES) { /* Maybe the peer is detected as dead very soon... retry at most once more in this case. */ - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout((mdev->net_conf->ping_timeo+1)*HZ/10); + int timeo; + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + timeo = nc ? 
(nc->ping_timeo + 1) * HZ / 10 : 1; + rcu_read_unlock(); + schedule_timeout_interruptible(timeo); if (try < max_tries) try = max_tries - 1; continue; } - if (r < SS_SUCCESS) { - DRBD_STATE_DEBUG_INIT_VAL(val); - r = _drbd_request_state(mdev, mask, val, + if (rv < SS_SUCCESS) { + rv = _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_WAIT_COMPLETE); - if (r < SS_SUCCESS) - goto fail; + if (rv < SS_SUCCESS) + goto out; } break; } - if (r < SS_SUCCESS) - goto fail; + if (rv < SS_SUCCESS) + goto out; if (forced) dev_warn(DEV, "Forced to consider local data as UpToDate!\n"); @@ -359,17 +649,19 @@ wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0); if (new_role == R_SECONDARY) { - set_disk_ro(mdev->vdisk, TRUE); + set_disk_ro(mdev->vdisk, true); if (get_ldev(mdev)) { mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; put_ldev(mdev); } } else { - if (get_net_conf(mdev)) { - mdev->net_conf->want_lose = 0; - put_net_conf(mdev); - } - set_disk_ro(mdev->vdisk, FALSE); + mutex_lock(&mdev->tconn->conf_update); + nc = mdev->tconn->net_conf; + if (nc) + nc->discard_my_data = 0; /* without copy; single bit op is atomic */ + mutex_unlock(&mdev->tconn->conf_update); + + set_disk_ro(mdev->vdisk, false); if (get_ldev(mdev)) { if (((mdev->state.conn < C_CONNECTED || mdev->state.pdsk <= D_FAILED) @@ -381,49 +673,60 @@ } } - if ((new_role == R_SECONDARY) && get_ldev(mdev)) { - drbd_al_to_on_disk_bm(mdev); - put_ldev(mdev); - } + /* writeout of activity log covered areas of the bitmap + * to stable storage done in after state change already */ if (mdev->state.conn >= C_WF_REPORT_PARAMS) { /* if this was forced, we should consider sync */ if (forced) drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_current_state(mdev); } drbd_md_sync(mdev); drbd_kobject_uevent(mdev); - fail: - mutex_unlock(&mdev->state_mutex); - return r; +out: + mutex_unlock(mdev->state_mutex); + return rv; } - -STATIC int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - 
struct drbd_nl_cfg_reply *reply) +static const char *from_attrs_err_to_txt(int err) { - struct primary primary_args; - - memset(&primary_args, 0, sizeof(struct primary)); - if (!primary_from_tags(mdev, nlp->tag_list, &primary_args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; - } - - reply->ret_code = - drbd_set_role(mdev, R_PRIMARY, primary_args.overwrite_peer); - - return 0; + return err == -ENOMSG ? "required attribute missing" : + err == -EOPNOTSUPP ? "unknown mandatory attribute" : + err == -EEXIST ? "can not change invariant setting" : + "invalid attribute value"; } -STATIC int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info) { - reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0); + struct set_role_parms parms; + int err; + enum drbd_ret_code retcode; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + memset(&parms, 0, sizeof(parms)); + if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) { + err = set_role_parms_from_attrs(&parms, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; + } + } + + if (info->genlhdr->cmd == DRBD_ADM_PRIMARY) + retcode = drbd_set_role(adm_ctx.mdev, R_PRIMARY, parms.assume_uptodate); + else + retcode = drbd_set_role(adm_ctx.mdev, R_SECONDARY, 0); +out: + drbd_adm_finish(info, retcode); return 0; } @@ -433,7 +736,12 @@ struct drbd_backing_dev *bdev) { sector_t md_size_sect = 0; - switch (bdev->dc.meta_dev_idx) { + int meta_dev_idx; + + rcu_read_lock(); + meta_dev_idx = rcu_dereference(bdev->disk_conf)->meta_dev_idx; + + switch (meta_dev_idx) { default: /* v07 style fixed size indexed meta data */ bdev->md.md_size_sect = MD_RESERVED_SECT; @@ -452,7 +760,7 @@ case DRBD_MD_INDEX_FLEX_INT: bdev->md.md_offset = drbd_md_ss__(mdev, bdev); /* al size is still 
fixed */ - bdev->md.al_offset = -MD_AL_MAX_SIZE; + bdev->md.al_offset = -MD_AL_SECTORS; /* we need (slightly less than) ~ this much bitmap sectors: */ md_size_sect = drbd_get_capacity(bdev->backing_bdev); md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT); @@ -468,19 +776,22 @@ bdev->md.bm_offset = -md_size_sect + MD_AL_OFFSET; break; } + rcu_read_unlock(); } +/* input size is expected to be in KB */ char *ppsize(char *buf, unsigned long long size) { - /* Needs 9 bytes at max. */ + /* Needs 9 bytes at max including trailing NUL: + * -1ULL ==> "16384 EB" */ static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' }; int base = 0; - while (size >= 10000) { + while (size >= 10000 && base < sizeof(units)-1) { /* shift + round */ size = (size >> 10) + !!(size & (1<<9)); base++; } - sprintf(buf, "%lu %cB", (long)size, units[base]); + sprintf(buf, "%u %cB", (unsigned)size, units[base]); return buf; } @@ -498,9 +809,17 @@ * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET: * peer may not initiate a resize. */ +/* Note these are not to be confused with + * drbd_adm_suspend_io/drbd_adm_resume_io, + * which are (sub) state changes triggered by admin (drbdsetup), + * and can be long lived. + * This changes an mdev->flag, is triggered by drbd internals, + * and should be short-lived. */ void drbd_suspend_io(struct drbd_conf *mdev) { set_bit(SUSPEND_IO, &mdev->flags); + if (drbd_suspended(mdev)) + return; wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); } @@ -517,10 +836,10 @@ * Returns 0 on success, negative return values indicate errors. * You should call drbd_md_sync() after calling this function. 
*/ -enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, int force) __must_hold(local) +enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local) { sector_t prev_first_sect, prev_size; /* previous meta location */ - sector_t la_size; + sector_t la_size, u_size; sector_t size; char ppb[10]; @@ -548,12 +867,15 @@ /* TODO: should only be some assert here, not (re)init... */ drbd_md_set_sector_offsets(mdev, mdev->ldev); - size = drbd_new_dev_size(mdev, mdev->ldev, force); + rcu_read_lock(); + u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size; + rcu_read_unlock(); + size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED); if (drbd_get_capacity(mdev->this_bdev) != size || drbd_bm_capacity(mdev) != size) { int err; - err = drbd_bm_resize(mdev, size); + err = drbd_bm_resize(mdev, size, !(flags & DDSF_NO_RESYNC)); if (unlikely(err)) { /* currently there is only one error: ENOMEM! */ size = drbd_bm_capacity(mdev)>>1; @@ -582,11 +904,19 @@ || prev_size != mdev->ldev->md.md_size_sect; if (la_size_changed || md_moved) { + int err; + drbd_al_shrink(mdev); /* All extents inactive. */ dev_info(DEV, "Writing the whole bitmap, %s\n", la_size_changed && md_moved ? "size changed and md moved" : la_size_changed ? "size changed" : "md moved"); - rv = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */ + /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ + err = drbd_bitmap_io(mdev, &drbd_bm_write, + "size changed", BM_LOCKED_MASK); + if (err) { + rv = dev_size_error; + goto out; + } drbd_md_mark_dirty(mdev); } @@ -603,12 +933,12 @@ } sector_t -drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space) +drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, + sector_t u_size, int assume_peer_has_space) { sector_t p_size = mdev->p_size; /* partner's disk size. 
*/ sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */ sector_t m_size; /* my size */ - sector_t u_size = bdev->dc.disk_size; /* size requested by user. */ sector_t size = 0; m_size = drbd_get_max_capacity(bdev); @@ -657,24 +987,21 @@ * failed, and 0 on success. You should call drbd_md_sync() after you called * this function. */ -STATIC int drbd_check_al_size(struct drbd_conf *mdev) +STATIC int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc) { struct lru_cache *n, *t; struct lc_element *e; unsigned int in_use; int i; - ERR_IF(mdev->sync_conf.al_extents < 7) - mdev->sync_conf.al_extents = 127; - if (mdev->act_log && - mdev->act_log->nr_elements == mdev->sync_conf.al_extents) + mdev->act_log->nr_elements == dc->al_extents) return 0; in_use = 0; t = mdev->act_log; - n = lc_create("act_log", drbd_al_ext_cache, - mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); + n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION, + dc->al_extents, sizeof(struct lc_element), 0); if (n == NULL) { dev_err(DEV, "Cannot allocate act_log lru!\n"); @@ -705,229 +1032,413 @@ return 0; } -void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __must_hold(local) +static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size) { struct request_queue * const q = mdev->rq_queue; - struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; - int max_segments = mdev->ldev->dc.max_bio_bvecs; + int max_hw_sectors = max_bio_size >> 9; + int max_segments = 0; - if (b->merge_bvec_fn && !mdev->ldev->dc.use_bmbv) - max_seg_s = PAGE_SIZE; + if (get_ldev_if_state(mdev, D_ATTACHING)) { + struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; - max_seg_s = min(queue_max_sectors(b) * queue_logical_block_size(b), max_seg_s); + max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); + rcu_read_lock(); + max_segments = 
rcu_dereference(mdev->ldev->disk_conf)->max_bio_bvecs; + rcu_read_unlock(); + put_ldev(mdev); + } - blk_queue_max_sectors(q, max_seg_s >> 9); - blk_queue_max_phys_segments(q, max_segments ? max_segments : MAX_PHYS_SEGMENTS); - blk_queue_max_hw_segments(q, max_segments ? max_segments : MAX_HW_SEGMENTS); - blk_queue_max_segment_size(q, max_seg_s); blk_queue_logical_block_size(q, 512); - blk_queue_segment_boundary(q, PAGE_SIZE-1); - blk_queue_stack_limits(q, b); - - /* KERNEL BUG in old ll_rw_blk.c - * t->max_segment_size = min(t->max_segment_size,b->max_segment_size); - * should be - * t->max_segment_size = min_not_zero(...,...) - * workaround here: */ - if (queue_max_segment_size(q) == 0) - blk_queue_max_segment_size(q, max_seg_s); - - if (b->merge_bvec_fn) - dev_warn(DEV, "Backing device's merge_bvec_fn() = %p\n", - b->merge_bvec_fn); - dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q)); - - if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { - dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", - q->backing_dev_info.ra_pages, - b->backing_dev_info.ra_pages); - q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; + blk_queue_max_hw_sectors(q, max_hw_sectors); + /* This is the workaround for "bio would need to, but cannot, be split" */ + blk_queue_max_segments(q, max_segments ? 
max_segments : BLK_MAX_SEGMENTS); + blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1); + + if (get_ldev_if_state(mdev, D_ATTACHING)) { + struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; + + blk_queue_stack_limits(q, b); + + if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) { + dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n", + q->backing_dev_info.ra_pages, + b->backing_dev_info.ra_pages); + q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; + } + put_ldev(mdev); } } -/* serialize deconfig (worker exiting, doing cleanup) - * and reconfig (drbdsetup disk, drbdsetup net) - * - * wait for a potentially exiting worker, then restart it, - * or start a new one. - */ -static void drbd_reconfig_start(struct drbd_conf *mdev) +void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) { - wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags)); - wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags)); - drbd_thread_start(&mdev->worker); + int now, new, local, peer; + + now = queue_max_hw_sectors(mdev->rq_queue) << 9; + local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */ + peer = mdev->peer_max_bio_size; /* Eventually last known value, from meta data */ + + if (get_ldev_if_state(mdev, D_ATTACHING)) { + local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9; + mdev->local_max_bio_size = local; + put_ldev(mdev); + } + + /* We may ignore peer limits if the peer is modern enough. 
+ Because new from 8.3.8 onwards the peer can use multiple + BIOs for a single peer_request */ + if (mdev->state.conn >= C_CONNECTED) { + if (mdev->tconn->agreed_pro_version < 94) + peer = min_t(int, mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET); + /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */ + else if (mdev->tconn->agreed_pro_version == 94) + peer = DRBD_MAX_SIZE_H80_PACKET; + else if (mdev->tconn->agreed_pro_version < 100) + peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */ + else + peer = DRBD_MAX_BIO_SIZE; + } + + new = min_t(int, local, peer); + + if (mdev->state.role == R_PRIMARY && new < now) + dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now); + + if (new != now) + dev_info(DEV, "max BIO size = %u\n", new); + + drbd_setup_queue_param(mdev, new); } -/* if still unconfigured, stops worker again. - * if configured now, clears CONFIG_PENDING. - * wakes potential waiters */ -static void drbd_reconfig_done(struct drbd_conf *mdev) +/* Starts the worker thread */ +static void conn_reconfig_start(struct drbd_tconn *tconn) { - spin_lock_irq(&mdev->req_lock); - if (mdev->state.disk == D_DISKLESS && - mdev->state.conn == C_STANDALONE && - mdev->state.role == R_SECONDARY) { - set_bit(DEVICE_DYING, &mdev->flags); - drbd_thread_stop_nowait(&mdev->worker); - } else - clear_bit(CONFIG_PENDING, &mdev->flags); - spin_unlock_irq(&mdev->req_lock); - wake_up(&mdev->state_wait); + drbd_thread_start(&tconn->worker); + conn_flush_workqueue(tconn); } -/* does always return 0; - * interesting return code is in reply->ret_code */ -STATIC int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +/* if still unconfigured, stops worker again. 
*/ +static void conn_reconfig_done(struct drbd_tconn *tconn) { - enum drbd_ret_codes retcode; - enum determine_dev_size dd; - sector_t max_possible_sectors; - sector_t min_md_device_sectors; - struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ - struct inode *inode, *inode2; - struct lru_cache *resync_lru = NULL; - union drbd_state ns, os; - int rv; - int cp_discovered = 0; - int logical_block_size; + bool stop_threads; + spin_lock_irq(&tconn->req_lock); + stop_threads = conn_all_vols_unconf(tconn) && + tconn->cstate == C_STANDALONE; + spin_unlock_irq(&tconn->req_lock); + if (stop_threads) { + /* asender is implicitly stopped by receiver + * in conn_disconnect() */ + drbd_thread_stop(&tconn->receiver); + drbd_thread_stop(&tconn->worker); + } +} - drbd_reconfig_start(mdev); +/* Make sure IO is suspended before calling this function(). */ +static void drbd_suspend_al(struct drbd_conf *mdev) +{ + int s = 0; - /* if you want to reconfigure, please tear down first */ - if (mdev->state.disk > D_DISKLESS) { - retcode = ERR_DISK_CONFIGURED; - goto fail; + if (!lc_try_lock(mdev->act_log)) { + dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n"); + return; } - /* allocation not in the IO path, cqueue thread context */ - nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); - if (!nbc) { - retcode = ERR_NOMEM; - goto fail; - } + drbd_al_shrink(mdev); + spin_lock_irq(&mdev->tconn->req_lock); + if (mdev->state.conn < C_CONNECTED) + s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags); + spin_unlock_irq(&mdev->tconn->req_lock); + lc_unlock(mdev->act_log); - nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF; - nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF; - nbc->dc.fencing = DRBD_FENCING_DEF; - nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF; + if (s) + dev_info(DEV, "Suspended AL updates\n"); +} - if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) { - retcode = ERR_MANDATORY_TAG; - goto fail; - } - if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { - retcode = 
ERR_MD_IDX_INVALID; - goto fail; - } +static bool should_set_defaults(struct genl_info *info) +{ + unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags; + return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS); +} - nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0); - if (IS_ERR(nbc->lo_file)) { - dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev, - PTR_ERR(nbc->lo_file)); - nbc->lo_file = NULL; - retcode = ERR_OPEN_DISK; - goto fail; - } +static void enforce_disk_conf_limits(struct disk_conf *dc) +{ + if (dc->al_extents < DRBD_AL_EXTENTS_MIN) + dc->al_extents = DRBD_AL_EXTENTS_MIN; + if (dc->al_extents > DRBD_AL_EXTENTS_MAX) + dc->al_extents = DRBD_AL_EXTENTS_MAX; - inode = nbc->lo_file->f_dentry->d_inode; + if (dc->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX) + dc->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX; +} - if (!S_ISBLK(inode->i_mode)) { - retcode = ERR_DISK_NOT_BDEV; - goto fail; +int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + struct drbd_conf *mdev; + struct disk_conf *new_disk_conf, *old_disk_conf; + struct fifo_buffer *old_plan = NULL, *new_plan = NULL; + int err, fifo_size; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; + + /* we also need a disk + * to change the options on */ + if (!get_ldev(mdev)) { + retcode = ERR_NO_DISK; + goto out; } - nbc->md_file = filp_open(nbc->dc.meta_dev, O_RDWR, 0); - if (IS_ERR(nbc->md_file)) { - dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev, - PTR_ERR(nbc->md_file)); - nbc->md_file = NULL; - retcode = ERR_OPEN_MD_DISK; + new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL); + if (!new_disk_conf) { + retcode = ERR_NOMEM; goto fail; } - inode2 = nbc->md_file->f_dentry->d_inode; + mutex_lock(&mdev->tconn->conf_update); + old_disk_conf = mdev->ldev->disk_conf; + *new_disk_conf = *old_disk_conf; + if 
(should_set_defaults(info)) + set_disk_conf_defaults(new_disk_conf); - if (!S_ISBLK(inode2->i_mode)) { - retcode = ERR_MD_NOT_BDEV; - goto fail; + err = disk_conf_from_attrs_for_change(new_disk_conf, info); + if (err && err != -ENOMSG) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); } - nbc->backing_bdev = inode->i_bdev; - if (bd_claim(nbc->backing_bdev, mdev)) { - printk(KERN_ERR "drbd: bd_claim(%p,%p); failed [%p;%p;%u]\n", - nbc->backing_bdev, mdev, - nbc->backing_bdev->bd_holder, - nbc->backing_bdev->bd_contains->bd_holder, - nbc->backing_bdev->bd_holders); - retcode = ERR_BDCLAIM_DISK; - goto fail; - } + if (!expect(new_disk_conf->resync_rate >= 1)) + new_disk_conf->resync_rate = 1; - resync_lru = lc_create("resync", drbd_bm_ext_cache, - 61, sizeof(struct bm_extent), - offsetof(struct bm_extent, lce)); - if (!resync_lru) { + enforce_disk_conf_limits(new_disk_conf); + + fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; + if (fifo_size != mdev->rs_plan_s->size) { + new_plan = fifo_alloc(fifo_size); + if (!new_plan) { + dev_err(DEV, "kmalloc of fifo_buffer failed"); + retcode = ERR_NOMEM; + goto fail_unlock; + } + } + + wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); + drbd_al_shrink(mdev); + err = drbd_check_al_size(mdev, new_disk_conf); + lc_unlock(mdev->act_log); + wake_up(&mdev->al_wait); + + if (err) { + retcode = ERR_NOMEM; + goto fail_unlock; + } + + write_lock_irq(&global_state_lock); + retcode = drbd_resync_after_valid(mdev, new_disk_conf->resync_after); + if (retcode == NO_ERROR) { + rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); + drbd_resync_after_changed(mdev); + } + write_unlock_irq(&global_state_lock); + + if (retcode != NO_ERROR) + goto fail_unlock; + + if (new_plan) { + old_plan = mdev->rs_plan_s; + rcu_assign_pointer(mdev->rs_plan_s, new_plan); + } + + mutex_unlock(&mdev->tconn->conf_update); + drbd_md_sync(mdev); + + if (mdev->state.conn >= C_CONNECTED) + 
drbd_send_sync_param(mdev); + + synchronize_rcu(); + kfree(old_disk_conf); + kfree(old_plan); + mod_timer(&mdev->request_timer, jiffies + HZ); + goto success; + +fail_unlock: + mutex_unlock(&mdev->tconn->conf_update); + fail: + kfree(new_disk_conf); + kfree(new_plan); +success: + put_ldev(mdev); + out: + drbd_adm_finish(info, retcode); + return 0; +} + +int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) +{ + struct drbd_conf *mdev; + int err; + enum drbd_ret_code retcode; + enum determine_dev_size dd; + sector_t max_possible_sectors; + sector_t min_md_device_sectors; + struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */ + struct disk_conf *new_disk_conf = NULL; + struct block_device *bdev; + struct lru_cache *resync_lru = NULL; + struct fifo_buffer *new_plan = NULL; + union drbd_state ns, os; + enum drbd_state_rv rv; + struct net_conf *nc; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto finish; + + mdev = adm_ctx.mdev; + conn_reconfig_start(mdev->tconn); + + /* if you want to reconfigure, please tear down first */ + if (mdev->state.disk > D_DISKLESS) { + retcode = ERR_DISK_CONFIGURED; + goto fail; + } + /* It may just now have detached because of IO error. Make sure + * drbd_ldev_destroy is done already, we may end up here very fast, + * e.g. 
if someone calls attach from the on-io-error handler, + * to realize a "hot spare" feature (not that I'd recommend that) */ + wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); + + /* allocation not in the IO path, drbdsetup context */ + nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL); + if (!nbc) { + retcode = ERR_NOMEM; + goto fail; + } + new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); + if (!new_disk_conf) { + retcode = ERR_NOMEM; + goto fail; + } + nbc->disk_conf = new_disk_conf; + + set_disk_conf_defaults(new_disk_conf); + err = disk_conf_from_attrs(new_disk_conf, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto fail; + } + + enforce_disk_conf_limits(new_disk_conf); + + new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ); + if (!new_plan) { retcode = ERR_NOMEM; - goto release_bdev_fail; + goto fail; + } + + if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) { + retcode = ERR_MD_IDX_INVALID; + goto fail; + } + + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + if (nc) { + if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) { + rcu_read_unlock(); + retcode = ERR_STONITH_AND_PROT_A; + goto fail; + } + } + rcu_read_unlock(); + + bdev = blkdev_get_by_path(new_disk_conf->backing_dev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev); + if (IS_ERR(bdev)) { + dev_err(DEV, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev, + PTR_ERR(bdev)); + retcode = ERR_OPEN_DISK; + goto fail; } + nbc->backing_bdev = bdev; - /* meta_dev_idx >= 0: external fixed size, - * possibly multiple drbd sharing one meta device. - * TODO in that case, paranoia check that [md_bdev, meta_dev_idx] is - * not yet used by some other drbd minor! 
- * (if you use drbd.conf + drbdadm, - * that should check it for you already; but if you don't, or someone - * fooled it, we need to double check here) */ - nbc->md_bdev = inode2->i_bdev; - if (bd_claim(nbc->md_bdev, (nbc->dc.meta_dev_idx < 0) ? (void *)mdev - : (void *) drbd_m_holder)) { - retcode = ERR_BDCLAIM_MD_DISK; - goto release_bdev_fail; + /* + * meta_dev_idx >= 0: external fixed size, possibly multiple + * drbd sharing one meta device. TODO in that case, paranoia + * check that [md_bdev, meta_dev_idx] is not yet used by some + * other drbd minor! (if you use drbd.conf + drbdadm, that + * should check it for you already; but if you don't, or + * someone fooled it, we need to double check here) + */ + bdev = blkdev_get_by_path(new_disk_conf->meta_dev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL, + (new_disk_conf->meta_dev_idx < 0) ? + (void *)mdev : (void *)drbd_m_holder); + if (IS_ERR(bdev)) { + dev_err(DEV, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev, + PTR_ERR(bdev)); + retcode = ERR_OPEN_MD_DISK; + goto fail; } + nbc->md_bdev = bdev; if ((nbc->backing_bdev == nbc->md_bdev) != - (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL || - nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) { + (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL || + new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) { retcode = ERR_MD_IDX_INVALID; - goto release_bdev2_fail; + goto fail; + } + + resync_lru = lc_create("resync", drbd_bm_ext_cache, + 1, 61, sizeof(struct bm_extent), + offsetof(struct bm_extent, lce)); + if (!resync_lru) { + retcode = ERR_NOMEM; + goto fail; } /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */ drbd_md_set_sector_offsets(mdev, nbc); - if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) { + if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) { dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", (unsigned long long) drbd_get_max_capacity(nbc), - (unsigned long long) nbc->dc.disk_size); - retcode = 
ERR_DISK_TO_SMALL; - goto release_bdev2_fail; + (unsigned long long) new_disk_conf->disk_size); + retcode = ERR_DISK_TOO_SMALL; + goto fail; } - if (nbc->dc.meta_dev_idx < 0) { + if (new_disk_conf->meta_dev_idx < 0) { max_possible_sectors = DRBD_MAX_SECTORS_FLEX; /* at least one MB, otherwise it does not make sense */ min_md_device_sectors = (2<<10); } else { max_possible_sectors = DRBD_MAX_SECTORS; - min_md_device_sectors = MD_RESERVED_SECT * (nbc->dc.meta_dev_idx + 1); + min_md_device_sectors = MD_RESERVED_SECT * (new_disk_conf->meta_dev_idx + 1); } if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { - retcode = ERR_MD_DISK_TO_SMALL; + retcode = ERR_MD_DISK_TOO_SMALL; dev_warn(DEV, "refusing attach: md-device too small, " "at least %llu sectors needed for this meta-disk type\n", (unsigned long long) min_md_device_sectors); - goto release_bdev2_fail; + goto fail; } /* Make sure the new disk is big enough * (we may currently be R_PRIMARY with no local disk...) */ if (drbd_get_max_capacity(nbc) < drbd_get_capacity(mdev->this_bdev)) { - retcode = ERR_DISK_TO_SMALL; - goto release_bdev2_fail; + retcode = ERR_DISK_TOO_SMALL; + goto fail; } nbc->known_size = drbd_get_capacity(nbc->backing_bdev); @@ -936,21 +1447,22 @@ dev_warn(DEV, "==> truncating very big lower level device " "to currently maximum possible %llu sectors <==\n", (unsigned long long) max_possible_sectors); - if (nbc->dc.meta_dev_idx >= 0) + if (new_disk_conf->meta_dev_idx >= 0) dev_warn(DEV, "==>> using internal or flexible " "meta data may help <<==\n"); } drbd_suspend_io(mdev); /* also wait for the last barrier ack. 
*/ - wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt)); + wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || drbd_suspended(mdev)); /* and for any other previously queued work */ drbd_flush_workqueue(mdev); - retcode = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); + rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE); + retcode = rv; /* FIXME: Type mismatch. */ drbd_resume_io(mdev); - if (retcode < SS_SUCCESS) - goto release_bdev2_fail; + if (rv < SS_SUCCESS) + goto fail; if (!get_ldev_if_state(mdev, D_ATTACHING)) goto force_diskless; @@ -978,49 +1490,25 @@ } /* Since we are diskless, fix the activity log first... */ - if (drbd_check_al_size(mdev)) { + if (drbd_check_al_size(mdev, new_disk_conf)) { retcode = ERR_NOMEM; goto force_diskless_dec; } /* Prevent shrinking of consistent devices ! */ if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && - drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) { + drbd_new_dev_size(mdev, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) { dev_warn(DEV, "refusing to truncate a consistent device\n"); - retcode = ERR_DISK_TO_SMALL; - goto force_diskless_dec; - } - - if (!drbd_al_read_log(mdev, nbc)) { - retcode = ERR_IO_MD_DISK; + retcode = ERR_DISK_TOO_SMALL; goto force_diskless_dec; } - /* allocate a second IO page if logical_block_size != 512 */ - logical_block_size = bdev_logical_block_size(nbc->md_bdev); - if (logical_block_size == 0) - logical_block_size = MD_SECTOR_SIZE; - - if (logical_block_size != MD_SECTOR_SIZE) { - if (!mdev->md_io_tmpp) { - struct page *page = alloc_page(GFP_NOIO); - if (!page) - goto force_diskless_dec; - - dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n", - logical_block_size, MD_SECTOR_SIZE); - dev_warn(DEV, "Workaround engaged (has performance impact).\n"); - - mdev->md_io_tmpp = page; - } - } - /* Reset the "barriers don't work" bits here, then force meta data to * be written, to ensure we determine if barriers are 
supported. */ - if (nbc->dc.no_md_flush) - set_bit(MD_NO_BARRIER, &mdev->flags); - else + if (new_disk_conf->md_flushes) clear_bit(MD_NO_BARRIER, &mdev->flags); + else + set_bit(MD_NO_BARRIER, &mdev->flags); /* Point of no return reached. * Devices and memory are no longer released by error cleanup below. @@ -1029,28 +1517,29 @@ D_ASSERT(mdev->ldev == NULL); mdev->ldev = nbc; mdev->resync = resync_lru; + mdev->rs_plan_s = new_plan; nbc = NULL; resync_lru = NULL; + new_disk_conf = NULL; + new_plan = NULL; - mdev->write_ordering = WO_bio_barrier; - drbd_bump_write_ordering(mdev, WO_bio_barrier); + drbd_bump_write_ordering(mdev->tconn, WO_bio_barrier); if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY)) set_bit(CRASHED_PRIMARY, &mdev->flags); else clear_bit(CRASHED_PRIMARY, &mdev->flags); - if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND)) { + if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) && + !(mdev->state.role == R_PRIMARY && mdev->tconn->susp_nod)) set_bit(CRASHED_PRIMARY, &mdev->flags); - cp_discovered = 1; - } mdev->send_cnt = 0; mdev->recv_cnt = 0; mdev->read_cnt = 0; mdev->writ_cnt = 0; - drbd_setup_queue_param(mdev, DRBD_MAX_SEGMENT_SIZE); + drbd_reconsider_max_bio_size(mdev); /* If I am currently not R_PRIMARY, * but meta data primary indicator is set, @@ -1072,7 +1561,7 @@ !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) set_bit(USE_DEGR_WFC_T, &mdev->flags); - dd = drbd_determin_dev_size(mdev, 0); + dd = drbd_determine_dev_size(mdev, 0); if (dd == dev_size_error) { retcode = ERR_NOMEM_BITMAP; goto force_diskless_dec; @@ -1082,25 +1571,25 @@ if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { dev_info(DEV, "Assuming that all blocks are out of sync " "(aka FullSync)\n"); - if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) { + if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, + "set_n_write from attaching", BM_LOCKED_MASK)) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } } else { - if (drbd_bitmap_io(mdev, 
&drbd_bm_read, "read from attaching") < 0) { + if (drbd_bitmap_io(mdev, &drbd_bm_read, + "read from attaching", BM_LOCKED_MASK)) { retcode = ERR_IO_MD_DISK; goto force_diskless_dec; } } - if (cp_discovered) { - drbd_al_apply_to_bm(mdev); - drbd_al_to_on_disk_bm(mdev); - } + if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev)) + drbd_suspend_al(mdev); /* IO is still suspended here... */ - spin_lock_irq(&mdev->req_lock); - os = mdev->state; - ns.i = os.i; + spin_lock_irq(&mdev->tconn->req_lock); + os = drbd_read_state(mdev); + ns = os; /* If MDF_CONSISTENT is not set go into inconsistent state, otherwise investigate MDF_WasUpToDate... If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state, @@ -1118,9 +1607,11 @@ if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED)) ns.pdsk = D_OUTDATED; - if ( ns.disk == D_CONSISTENT && - (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE)) + rcu_read_lock(); + if (ns.disk == D_CONSISTENT && + (ns.pdsk == D_OUTDATED || rcu_dereference(mdev->ldev->disk_conf)->fencing == FP_DONT_CARE)) ns.disk = D_UP_TO_DATE; + rcu_read_unlock(); /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND, MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before @@ -1133,16 +1624,22 @@ mdev->new_state_tmp.i = ns.i; ns.i = os.i; ns.disk = D_NEGOTIATING; + + /* We expect to receive up-to-date UUIDs soon. + To avoid a race in receive_state, free p_uuid while + holding req_lock. I.e. 
atomic with the state change */ + kfree(mdev->p_uuid); + mdev->p_uuid = NULL; } - DRBD_STATE_DEBUG_INIT_VAL(ns); rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); - ns = mdev->state; - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (rv < SS_SUCCESS) goto force_diskless_dec; + mod_timer(&mdev->request_timer, jiffies + HZ); + if (mdev->state.role == R_PRIMARY) mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1; else @@ -1153,8 +1650,8 @@ drbd_kobject_uevent(mdev); put_ldev(mdev); - reply->ret_code = retcode; - drbd_reconfig_done(mdev); + conn_reconfig_done(mdev->tconn); + drbd_adm_finish(info, retcode); return 0; force_diskless_dec: @@ -1162,579 +1659,743 @@ force_diskless: drbd_force_state(mdev, NS(disk, D_DISKLESS)); drbd_md_sync(mdev); - release_bdev2_fail: - if (nbc) - bd_release(nbc->md_bdev); - release_bdev_fail: - if (nbc) - bd_release(nbc->backing_bdev); fail: + conn_reconfig_done(mdev->tconn); if (nbc) { - if (nbc->lo_file) - fput(nbc->lo_file); - if (nbc->md_file) - fput(nbc->md_file); + if (nbc->backing_bdev) + blkdev_put(nbc->backing_bdev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL); + if (nbc->md_bdev) + blkdev_put(nbc->md_bdev, + FMODE_READ | FMODE_WRITE | FMODE_EXCL); kfree(nbc); } + kfree(new_disk_conf); lc_destroy(resync_lru); + kfree(new_plan); - reply->ret_code = retcode; - drbd_reconfig_done(mdev); + finish: + drbd_adm_finish(info, retcode); return 0; } -STATIC int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +static int adm_detach(struct drbd_conf *mdev, int force) { - reply->ret_code = drbd_request_state(mdev, NS(disk, D_DISKLESS)); - return 0; + enum drbd_state_rv retcode; + int ret; + + if (force) { + drbd_force_state(mdev, NS(disk, D_FAILED)); + retcode = SS_SUCCESS; + goto out; + } + + drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ + drbd_md_get_buffer(mdev); /* make sure there is no in-flight meta-data IO */ + retcode = 
drbd_request_state(mdev, NS(disk, D_FAILED)); + drbd_md_put_buffer(mdev); + /* D_FAILED will transition to DISKLESS. */ + ret = wait_event_interruptible(mdev->misc_wait, + mdev->state.disk != D_FAILED); + drbd_resume_io(mdev); + if (retcode == SS_IS_DISKLESS) + retcode = SS_NOTHING_TO_DO; + if (ret) + retcode = ERR_INTR; +out: + return retcode; } -STATIC int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +/* Detaching the disk is a process in multiple stages. First we need to lock + * out application IO, in-flight IO, IO stuck in drbd_al_begin_io. + * Then we transition to D_DISKLESS, and wait for put_ldev() to return all + * internal references as well. + * Only then we have finally detached. */ +int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) { - int i, ns; - enum drbd_ret_codes retcode; - struct net_conf *new_conf = NULL; - struct crypto_hash *tfm = NULL; - struct crypto_hash *integrity_w_tfm = NULL; - struct crypto_hash *integrity_r_tfm = NULL; - struct hlist_head *new_tl_hash = NULL; - struct hlist_head *new_ee_hash = NULL; - struct drbd_conf *odev; - char hmac_name[CRYPTO_MAX_ALG_NAME]; - void *int_dig_out = NULL; - void *int_dig_in = NULL; - void *int_dig_vv = NULL; - struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr; + enum drbd_ret_code retcode; + struct detach_parms parms = { }; + int err; - drbd_reconfig_start(mdev); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - if (mdev->state.conn > C_STANDALONE) { - retcode = ERR_NET_CONFIGURED; - goto fail; + if (info->attrs[DRBD_NLA_DETACH_PARMS]) { + err = detach_parms_from_attrs(&parms, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; + } } - /* allocation not in the IO path, cqueue thread context */ - new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); - if 
(!new_conf) { - retcode = ERR_NOMEM; - goto fail; - } + retcode = adm_detach(adm_ctx.mdev, parms.force_detach); +out: + drbd_adm_finish(info, retcode); + return 0; +} - memset(new_conf, 0, sizeof(struct net_conf)); - new_conf->timeout = DRBD_TIMEOUT_DEF; - new_conf->try_connect_int = DRBD_CONNECT_INT_DEF; - new_conf->ping_int = DRBD_PING_INT_DEF; - new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF; - new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF; - new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF; - new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF; - new_conf->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF; - new_conf->ko_count = DRBD_KO_COUNT_DEF; - new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF; - new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF; - new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF; - new_conf->want_lose = 0; - new_conf->two_primaries = 0; - new_conf->wire_protocol = DRBD_PROT_C; - new_conf->ping_timeo = DRBD_PING_TIMEO_DEF; - new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF; +static bool conn_resync_running(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + bool rv = false; + int vnr; - if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) { - retcode = ERR_MANDATORY_TAG; - goto fail; + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (mdev->state.conn == C_SYNC_SOURCE || + mdev->state.conn == C_SYNC_TARGET || + mdev->state.conn == C_PAUSED_SYNC_S || + mdev->state.conn == C_PAUSED_SYNC_T) { + rv = true; + break; + } } + rcu_read_unlock(); - if (new_conf->two_primaries - && (new_conf->wire_protocol != DRBD_PROT_C)) { - retcode = ERR_NOT_PROTO_C; - goto fail; - }; - - if (mdev->state.role == R_PRIMARY && new_conf->want_lose) { - retcode = ERR_DISCARD; - goto fail; - } + return rv; +} - retcode = NO_ERROR; +static bool conn_ov_running(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + bool rv = false; + int vnr; - new_my_addr = (struct sockaddr *)&new_conf->my_addr; - new_peer_addr = (struct sockaddr *)&new_conf->peer_addr; - for (i = 
0; i < minor_count; i++) { - odev = minor_to_mdev(i); - if (!odev || odev == mdev) - continue; - if (get_net_conf(odev)) { - taken_addr = (struct sockaddr *)&odev->net_conf->my_addr; - if (new_conf->my_addr_len == odev->net_conf->my_addr_len && - !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len)) - retcode = ERR_LOCAL_ADDR; - - taken_addr = (struct sockaddr *)&odev->net_conf->peer_addr; - if (new_conf->peer_addr_len == odev->net_conf->peer_addr_len && - !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len)) - retcode = ERR_PEER_ADDR; - - put_net_conf(odev); - if (retcode != NO_ERROR) - goto fail; + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (mdev->state.conn == C_VERIFY_S || + mdev->state.conn == C_VERIFY_T) { + rv = true; + break; } } + rcu_read_unlock(); - if (new_conf->cram_hmac_alg[0] != 0) { - snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", - new_conf->cram_hmac_alg); - tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) { - tfm = NULL; - retcode = ERR_AUTH_ALG; - goto fail; - } + return rv; +} - if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) { - retcode = ERR_AUTH_ALG_ND; - goto fail; - } - } +static enum drbd_ret_code +_check_net_options(struct drbd_tconn *tconn, struct net_conf *old_conf, struct net_conf *new_conf) +{ + struct drbd_conf *mdev; + int i; - if (new_conf->integrity_alg[0]) { - integrity_w_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(integrity_w_tfm)) { - integrity_w_tfm = NULL; - retcode=ERR_INTEGRITY_ALG; - goto fail; - } + if (old_conf && tconn->cstate == C_WF_REPORT_PARAMS && tconn->agreed_pro_version < 100) { + if (new_conf->wire_protocol != old_conf->wire_protocol) + return ERR_NEED_APV_100; - if (!drbd_crypto_is_hash(crypto_hash_tfm(integrity_w_tfm))) { - retcode=ERR_INTEGRITY_ALG_ND; - goto fail; - } + if (new_conf->two_primaries != old_conf->two_primaries) + return ERR_NEED_APV_100; - integrity_r_tfm = 
crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(integrity_r_tfm)) { - integrity_r_tfm = NULL; - retcode=ERR_INTEGRITY_ALG; - goto fail; - } - } + if (!new_conf->integrity_alg != !old_conf->integrity_alg) + return ERR_NEED_APV_100; - ns = new_conf->max_epoch_size/8; - if (mdev->tl_hash_s != ns) { - new_tl_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL); - if (!new_tl_hash) { - retcode = ERR_NOMEM; - goto fail; - } + if (strcmp(new_conf->integrity_alg, old_conf->integrity_alg)) + return ERR_NEED_APV_100; } - ns = new_conf->max_buffers/8; - if (new_conf->two_primaries && (mdev->ee_hash_s != ns)) { - new_ee_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL); - if (!new_ee_hash) { - retcode = ERR_NOMEM; - goto fail; + if (!new_conf->two_primaries && + conn_highest_role(tconn) == R_PRIMARY && + conn_highest_peer(tconn) == R_PRIMARY) + return ERR_NEED_ALLOW_TWO_PRI; + + if (new_conf->two_primaries && + (new_conf->wire_protocol != DRBD_PROT_C)) + return ERR_NOT_PROTO_C; + + idr_for_each_entry(&tconn->volumes, mdev, i) { + if (get_ldev(mdev)) { + enum drbd_fencing_p fp = rcu_dereference(mdev->ldev->disk_conf)->fencing; + put_ldev(mdev); + if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) + return ERR_STONITH_AND_PROT_A; } + if (mdev->state.role == R_PRIMARY && new_conf->discard_my_data) + return ERR_DISCARD; } - ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0; + if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) + return ERR_CONG_NOT_PROTO_A; -#if 0 - /* for the connection loss logic in drbd_recv - * I _need_ the resulting timeo in jiffies to be - * non-zero and different - * - * XXX maybe rather store the value scaled to jiffies? - * Note: MAX_SCHEDULE_TIMEOUT/HZ*HZ != MAX_SCHEDULE_TIMEOUT - * and HZ > 10; which is unlikely to change... - * Thus, if interrupted by a signal, - * sock_{send,recv}msg returns -EINTR, - * if the timeout expires, -EAGAIN. 
- */ - /* unlikely: someone disabled the timeouts ... - * just put some huge values in there. */ - if (!new_conf->ping_int) - new_conf->ping_int = MAX_SCHEDULE_TIMEOUT/HZ; - if (!new_conf->timeout) - new_conf->timeout = MAX_SCHEDULE_TIMEOUT/HZ*10; - if (new_conf->ping_int*10 < new_conf->timeout) - new_conf->timeout = new_conf->ping_int*10/6; - if (new_conf->ping_int*10 == new_conf->timeout) - new_conf->ping_int = new_conf->ping_int+1; -#endif + return NO_ERROR; +} - /* allocation not in the IO path, cqueue thread context */ - if (integrity_w_tfm) { - i = crypto_hash_digestsize(integrity_w_tfm); - int_dig_out = kmalloc(i, GFP_KERNEL); - if (!int_dig_out) { - retcode = ERR_NOMEM; - goto fail; - } - int_dig_in = kmalloc(i, GFP_KERNEL); - if (!int_dig_in) { - retcode = ERR_NOMEM; - goto fail; - } - int_dig_vv = kmalloc(i, GFP_KERNEL); - if (!int_dig_vv) { - retcode = ERR_NOMEM; - goto fail; - } - } +static enum drbd_ret_code +check_net_options(struct drbd_tconn *tconn, struct net_conf *new_conf) +{ + static enum drbd_ret_code rv; + struct drbd_conf *mdev; + int i; - if (!mdev->bitmap) { - if(drbd_bm_init(mdev)) { - retcode = ERR_NOMEM; - goto fail; + rcu_read_lock(); + rv = _check_net_options(tconn, rcu_dereference(tconn->net_conf), new_conf); + rcu_read_unlock(); + + /* tconn->volumes protected by genl_lock() here */ + idr_for_each_entry(&tconn->volumes, mdev, i) { + if (!mdev->bitmap) { + if(drbd_bm_init(mdev)) + return ERR_NOMEM; } } - spin_lock_irq(&mdev->req_lock); - if (mdev->net_conf != NULL) { - retcode = ERR_NET_CONFIGURED; - spin_unlock_irq(&mdev->req_lock); - goto fail; - } - mdev->net_conf = new_conf; + return rv; +} - mdev->send_cnt = 0; - mdev->recv_cnt = 0; +struct crypto { + struct crypto_hash *verify_tfm; + struct crypto_hash *csums_tfm; + struct crypto_hash *cram_hmac_tfm; + struct crypto_hash *integrity_tfm; +}; - if (new_tl_hash) { - kfree(mdev->tl_hash); - mdev->tl_hash_s = mdev->net_conf->max_epoch_size/8; - mdev->tl_hash = new_tl_hash; - } 
+static int +alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg) +{ + if (!tfm_name[0]) + return NO_ERROR; - if (new_ee_hash) { - kfree(mdev->ee_hash); - mdev->ee_hash_s = mdev->net_conf->max_buffers/8; - mdev->ee_hash = new_ee_hash; + *tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(*tfm)) { + *tfm = NULL; + return err_alg; } - crypto_free_hash(mdev->cram_hmac_tfm); - mdev->cram_hmac_tfm = tfm; - - crypto_free_hash(mdev->integrity_w_tfm); - mdev->integrity_w_tfm = integrity_w_tfm; - - crypto_free_hash(mdev->integrity_r_tfm); - mdev->integrity_r_tfm = integrity_r_tfm; + return NO_ERROR; +} - kfree(mdev->int_dig_out); - kfree(mdev->int_dig_in); - kfree(mdev->int_dig_vv); - mdev->int_dig_out=int_dig_out; - mdev->int_dig_in=int_dig_in; - mdev->int_dig_vv=int_dig_vv; - spin_unlock_irq(&mdev->req_lock); +static enum drbd_ret_code +alloc_crypto(struct crypto *crypto, struct net_conf *new_conf) +{ + char hmac_name[CRYPTO_MAX_ALG_NAME]; + enum drbd_ret_code rv; - retcode = _drbd_request_state(mdev, NS(conn, C_UNCONNECTED), CS_VERBOSE); + rv = alloc_hash(&crypto->csums_tfm, new_conf->csums_alg, + ERR_CSUMS_ALG); + if (rv != NO_ERROR) + return rv; + rv = alloc_hash(&crypto->verify_tfm, new_conf->verify_alg, + ERR_VERIFY_ALG); + if (rv != NO_ERROR) + return rv; + rv = alloc_hash(&crypto->integrity_tfm, new_conf->integrity_alg, + ERR_INTEGRITY_ALG); + if (rv != NO_ERROR) + return rv; + if (new_conf->cram_hmac_alg[0] != 0) { + snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", + new_conf->cram_hmac_alg); - drbd_kobject_uevent(mdev); - reply->ret_code = retcode; - drbd_reconfig_done(mdev); - return 0; + rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name, + ERR_AUTH_ALG); + } -fail: - kfree(int_dig_out); - kfree(int_dig_in); - kfree(int_dig_vv); - crypto_free_hash(tfm); - crypto_free_hash(integrity_w_tfm); - crypto_free_hash(integrity_r_tfm); - kfree(new_tl_hash); - kfree(new_ee_hash); - kfree(new_conf); + return rv; +} - reply->ret_code = 
retcode; - drbd_reconfig_done(mdev); - return 0; +static void free_crypto(struct crypto *crypto) +{ + crypto_free_hash(crypto->cram_hmac_tfm); + crypto_free_hash(crypto->integrity_tfm); + crypto_free_hash(crypto->csums_tfm); + crypto_free_hash(crypto->verify_tfm); } -STATIC int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) { - int retcode; + enum drbd_ret_code retcode; + struct drbd_tconn *tconn; + struct net_conf *old_conf, *new_conf = NULL; + int err; + int ovr; /* online verify running */ + int rsr; /* re-sync running */ + struct crypto crypto = { }; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED); + tconn = adm_ctx.tconn; - if (retcode == SS_NOTHING_TO_DO) - goto done; - else if (retcode == SS_ALREADY_STANDALONE) - goto done; - else if (retcode == SS_PRIMARY_NOP) { - /* Our statche checking code wants to see the peer outdated. */ - retcode = drbd_request_state(mdev, NS2(conn, C_DISCONNECTING, - pdsk, D_OUTDATED)); - } else if (retcode == SS_CW_FAILED_BY_PEER) { - /* The peer probably wants to see us outdated. */ - retcode = _drbd_request_state(mdev, NS2(conn, C_DISCONNECTING, - disk, D_OUTDATED), - CS_ORDERED); - if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) { - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - retcode = SS_SUCCESS; - } + new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); + if (!new_conf) { + retcode = ERR_NOMEM; + goto out; } - if (retcode < SS_SUCCESS) - goto fail; + conn_reconfig_start(tconn); - if (wait_event_interruptible(mdev->state_wait, - mdev->state.conn != C_DISCONNECTING)) { - /* Do not test for mdev->state.conn == C_STANDALONE, since - someone else might connect us in the mean time! 
*/ - retcode = ERR_INTR; + mutex_lock(&tconn->data.mutex); + mutex_lock(&tconn->conf_update); + old_conf = tconn->net_conf; + + if (!old_conf) { + drbd_msg_put_info("net conf missing, try connect"); + retcode = ERR_INVALID_REQUEST; goto fail; } - done: - retcode = NO_ERROR; - fail: - drbd_md_sync(mdev); - reply->ret_code = retcode; - return 0; -} + *new_conf = *old_conf; + if (should_set_defaults(info)) + set_net_conf_defaults(new_conf); -void resync_after_online_grow(struct drbd_conf *mdev) -{ - int iass; /* I am sync source */ + err = net_conf_from_attrs_for_change(new_conf, info); + if (err && err != -ENOMSG) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto fail; + } - dev_info(DEV, "Resync of new storage after online grow\n"); - if (mdev->state.role != mdev->state.peer) - iass = (mdev->state.role == R_PRIMARY); - else - iass = test_bit(DISCARD_CONCURRENT, &mdev->flags); + retcode = check_net_options(tconn, new_conf); + if (retcode != NO_ERROR) + goto fail; - if (iass) - drbd_start_resync(mdev, C_SYNC_SOURCE); - else - _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE); -} - -STATIC int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) -{ - struct resize rs; - int retcode = NO_ERROR; - int ldsc = 0; /* local disk size changed */ - enum determine_dev_size dd; - - memset(&rs, 0, sizeof(struct resize)); - if (!resize_from_tags(mdev, nlp->tag_list, &rs)) { - retcode = ERR_MANDATORY_TAG; + /* re-sync running */ + rsr = conn_resync_running(tconn); + if (rsr && strcmp(new_conf->csums_alg, old_conf->csums_alg)) { + retcode = ERR_CSUMS_RESYNC_RUNNING; goto fail; } - if (mdev->state.conn > C_CONNECTED) { - retcode = ERR_RESIZE_RESYNC; + /* online verify running */ + ovr = conn_ov_running(tconn); + if (ovr && strcmp(new_conf->verify_alg, old_conf->verify_alg)) { + retcode = ERR_VERIFY_RUNNING; goto fail; } - if (mdev->state.role == R_SECONDARY && - 
mdev->state.peer == R_SECONDARY) { - retcode = ERR_NO_PRIMARY; + retcode = alloc_crypto(&crypto, new_conf); + if (retcode != NO_ERROR) goto fail; - } - if (!get_ldev(mdev)) { - retcode = ERR_NO_DISK; - goto fail; - } + rcu_assign_pointer(tconn->net_conf, new_conf); - if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) { - mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); - ldsc = 1; + if (!rsr) { + crypto_free_hash(tconn->csums_tfm); + tconn->csums_tfm = crypto.csums_tfm; + crypto.csums_tfm = NULL; } - - mdev->ldev->dc.disk_size = (sector_t)rs.resize_size; - dd = drbd_determin_dev_size(mdev, rs.resize_force); - drbd_md_sync(mdev); - put_ldev(mdev); - if (dd == dev_size_error) { - retcode = ERR_NOMEM_BITMAP; - goto fail; + if (!ovr) { + crypto_free_hash(tconn->verify_tfm); + tconn->verify_tfm = crypto.verify_tfm; + crypto.verify_tfm = NULL; } - if (mdev->state.conn == C_CONNECTED && (dd != unchanged || ldsc)) { - if (dd == grew) - set_bit(RESIZE_PENDING, &mdev->flags); + crypto_free_hash(tconn->integrity_tfm); + tconn->integrity_tfm = crypto.integrity_tfm; + if (tconn->cstate >= C_WF_REPORT_PARAMS && tconn->agreed_pro_version >= 100) + /* Do this without trying to take tconn->data.mutex again. 
*/ + __drbd_send_protocol(tconn, P_PROTOCOL_UPDATE); - drbd_send_uuids(mdev); - drbd_send_sizes(mdev, 1); - } + crypto_free_hash(tconn->cram_hmac_tfm); + tconn->cram_hmac_tfm = crypto.cram_hmac_tfm; + + mutex_unlock(&tconn->conf_update); + mutex_unlock(&tconn->data.mutex); + synchronize_rcu(); + kfree(old_conf); + + if (tconn->cstate >= C_WF_REPORT_PARAMS) + drbd_send_sync_param(minor_to_mdev(conn_lowest_minor(tconn))); + + goto done; fail: - reply->ret_code = retcode; + mutex_unlock(&tconn->conf_update); + mutex_unlock(&tconn->data.mutex); + free_crypto(&crypto); + kfree(new_conf); + done: + conn_reconfig_done(tconn); + out: + drbd_adm_finish(info, retcode); return 0; } -STATIC int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; + struct drbd_conf *mdev; + struct net_conf *old_conf, *new_conf = NULL; + struct crypto crypto = { }; + struct drbd_tconn *tconn; + enum drbd_ret_code retcode; + int i; int err; - int ovr; /* online verify running */ - int rsr; /* re-sync running */ - struct crypto_hash *verify_tfm = NULL; - struct crypto_hash *csums_tfm = NULL; - struct syncer_conf sc; - cpumask_var_t new_cpu_mask; - if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) { + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); + + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) { + drbd_msg_put_info("connection endpoint(s) missing"); + retcode = ERR_INVALID_REQUEST; + goto out; + } + + /* No need for _rcu here. All reconfiguration is + * strictly serialized on genl_lock(). 
We are protected against + * concurrent reconfiguration/addition/deletion */ + list_for_each_entry(tconn, &drbd_tconns, all_tconn) { + if (nla_len(adm_ctx.my_addr) == tconn->my_addr_len && + !memcmp(nla_data(adm_ctx.my_addr), &tconn->my_addr, tconn->my_addr_len)) { + retcode = ERR_LOCAL_ADDR; + goto out; + } + + if (nla_len(adm_ctx.peer_addr) == tconn->peer_addr_len && + !memcmp(nla_data(adm_ctx.peer_addr), &tconn->peer_addr, tconn->peer_addr_len)) { + retcode = ERR_PEER_ADDR; + goto out; + } + } + + tconn = adm_ctx.tconn; + conn_reconfig_start(tconn); + + if (tconn->cstate > C_STANDALONE) { + retcode = ERR_NET_CONFIGURED; + goto fail; + } + + /* allocation not in the IO path, drbdsetup / netlink process context */ + new_conf = kzalloc(sizeof(*new_conf), GFP_KERNEL); + if (!new_conf) { retcode = ERR_NOMEM; goto fail; } - if (nlp->flags & DRBD_NL_SET_DEFAULTS) { - memset(&sc, 0, sizeof(struct syncer_conf)); - sc.rate = DRBD_RATE_DEF; - sc.after = DRBD_AFTER_DEF; - sc.al_extents = DRBD_AL_EXTENTS_DEF; - } else - memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf)); + set_net_conf_defaults(new_conf); - if (!syncer_conf_from_tags(mdev, nlp->tag_list, &sc)) { + err = net_conf_from_attrs(new_conf, info); + if (err && err != -ENOMSG) { retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } - /* re-sync running */ - rsr = ( mdev->state.conn == C_SYNC_SOURCE || - mdev->state.conn == C_SYNC_TARGET || - mdev->state.conn == C_PAUSED_SYNC_S || - mdev->state.conn == C_PAUSED_SYNC_T ); + retcode = check_net_options(tconn, new_conf); + if (retcode != NO_ERROR) + goto fail; - if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) { - retcode = ERR_CSUMS_RESYNC_RUNNING; + retcode = alloc_crypto(&crypto, new_conf); + if (retcode != NO_ERROR) + goto fail; + + ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0; + + conn_flush_workqueue(tconn); + + mutex_lock(&tconn->conf_update); + old_conf = tconn->net_conf; + if (old_conf) { + 
retcode = ERR_NET_CONFIGURED; + mutex_unlock(&tconn->conf_update); goto fail; } + rcu_assign_pointer(tconn->net_conf, new_conf); - if (!rsr && sc.csums_alg[0]) { - csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(csums_tfm)) { - csums_tfm = NULL; - retcode = ERR_CSUMS_ALG; - goto fail; - } + conn_free_crypto(tconn); + tconn->cram_hmac_tfm = crypto.cram_hmac_tfm; + tconn->integrity_tfm = crypto.integrity_tfm; + tconn->csums_tfm = crypto.csums_tfm; + tconn->verify_tfm = crypto.verify_tfm; - if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) { - retcode = ERR_CSUMS_ALG_ND; - goto fail; - } + tconn->my_addr_len = nla_len(adm_ctx.my_addr); + memcpy(&tconn->my_addr, nla_data(adm_ctx.my_addr), tconn->my_addr_len); + tconn->peer_addr_len = nla_len(adm_ctx.peer_addr); + memcpy(&tconn->peer_addr, nla_data(adm_ctx.peer_addr), tconn->peer_addr_len); + + mutex_unlock(&tconn->conf_update); + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, i) { + mdev->send_cnt = 0; + mdev->recv_cnt = 0; } + rcu_read_unlock(); - /* online verify running */ - ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T); + retcode = conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); - if (ovr) { - if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) { - retcode = ERR_VERIFY_RUNNING; - goto fail; + conn_reconfig_done(tconn); + drbd_adm_finish(info, retcode); + return 0; + +fail: + free_crypto(&crypto); + kfree(new_conf); + + conn_reconfig_done(tconn); +out: + drbd_adm_finish(info, retcode); + return 0; +} + +static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool force) +{ + enum drbd_state_rv rv; + + rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), + force ? CS_HARD : 0); + + switch (rv) { + case SS_NOTHING_TO_DO: + break; + case SS_ALREADY_STANDALONE: + return SS_SUCCESS; + case SS_PRIMARY_NOP: + /* Our state checking code wants to see the peer outdated. 
*/ + rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, + pdsk, D_OUTDATED), CS_VERBOSE); + break; + case SS_CW_FAILED_BY_PEER: + /* The peer probably wants to see us outdated. */ + rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING, + disk, D_OUTDATED), 0); + if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) { + rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), + CS_HARD); } + break; + default:; + /* no special handling necessary */ } - if (!ovr && sc.verify_alg[0]) { - verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(verify_tfm)) { - verify_tfm = NULL; - retcode = ERR_VERIFY_ALG; - goto fail; - } + if (rv >= SS_SUCCESS) { + enum drbd_state_rv rv2; + /* No one else can reconfigure the network while I am here. + * The state handling only uses drbd_thread_stop_nowait(), + * we want to really wait here until the receiver is no more. + */ + drbd_thread_stop(&adm_ctx.tconn->receiver); + + /* Race breaker. This additional state change request may be + * necessary, if this was a forced disconnect during a receiver + * restart. We may have "killed" the receiver thread just + * after drbdd_init() returned. Typically, we should be + * C_STANDALONE already, now, and this becomes a no-op. 
+ */ + rv2 = conn_request_state(tconn, NS(conn, C_STANDALONE), + CS_VERBOSE | CS_HARD); + if (rv2 < SS_SUCCESS) + conn_err(tconn, + "unexpected rv2=%d in conn_try_disconnect()\n", + rv2); + } + return rv; +} + +int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) +{ + struct disconnect_parms parms; + struct drbd_tconn *tconn; + enum drbd_state_rv rv; + enum drbd_ret_code retcode; + int err; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto fail; - if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) { - retcode = ERR_VERIFY_ALG_ND; + tconn = adm_ctx.tconn; + memset(&parms, 0, sizeof(parms)); + if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) { + err = disconnect_parms_from_attrs(&parms, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } } - /* silently ignore cpu mask on UP kernel */ - if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) { - err = __bitmap_parse(sc.cpu_mask, 32, 0, - cpumask_bits(new_cpu_mask), nr_cpu_ids); + rv = conn_try_disconnect(tconn, parms.force_disconnect); + if (rv < SS_SUCCESS) + retcode = rv; /* FIXME: Type mismatch. 
*/ + else + retcode = NO_ERROR; + fail: + drbd_adm_finish(info, retcode); + return 0; +} + +void resync_after_online_grow(struct drbd_conf *mdev) +{ + int iass; /* I am sync source */ + + dev_info(DEV, "Resync of new storage after online grow\n"); + if (mdev->state.role != mdev->state.peer) + iass = (mdev->state.role == R_PRIMARY); + else + iass = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); + + if (iass) + drbd_start_resync(mdev, C_SYNC_SOURCE); + else + _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE); +} + +int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) +{ + struct disk_conf *old_disk_conf, *new_disk_conf = NULL; + struct resize_parms rs; + struct drbd_conf *mdev; + enum drbd_ret_code retcode; + enum determine_dev_size dd; + enum dds_flags ddsf; + sector_t u_size; + int err; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto fail; + + memset(&rs, 0, sizeof(struct resize_parms)); + if (info->attrs[DRBD_NLA_RESIZE_PARMS]) { + err = resize_parms_from_attrs(&rs, info); if (err) { - dev_warn(DEV, "__bitmap_parse() failed with %d\n", err); - retcode = ERR_CPU_MASK_PARSE; + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); goto fail; } } - ERR_IF (sc.rate < 1) sc.rate = 1; - ERR_IF (sc.al_extents < 7) sc.al_extents = 127; /* arbitrary minimum */ -#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT) - if (sc.al_extents > AL_MAX) { - dev_err(DEV, "sc.al_extents > %d\n", AL_MAX); - sc.al_extents = AL_MAX; - } -#undef AL_MAX - - /* most sanity checks done, try to assign the new sync-after - * dependency. need to hold the global lock in there, - * to avoid a race in the dependency loop check. 
*/ - retcode = drbd_alter_sa(mdev, sc.after); - if (retcode != NO_ERROR) + mdev = adm_ctx.mdev; + if (mdev->state.conn > C_CONNECTED) { + retcode = ERR_RESIZE_RESYNC; goto fail; + } - /* ok, assign the rest of it as well. - * lock against receive_SyncParam() */ - spin_lock(&mdev->peer_seq_lock); - mdev->sync_conf = sc; - - if (!rsr) { - crypto_free_hash(mdev->csums_tfm); - mdev->csums_tfm = csums_tfm; - csums_tfm = NULL; + if (mdev->state.role == R_SECONDARY && + mdev->state.peer == R_SECONDARY) { + retcode = ERR_NO_PRIMARY; + goto fail; } - if (!ovr) { - crypto_free_hash(mdev->verify_tfm); - mdev->verify_tfm = verify_tfm; - verify_tfm = NULL; - } - spin_unlock(&mdev->peer_seq_lock); - - if (get_ldev(mdev)) { - wait_event(mdev->al_wait, lc_try_lock(mdev->act_log)); - drbd_al_shrink(mdev); - err = drbd_check_al_size(mdev); - lc_unlock(mdev->act_log); - wake_up(&mdev->al_wait); + if (!get_ldev(mdev)) { + retcode = ERR_NO_DISK; + goto fail; + } - put_ldev(mdev); - drbd_md_sync(mdev); + if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) { + retcode = ERR_NEED_APV_93; + goto fail_ldev; + } - if (err) { + rcu_read_lock(); + u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size; + rcu_read_unlock(); + if (u_size != (sector_t)rs.resize_size) { + new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL); + if (!new_disk_conf) { retcode = ERR_NOMEM; - goto fail; + goto fail_ldev; } } - if (mdev->state.conn >= C_CONNECTED) - drbd_send_sync_param(mdev, &sc); + if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) + mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev); - if (!cpumask_equal(mdev->cpu_mask, new_cpu_mask)) { - cpumask_copy(mdev->cpu_mask, new_cpu_mask); - drbd_calc_cpu_mask(mdev); - mdev->receiver.reset_cpu_mask = 1; - mdev->asender.reset_cpu_mask = 1; - mdev->worker.reset_cpu_mask = 1; + if (new_disk_conf) { + mutex_lock(&mdev->tconn->conf_update); + old_disk_conf = mdev->ldev->disk_conf; + *new_disk_conf = 
*old_disk_conf; + new_disk_conf->disk_size = (sector_t)rs.resize_size; + rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); + mutex_unlock(&mdev->tconn->conf_update); + synchronize_rcu(); + kfree(old_disk_conf); + } + + ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0); + dd = drbd_determine_dev_size(mdev, ddsf); + drbd_md_sync(mdev); + put_ldev(mdev); + if (dd == dev_size_error) { + retcode = ERR_NOMEM_BITMAP; + goto fail; + } + + if (mdev->state.conn == C_CONNECTED) { + if (dd == grew) + set_bit(RESIZE_PENDING, &mdev->flags); + + drbd_send_uuids(mdev); + drbd_send_sizes(mdev, 1, ddsf); + } + + fail: + drbd_adm_finish(info, retcode); + return 0; + + fail_ldev: + put_ldev(mdev); + goto fail; +} + +int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + struct drbd_tconn *tconn; + struct res_opts res_opts; + int err; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto fail; + tconn = adm_ctx.tconn; + + res_opts = tconn->res_opts; + if (should_set_defaults(info)) + set_res_opts_defaults(&res_opts); + + err = res_opts_from_attrs(&res_opts, info); + if (err && err != -ENOMSG) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto fail; + } + + err = set_resource_options(tconn, &res_opts); + if (err) { + retcode = ERR_INVALID_REQUEST; + if (err == -ENOMEM) + retcode = ERR_NOMEM; } - drbd_kobject_uevent(mdev); fail: - free_cpumask_var(new_cpu_mask); - crypto_free_hash(csums_tfm); - crypto_free_hash(verify_tfm); - reply->ret_code = retcode; + drbd_adm_finish(info, retcode); return 0; } -STATIC int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) { - int retcode; + struct drbd_conf *mdev; + int retcode; /* enum drbd_ret_code rsp. 
enum drbd_state_rv */ + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; + + /* If there is still bitmap IO pending, probably because of a previous + * resync just being finished, wait for it before requesting a new resync. */ + wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); @@ -1742,10 +2403,10 @@ retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); while (retcode == SS_NEED_CONNECTION) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (mdev->state.conn < C_CONNECTED) retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (retcode != SS_NEED_CONNECTION) break; @@ -1753,185 +2414,514 @@ retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); } - reply->ret_code = retcode; +out: + drbd_adm_finish(info, retcode); return 0; } -STATIC int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info, + union drbd_state mask, union drbd_state val) { + enum drbd_ret_code retcode; - reply->ret_code = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + retcode = drbd_request_state(adm_ctx.mdev, mask, val); +out: + drbd_adm_finish(info, retcode); return 0; } -STATIC int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; + return drbd_adm_simple_request_state(skb, info, 
NS(conn, C_STARTING_SYNC_S)); +} - if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO) - retcode = ERR_PAUSE_IS_SET; +int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; - reply->ret_code = retcode; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO) + retcode = ERR_PAUSE_IS_SET; +out: + drbd_adm_finish(info, retcode); return 0; } -STATIC int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; + union drbd_dev_state s; + enum drbd_ret_code retcode; + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) - retcode = ERR_PAUSE_IS_CLEAR; + if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { + s = adm_ctx.mdev->state; + if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) { + retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP : + s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR; + } else { + retcode = ERR_PAUSE_IS_CLEAR; + } + } - reply->ret_code = retcode; +out: + drbd_adm_finish(info, retcode); return 0; } -STATIC int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info) +{ + return drbd_adm_simple_request_state(skb, info, NS(susp, 1)); +} + +int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info) { - reply->ret_code = drbd_request_state(mdev, NS(susp, 1)); + struct drbd_conf *mdev; + int retcode; /* enum drbd_ret_code rsp. 
enum drbd_state_rv */ + + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; + if (test_bit(NEW_CUR_UUID, &mdev->flags)) { + drbd_uuid_new_current(mdev); + clear_bit(NEW_CUR_UUID, &mdev->flags); + } + drbd_suspend_io(mdev); + retcode = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0)); + if (retcode == SS_SUCCESS) { + if (mdev->state.conn < C_CONNECTED) + tl_clear(mdev->tconn); + if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED) + tl_restart(mdev->tconn, FAIL_FROZEN_DISK_IO); + } + drbd_resume_io(mdev); +out: + drbd_adm_finish(info, retcode); return 0; } -STATIC int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info) { - reply->ret_code = drbd_request_state(mdev, NS(susp, 0)); - return 0; + return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED)); } -STATIC int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int nla_put_drbd_cfg_context(struct sk_buff *skb, struct drbd_tconn *tconn, unsigned vnr) { - reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED)); + struct nlattr *nla; + nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT); + if (!nla) + goto nla_put_failure; + if (vnr != VOLUME_UNSPECIFIED) + NLA_PUT_U32(skb, T_ctx_volume, vnr); + NLA_PUT_STRING(skb, T_ctx_resource_name, tconn->name); + if (tconn->my_addr_len) + NLA_PUT(skb, T_ctx_my_addr, tconn->my_addr_len, &tconn->my_addr); + if (tconn->peer_addr_len) + NLA_PUT(skb, T_ctx_peer_addr, tconn->peer_addr_len, &tconn->peer_addr); + nla_nest_end(skb, nla); return 0; + +nla_put_failure: + if (nla) + nla_nest_cancel(skb, nla); + return -EMSGSIZE; } -STATIC int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int 
nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev, + const struct sib_info *sib) { - unsigned short *tl; + struct state_info *si = NULL; /* for sizeof(si->member); */ + struct net_conf *nc; + struct nlattr *nla; + int got_ldev; + int err = 0; + int exclude_sensitive; - tl = reply->tag_list; + /* If sib != NULL, this is drbd_bcast_event, which anyone can listen + * to. So we better exclude_sensitive information. + * + * If sib == NULL, this is drbd_adm_get_status, executed synchronously + * in the context of the requesting user process. Exclude sensitive + * information, unless current has superuser. + * + * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and + * relies on the current implementation of netlink_dump(), which + * executes the dump callback successively from netlink_recvmsg(), + * always in the context of the receiving process */ + exclude_sensitive = sib || !capable(CAP_SYS_ADMIN); + + got_ldev = get_ldev(mdev); + + /* We need to add connection name and volume number information still. + * Minor number is in drbd_genlmsghdr. */ + if (nla_put_drbd_cfg_context(skb, mdev->tconn, mdev->vnr)) + goto nla_put_failure; + + if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive)) + goto nla_put_failure; + + rcu_read_lock(); + if (got_ldev) + if (disk_conf_to_skb(skb, rcu_dereference(mdev->ldev->disk_conf), exclude_sensitive)) + goto nla_put_failure; + + nc = rcu_dereference(mdev->tconn->net_conf); + if (nc) + err = net_conf_to_skb(skb, nc, exclude_sensitive); + rcu_read_unlock(); + if (err) + goto nla_put_failure; - if (get_ldev(mdev)) { - tl = disk_conf_to_tags(mdev, &mdev->ldev->dc, tl); - put_ldev(mdev); + nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO); + if (!nla) + goto nla_put_failure; + NLA_PUT_U32(skb, T_sib_reason, sib ? 
sib->sib_reason : SIB_GET_STATUS_REPLY); + NLA_PUT_U32(skb, T_current_state, mdev->state.i); + NLA_PUT_U64(skb, T_ed_uuid, mdev->ed_uuid); + NLA_PUT_U64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev)); + + if (got_ldev) { + NLA_PUT_U32(skb, T_disk_flags, mdev->ldev->md.flags); + NLA_PUT(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid); + NLA_PUT_U64(skb, T_bits_total, drbd_bm_bits(mdev)); + NLA_PUT_U64(skb, T_bits_oos, drbd_bm_total_weight(mdev)); + if (C_SYNC_SOURCE <= mdev->state.conn && + C_PAUSED_SYNC_T >= mdev->state.conn) { + NLA_PUT_U64(skb, T_bits_rs_total, mdev->rs_total); + NLA_PUT_U64(skb, T_bits_rs_failed, mdev->rs_failed); + } } - if (get_net_conf(mdev)) { - tl = net_conf_to_tags(mdev, mdev->net_conf, tl); - put_net_conf(mdev); + if (sib) { + switch(sib->sib_reason) { + case SIB_SYNC_PROGRESS: + case SIB_GET_STATUS_REPLY: + break; + case SIB_STATE_CHANGE: + NLA_PUT_U32(skb, T_prev_state, sib->os.i); + NLA_PUT_U32(skb, T_new_state, sib->ns.i); + break; + case SIB_HELPER_POST: + NLA_PUT_U32(skb, + T_helper_exit_code, sib->helper_exit_code); + /* fall through */ + case SIB_HELPER_PRE: + NLA_PUT_STRING(skb, T_helper, sib->helper_name); + break; + } } - tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl); + nla_nest_end(skb, nla); - put_unaligned(TT_END, tl++); /* Close the tag list */ - - return (int)((char *)tl - (char *)reply->tag_list); + if (0) +nla_put_failure: + err = -EMSGSIZE; + if (got_ldev) + put_ldev(mdev); + return err; } -STATIC int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info) { - unsigned short *tl = reply->tag_list; - union drbd_state s = mdev->state; - unsigned long rs_left; - unsigned int res; + enum drbd_ret_code retcode; + int err; - tl = get_state_to_tags(mdev, (struct get_state *)&s, tl); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + 
if (retcode != NO_ERROR) + goto out; - /* no local ref, no bitmap, no syncer progress. */ - if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) { - if (get_ldev(mdev)) { - drbd_get_syncer_progress(mdev, &rs_left, &res); - tl = tl_add_int(tl, T_sync_progress, &res); - put_ldev(mdev); - } + err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.mdev, NULL); + if (err) { + nlmsg_free(adm_ctx.reply_skb); + return err; } - put_unaligned(TT_END, tl++); /* Close the tag list */ - - return (int)((char *)tl - (char *)reply->tag_list); +out: + drbd_adm_finish(info, retcode); + return 0; } -STATIC int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int get_one_status(struct sk_buff *skb, struct netlink_callback *cb) { - unsigned short *tl; - - tl = reply->tag_list; + struct drbd_conf *mdev; + struct drbd_genlmsghdr *dh; + struct drbd_tconn *pos = (struct drbd_tconn*)cb->args[0]; + struct drbd_tconn *tconn = NULL; + struct drbd_tconn *tmp; + unsigned volume = cb->args[1]; + + /* Open coded, deferred, iteration: + * list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) { + * idr_for_each_entry(&tconn->volumes, mdev, i) { + * ... + * } + * } + * where tconn is cb->args[0]; + * and i is cb->args[1]; + * + * cb->args[2] indicates if we shall loop over all resources, + * or just dump all volumes of a single resource. + * + * This may miss entries inserted after this dump started, + * or entries deleted before they are reached. + * + * We need to make sure the mdev won't disappear while + * we are looking at it, and revalidate our iterators + * on each iteration. 
+ */ - if (get_ldev(mdev)) { - tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64)); - tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags); - put_ldev(mdev); + /* synchronize with conn_create()/conn_destroy() */ + rcu_read_lock(); + /* revalidate iterator position */ + list_for_each_entry_rcu(tmp, &drbd_tconns, all_tconn) { + if (pos == NULL) { + /* first iteration */ + pos = tmp; + tconn = pos; + break; + } + if (tmp == pos) { + tconn = pos; + break; + } } - put_unaligned(TT_END, tl++); /* Close the tag list */ + if (tconn) { +next_tconn: + mdev = idr_get_next(&tconn->volumes, &volume); + if (!mdev) { + /* No more volumes to dump on this tconn. + * Advance tconn iterator. */ + pos = list_entry_rcu(tconn->all_tconn.next, + struct drbd_tconn, all_tconn); + /* Did we dump any volume on this tconn yet? */ + if (volume != 0) { + /* If we reached the end of the list, + * or only a single resource dump was requested, + * we are done. */ + if (&pos->all_tconn == &drbd_tconns || cb->args[2]) + goto out; + volume = 0; + tconn = pos; + goto next_tconn; + } + } + + dh = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, &drbd_genl_family, + NLM_F_MULTI, DRBD_ADM_GET_STATUS); + if (!dh) + goto out; + + if (!mdev) { + /* This is a tconn without a single volume. + * Suprisingly enough, it may have a network + * configuration. 
*/ + struct net_conf *nc; + dh->minor = -1U; + dh->ret_code = NO_ERROR; + if (nla_put_drbd_cfg_context(skb, tconn, VOLUME_UNSPECIFIED)) + goto cancel; + nc = rcu_dereference(tconn->net_conf); + if (nc && net_conf_to_skb(skb, nc, 1) != 0) + goto cancel; + goto done; + } + + D_ASSERT(mdev->vnr == volume); + D_ASSERT(mdev->tconn == tconn); + + dh->minor = mdev_to_minor(mdev); + dh->ret_code = NO_ERROR; + + if (nla_put_status_info(skb, mdev, NULL)) { +cancel: + genlmsg_cancel(skb, dh); + goto out; + } +done: + genlmsg_end(skb, dh); + } - return (int)((char *)tl - (char *)reply->tag_list); +out: + rcu_read_unlock(); + /* where to start the next iteration */ + cb->args[0] = (long)pos; + cb->args[1] = (pos == tconn) ? volume + 1 : 0; + + /* No more tconns/volumes/minors found results in an empty skb. + * Which will terminate the dump. */ + return skb->len; } -/** - * drbd_nl_get_timeout_flag() - Used by drbdsetup to find out which timeout value to use - * @mdev: DRBD device. - * @nlp: Netlink/connector packet from drbdsetup - * @reply: Reply packet for drbdsetup +/* + * Request status of all resources, or of all volumes within a single resource. + * + * This is a dump, as the answer may not fit in a single reply skb otherwise. + * Which means we cannot use the family->attrbuf or other such members, because + * dump is NOT protected by the genl_lock(). During dump, we only have access + * to the incoming skb, and need to opencode "parsing" of the nlattr payload. + * + * Once things are setup properly, we call into get_one_status(). */ -STATIC int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb) { - unsigned short *tl; - char rv; + const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ; + struct nlattr *nla; + const char *resource_name; + struct drbd_tconn *tconn; + int maxtype; + + /* Is this a followup call? 
*/ + if (cb->args[0]) { + /* ... of a single resource dump, + * and the resource iterator has been advanced already? */ + if (cb->args[2] && cb->args[2] != cb->args[0]) + return 0; /* DONE. */ + goto dump; + } + + /* First call (from netlink_dump_start). We need to figure out + * which resource(s) the user wants us to dump. */ + nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen), + nlmsg_attrlen(cb->nlh, hdrlen), + DRBD_NLA_CFG_CONTEXT); + + /* No explicit context given. Dump all. */ + if (!nla) + goto dump; + maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1; + nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name)); + if (IS_ERR(nla)) + return PTR_ERR(nla); + /* context given, but no name present? */ + if (!nla) + return -EINVAL; + resource_name = nla_data(nla); + tconn = conn_get_by_name(resource_name); + + if (!tconn) + return -ENODEV; + + kref_put(&tconn->kref, &conn_destroy); /* get_one_status() (re)validates tconn by itself */ + + /* prime iterators, and set "filter" mode mark: + * only dump this tconn. */ + cb->args[0] = (long)tconn; + /* cb->args[1] = 0; passed in this way. */ + cb->args[2] = (long)tconn; - tl = reply->tag_list; +dump: + return get_one_status(skb, cb); +} - rv = mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : - test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_DEGRADED : UT_DEFAULT; +int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; + struct timeout_parms tp; + int err; - tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv)); - put_unaligned(TT_END, tl++); /* Close the tag list */ + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + tp.timeout_type = + adm_ctx.mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED : + test_bit(USE_DEGR_WFC_T, &adm_ctx.mdev->flags) ? 
UT_DEGRADED : + UT_DEFAULT; - return (int)((char *)tl - (char *)reply->tag_list); + err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp); + if (err) { + nlmsg_free(adm_ctx.reply_skb); + return err; + } +out: + drbd_adm_finish(info, retcode); + return 0; } -STATIC int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) { - /* default to resume from last known position, if possible */ - struct start_ov args = - { .start_sector = mdev->ov_start_sector }; + struct drbd_conf *mdev; + enum drbd_ret_code retcode; - if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; - } - /* w_make_ov_request expects position to be aligned */ - mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; - reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; + + mdev = adm_ctx.mdev; + if (info->attrs[DRBD_NLA_START_OV_PARMS]) { + /* resume from last known position, if possible */ + struct start_ov_parms parms = + { .ov_start_sector = mdev->ov_start_sector }; + int err = start_ov_parms_from_attrs(&parms, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; + } + /* w_make_ov_request expects position to be aligned */ + mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT; + } + /* If there is still bitmap IO pending, e.g. previous resync or verify + * just being finished, wait for it before requesting a new resync. 
*/ + wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); + retcode = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); +out: + drbd_adm_finish(info, retcode); return 0; } -STATIC int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, - struct drbd_nl_cfg_reply *reply) +int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) { - int retcode = NO_ERROR; + struct drbd_conf *mdev; + enum drbd_ret_code retcode; int skip_initial_sync = 0; int err; + struct new_c_uuid_parms args; - struct new_c_uuid args; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out_nolock; - memset(&args, 0, sizeof(struct new_c_uuid)); - if (!new_c_uuid_from_tags(mdev, nlp->tag_list, &args)) { - reply->ret_code = ERR_MANDATORY_TAG; - return 0; + mdev = adm_ctx.mdev; + memset(&args, 0, sizeof(args)); + if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) { + err = new_c_uuid_parms_from_attrs(&args, info); + if (err) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out_nolock; + } } - mutex_lock(&mdev->state_mutex); /* Protects us against serialized state changes. */ + mutex_lock(mdev->state_mutex); /* Protects us against serialized state changes. 
*/ if (!get_ldev(mdev)) { retcode = ERR_NO_DISK; @@ -1939,7 +2929,7 @@ } /* this is "skip initial sync", assume to be clean */ - if (mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 && + if (mdev->state.conn == C_CONNECTED && mdev->tconn->agreed_pro_version >= 90 && mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) { dev_info(DEV, "Preparing to skip initial sync\n"); skip_initial_sync = 1; @@ -1952,7 +2942,8 @@ drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */ if (args.clear_bm) { - err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid"); + err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, + "clear_n_write from new_c_uuid", BM_LOCKED_MASK); if (err) { dev_err(DEV, "Writing bitmap failed with %d\n",err); retcode = ERR_IO_MD_DISK; @@ -1960,10 +2951,11 @@ if (skip_initial_sync) { drbd_send_uuids_skip_initial_sync(mdev); _drbd_uuid_set(mdev, UI_BITMAP, 0); - spin_lock_irq(&mdev->req_lock); + drbd_print_uuids(mdev, "cleared bitmap UUID"); + spin_lock_irq(&mdev->tconn->req_lock); _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), CS_VERBOSE, NULL); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } } @@ -1971,491 +2963,265 @@ out_dec: put_ldev(mdev); out: - mutex_unlock(&mdev->state_mutex); - - reply->ret_code = retcode; + mutex_unlock(mdev->state_mutex); +out_nolock: + drbd_adm_finish(info, retcode); return 0; } -STATIC struct drbd_conf *ensure_mdev(struct drbd_nl_cfg_req *nlp) +static enum drbd_ret_code +drbd_check_resource_name(const char *name) { - struct drbd_conf *mdev; - - if (nlp->drbd_minor >= minor_count) - return NULL; - - mdev = minor_to_mdev(nlp->drbd_minor); - - if (!mdev && (nlp->flags & DRBD_NL_CREATE_DEVICE)) { - struct gendisk *disk = NULL; - mdev = drbd_new_device(nlp->drbd_minor); - - spin_lock_irq(&drbd_pp_lock); - if (minor_table[nlp->drbd_minor] == NULL) { - minor_table[nlp->drbd_minor] = mdev; - disk = 
mdev->vdisk; - mdev = NULL; - } /* else: we lost the race */ - spin_unlock_irq(&drbd_pp_lock); - - if (disk) /* we won the race above */ - /* in case we ever add a drbd_delete_device(), - * don't forget the del_gendisk! */ - add_disk(disk); - else /* we lost the race above */ - drbd_free_mdev(mdev); - - mdev = minor_to_mdev(nlp->drbd_minor); + if (!name || !name[0]) { + drbd_msg_put_info("resource name missing"); + return ERR_MANDATORY_TAG; } - - return mdev; + /* if we want to use these in sysfs/configfs/debugfs some day, + * we must not allow slashes */ + if (strchr(name, '/')) { + drbd_msg_put_info("invalid resource name"); + return ERR_INVALID_REQUEST; + } + return NO_ERROR; } -struct cn_handler_struct { - int (*function)(struct drbd_conf *, - struct drbd_nl_cfg_req *, - struct drbd_nl_cfg_reply *); - int reply_body_size; -}; - -static struct cn_handler_struct cnd_table[] = { - [ P_primary ] = { &drbd_nl_primary, 0 }, - [ P_secondary ] = { &drbd_nl_secondary, 0 }, - [ P_disk_conf ] = { &drbd_nl_disk_conf, 0 }, - [ P_detach ] = { &drbd_nl_detach, 0 }, - [ P_net_conf ] = { &drbd_nl_net_conf, 0 }, - [ P_disconnect ] = { &drbd_nl_disconnect, 0 }, - [ P_resize ] = { &drbd_nl_resize, 0 }, - [ P_syncer_conf ] = { &drbd_nl_syncer_conf, 0 }, - [ P_invalidate ] = { &drbd_nl_invalidate, 0 }, - [ P_invalidate_peer ] = { &drbd_nl_invalidate_peer, 0 }, - [ P_pause_sync ] = { &drbd_nl_pause_sync, 0 }, - [ P_resume_sync ] = { &drbd_nl_resume_sync, 0 }, - [ P_suspend_io ] = { &drbd_nl_suspend_io, 0 }, - [ P_resume_io ] = { &drbd_nl_resume_io, 0 }, - [ P_outdate ] = { &drbd_nl_outdate, 0 }, - [ P_get_config ] = { &drbd_nl_get_config, - sizeof(struct syncer_conf_tag_len_struct) + - sizeof(struct disk_conf_tag_len_struct) + - sizeof(struct net_conf_tag_len_struct) }, - [ P_get_state ] = { &drbd_nl_get_state, - sizeof(struct get_state_tag_len_struct) + - sizeof(struct sync_progress_tag_len_struct) }, - [ P_get_uuids ] = { &drbd_nl_get_uuids, - sizeof(struct 
get_uuids_tag_len_struct) }, - [ P_get_timeout_flag ] = { &drbd_nl_get_timeout_flag, - sizeof(struct get_timeout_flag_tag_len_struct)}, - [ P_start_ov ] = { &drbd_nl_start_ov, 0 }, - [ P_new_c_uuid ] = { &drbd_nl_new_c_uuid, 0 }, -}; - -#ifdef KERNEL_HAS_CN_SKB_PARMS -STATIC void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp) +int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) { -#else -STATIC void drbd_connector_callback(void *data) -{ - struct cn_msg *req = data; -#endif - struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data; - struct cn_handler_struct *cm; - struct cn_msg *cn_reply; - struct drbd_nl_cfg_reply *reply; - struct drbd_conf *mdev; - int retcode, rr; - int reply_size = sizeof(struct cn_msg) - + sizeof(struct drbd_nl_cfg_reply) - + sizeof(short int); - - if (!try_module_get(THIS_MODULE)) { - printk(KERN_ERR "drbd: try_module_get() failed!\n"); - return; - } - -#ifdef KERNEL_HAS_CN_SKB_PARMS - if (!cap_raised(nsp->eff_cap, CAP_SYS_ADMIN)) { - retcode = ERR_PERM; - goto fail; - } -#endif - - mdev = ensure_mdev(nlp); - if (!mdev) { - retcode = ERR_MINOR_INVALID; - goto fail; - } + enum drbd_ret_code retcode; + struct res_opts res_opts; + int err; - trace_drbd_netlink(req, 1); + retcode = drbd_adm_prepare(skb, info, 0); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - if (nlp->packet_type >= P_nl_after_last_packet) { - retcode = ERR_PACKET_NR; - goto fail; + set_res_opts_defaults(&res_opts); + err = res_opts_from_attrs(&res_opts, info); + if (err && err != -ENOMSG) { + retcode = ERR_MANDATORY_TAG; + drbd_msg_put_info(from_attrs_err_to_txt(err)); + goto out; } - cm = cnd_table + nlp->packet_type; - - /* This may happen if packet number is 0: */ - if (cm->function == NULL) { - retcode = ERR_PACKET_NR; - goto fail; - } + retcode = drbd_check_resource_name(adm_ctx.resource_name); + if (retcode != NO_ERROR) + goto out; - reply_size += cm->reply_body_size; + if 
(adm_ctx.tconn) + goto out; - /* allocation not in the IO path, cqueue thread context */ - cn_reply = kmalloc(reply_size, GFP_KERNEL); - if (!cn_reply) { + if (!conn_create(adm_ctx.resource_name, &res_opts)) retcode = ERR_NOMEM; - goto fail; - } - reply = (struct drbd_nl_cfg_reply *) cn_reply->data; - - reply->packet_type = - cm->reply_body_size ? nlp->packet_type : P_nl_after_last_packet; - reply->minor = nlp->drbd_minor; - reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */ - /* reply->tag_list; might be modified by cm->function. */ - - rr = cm->function(mdev, nlp, reply); - - cn_reply->id = req->id; - cn_reply->seq = req->seq; - cn_reply->ack = req->ack + 1; - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr; - cn_reply->flags = 0; - - trace_drbd_netlink(cn_reply, 0); - rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL); - if (rr && rr != -ESRCH) - printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); - - kfree(cn_reply); - module_put(THIS_MODULE); - return; - fail: - drbd_nl_send_reply(req, retcode); - module_put(THIS_MODULE); -} - -static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */ - -static unsigned short * -__tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, - unsigned short len, int nul_terminated) -{ - unsigned short l = tag_descriptions[tag_number(tag)].max_len; - len = (len < l) ? 
len : l; - put_unaligned(tag, tl++); - put_unaligned(len, tl++); - memcpy(tl, data, len); - tl = (unsigned short*)((char*)tl + len); - if (nul_terminated) - *((char*)tl - 1) = 0; - return tl; +out: + drbd_adm_finish(info, retcode); + return 0; } -static unsigned short * -tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len) +int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info) { - return __tl_add_blob(tl, tag, data, len, 0); -} + struct drbd_genlmsghdr *dh = info->userhdr; + enum drbd_ret_code retcode; -static unsigned short * -tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str) -{ - return __tl_add_blob(tl, tag, str, strlen(str)+1, 0); -} + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; -static unsigned short * -tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val) -{ - put_unaligned(tag, tl++); - switch(tag_type(tag)) { - case TT_INTEGER: - put_unaligned(sizeof(int), tl++); - put_unaligned(*(int *)val, (int *)tl); - tl = (unsigned short*)((char*)tl+sizeof(int)); - break; - case TT_INT64: - put_unaligned(sizeof(u64), tl++); - put_unaligned(*(u64 *)val, (u64 *)tl); - tl = (unsigned short*)((char*)tl+sizeof(u64)); - break; - default: - /* someone did something stupid. 
*/ - ; + if (dh->minor > MINORMASK) { + drbd_msg_put_info("requested minor out of range"); + retcode = ERR_INVALID_REQUEST; + goto out; + } + if (adm_ctx.volume > DRBD_VOLUME_MAX) { + drbd_msg_put_info("requested volume id out of range"); + retcode = ERR_INVALID_REQUEST; + goto out; } - return tl; -} - -void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state) -{ - char buffer[sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct get_state_tag_len_struct)+ - sizeof(short int)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - unsigned short *tl = reply->tag_list; - - /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ - - tl = get_state_to_tags(mdev, (struct get_state *)&state, tl); - - put_unaligned(TT_END, tl++); /* Close the tag list */ - - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; - - cn_reply->seq = atomic_add_return(1, &drbd_nl_seq); - cn_reply->ack = 0; /* not used here. 
*/ - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char *)tl - (char *)reply->tag_list); - cn_reply->flags = 0; - reply->packet_type = P_get_state; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; + if (adm_ctx.mdev) + goto out; - trace_drbd_netlink(cn_reply, 0); - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); + retcode = conn_new_minor(adm_ctx.tconn, dh->minor, adm_ctx.volume); +out: + drbd_adm_finish(info, retcode); + return 0; } -void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name) +static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev) { - char buffer[sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct call_helper_tag_len_struct)+ - sizeof(short int)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - unsigned short *tl = reply->tag_list; - - /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */ - - tl = tl_add_str(tl, T_helper, helper_name); - put_unaligned(TT_END, tl++); /* Close the tag list */ - - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; + if (mdev->state.disk == D_DISKLESS && + /* no need to be mdev->state.conn == C_STANDALONE && + * we may want to delete a minor from a live replication group. + */ + mdev->state.role == R_SECONDARY) { + _drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS), + CS_VERBOSE + CS_WAIT_COMPLETE); + idr_remove(&mdev->tconn->volumes, mdev->vnr); + idr_remove(&minors, mdev_to_minor(mdev)); + del_gendisk(mdev->vdisk); + synchronize_rcu(); + kref_put(&mdev->kref, &drbd_minor_destroy); + return NO_ERROR; + } else + return ERR_MINOR_CONFIGURED; +} - cn_reply->seq = atomic_add_return(1, &drbd_nl_seq); - cn_reply->ack = 0; /* not used here. 
*/ - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char *)tl - (char *)reply->tag_list); - cn_reply->flags = 0; +int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info) +{ + enum drbd_ret_code retcode; - reply->packet_type = P_call_helper; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - trace_drbd_netlink(cn_reply, 0); - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); + retcode = adm_delete_minor(adm_ctx.mdev); +out: + drbd_adm_finish(info, retcode); + return 0; } -void drbd_bcast_ee(struct drbd_conf *mdev, - const char *reason, const int dgs, - const char* seen_hash, const char* calc_hash, - const struct drbd_epoch_entry* e) +int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) { - struct cn_msg *cn_reply; - struct drbd_nl_cfg_reply *reply; - struct bio_vec *bvec; - unsigned short *tl; - int i; + int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ + struct drbd_conf *mdev; + unsigned i; - if (!e) - return; - if (!reason || !reason[0]) - return; + retcode = drbd_adm_prepare(skb, info, 0); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - /* apparently we have to memcpy twice, first to prepare the data for the - * struct cn_msg, then within cn_netlink_send from the cn_msg to the - * netlink skb. */ - /* receiver thread context, which is not in the writeout path (of this node), - * but may be in the writeout path of the _other_ node. - * GFP_NOIO to avoid potential "distributed deadlock". 
*/ - cn_reply = kmalloc( - sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct dump_ee_tag_len_struct)+ - sizeof(short int), - GFP_NOIO); - - if (!cn_reply) { - dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n", - (unsigned long long)e->sector, e->size); - return; + if (!adm_ctx.tconn) { + retcode = ERR_RES_NOT_KNOWN; + goto out; } - reply = (struct drbd_nl_cfg_reply*)cn_reply->data; - tl = reply->tag_list; + /* demote */ + idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { + retcode = drbd_set_role(mdev, R_SECONDARY, 0); + if (retcode < SS_SUCCESS) { + drbd_msg_put_info("failed to demote"); + goto out; + } + } - tl = tl_add_str(tl, T_dump_ee_reason, reason); - tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs); - tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs); - tl = tl_add_int(tl, T_ee_sector, &e->sector); - tl = tl_add_int(tl, T_ee_block_id, &e->block_id); - - put_unaligned(T_ee_data, tl++); - put_unaligned(e->size, tl++); - - __bio_for_each_segment(bvec, e->private_bio, i, 0) { - void *d = kmap(bvec->bv_page); - memcpy(tl, d + bvec->bv_offset, bvec->bv_len); - kunmap(bvec->bv_page); - tl=(unsigned short*)((char*)tl + bvec->bv_len); - } - put_unaligned(TT_END, tl++); /* Close the tag list */ - - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; - - cn_reply->seq = atomic_add_return(1,&drbd_nl_seq); - cn_reply->ack = 0; // not used here. 
- cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char*)tl - (char*)reply->tag_list); - cn_reply->flags = 0; - - reply->packet_type = P_dump_ee; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; - - trace_drbd_netlink(cn_reply, 0); - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); - kfree(cn_reply); -} - -void drbd_bcast_sync_progress(struct drbd_conf *mdev) -{ - char buffer[sizeof(struct cn_msg)+ - sizeof(struct drbd_nl_cfg_reply)+ - sizeof(struct sync_progress_tag_len_struct)+ - sizeof(short int)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - unsigned short *tl = reply->tag_list; - unsigned long rs_left; - unsigned int res; + retcode = conn_try_disconnect(adm_ctx.tconn, 0); + if (retcode < SS_SUCCESS) { + drbd_msg_put_info("failed to disconnect"); + goto out; + } - /* no local ref, no bitmap, no syncer progress, no broadcast. */ - if (!get_ldev(mdev)) - return; - drbd_get_syncer_progress(mdev, &rs_left, &res); - put_ldev(mdev); + /* detach */ + idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { + retcode = adm_detach(mdev, 0); + if (retcode < SS_SUCCESS) { + drbd_msg_put_info("failed to detach"); + goto out; + } + } - tl = tl_add_int(tl, T_sync_progress, &res); - put_unaligned(TT_END, tl++); /* Close the tag list */ + /* If we reach this, all volumes (of this tconn) are Secondary, + * Disconnected, Diskless, aka Unconfigured. Make sure all threads have + * actually stopped, state handling only does drbd_thread_stop_nowait(). */ + drbd_thread_stop(&adm_ctx.tconn->worker); - cn_reply->id.idx = CN_IDX_DRBD; - cn_reply->id.val = CN_VAL_DRBD; + /* Now, nothing can fail anymore */ - cn_reply->seq = atomic_add_return(1, &drbd_nl_seq); - cn_reply->ack = 0; /* not used here. 
*/ - cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + - (int)((char *)tl - (char *)reply->tag_list); - cn_reply->flags = 0; + /* delete volumes */ + idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) { + retcode = adm_delete_minor(mdev); + if (retcode != NO_ERROR) { + /* "can not happen" */ + drbd_msg_put_info("failed to delete volume"); + goto out; + } + } - reply->packet_type = P_sync_progress; - reply->minor = mdev_to_minor(mdev); - reply->ret_code = NO_ERROR; + /* delete connection */ + if (conn_lowest_minor(adm_ctx.tconn) < 0) { + list_del_rcu(&adm_ctx.tconn->all_tconn); + synchronize_rcu(); + kref_put(&adm_ctx.tconn->kref, &conn_destroy); - trace_drbd_netlink(cn_reply, 0); - cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); + retcode = NO_ERROR; + } else { + /* "can not happen" */ + retcode = ERR_RES_IN_USE; + drbd_msg_put_info("failed to delete connection"); + } + goto out; +out: + drbd_adm_finish(info, retcode); + return 0; } -#ifdef NETLINK_ROUTE6 -int __init cn_init(void); -void __exit cn_fini(void); -#endif - -typedef int (*cn_add_callback_req_nsp_fn)(struct cb_id *, char *, - void (*cb)(struct cn_msg *req, struct netlink_skb_parms *nsp)); -typedef int (*cn_add_callback_req_fn)(struct cb_id *, char *, - void (*cb)(struct cn_msg *req)); -typedef int (*cn_add_callback_void_fn)(struct cb_id *, char *, - void (*cb)(void *data)); -#ifndef __same_type -# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) -#endif - -int __init drbd_nl_init(void) +int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) { - static struct cb_id cn_id_drbd; - int err, try=10; + enum drbd_ret_code retcode; -#ifdef NETLINK_ROUTE6 - /* pre 2.6.16 */ - err = cn_init(); - if (err) - return err; -#endif - cn_id_drbd.val = CN_VAL_DRBD; - do { - cn_id_drbd.idx = cn_idx; - /* Try to catch incompatible callbacks at compile time, - * otherwise it will just be a compiler _warning_, - * but then BUG at runtime. 
*/ -#ifdef KERNEL_HAS_CN_SKB_PARMS - BUILD_BUG_ON(!__same_type(&cn_add_callback, cn_add_callback_req_nsp_fn)); -#else - BUILD_BUG_ON(!( - __same_type(&cn_add_callback, cn_add_callback_req_fn) || - __same_type(&cn_add_callback, cn_add_callback_void_fn))); -#endif - err = cn_add_callback(&cn_id_drbd, "cn_drbd", &drbd_connector_callback); - if (!err) - break; - cn_idx = (cn_idx + CN_IDX_STEP); - } while (try--); + retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); + if (!adm_ctx.reply_skb) + return retcode; + if (retcode != NO_ERROR) + goto out; - if (err) { - printk(KERN_ERR "drbd: cn_drbd failed to register\n"); - return err; + if (conn_lowest_minor(adm_ctx.tconn) < 0) { + list_del_rcu(&adm_ctx.tconn->all_tconn); + synchronize_rcu(); + kref_put(&adm_ctx.tconn->kref, &conn_destroy); + + retcode = NO_ERROR; + } else { + retcode = ERR_RES_IN_USE; } + if (retcode == NO_ERROR) + drbd_thread_stop(&adm_ctx.tconn->worker); +out: + drbd_adm_finish(info, retcode); return 0; } -void drbd_nl_cleanup(void) +void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib) { - static struct cb_id cn_id_drbd; - - cn_id_drbd.idx = cn_idx; - cn_id_drbd.val = CN_VAL_DRBD; + static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */ + struct sk_buff *msg; + struct drbd_genlmsghdr *d_out; + unsigned seq; + int err = -ENOMEM; + + seq = atomic_inc_return(&drbd_genl_seq); + msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); + if (!msg) + goto failed; + + err = -EMSGSIZE; + d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT); + if (!d_out) /* cannot happen, but anyways. 
*/ + goto nla_put_failure; + d_out->minor = mdev_to_minor(mdev); + d_out->ret_code = NO_ERROR; + + if (nla_put_status_info(msg, mdev, sib)) + goto nla_put_failure; + genlmsg_end(msg, d_out); + err = drbd_genl_multicast_events(msg, 0); + /* msg has been consumed or freed in netlink_broadcast() */ + if (err && err != -ESRCH) + goto failed; - cn_del_callback(&cn_id_drbd); - -#ifdef NETLINK_ROUTE6 - /* pre 2.6.16 */ - cn_fini(); -#endif -} + return; -void drbd_nl_send_reply(struct cn_msg *req, int ret_code) -{ - char buffer[sizeof(struct cn_msg)+sizeof(struct drbd_nl_cfg_reply)]; - struct cn_msg *cn_reply = (struct cn_msg *) buffer; - struct drbd_nl_cfg_reply *reply = - (struct drbd_nl_cfg_reply *)cn_reply->data; - int rr; - - cn_reply->id = req->id; - - cn_reply->seq = req->seq; - cn_reply->ack = req->ack + 1; - cn_reply->len = sizeof(struct drbd_nl_cfg_reply); - cn_reply->flags = 0; - - reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor; - reply->ret_code = ret_code; - - trace_drbd_netlink(cn_reply, 0); - rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO); - if (rr && rr != -ESRCH) - printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr); +nla_put_failure: + nlmsg_free(msg); +failed: + dev_err(DEV, "Error %d while broadcasting event. " + "Event seq:%u sib_reason:%u\n", + err, seq, sib->sib_reason); } - diff -Nru drbd8-8.3.7/drbd/drbd_nla.c drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_nla.c --- drbd8-8.3.7/drbd/drbd_nla.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_nla.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,55 @@ +#include "drbd_wrappers.h" +#include +#include +#include +#include "drbd_nla.h" + +static int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla) +{ + struct nlattr *head = nla_data(nla); + int len = nla_len(nla); + int rem; + + /* + * validate_nla (called from nla_parse_nested) ignores attributes + * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag. 
+ * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY + * flag set also, check and remove that flag before calling + * nla_parse_nested. + */ + + nla_for_each_attr(nla, head, len, rem) { + if (nla->nla_type & DRBD_GENLA_F_MANDATORY) { + nla->nla_type &= ~DRBD_GENLA_F_MANDATORY; + if (nla_type(nla) > maxtype) + return -EOPNOTSUPP; + } + } + return 0; +} + +int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy) +{ + int err; + + err = drbd_nla_check_mandatory(maxtype, nla); + if (!err) + err = nla_parse_nested(tb, maxtype, nla, policy); + + return err; +} + +struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype) +{ + int err; + /* + * If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and + * we don't know about that attribute, reject all the nested + * attributes. + */ + err = drbd_nla_check_mandatory(maxtype, nla); + if (err) + return ERR_PTR(err); + return nla_find_nested(nla, attrtype); +} diff -Nru drbd8-8.3.7/drbd/drbd_nla.h drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_nla.h --- drbd8-8.3.7/drbd/drbd_nla.h 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_nla.h 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,8 @@ +#ifndef __DRBD_NLA_H +#define __DRBD_NLA_H + +extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy); +extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype); + +#endif /* __DRBD_NLA_H */ diff -Nru drbd8-8.3.7/drbd/drbd_proc.c drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_proc.c --- drbd8-8.3.7/drbd/drbd_proc.c 2010-01-07 09:09:34.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_proc.c 2012-02-02 14:09:14.000000000 +0000 @@ -29,13 +29,13 @@ #include #include #include -#include #include #include #include #include "drbd_int.h" STATIC int drbd_proc_open(struct inode *inode, struct file *file); +STATIC int 
drbd_proc_release(struct inode *inode, struct file *file); struct proc_dir_entry *drbd_proc; @@ -44,9 +44,22 @@ .open = drbd_proc_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = drbd_proc_release, }; +void seq_printf_with_thousands_grouping(struct seq_file *seq, long v) +{ + /* v is in kB/sec. We don't expect TiByte/sec yet. */ + if (unlikely(v >= 1000000)) { + /* cool: > GiByte/s */ + seq_printf(seq, "%ld,", v / 1000000); + v %= 1000000; + seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000); + } else if (likely(v >= 1000)) + seq_printf(seq, "%ld,%03ld", v/1000, v % 1000); + else + seq_printf(seq, "%ld", v); +} /*lge * progress bars shamelessly adapted from driver/md/md.c @@ -59,6 +72,7 @@ unsigned long db, dt, dbdt, rt, rs_left; unsigned int res; int i, x, y; + int stalled = 0; drbd_get_syncer_progress(mdev, &rs_left, &res); @@ -72,17 +86,24 @@ seq_printf(seq, "."); seq_printf(seq, "] "); - seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10); - /* if more than 1 GB display in MB */ - if (mdev->rs_total > 0x100000L) - seq_printf(seq, "(%lu/%lu)M\n\t", + if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) + seq_printf(seq, "verified:"); + else + seq_printf(seq, "sync'ed:"); + seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); + + /* if more than a few GB, display in MB */ + if (mdev->rs_total > (4UL << (30 - BM_BLOCK_SHIFT))) + seq_printf(seq, "(%lu/%lu)M", (unsigned long) Bit2KB(rs_left >> 10), (unsigned long) Bit2KB(mdev->rs_total >> 10)); else - seq_printf(seq, "(%lu/%lu)K\n\t", + seq_printf(seq, "(%lu/%lu)K", (unsigned long) Bit2KB(rs_left), (unsigned long) Bit2KB(mdev->rs_total)); + seq_printf(seq, "\n\t"); + /* see drivers/md/md.c * We do not want to overflow, so the order of operands and * the * 100 / 100 trick are important. 
We do a +1 to be @@ -92,45 +113,76 @@ * db: blocks written from mark until now * rt: remaining time */ - dt = (jiffies - mdev->rs_mark_time) / HZ; - - if (dt > 20) { - /* if we made no update to rs_mark_time for too long, - * we are stalled. show that. */ - seq_printf(seq, "stalled\n"); - return; - } + /* Rolling marks. last_mark+1 may just now be modified. last_mark+2 is + * at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at + * least DRBD_SYNC_MARK_STEP time before it will be modified. */ + /* ------------------------ ~18s average ------------------------ */ + i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS; + dt = (jiffies - mdev->rs_mark_time[i]) / HZ; + if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS)) + stalled = 1; if (!dt) dt++; - db = mdev->rs_mark_left - rs_left; + db = mdev->rs_mark_left[i] - rs_left; rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */ seq_printf(seq, "finish: %lu:%02lu:%02lu", rt / 3600, (rt % 3600) / 60, rt % 60); - /* current speed average over (SYNC_MARKS * SYNC_MARK_STEP) jiffies */ dbdt = Bit2KB(db/dt); - if (dbdt > 1000) - seq_printf(seq, " speed: %ld,%03ld", - dbdt/1000, dbdt % 1000); - else - seq_printf(seq, " speed: %ld", dbdt); + seq_printf(seq, " speed: "); + seq_printf_with_thousands_grouping(seq, dbdt); + seq_printf(seq, " ("); + /* ------------------------- ~3s average ------------------------ */ + if (proc_details >= 1) { + /* this is what drbd_rs_should_slow_down() uses */ + i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS; + dt = (jiffies - mdev->rs_mark_time[i]) / HZ; + if (!dt) + dt++; + db = mdev->rs_mark_left[i] - rs_left; + dbdt = Bit2KB(db/dt); + seq_printf_with_thousands_grouping(seq, dbdt); + seq_printf(seq, " -- "); + } + /* --------------------- long term average ---------------------- */ /* mean speed since syncer started * we do account for PausedSync periods */ dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ; - if (dt <= 0) + if (dt == 0) dt = 1; db = mdev->rs_total - 
rs_left; dbdt = Bit2KB(db/dt); - if (dbdt > 1000) - seq_printf(seq, " (%ld,%03ld)", - dbdt/1000, dbdt % 1000); - else - seq_printf(seq, " (%ld)", dbdt); + seq_printf_with_thousands_grouping(seq, dbdt); + seq_printf(seq, ")"); + + if (mdev->state.conn == C_SYNC_TARGET || + mdev->state.conn == C_VERIFY_S) { + seq_printf(seq, " want: "); + seq_printf_with_thousands_grouping(seq, mdev->c_sync_rate); + } + seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : ""); - seq_printf(seq, " K/sec\n"); + if (proc_details >= 1) { + /* 64 bit: + * we convert to sectors in the display below. */ + unsigned long bm_bits = drbd_bm_bits(mdev); + unsigned long bit_pos; + if (mdev->state.conn == C_VERIFY_S || + mdev->state.conn == C_VERIFY_T) + bit_pos = bm_bits - mdev->ov_left; + else + bit_pos = mdev->bm_resync_fo; + /* Total sectors may be slightly off for oddly + * sized devices. So what. */ + seq_printf(seq, + "\t%3d%% sector pos: %llu/%llu\n", + (int)(bit_pos / (bm_bits/100+1)), + (unsigned long long)bit_pos * BM_SECT_PER_BIT, + (unsigned long long)bm_bits * BM_SECT_PER_BIT); + } } STATIC void resync_dump_detail(struct seq_file *seq, struct lc_element *e) @@ -145,9 +197,11 @@ STATIC int drbd_seq_show(struct seq_file *seq, void *v) { - int i, hole = 0; + int i, prev_i = -1; const char *sn; struct drbd_conf *mdev; + struct net_conf *nc; + char wp; static char write_ordering_chars[] = { [WO_none] = 'n', @@ -179,16 +233,11 @@ oos .. known out-of-sync kB */ - for (i = 0; i < minor_count; i++) { - mdev = minor_to_mdev(i); - if (!mdev) { - hole = 1; - continue; - } - if (hole) { - hole = 0; + rcu_read_lock(); + idr_for_each_entry(&minors, mdev, i) { + if (prev_i != i - 1) seq_printf(seq, "\n"); - } + prev_i = i; sn = drbd_conn_str(mdev->state.conn); @@ -197,8 +246,10 @@ mdev->state.role == R_SECONDARY) { seq_printf(seq, "%2d: cs:Unconfigured\n", i); } else { + nc = rcu_dereference(mdev->tconn->net_conf); + wp = nc ? 
nc->wire_protocol - DRBD_PROT_A + 'A' : ' '; seq_printf(seq, - "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c\n" + "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n" " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u " "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c", i, sn, @@ -206,13 +257,13 @@ drbd_role_str(mdev->state.peer), drbd_disk_str(mdev->state.disk), drbd_disk_str(mdev->state.pdsk), - (mdev->net_conf == NULL ? ' ' : - (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')), - mdev->state.susp ? 's' : 'r', + wp, + drbd_suspended(mdev) ? 's' : 'r', mdev->state.aftr_isp ? 'a' : '-', mdev->state.peer_isp ? 'p' : '-', mdev->state.user_isp ? 'u' : '-', mdev->congestion_reason ?: '-', + test_bit(AL_SUSPENDED, &mdev->flags) ? 's' : '-', mdev->send_cnt/2, mdev->recv_cnt/2, mdev->writ_cnt/2, @@ -224,23 +275,19 @@ atomic_read(&mdev->rs_pending_cnt), atomic_read(&mdev->unacked_cnt), atomic_read(&mdev->ap_bio_cnt), - mdev->epochs, - write_ordering_chars[mdev->write_ordering] + mdev->tconn->epochs, + write_ordering_chars[mdev->tconn->write_ordering] ); - seq_printf(seq, " oos:%lu\n", - Bit2KB(drbd_bm_total_weight(mdev))); + seq_printf(seq, " oos:%llu\n", + Bit2KB((unsigned long long) + drbd_bm_total_weight(mdev))); } if (mdev->state.conn == C_SYNC_SOURCE || - mdev->state.conn == C_SYNC_TARGET) + mdev->state.conn == C_SYNC_TARGET || + mdev->state.conn == C_VERIFY_S || + mdev->state.conn == C_VERIFY_T) drbd_syncer_progress(mdev, seq); - if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) - seq_printf(seq, "\t%3d%% %lu/%lu\n", - (int)((mdev->rs_total-mdev->ov_left) / - (mdev->rs_total/100+1)), - mdev->rs_total - mdev->ov_left, - mdev->rs_total); - if (proc_details >= 1 && get_ldev_if_state(mdev, D_FAILED)) { lc_seq_printf_stats(seq, mdev->resync); lc_seq_printf_stats(seq, mdev->act_log); @@ -254,13 +301,22 @@ } } } + rcu_read_unlock(); return 0; } STATIC int drbd_proc_open(struct inode *inode, struct file *file) { - return single_open(file, drbd_seq_show, PDE(inode)->data); + if 
(try_module_get(THIS_MODULE)) + return single_open(file, drbd_seq_show, PDE(inode)->data); + return -ENODEV; +} + +STATIC int drbd_proc_release(struct inode *inode, struct file *file) +{ + module_put(THIS_MODULE); + return single_release(inode, file); } /* PROC FS stuff end */ diff -Nru drbd8-8.3.7/drbd/drbd_receiver.c drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_receiver.c --- drbd8-8.3.7/drbd/drbd_receiver.c 2010-01-07 16:15:48.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_receiver.c 2012-02-02 14:09:14.000000000 +0000 @@ -37,51 +37,49 @@ #include #include #include -#include #include #define __KERNEL_SYSCALLS__ #include #include #include -#ifdef HAVE_LINUX_SCATTERLIST_H -/* 2.6.11 (suse 9.3, fc4) does not include requisites - * from linux/scatterlist.h :( */ -#include -#include -#include -#include -#endif #include "drbd_int.h" -#include "drbd_tracing.h" #include "drbd_req.h" - #include "drbd_vli.h" +#include struct flush_work { struct drbd_work w; struct drbd_epoch *epoch; }; +struct packet_info { + enum drbd_packet cmd; + unsigned int size; + unsigned int vnr; + void *data; +}; + enum finish_epoch { FE_STILL_LIVE, FE_DESTROYED, FE_RECYCLED, }; -STATIC int drbd_do_handshake(struct drbd_conf *mdev); -STATIC int drbd_do_auth(struct drbd_conf *mdev); +STATIC int drbd_do_features(struct drbd_tconn *tconn); +STATIC int drbd_do_auth(struct drbd_tconn *tconn); +STATIC int drbd_disconnected(struct drbd_conf *mdev); -STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event); -STATIC int e_end_block(struct drbd_conf *, struct drbd_work *, int); +STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *, struct drbd_epoch *, enum epoch_event); +STATIC int e_end_block(struct drbd_work *, int); -static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch) +static struct drbd_epoch *previous_epoch(struct drbd_tconn *tconn, struct drbd_epoch *epoch) { struct drbd_epoch *prev; - 
spin_lock(&mdev->epoch_lock); + spin_lock(&tconn->epoch_lock); prev = list_entry(epoch->list.prev, struct drbd_epoch, list); - if (prev == epoch || prev == mdev->current_epoch) + if (prev == epoch || prev == tconn->current_epoch) prev = NULL; - spin_unlock(&mdev->epoch_lock); + spin_unlock(&tconn->epoch_lock); return prev; } @@ -95,44 +93,135 @@ #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) -static struct page *drbd_pp_first_page_or_try_alloc(struct drbd_conf *mdev) +/* + * some helper functions to deal with single linked page lists, + * page->private being our "next" pointer. + */ + +/* If at least n pages are linked at head, get n pages off. + * Otherwise, don't modify head, and return NULL. + * Locking is the responsibility of the caller. + */ +static struct page *page_chain_del(struct page **head, int n) +{ + struct page *page; + struct page *tmp; + + BUG_ON(!n); + BUG_ON(!head); + + page = *head; + + if (!page) + return NULL; + + while (page) { + tmp = page_chain_next(page); + if (--n == 0) + break; /* found sufficient pages */ + if (tmp == NULL) + /* insufficient pages, don't use any of them. */ + return NULL; + page = tmp; + } + + /* add end of list marker for the returned list */ + set_page_private(page, 0); + /* actual return value, and adjustment of head */ + page = *head; + *head = tmp; + return page; +} + +/* may be used outside of locks to find the tail of a (usually short) + * "private" page chain, before adding it back to a global chain head + * with page_chain_add() under a spinlock. 
*/ +static struct page *page_chain_tail(struct page *page, int *len) +{ + struct page *tmp; + int i = 1; + while ((tmp = page_chain_next(page))) + ++i, page = tmp; + if (len) + *len = i; + return page; +} + +static int page_chain_free(struct page *page) +{ + struct page *tmp; + int i = 0; + page_chain_for_each_safe(page, tmp) { + put_page(page); + ++i; + } + return i; +} + +static void page_chain_add(struct page **head, + struct page *chain_first, struct page *chain_last) +{ +#if 1 + struct page *tmp; + tmp = page_chain_tail(chain_first, NULL); + BUG_ON(tmp != chain_last); +#endif + + /* add chain to head */ + set_page_private(chain_last, (unsigned long)*head); + *head = chain_first; +} + +static struct page *__drbd_alloc_pages(struct drbd_conf *mdev, + unsigned int number) { struct page *page = NULL; + struct page *tmp = NULL; + unsigned int i = 0; /* Yes, testing drbd_pp_vacant outside the lock is racy. * So what. It saves a spin_lock. */ - if (drbd_pp_vacant > 0) { + if (drbd_pp_vacant >= number) { spin_lock(&drbd_pp_lock); - page = drbd_pp_pool; - if (page) { - drbd_pp_pool = (struct page *)page_private(page); - set_page_private(page, 0); /* just to be polite */ - drbd_pp_vacant--; - } + page = page_chain_del(&drbd_pp_pool, number); + if (page) + drbd_pp_vacant -= number; spin_unlock(&drbd_pp_lock); + if (page) + return page; } + /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place. */ - if (!page) - page = alloc_page(GFP_TRY); - if (page) - atomic_inc(&mdev->pp_in_use); - return page; -} + for (i = 0; i < number; i++) { + tmp = alloc_page(GFP_TRY); + if (!tmp) + break; + set_page_private(tmp, (unsigned long)page); + page = tmp; + } -/* kick lower level device, if we have more than (arbitrary number) - * reference counts on it, which typically are locally submitted io - * requests. 
don't use unacked_cnt, so we speed up proto A and B, too. */ -static void maybe_kick_lo(struct drbd_conf *mdev) -{ - if (atomic_read(&mdev->local_cnt) >= mdev->net_conf->unplug_watermark) - drbd_kick_lo(mdev); + if (i == number) + return page; + + /* Not enough pages immediately available this time. + * No need to jump around here, drbd_alloc_pages will retry this + * function "soon". */ + if (page) { + tmp = page_chain_tail(page, NULL); + spin_lock(&drbd_pp_lock); + page_chain_add(&drbd_pp_pool, page, tmp); + drbd_pp_vacant += i; + spin_unlock(&drbd_pp_lock); + } + return NULL; } -static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed) +static void reclaim_finished_net_peer_reqs(struct drbd_conf *mdev, + struct list_head *to_be_freed) { - struct drbd_epoch_entry *e; + struct drbd_peer_request *peer_req; struct list_head *le, *tle; /* The EEs are always appended to the end of the list. Since @@ -141,54 +230,63 @@ stop to examine the list... */ list_for_each_safe(le, tle, &mdev->net_ee) { - e = list_entry(le, struct drbd_epoch_entry, w.list); - if (drbd_bio_has_active_page(e->private_bio)) + peer_req = list_entry(le, struct drbd_peer_request, w.list); + if (drbd_peer_req_has_active_page(peer_req)) break; list_move(le, to_be_freed); } } -static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) +static void drbd_reclaim_net(struct drbd_conf *mdev) { LIST_HEAD(reclaimed); - struct drbd_epoch_entry *e, *t; + struct drbd_peer_request *peer_req, *t; - maybe_kick_lo(mdev); - spin_lock_irq(&mdev->req_lock); - reclaim_net_ee(mdev, &reclaimed); - spin_unlock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); + reclaim_finished_net_peer_reqs(mdev, &reclaimed); + spin_unlock_irq(&mdev->tconn->req_lock); - list_for_each_entry_safe(e, t, &reclaimed, w.list) - drbd_free_ee(mdev, e); + list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) + drbd_free_net_peer_req(mdev, peer_req); } /** - * drbd_pp_alloc() - Returns a page, fails 
only if a signal comes in + * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled) * @mdev: DRBD device. - * @retry: whether or not to retry allocation forever (or until signalled) + * @number: number of pages requested + * @retry: whether to retry, if not enough pages are available right now + * + * Tries to allocate number pages, first from our own page pool, then from + * the kernel, unless this allocation would exceed the max_buffers setting. + * Possibly retry until DRBD frees sufficient pages somewhere else. * - * Tries to allocate a page, first from our own page pool, then from the - * kernel, unless this allocation would exceed the max_buffers setting. - * If @retry is non-zero, retry until DRBD frees a page somewhere else. + * Returns a page chain linked via page->private. */ -STATIC struct page *drbd_pp_alloc(struct drbd_conf *mdev, int retry) +struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number, + bool retry) { struct page *page = NULL; + struct net_conf *nc; DEFINE_WAIT(wait); + int mxb; - if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) { - page = drbd_pp_first_page_or_try_alloc(mdev); - if (page) - return page; - } + /* Yes, we may run up to @number over max_buffers. If we + * follow it strictly, the admin will get it wrong anyways. */ + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + mxb = nc ? 
nc->max_buffers : 1000000; + rcu_read_unlock(); - for (;;) { + if (atomic_read(&mdev->pp_in_use) < mxb) + page = __drbd_alloc_pages(mdev, number); + + while (page == NULL) { prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE); - drbd_kick_lo_and_reclaim_net(mdev); + drbd_reclaim_net(mdev); - if (atomic_read(&mdev->pp_in_use) < mdev->net_conf->max_buffers) { - page = drbd_pp_first_page_or_try_alloc(mdev); + if (atomic_read(&mdev->pp_in_use) < mxb) { + page = __drbd_alloc_pages(mdev, number); if (page) break; } @@ -197,7 +295,7 @@ break; if (signal_pending(current)) { - dev_warn(DEV, "drbd_pp_alloc interrupted!\n"); + dev_warn(DEV, "drbd_alloc_pages interrupted!\n"); break; } @@ -205,62 +303,34 @@ } finish_wait(&drbd_pp_wait, &wait); + if (page) + atomic_add(number, &mdev->pp_in_use); return page; } -/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc. - * Is also used from inside an other spin_lock_irq(&mdev->req_lock) */ -STATIC void drbd_pp_free(struct drbd_conf *mdev, struct page *page) -{ - int free_it; - - spin_lock(&drbd_pp_lock); - if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) { - free_it = 1; - } else { - set_page_private(page, (unsigned long)drbd_pp_pool); - drbd_pp_pool = page; - drbd_pp_vacant++; - free_it = 0; - } - spin_unlock(&drbd_pp_lock); - - atomic_dec(&mdev->pp_in_use); - - if (free_it) - __free_page(page); - - wake_up(&drbd_pp_wait); -} - -STATIC void drbd_pp_free_bio_pages(struct drbd_conf *mdev, struct bio *bio) +/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages. + * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock); + * Either links the page chain back to the global pool, + * or returns all pages to the system. */ +STATIC void drbd_free_pages(struct drbd_conf *mdev, struct page *page, int is_net) { - struct page *p_to_be_freed = NULL; - struct page *page; - struct bio_vec *bvec; + atomic_t *a = is_net ? 
&mdev->pp_in_use_by_net : &mdev->pp_in_use; int i; - spin_lock(&drbd_pp_lock); - __bio_for_each_segment(bvec, bio, i, 0) { - if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) { - set_page_private(bvec->bv_page, (unsigned long)p_to_be_freed); - p_to_be_freed = bvec->bv_page; - } else { - set_page_private(bvec->bv_page, (unsigned long)drbd_pp_pool); - drbd_pp_pool = bvec->bv_page; - drbd_pp_vacant++; - } - } - spin_unlock(&drbd_pp_lock); - atomic_sub(bio->bi_vcnt, &mdev->pp_in_use); - - while (p_to_be_freed) { - page = p_to_be_freed; - p_to_be_freed = (struct page *)page_private(page); - set_page_private(page, 0); /* just to be polite */ - put_page(page); + if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count) + i = page_chain_free(page); + else { + struct page *tmp; + tmp = page_chain_tail(page, &i); + spin_lock(&drbd_pp_lock); + page_chain_add(&drbd_pp_pool, page, tmp); + drbd_pp_vacant += i; + spin_unlock(&drbd_pp_lock); } - + i = atomic_sub_return(i, a); + if (i < 0) + dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n", + is_net ? 
"pp_in_use_by_net" : "pp_in_use", i); wake_up(&drbd_pp_wait); } @@ -269,186 +339,128 @@ _drbd_wait_ee_list_empty() You must not have the req_lock: - drbd_free_ee() - drbd_alloc_ee() - drbd_init_ee() - drbd_release_ee() + drbd_free_peer_req() + drbd_alloc_peer_req() + drbd_free_peer_reqs() drbd_ee_fix_bhs() - drbd_process_done_ee() + drbd_finish_peer_reqs() drbd_clear_done_ee() drbd_wait_ee_list_empty() */ -struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, - u64 id, - sector_t sector, - unsigned int data_size, - gfp_t gfp_mask) __must_hold(local) +struct drbd_peer_request * +drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector, + unsigned int data_size, gfp_t gfp_mask) __must_hold(local) { - struct request_queue *q; - struct drbd_epoch_entry *e; + struct drbd_peer_request *peer_req; struct page *page; - struct bio *bio; - unsigned int ds; + unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT; - if (FAULT_ACTIVE(mdev, DRBD_FAULT_AL_EE)) + if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE)) return NULL; - e = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); - if (!e) { + peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM); + if (!peer_req) { if (!(gfp_mask & __GFP_NOWARN)) - dev_err(DEV, "alloc_ee: Allocation of an EE failed\n"); + dev_err(DEV, "%s: allocation failed\n", __func__); return NULL; } - bio = bio_alloc(gfp_mask & ~__GFP_HIGHMEM, div_ceil(data_size, PAGE_SIZE)); - if (!bio) { - if (!(gfp_mask & __GFP_NOWARN)) - dev_err(DEV, "alloc_ee: Allocation of a bio failed\n"); - goto fail1; - } - - bio->bi_bdev = mdev->ldev->backing_bdev; - bio->bi_sector = sector; - - ds = data_size; - while (ds) { - page = drbd_pp_alloc(mdev, (gfp_mask & __GFP_WAIT)); - if (!page) { - if (!(gfp_mask & __GFP_NOWARN)) - dev_err(DEV, "alloc_ee: Allocation of a page failed\n"); - goto fail2; - } - if (!bio_add_page(bio, page, min_t(int, ds, PAGE_SIZE), 0)) { - drbd_pp_free(mdev, page); - dev_err(DEV, "alloc_ee: 
bio_add_page(s=%llu," - "data_size=%u,ds=%u) failed\n", - (unsigned long long)sector, data_size, ds); - - q = bdev_get_queue(bio->bi_bdev); - if (q->merge_bvec_fn) { -#ifdef HAVE_bvec_merge_data - struct bvec_merge_data bvm = { - .bi_bdev = bio->bi_bdev, - .bi_sector = bio->bi_sector, - .bi_size = bio->bi_size, - .bi_rw = bio->bi_rw, - }; - int l = q->merge_bvec_fn(q, &bvm, - &bio->bi_io_vec[bio->bi_vcnt]); -#else - int l = q->merge_bvec_fn(q, bio, - &bio->bi_io_vec[bio->bi_vcnt]); -#endif - dev_err(DEV, "merge_bvec_fn() = %d\n", l); - } - - /* dump more of the bio. */ - DUMPI(bio->bi_max_vecs); - DUMPI(bio->bi_vcnt); - DUMPI(bio->bi_size); - DUMPI(bio->bi_phys_segments); - - goto fail2; - break; - } - ds -= min_t(int, ds, PAGE_SIZE); - } - - D_ASSERT(data_size == bio->bi_size); - - bio->bi_private = e; - e->mdev = mdev; - e->sector = sector; - e->size = bio->bi_size; - - e->private_bio = bio; - e->block_id = id; - INIT_HLIST_NODE(&e->colision); - e->epoch = NULL; - e->flags = 0; - - trace_drbd_ee(mdev, e, "allocated"); + page = drbd_alloc_pages(mdev, nr_pages, (gfp_mask & __GFP_WAIT)); + if (!page) + goto fail; - return e; + drbd_clear_interval(&peer_req->i); + peer_req->i.size = data_size; + peer_req->i.sector = sector; + peer_req->i.local = false; + peer_req->i.waiting = false; + + peer_req->epoch = NULL; + peer_req->w.mdev = mdev; + peer_req->pages = page; + atomic_set(&peer_req->pending_bios, 0); + peer_req->flags = 0; + /* + * The block_id is opaque to the receiver. It is not endianness + * converted, and sent back to the sender unchanged. 
+ */ + peer_req->block_id = id; - fail2: - drbd_pp_free_bio_pages(mdev, bio); - bio_put(bio); - fail1: - mempool_free(e, drbd_ee_mempool); + return peer_req; + fail: + mempool_free(peer_req, drbd_ee_mempool); return NULL; } -void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) +void __drbd_free_peer_req(struct drbd_conf *mdev, struct drbd_peer_request *peer_req, + int is_net) { - struct bio *bio = e->private_bio; - trace_drbd_ee(mdev, e, "freed"); - drbd_pp_free_bio_pages(mdev, bio); - bio_put(bio); - D_ASSERT(hlist_unhashed(&e->colision)); - mempool_free(e, drbd_ee_mempool); + if (peer_req->flags & EE_HAS_DIGEST) + kfree(peer_req->digest); + drbd_free_pages(mdev, peer_req->pages, is_net); + D_ASSERT(atomic_read(&peer_req->pending_bios) == 0); + D_ASSERT(drbd_interval_empty(&peer_req->i)); + mempool_free(peer_req, drbd_ee_mempool); } -int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) +int drbd_free_peer_reqs(struct drbd_conf *mdev, struct list_head *list) { LIST_HEAD(work_list); - struct drbd_epoch_entry *e, *t; + struct drbd_peer_request *peer_req, *t; int count = 0; + int is_net = list == &mdev->net_ee; - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); list_splice_init(list, &work_list); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); - list_for_each_entry_safe(e, t, &work_list, w.list) { - drbd_free_ee(mdev, e); + list_for_each_entry_safe(peer_req, t, &work_list, w.list) { + __drbd_free_peer_req(mdev, peer_req, is_net); count++; } return count; } - /* - * This function is called from _asender only_ - * but see also comments in _req_mod(,barrier_acked) - * and receive_Barrier. - * - * Move entries from net_ee to done_ee, if ready. - * Grab done_ee, call all callbacks, free the entries. - * The callbacks typically send out ACKs. + * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier. 
*/ -STATIC int drbd_process_done_ee(struct drbd_conf *mdev) +static int drbd_finish_peer_reqs(struct drbd_conf *mdev) { LIST_HEAD(work_list); LIST_HEAD(reclaimed); - struct drbd_epoch_entry *e, *t; - int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS); + struct drbd_peer_request *peer_req, *t; + int err = 0; - spin_lock_irq(&mdev->req_lock); - reclaim_net_ee(mdev, &reclaimed); + spin_lock_irq(&mdev->tconn->req_lock); + reclaim_finished_net_peer_reqs(mdev, &reclaimed); list_splice_init(&mdev->done_ee, &work_list); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); - list_for_each_entry_safe(e, t, &reclaimed, w.list) - drbd_free_ee(mdev, e); + list_for_each_entry_safe(peer_req, t, &reclaimed, w.list) + drbd_free_net_peer_req(mdev, peer_req); /* possible callbacks here: - * e_end_block, and e_end_resync_block, e_send_discard_ack. + * e_end_block, and e_end_resync_block, e_send_discard_write. * all ignore the last argument. */ - list_for_each_entry_safe(e, t, &work_list, w.list) { - trace_drbd_ee(mdev, e, "process_done_ee"); + list_for_each_entry_safe(peer_req, t, &work_list, w.list) { + int err2; + /* list_del not necessary, next/prev members not touched */ - ok = e->w.cb(mdev, &e->w, !ok) && ok; - drbd_free_ee(mdev, e); + err2 = peer_req->w.cb(&peer_req->w, !!err); + if (!err) + err = err2; + drbd_free_peer_req(mdev, peer_req); } wake_up(&mdev->ee_wait); - return ok; + return err; } -void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) +static void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, + struct list_head *head) { DEFINE_WAIT(wait); @@ -456,44 +468,24 @@ * and calling prepare_to_wait in the fast path */ while (!list_empty(head)) { prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE); - spin_unlock_irq(&mdev->req_lock); - drbd_kick_lo(mdev); + spin_unlock_irq(&mdev->tconn->req_lock); schedule(); finish_wait(&mdev->ee_wait, &wait); - spin_lock_irq(&mdev->req_lock); + 
spin_lock_irq(&mdev->tconn->req_lock); } } -void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head) +static void drbd_wait_ee_list_empty(struct drbd_conf *mdev, + struct list_head *head) { - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); _drbd_wait_ee_list_empty(mdev, head); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); } -#ifdef DEFINE_SOCK_CREATE_KERN -/* if there is no sock_create_kern, - * there is also sock_create_lite missing */ -int sock_create_lite(int family, int type, int protocol, struct socket **res) -{ - int err = 0; - struct socket *sock = NULL; - - sock = sock_alloc(); - if (!sock) - err = -ENOMEM; - else - sock->type = type; - - *res = sock; - return err; -} -#endif - /* see also kernel_accept; which is only present since 2.6.18. * also we want to log which part of it failed, exactly */ -STATIC int drbd_accept(struct drbd_conf *mdev, const char **what, - struct socket *sock, struct socket **newsock) +STATIC int drbd_accept(const char **what, struct socket *sock, struct socket **newsock) { struct sock *sk = sock->sk; int err = 0; @@ -522,8 +514,7 @@ return err; } -STATIC int drbd_recv_short(struct drbd_conf *mdev, struct socket *sock, - void *buf, size_t size, int flags) +STATIC int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags) { mm_segment_t oldfs; struct kvec iov = { @@ -545,7 +536,7 @@ return rv; } -STATIC int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) +STATIC int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size) { mm_segment_t oldfs; struct kvec iov = { @@ -563,7 +554,7 @@ set_fs(KERNEL_DS); for (;;) { - rv = sock_recvmsg(mdev->data.socket, &msg, size, msg.msg_flags); + rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags); if (rv == size) break; @@ -574,12 +565,12 @@ if (rv < 0) { if (rv == -ECONNRESET) - dev_info(DEV, "sock was reset by peer\n"); + conn_info(tconn, "sock was reset by peer\n"); else if (rv 
!= -ERESTARTSYS) - dev_err(DEV, "sock_recvmsg returned %d\n", rv); + conn_err(tconn, "sock_recvmsg returned %d\n", rv); break; } else if (rv == 0) { - dev_info(DEV, "sock was shut down by peer\n"); + conn_info(tconn, "sock was shut down by peer\n"); break; } else { /* signal came in, or peer/link went down, @@ -593,32 +584,97 @@ set_fs(oldfs); if (rv != size) - drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE)); + conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD); return rv; } -STATIC struct socket *drbd_try_connect(struct drbd_conf *mdev) +static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size) +{ + int err; + + err = drbd_recv(tconn, buf, size); + if (err != size) { + if (err >= 0) + err = -EIO; + } else + err = 0; + return err; +} + +static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size) +{ + int err; + + err = drbd_recv_all(tconn, buf, size); + if (err && !signal_pending(current)) + conn_warn(tconn, "short read (expected size %d)\n", (int)size); + return err; +} + +/* quoting tcp(7): + * On individual connections, the socket buffer size must be set prior to the + * listen(2) or connect(2) calls in order to have it take effect. + * This is our wrapper to do so. 
+ */ +static void drbd_setbufsize(struct socket *sock, unsigned int snd, + unsigned int rcv) +{ + /* open coded SO_SNDBUF, SO_RCVBUF */ + if (snd) { + sock->sk->sk_sndbuf = snd; + sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK; + } + if (rcv) { + sock->sk->sk_rcvbuf = rcv; + sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + } +} + +STATIC struct socket *drbd_try_connect(struct drbd_tconn *tconn) { const char *what; struct socket *sock; struct sockaddr_in6 src_in6; - int err; + struct sockaddr_in6 peer_in6; + struct net_conf *nc; + int err, peer_addr_len, my_addr_len; + int sndbuf_size, rcvbuf_size, connect_int; int disconnect_on_error = 1; - if (!get_net_conf(mdev)) + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + if (!nc) { + rcu_read_unlock(); return NULL; + } + sndbuf_size = nc->sndbuf_size; + rcvbuf_size = nc->rcvbuf_size; + connect_int = nc->connect_int; + rcu_read_unlock(); + + my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6)); + memcpy(&src_in6, &tconn->my_addr, my_addr_len); + + if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6) + src_in6.sin6_port = 0; + else + ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ + + peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(src_in6)); + memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len); what = "sock_create_kern"; - err = sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family, - SOCK_STREAM, IPPROTO_TCP, &sock); + err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family, + SOCK_STREAM, IPPROTO_TCP, &sock); if (err < 0) { sock = NULL; goto out; } sock->sk->sk_rcvtimeo = - sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ; + sock->sk->sk_sndtimeo = connect_int * HZ; + drbd_setbufsize(sock, sndbuf_size, rcvbuf_size); /* explicitly bind to the configured IP as source IP * for the outgoing connections. @@ -627,17 +683,8 @@ * Make sure to use 0 as port number, so linux selects * a free one dynamically. 
*/ - memcpy(&src_in6, mdev->net_conf->my_addr, - min_t(int, mdev->net_conf->my_addr_len, sizeof(src_in6))); - if (((struct sockaddr *)mdev->net_conf->my_addr)->sa_family == AF_INET6) - src_in6.sin6_port = 0; - else - ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */ - what = "bind before connect"; - err = sock->ops->bind(sock, - (struct sockaddr *) &src_in6, - mdev->net_conf->my_addr_len); + err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len); if (err < 0) goto out; @@ -645,9 +692,7 @@ * stay C_WF_CONNECTION, don't go Disconnecting! */ disconnect_on_error = 0; what = "connect"; - err = sock->ops->connect(sock, - (struct sockaddr *)mdev->net_conf->peer_addr, - mdev->net_conf->peer_addr_len, 0); + err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0); out: if (err < 0) { @@ -665,106 +710,149 @@ disconnect_on_error = 0; break; default: - dev_err(DEV, "%s failed, err = %d\n", what, err); + conn_err(tconn, "%s failed, err = %d\n", what, err); } if (disconnect_on_error) - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); } - put_net_conf(mdev); + return sock; } -STATIC struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) +STATIC struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn) { - int timeo, err; + int timeo, err, my_addr_len; + int sndbuf_size, rcvbuf_size, connect_int; struct socket *s_estab = NULL, *s_listen; + struct sockaddr_in6 my_addr; + struct net_conf *nc; const char *what; - if (!get_net_conf(mdev)) + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + if (!nc) { + rcu_read_unlock(); return NULL; + } + sndbuf_size = nc->sndbuf_size; + rcvbuf_size = nc->rcvbuf_size; + connect_int = nc->connect_int; + rcu_read_unlock(); + + my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6)); + memcpy(&my_addr, &tconn->my_addr, my_addr_len); what = "sock_create_kern"; - err = 
sock_create_kern(((struct sockaddr *)mdev->net_conf->my_addr)->sa_family, + err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family, SOCK_STREAM, IPPROTO_TCP, &s_listen); if (err) { s_listen = NULL; goto out; } - timeo = mdev->net_conf->try_connect_int * HZ; + timeo = connect_int * HZ; timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */ s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ s_listen->sk->sk_rcvtimeo = timeo; s_listen->sk->sk_sndtimeo = timeo; + drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size); what = "bind before listen"; - err = s_listen->ops->bind(s_listen, - (struct sockaddr *) mdev->net_conf->my_addr, - mdev->net_conf->my_addr_len); + err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len); if (err < 0) goto out; - err = drbd_accept(mdev, &what, s_listen, &s_estab); + err = drbd_accept(&what, s_listen, &s_estab); out: if (s_listen) sock_release(s_listen); if (err < 0) { if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) { - dev_err(DEV, "%s failed, err = %d\n", what, err); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_err(tconn, "%s failed, err = %d\n", what, err); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); } } - put_net_conf(mdev); return s_estab; } -STATIC int drbd_send_fp(struct drbd_conf *mdev, - struct socket *sock, enum drbd_packets cmd) -{ - struct p_header *h = (struct p_header *) &mdev->data.sbuf.header; +static int decode_header(struct drbd_tconn *, void *, struct packet_info *); - return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); +static int send_first_packet(struct drbd_tconn *tconn, struct drbd_socket *sock, + enum drbd_packet cmd) +{ + if (!conn_prepare_command(tconn, sock)) + return -EIO; + return conn_send_command(tconn, sock, cmd, 0, NULL, 0); } -STATIC enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) +static int receive_first_packet(struct drbd_tconn *tconn, struct socket *sock) { - struct p_header 
*h = (struct p_header *) &mdev->data.sbuf.header; - int rr; - - rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); - - if (rr == sizeof(*h) && h->magic == BE_DRBD_MAGIC) - return be16_to_cpu(h->command); + unsigned int header_size = drbd_header_size(tconn); + struct packet_info pi; + int err; - return 0xffff; + err = drbd_recv_short(sock, tconn->data.rbuf, header_size, 0); + if (err != header_size) { + if (err >= 0) + err = -EIO; + return err; + } + err = decode_header(tconn, tconn->data.rbuf, &pi); + if (err) + return err; + return pi.cmd; } /** * drbd_socket_okay() - Free the socket if its connection is not okay - * @mdev: DRBD device. * @sock: pointer to the pointer to the socket. */ -static int drbd_socket_okay(struct drbd_conf *mdev, struct socket **sock) +static int drbd_socket_okay(struct socket **sock) { int rr; char tb[4]; if (!*sock) - return FALSE; + return false; - rr = drbd_recv_short(mdev, *sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); + rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK); if (rr > 0 || rr == -EAGAIN) { - return TRUE; + return true; } else { sock_release(*sock); *sock = NULL; - return FALSE; + return false; } } +/* Gets called if a connection is established, or if a new minor gets created + in a connection */ +int drbd_connected(struct drbd_conf *mdev) +{ + int err; + + atomic_set(&mdev->packet_seq, 0); + mdev->peer_seq = 0; + + mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ? + &mdev->tconn->cstate_mutex : + &mdev->own_state_mutex; + + err = drbd_send_sync_param(mdev); + if (!err) + err = drbd_send_sizes(mdev, 0, 0); + if (!err) + err = drbd_send_uuids(mdev); + if (!err) + err = drbd_send_current_state(mdev); + clear_bit(USE_DEGR_WFC_T, &mdev->flags); + clear_bit(RESIZE_PENDING, &mdev->flags); + mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */ + return err; +} /* * return values: @@ -774,248 +862,314 @@ * no point in trying again, please go standalone. * -2 We do not have a network config... 
*/ -STATIC int drbd_connect(struct drbd_conf *mdev) +STATIC int conn_connect(struct drbd_tconn *tconn) { - struct socket *s, *sock, *msock; - int try, h, ok; - - D_ASSERT(!mdev->data.socket); + struct drbd_socket sock, msock; + struct drbd_conf *mdev; + struct net_conf *nc; + int vnr, timeout, try, h, ok; + bool discard_my_data; - if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) - dev_err(DEV, "CREATE_BARRIER flag was set in drbd_connect - now cleared!\n"); - - if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) + if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) return -2; - clear_bit(DISCARD_CONCURRENT, &mdev->flags); + mutex_init(&sock.mutex); + sock.sbuf = tconn->data.sbuf; + sock.rbuf = tconn->data.rbuf; + sock.socket = NULL; + mutex_init(&msock.mutex); + msock.sbuf = tconn->meta.sbuf; + msock.rbuf = tconn->meta.rbuf; + msock.socket = NULL; + + clear_bit(DISCARD_CONCURRENT, &tconn->flags); - sock = NULL; - msock = NULL; + /* Assume that the peer only understands protocol 80 until we know better. */ + tconn->agreed_pro_version = 80; do { + struct socket *s; + for (try = 0;;) { /* 3 tries, this should take less than a second! 
*/ - s = drbd_try_connect(mdev); + s = drbd_try_connect(tconn); if (s || ++try >= 3) break; /* give the other side time to call bind() & listen() */ - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 10); + schedule_timeout_interruptible(HZ / 10); } if (s) { - if (!sock) { - drbd_send_fp(mdev, s, P_HAND_SHAKE_S); - sock = s; - s = NULL; - } else if (!msock) { - drbd_send_fp(mdev, s, P_HAND_SHAKE_M); - msock = s; - s = NULL; + if (!sock.socket) { + sock.socket = s; + send_first_packet(tconn, &sock, P_INITIAL_DATA); + } else if (!msock.socket) { + msock.socket = s; + send_first_packet(tconn, &msock, P_INITIAL_META); } else { - dev_err(DEV, "Logic error in drbd_connect()\n"); + conn_err(tconn, "Logic error in conn_connect()\n"); goto out_release_sockets; } } - if (sock && msock) { - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 10); - ok = drbd_socket_okay(mdev, &sock); - ok = drbd_socket_okay(mdev, &msock) && ok; + if (sock.socket && msock.socket) { + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + timeout = nc->ping_timeo * HZ / 10; + rcu_read_unlock(); + schedule_timeout_interruptible(timeout); + ok = drbd_socket_okay(&sock.socket); + ok = drbd_socket_okay(&msock.socket) && ok; if (ok) break; } retry: - s = drbd_wait_for_connect(mdev); + s = drbd_wait_for_connect(tconn); if (s) { - try = drbd_recv_fp(mdev, s); - drbd_socket_okay(mdev, &sock); - drbd_socket_okay(mdev, &msock); + try = receive_first_packet(tconn, s); + drbd_socket_okay(&sock.socket); + drbd_socket_okay(&msock.socket); switch (try) { - case P_HAND_SHAKE_S: - if (sock) { - dev_warn(DEV, "initial packet S crossed\n"); - sock_release(sock); + case P_INITIAL_DATA: + if (sock.socket) { + conn_warn(tconn, "initial packet S crossed\n"); + sock_release(sock.socket); } - sock = s; + sock.socket = s; break; - case P_HAND_SHAKE_M: - if (msock) { - dev_warn(DEV, "initial packet M crossed\n"); - sock_release(msock); + case P_INITIAL_META: + if (msock.socket) { + 
conn_warn(tconn, "initial packet M crossed\n"); + sock_release(msock.socket); } - msock = s; - set_bit(DISCARD_CONCURRENT, &mdev->flags); + msock.socket = s; + set_bit(DISCARD_CONCURRENT, &tconn->flags); break; default: - dev_warn(DEV, "Error receiving initial packet\n"); + conn_warn(tconn, "Error receiving initial packet\n"); sock_release(s); if (random32() & 1) goto retry; } } - if (mdev->state.conn <= C_DISCONNECTING) + if (tconn->cstate <= C_DISCONNECTING) goto out_release_sockets; if (signal_pending(current)) { flush_signals(current); smp_rmb(); - if (get_t_state(&mdev->receiver) == Exiting) + if (get_t_state(&tconn->receiver) == EXITING) goto out_release_sockets; } - if (sock && msock) { - ok = drbd_socket_okay(mdev, &sock); - ok = drbd_socket_okay(mdev, &msock) && ok; + if (sock.socket && &msock.socket) { + ok = drbd_socket_okay(&sock.socket); + ok = drbd_socket_okay(&msock.socket) && ok; if (ok) break; } } while (1); - msock->sk->sk_reuse = 1; /* SO_REUSEADDR */ - sock->sk->sk_reuse = 1; /* SO_REUSEADDR */ + sock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */ + msock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */ - sock->sk->sk_allocation = GFP_NOIO; - msock->sk->sk_allocation = GFP_NOIO; + sock.socket->sk->sk_allocation = GFP_NOIO; + msock.socket->sk->sk_allocation = GFP_NOIO; - sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; - msock->sk->sk_priority = TC_PRIO_INTERACTIVE; - - if (mdev->net_conf->sndbuf_size) { - sock->sk->sk_sndbuf = mdev->net_conf->sndbuf_size; - sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - } - - if (mdev->net_conf->rcvbuf_size) { - sock->sk->sk_rcvbuf = mdev->net_conf->rcvbuf_size; - sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - } + sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; + msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE; /* NOT YET ... 
- * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; - * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; - * first set it to the P_HAND_SHAKE timeout, + * sock.socket->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10; + * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; + * first set it to the P_CONNECTION_FEATURES timeout, * which we set to 4x the configured ping_timeout. */ - sock->sk->sk_sndtimeo = - sock->sk->sk_rcvtimeo = mdev->net_conf->ping_timeo*4*HZ/10; + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); - msock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; - msock->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; + sock.socket->sk->sk_sndtimeo = + sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10; - /* we don't want delays. - * we use TCP_CORK where apropriate, though */ - drbd_tcp_nodelay(sock); - drbd_tcp_nodelay(msock); + msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ; + timeout = nc->timeout * HZ / 10; + discard_my_data = nc->discard_my_data; + rcu_read_unlock(); - mdev->data.socket = sock; - mdev->meta.socket = msock; - mdev->last_received = jiffies; + msock.socket->sk->sk_sndtimeo = timeout; - D_ASSERT(mdev->asender.task == NULL); + /* we don't want delays. 
+ * we use TCP_CORK where appropriate, though */ + drbd_tcp_nodelay(sock.socket); + drbd_tcp_nodelay(msock.socket); + + tconn->data.socket = sock.socket; + tconn->meta.socket = msock.socket; + tconn->last_received = jiffies; - h = drbd_do_handshake(mdev); + h = drbd_do_features(tconn); if (h <= 0) return h; - if (mdev->cram_hmac_tfm) { + if (tconn->cram_hmac_tfm) { /* drbd_request_state(mdev, NS(conn, WFAuth)); */ - switch (drbd_do_auth(mdev)) { + switch (drbd_do_auth(tconn)) { case -1: - dev_err(DEV, "Authentication of peer failed\n"); + conn_err(tconn, "Authentication of peer failed\n"); return -1; case 0: - dev_err(DEV, "Authentication of peer failed, trying again.\n"); + conn_err(tconn, "Authentication of peer failed, trying again.\n"); return 0; } } - if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS) - return 0; + tconn->data.socket->sk->sk_sndtimeo = timeout; + tconn->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; - sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; - sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; + if (drbd_send_protocol(tconn) == -EOPNOTSUPP) + return -1; - atomic_set(&mdev->packet_seq, 0); - mdev->peer_seq = 0; + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + kref_get(&mdev->kref); + rcu_read_unlock(); - drbd_thread_start(&mdev->asender); + if (discard_my_data) + set_bit(DISCARD_MY_DATA, &mdev->flags); + else + clear_bit(DISCARD_MY_DATA, &mdev->flags); - drbd_send_protocol(mdev); - drbd_send_sync_param(mdev, &mdev->sync_conf); - drbd_send_sizes(mdev, 0); - drbd_send_uuids(mdev); - drbd_send_state(mdev); - clear_bit(USE_DEGR_WFC_T, &mdev->flags); - clear_bit(RESIZE_PENDING, &mdev->flags); + drbd_connected(mdev); + kref_put(&mdev->kref, &drbd_minor_destroy); + rcu_read_lock(); + } + rcu_read_unlock(); - return 1; + if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS) + return 0; + + drbd_thread_start(&tconn->asender); + + mutex_lock(&tconn->conf_update); 
+ /* The discard_my_data flag is a single-shot modifier to the next + * connection attempt, the handshake of which is now well underway. + * No need for rcu style copying of the whole struct + * just to clear a single value. */ + tconn->net_conf->discard_my_data = 0; + mutex_unlock(&tconn->conf_update); + + return h; out_release_sockets: - if (sock) - sock_release(sock); - if (msock) - sock_release(msock); + if (sock.socket) + sock_release(sock.socket); + if (msock.socket) + sock_release(msock.socket); return -1; } -STATIC int drbd_recv_header(struct drbd_conf *mdev, struct p_header *h) +static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_info *pi) { - int r; + unsigned int header_size = drbd_header_size(tconn); - r = drbd_recv(mdev, h, sizeof(*h)); - - if (unlikely(r != sizeof(*h))) { - dev_err(DEV, "short read expecting header on sock: r=%d\n", r); - return FALSE; - }; - h->command = be16_to_cpu(h->command); - h->length = be16_to_cpu(h->length); - if (unlikely(h->magic != BE_DRBD_MAGIC)) { - dev_err(DEV, "magic?? 
on data m: 0x%lx c: %d l: %d\n", - (long)be32_to_cpu(h->magic), - h->command, h->length); - return FALSE; + if (header_size == sizeof(struct p_header100) && + *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) { + struct p_header100 *h = header; + if (h->pad != 0) { + conn_err(tconn, "Header padding is not zero\n"); + return -EINVAL; + } + pi->vnr = be16_to_cpu(h->volume); + pi->cmd = be16_to_cpu(h->command); + pi->size = be32_to_cpu(h->length); + } else if (header_size == sizeof(struct p_header95) && + *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) { + struct p_header95 *h = header; + pi->cmd = be16_to_cpu(h->command); + pi->size = be32_to_cpu(h->length); + pi->vnr = 0; + } else if (header_size == sizeof(struct p_header80) && + *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) { + struct p_header80 *h = header; + pi->cmd = be16_to_cpu(h->command); + pi->size = be16_to_cpu(h->length); + pi->vnr = 0; + } else { + conn_err(tconn, "Wrong magic value 0x%08x in protocol version %d\n", + be32_to_cpu(*(__be32 *)header), + tconn->agreed_pro_version); + return -EINVAL; } - mdev->last_received = jiffies; + pi->data = header + header_size; + return 0; +} + +static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi) +{ + void *buffer = tconn->data.rbuf; + int err; + + err = drbd_recv_all_warn(tconn, buffer, drbd_header_size(tconn)); + if (err) + return err; - return TRUE; + err = decode_header(tconn, buffer, pi); + tconn->last_received = jiffies; + + return err; } -STATIC enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch) +STATIC enum finish_epoch drbd_flush_after_epoch(struct drbd_tconn *tconn, struct drbd_epoch *epoch) { int rv; + struct drbd_conf *mdev; + int vnr; + + if (tconn->write_ordering >= WO_bdev_flush) { + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (!get_ldev(mdev)) + continue; + kref_get(&mdev->kref); + rcu_read_unlock(); + + rv = blkdev_issue_flush(mdev->ldev->backing_bdev, + 
GFP_NOIO, NULL); + if (rv) { + dev_info(DEV, "local disk flush failed with status %d\n", rv); + /* would rather check on EOPNOTSUPP, but that is not reliable. + * don't try again for ANY return value != 0 + * if (rv == -EOPNOTSUPP) */ + drbd_bump_write_ordering(tconn, WO_drain_io); + } + put_ldev(mdev); + kref_put(&mdev->kref, &drbd_minor_destroy); - if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) { - rv = blkdev_issue_flush(mdev->ldev->backing_bdev, NULL); - if (rv) { - dev_err(DEV, "local disk flush failed with status %d\n", rv); - /* would rather check on EOPNOTSUPP, but that is not reliable. - * don't try again for ANY return value != 0 - * if (rv == -EOPNOTSUPP) */ - drbd_bump_write_ordering(mdev, WO_drain_io); + rcu_read_lock(); + if (rv) + break; } - put_ldev(mdev); + rcu_read_unlock(); } - return drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE); + return drbd_may_finish_epoch(tconn, epoch, EV_BARRIER_DONE); } -STATIC int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +STATIC int w_flush(struct drbd_work *w, int cancel) { struct flush_work *fw = (struct flush_work *)w; struct drbd_epoch *epoch = fw->epoch; + struct drbd_conf *mdev = w->mdev; kfree(w); if (!test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags)) - drbd_flush_after_epoch(mdev, epoch); + drbd_flush_after_epoch(mdev->tconn, epoch); - drbd_may_finish_epoch(mdev, epoch, EV_PUT | + drbd_may_finish_epoch(mdev->tconn, epoch, EV_PUT | (mdev->state.conn < C_CONNECTED ? EV_CLEANUP : 0)); - return 1; + return 0; } /** @@ -1024,7 +1178,7 @@ * @epoch: Epoch object. * @ev: Epoch event. 
*/ -STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, +STATIC enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn, struct drbd_epoch *epoch, enum epoch_event ev) { @@ -1033,7 +1187,7 @@ int schedule_flush = 0; enum finish_epoch rv = FE_STILL_LIVE; - spin_lock(&mdev->epoch_lock); + spin_lock(&tconn->epoch_lock); do { next_epoch = NULL; finish = 0; @@ -1050,8 +1204,8 @@ /* Special case: If we just switched from WO_bio_barrier to WO_bdev_flush we should not finish the current epoch */ if (test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) && epoch_size == 1 && - mdev->write_ordering != WO_bio_barrier && - epoch == mdev->current_epoch) + tconn->write_ordering != WO_bio_barrier && + epoch == tconn->current_epoch) clear_bit(DE_CONTAINS_A_BARRIER, &epoch->flags); break; case EV_BARRIER_DONE: @@ -1062,40 +1216,38 @@ break; } - trace_drbd_epoch(mdev, epoch, ev); - if (epoch_size != 0 && atomic_read(&epoch->active) == 0 && - test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) && - epoch->list.prev == &mdev->current_epoch->list && + (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP) && + epoch->list.prev == &tconn->current_epoch->list && !test_bit(DE_IS_FINISHING, &epoch->flags)) { /* Nearly all conditions are met to finish that epoch... 
*/ if (test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) || - mdev->write_ordering == WO_none || + tconn->write_ordering == WO_none || (epoch_size == 1 && test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) || ev & EV_CLEANUP) { finish = 1; set_bit(DE_IS_FINISHING, &epoch->flags); } else if (!test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) && - mdev->write_ordering == WO_bio_barrier) { + tconn->write_ordering == WO_bio_barrier) { atomic_inc(&epoch->active); schedule_flush = 1; } } if (finish) { if (!(ev & EV_CLEANUP)) { - spin_unlock(&mdev->epoch_lock); - drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size); - spin_lock(&mdev->epoch_lock); + spin_unlock(&tconn->epoch_lock); + drbd_send_b_ack(epoch->mdev, epoch->barrier_nr, epoch_size); + spin_lock(&tconn->epoch_lock); } - dec_unacked(mdev); + if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) + dec_unacked(epoch->mdev); - if (mdev->current_epoch != epoch) { + if (tconn->current_epoch != epoch) { next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list); list_del(&epoch->list); ev = EV_BECAME_LAST | (ev & EV_CLEANUP); - mdev->epochs--; - trace_drbd_epoch(mdev, epoch, EV_TRACE_FREE); + tconn->epochs--; kfree(epoch); if (rv == FE_STILL_LIVE) @@ -1115,22 +1267,22 @@ epoch = next_epoch; } while (1); - spin_unlock(&mdev->epoch_lock); + spin_unlock(&tconn->epoch_lock); if (schedule_flush) { struct flush_work *fw; fw = kmalloc(sizeof(*fw), GFP_ATOMIC); if (fw) { - trace_drbd_epoch(mdev, epoch, EV_TRACE_FLUSH); fw->w.cb = w_flush; fw->epoch = epoch; - drbd_queue_work(&mdev->data.work, &fw->w); + fw->w.mdev = epoch->mdev; + drbd_queue_work(&tconn->data.work, &fw->w); } else { - dev_warn(DEV, "Could not kmalloc a flush_work obj\n"); + conn_warn(tconn, "Could not kmalloc a flush_work obj\n"); set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); /* That is not a recursion, only one level */ - drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE); - drbd_may_finish_epoch(mdev, epoch, EV_PUT); + 
drbd_may_finish_epoch(tconn, epoch, EV_BARRIER_DONE); + drbd_may_finish_epoch(tconn, epoch, EV_PUT); } } @@ -1139,12 +1291,15 @@ /** * drbd_bump_write_ordering() - Fall back to an other write ordering method - * @mdev: DRBD device. + * @tconn: DRBD connection. * @wo: Write ordering method to try. */ -void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local) +void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo) { + struct disk_conf *dc; + struct drbd_conf *mdev; enum write_ordering_e pwo; + int vnr, i = 0; static char *write_ordering_str[] = { [WO_none] = "none", [WO_drain_io] = "drain", @@ -1152,113 +1307,258 @@ [WO_bio_barrier] = "barrier", }; - pwo = mdev->write_ordering; + pwo = tconn->write_ordering; wo = min(pwo, wo); - if (wo == WO_bio_barrier && mdev->ldev->dc.no_disk_barrier) - wo = WO_bdev_flush; - if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush) - wo = WO_drain_io; - if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain) - wo = WO_none; - mdev->write_ordering = wo; - if (pwo != mdev->write_ordering || wo == WO_bio_barrier) - dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]); + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (i++ == 1 && wo == WO_bio_barrier) + wo = WO_bdev_flush; /* WO = barrier does not handle multiple volumes */ + if (!get_ldev(mdev)) + continue; + dc = rcu_dereference(mdev->ldev->disk_conf); + + if (wo == WO_bio_barrier && !dc->disk_barrier) + wo = WO_bdev_flush; + if (wo == WO_bdev_flush && !dc->disk_flushes) + wo = WO_drain_io; + if (wo == WO_drain_io && !dc->disk_drain) + wo = WO_none; + put_ldev(mdev); + } + rcu_read_unlock(); + tconn->write_ordering = wo; + if (pwo != tconn->write_ordering || wo == WO_bio_barrier) + conn_info(tconn, "Method to ensure write ordering: %s\n", write_ordering_str[tconn->write_ordering]); } /** - * w_e_reissue() - Worker callback; Resubmit a bio, without 
BIO_RW_BARRIER set + * drbd_submit_peer_request() + * @mdev: DRBD device. + * @peer_req: peer request + * @rw: flag field, see bio->bi_rw + * + * May spread the pages to multiple bios, + * depending on bio_add_page restrictions. + * + * Returns 0 if all bios have been submitted, + * -ENOMEM if we could not allocate enough bios, + * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a + * single page to an empty bio (which should never happen and likely indicates + * that the lower level IO stack is in some way broken). This has been observed + * on certain Xen deployments. + */ +/* TODO allocate from our own bio_set. */ +int drbd_submit_peer_request(struct drbd_conf *mdev, + struct drbd_peer_request *peer_req, + const unsigned rw, const int fault_type) +{ + struct bio *bios = NULL; + struct bio *bio; + struct page *page = peer_req->pages; + sector_t sector = peer_req->i.sector; + unsigned ds = peer_req->i.size; + unsigned n_bios = 0; + unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; + int err = -ENOMEM; + + /* In most cases, we will only need one bio. But in case the lower + * level restrictions happen to be different at this offset on this + * side than those of the sending peer, we may need to submit the + * request in more than one bio. + * + * Plain bio_alloc is good enough here, this is no DRBD internally + * generated bio, but a bio allocated on behalf of the peer. 
+ */ +next_bio: + bio = bio_alloc(GFP_NOIO, nr_pages); + if (!bio) { + dev_err(DEV, "submit_ee: Allocation of a bio failed\n"); + goto fail; + } + /* > peer_req->i.sector, unless this is the first bio */ + bio->bi_sector = sector; + bio->bi_bdev = mdev->ldev->backing_bdev; + /* we special case some flags in the multi-bio case, see below + * (REQ_FLUSH, or BIO_RW_BARRIER in older kernels) */ + bio->bi_rw = rw; + bio->bi_private = peer_req; + bio->bi_end_io = drbd_peer_request_endio; + + bio->bi_next = bios; + bios = bio; + ++n_bios; + + page_chain_for_each(page) { + unsigned len = min_t(unsigned, ds, PAGE_SIZE); + if (!bio_add_page(bio, page, len, 0)) { + /* A single page must always be possible! + * But in case it fails anyways, + * we deal with it, and complain (below). */ + if (bio->bi_vcnt == 0) { + dev_err(DEV, + "bio_add_page failed for len=%u, " + "bi_vcnt=0 (bi_sector=%llu)\n", + len, (unsigned long long)bio->bi_sector); + err = -ENOSPC; + goto fail; + } + goto next_bio; + } + ds -= len; + sector += len >> 9; + --nr_pages; + } + D_ASSERT(page == NULL); + D_ASSERT(ds == 0); + + atomic_set(&peer_req->pending_bios, n_bios); + do { + bio = bios; + bios = bios->bi_next; + bio->bi_next = NULL; + + drbd_generic_make_request(mdev, fault_type, bio); + + /* strip off REQ_FLUSH, + * unless it is the first or last bio */ + if (bios && bios->bi_next) + bios->bi_rw &= ~DRBD_REQ_FLUSH; + } while (bios); + return 0; + +fail: + while (bios) { + bio = bios; + bios = bios->bi_next; + bio_put(bio); + } + return err; +} + +static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev, + struct drbd_peer_request *peer_req) +{ + struct drbd_interval *i = &peer_req->i; + + drbd_remove_interval(&mdev->write_requests, i); + drbd_clear_interval(i); + + /* Wake up any processes waiting for this peer request to complete. */ + if (i->waiting) + wake_up(&mdev->misc_wait); +} + +/** + * w_e_reissue() - Worker callback; Resubmit a bio, without REQ_HARDBARRIER set * @mdev: DRBD device. 
* @w: work object. * @cancel: The connection will be closed anyways (unused in this callback) */ -int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local) +int w_e_reissue(struct drbd_work *w, int cancel) __releases(local) { - struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; - struct bio *bio = e->private_bio; - + struct drbd_peer_request *peer_req = + container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; + int err; /* We leave DE_CONTAINS_A_BARRIER and EE_IS_BARRIER in place, (and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch) so that we can finish that epoch in drbd_may_finish_epoch(). That is necessary if we already have a long chain of Epochs, before - we realize that BIO_RW_BARRIER is actually not supported */ + we realize that BARRIER is actually not supported */ /* As long as the -ENOTSUPP on the barrier is reported immediately that will never trigger. If it is reported late, we will just print that warning and continue correctly for all future requests with WO_bdev_flush */ - if (previous_epoch(mdev, e->epoch)) + if (previous_epoch(mdev->tconn, peer_req->epoch)) dev_warn(DEV, "Write ordering was not enforced (one time event)\n"); - /* prepare bio for re-submit, - * re-init volatile members */ /* we still have a local reference, * get_ldev was done in receive_Data. */ - bio->bi_bdev = mdev->ldev->backing_bdev; - bio->bi_sector = e->sector; - bio->bi_size = e->size; - bio->bi_idx = 0; - - bio->bi_flags &= ~(BIO_POOL_MASK - 1); - bio->bi_flags |= 1 << BIO_UPTODATE; - - /* don't know whether this is necessary: */ - bio->bi_phys_segments = 0; - bio->bi_next = NULL; - - /* these should be unchanged: */ - /* bio->bi_end_io = drbd_endio_write_sec; */ - /* bio->bi_vcnt = whatever; */ - e->w.cb = e_end_block; - - /* This is no longer a barrier request. 
*/ - bio->bi_rw &= ~(1UL << BIO_RW_BARRIER); + peer_req->w.cb = e_end_block; + err = drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_DT_WR); + switch (err) { + case -ENOMEM: + peer_req->w.cb = w_e_reissue; + drbd_queue_work(&mdev->tconn->data.work, &peer_req->w); + /* retry later; fall through */ + case 0: + /* keep worker happy and connection up */ + return 0; - drbd_generic_make_request(mdev, DRBD_FAULT_DT_WR, bio); + case -ENOSPC: + /* no other error expected, but anyways: */ + default: + /* forget the object, + * and cause a "Network failure" */ + spin_lock_irq(&mdev->tconn->req_lock); + list_del(&peer_req->w.list); + drbd_remove_epoch_entry_interval(mdev, peer_req); + spin_unlock_irq(&mdev->tconn->req_lock); + if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) + drbd_al_complete_io(mdev, &peer_req->i); + drbd_may_finish_epoch(mdev->tconn, peer_req->epoch, EV_PUT + EV_CLEANUP); + drbd_free_peer_req(mdev, peer_req); + dev_err(DEV, "submit failed, triggering re-connect\n"); + return err; + } +} - return 1; +void conn_wait_active_ee_empty(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + kref_get(&mdev->kref); + rcu_read_unlock(); + drbd_wait_ee_list_empty(mdev, &mdev->active_ee); + kref_put(&mdev->kref, &drbd_minor_destroy); + rcu_read_lock(); + } + rcu_read_unlock(); } -STATIC int receive_Barrier(struct drbd_conf *mdev, struct p_header *h) +STATIC int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi) { + struct drbd_conf *mdev; int rv, issue_flush; - struct p_barrier *p = (struct p_barrier *)h; + struct p_barrier *p = pi->data; struct drbd_epoch *epoch; - ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; - - rv = drbd_recv(mdev, h->payload, h->length); - ERR_IF(rv != h->length) return FALSE; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; inc_unacked(mdev); - if (mdev->net_conf->wire_protocol != DRBD_PROT_C) - 
drbd_kick_lo(mdev); - - mdev->current_epoch->barrier_nr = p->barrier; - rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR); + tconn->current_epoch->barrier_nr = p->barrier; + tconn->current_epoch->mdev = mdev; + rv = drbd_may_finish_epoch(tconn, tconn->current_epoch, EV_GOT_BARRIER_NR); /* P_BARRIER_ACK may imply that the corresponding extent is dropped from * the activity log, which means it would not be resynced in case the * R_PRIMARY crashes now. * Therefore we must send the barrier_ack after the barrier request was * completed. */ - switch (mdev->write_ordering) { + switch (tconn->write_ordering) { case WO_bio_barrier: case WO_none: if (rv == FE_RECYCLED) - return TRUE; + return 0; break; case WO_bdev_flush: case WO_drain_io: if (rv == FE_STILL_LIVE) { - set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags); - drbd_wait_ee_list_empty(mdev, &mdev->active_ee); - rv = drbd_flush_after_epoch(mdev, mdev->current_epoch); + set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &tconn->current_epoch->flags); + conn_wait_active_ee_empty(tconn); + rv = drbd_flush_after_epoch(tconn, tconn->current_epoch); } if (rv == FE_RECYCLED) - return TRUE; + return 0; /* The asender will send all the ACKs and barrier ACKs out, since all EEs moved from the active_ee to the done_ee. 
We need to @@ -1271,101 +1571,117 @@ epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO); if (!epoch) { dev_warn(DEV, "Allocation of an epoch failed, slowing down\n"); - issue_flush = !test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); - drbd_wait_ee_list_empty(mdev, &mdev->active_ee); + issue_flush = !test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &tconn->current_epoch->flags); + conn_wait_active_ee_empty(tconn); if (issue_flush) { - rv = drbd_flush_after_epoch(mdev, mdev->current_epoch); + rv = drbd_flush_after_epoch(tconn, tconn->current_epoch); if (rv == FE_RECYCLED) - return TRUE; + return 0; } drbd_wait_ee_list_empty(mdev, &mdev->done_ee); - return TRUE; + return 0; } epoch->flags = 0; atomic_set(&epoch->epoch_size, 0); atomic_set(&epoch->active, 0); - spin_lock(&mdev->epoch_lock); - if (atomic_read(&mdev->current_epoch->epoch_size)) { - list_add(&epoch->list, &mdev->current_epoch->list); - mdev->current_epoch = epoch; - mdev->epochs++; - trace_drbd_epoch(mdev, epoch, EV_TRACE_ALLOC); + spin_lock(&tconn->epoch_lock); + if (atomic_read(&tconn->current_epoch->epoch_size)) { + list_add(&epoch->list, &tconn->current_epoch->list); + tconn->current_epoch = epoch; + tconn->epochs++; } else { /* The current_epoch got recycled while we allocated this one... 
*/ kfree(epoch); } - spin_unlock(&mdev->epoch_lock); + spin_unlock(&tconn->epoch_lock); - return TRUE; + return 0; } /* used from receive_RSDataReply (recv_resync_read) * and from receive_Data */ -STATIC struct drbd_epoch_entry * -read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, int data_size) __must_hold(local) +STATIC struct drbd_peer_request * +read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector, + int data_size) __must_hold(local) { - struct drbd_epoch_entry *e; - struct bio_vec *bvec; + const sector_t capacity = drbd_get_capacity(mdev->this_bdev); + struct drbd_peer_request *peer_req; struct page *page; - struct bio *bio; - int dgs, ds, i, rr; - void *dig_in = mdev->int_dig_in; - void *dig_vv = mdev->int_dig_vv; - - dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? - crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; - - if (dgs) { - rr = drbd_recv(mdev, dig_in, dgs); - if (rr != dgs) { - dev_warn(DEV, "short read receiving data digest: read %d expected %d\n", - rr, dgs); + int dgs, ds, err; + void *dig_in = mdev->tconn->int_dig_in; + void *dig_vv = mdev->tconn->int_dig_vv; + unsigned long *data; + + dgs = 0; + if (mdev->tconn->peer_integrity_tfm) { + dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm); + /* + * FIXME: Receive the incoming digest into the receive buffer + * here, together with its struct p_data? + */ + err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs); + if (err) return NULL; - } + data_size -= dgs; } - data_size -= dgs; + if (!expect(data_size != 0)) + return NULL; + if (!expect(IS_ALIGNED(data_size, 512))) + return NULL; + if (!expect(data_size <= DRBD_MAX_BIO_SIZE)) + return NULL; - ERR_IF(data_size & 0x1ff) return NULL; - ERR_IF(data_size > DRBD_MAX_SEGMENT_SIZE) return NULL; + /* even though we trust out peer, + * we sometimes have to double check. 
*/ + if (sector + (data_size>>9) > capacity) { + dev_err(DEV, "request from peer beyond end of local disk: " + "capacity: %llus < sector: %llus + size: %u\n", + (unsigned long long)capacity, + (unsigned long long)sector, data_size); + return NULL; + } /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place. */ - e = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO); - if (!e) + peer_req = drbd_alloc_peer_req(mdev, id, sector, data_size, GFP_NOIO); + if (!peer_req) return NULL; - bio = e->private_bio; + ds = data_size; - bio_for_each_segment(bvec, bio, i) { - page = bvec->bv_page; - rr = drbd_recv(mdev, kmap(page), min_t(int, ds, PAGE_SIZE)); + page = peer_req->pages; + page_chain_for_each(page) { + unsigned len = min_t(int, ds, PAGE_SIZE); + data = kmap(page); + err = drbd_recv_all_warn(mdev->tconn, data, len); + if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) { + dev_err(DEV, "Fault injection: Corrupting data on receive\n"); + data[0] = data[0] ^ (unsigned long)-1; + } kunmap(page); - if (rr != min_t(int, ds, PAGE_SIZE)) { - drbd_free_ee(mdev, e); - dev_warn(DEV, "short read receiving data: read %d expected %d\n", - rr, min_t(int, ds, PAGE_SIZE)); + if (err) { + drbd_free_peer_req(mdev, peer_req); return NULL; } - ds -= rr; + ds -= len; } if (dgs) { - drbd_csum(mdev, mdev->integrity_r_tfm, bio, dig_vv); + drbd_csum_ee(mdev, mdev->tconn->peer_integrity_tfm, peer_req, dig_vv); if (memcmp(dig_in, dig_vv, dgs)) { - dev_err(DEV, "Digest integrity check FAILED.\n"); - drbd_bcast_ee(mdev, "digest failed", - dgs, dig_in, dig_vv, e); - drbd_free_ee(mdev, e); + dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n", + (unsigned long long)sector, data_size); + drbd_free_peer_req(mdev, peer_req); return NULL; } } mdev->recv_cnt += data_size>>9; - return e; + return peer_req; } /* drbd_drain_block() just takes a data block 
@@ -1374,25 +1690,26 @@ STATIC int drbd_drain_block(struct drbd_conf *mdev, int data_size) { struct page *page; - int rr, rv = 1; + int err = 0; void *data; - page = drbd_pp_alloc(mdev, 1); + if (!data_size) + return 0; + + page = drbd_alloc_pages(mdev, 1, 1); data = kmap(page); while (data_size) { - rr = drbd_recv(mdev, data, min_t(int, data_size, PAGE_SIZE)); - if (rr != min_t(int, data_size, PAGE_SIZE)) { - rv = 0; - dev_warn(DEV, "short read receiving data: read %d expected %d\n", - rr, min_t(int, data_size, PAGE_SIZE)); + unsigned int len = min_t(int, data_size, PAGE_SIZE); + + err = drbd_recv_all_warn(mdev->tconn, data, len); + if (err) break; - } - data_size -= rr; + data_size -= len; } kunmap(page); - drbd_pp_free(mdev, page); - return rv; + drbd_free_pages(mdev, page, 0); + return err; } STATIC int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req, @@ -1400,24 +1717,19 @@ { struct bio_vec *bvec; struct bio *bio; - int dgs, rr, i, expect; - void *dig_in = mdev->int_dig_in; - void *dig_vv = mdev->int_dig_vv; - - dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ? - crypto_hash_digestsize(mdev->integrity_r_tfm) : 0; - - if (dgs) { - rr = drbd_recv(mdev, dig_in, dgs); - if (rr != dgs) { - dev_warn(DEV, "short read receiving data reply digest: read %d expected %d\n", - rr, dgs); - return 0; - } + int dgs, err, i, expect; + void *dig_in = mdev->tconn->int_dig_in; + void *dig_vv = mdev->tconn->int_dig_vv; + + dgs = 0; + if (mdev->tconn->peer_integrity_tfm) { + dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm); + err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs); + if (err) + return err; + data_size -= dgs; } - data_size -= dgs; - /* optimistically update recv_cnt. if receiving fails below, * we disconnect anyways, and counters will be reset. 
*/ mdev->recv_cnt += data_size>>9; @@ -1426,143 +1738,151 @@ D_ASSERT(sector == bio->bi_sector); bio_for_each_segment(bvec, bio, i) { + void *mapped = kmap(bvec->bv_page) + bvec->bv_offset; expect = min_t(int, data_size, bvec->bv_len); - rr = drbd_recv(mdev, - kmap(bvec->bv_page)+bvec->bv_offset, - expect); + err = drbd_recv_all_warn(mdev->tconn, mapped, expect); kunmap(bvec->bv_page); - if (rr != expect) { - dev_warn(DEV, "short read receiving data reply: " - "read %d expected %d\n", - rr, expect); - return 0; - } - data_size -= rr; + if (err) + return err; + data_size -= expect; } if (dgs) { - drbd_csum(mdev, mdev->integrity_r_tfm, bio, dig_vv); + drbd_csum_bio(mdev, mdev->tconn->peer_integrity_tfm, bio, dig_vv); if (memcmp(dig_in, dig_vv, dgs)) { dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n"); - return 0; + return -EINVAL; } } D_ASSERT(data_size == 0); - return 1; + return 0; } -/* e_end_resync_block() is called via - * drbd_process_done_ee() by asender only */ -STATIC int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused) -{ - struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; - sector_t sector = e->sector; - int ok; - - D_ASSERT(hlist_unhashed(&e->colision)); - - if (likely(drbd_bio_uptodate(e->private_bio))) { - drbd_set_in_sync(mdev, sector, e->size); - ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, e); +/* + * e_end_resync_block() is called in asender context via + * drbd_finish_peer_reqs(). 
+ */ +STATIC int e_end_resync_block(struct drbd_work *w, int unused) +{ + struct drbd_peer_request *peer_req = + container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; + sector_t sector = peer_req->i.sector; + int err; + + D_ASSERT(drbd_interval_empty(&peer_req->i)); + + if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { + drbd_set_in_sync(mdev, sector, peer_req->i.size); + err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req); } else { /* Record failure to sync */ - drbd_rs_failed_io(mdev, sector, e->size); + drbd_rs_failed_io(mdev, sector, peer_req->i.size); - ok = drbd_send_ack(mdev, P_NEG_ACK, e); + err = drbd_send_ack(mdev, P_NEG_ACK, peer_req); } dec_unacked(mdev); - return ok; + return err; } STATIC int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local) { - struct drbd_epoch_entry *e; + struct drbd_peer_request *peer_req; - e = read_in_block(mdev, ID_SYNCER, sector, data_size); - if (!e) { - put_ldev(mdev); - return FALSE; - } + peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size); + if (!peer_req) + goto fail; dec_rs_pending(mdev); - e->private_bio->bi_end_io = drbd_endio_write_sec; - e->private_bio->bi_rw = WRITE; - e->w.cb = e_end_resync_block; - inc_unacked(mdev); /* corresponding dec_unacked() in e_end_resync_block() * respective _drbd_clear_done_ee */ - spin_lock_irq(&mdev->req_lock); - list_add(&e->w.list, &mdev->sync_ee); - spin_unlock_irq(&mdev->req_lock); + peer_req->w.cb = e_end_resync_block; + + spin_lock_irq(&mdev->tconn->req_lock); + list_add(&peer_req->w.list, &mdev->sync_ee); + spin_unlock_irq(&mdev->tconn->req_lock); + + atomic_add(data_size >> 9, &mdev->rs_sect_ev); + if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0) + return 0; - trace_drbd_ee(mdev, e, "submitting for (rs)write"); - trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL); - drbd_generic_make_request(mdev, DRBD_FAULT_RS_WR, e->private_bio); - /* accounting done in endio */ + /* 
don't care for the reason here */ + dev_err(DEV, "submit failed, triggering re-connect\n"); + spin_lock_irq(&mdev->tconn->req_lock); + list_del(&peer_req->w.list); + spin_unlock_irq(&mdev->tconn->req_lock); - maybe_kick_lo(mdev); - return TRUE; + drbd_free_peer_req(mdev, peer_req); +fail: + put_ldev(mdev); + return -EIO; } -STATIC int receive_DataReply(struct drbd_conf *mdev, struct p_header *h) +static struct drbd_request * +find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id, + sector_t sector, bool missing_ok, const char *func) { struct drbd_request *req; - sector_t sector; - unsigned int header_size, data_size; - int ok; - struct p_data *p = (struct p_data *)h; - header_size = sizeof(*p) - sizeof(*h); - data_size = h->length - header_size; + /* Request object according to our peer */ + req = (struct drbd_request *)(unsigned long)id; + if (drbd_contains_interval(root, sector, &req->i) && req->i.local) + return req; + if (!missing_ok) { + dev_err(DEV, "%s: failed to find request 0x%lx, sector %llus\n", func, + (unsigned long)id, (unsigned long long)sector); + } + return NULL; +} - ERR_IF(data_size == 0) return FALSE; +STATIC int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi) +{ + struct drbd_conf *mdev; + struct drbd_request *req; + sector_t sector; + int err; + struct p_data *p = pi->data; - if (drbd_recv(mdev, h->payload, header_size) != header_size) - return FALSE; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; sector = be64_to_cpu(p->sector); - spin_lock_irq(&mdev->req_lock); - req = _ar_id_to_req(mdev, p->block_id, sector); - spin_unlock_irq(&mdev->req_lock); - if (unlikely(!req)) { - dev_err(DEV, "Got a corrupt block_id/sector pair(1).\n"); - return FALSE; - } + spin_lock_irq(&mdev->tconn->req_lock); + req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__); + spin_unlock_irq(&mdev->tconn->req_lock); + if (unlikely(!req)) + return -EIO; - /* hlist_del(&req->colision) is done 
in _req_may_be_done, to avoid + /* drbd_remove_request_interval() is done in _req_may_be_done, to avoid * special casing it there for the various failure cases. * still no race with drbd_fail_pending_reads */ - ok = recv_dless_read(mdev, req, sector, data_size); - - if (ok) - req_mod(req, data_received); + err = recv_dless_read(mdev, req, sector, pi->size); + if (!err) + req_mod(req, DATA_RECEIVED); /* else: nothing. handled from drbd_disconnect... * I don't think we may complete this just yet * in case we are "on-disconnect: freeze" */ - return ok; + return err; } -STATIC int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h) +STATIC int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi) { + struct drbd_conf *mdev; sector_t sector; - unsigned int header_size, data_size; - int ok; - struct p_data *p = (struct p_data *)h; - - header_size = sizeof(*p) - sizeof(*h); - data_size = h->length - header_size; - - ERR_IF(data_size == 0) return FALSE; + int err; + struct p_data *p = pi->data; - if (drbd_recv(mdev, h->payload, header_size) != header_size) - return FALSE; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; sector = be64_to_cpu(p->sector); D_ASSERT(p->block_id == ID_SYNCER); @@ -1570,47 +1890,96 @@ if (get_ldev(mdev)) { /* data is submitted to disk within recv_resync_read. * corresponding put_ldev done below on error, - * or in drbd_endio_write_sec. */ - ok = recv_resync_read(mdev, sector, data_size); + * or in drbd_peer_request_endio. 
*/ + err = recv_resync_read(mdev, sector, pi->size); } else { if (DRBD_ratelimit(5*HZ, 5)) dev_err(DEV, "Can not write resync data to local disk.\n"); - ok = drbd_drain_block(mdev, data_size); + err = drbd_drain_block(mdev, pi->size); + + drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size); + } + + atomic_add(pi->size >> 9, &mdev->rs_sect_in); + + return err; +} + +static int w_restart_write(struct drbd_work *w, int cancel) +{ + struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; + struct bio *bio; + unsigned long start_time; + unsigned long flags; - drbd_send_ack_dp(mdev, P_NEG_ACK, p); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + if (!expect(req->rq_state & RQ_POSTPONED)) { + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + return -EIO; } + bio = req->master_bio; + start_time = req->start_time; + /* Postponed requests will not have their master_bio completed! */ + __req_mod(req, DISCARD_WRITE, NULL); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + + while (__drbd_make_request(mdev, bio, start_time)) + /* retry */ ; + return 0; +} + +static void restart_conflicting_writes(struct drbd_conf *mdev, + sector_t sector, int size) +{ + struct drbd_interval *i; + struct drbd_request *req; - return ok; + drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { + if (!i->local) + continue; + req = container_of(i, struct drbd_request, i); + if (req->rq_state & RQ_LOCAL_PENDING || + !(req->rq_state & RQ_POSTPONED)) + continue; + if (expect(list_empty(&req->w.list))) { + req->w.mdev = mdev; + req->w.cb = w_restart_write; + drbd_queue_work(&mdev->tconn->data.work, &req->w); + } + } } -/* e_end_block() is called via drbd_process_done_ee(). - * this means this function only runs in the asender thread +/* + * e_end_block() is called in asender context via drbd_finish_peer_reqs(). 
*/ -STATIC int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +STATIC int e_end_block(struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; - sector_t sector = e->sector; + struct drbd_peer_request *peer_req = + container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; + sector_t sector = peer_req->i.sector; struct drbd_epoch *epoch; - int ok = 1, pcmd; + int err = 0, pcmd; - if (e->flags & EE_IS_BARRIER) { - epoch = previous_epoch(mdev, e->epoch); + if (peer_req->flags & EE_IS_BARRIER) { + epoch = previous_epoch(mdev->tconn, peer_req->epoch); if (epoch) - drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE + (cancel ? EV_CLEANUP : 0)); + drbd_may_finish_epoch(mdev->tconn, epoch, EV_BARRIER_DONE + (cancel ? EV_CLEANUP : 0)); } - if (mdev->net_conf->wire_protocol == DRBD_PROT_C) { - if (likely(drbd_bio_uptodate(e->private_bio))) { + if (peer_req->flags & EE_SEND_WRITE_ACK) { + if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { pcmd = (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn <= C_PAUSED_SYNC_T && - e->flags & EE_MAY_SET_IN_SYNC) ? + peer_req->flags & EE_MAY_SET_IN_SYNC) ? P_RS_WRITE_ACK : P_WRITE_ACK; - ok &= drbd_send_ack(mdev, pcmd, e); + err = drbd_send_ack(mdev, pcmd, peer_req); if (pcmd == P_RS_WRITE_ACK) - drbd_set_in_sync(mdev, sector, e->size); + drbd_set_in_sync(mdev, sector, peer_req->i.size); } else { - ok = drbd_send_ack(mdev, P_NEG_ACK, e); + err = drbd_send_ack(mdev, P_NEG_ACK, peer_req); /* we expect it to be marked out of sync anyways... * maybe assert this? */ } @@ -1618,36 +1987,93 @@ } /* we delete from the conflict detection hash _after_ we sent out the * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. 
*/ - if (mdev->net_conf->two_primaries) { - spin_lock_irq(&mdev->req_lock); - D_ASSERT(!hlist_unhashed(&e->colision)); - hlist_del_init(&e->colision); - spin_unlock_irq(&mdev->req_lock); - } else { - D_ASSERT(hlist_unhashed(&e->colision)); - } + if (peer_req->flags & EE_IN_INTERVAL_TREE) { + spin_lock_irq(&mdev->tconn->req_lock); + D_ASSERT(!drbd_interval_empty(&peer_req->i)); + drbd_remove_epoch_entry_interval(mdev, peer_req); + if (peer_req->flags & EE_RESTART_REQUESTS) + restart_conflicting_writes(mdev, sector, peer_req->i.size); + spin_unlock_irq(&mdev->tconn->req_lock); + } else + D_ASSERT(drbd_interval_empty(&peer_req->i)); + + drbd_may_finish_epoch(mdev->tconn, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); + + return err; +} + +static int e_send_ack(struct drbd_work *w, enum drbd_packet ack) +{ + struct drbd_conf *mdev = w->mdev; + struct drbd_peer_request *peer_req = + container_of(w, struct drbd_peer_request, w); + int err; - drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0)); + err = drbd_send_ack(mdev, ack, peer_req); + dec_unacked(mdev); - return ok; + return err; +} + +static int e_send_discard_write(struct drbd_work *w, int unused) +{ + return e_send_ack(w, P_DISCARD_WRITE); } -STATIC int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused) +static int e_send_retry_write(struct drbd_work *w, int unused) { - struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w; - int ok = 1; + struct drbd_tconn *tconn = w->mdev->tconn; - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); - ok = drbd_send_ack(mdev, P_DISCARD_ACK, e); + return e_send_ack(w, tconn->agreed_pro_version >= 100 ? + P_RETRY_WRITE : P_DISCARD_WRITE); +} - spin_lock_irq(&mdev->req_lock); - D_ASSERT(!hlist_unhashed(&e->colision)); - hlist_del_init(&e->colision); - spin_unlock_irq(&mdev->req_lock); +static bool seq_greater(u32 a, u32 b) +{ + /* + * We assume 32-bit wrap-around here. 
+ * For 24-bit wrap-around, we would have to shift: + * a <<= 8; b <<= 8; + */ + return (s32)a - (s32)b > 0; +} - dec_unacked(mdev); +static u32 seq_max(u32 a, u32 b) +{ + return seq_greater(a, b) ? a : b; +} + +static bool need_peer_seq(struct drbd_conf *mdev) +{ + struct drbd_tconn *tconn = mdev->tconn; + int tp; + + /* + * We only need to keep track of the last packet_seq number of our peer + * if we are in dual-primary mode and we have the discard flag set; see + * handle_write_conflicts(). + */ + + rcu_read_lock(); + tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries; + rcu_read_unlock(); - return ok; + return tp && test_bit(DISCARD_CONCURRENT, &tconn->flags); +} + +static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq) +{ + unsigned int newest_peer_seq; + + if (need_peer_seq(mdev)) { + spin_lock(&mdev->peer_seq_lock); + newest_peer_seq = seq_max(mdev->peer_seq, peer_seq); + mdev->peer_seq = newest_peer_seq; + spin_unlock(&mdev->peer_seq_lock); + /* wake up only if we actually changed mdev->peer_seq */ + if (peer_seq == newest_peer_seq) + wake_up(&mdev->seq_wait); + } } /* Called from receive_Data. @@ -1671,468 +2097,619 @@ * * returns 0 if we may process the packet, * -ERESTARTSYS if we were interrupted (by disconnect signal). 
*/ -static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) +static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq) { DEFINE_WAIT(wait); - unsigned int p_seq; long timeout; - int ret = 0; + int ret; + + if (!need_peer_seq(mdev)) + return 0; + spin_lock(&mdev->peer_seq_lock); for (;;) { - prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE); - if (seq_le(packet_seq, mdev->peer_seq+1)) + if (!seq_greater(peer_seq - 1, mdev->peer_seq)) { + mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq); + ret = 0; break; + } if (signal_pending(current)) { ret = -ERESTARTSYS; break; } - p_seq = mdev->peer_seq; + prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE); spin_unlock(&mdev->peer_seq_lock); - timeout = schedule_timeout(30*HZ); + rcu_read_lock(); + timeout = rcu_dereference(mdev->tconn->net_conf)->ping_timeo*HZ/10; + rcu_read_unlock(); + timeout = schedule_timeout(timeout); spin_lock(&mdev->peer_seq_lock); - if (timeout == 0 && p_seq == mdev->peer_seq) { + if (!timeout) { ret = -ETIMEDOUT; - dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n"); + dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n"); break; } } - finish_wait(&mdev->seq_wait, &wait); - if (mdev->peer_seq+1 == packet_seq) - mdev->peer_seq++; spin_unlock(&mdev->peer_seq_lock); + finish_wait(&mdev->seq_wait, &wait); return ret; } +/* see also bio_flags_to_wire() + * DRBD_REQ_*, because we need to semantically map the flags to data packet + * flags and back. We may replicate to other kernel versions. */ +static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf) +{ + if (mdev->tconn->agreed_pro_version >= 95) + return (dpf & DP_RW_SYNC ? DRBD_REQ_SYNC : 0) | + (dpf & DP_FUA ? DRBD_REQ_FUA : 0) | + (dpf & DP_FLUSH ? DRBD_REQ_FLUSH : 0) | + (dpf & DP_DISCARD ? DRBD_REQ_DISCARD : 0); + + /* else: we used to communicate one bit only in older DRBD */ + return dpf & DP_RW_SYNC ? 
DRBD_REQ_SYNC : 0; +} + +static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector, + unsigned int size) +{ + struct drbd_interval *i; + + repeat: + drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { + struct drbd_request *req; + struct bio_and_error m; + + if (!i->local) + continue; + req = container_of(i, struct drbd_request, i); + if (!(req->rq_state & RQ_POSTPONED)) + continue; + req->rq_state &= ~RQ_POSTPONED; + __req_mod(req, NEG_ACKED, &m); + spin_unlock_irq(&mdev->tconn->req_lock); + if (m.bio) + complete_master_bio(mdev, &m); + spin_lock_irq(&mdev->tconn->req_lock); + goto repeat; + } +} + +static int handle_write_conflicts(struct drbd_conf *mdev, + struct drbd_peer_request *peer_req) +{ + struct drbd_tconn *tconn = mdev->tconn; + bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags); + sector_t sector = peer_req->i.sector; + const unsigned int size = peer_req->i.size; + struct drbd_interval *i; + bool equal; + int err; + + /* + * Inserting the peer request into the write_requests tree will prevent + * new conflicting local requests from being added. + */ + drbd_insert_interval(&mdev->write_requests, &peer_req->i); + + repeat: + drbd_for_each_overlap(i, &mdev->write_requests, sector, size) { + if (i == &peer_req->i) + continue; + + if (!i->local) { + /* + * Our peer has sent a conflicting remote request; this + * should not happen in a two-node setup. Wait for the + * earlier peer request to complete. + */ + err = drbd_wait_misc(mdev, i); + if (err) + goto out; + goto repeat; + } + + equal = i->sector == sector && i->size == size; + if (resolve_conflicts) { + /* + * If the peer request is fully contained within the + * overlapping request, it can be discarded; otherwise, + * it will be retried once all overlapping requests + * have completed. 
+ */ + bool discard = i->sector <= sector && i->sector + + (i->size >> 9) >= sector + (size >> 9); + + if (!equal) + dev_alert(DEV, "Concurrent writes detected: " + "local=%llus +%u, remote=%llus +%u, " + "assuming %s came first\n", + (unsigned long long)i->sector, i->size, + (unsigned long long)sector, size, + discard ? "local" : "remote"); + + inc_unacked(mdev); + peer_req->w.cb = discard ? e_send_discard_write : + e_send_retry_write; + list_add_tail(&peer_req->w.list, &mdev->done_ee); + wake_asender(mdev->tconn); + + err = -ENOENT; + goto out; + } else { + struct drbd_request *req = + container_of(i, struct drbd_request, i); + + if (!equal) + dev_alert(DEV, "Concurrent writes detected: " + "local=%llus +%u, remote=%llus +%u\n", + (unsigned long long)i->sector, i->size, + (unsigned long long)sector, size); + + if (req->rq_state & RQ_LOCAL_PENDING || + !(req->rq_state & RQ_POSTPONED)) { + /* + * Wait for the node with the discard flag to + * decide if this request will be discarded or + * retried. Requests that are discarded will + * disappear from the write_requests tree. + * + * In addition, wait for the conflicting + * request to finish locally before submitting + * the conflicting peer request. + */ + err = drbd_wait_misc(mdev, &req->i); + if (err) { + _conn_request_state(mdev->tconn, + NS(conn, C_TIMEOUT), + CS_HARD); + fail_postponed_requests(mdev, sector, size); + goto out; + } + goto repeat; + } + /* + * Remember to restart the conflicting requests after + * the new peer request has completed. 
+ */ + peer_req->flags |= EE_RESTART_REQUESTS; + } + } + err = 0; + + out: + if (err) + drbd_remove_epoch_entry_interval(mdev, peer_req); + return err; +} + /* mirrored write */ -STATIC int receive_Data(struct drbd_conf *mdev, struct p_header *h) +STATIC int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi) { + struct drbd_conf *mdev; sector_t sector; - struct drbd_epoch_entry *e; - struct p_data *p = (struct p_data *)h; - int header_size, data_size; + struct drbd_peer_request *peer_req; + struct p_data *p = pi->data; + u32 peer_seq = be32_to_cpu(p->seq_num); int rw = WRITE; u32 dp_flags; + int err, tp; - header_size = sizeof(*p) - sizeof(*h); - data_size = h->length - header_size; - - ERR_IF(data_size == 0) return FALSE; - - if (drbd_recv(mdev, h->payload, header_size) != header_size) - return FALSE; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; if (!get_ldev(mdev)) { - if (DRBD_ratelimit(5*HZ, 5)) - dev_err(DEV, "Can not write mirrored data block " - "to local disk.\n"); - spin_lock(&mdev->peer_seq_lock); - if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num)) - mdev->peer_seq++; - spin_unlock(&mdev->peer_seq_lock); + int err2; - drbd_send_ack_dp(mdev, P_NEG_ACK, p); - atomic_inc(&mdev->current_epoch->epoch_size); - return drbd_drain_block(mdev, data_size); + err = wait_for_and_update_peer_seq(mdev, peer_seq); + drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size); + atomic_inc(&tconn->current_epoch->epoch_size); + err2 = drbd_drain_block(mdev, pi->size); + if (!err) + err = err2; + return err; } - /* get_ldev(mdev) successful. - * Corresponding put_ldev done either below (on various errors), - * or in drbd_endio_write_sec, if we successfully submit the data at - * the end of this function. */ + /* + * Corresponding put_ldev done either below (on various errors), or in + * drbd_peer_request_endio, if we successfully submit the data at the + * end of this function. 
+ */ sector = be64_to_cpu(p->sector); - e = read_in_block(mdev, p->block_id, sector, data_size); - if (!e) { + peer_req = read_in_block(mdev, p->block_id, sector, pi->size); + if (!peer_req) { put_ldev(mdev); - return FALSE; + return -EIO; } - e->private_bio->bi_end_io = drbd_endio_write_sec; - e->w.cb = e_end_block; + peer_req->w.cb = e_end_block; + + dp_flags = be32_to_cpu(p->dp_flags); + rw |= wire_flags_to_bio(mdev, dp_flags); + + if (dp_flags & DP_MAY_SET_IN_SYNC) + peer_req->flags |= EE_MAY_SET_IN_SYNC; + + /* last "fixes" to rw flags. + * Strip off BIO_RW_BARRIER unconditionally, + * it is not supposed to be here anyways. + * (Was FUA or FLUSH on the peer, + * and got translated to BARRIER on this side). + * Note that the epoch handling code below + * may add it again, though. + */ + rw &= ~DRBD_REQ_HARDBARRIER; - spin_lock(&mdev->epoch_lock); - e->epoch = mdev->current_epoch; - atomic_inc(&e->epoch->epoch_size); - atomic_inc(&e->epoch->active); + spin_lock(&tconn->epoch_lock); + peer_req->epoch = tconn->current_epoch; + atomic_inc(&peer_req->epoch->epoch_size); + atomic_inc(&peer_req->epoch->active); - if (mdev->write_ordering == WO_bio_barrier && atomic_read(&e->epoch->epoch_size) == 1) { + if (mdev->tconn->write_ordering == WO_bio_barrier && + atomic_read(&peer_req->epoch->epoch_size) == 1) { struct drbd_epoch *epoch; /* Issue a barrier if we start a new epoch, and the previous epoch was not a epoch containing a single request which already was a Barrier. 
*/ - epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list); - if (epoch == e->epoch) { - set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags); - trace_drbd_epoch(mdev, e->epoch, EV_TRACE_ADD_BARRIER); - rw |= (1<flags |= EE_IS_BARRIER; + epoch = list_entry(peer_req->epoch->list.prev, struct drbd_epoch, list); + if (epoch == peer_req->epoch) { + set_bit(DE_CONTAINS_A_BARRIER, &peer_req->epoch->flags); + rw |= DRBD_REQ_FLUSH | DRBD_REQ_FUA; + peer_req->flags |= EE_IS_BARRIER; } else { if (atomic_read(&epoch->epoch_size) > 1 || !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) { set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); - trace_drbd_epoch(mdev, epoch, EV_TRACE_SETTING_BI); - set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags); - trace_drbd_epoch(mdev, e->epoch, EV_TRACE_ADD_BARRIER); - rw |= (1<flags |= EE_IS_BARRIER; + set_bit(DE_CONTAINS_A_BARRIER, &peer_req->epoch->flags); + rw |= DRBD_REQ_FLUSH | DRBD_REQ_FUA; + peer_req->flags |= EE_IS_BARRIER; } } } - spin_unlock(&mdev->epoch_lock); - - dp_flags = be32_to_cpu(p->dp_flags); - if (dp_flags & DP_HARDBARRIER) { - dev_err(DEV, "ASSERT FAILED would have submitted barrier request\n"); - /* rw |= (1<flags |= EE_MAY_SET_IN_SYNC; + spin_unlock(&tconn->epoch_lock); - /* I'm the receiver, I do hold a net_cnt reference. */ - if (!mdev->net_conf->two_primaries) { - spin_lock_irq(&mdev->req_lock); - } else { - /* don't get the req_lock yet, - * we may sleep in drbd_wait_peer_seq */ - const int size = e->size; - const int discard = test_bit(DISCARD_CONCURRENT, &mdev->flags); - DEFINE_WAIT(wait); - struct drbd_request *i; - struct hlist_node *n; - struct hlist_head *slot; - int first; - - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); - BUG_ON(mdev->ee_hash == NULL); - BUG_ON(mdev->tl_hash == NULL); - - /* conflict detection and handling: - * 1. wait on the sequence number, - * in case this data packet overtook ACK packets. - * 2. check our hash tables for conflicting requests. 
- * we only need to walk the tl_hash, since an ee can not - * have a conflict with an other ee: on the submitting - * node, the corresponding req had already been conflicting, - * and a conflicting req is never sent. - * - * Note: for two_primaries, we are protocol C, - * so there cannot be any request that is DONE - * but still on the transfer log. - * - * unconditionally add to the ee_hash. - * - * if no conflicting request is found: - * submit. - * - * if any conflicting request is found - * that has not yet been acked, - * AND I have the "discard concurrent writes" flag: - * queue (via done_ee) the P_DISCARD_ACK; OUT. - * - * if any conflicting request is found: - * block the receiver, waiting on misc_wait - * until no more conflicting requests are there, - * or we get interrupted (disconnect). - * - * we do not just write after local io completion of those - * requests, but only after req is done completely, i.e. - * we wait for the P_DISCARD_ACK to arrive! - * - * then proceed normally, i.e. submit. - */ - if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num))) + rcu_read_lock(); + tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries; + rcu_read_unlock(); + if (tp) { + peer_req->flags |= EE_IN_INTERVAL_TREE; + err = wait_for_and_update_peer_seq(mdev, peer_seq); + if (err) goto out_interrupted; - - spin_lock_irq(&mdev->req_lock); - - hlist_add_head(&e->colision, ee_hash_slot(mdev, sector)); - -#define OVERLAPS overlaps(i->sector, i->size, sector, size) - slot = tl_hash_slot(mdev, sector); - first = 1; - for (;;) { - int have_unacked = 0; - int have_conflict = 0; - prepare_to_wait(&mdev->misc_wait, &wait, - TASK_INTERRUPTIBLE); - hlist_for_each_entry(i, n, slot, colision) { - if (OVERLAPS) { - /* only ALERT on first iteration, - * we may be woken up early... */ - if (first) - dev_alert(DEV, "%s[%u] Concurrent local write detected!" 
- " new: %llus +%u; pending: %llus +%u\n", - current->comm, current->pid, - (unsigned long long)sector, size, - (unsigned long long)i->sector, i->size); - if (i->rq_state & RQ_NET_PENDING) - ++have_unacked; - ++have_conflict; - } - } -#undef OVERLAPS - if (!have_conflict) - break; - - /* Discard Ack only for the _first_ iteration */ - if (first && discard && have_unacked) { - dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n", - (unsigned long long)sector); - inc_unacked(mdev); - e->w.cb = e_send_discard_ack; - list_add_tail(&e->w.list, &mdev->done_ee); - - spin_unlock_irq(&mdev->req_lock); - - /* we could probably send that P_DISCARD_ACK ourselves, - * but I don't like the receiver using the msock */ - + spin_lock_irq(&mdev->tconn->req_lock); + err = handle_write_conflicts(mdev, peer_req); + if (err) { + spin_unlock_irq(&mdev->tconn->req_lock); + if (err == -ENOENT) { put_ldev(mdev); - wake_asender(mdev); - finish_wait(&mdev->misc_wait, &wait); - return TRUE; - } - - if (signal_pending(current)) { - hlist_del_init(&e->colision); - - spin_unlock_irq(&mdev->req_lock); - - finish_wait(&mdev->misc_wait, &wait); - goto out_interrupted; + return 0; } - - spin_unlock_irq(&mdev->req_lock); - if (first) { - first = 0; - dev_alert(DEV, "Concurrent write! [W AFTERWARDS] " - "sec=%llus\n", (unsigned long long)sector); - } else if (discard) { - /* we had none on the first iteration. - * there must be none now. 
*/ - D_ASSERT(have_unacked == 0); - } - schedule(); - spin_lock_irq(&mdev->req_lock); + goto out_interrupted; + } + } else + spin_lock_irq(&mdev->tconn->req_lock); + list_add(&peer_req->w.list, &mdev->active_ee); + spin_unlock_irq(&mdev->tconn->req_lock); + + if (mdev->tconn->agreed_pro_version < 100) { + rcu_read_lock(); + switch (rcu_dereference(mdev->tconn->net_conf)->wire_protocol) { + case DRBD_PROT_C: + dp_flags |= DP_SEND_WRITE_ACK; + break; + case DRBD_PROT_B: + dp_flags |= DP_SEND_RECEIVE_ACK; + break; } - finish_wait(&mdev->misc_wait, &wait); + rcu_read_unlock(); } - list_add(&e->w.list, &mdev->active_ee); - spin_unlock_irq(&mdev->req_lock); - - switch (mdev->net_conf->wire_protocol) { - case DRBD_PROT_C: + if (dp_flags & DP_SEND_WRITE_ACK) { + peer_req->flags |= EE_SEND_WRITE_ACK; inc_unacked(mdev); /* corresponding dec_unacked() in e_end_block() * respective _drbd_clear_done_ee */ - break; - case DRBD_PROT_B: + } + + if (dp_flags & DP_SEND_RECEIVE_ACK) { /* I really don't like it that the receiver thread * sends on the msock, but anyways */ - drbd_send_ack(mdev, P_RECV_ACK, e); - break; - case DRBD_PROT_A: - /* nothing to do */ - break; + drbd_send_ack(mdev, P_RECV_ACK, peer_req); } - if (mdev->state.pdsk == D_DISKLESS) { + if (mdev->state.pdsk < D_INCONSISTENT) { /* In case we have the only disk of the cluster, */ - drbd_set_out_of_sync(mdev, e->sector, e->size); - e->flags |= EE_CALL_AL_COMPLETE_IO; - drbd_al_begin_io(mdev, e->sector); + drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size); + peer_req->flags |= EE_CALL_AL_COMPLETE_IO; + peer_req->flags &= ~EE_MAY_SET_IN_SYNC; + drbd_al_begin_io(mdev, &peer_req->i); } - e->private_bio->bi_rw = rw; - trace_drbd_ee(mdev, e, "submitting for (data)write"); - trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL); - drbd_generic_make_request(mdev, DRBD_FAULT_DT_WR, e->private_bio); - /* accounting done in endio */ + err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR); + if (!err) 
+ return 0; - maybe_kick_lo(mdev); - return TRUE; + /* don't care for the reason here */ + dev_err(DEV, "submit failed, triggering re-connect\n"); + spin_lock_irq(&mdev->tconn->req_lock); + list_del(&peer_req->w.list); + drbd_remove_epoch_entry_interval(mdev, peer_req); + spin_unlock_irq(&mdev->tconn->req_lock); + if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) + drbd_al_complete_io(mdev, &peer_req->i); out_interrupted: - /* yes, the epoch_size now is imbalanced. - * but we drop the connection anyways, so we don't have a chance to - * receive a barrier... atomic_inc(&mdev->epoch_size); */ + drbd_may_finish_epoch(tconn, peer_req->epoch, EV_PUT + EV_CLEANUP); put_ldev(mdev); - drbd_free_ee(mdev, e); - return FALSE; + drbd_free_peer_req(mdev, peer_req); + return err; +} + +/* We may throttle resync, if the lower device seems to be busy, + * and current sync rate is above c_min_rate. + * + * To decide whether or not the lower device is busy, we use a scheme similar + * to MD RAID is_mddev_idle(): if the partition stats reveal "significant" + * (more than 64 sectors) of activity we cannot account for with our own resync + * activity, it obviously is "busy". + * + * The current sync rate used here uses only the most recent two step marks, + * to have a short time average so we can react faster. + */ +int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector) +{ + unsigned long db, dt, dbdt; + struct lc_element *tmp; + int curr_events; + int throttle = 0; + unsigned int c_min_rate; + + rcu_read_lock(); + c_min_rate = rcu_dereference(mdev->ldev->disk_conf)->c_min_rate; + rcu_read_unlock(); + + /* feature disabled? 
*/ + if (c_min_rate == 0) + return 0; + + spin_lock_irq(&mdev->al_lock); + tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector)); + if (tmp) { + struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); + if (test_bit(BME_PRIORITY, &bm_ext->flags)) { + spin_unlock_irq(&mdev->al_lock); + return 0; + } + /* Do not slow down if app IO is already waiting for this extent */ + } + spin_unlock_irq(&mdev->al_lock); + + curr_events = drbd_backing_bdev_events(mdev) + - atomic_read(&mdev->rs_sect_ev); + + if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) { + unsigned long rs_left; + int i; + + mdev->rs_last_events = curr_events; + + /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP, + * approx. */ + i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS; + + if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T) + rs_left = mdev->ov_left; + else + rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; + + dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ; + if (!dt) + dt++; + db = mdev->rs_mark_left[i] - rs_left; + dbdt = Bit2KB(db/dt); + + if (dbdt > c_min_rate) + throttle = 1; + } + return throttle; } -STATIC int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) + +STATIC int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi) { + struct drbd_conf *mdev; sector_t sector; - const sector_t capacity = drbd_get_capacity(mdev->this_bdev); - struct drbd_epoch_entry *e; + sector_t capacity; + struct drbd_peer_request *peer_req; struct digest_info *di = NULL; - int size, digest_size; + int size, verb; unsigned int fault_type; - struct p_block_req *p = - (struct p_block_req *)h; - const int brps = sizeof(*p)-sizeof(*h); + struct p_block_req *p = pi->data; - if (drbd_recv(mdev, h->payload, brps) != brps) - return FALSE; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; + capacity = drbd_get_capacity(mdev->this_bdev); sector = be64_to_cpu(p->sector); size = 
be32_to_cpu(p->blksize); - if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_SEGMENT_SIZE) { + if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, (unsigned long long)sector, size); - return FALSE; + return -EINVAL; } if (sector + (size>>9) > capacity) { dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__, (unsigned long long)sector, size); - return FALSE; + return -EINVAL; } if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) { - if (DRBD_ratelimit(5*HZ, 5)) + verb = 1; + switch (pi->cmd) { + case P_DATA_REQUEST: + drbd_send_ack_rp(mdev, P_NEG_DREPLY, p); + break; + case P_RS_DATA_REQUEST: + case P_CSUM_RS_REQUEST: + case P_OV_REQUEST: + drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p); + break; + case P_OV_REPLY: + verb = 0; + dec_rs_pending(mdev); + drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC); + break; + default: + BUG(); + } + if (verb && DRBD_ratelimit(5*HZ, 5)) dev_err(DEV, "Can not satisfy peer's read request, " "no local data.\n"); - drbd_send_ack_rp(mdev, h->command == P_DATA_REQUEST ? P_NEG_DREPLY : - P_NEG_RS_DREPLY , p); - return TRUE; + + /* drain possibly payload */ + return drbd_drain_block(mdev, pi->size); } /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD * "criss-cross" setup, that might cause write-out on some other DRBD, * which in turn might block on the other node at this very place. 
*/ - e = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO); - if (!e) { + peer_req = drbd_alloc_peer_req(mdev, p->block_id, sector, size, GFP_NOIO); + if (!peer_req) { put_ldev(mdev); - return FALSE; + return -ENOMEM; } - e->private_bio->bi_rw = READ; - e->private_bio->bi_end_io = drbd_endio_read_sec; - - switch (h->command) { + switch (pi->cmd) { case P_DATA_REQUEST: - e->w.cb = w_e_end_data_req; + peer_req->w.cb = w_e_end_data_req; fault_type = DRBD_FAULT_DT_RD; - break; + /* application IO, don't drbd_rs_begin_io */ + goto submit; + case P_RS_DATA_REQUEST: - e->w.cb = w_e_end_rsdata_req; + peer_req->w.cb = w_e_end_rsdata_req; fault_type = DRBD_FAULT_RS_RD; - /* Eventually this should become asynchronously. Currently it - * blocks the whole receiver just to delay the reading of a - * resync data block. - * the drbd_work_queue mechanism is made for this... - */ - if (!drbd_rs_begin_io(mdev, sector)) { - /* we have been interrupted, - * probably connection lost! */ - D_ASSERT(signal_pending(current)); - goto out_free_e; - } + /* used in the sector offset progress display */ + mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); break; case P_OV_REPLY: case P_CSUM_RS_REQUEST: fault_type = DRBD_FAULT_RS_RD; - digest_size = h->length - brps ; - di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO); + di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO); if (!di) goto out_free_e; - di->digest_size = digest_size; + di->digest_size = pi->size; di->digest = (((char *)di)+sizeof(struct digest_info)); - if (drbd_recv(mdev, di->digest, digest_size) != digest_size) + peer_req->digest = di; + peer_req->flags |= EE_HAS_DIGEST; + + if (drbd_recv_all(mdev->tconn, di->digest, pi->size)) goto out_free_e; - e->block_id = (u64)(unsigned long)di; - if (h->command == P_CSUM_RS_REQUEST) { - D_ASSERT(mdev->agreed_pro_version >= 89); - e->w.cb = w_e_end_csum_rs_req; - } else if (h->command == P_OV_REPLY) { - e->w.cb = w_e_end_ov_reply; + if (pi->cmd == P_CSUM_RS_REQUEST) { + 
D_ASSERT(mdev->tconn->agreed_pro_version >= 89); + peer_req->w.cb = w_e_end_csum_rs_req; + /* used in the sector offset progress display */ + mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); + } else if (pi->cmd == P_OV_REPLY) { + /* track progress, we may need to throttle */ + atomic_add(size >> 9, &mdev->rs_sect_in); + peer_req->w.cb = w_e_end_ov_reply; dec_rs_pending(mdev); - break; - } - - if (!drbd_rs_begin_io(mdev, sector)) { - /* we have been interrupted, probably connection lost! */ - D_ASSERT(signal_pending(current)); - goto out_free_e; + /* drbd_rs_begin_io done when we sent this request, + * but accounting still needs to be done. */ + goto submit_for_resync; } break; case P_OV_REQUEST: - if (mdev->state.conn >= C_CONNECTED && - mdev->state.conn != C_VERIFY_T) - dev_warn(DEV, "ASSERT FAILED: got P_OV_REQUEST while being %s\n", - drbd_conn_str(mdev->state.conn)); if (mdev->ov_start_sector == ~(sector_t)0 && - mdev->agreed_pro_version >= 90) { + mdev->tconn->agreed_pro_version >= 90) { + unsigned long now = jiffies; + int i; mdev->ov_start_sector = sector; mdev->ov_position = sector; - mdev->ov_left = mdev->rs_total - BM_SECT_TO_BIT(sector); + mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector); + mdev->rs_total = mdev->ov_left; + for (i = 0; i < DRBD_SYNC_MARKS; i++) { + mdev->rs_mark_left[i] = mdev->ov_left; + mdev->rs_mark_time[i] = now; + } dev_info(DEV, "Online Verify start sector: %llu\n", (unsigned long long)sector); } - e->w.cb = w_e_end_ov_req; + peer_req->w.cb = w_e_end_ov_req; fault_type = DRBD_FAULT_RS_RD; - /* Eventually this should become asynchronous. Currently it - * blocks the whole receiver just to delay the reading of a - * resync data block. - * the drbd_work_queue mechanism is made for this... - */ - if (!drbd_rs_begin_io(mdev, sector)) { - /* we have been interrupted, - * probably connection lost! 
*/ - D_ASSERT(signal_pending(current)); - goto out_free_e; - } break; - default: - dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n", - cmdname(h->command)); - fault_type = DRBD_FAULT_MAX; + BUG(); } - spin_lock_irq(&mdev->req_lock); - list_add(&e->w.list, &mdev->read_ee); - spin_unlock_irq(&mdev->req_lock); + /* Throttle, drbd_rs_begin_io and submit should become asynchronous + * wrt the receiver, but it is not as straightforward as it may seem. + * Various places in the resync start and stop logic assume resync + * requests are processed in order, requeuing this on the worker thread + * introduces a bunch of new code for synchronization between threads. + * + * Unlimited throttling before drbd_rs_begin_io may stall the resync + * "forever", throttling after drbd_rs_begin_io will lock that extent + * for application writes for the same time. For now, just throttle + * here, where the rest of the code expects the receiver to sleep for + * a while, anyways. + */ + + /* Throttle before drbd_rs_begin_io, as that locks out application IO; + * this defers syncer requests for some time, before letting at least + * on request through. The resync controller on the receiving side + * will adapt to the incoming rate accordingly. + * + * We cannot throttle here if remote is Primary/SyncTarget: + * we would also throttle its application reads. + * In that case, throttling is done on the SyncTarget only. 
+ */ + if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector)) + schedule_timeout_uninterruptible(HZ/10); + if (drbd_rs_begin_io(mdev, sector)) + goto out_free_e; + +submit_for_resync: + atomic_add(size >> 9, &mdev->rs_sect_ev); +submit: inc_unacked(mdev); + spin_lock_irq(&mdev->tconn->req_lock); + list_add_tail(&peer_req->w.list, &mdev->read_ee); + spin_unlock_irq(&mdev->tconn->req_lock); - trace_drbd_ee(mdev, e, "submitting for read"); - trace_drbd_bio(mdev, "Sec", e->private_bio, 0, NULL); - drbd_generic_make_request(mdev, fault_type, e->private_bio); - maybe_kick_lo(mdev); + if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0) + return 0; - return TRUE; + /* don't care for the reason here */ + dev_err(DEV, "submit failed, triggering re-connect\n"); + spin_lock_irq(&mdev->tconn->req_lock); + list_del(&peer_req->w.list); + spin_unlock_irq(&mdev->tconn->req_lock); + /* no drbd_rs_complete_io(), we are dropping the connection anyways */ out_free_e: - kfree(di); put_ldev(mdev); - drbd_free_ee(mdev, e); - return FALSE; + drbd_free_peer_req(mdev, peer_req); + return -EIO; } STATIC int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local) { int self, peer, rv = -100; unsigned long ch_self, ch_peer; + enum drbd_after_sb_p after_sb_0p; self = mdev->ldev->md.uuid[UI_BITMAP] & 1; peer = mdev->p_uuid[UI_BITMAP] & 1; @@ -2140,10 +2717,14 @@ ch_peer = mdev->p_uuid[UI_SIZE]; ch_self = mdev->comm_bm_set; - switch (mdev->net_conf->after_sb_0p) { + rcu_read_lock(); + after_sb_0p = rcu_dereference(mdev->tconn->net_conf)->after_sb_0p; + rcu_read_unlock(); + switch (after_sb_0p) { case ASB_CONSENSUS: case ASB_DISCARD_SECONDARY: case ASB_CALL_HELPER: + case ASB_VIOLENTLY: dev_err(DEV, "Configuration error.\n"); break; case ASB_DISCONNECT: @@ -2172,14 +2753,14 @@ "Using discard-least-changes instead\n"); case ASB_DISCARD_ZERO_CHG: if (ch_peer == 0 && ch_self == 0) { - rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) + rv = 
test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) ? -1 : 1; break; } else { if (ch_peer == 0) { rv = 1; break; } if (ch_self == 0) { rv = -1; break; } } - if (mdev->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG) + if (after_sb_0p == ASB_DISCARD_ZERO_CHG) break; case ASB_DISCARD_LEAST_CHG: if (ch_self < ch_peer) @@ -2188,7 +2769,7 @@ rv = 1; else /* ( ch_self == ch_peer ) */ /* Well, then use something else. */ - rv = test_bit(DISCARD_CONCURRENT, &mdev->flags) + rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) ? -1 : 1; break; case ASB_DISCARD_LOCAL: @@ -2203,17 +2784,19 @@ STATIC int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local) { - int self, peer, hg, rv = -100; - - self = mdev->ldev->md.uuid[UI_BITMAP] & 1; - peer = mdev->p_uuid[UI_BITMAP] & 1; + int hg, rv = -100; + enum drbd_after_sb_p after_sb_1p; - switch (mdev->net_conf->after_sb_1p) { + rcu_read_lock(); + after_sb_1p = rcu_dereference(mdev->tconn->net_conf)->after_sb_1p; + rcu_read_unlock(); + switch (after_sb_1p) { case ASB_DISCARD_YOUNGER_PRI: case ASB_DISCARD_OLDER_PRI: case ASB_DISCARD_LEAST_CHG: case ASB_DISCARD_LOCAL: case ASB_DISCARD_REMOTE: + case ASB_DISCARD_ZERO_CHG: dev_err(DEV, "Configuration error.\n"); break; case ASB_DISCONNECT: @@ -2233,12 +2816,14 @@ case ASB_CALL_HELPER: hg = drbd_asb_recover_0p(mdev); if (hg == -1 && mdev->state.role == R_PRIMARY) { - self = drbd_set_role(mdev, R_SECONDARY, 0); + enum drbd_state_rv rv2; + + drbd_set_role(mdev, R_SECONDARY, 0); /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, * we might be here in C_WF_REPORT_PARAMS which is transient. * we do not need to wait for the after state change work either. 
*/ - self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); - if (self != SS_SUCCESS) { + rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); + if (rv2 != SS_SUCCESS) { drbd_khelper(mdev, "pri-lost-after-sb"); } else { dev_warn(DEV, "Successfully gave up primary role.\n"); @@ -2253,12 +2838,13 @@ STATIC int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local) { - int self, peer, hg, rv = -100; + int hg, rv = -100; + enum drbd_after_sb_p after_sb_2p; - self = mdev->ldev->md.uuid[UI_BITMAP] & 1; - peer = mdev->p_uuid[UI_BITMAP] & 1; - - switch (mdev->net_conf->after_sb_2p) { + rcu_read_lock(); + after_sb_2p = rcu_dereference(mdev->tconn->net_conf)->after_sb_2p; + rcu_read_unlock(); + switch (after_sb_2p) { case ASB_DISCARD_YOUNGER_PRI: case ASB_DISCARD_OLDER_PRI: case ASB_DISCARD_LEAST_CHG: @@ -2266,6 +2852,7 @@ case ASB_DISCARD_REMOTE: case ASB_CONSENSUS: case ASB_DISCARD_SECONDARY: + case ASB_DISCARD_ZERO_CHG: dev_err(DEV, "Configuration error.\n"); break; case ASB_VIOLENTLY: @@ -2276,11 +2863,13 @@ case ASB_CALL_HELPER: hg = drbd_asb_recover_0p(mdev); if (hg == -1) { + enum drbd_state_rv rv2; + /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE, * we might be here in C_WF_REPORT_PARAMS which is transient. * we do not need to wait for the after state change work either. 
*/ - self = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); - if (self != SS_SUCCESS) { + rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY)); + if (rv2 != SS_SUCCESS) { drbd_khelper(mdev, "pri-lost-after-sb"); } else { dev_warn(DEV, "Successfully gave up primary role.\n"); @@ -2319,6 +2908,8 @@ -2 C_SYNC_TARGET set BitMap -100 after split brain, disconnect -1000 unrelated data +-1091 requires proto 91 +-1096 requires proto 96 */ STATIC int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local) { @@ -2347,8 +2938,8 @@ if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) { - if (mdev->agreed_pro_version < 91) - return -1001; + if (mdev->tconn->agreed_pro_version < 91) + return -1091; if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) && (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) { @@ -2368,8 +2959,8 @@ if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) { - if (mdev->agreed_pro_version < 91) - return -1001; + if (mdev->tconn->agreed_pro_version < 91) + return -1091; if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) && (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) { @@ -2401,7 +2992,7 @@ case 1: /* self_pri && !peer_pri */ return 1; case 2: /* !self_pri && peer_pri */ return -1; case 3: /* self_pri && peer_pri */ - dc = test_bit(DISCARD_CONCURRENT, &mdev->flags); + dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags); return dc ? -1 : 1; } } @@ -2414,17 +3005,22 @@ *rule_nr = 51; peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); if (self == peer) { - self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); - peer = mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1); - if (self == peer) { + if (mdev->tconn->agreed_pro_version < 96 ? 
+ (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == + (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) : + peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) { /* The last P_SYNC_UUID did not get though. Undo the last start of resync as sync source modifications of the peer's UUIDs. */ - if (mdev->agreed_pro_version < 91) - return -1001; + if (mdev->tconn->agreed_pro_version < 91) + return -1091; mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1]; + + dev_info(DEV, "Did not got last syncUUID packet, corrected:\n"); + drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); + return -1; } } @@ -2446,20 +3042,20 @@ *rule_nr = 71; self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); if (self == peer) { - self = mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1); - peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1); - if (self == peer) { + if (mdev->tconn->agreed_pro_version < 96 ? + (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == + (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) : + self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) { /* The last P_SYNC_UUID did not get though. Undo the last start of resync as sync source modifications of our UUIDs. */ - if (mdev->agreed_pro_version < 91) - return -1001; + if (mdev->tconn->agreed_pro_version < 91) + return -1091; _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]); _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]); - dev_info(DEV, "Undid last start of resync:\n"); - + dev_info(DEV, "Last syncUUID did not get through, corrected:\n"); drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->state.disk >= D_NEGOTIATING ? 
drbd_bm_total_weight(mdev) : 0, 0); @@ -2501,9 +3097,10 @@ STATIC enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role, enum drbd_disk_state peer_disk) __must_hold(local) { - int hg, rule_nr; enum drbd_conns rv = C_MASK; enum drbd_disk_state mydisk; + struct net_conf *nc; + int hg, rule_nr, rr_conflict, tentative; mydisk = mdev->state.disk; if (mydisk == D_NEGOTIATING) @@ -2522,8 +3119,8 @@ dev_alert(DEV, "Unrelated data, aborting!\n"); return C_MASK; } - if (hg == -1001) { - dev_alert(DEV, "To resolve this both sides have to support at least protocol\n"); + if (hg < -1000) { + dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000); return C_MASK; } @@ -2537,7 +3134,13 @@ hg > 0 ? "source" : "target"); } - if (hg == 100 || (hg == -100 && mdev->net_conf->always_asbp)) { + if (abs(hg) == 100) + drbd_khelper(mdev, "initial-split-brain"); + + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + + if (hg == 100 || (hg == -100 && nc->always_asbp)) { int pcount = (mdev->state.role == R_PRIMARY) + (peer_role == R_PRIMARY); int forced = (hg == -100); @@ -2566,9 +3169,9 @@ } if (hg == -100) { - if (mdev->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1)) + if (test_bit(DISCARD_MY_DATA, &mdev->flags) && !(mdev->p_uuid[UI_FLAGS]&1)) hg = -1; - if (!mdev->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1)) + if (!test_bit(DISCARD_MY_DATA, &mdev->flags) && (mdev->p_uuid[UI_FLAGS]&1)) hg = 1; if (abs(hg) < 100) @@ -2576,9 +3179,16 @@ "Sync from %s node\n", (hg < 0) ? "peer" : "this"); } + rr_conflict = nc->rr_conflict; + tentative = nc->tentative; + rcu_read_unlock(); if (hg == -100) { - dev_alert(DEV, "Split-Brain detected, dropping connection!\n"); + /* FIXME this log message is not correct if we end up here + * after an attempted attach on a diskless node. + * We just refuse to attach -- well, we drop the "connection" + * to that disk, in a way... 
*/ + dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n"); drbd_khelper(mdev, "split-brain"); return C_MASK; } @@ -2590,7 +3200,7 @@ if (hg < 0 && /* by intention we do not use mydisk here. */ mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) { - switch (mdev->net_conf->rr_conflict) { + switch (rr_conflict) { case ASB_CALL_HELPER: drbd_khelper(mdev, "pri-lost"); /* fall through */ @@ -2603,9 +3213,20 @@ } } + if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) { + if (hg == 0) + dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n"); + else + dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.", + drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET), + abs(hg) >= 2 ? "full" : "bit-map based"); + return C_MASK; + } + if (abs(hg) >= 2) { dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); - if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake")) + if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake", + BM_LOCKED_SET_ALLOWED)) return C_MASK; } @@ -2624,98 +3245,168 @@ return rv; } -/* returns 1 if invalid */ -STATIC int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) +STATIC enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer) { /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */ - if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) || - (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL)) - return 0; + if (peer == ASB_DISCARD_REMOTE) + return ASB_DISCARD_LOCAL; /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */ - if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL || - self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL) - return 1; + if (peer == ASB_DISCARD_LOCAL) + return ASB_DISCARD_REMOTE; /* everything else is valid if they are equal on both sides. 
*/ - if (peer == self) - return 0; - - /* everything es is invalid. */ - return 1; + return peer; } -STATIC int receive_protocol(struct drbd_conf *mdev, struct p_header *h) +STATIC int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_protocol *p = (struct p_protocol *)h; - int header_size, data_size; - int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; - int p_want_lose, p_two_primaries; - char p_integrity_alg[SHARED_SECRET_MAX] = ""; - - header_size = sizeof(*p) - sizeof(*h); - data_size = h->length - header_size; - - if (drbd_recv(mdev, h->payload, header_size) != header_size) - return FALSE; + struct p_protocol *p = pi->data; + enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; + int p_proto, p_discard_my_data, p_two_primaries, cf; + struct net_conf *nc, *old_net_conf, *new_net_conf = NULL; + char integrity_alg[SHARED_SECRET_MAX] = ""; + struct crypto_hash *peer_integrity_tfm = NULL; + void *int_dig_in = NULL, *int_dig_vv = NULL; p_proto = be32_to_cpu(p->protocol); p_after_sb_0p = be32_to_cpu(p->after_sb_0p); p_after_sb_1p = be32_to_cpu(p->after_sb_1p); p_after_sb_2p = be32_to_cpu(p->after_sb_2p); - p_want_lose = be32_to_cpu(p->want_lose); p_two_primaries = be32_to_cpu(p->two_primaries); + cf = be32_to_cpu(p->conn_flags); + p_discard_my_data = cf & CF_DISCARD_MY_DATA; - if (p_proto != mdev->net_conf->wire_protocol) { - dev_err(DEV, "incompatible communication protocols\n"); - goto disconnect; - } + if (tconn->agreed_pro_version >= 87) { + int err; - if (cmp_after_sb(p_after_sb_0p, mdev->net_conf->after_sb_0p)) { - dev_err(DEV, "incompatible after-sb-0pri settings\n"); - goto disconnect; + if (pi->size > sizeof(integrity_alg)) + return -EIO; + err = drbd_recv_all(tconn, integrity_alg, pi->size); + if (err) + return err; + integrity_alg[SHARED_SECRET_MAX - 1] = 0; } - if (cmp_after_sb(p_after_sb_1p, mdev->net_conf->after_sb_1p)) { - dev_err(DEV, "incompatible after-sb-1pri settings\n"); - goto disconnect; - } + 
if (pi->cmd != P_PROTOCOL_UPDATE) { + clear_bit(CONN_DRY_RUN, &tconn->flags); - if (cmp_after_sb(p_after_sb_2p, mdev->net_conf->after_sb_2p)) { - dev_err(DEV, "incompatible after-sb-2pri settings\n"); - goto disconnect; - } + if (cf & CF_DRY_RUN) + set_bit(CONN_DRY_RUN, &tconn->flags); - if (p_want_lose && mdev->net_conf->want_lose) { - dev_err(DEV, "both sides have the 'want_lose' flag set\n"); - goto disconnect; - } + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); - if (p_two_primaries != mdev->net_conf->two_primaries) { - dev_err(DEV, "incompatible setting of the two-primaries options\n"); - goto disconnect; + if (p_proto != nc->wire_protocol) { + conn_err(tconn, "incompatible %s settings\n", "protocol"); + goto disconnect_rcu_unlock; + } + + if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) { + conn_err(tconn, "incompatible %s settings\n", "after-sb-0pri"); + goto disconnect_rcu_unlock; + } + + if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) { + conn_err(tconn, "incompatible %s settings\n", "after-sb-1pri"); + goto disconnect_rcu_unlock; + } + + if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) { + conn_err(tconn, "incompatible %s settings\n", "after-sb-2pri"); + goto disconnect_rcu_unlock; + } + + if (p_discard_my_data && nc->discard_my_data) { + conn_err(tconn, "incompatible %s settings\n", "discard-my-data"); + goto disconnect_rcu_unlock; + } + + if (p_two_primaries != nc->two_primaries) { + conn_err(tconn, "incompatible %s settings\n", "allow-two-primaries"); + goto disconnect_rcu_unlock; + } + + if (strcmp(integrity_alg, nc->integrity_alg)) { + conn_err(tconn, "incompatible %s settings\n", "data-integrity-alg"); + goto disconnect_rcu_unlock; + } + + rcu_read_unlock(); } - if (mdev->agreed_pro_version >= 87) { - unsigned char *my_alg = mdev->net_conf->integrity_alg; + if (integrity_alg[0]) { + int hash_size; + + /* + * We can only change the peer data integrity algorithm + * here. 
Changing our own data integrity algorithm + * requires that we send a P_PROTOCOL_UPDATE packet at + * the same time; otherwise, the peer has no way to + * tell between which packets the algorithm should + * change. + */ - if (drbd_recv(mdev, p_integrity_alg, data_size) != data_size) - return FALSE; + peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC); + if (!peer_integrity_tfm) { + conn_err(tconn, "peer data-integrity-alg %s not supported\n", + integrity_alg); + goto disconnect; + } - p_integrity_alg[SHARED_SECRET_MAX-1] = 0; - if (strcmp(p_integrity_alg, my_alg)) { - dev_err(DEV, "incompatible setting of the data-integrity-alg\n"); + hash_size = crypto_hash_digestsize(peer_integrity_tfm); + int_dig_in = kmalloc(hash_size, GFP_KERNEL); + int_dig_vv = kmalloc(hash_size, GFP_KERNEL); + if (!(int_dig_in && int_dig_vv)) { + conn_err(tconn, "Allocation of buffers for data integrity checking failed\n"); goto disconnect; } - dev_info(DEV, "data-integrity-alg: %s\n", - my_alg[0] ? 
my_alg : (unsigned char *)""); } - return TRUE; + new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL); + if (!new_net_conf) { + conn_err(tconn, "Allocation of new net_conf failed\n"); + goto disconnect; + } + + mutex_lock(&tconn->data.mutex); + mutex_lock(&tconn->conf_update); + old_net_conf = tconn->net_conf; + *new_net_conf = *old_net_conf; + + new_net_conf->wire_protocol = p_proto; + new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p); + new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p); + new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p); + new_net_conf->two_primaries = p_two_primaries; + + rcu_assign_pointer(tconn->net_conf, new_net_conf); + mutex_unlock(&tconn->conf_update); + mutex_unlock(&tconn->data.mutex); + + crypto_free_hash(tconn->peer_integrity_tfm); + kfree(tconn->int_dig_in); + kfree(tconn->int_dig_vv); + tconn->peer_integrity_tfm = peer_integrity_tfm; + tconn->int_dig_in = int_dig_in; + tconn->int_dig_vv = int_dig_vv; + + if (strcmp(old_net_conf->integrity_alg, integrity_alg)) + conn_info(tconn, "peer data-integrity-alg: %s\n", + integrity_alg[0] ? 
integrity_alg : "(none)"); + synchronize_rcu(); + kfree(old_net_conf); + return 0; + +disconnect_rcu_unlock: + rcu_read_unlock(); disconnect: - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return FALSE; + crypto_free_hash(peer_integrity_tfm); + kfree(int_dig_in); + kfree(int_dig_vv); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); + return -EIO; } /* helper function @@ -2737,50 +3428,114 @@ alg, name, PTR_ERR(tfm)); return tfm; } - if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) { - crypto_free_hash(tfm); - dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name); - return ERR_PTR(-EINVAL); - } return tfm; } -STATIC int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) +static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info *pi) +{ + void *buffer = tconn->data.rbuf; + int size = pi->size; + + while (size) { + int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE); + s = drbd_recv(tconn, buffer, s); + if (s <= 0) { + if (s < 0) + return s; + break; + } + size -= s; + } + if (size) + return -EIO; + return 0; +} + +/* + * config_unknown_volume - device configuration command for unknown volume + * + * When a device is added to an existing connection, the node on which the + * device is added first will send configuration commands to its peer but the + * peer will not know about the device yet. It will warn and ignore these + * commands. Once the device is added on the second node, the second node will + * send the same device configuration commands, but in the other direction. + * + * (We can also end up here if drbd is misconfigured.) 
+ */ +static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi) +{ + conn_warn(tconn, "%s packet received for volume %u, which is not configured locally\n", + cmdname(pi->cmd), pi->vnr); + return ignore_remaining_packet(tconn, pi); +} + +STATIC int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi) { - int ok = TRUE; - struct p_rs_param_89 *p = (struct p_rs_param_89 *)h; + struct drbd_conf *mdev; + struct p_rs_param_95 *p; unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; - const int apv = mdev->agreed_pro_version; + struct net_conf *old_net_conf, *new_net_conf = NULL; + struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL; + const int apv = tconn->agreed_pro_version; + struct fifo_buffer *old_plan = NULL, *new_plan = NULL; + int fifo_size = 0; + int err; + + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return config_unknown_volume(tconn, pi); exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) : apv == 88 ? sizeof(struct p_rs_param) + SHARED_SECRET_MAX - : /* 89 */ sizeof(struct p_rs_param_89); + : apv <= 94 ? 
sizeof(struct p_rs_param_89) + : /* apv >= 95 */ sizeof(struct p_rs_param_95); - if (h->length > exp_max_sz) { + if (pi->size > exp_max_sz) { dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", - h->length, exp_max_sz); - return FALSE; + pi->size, exp_max_sz); + return -EIO; } if (apv <= 88) { - header_size = sizeof(struct p_rs_param) - sizeof(*h); - data_size = h->length - header_size; - } else /* apv >= 89 */ { - header_size = sizeof(struct p_rs_param_89) - sizeof(*h); - data_size = h->length - header_size; + header_size = sizeof(struct p_rs_param); + data_size = pi->size - header_size; + } else if (apv <= 94) { + header_size = sizeof(struct p_rs_param_89); + data_size = pi->size - header_size; + D_ASSERT(data_size == 0); + } else { + header_size = sizeof(struct p_rs_param_95); + data_size = pi->size - header_size; D_ASSERT(data_size == 0); } /* initialize verify_alg and csums_alg */ + p = pi->data; memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); - if (drbd_recv(mdev, h->payload, header_size) != header_size) - return FALSE; + err = drbd_recv_all(mdev->tconn, p, header_size); + if (err) + return err; + + mutex_lock(&mdev->tconn->conf_update); + old_net_conf = mdev->tconn->net_conf; + if (get_ldev(mdev)) { + new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); + if (!new_disk_conf) { + put_ldev(mdev); + mutex_unlock(&mdev->tconn->conf_update); + dev_err(DEV, "Allocation of new disk_conf failed\n"); + return -ENOMEM; + } + + old_disk_conf = mdev->ldev->disk_conf; + *new_disk_conf = *old_disk_conf; - mdev->sync_conf.rate = be32_to_cpu(p->rate); + new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate); + } if (apv >= 88) { if (apv == 88) { @@ -2788,12 +3543,13 @@ dev_err(DEV, "verify-alg too long, " "peer wants %u, accepting only %u byte\n", data_size, SHARED_SECRET_MAX); - return FALSE; + err = -EIO; + goto reconnect; } - if (drbd_recv(mdev, p->verify_alg, data_size) != data_size) - return FALSE; - + err = 
drbd_recv_all(mdev->tconn, p->verify_alg, data_size); + if (err) + goto reconnect; /* we expect NUL terminated string */ /* but just in case someone tries to be evil */ D_ASSERT(p->verify_alg[data_size-1] == 0); @@ -2808,10 +3564,10 @@ p->csums_alg[SHARED_SECRET_MAX-1] = 0; } - if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) { + if (strcmp(old_net_conf->verify_alg, p->verify_alg)) { if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n", - mdev->sync_conf.verify_alg, p->verify_alg); + old_net_conf->verify_alg, p->verify_alg); goto disconnect; } verify_tfm = drbd_crypto_alloc_digest_safe(mdev, @@ -2822,10 +3578,10 @@ } } - if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) { + if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) { if (mdev->state.conn == C_WF_REPORT_PARAMS) { dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n", - mdev->sync_conf.csums_alg, p->csums_alg); + old_net_conf->csums_alg, p->csums_alg); goto disconnect; } csums_tfm = drbd_crypto_alloc_digest_safe(mdev, @@ -2836,35 +3592,91 @@ } } + if (apv > 94 && new_disk_conf) { + new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead); + new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target); + new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target); + new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate); + + fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ; + if (fifo_size != mdev->rs_plan_s->size) { + new_plan = fifo_alloc(fifo_size); + if (!new_plan) { + dev_err(DEV, "kmalloc of fifo_buffer failed"); + put_ldev(mdev); + goto disconnect; + } + } + } + + if (verify_tfm || csums_tfm) { + new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); + if (!new_net_conf) { + dev_err(DEV, "Allocation of new net_conf failed\n"); + goto disconnect; + } + + *new_net_conf = *old_net_conf; - spin_lock(&mdev->peer_seq_lock); - /* lock against drbd_nl_syncer_conf() */ - if 
(verify_tfm) { - strcpy(mdev->sync_conf.verify_alg, p->verify_alg); - mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1; - crypto_free_hash(mdev->verify_tfm); - mdev->verify_tfm = verify_tfm; - dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg); - } - if (csums_tfm) { - strcpy(mdev->sync_conf.csums_alg, p->csums_alg); - mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1; - crypto_free_hash(mdev->csums_tfm); - mdev->csums_tfm = csums_tfm; - dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); + if (verify_tfm) { + strcpy(new_net_conf->verify_alg, p->verify_alg); + new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1; + crypto_free_hash(mdev->tconn->verify_tfm); + mdev->tconn->verify_tfm = verify_tfm; + dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg); + } + if (csums_tfm) { + strcpy(new_net_conf->csums_alg, p->csums_alg); + new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1; + crypto_free_hash(mdev->tconn->csums_tfm); + mdev->tconn->csums_tfm = csums_tfm; + dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); + } + rcu_assign_pointer(tconn->net_conf, new_net_conf); } - spin_unlock(&mdev->peer_seq_lock); } - return ok; + if (new_disk_conf) { + rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); + put_ldev(mdev); + } + + if (new_plan) { + old_plan = mdev->rs_plan_s; + rcu_assign_pointer(mdev->rs_plan_s, new_plan); + } + + mutex_unlock(&mdev->tconn->conf_update); + synchronize_rcu(); + if (new_net_conf) + kfree(old_net_conf); + kfree(old_disk_conf); + kfree(old_plan); + + return 0; + +reconnect: + if (new_disk_conf) { + put_ldev(mdev); + kfree(new_disk_conf); + } + mutex_unlock(&mdev->tconn->conf_update); + return -EIO; + disconnect: + kfree(new_plan); + if (new_disk_conf) { + put_ldev(mdev); + kfree(new_disk_conf); + } + mutex_unlock(&mdev->tconn->conf_update); /* just for completeness: actually not needed, * as this is not reached if csums_tfm was ok. 
*/ crypto_free_hash(csums_tfm); /* but free the verify_tfm again, if csums_tfm did not work out */ crypto_free_hash(verify_tfm); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return FALSE; + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); + return -EIO; } STATIC void drbd_setup_order_type(struct drbd_conf *mdev, int peer) @@ -2886,95 +3698,94 @@ (unsigned long long)a, (unsigned long long)b); } -STATIC int receive_sizes(struct drbd_conf *mdev, struct p_header *h) +STATIC int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_sizes *p = (struct p_sizes *)h; + struct drbd_conf *mdev; + struct p_sizes *p = pi->data; enum determine_dev_size dd = unchanged; - unsigned int max_seg_s; sector_t p_size, p_usize, my_usize; int ldsc = 0; /* local disk size changed */ - enum drbd_conns nconn; + enum dds_flags ddsf; - ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; - if (drbd_recv(mdev, h->payload, h->length) != h->length) - return FALSE; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return config_unknown_volume(tconn, pi); p_size = be64_to_cpu(p->d_size); p_usize = be64_to_cpu(p->u_size); - if (p_size == 0 && mdev->state.disk == D_DISKLESS) { - dev_err(DEV, "some backing storage is needed\n"); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return FALSE; - } - /* just store the peer's disk size for now. * we still need to figure out whether we accept that. */ mdev->p_size = p_size; -#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? 
l : min(l, r)) if (get_ldev(mdev)) { + rcu_read_lock(); + my_usize = rcu_dereference(mdev->ldev->disk_conf)->disk_size; + rcu_read_unlock(); + warn_if_differ_considerably(mdev, "lower level device sizes", p_size, drbd_get_max_capacity(mdev->ldev)); warn_if_differ_considerably(mdev, "user requested size", - p_usize, mdev->ldev->dc.disk_size); + p_usize, my_usize); /* if this is the first connect, or an otherwise expected * param exchange, choose the minimum */ if (mdev->state.conn == C_WF_REPORT_PARAMS) - p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size, - p_usize); - - my_usize = mdev->ldev->dc.disk_size; - - if (mdev->ldev->dc.disk_size != p_usize) { - mdev->ldev->dc.disk_size = p_usize; - dev_info(DEV, "Peer sets u_size to %lu sectors\n", - (unsigned long)mdev->ldev->dc.disk_size); - } + p_usize = min_not_zero(my_usize, p_usize); /* Never shrink a device with usable data during connect. But allow online shrinking if we are connected. */ - if (drbd_new_dev_size(mdev, mdev->ldev, 0) < - drbd_get_capacity(mdev->this_bdev) && - mdev->state.disk >= D_OUTDATED && - mdev->state.conn < C_CONNECTED) { + if (drbd_new_dev_size(mdev, mdev->ldev, p_usize, 0) < + drbd_get_capacity(mdev->this_bdev) && + mdev->state.disk >= D_OUTDATED && + mdev->state.conn < C_CONNECTED) { dev_err(DEV, "The peer's disk size is too small!\n"); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - mdev->ldev->dc.disk_size = my_usize; + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); put_ldev(mdev); - return FALSE; + return -EIO; + } + + if (my_usize != p_usize) { + struct disk_conf *old_disk_conf, *new_disk_conf = NULL; + + new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL); + if (!new_disk_conf) { + dev_err(DEV, "Allocation of new disk_conf failed\n"); + put_ldev(mdev); + return -ENOMEM; + } + + mutex_lock(&mdev->tconn->conf_update); + old_disk_conf = mdev->ldev->disk_conf; + *new_disk_conf = *old_disk_conf; + new_disk_conf->disk_size = p_usize; + + 
rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf); + mutex_unlock(&mdev->tconn->conf_update); + synchronize_rcu(); + kfree(old_disk_conf); + + dev_info(DEV, "Peer sets u_size to %lu sectors\n", + (unsigned long)my_usize); } + put_ldev(mdev); } -#undef min_not_zero + ddsf = be16_to_cpu(p->dds_flags); if (get_ldev(mdev)) { - dd = drbd_determin_dev_size(mdev, 0); + dd = drbd_determine_dev_size(mdev, ddsf); put_ldev(mdev); if (dd == dev_size_error) - return FALSE; + return -EIO; drbd_md_sync(mdev); } else { /* I am diskless, need to accept the peer's size. */ drbd_set_my_capacity(mdev, p_size); } - if (mdev->p_uuid && mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) { - nconn = drbd_sync_handshake(mdev, - mdev->state.peer, mdev->state.pdsk); - put_ldev(mdev); - - if (nconn == C_MASK) { - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return FALSE; - } - - if (drbd_request_state(mdev, NS(conn, nconn)) < SS_SUCCESS) { - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return FALSE; - } - } + mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size); + drbd_reconsider_max_bio_size(mdev); if (get_ldev(mdev)) { if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) { @@ -2982,11 +3793,7 @@ ldsc = 1; } - max_seg_s = be32_to_cpu(p->max_segment_size); - if (max_seg_s != queue_max_segment_size(mdev->rq_queue)) - drbd_setup_queue_param(mdev, max_seg_s); - - drbd_setup_order_type(mdev, be32_to_cpu(p->queue_order_type)); + drbd_setup_order_type(mdev, be16_to_cpu(p->queue_order_type)); put_ldev(mdev); } @@ -2995,30 +3802,34 @@ drbd_get_capacity(mdev->this_bdev) || ldsc) { /* we have different sizes, probably peer * needs to know my new size... 
*/ - drbd_send_sizes(mdev, 0); + drbd_send_sizes(mdev, 0, ddsf); } if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) || (dd == grew && mdev->state.conn == C_CONNECTED)) { if (mdev->state.pdsk >= D_INCONSISTENT && - mdev->state.disk >= D_INCONSISTENT) - resync_after_online_grow(mdev); - else + mdev->state.disk >= D_INCONSISTENT) { + if (ddsf & DDSF_NO_RESYNC) + dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n"); + else + resync_after_online_grow(mdev); + } else set_bit(RESYNC_AFTER_NEG, &mdev->flags); } } - return TRUE; + return 0; } -STATIC int receive_uuids(struct drbd_conf *mdev, struct p_header *h) +STATIC int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_uuids *p = (struct p_uuids *)h; + struct drbd_conf *mdev; + struct p_uuids *p = pi->data; u64 *p_uuid; - int i; + int i, updated_uuids = 0; - ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; - if (drbd_recv(mdev, h->payload, h->length) != h->length) - return FALSE; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return config_unknown_volume(tconn, pi); p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); @@ -3034,38 +3845,49 @@ (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) { dev_err(DEV, "Can only connect to data with current UUID=%016llX\n", (unsigned long long)mdev->ed_uuid); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return FALSE; + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); + return -EIO; } if (get_ldev(mdev)) { int skip_initial_sync = mdev->state.conn == C_CONNECTED && - mdev->agreed_pro_version >= 90 && + mdev->tconn->agreed_pro_version >= 90 && mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && (p_uuid[UI_FLAGS] & 8); if (skip_initial_sync) { dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n"); drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, - "clear_n_write from receive_uuids"); + "clear_n_write from receive_uuids", + BM_LOCKED_TEST_ALLOWED); 
_drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]); _drbd_uuid_set(mdev, UI_BITMAP, 0); _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), CS_VERBOSE, NULL); drbd_md_sync(mdev); + updated_uuids = 1; } put_ldev(mdev); + } else if (mdev->state.disk < D_INCONSISTENT && + mdev->state.role == R_PRIMARY) { + /* I am a diskless primary, the peer just created a new current UUID + for me. */ + updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); } /* Before we test for the disk state, we should wait until an eventually ongoing cluster wide state change is finished. That is important if we are primary and are detaching from our disk. We need to see the new disk state... */ - wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags)); + mutex_lock(mdev->state_mutex); + mutex_unlock(mdev->state_mutex); if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT) - drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); + updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); + + if (updated_uuids) + drbd_print_uuids(mdev, "receiver updated UUIDs to"); - return TRUE; + return 0; } /** @@ -3077,6 +3899,7 @@ union drbd_state ms; static enum drbd_conns c_tab[] = { + [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS, [C_CONNECTED] = C_CONNECTED, [C_STARTING_SYNC_S] = C_STARTING_SYNC_T, @@ -3095,53 +3918,76 @@ ms.disk = ps.pdsk; ms.peer_isp = (ps.aftr_isp | ps.user_isp); - return ms; + return ms; +} + +STATIC int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi) +{ + struct drbd_conf *mdev; + struct p_req_state *p = pi->data; + union drbd_state mask, val; + enum drbd_state_rv rv; + + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; + + mask.i = be32_to_cpu(p->mask); + val.i = be32_to_cpu(p->val); + + if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) && + mutex_is_locked(mdev->state_mutex)) { + drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); + return 0; + } + + mask = convert_state(mask); + val = 
convert_state(val); + + rv = drbd_change_state(mdev, CS_VERBOSE, mask, val); + drbd_send_sr_reply(mdev, rv); + + drbd_md_sync(mdev); + + return 0; } -STATIC int receive_req_state(struct drbd_conf *mdev, struct p_header *h) +STATIC int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_req_state *p = (struct p_req_state *)h; + struct p_req_state *p = pi->data; union drbd_state mask, val; - int rv; - - ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; - if (drbd_recv(mdev, h->payload, h->length) != h->length) - return FALSE; + enum drbd_state_rv rv; mask.i = be32_to_cpu(p->mask); val.i = be32_to_cpu(p->val); - if (test_bit(DISCARD_CONCURRENT, &mdev->flags) && - test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) { - drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG); - return TRUE; + if (test_bit(DISCARD_CONCURRENT, &tconn->flags) && + mutex_is_locked(&tconn->cstate_mutex)) { + conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG); + return 0; } mask = convert_state(mask); val = convert_state(val); - DRBD_STATE_DEBUG_INIT_VAL(val); - rv = drbd_change_state(mdev, CS_VERBOSE, mask, val); - - drbd_send_sr_reply(mdev, rv); - drbd_md_sync(mdev); + rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL); + conn_send_sr_reply(tconn, rv); - return TRUE; + return 0; } -STATIC int receive_state(struct drbd_conf *mdev, struct p_header *h) +STATIC int receive_state(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_state *p = (struct p_state *)h; - enum drbd_conns nconn, oconn; - union drbd_state ns, peer_state; + struct drbd_conf *mdev; + struct p_state *p = pi->data; + union drbd_state os, ns, peer_state; enum drbd_disk_state real_peer_disk; + enum chg_state_flags cs_flags; int rv; - ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) - return FALSE; - - if (drbd_recv(mdev, h->payload, h->length) != h->length) - return FALSE; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return config_unknown_volume(tconn, 
pi); peer_state.i = be32_to_cpu(p->state); @@ -3151,154 +3997,249 @@ dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk)); } - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); retry: - oconn = nconn = mdev->state.conn; - spin_unlock_irq(&mdev->req_lock); + os = ns = drbd_read_state(mdev); + spin_unlock_irq(&mdev->tconn->req_lock); + + /* If some other part of the code (asender thread, timeout) + * already decided to close the connection again, + * we must not "re-establish" it here. */ + if (os.conn <= C_TEAR_DOWN) + return false; + + /* If this is the "end of sync" confirmation, usually the peer disk + * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits + * set) resync started in PausedSyncT, or if the timing of pause-/ + * unpause-sync events has been "just right", the peer disk may + * transition from D_CONSISTENT to D_UP_TO_DATE as well. + */ + if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) && + real_peer_disk == D_UP_TO_DATE && + os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) { + /* If we are (becoming) SyncSource, but peer is still in sync + * preparation, ignore its uptodate-ness to avoid flapping, it + * will change to inconsistent once the peer reaches active + * syncing states. + * It may have changed syncer-paused flags, however, so we + * cannot ignore this completely. */ + if (peer_state.conn > C_CONNECTED && + peer_state.conn < C_SYNC_SOURCE) + real_peer_disk = D_INCONSISTENT; + + /* if peer_state changes to connected at the same time, + * it explicitly notifies us that it finished resync. + * Maybe we should finish it up, too? 
*/ + else if (os.conn >= C_SYNC_SOURCE && + peer_state.conn == C_CONNECTED) { + if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) + drbd_resync_finished(mdev); + return 0; + } + } + + /* peer says his disk is inconsistent, while we think it is uptodate, + * and this happens while the peer still thinks we have a sync going on, + * but we think we are already done with the sync. + * We ignore this to avoid flapping pdsk. + * This should not happen, if the peer is a recent version of drbd. */ + if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT && + os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE) + real_peer_disk = D_UP_TO_DATE; + + if (ns.conn == C_WF_REPORT_PARAMS) + ns.conn = C_CONNECTED; - if (nconn == C_WF_REPORT_PARAMS) - nconn = C_CONNECTED; + if (peer_state.conn == C_AHEAD) + ns.conn = C_BEHIND; if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING && get_ldev_if_state(mdev, D_NEGOTIATING)) { int cr; /* consider resync */ /* if we established a new connection */ - cr = (oconn < C_CONNECTED); + cr = (os.conn < C_CONNECTED); /* if we had an established connection * and one of the nodes newly attaches a disk */ - cr |= (oconn == C_CONNECTED && + cr |= (os.conn == C_CONNECTED && (peer_state.disk == D_NEGOTIATING || - mdev->state.disk == D_NEGOTIATING)); + os.disk == D_NEGOTIATING)); /* if we have both been inconsistent, and the peer has been * forced to be UpToDate with --overwrite-data */ cr |= test_bit(CONSIDER_RESYNC, &mdev->flags); /* if we had been plain connected, and the admin requested to * start a sync by "invalidate" or "invalidate-remote" */ - cr |= (oconn == C_CONNECTED && + cr |= (os.conn == C_CONNECTED && (peer_state.conn >= C_STARTING_SYNC_S && peer_state.conn <= C_WF_BITMAP_T)); if (cr) - nconn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk); + ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk); put_ldev(mdev); - if (nconn == C_MASK) { + if (ns.conn == C_MASK) { + ns.conn = C_CONNECTED; if 
(mdev->state.disk == D_NEGOTIATING) { - drbd_force_state(mdev, NS(disk, D_DISKLESS)); - nconn = C_CONNECTED; + drbd_force_state(mdev, NS(disk, D_FAILED)); } else if (peer_state.disk == D_NEGOTIATING) { dev_err(DEV, "Disk attach process on the peer node was aborted.\n"); peer_state.disk = D_DISKLESS; + real_peer_disk = D_DISKLESS; } else { - D_ASSERT(oconn == C_WF_REPORT_PARAMS); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return FALSE; + if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags)) + return -EIO; + D_ASSERT(os.conn == C_WF_REPORT_PARAMS); + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); + return -EIO; } } } - spin_lock_irq(&mdev->req_lock); - if (mdev->state.conn != oconn) + spin_lock_irq(&mdev->tconn->req_lock); + if (os.i != drbd_read_state(mdev).i) goto retry; clear_bit(CONSIDER_RESYNC, &mdev->flags); - ns.i = mdev->state.i; - ns.conn = nconn; ns.peer = peer_state.role; ns.pdsk = real_peer_disk; ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp); - if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) + if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) ns.disk = mdev->new_state_tmp.disk; - DRBD_STATE_DEBUG_INIT_VAL(ns); - rv = _drbd_set_state(mdev, ns, CS_VERBOSE | CS_HARD, NULL); - ns = mdev->state; - spin_unlock_irq(&mdev->req_lock); + cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD); + if (ns.pdsk == D_CONSISTENT && drbd_suspended(mdev) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED && + test_bit(NEW_CUR_UUID, &mdev->flags)) { + /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this + for temporal network outages! 
*/ + spin_unlock_irq(&mdev->tconn->req_lock); + dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n"); + tl_clear(mdev->tconn); + drbd_uuid_new_current(mdev); + clear_bit(NEW_CUR_UUID, &mdev->flags); + conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD); + return -EIO; + } + rv = _drbd_set_state(mdev, ns, cs_flags, NULL); + ns = drbd_read_state(mdev); + spin_unlock_irq(&mdev->tconn->req_lock); if (rv < SS_SUCCESS) { - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); - return FALSE; + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); + return -EIO; } - if (oconn > C_WF_REPORT_PARAMS) { - if (nconn > C_CONNECTED && peer_state.conn <= C_CONNECTED && + if (os.conn > C_WF_REPORT_PARAMS) { + if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED && peer_state.disk != D_NEGOTIATING ) { /* we want resync, peer has not yet decided to sync... */ /* Nowadays only used when forcing a node into primary role and setting its disk to UpToDate with that */ drbd_send_uuids(mdev); - drbd_send_state(mdev); + drbd_send_current_state(mdev); } } - mdev->net_conf->want_lose = 0; + clear_bit(DISCARD_MY_DATA, &mdev->flags); drbd_md_sync(mdev); /* update connected indicator, la_size, ... 
*/ - return TRUE; + return 0; } -STATIC int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h) +STATIC int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_rs_uuid *p = (struct p_rs_uuid *)h; + struct drbd_conf *mdev; + struct p_rs_uuid *p = pi->data; + + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; wait_event(mdev->misc_wait, mdev->state.conn == C_WF_SYNC_UUID || + mdev->state.conn == C_BEHIND || mdev->state.conn < C_CONNECTED || mdev->state.disk < D_NEGOTIATING); /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */ - ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; - if (drbd_recv(mdev, h->payload, h->length) != h->length) - return FALSE; - /* Here the _drbd_uuid_ functions are right, current should _not_ be rotated into the history */ if (get_ldev_if_state(mdev, D_NEGOTIATING)) { _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid)); _drbd_uuid_set(mdev, UI_BITMAP, 0UL); + drbd_print_uuids(mdev, "updated sync uuid"); drbd_start_resync(mdev, C_SYNC_TARGET); put_ldev(mdev); } else dev_err(DEV, "Ignoring SyncUUID packet!\n"); - return TRUE; + return 0; } -enum receive_bitmap_ret { OK, DONE, FAILED }; - -static enum receive_bitmap_ret -receive_bitmap_plain(struct drbd_conf *mdev, struct p_header *h, - unsigned long *buffer, struct bm_xfer_ctx *c) -{ - unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); - unsigned want = num_words * sizeof(long); +/** + * receive_bitmap_plain + * + * Return 0 when done, 1 when another iteration is needed, and a negative error + * code upon failure. 
+ */ +static int +receive_bitmap_plain(struct drbd_conf *mdev, unsigned int size, + unsigned long *p, struct bm_xfer_ctx *c) +{ + unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - + drbd_header_size(mdev->tconn); + unsigned int num_words = min_t(size_t, data_size / sizeof(*p), + c->bm_words - c->word_offset); + unsigned int want = num_words * sizeof(*p); + int err; - if (want != h->length) { - dev_err(DEV, "%s:want (%u) != h->length (%u)\n", __func__, want, h->length); - return FAILED; + if (want != size) { + dev_err(DEV, "%s:want (%u) != size (%u)\n", __func__, want, size); + return -EIO; } if (want == 0) - return DONE; - if (drbd_recv(mdev, buffer, want) != want) - return FAILED; + return 0; + err = drbd_recv_all(mdev->tconn, p, want); + if (err) + return err; - drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer); + drbd_bm_merge_lel(mdev, c->word_offset, num_words, p); c->word_offset += num_words; c->bit_offset = c->word_offset * BITS_PER_LONG; if (c->bit_offset > c->bm_bits) c->bit_offset = c->bm_bits; - return OK; + return 1; +} + +static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p) +{ + return (enum drbd_bitmap_code)(p->encoding & 0x0f); +} + +static int dcbp_get_start(struct p_compressed_bm *p) +{ + return (p->encoding & 0x80) != 0; +} + +static int dcbp_get_pad_bits(struct p_compressed_bm *p) +{ + return (p->encoding >> 4) & 0x7; } -static enum receive_bitmap_ret +/** + * recv_bm_rle_bits + * + * Return 0 when done, 1 when another iteration is needed, and a negative error + * code upon failure. 
+ */ +static int recv_bm_rle_bits(struct drbd_conf *mdev, struct p_compressed_bm *p, - struct bm_xfer_ctx *c) + struct bm_xfer_ctx *c, + unsigned int len) { struct bitstream bs; u64 look_ahead; @@ -3306,27 +4247,26 @@ u64 tmp; unsigned long s = c->bit_offset; unsigned long e; - int len = p->head.length - (sizeof(*p) - sizeof(p->head)); - int toggle = DCBP_get_start(p); + int toggle = dcbp_get_start(p); int have; int bits; - bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p)); + bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p)); bits = bitstream_get_bits(&bs, &look_ahead, 64); if (bits < 0) - return FAILED; + return -EIO; for (have = bits; have > 0; s += rl, toggle = !toggle) { bits = vli_decode_bits(&rl, look_ahead); if (bits <= 0) - return FAILED; + return -EIO; if (toggle) { e = s + rl -1; if (e >= c->bm_bits) { dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e); - return FAILED; + return -EIO; } _drbd_bm_set_bits(mdev, s, e); } @@ -3336,14 +4276,14 @@ have, bits, look_ahead, (unsigned int)(bs.cur.b - p->code), (unsigned int)bs.buf_len); - return FAILED; + return -EIO; } look_ahead >>= bits; have -= bits; bits = bitstream_get_bits(&bs, &tmp, 64 - have); if (bits < 0) - return FAILED; + return -EIO; look_ahead |= tmp << have; have += bits; } @@ -3351,35 +4291,44 @@ c->bit_offset = s; bm_xfer_ctx_bit_to_word_offset(c); - return (s == c->bm_bits) ? DONE : OK; + return (s != c->bm_bits); } -static enum receive_bitmap_ret +/** + * decode_bitmap_c + * + * Return 0 when done, 1 when another iteration is needed, and a negative error + * code upon failure. 
+ */ +static int decode_bitmap_c(struct drbd_conf *mdev, struct p_compressed_bm *p, - struct bm_xfer_ctx *c) + struct bm_xfer_ctx *c, + unsigned int len) { - if (DCBP_get_code(p) == RLE_VLI_Bits) - return recv_bm_rle_bits(mdev, p, c); + if (dcbp_get_code(p) == RLE_VLI_Bits) + return recv_bm_rle_bits(mdev, p, c, len - sizeof(*p)); /* other variants had been implemented for evaluation, * but have been dropped as this one turned out to be "best" * during all our tests. */ dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding); - drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); - return FAILED; + conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD); + return -EIO; } void INFO_bm_xfer_stats(struct drbd_conf *mdev, const char *direction, struct bm_xfer_ctx *c) { /* what would it take to transfer it "plaintext" */ - unsigned plain = sizeof(struct p_header) * - ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1) - + c->bm_words * sizeof(long); - unsigned total = c->bytes[0] + c->bytes[1]; - unsigned r; + unsigned int header_size = drbd_header_size(mdev->tconn); + unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size; + unsigned int plain = + header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) + + c->bm_words * sizeof(unsigned long); + unsigned int total = c->bytes[0] + c->bytes[1]; + unsigned int r; /* total can not be zero. but just in case: */ if (total == 0) @@ -3413,266 +4362,291 @@ in order to be agnostic to the 32 vs 64 bits issue. returns 0 on failure, 1 if we successfully received it. 
*/ -STATIC int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) +STATIC int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi) { + struct drbd_conf *mdev; struct bm_xfer_ctx c; - void *buffer; - enum receive_bitmap_ret ret; - int ok = FALSE; - - wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); - - drbd_bm_lock(mdev, "receive bitmap"); - - /* maybe we should use some per thread scratch page, - * and allocate that during initial device creation? */ - buffer = (unsigned long *) __get_free_page(GFP_NOIO); - if (!buffer) { - dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__); - goto out; - } + int err; + + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; + + drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED); + /* you are supposed to send additional out-of-sync information + * if you actually set bits during this phase */ c = (struct bm_xfer_ctx) { .bm_bits = drbd_bm_bits(mdev), .bm_words = drbd_bm_words(mdev), }; - do { - if (h->command == P_BITMAP) { - ret = receive_bitmap_plain(mdev, h, buffer, &c); - } else if (h->command == P_COMPRESSED_BITMAP) { + for(;;) { + if (pi->cmd == P_BITMAP) + err = receive_bitmap_plain(mdev, pi->size, pi->data, &c); + else if (pi->cmd == P_COMPRESSED_BITMAP) { /* MAYBE: sanity check that we speak proto >= 90, * and the feature is enabled! 
*/ - struct p_compressed_bm *p; + struct p_compressed_bm *p = pi->data; - if (h->length > BM_PACKET_PAYLOAD_BYTES) { + if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(tconn)) { dev_err(DEV, "ReportCBitmap packet too large\n"); + err = -EIO; goto out; } - /* use the page buff */ - p = buffer; - memcpy(p, h, sizeof(*h)); - if (drbd_recv(mdev, p->head.payload, h->length) != h->length) + if (pi->size <= sizeof(*p)) { + dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size); + err = -EIO; goto out; - if (p->head.length <= (sizeof(*p) - sizeof(p->head))) { - dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", p->head.length); - return FAILED; } - ret = decode_bitmap_c(mdev, p, &c); + err = drbd_recv_all(mdev->tconn, p, pi->size); + if (err) + goto out; + err = decode_bitmap_c(mdev, p, &c, pi->size); } else { - dev_warn(DEV, "receive_bitmap: h->command neither ReportBitMap nor ReportCBitMap (is 0x%x)", h->command); + dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd); + err = -EIO; goto out; } - c.packets[h->command == P_BITMAP]++; - c.bytes[h->command == P_BITMAP] += sizeof(struct p_header) + h->length; + c.packets[pi->cmd == P_BITMAP]++; + c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(tconn) + pi->size; - if (ret != OK) + if (err <= 0) { + if (err < 0) + goto out; break; - - if (!drbd_recv_header(mdev, h)) + } + err = drbd_recv_header(mdev->tconn, pi); + if (err) goto out; - } while (ret == OK); - if (ret == FAILED) - goto out; + } INFO_bm_xfer_stats(mdev, "receive", &c); if (mdev->state.conn == C_WF_BITMAP_T) { - ok = !drbd_send_bitmap(mdev); - if (!ok) + enum drbd_state_rv rv; + + err = drbd_send_bitmap(mdev); + if (err) goto out; /* Omit CS_ORDERED with this state transition to avoid deadlocks. 
*/ - ok = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); - D_ASSERT(ok == SS_SUCCESS); + rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); + D_ASSERT(rv == SS_SUCCESS); } else if (mdev->state.conn != C_WF_BITMAP_S) { /* admin may have requested C_DISCONNECTING, * other threads may have noticed network errors */ dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n", drbd_conn_str(mdev->state.conn)); } + err = 0; - ok = TRUE; out: drbd_bm_unlock(mdev); - if (ok && mdev->state.conn == C_WF_BITMAP_S) + if (!err && mdev->state.conn == C_WF_BITMAP_S) drbd_start_resync(mdev, C_SYNC_SOURCE); - free_page((unsigned long) buffer); - return ok; + return err; } -STATIC int receive_skip(struct drbd_conf *mdev, struct p_header *h) +STATIC int receive_skip(struct drbd_tconn *tconn, struct packet_info *pi) { - /* TODO zero copy sink :) */ - static char sink[128]; - int size, want, r; - - dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n", - h->command, h->length); + conn_warn(tconn, "skipping unknown optional packet type %d, l: %d!\n", + pi->cmd, pi->size); - size = h->length; - while (size > 0) { - want = min_t(int, size, sizeof(sink)); - r = drbd_recv(mdev, sink, want); - ERR_IF(r <= 0) break; - size -= r; - } - return size == 0; + return ignore_remaining_packet(tconn, pi); } -STATIC int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h) +STATIC int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi) { - if (mdev->state.disk >= D_INCONSISTENT) - drbd_kick_lo(mdev); + struct drbd_conf *mdev; + + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; /* Make sure we've acked all the TCP data associated * with the data requests being unplugged */ - drbd_tcp_quickack(mdev->data.socket); + drbd_tcp_quickack(mdev->tconn->data.socket); - return TRUE; + return 0; } -typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header *); +STATIC int receive_out_of_sync(struct drbd_tconn 
*tconn, struct packet_info *pi) +{ + struct drbd_conf *mdev; + struct p_block_desc *p = pi->data; + + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; + + switch (mdev->state.conn) { + case C_WF_SYNC_UUID: + case C_WF_BITMAP_T: + case C_BEHIND: + break; + default: + dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n", + drbd_conn_str(mdev->state.conn)); + } + + drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize)); + + return 0; +} -static drbd_cmd_handler_f drbd_default_handler[] = { - [P_DATA] = receive_Data, - [P_DATA_REPLY] = receive_DataReply, - [P_RS_DATA_REPLY] = receive_RSDataReply, - [P_BARRIER] = receive_Barrier, - [P_BITMAP] = receive_bitmap, - [P_COMPRESSED_BITMAP] = receive_bitmap, - [P_UNPLUG_REMOTE] = receive_UnplugRemote, - [P_DATA_REQUEST] = receive_DataRequest, - [P_RS_DATA_REQUEST] = receive_DataRequest, - [P_SYNC_PARAM] = receive_SyncParam, - [P_SYNC_PARAM89] = receive_SyncParam, - [P_PROTOCOL] = receive_protocol, - [P_UUIDS] = receive_uuids, - [P_SIZES] = receive_sizes, - [P_STATE] = receive_state, - [P_STATE_CHG_REQ] = receive_req_state, - [P_SYNC_UUID] = receive_sync_uuid, - [P_OV_REQUEST] = receive_DataRequest, - [P_OV_REPLY] = receive_DataRequest, - [P_CSUM_RS_REQUEST] = receive_DataRequest, - /* anything missing from this table is in - * the asender_tbl, see get_asender_cmd */ - [P_MAX_CMD] = NULL, +struct data_cmd { + int expect_payload; + size_t pkt_size; + int (*fn)(struct drbd_tconn *, struct packet_info *); }; -static drbd_cmd_handler_f *drbd_cmd_handler = drbd_default_handler; -static drbd_cmd_handler_f *drbd_opt_cmd_handler; +static struct data_cmd drbd_cmd_handler[] = { + [P_DATA] = { 1, sizeof(struct p_data), receive_Data }, + [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply }, + [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } , + [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } , + [P_BITMAP] = { 1, 0, 
receive_bitmap } , + [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } , + [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote }, + [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, + [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, + [P_SYNC_PARAM] = { 1, 0, receive_SyncParam }, + [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam }, + [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol }, + [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids }, + [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes }, + [P_STATE] = { 0, sizeof(struct p_state), receive_state }, + [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state }, + [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid }, + [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, + [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, + [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, + [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, + [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, + [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state }, + [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol }, +}; -STATIC void drbdd(struct drbd_conf *mdev) +STATIC void drbdd(struct drbd_tconn *tconn) { - drbd_cmd_handler_f handler; - struct p_header *header = &mdev->data.rbuf.header; + struct packet_info pi; + size_t shs; /* sub header size */ + int err; - while (get_t_state(&mdev->receiver) == Running) { - drbd_thread_current_set_cpu(mdev); - if (!drbd_recv_header(mdev, header)) { - drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); - break; - } + while (get_t_state(&tconn->receiver) == RUNNING) { + struct data_cmd *cmd; - if (header->command < P_MAX_CMD) - handler = drbd_cmd_handler[header->command]; - else if (P_MAY_IGNORE < header->command - && header->command < P_MAX_OPT_CMD) - 
handler = drbd_opt_cmd_handler[header->command-P_MAY_IGNORE]; - else if (header->command > P_MAX_OPT_CMD) - handler = receive_skip; - else - handler = NULL; + drbd_thread_current_set_cpu(&tconn->receiver); + if (drbd_recv_header(tconn, &pi)) + goto err_out; - if (unlikely(!handler)) { - dev_err(DEV, "unknown packet type %d, l: %d!\n", - header->command, header->length); - drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); - break; - } - if (unlikely(!handler(mdev, header))) { - dev_err(DEV, "error receiving %s, l: %d!\n", - cmdname(header->command), header->length); - drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); - break; + cmd = &drbd_cmd_handler[pi.cmd]; + if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) { + conn_err(tconn, "Unexpected data packet %s (0x%04x)", + cmdname(pi.cmd), pi.cmd); + goto err_out; } - trace_drbd_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf, - __FILE__, __LINE__); - } -} + shs = cmd->pkt_size; + if (pi.size > shs && !cmd->expect_payload) { + conn_err(tconn, "No payload expected %s l:%d\n", + cmdname(pi.cmd), pi.size); + goto err_out; + } -STATIC void drbd_fail_pending_reads(struct drbd_conf *mdev) -{ - struct hlist_head *slot; - struct hlist_node *pos; - struct hlist_node *tmp; - struct drbd_request *req; - int i; + if (shs) { + err = drbd_recv_all_warn(tconn, pi.data, shs); + if (err) + goto err_out; + pi.size -= shs; + } - /* - * Application READ requests - */ - spin_lock_irq(&mdev->req_lock); - for (i = 0; i < APP_R_HSIZE; i++) { - slot = mdev->app_reads_hash+i; - hlist_for_each_entry_safe(req, pos, tmp, slot, colision) { - /* it may (but should not any longer!) - * be on the work queue; if that assert triggers, - * we need to also grab the - * spin_lock_irq(&mdev->data.work.q_lock); - * and list_del_init here. */ - D_ASSERT(list_empty(&req->w.list)); - /* It would be nice to complete outside of spinlock. - * But this is easier for now. 
*/ - _req_mod(req, connection_lost_while_pending); - } - } - for (i = 0; i < APP_R_HSIZE; i++) - if (!hlist_empty(mdev->app_reads_hash+i)) - dev_warn(DEV, "ASSERT FAILED: app_reads_hash[%d].first: " - "%p, should be NULL\n", i, mdev->app_reads_hash[i].first); + err = cmd->fn(tconn, &pi); + if (err) { + conn_err(tconn, "error receiving %s, e: %d l: %d!\n", + cmdname(pi.cmd), err, pi.size); + goto err_out; + } + } + return; - memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); - spin_unlock_irq(&mdev->req_lock); + err_out: + conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD); } -void drbd_flush_workqueue(struct drbd_conf *mdev) +void conn_flush_workqueue(struct drbd_tconn *tconn) { struct drbd_wq_barrier barr; barr.w.cb = w_prev_work_done; + barr.w.tconn = tconn; init_completion(&barr.done); - drbd_queue_work(&mdev->data.work, &barr.w); + drbd_queue_work(&tconn->data.work, &barr.w); wait_for_completion(&barr.done); } -STATIC void drbd_disconnect(struct drbd_conf *mdev) +STATIC void conn_disconnect(struct drbd_tconn *tconn) { - enum drbd_fencing_p fp; - union drbd_state os, ns; - int rv = SS_UNKNOWN_ERROR; - unsigned int i; + struct drbd_conf *mdev; + enum drbd_conns oc; + int vnr; - if (mdev->state.conn == C_STANDALONE) + if (tconn->cstate == C_STANDALONE) return; - if (mdev->state.conn >= C_WF_CONNECTION) - dev_err(DEV, "ASSERT FAILED cstate = %s, expected < WFConnection\n", - drbd_conn_str(mdev->state.conn)); + + /* We are about to start the cleanup after connection loss. + * Make sure drbd_make_request knows about that. + * Usually we should be in some network failure state already, + * but just in case we are not, we fix it up here. + */ + conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD); /* asender does not clean up anything. 
it must not interfere, either */ - drbd_thread_stop(&mdev->asender); + drbd_thread_stop(&tconn->asender); + drbd_free_sock(tconn); + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + kref_get(&mdev->kref); + rcu_read_unlock(); + drbd_disconnected(mdev); + kref_put(&mdev->kref, &drbd_minor_destroy); + rcu_read_lock(); + } + rcu_read_unlock(); + + if (!list_empty(&tconn->current_epoch->list)) + conn_err(tconn, "ASSERTION FAILED: tconn->current_epoch->list not empty\n"); + /* ok, no more ee's on the fly, it is safe to reset the epoch_size */ + atomic_set(&tconn->current_epoch->epoch_size, 0); + + conn_info(tconn, "Connection closed\n"); + + if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN) + conn_try_outdate_peer_async(tconn); + + spin_lock_irq(&tconn->req_lock); + oc = tconn->cstate; + if (oc >= C_UNCONNECTED) + _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE); - mutex_lock(&mdev->data.mutex); - drbd_free_sock(mdev); - mutex_unlock(&mdev->data.mutex); + spin_unlock_irq(&tconn->req_lock); - spin_lock_irq(&mdev->req_lock); + if (oc == C_DISCONNECTING) + conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD); +} + +STATIC int drbd_disconnected(struct drbd_conf *mdev) +{ + unsigned int i; + + /* wait for current activity to cease. */ + spin_lock_irq(&mdev->tconn->req_lock); _drbd_wait_ee_list_empty(mdev, &mdev->active_ee); _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee); _drbd_wait_ee_list_empty(mdev, &mdev->read_ee); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); /* We do not have data structures that would allow us to * get the rs_pending_cnt down to 0 again. 
@@ -3690,9 +4664,7 @@ atomic_set(&mdev->rs_pending_cnt, 0); wake_up(&mdev->misc_wait); - /* make sure syncer is stopped and w_resume_next_sg queued */ del_timer_sync(&mdev->resync_timer); - set_bit(STOP_SYNC_TIMER, &mdev->flags); resync_timer_fn((unsigned long)mdev); /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, @@ -3700,81 +4672,24 @@ * to be "canceled" */ drbd_flush_workqueue(mdev); - /* This also does reclaim_net_ee(). If we do this too early, we might - * miss some resync ee and pages.*/ - drbd_process_done_ee(mdev); + drbd_finish_peer_reqs(mdev); + + /* This second workqueue flush is necessary, since drbd_finish_peer_reqs() + might have issued a work again. The one before drbd_finish_peer_reqs() is + necessary to reclain net_ee in drbd_finish_peer_reqs(). */ + drbd_flush_workqueue(mdev); kfree(mdev->p_uuid); mdev->p_uuid = NULL; - if (!mdev->state.susp) - tl_clear(mdev); - - drbd_fail_pending_reads(mdev); - - dev_info(DEV, "Connection closed\n"); + if (!drbd_suspended(mdev)) + tl_clear(mdev->tconn); drbd_md_sync(mdev); - fp = FP_DONT_CARE; - if (get_ldev(mdev)) { - fp = mdev->ldev->dc.fencing; - put_ldev(mdev); - } - - if (mdev->state.role == R_PRIMARY) { - if (fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) { - enum drbd_disk_state nps = drbd_try_outdate_peer(mdev); - drbd_request_state(mdev, NS(pdsk, nps)); - } - } - - spin_lock_irq(&mdev->req_lock); - os = mdev->state; - if (os.conn >= C_UNCONNECTED) { - /* Do not restart in case we are C_DISCONNECTING */ - ns = os; - ns.conn = C_UNCONNECTED; - DRBD_STATE_DEBUG_INIT_VAL(ns); - rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); - } - spin_unlock_irq(&mdev->req_lock); - - if (os.conn == C_DISCONNECTING) { - struct hlist_head *h; - wait_event(mdev->misc_wait, atomic_read(&mdev->net_cnt) == 0); - - /* we must not free the tl_hash - * while application io is still on the fly */ - wait_event(mdev->misc_wait, atomic_read(&mdev->ap_bio_cnt) == 0); - - spin_lock_irq(&mdev->req_lock); - /* 
paranoia code */ - for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++) - if (h->first) - dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n", - (int)(h - mdev->ee_hash), h->first); - kfree(mdev->ee_hash); - mdev->ee_hash = NULL; - mdev->ee_hash_s = 0; - - /* paranoia code */ - for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) - if (h->first) - dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n", - (int)(h - mdev->tl_hash), h->first); - kfree(mdev->tl_hash); - mdev->tl_hash = NULL; - mdev->tl_hash_s = 0; - spin_unlock_irq(&mdev->req_lock); - - crypto_free_hash(mdev->cram_hmac_tfm); - mdev->cram_hmac_tfm = NULL; - - kfree(mdev->net_conf); - mdev->net_conf = NULL; - drbd_request_state(mdev, NS(conn, C_STANDALONE)); - } + /* serialize with bitmap writeout triggered by the state change, + * if any. */ + wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); /* tcp_close and release of sendpage pages can be deferred. I don't * want to use SO_LINGER, because apparently it can be deferred for @@ -3783,21 +4698,22 @@ * Actually we don't care for exactly when the network stack does its * put_page(), but release our reference on these pages right here. 
*/ - i = drbd_release_ee(mdev, &mdev->net_ee); + i = drbd_free_peer_reqs(mdev, &mdev->net_ee); if (i) dev_info(DEV, "net_ee not empty, killed %u entries\n", i); + i = atomic_read(&mdev->pp_in_use_by_net); + if (i) + dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i); i = atomic_read(&mdev->pp_in_use); if (i) - dev_info(DEV, "pp_in_use = %u, expected 0\n", i); + dev_info(DEV, "pp_in_use = %d, expected 0\n", i); D_ASSERT(list_empty(&mdev->read_ee)); D_ASSERT(list_empty(&mdev->active_ee)); D_ASSERT(list_empty(&mdev->sync_ee)); D_ASSERT(list_empty(&mdev->done_ee)); - /* ok, no more ee's on the fly, it is safe to reset the epoch_size */ - atomic_set(&mdev->current_epoch->epoch_size, 0); - D_ASSERT(list_empty(&mdev->current_epoch->list)); + return 0; } /* @@ -3809,29 +4725,19 @@ * * for now, they are expected to be zero, but ignored. */ -STATIC int drbd_send_handshake(struct drbd_conf *mdev) +STATIC int drbd_send_features(struct drbd_tconn *tconn) { - /* ASSERT current == mdev->receiver ... */ - struct p_handshake *p = &mdev->data.sbuf.handshake; - int ok; - - if (mutex_lock_interruptible(&mdev->data.mutex)) { - dev_err(DEV, "interrupted during initial handshake\n"); - return 0; /* interrupted. not ok. */ - } - - if (mdev->data.socket == NULL) { - mutex_unlock(&mdev->data.mutex); - return 0; - } + struct drbd_socket *sock; + struct p_connection_features *p; + sock = &tconn->data; + p = conn_prepare_command(tconn, sock); + if (!p) + return -EIO; memset(p, 0, sizeof(*p)); p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); - ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE, - (struct p_header *)p, sizeof(*p), 0 ); - mutex_unlock(&mdev->data.mutex); - return ok; + return conn_send_command(tconn, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0); } /* @@ -3841,43 +4747,38 @@ * -1 peer talks different language, * no point in trying again, please go standalone. 
*/ -STATIC int drbd_do_handshake(struct drbd_conf *mdev) +STATIC int drbd_do_features(struct drbd_tconn *tconn) { - /* ASSERT current == mdev->receiver ... */ - struct p_handshake *p = &mdev->data.rbuf.handshake; - const int expect = sizeof(struct p_handshake) - -sizeof(struct p_header); - int rv; + /* ASSERT current == tconn->receiver ... */ + struct p_connection_features *p; + const int expect = sizeof(struct p_connection_features); + struct packet_info pi; + int err; - rv = drbd_send_handshake(mdev); - if (!rv) + err = drbd_send_features(tconn); + if (err) return 0; - rv = drbd_recv_header(mdev, &p->head); - if (!rv) + err = drbd_recv_header(tconn, &pi); + if (err) return 0; - if (p->head.command != P_HAND_SHAKE) { - dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n", - cmdname(p->head.command), p->head.command); + if (pi.cmd != P_CONNECTION_FEATURES) { + conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n", + cmdname(pi.cmd), pi.cmd); return -1; } - if (p->head.length != expect) { - dev_err(DEV, "expected HandShake length: %u, received: %u\n", - expect, p->head.length); + if (pi.size != expect) { + conn_err(tconn, "expected ConnectionFeatures length: %u, received: %u\n", + expect, pi.size); return -1; } - rv = drbd_recv(mdev, &p->head.payload, expect); - - if (rv != expect) { - dev_err(DEV, "short read receiving handshake packet: l=%u\n", rv); + p = pi.data; + err = drbd_recv_all_warn(tconn, p, expect); + if (err) return 0; - } - - trace_drbd_packet(mdev, mdev->data.socket, 2, &mdev->data.rbuf, - __FILE__, __LINE__); p->protocol_min = be32_to_cpu(p->protocol_min); p->protocol_max = be32_to_cpu(p->protocol_max); @@ -3888,15 +4789,15 @@ PRO_VERSION_MIN > p->protocol_max) goto incompat; - mdev->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); + tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); - dev_info(DEV, "Handshake successful: " - "Agreed network protocol version %d\n", 
mdev->agreed_pro_version); + conn_info(tconn, "Handshake successful: " + "Agreed network protocol version %d\n", tconn->agreed_pro_version); return 1; incompat: - dev_err(DEV, "incompatible DRBD dialects: " + conn_err(tconn, "incompatible DRBD dialects: " "I support %d-%d, peer supports %d-%d\n", PRO_VERSION_MIN, PRO_VERSION_MAX, p->protocol_min, p->protocol_max); @@ -3904,7 +4805,7 @@ } #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE) -STATIC int drbd_do_auth(struct drbd_conf *mdev) +STATIC int drbd_do_auth(struct drbd_tconn *tconn) { dev_err(DEV, "This kernel was build without CONFIG_CRYPTO_HMAC.\n"); dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n"); @@ -3919,118 +4820,139 @@ -1 - auth failed, don't try again. */ -STATIC int drbd_do_auth(struct drbd_conf *mdev) +STATIC int drbd_do_auth(struct drbd_tconn *tconn) { + struct drbd_socket *sock; char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */ struct scatterlist sg; char *response = NULL; char *right_response = NULL; char *peers_ch = NULL; - struct p_header p; - unsigned int key_len = strlen(mdev->net_conf->shared_secret); + unsigned int key_len; + char secret[SHARED_SECRET_MAX]; /* 64 byte */ unsigned int resp_size; struct hash_desc desc; - int rv; + struct packet_info pi; + struct net_conf *nc; + int err, rv; + + /* FIXME: Put the challenge/response into the preallocated socket buffer. 
*/ + + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + key_len = strlen(nc->shared_secret); + memcpy(secret, nc->shared_secret, key_len); + rcu_read_unlock(); - desc.tfm = mdev->cram_hmac_tfm; + desc.tfm = tconn->cram_hmac_tfm; desc.flags = 0; - rv = crypto_hash_setkey(mdev->cram_hmac_tfm, - (u8 *)mdev->net_conf->shared_secret, key_len); + rv = crypto_hash_setkey(tconn->cram_hmac_tfm, (u8 *)secret, key_len); if (rv) { - dev_err(DEV, "crypto_hash_setkey() failed with %d\n", rv); + conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv); rv = -1; goto fail; } get_random_bytes(my_challenge, CHALLENGE_LEN); - rv = drbd_send_cmd2(mdev, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN); + sock = &tconn->data; + if (!conn_prepare_command(tconn, sock)) { + rv = 0; + goto fail; + } + rv = !conn_send_command(tconn, sock, P_AUTH_CHALLENGE, 0, + my_challenge, CHALLENGE_LEN); if (!rv) goto fail; - rv = drbd_recv_header(mdev, &p); - if (!rv) + err = drbd_recv_header(tconn, &pi); + if (err) { + rv = 0; goto fail; + } - if (p.command != P_AUTH_CHALLENGE) { - dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n", - cmdname(p.command), p.command); + if (pi.cmd != P_AUTH_CHALLENGE) { + conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n", + cmdname(pi.cmd), pi.cmd); rv = 0; goto fail; } - if (p.length > CHALLENGE_LEN*2) { - dev_err(DEV, "expected AuthChallenge payload too big.\n"); + if (pi.size > CHALLENGE_LEN * 2) { + conn_err(tconn, "expected AuthChallenge payload too big.\n"); rv = -1; goto fail; } - peers_ch = kmalloc(p.length, GFP_NOIO); + peers_ch = kmalloc(pi.size, GFP_NOIO); if (peers_ch == NULL) { - dev_err(DEV, "kmalloc of peers_ch failed\n"); + conn_err(tconn, "kmalloc of peers_ch failed\n"); rv = -1; goto fail; } - rv = drbd_recv(mdev, peers_ch, p.length); - - if (rv != p.length) { - dev_err(DEV, "short read AuthChallenge: l=%u\n", rv); + err = drbd_recv_all_warn(tconn, peers_ch, pi.size); + if (err) { rv = 0; goto fail; 
} - resp_size = crypto_hash_digestsize(mdev->cram_hmac_tfm); + resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm); response = kmalloc(resp_size, GFP_NOIO); if (response == NULL) { - dev_err(DEV, "kmalloc of response failed\n"); + conn_err(tconn, "kmalloc of response failed\n"); rv = -1; goto fail; } sg_init_table(&sg, 1); - sg_set_buf(&sg, peers_ch, p.length); + sg_set_buf(&sg, peers_ch, pi.size); rv = crypto_hash_digest(&desc, &sg, sg.length, response); if (rv) { - dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv); + conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv); rv = -1; goto fail; } - rv = drbd_send_cmd2(mdev, P_AUTH_RESPONSE, response, resp_size); - if (!rv) + if (!conn_prepare_command(tconn, sock)) { + rv = 0; goto fail; - - rv = drbd_recv_header(mdev, &p); + } + rv = !conn_send_command(tconn, sock, P_AUTH_RESPONSE, 0, + response, resp_size); if (!rv) goto fail; - if (p.command != P_AUTH_RESPONSE) { - dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n", - cmdname(p.command), p.command); + err = drbd_recv_header(tconn, &pi); + if (err) { rv = 0; goto fail; } - if (p.length != resp_size) { - dev_err(DEV, "expected AuthResponse payload of wrong size\n"); + if (pi.cmd != P_AUTH_RESPONSE) { + conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n", + cmdname(pi.cmd), pi.cmd); rv = 0; goto fail; } - rv = drbd_recv(mdev, response , resp_size); + if (pi.size != resp_size) { + conn_err(tconn, "expected AuthResponse payload of wrong size\n"); + rv = 0; + goto fail; + } - if (rv != resp_size) { - dev_err(DEV, "short read receiving AuthResponse: l=%u\n", rv); + err = drbd_recv_all_warn(tconn, response , resp_size); + if (err) { rv = 0; goto fail; } right_response = kmalloc(resp_size, GFP_NOIO); if (right_response == NULL) { - dev_err(DEV, "kmalloc of right_response failed\n"); + conn_err(tconn, "kmalloc of right_response failed\n"); rv = -1; goto fail; } @@ -4039,7 +4961,7 @@ rv = crypto_hash_digest(&desc, &sg, 
sg.length, right_response); if (rv) { - dev_err(DEV, "crypto_hash_digest() failed with %d\n", rv); + conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv); rv = -1; goto fail; } @@ -4047,8 +4969,8 @@ rv = !memcmp(response, right_response, resp_size); if (rv) - dev_info(DEV, "Peer authenticated using %d bytes of '%s' HMAC\n", - resp_size, mdev->net_conf->cram_hmac_alg); + conn_info(tconn, "Peer authenticated using %d bytes HMAC\n", + resp_size); else rv = -1; @@ -4063,226 +4985,248 @@ int drbdd_init(struct drbd_thread *thi) { - struct drbd_conf *mdev = thi->mdev; - unsigned int minor = mdev_to_minor(mdev); + struct drbd_tconn *tconn = thi->tconn; int h; - sprintf(current->comm, "drbd%d_receiver", minor); - - dev_info(DEV, "receiver (re)started\n"); + conn_info(tconn, "receiver (re)started\n"); do { - h = drbd_connect(mdev); + h = conn_connect(tconn); if (h == 0) { - drbd_disconnect(mdev); - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); + conn_disconnect(tconn); + schedule_timeout_interruptible(HZ); } if (h == -1) { - dev_warn(DEV, "Discarding network configuration.\n"); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_warn(tconn, "Discarding network configuration.\n"); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); } } while (h == 0); - if (h > 0) { - if (get_net_conf(mdev)) { - drbdd(mdev); - put_net_conf(mdev); - } - } + if (h > 0) + drbdd(tconn); - drbd_disconnect(mdev); + conn_disconnect(tconn); - dev_info(DEV, "receiver terminated\n"); + conn_info(tconn, "receiver terminated\n"); return 0; } /* ********* acknowledge sender ******** */ -STATIC int got_RqSReply(struct drbd_conf *mdev, struct p_header *h) +STATIC int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_req_state_reply *p = (struct p_req_state_reply *)h; + struct p_req_state_reply *p = pi->data; + int retcode = be32_to_cpu(p->retcode); + + if (retcode >= SS_SUCCESS) { + set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags); + 
} else { + set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags); + conn_err(tconn, "Requested state change failed by peer: %s (%d)\n", + drbd_set_st_err_str(retcode), retcode); + } + wake_up(&tconn->ping_wait); + + return 0; +} +STATIC int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi) +{ + struct drbd_conf *mdev; + struct p_req_state_reply *p = pi->data; int retcode = be32_to_cpu(p->retcode); + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; + if (retcode >= SS_SUCCESS) { set_bit(CL_ST_CHG_SUCCESS, &mdev->flags); } else { set_bit(CL_ST_CHG_FAIL, &mdev->flags); dev_err(DEV, "Requested state change failed by peer: %s (%d)\n", - drbd_set_st_err_str(retcode), retcode); + drbd_set_st_err_str(retcode), retcode); } wake_up(&mdev->state_wait); - return TRUE; + return 0; } -STATIC int got_Ping(struct drbd_conf *mdev, struct p_header *h) +STATIC int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi) { - return drbd_send_ping_ack(mdev); + return drbd_send_ping_ack(tconn); } -STATIC int got_PingAck(struct drbd_conf *mdev, struct p_header *h) +STATIC int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi) { - /* restore idle timeout */ - mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; + if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags)) + wake_up(&tconn->ping_wait); - return TRUE; + return 0; } -STATIC int got_IsInSync(struct drbd_conf *mdev, struct p_header *h) +STATIC int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_block_ack *p = (struct p_block_ack *)h; + struct drbd_conf *mdev; + struct p_block_ack *p = pi->data; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); - D_ASSERT(mdev->agreed_pro_version >= 89); + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; + + D_ASSERT(mdev->tconn->agreed_pro_version >= 89); update_peer_seq(mdev, be32_to_cpu(p->seq_num)); - drbd_rs_complete_io(mdev, sector); - drbd_set_in_sync(mdev, sector, blksize); - /* 
rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ - mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); + if (get_ldev(mdev)) { + drbd_rs_complete_io(mdev, sector); + drbd_set_in_sync(mdev, sector, blksize); + /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ + mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); + put_ldev(mdev); + } dec_rs_pending(mdev); + atomic_add(blksize >> 9, &mdev->rs_sect_in); - return TRUE; -} - -/* when we receive the ACK for a write request, - * verify that we actually know about it */ -static struct drbd_request *_ack_id_to_req(struct drbd_conf *mdev, - u64 id, sector_t sector) -{ - struct hlist_head *slot = tl_hash_slot(mdev, sector); - struct hlist_node *n; - struct drbd_request *req; - - hlist_for_each_entry(req, n, slot, colision) { - if ((unsigned long)req == (unsigned long)id) { - if (req->sector != sector) { - dev_err(DEV, "_ack_id_to_req: found req %p but it has " - "wrong sector (%llus versus %llus)\n", req, - (unsigned long long)req->sector, - (unsigned long long)sector); - break; - } - return req; - } - } - dev_err(DEV, "_ack_id_to_req: failed to find req %p, sector %llus in list\n", - (void *)(unsigned long)id, (unsigned long long)sector); - return NULL; + return 0; } -typedef struct drbd_request *(req_validator_fn) - (struct drbd_conf *mdev, u64 id, sector_t sector); - -static int validate_req_change_req_state(struct drbd_conf *mdev, - u64 id, sector_t sector, req_validator_fn validator, - const char *func, enum drbd_req_event what) +static int +validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector, + struct rb_root *root, const char *func, + enum drbd_req_event what, bool missing_ok) { struct drbd_request *req; struct bio_and_error m; - spin_lock_irq(&mdev->req_lock); - req = validator(mdev, id, sector); + spin_lock_irq(&mdev->tconn->req_lock); + req = find_request(mdev, root, id, sector, missing_ok, func); if (unlikely(!req)) { - spin_unlock_irq(&mdev->req_lock); - 
dev_err(DEV, "%s: got a corrupt block_id/sector pair\n", func); - return FALSE; + spin_unlock_irq(&mdev->tconn->req_lock); + return -EIO; } __req_mod(req, what, &m); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (m.bio) complete_master_bio(mdev, &m); - return TRUE; + return 0; } -STATIC int got_BlockAck(struct drbd_conf *mdev, struct p_header *h) +STATIC int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_block_ack *p = (struct p_block_ack *)h; + struct drbd_conf *mdev; + struct p_block_ack *p = pi->data; sector_t sector = be64_to_cpu(p->sector); int blksize = be32_to_cpu(p->blksize); enum drbd_req_event what; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); - if (is_syncer_block_id(p->block_id)) { + if (p->block_id == ID_SYNCER) { drbd_set_in_sync(mdev, sector, blksize); dec_rs_pending(mdev); - return TRUE; + return 0; } - switch (be16_to_cpu(h->command)) { + switch (pi->cmd) { case P_RS_WRITE_ACK: - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); - what = write_acked_by_peer_and_sis; + what = WRITE_ACKED_BY_PEER_AND_SIS; break; case P_WRITE_ACK: - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); - what = write_acked_by_peer; + what = WRITE_ACKED_BY_PEER; break; case P_RECV_ACK: - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_B); - what = recv_acked_by_peer; + what = RECV_ACKED_BY_PEER; break; - case P_DISCARD_ACK: - D_ASSERT(mdev->net_conf->wire_protocol == DRBD_PROT_C); - what = conflict_discarded_by_peer; + case P_DISCARD_WRITE: + what = DISCARD_WRITE; + break; + case P_RETRY_WRITE: + what = POSTPONE_WRITE; break; default: - D_ASSERT(0); - return FALSE; + BUG(); } return validate_req_change_req_state(mdev, p->block_id, sector, - _ack_id_to_req, __func__ , what); + &mdev->write_requests, __func__, + what, false); } -STATIC int got_NegAck(struct drbd_conf *mdev, struct p_header *h) +STATIC int got_NegAck(struct 
drbd_tconn *tconn, struct packet_info *pi) { - struct p_block_ack *p = (struct p_block_ack *)h; + struct drbd_conf *mdev; + struct p_block_ack *p = pi->data; sector_t sector = be64_to_cpu(p->sector); + int size = be32_to_cpu(p->blksize); + int err; - if (DRBD_ratelimit(5*HZ, 5)) - dev_warn(DEV, "Got NegAck packet. Peer is in troubles?\n"); + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; update_peer_seq(mdev, be32_to_cpu(p->seq_num)); - if (is_syncer_block_id(p->block_id)) { - int size = be32_to_cpu(p->blksize); + if (p->block_id == ID_SYNCER) { dec_rs_pending(mdev); drbd_rs_failed_io(mdev, sector, size); - return TRUE; + return 0; } - return validate_req_change_req_state(mdev, p->block_id, sector, - _ack_id_to_req, __func__ , neg_acked); + + err = validate_req_change_req_state(mdev, p->block_id, sector, + &mdev->write_requests, __func__, + NEG_ACKED, true); + if (err) { + /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs. + The master bio might already be completed, therefore the + request is no longer in the collision hash. */ + /* In Protocol B we might already have got a P_RECV_ACK + but then get a P_NEG_ACK afterwards. 
*/ + drbd_set_out_of_sync(mdev, sector, size); + } + return 0; } -STATIC int got_NegDReply(struct drbd_conf *mdev, struct p_header *h) +STATIC int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_block_ack *p = (struct p_block_ack *)h; + struct drbd_conf *mdev; + struct p_block_ack *p = pi->data; sector_t sector = be64_to_cpu(p->sector); + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; + update_peer_seq(mdev, be32_to_cpu(p->seq_num)); - dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n", + + dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n", (unsigned long long)sector, be32_to_cpu(p->blksize)); return validate_req_change_req_state(mdev, p->block_id, sector, - _ar_id_to_req, __func__ , neg_acked); + &mdev->read_requests, __func__, + NEG_ACKED, false); } -STATIC int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h) +STATIC int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi) { + struct drbd_conf *mdev; sector_t sector; int size; - struct p_block_ack *p = (struct p_block_ack *)h; + struct p_block_ack *p = pi->data; + + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); - D_ASSERT(p->block_id == ID_SYNCER); update_peer_seq(mdev, be32_to_cpu(p->seq_num)); @@ -4290,146 +5234,210 @@ if (get_ldev_if_state(mdev, D_FAILED)) { drbd_rs_complete_io(mdev, sector); - drbd_rs_failed_io(mdev, sector, size); + switch (pi->cmd) { + case P_NEG_RS_DREPLY: + drbd_rs_failed_io(mdev, sector, size); + case P_RS_CANCEL: + break; + default: + BUG(); + } put_ldev(mdev); } - return TRUE; + return 0; } -STATIC int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h) +STATIC int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_barrier_ack *p = (struct p_barrier_ack *)h; + struct drbd_conf *mdev; + struct p_barrier_ack *p = pi->data; - tl_release(mdev, p->barrier, 
be32_to_cpu(p->set_size)); + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; + + tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size)); + + if (mdev->state.conn == C_AHEAD && + atomic_read(&mdev->ap_in_flight) == 0 && + !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) { + mdev->start_resync_timer.expires = jiffies + HZ; + add_timer(&mdev->start_resync_timer); + } - return TRUE; + return 0; } -STATIC int got_OVResult(struct drbd_conf *mdev, struct p_header *h) +STATIC int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi) { - struct p_block_ack *p = (struct p_block_ack *)h; + struct drbd_conf *mdev; + struct p_block_ack *p = pi->data; struct drbd_work *w; sector_t sector; int size; + mdev = vnr_to_mdev(tconn, pi->vnr); + if (!mdev) + return -EIO; + sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); update_peer_seq(mdev, be32_to_cpu(p->seq_num)); if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC) - drbd_ov_oos_found(mdev, sector, size); + drbd_ov_out_of_sync_found(mdev, sector, size); else - ov_oos_print(mdev); + ov_out_of_sync_print(mdev); + + if (!get_ldev(mdev)) + return 0; drbd_rs_complete_io(mdev, sector); dec_rs_pending(mdev); - if (--mdev->ov_left == 0) { + --mdev->ov_left; + + /* let's advance progress step marks only for every other megabyte */ + if ((mdev->ov_left & 0x200) == 0x200) + drbd_advance_rs_marks(mdev, mdev->ov_left); + + if (mdev->ov_left == 0) { w = kmalloc(sizeof(*w), GFP_NOIO); if (w) { w->cb = w_ov_finished; - drbd_queue_work_front(&mdev->data.work, w); + w->mdev = mdev; + drbd_queue_work_front(&mdev->tconn->data.work, w); } else { dev_err(DEV, "kmalloc(w) failed."); - ov_oos_print(mdev); + ov_out_of_sync_print(mdev); drbd_resync_finished(mdev); } } - return TRUE; + put_ldev(mdev); + return 0; +} + +STATIC int got_skip(struct drbd_tconn *tconn, struct packet_info *pi) +{ + return 0; +} + +static int tconn_finish_peer_reqs(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + int vnr, 
not_empty = 0; + + do { + clear_bit(SIGNAL_ASENDER, &tconn->flags); + flush_signals(current); + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + kref_get(&mdev->kref); + rcu_read_unlock(); + if (drbd_finish_peer_reqs(mdev)) { + kref_put(&mdev->kref, &drbd_minor_destroy); + return 1; + } + kref_put(&mdev->kref, &drbd_minor_destroy); + rcu_read_lock(); + } + set_bit(SIGNAL_ASENDER, &tconn->flags); + + spin_lock_irq(&tconn->req_lock); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + not_empty = !list_empty(&mdev->done_ee); + if (not_empty) + break; + } + spin_unlock_irq(&tconn->req_lock); + rcu_read_unlock(); + } while (not_empty); + + return 0; } struct asender_cmd { size_t pkt_size; - int (*process)(struct drbd_conf *mdev, struct p_header *h); + int (*fn)(struct drbd_tconn *tconn, struct packet_info *); }; -static struct asender_cmd *get_asender_cmd(int cmd) -{ - static struct asender_cmd asender_tbl[] = { - /* anything missing from this table is in - * the drbd_cmd_handler (drbd_default_handler) table, - * see the beginning of drbdd() */ - [P_PING] = { sizeof(struct p_header), got_Ping }, - [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, +static struct asender_cmd asender_tbl[] = { + [P_PING] = { 0, got_Ping }, + [P_PING_ACK] = { 0, got_PingAck }, [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, - [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, + [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck }, [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply }, - [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply}, + [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply }, [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult }, [P_BARRIER_ACK] = { sizeof(struct 
p_barrier_ack), got_BarrierAck }, [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, - [P_MAX_CMD] = { 0, NULL }, - }; - if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) - return NULL; - return &asender_tbl[cmd]; -} + [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, + [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply }, + [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply }, + [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck }, +}; int drbd_asender(struct drbd_thread *thi) { - struct drbd_conf *mdev = thi->mdev; - struct p_header *h = &mdev->meta.rbuf.header; + struct drbd_tconn *tconn = thi->tconn; struct asender_cmd *cmd = NULL; - - int rv, len; - void *buf = h; + struct packet_info pi; + int rv; + void *buf = tconn->meta.rbuf; int received = 0; - int expect = sizeof(struct p_header); - int empty; - - sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); + unsigned int header_size = drbd_header_size(tconn); + int expect = header_size; + bool ping_timeout_active = false; + struct net_conf *nc; + int ping_timeo, tcp_cork, ping_int; current->policy = SCHED_RR; /* Make this a realtime task! 
*/ current->rt_priority = 2; /* more important than all other tasks */ - while (get_t_state(thi) == Running) { - drbd_thread_current_set_cpu(mdev); - if (test_and_clear_bit(SEND_PING, &mdev->flags)) { - ERR_IF(!drbd_send_ping(mdev)) goto reconnect; - mdev->meta.socket->sk->sk_rcvtimeo = - mdev->net_conf->ping_timeo*HZ/10; - } + while (get_t_state(thi) == RUNNING) { + drbd_thread_current_set_cpu(thi); - /* conditionally cork; - * it may hurt latency if we cork without much to send */ - if (!mdev->net_conf->no_cork && - 3 < atomic_read(&mdev->unacked_cnt)) - drbd_tcp_cork(mdev->meta.socket); - while (1) { - clear_bit(SIGNAL_ASENDER, &mdev->flags); - flush_signals(current); - if (!drbd_process_done_ee(mdev)) { - dev_err(DEV, "process_done_ee() = NOT_OK\n"); + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + ping_timeo = nc->ping_timeo; + tcp_cork = nc->tcp_cork; + ping_int = nc->ping_int; + rcu_read_unlock(); + + if (test_and_clear_bit(SEND_PING, &tconn->flags)) { + if (drbd_send_ping(tconn)) { + conn_err(tconn, "drbd_send_ping has failed\n"); goto reconnect; } - /* to avoid race with newly queued ACKs */ - set_bit(SIGNAL_ASENDER, &mdev->flags); - spin_lock_irq(&mdev->req_lock); - empty = list_empty(&mdev->done_ee); - spin_unlock_irq(&mdev->req_lock); - /* new ack may have been queued right here, - * but then there is also a signal pending, - * and we start over... 
*/ - if (empty) - break; + tconn->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10; + ping_timeout_active = true; + } + + /* TODO: conditionally cork; it may hurt latency if we cork without + much to send */ + if (tcp_cork) + drbd_tcp_cork(tconn->meta.socket); + if (tconn_finish_peer_reqs(tconn)) { + conn_err(tconn, "tconn_finish_peer_reqs() failed\n"); + goto reconnect; } /* but unconditionally uncork unless disabled */ - if (!mdev->net_conf->no_cork) - drbd_tcp_uncork(mdev->meta.socket); + if (tcp_cork) + drbd_tcp_uncork(tconn->meta.socket); /* short circuit, recv_msg would return EINTR anyways. */ if (signal_pending(current)) continue; - rv = drbd_recv_short(mdev, mdev->meta.socket, - buf, expect-received, 0); - clear_bit(SIGNAL_ASENDER, &mdev->flags); + rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0); + clear_bit(SIGNAL_ASENDER, &tconn->flags); flush_signals(current); @@ -4447,70 +5455,78 @@ received += rv; buf += rv; } else if (rv == 0) { - dev_err(DEV, "meta connection shut down by peer.\n"); + conn_err(tconn, "meta connection shut down by peer.\n"); goto reconnect; } else if (rv == -EAGAIN) { - if (mdev->meta.socket->sk->sk_rcvtimeo == - mdev->net_conf->ping_timeo*HZ/10) { - dev_err(DEV, "PingAck did not arrive in time.\n"); + /* If the data socket received something meanwhile, + * that is good enough: peer is still alive. */ + if (time_after(tconn->last_received, + jiffies - tconn->meta.socket->sk->sk_rcvtimeo)) + continue; + if (ping_timeout_active) { + conn_err(tconn, "PingAck did not arrive in time.\n"); goto reconnect; } - set_bit(SEND_PING, &mdev->flags); + set_bit(SEND_PING, &tconn->flags); continue; } else if (rv == -EINTR) { continue; } else { - dev_err(DEV, "sock_recvmsg returned %d\n", rv); + conn_err(tconn, "sock_recvmsg returned %d\n", rv); goto reconnect; } if (received == expect && cmd == NULL) { - if (unlikely(h->magic != BE_DRBD_MAGIC)) { - dev_err(DEV, "magic?? 
on meta m: 0x%lx c: %d l: %d\n", - (long)be32_to_cpu(h->magic), - h->command, h->length); + if (decode_header(tconn, tconn->meta.rbuf, &pi)) goto reconnect; - } - cmd = get_asender_cmd(be16_to_cpu(h->command)); - len = be16_to_cpu(h->length); - if (unlikely(cmd == NULL)) { - dev_err(DEV, "unknown command?? on meta m: 0x%lx c: %d l: %d\n", - (long)be32_to_cpu(h->magic), - h->command, h->length); + cmd = &asender_tbl[pi.cmd]; + if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) { + conn_err(tconn, "Unexpected meta packet %s (0x%04x)\n", + cmdname(pi.cmd), pi.cmd); goto disconnect; } - expect = cmd->pkt_size; - ERR_IF(len != expect-sizeof(struct p_header)) { - trace_drbd_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); - DUMPI(expect); + expect = header_size + cmd->pkt_size; + if (pi.size != expect - header_size) { + conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n", + pi.cmd, pi.size); goto reconnect; } } if (received == expect) { - D_ASSERT(cmd != NULL); - trace_drbd_packet(mdev, mdev->meta.socket, 1, (void *)h, __FILE__, __LINE__); - if (!cmd->process(mdev, h)) + bool err; + + err = cmd->fn(tconn, &pi); + if (err) { + conn_err(tconn, "%pf failed\n", cmd->fn); goto reconnect; + } + + tconn->last_received = jiffies; + + if (cmd == &asender_tbl[P_PING_ACK]) { + /* restore idle timeout */ + tconn->meta.socket->sk->sk_rcvtimeo = ping_int * HZ; + ping_timeout_active = false; + } - buf = h; + buf = tconn->meta.rbuf; received = 0; - expect = sizeof(struct p_header); + expect = header_size; cmd = NULL; } } if (0) { reconnect: - drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); + conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD); } if (0) { disconnect: - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD); } - clear_bit(SIGNAL_ASENDER, &mdev->flags); + clear_bit(SIGNAL_ASENDER, &tconn->flags); - D_ASSERT(mdev->state.conn < C_CONNECTED); - dev_info(DEV, "asender 
terminated\n"); + conn_info(tconn, "asender terminated\n"); return 0; } diff -Nru drbd8-8.3.7/drbd/drbd_req.c drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_req.c --- drbd8-8.3.7/drbd/drbd_req.c 2010-01-07 09:09:34.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_req.c 2012-02-02 14:09:14.000000000 +0000 @@ -29,7 +29,6 @@ #include #include #include "drbd_int.h" -#include "drbd_tracing.h" #include "drbd_req.h" @@ -43,6 +42,8 @@ #define _drbd_end_io_acct(...) do {} while (0) #else +STATIC bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size); + /* Update disk stats at start of I/O request */ static void _drbd_start_io_acct(struct drbd_conf *mdev, struct drbd_request *req, struct bio *bio) { @@ -60,6 +61,8 @@ cpu = part_stat_lock(); part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]); part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio)); + (void) cpu; /* The macro invocations above want the cpu argument, I do not like + the compiler warning about cpu only assigned but never used... */ part_inc_in_flight(&mdev->vdisk->part0, rw); part_stat_unlock(); #endif @@ -89,33 +92,67 @@ #endif +static struct drbd_request *drbd_req_new(struct drbd_conf *mdev, + struct bio *bio_src) +{ + struct drbd_request *req; + + req = mempool_alloc(drbd_request_mempool, GFP_NOIO); + if (!req) + return NULL; + + drbd_req_make_private_bio(req, bio_src); + req->rq_state = bio_data_dir(bio_src) == WRITE ? 
RQ_WRITE : 0; + req->w.mdev = mdev; + req->master_bio = bio_src; + req->epoch = 0; + + drbd_clear_interval(&req->i); + req->i.sector = bio_src->bi_sector; + req->i.size = bio_src->bi_size; + req->i.local = true; + req->i.waiting = false; + + INIT_LIST_HEAD(&req->tl_requests); + INIT_LIST_HEAD(&req->w.list); + + return req; +} + +static void drbd_req_free(struct drbd_request *req) +{ + mempool_free(req, drbd_request_mempool); +} + /* rw is bio_data_dir(), only READ or WRITE */ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw) { const unsigned long s = req->rq_state; + + /* remove it from the transfer log. + * well, only if it had been there in the first + * place... if it had not (local only or conflicting + * and never sent), it should still be "empty" as + * initialized in drbd_req_new(), so we can list_del() it + * here unconditionally */ + list_del(&req->tl_requests); + /* if it was a write, we may have to set the corresponding * bit(s) out-of-sync first. If it had a local part, we need to * release the reference to the activity log. */ if (rw == WRITE) { - /* remove it from the transfer log. - * well, only if it had been there in the first - * place... if it had not (local only or conflicting - * and never sent), it should still be "empty" as - * initialized in drbd_req_new(), so we can list_del() it - * here unconditionally */ - list_del(&req->tl_requests); /* Set out-of-sync unless both OK flags are set * (local only or remote failed). 
* Other places where we set out-of-sync: * READ with local io-error */ if (!(s & RQ_NET_OK) || !(s & RQ_LOCAL_OK)) - drbd_set_out_of_sync(mdev, req->sector, req->size); + drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); if ((s & RQ_NET_OK) && (s & RQ_LOCAL_OK) && (s & RQ_NET_SIS)) - drbd_set_in_sync(mdev, req->sector, req->size); + drbd_set_in_sync(mdev, req->i.sector, req->i.size); /* one might be tempted to move the drbd_al_complete_io - * to the local io completion callback drbd_endio_pri. + * to the local io completion callback drbd_request_endio. * but, if this was a mirror write, we may only * drbd_al_complete_io after this is RQ_NET_DONE, * otherwise the extent could be dropped from the al @@ -126,136 +163,83 @@ */ if (s & RQ_LOCAL_MASK) { if (get_ldev_if_state(mdev, D_FAILED)) { - drbd_al_complete_io(mdev, req->sector); + if (s & RQ_IN_ACT_LOG) + drbd_al_complete_io(mdev, &req->i); put_ldev(mdev); } else if (DRBD_ratelimit(5*HZ, 3)) { - dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), " + dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu, %u), " "but my Disk seems to have failed :(\n", - (unsigned long long) req->sector); + (unsigned long long) req->i.sector, req->i.size); } } } - /* if it was a local io error, we want to notify our - * peer about that, and see if we need to - * detach the disk and stuff. - * to avoid allocating some special work - * struct, reuse the request. */ - - /* THINK - * why do we do this not when we detect the error, - * but delay it until it is "done", i.e. possibly - * until the next barrier ack? 
*/ - - if (rw == WRITE && - ((s & RQ_LOCAL_MASK) && !(s & RQ_LOCAL_OK))) { - if (!(req->w.list.next == LIST_POISON1 || - list_empty(&req->w.list))) { - /* DEBUG ASSERT only; if this triggers, we - * probably corrupt the worker list here */ - DUMPP(req->w.list.next); - DUMPP(req->w.list.prev); - } - req->w.cb = w_io_error; - drbd_queue_work(&mdev->data.work, &req->w); - /* drbd_req_free() is done in w_io_error */ - } else { - drbd_req_free(req); - } + drbd_req_free(req); } static void queue_barrier(struct drbd_conf *mdev) { struct drbd_tl_epoch *b; + struct drbd_tconn *tconn = mdev->tconn; /* We are within the req_lock. Once we queued the barrier for sending, * we set the CREATE_BARRIER bit. It is cleared as soon as a new * barrier/epoch object is added. This is the only place this bit is * set. It indicates that the barrier for this epoch is already queued, * and no new epoch has been created yet. */ - if (test_bit(CREATE_BARRIER, &mdev->flags)) + if (test_bit(CREATE_BARRIER, &tconn->flags)) return; - b = mdev->newest_tle; + b = tconn->newest_tle; b->w.cb = w_send_barrier; + b->w.mdev = mdev; /* inc_ap_pending done here, so we won't * get imbalanced on connection loss. * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in tl_clear. */ inc_ap_pending(mdev); - drbd_queue_work(&mdev->data.work, &b->w); - set_bit(CREATE_BARRIER, &mdev->flags); + drbd_queue_work(&tconn->data.work, &b->w); + set_bit(CREATE_BARRIER, &tconn->flags); } static void _about_to_complete_local_write(struct drbd_conf *mdev, struct drbd_request *req) { const unsigned long s = req->rq_state; - struct drbd_request *i; - struct drbd_epoch_entry *e; - struct hlist_node *n; - struct hlist_head *slot; - /* before we can signal completion to the upper layers, - * we may need to close the current epoch */ + /* Before we can signal completion to the upper layers, + * we may need to close the current epoch. 
+ * We can skip this, if this request has not even been sent, because we + * did not have a fully established connection yet/anymore, during + * bitmap exchange, or while we are C_AHEAD due to congestion policy. + */ if (mdev->state.conn >= C_CONNECTED && - req->epoch == mdev->newest_tle->br_number) + (s & RQ_NET_SENT) != 0 && + req->epoch == mdev->tconn->newest_tle->br_number) queue_barrier(mdev); - - /* we need to do the conflict detection stuff, - * if we have the ee_hash (two_primaries) and - * this has been on the network */ - if ((s & RQ_NET_DONE) && mdev->ee_hash != NULL) { - const sector_t sector = req->sector; - const int size = req->size; - - /* ASSERT: - * there must be no conflicting requests, since - * they must have been failed on the spot */ -#define OVERLAPS overlaps(sector, size, i->sector, i->size) - slot = tl_hash_slot(mdev, sector); - hlist_for_each_entry(i, n, slot, colision) { - if (OVERLAPS) { - dev_alert(DEV, "LOGIC BUG: completed: %p %llus +%u; " - "other: %p %llus +%u\n", - req, (unsigned long long)sector, size, - i, (unsigned long long)i->sector, i->size); - } - } - - /* maybe "wake" those conflicting epoch entries - * that wait for this request to finish. - * - * currently, there can be only _one_ such ee - * (well, or some more, which would be pending - * P_DISCARD_ACK not yet sent by the asender...), - * since we block the receiver thread upon the - * first conflict detection, which will wait on - * misc_wait. maybe we want to assert that? - * - * anyways, if we found one, - * we just have to do a wake_up. 
*/ -#undef OVERLAPS -#define OVERLAPS overlaps(sector, size, e->sector, e->size) - slot = ee_hash_slot(mdev, req->sector); - hlist_for_each_entry(e, n, slot, colision) { - if (OVERLAPS) { - wake_up(&mdev->misc_wait); - break; - } - } - } -#undef OVERLAPS } void complete_master_bio(struct drbd_conf *mdev, struct bio_and_error *m) { - trace_drbd_bio(mdev, "Rq", m->bio, 1, NULL); bio_endio(m->bio, m->error); dec_ap_bio(mdev); } + +static void drbd_remove_request_interval(struct rb_root *root, + struct drbd_request *req) +{ + struct drbd_conf *mdev = req->w.mdev; + struct drbd_interval *i = &req->i; + + drbd_remove_interval(root, i); + + /* Wake up any processes waiting for this request to complete. */ + if (i->waiting) + wake_up(&mdev->misc_wait); +} + /* Helper for __req_mod(). * Set m->bio to the master bio, if it is fit to be completed, * or leave it alone (it is initialized to NULL in __req_mod), @@ -265,11 +249,8 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m) { const unsigned long s = req->rq_state; - struct drbd_conf *mdev = req->mdev; - /* only WRITES may end up here without a master bio (on barrier ack) */ - int rw = req->master_bio ? bio_data_dir(req->master_bio) : WRITE; - - trace_drbd_req(req, nothing, "_req_may_be_done"); + struct drbd_conf *mdev = req->w.mdev; + int rw = req->rq_state & RQ_WRITE ? WRITE : READ; /* we must not complete the master bio, while it is * still being processed by _drbd_send_zc_bio (drbd_send_dblock) @@ -280,18 +261,22 @@ * the receiver, * the bio_endio completion callbacks. */ + if (s & RQ_LOCAL_PENDING && !(s & RQ_LOCAL_ABORTED)) + return; + if (req->i.waiting) { + /* Retry all conflicting peer requests. 
*/ + wake_up(&mdev->misc_wait); + } if (s & RQ_NET_QUEUED) return; if (s & RQ_NET_PENDING) return; - if (s & RQ_LOCAL_PENDING) - return; if (req->master_bio) { - /* this is data_received (remote read) + /* this is DATA_RECEIVED (remote read) * or protocol C P_WRITE_ACK * or protocol B P_RECV_ACK - * or protocol A "handed_over_to_network" (SendAck) + * or protocol A "HANDED_OVER_TO_NETWORK" (SendAck) * or canceled or failed, * or killed from the transfer log due to connection loss. */ @@ -307,17 +292,23 @@ * what we need to do here is just: complete the master_bio. * * local completion error, if any, has been stored as ERR_PTR - * in private_bio within drbd_endio_pri. + * in private_bio within drbd_request_endio. */ int ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK); int error = PTR_ERR(req->private_bio); /* remove the request from the conflict detection * respective block_id verification hash */ - if (!hlist_unhashed(&req->colision)) - hlist_del(&req->colision); - else - D_ASSERT((s & RQ_NET_MASK) == 0); + if (!drbd_interval_empty(&req->i)) { + struct rb_root *root; + + if (rw == WRITE) + root = &mdev->write_requests; + else + root = &mdev->read_requests; + drbd_remove_request_interval(root, req); + } else if (!(s & RQ_POSTPONED)) + D_ASSERT((s & (RQ_NET_MASK & ~RQ_NET_DONE)) == 0); /* for writes we need to do some extra housekeeping */ if (rw == WRITE) @@ -326,108 +317,32 @@ /* Update disk stats */ _drbd_end_io_acct(mdev, req); - m->error = ok ? 0 : (error ?: -EIO); - m->bio = req->master_bio; + if (!(s & RQ_POSTPONED)) { + m->error = ok ? 0 : (error ?: -EIO); + m->bio = req->master_bio; + } req->master_bio = NULL; } + if (s & RQ_LOCAL_PENDING) + return; + if ((s & RQ_NET_MASK) == 0 || (s & RQ_NET_DONE)) { /* this is disconnected (local only) operation, - * or protocol C P_WRITE_ACK, - * or protocol A or B P_BARRIER_ACK, + * or protocol A, B, or C P_BARRIER_ACK, * or killed from the transfer log due to connection loss. 
*/ _req_is_done(mdev, req, rw); } /* else: network part and not DONE yet. that is - * protocol A or B, barrier ack still pending... */ + * protocol A, B, or C, barrier ack still pending... */ } -/* - * checks whether there was an overlapping request - * or ee already registered. - * - * if so, return 1, in which case this request is completed on the spot, - * without ever being submitted or send. - * - * return 0 if it is ok to submit this request. - * - * NOTE: - * paranoia: assume something above us is broken, and issues different write - * requests for the same block simultaneously... - * - * To ensure these won't be reordered differently on both nodes, resulting in - * diverging data sets, we discard the later one(s). Not that this is supposed - * to happen, but this is the rationale why we also have to check for - * conflicting requests with local origin, and why we have to do so regardless - * of whether we allowed multiple primaries. - * - * BTW, in case we only have one primary, the ee_hash is empty anyways, and the - * second hlist_for_each_entry becomes a noop. This is even simpler than to - * grab a reference on the net_conf, and check for the two_primaries flag... - */ -STATIC int _req_conflicts(struct drbd_request *req) +static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m) { - struct drbd_conf *mdev = req->mdev; - const sector_t sector = req->sector; - const int size = req->size; - struct drbd_request *i; - struct drbd_epoch_entry *e; - struct hlist_node *n; - struct hlist_head *slot; - - D_ASSERT(hlist_unhashed(&req->colision)); - - if (!get_net_conf(mdev)) - return 0; - - /* BUG_ON */ - ERR_IF (mdev->tl_hash_s == 0) - goto out_no_conflict; - BUG_ON(mdev->tl_hash == NULL); - -#define OVERLAPS overlaps(i->sector, i->size, sector, size) - slot = tl_hash_slot(mdev, sector); - hlist_for_each_entry(i, n, slot, colision) { - if (OVERLAPS) { - dev_alert(DEV, "%s[%u] Concurrent local write detected! 
" - "[DISCARD L] new: %llus +%u; " - "pending: %llus +%u\n", - current->comm, current->pid, - (unsigned long long)sector, size, - (unsigned long long)i->sector, i->size); - goto out_conflict; - } - } - - if (mdev->ee_hash_s) { - /* now, check for overlapping requests with remote origin */ - BUG_ON(mdev->ee_hash == NULL); -#undef OVERLAPS -#define OVERLAPS overlaps(e->sector, e->size, sector, size) - slot = ee_hash_slot(mdev, sector); - hlist_for_each_entry(e, n, slot, colision) { - if (OVERLAPS) { - dev_alert(DEV, "%s[%u] Concurrent remote write detected!" - " [DISCARD L] new: %llus +%u; " - "pending: %llus +%u\n", - current->comm, current->pid, - (unsigned long long)sector, size, - (unsigned long long)e->sector, e->size); - goto out_conflict; - } - } - } -#undef OVERLAPS - -out_no_conflict: - /* this is like it should be, and what we expected. - * our users do behave after all... */ - put_net_conf(mdev); - return 0; + struct drbd_conf *mdev = req->w.mdev; -out_conflict: - put_net_conf(mdev); - return 1; + if (!drbd_suspended(mdev)) + _req_may_be_done(req, m); } /* obviously this could be coded as many single functions @@ -442,13 +357,15 @@ * and it enforces that we have to think in a very structured manner * about the "events" that may happen to a request during its life time ... */ -void __req_mod(struct drbd_request *req, enum drbd_req_event what, +int __req_mod(struct drbd_request *req, enum drbd_req_event what, struct bio_and_error *m) { - struct drbd_conf *mdev = req->mdev; - m->bio = NULL; + struct drbd_conf *mdev = req->w.mdev; + struct net_conf *nc; + int p, rv = 0; - trace_drbd_req(req, what, NULL); + if (m) + m->bio = NULL; switch (what) { default: @@ -457,92 +374,106 @@ /* does not happen... 
* initialization done in drbd_req_new - case created: + case CREATED: break; */ - case to_be_send: /* via network */ - /* reached via drbd_make_request_common + case TO_BE_SENT: /* via network */ + /* reached via __drbd_make_request * and from w_read_retry_remote */ D_ASSERT(!(req->rq_state & RQ_NET_MASK)); req->rq_state |= RQ_NET_PENDING; + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + p = nc->wire_protocol; + rcu_read_unlock(); + req->rq_state |= + p == DRBD_PROT_C ? RQ_EXP_WRITE_ACK : + p == DRBD_PROT_B ? RQ_EXP_RECEIVE_ACK : 0; inc_ap_pending(mdev); break; - case to_be_submitted: /* locally */ - /* reached via drbd_make_request_common */ + case TO_BE_SUBMITTED: /* locally */ + /* reached via __drbd_make_request */ D_ASSERT(!(req->rq_state & RQ_LOCAL_MASK)); req->rq_state |= RQ_LOCAL_PENDING; break; - case completed_ok: - if (bio_data_dir(req->master_bio) == WRITE) - mdev->writ_cnt += req->size>>9; + case COMPLETED_OK: + if (req->rq_state & RQ_WRITE) + mdev->writ_cnt += req->i.size >> 9; else - mdev->read_cnt += req->size>>9; + mdev->read_cnt += req->i.size >> 9; req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK); req->rq_state &= ~RQ_LOCAL_PENDING; - _req_may_be_done(req, m); + _req_may_be_done_not_susp(req, m); put_ldev(mdev); break; - case write_completed_with_error: + case ABORT_DISK_IO: + req->rq_state |= RQ_LOCAL_ABORTED; + if (req->rq_state & RQ_WRITE) + _req_may_be_done_not_susp(req, m); + else + goto goto_queue_for_net_read; + break; + + case WRITE_COMPLETED_WITH_ERROR: req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; - dev_alert(DEV, "Local WRITE failed sec=%llus size=%u\n", - (unsigned long long)req->sector, req->size); - /* and now: check how to handle local io error. 
*/ - __drbd_chk_io_error(mdev, FALSE); - _req_may_be_done(req, m); + __drbd_chk_io_error(mdev, false); + _req_may_be_done_not_susp(req, m); put_ldev(mdev); break; - case read_ahead_completed_with_error: + case READ_AHEAD_COMPLETED_WITH_ERROR: /* it is legal to fail READA */ req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; - _req_may_be_done(req, m); + _req_may_be_done_not_susp(req, m); put_ldev(mdev); break; - case read_completed_with_error: - drbd_set_out_of_sync(mdev, req->sector, req->size); + case READ_COMPLETED_WITH_ERROR: + drbd_set_out_of_sync(mdev, req->i.sector, req->i.size); req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; - dev_alert(DEV, "Local READ failed sec=%llus size=%u\n", - (unsigned long long)req->sector, req->size); - /* _req_mod(req,to_be_send); oops, recursion... */ D_ASSERT(!(req->rq_state & RQ_NET_MASK)); - req->rq_state |= RQ_NET_PENDING; - inc_ap_pending(mdev); - __drbd_chk_io_error(mdev, FALSE); + __drbd_chk_io_error(mdev, false); put_ldev(mdev); - /* NOTE: if we have no connection, - * or know the peer has no good data either, - * then we don't actually need to "queue_for_net_read", - * but we do so anyways, since the drbd_io_error() - * and the potential state change to "Diskless" - * needs to be done from process context */ - /* fall through: _req_mod(req,queue_for_net_read); */ + goto_queue_for_net_read: + + /* no point in retrying if there is no good remote data, + * or we have no connection. */ + if (mdev->state.pdsk != D_UP_TO_DATE) { + _req_may_be_done_not_susp(req, m); + break; + } + + /* _req_mod(req,TO_BE_SENT); oops, recursion... */ + req->rq_state |= RQ_NET_PENDING; + inc_ap_pending(mdev); + /* fall through: _req_mod(req,QUEUE_FOR_NET_READ); */ - case queue_for_net_read: + case QUEUE_FOR_NET_READ: /* READ or READA, and * no local disk, * or target area marked as invalid, * or just got an io-error. 
*/ - /* from drbd_make_request_common + /* from __drbd_make_request * or from bio_endio during read io-error recovery */ /* so we can verify the handle in the answer packet * corresponding hlist_del is in _req_may_be_done() */ - hlist_add_head(&req->colision, ar_hash_slot(mdev, req->sector)); + D_ASSERT(drbd_interval_empty(&req->i)); + drbd_insert_interval(&mdev->read_requests, &req->i); set_bit(UNPLUG_REMOTE, &mdev->flags); @@ -551,15 +482,16 @@ req->w.cb = (req->rq_state & RQ_LOCAL_MASK) ? w_read_retry_remote : w_send_read_req; - drbd_queue_work(&mdev->data.work, &req->w); + drbd_queue_work(&mdev->tconn->data.work, &req->w); break; - case queue_for_net_write: + case QUEUE_FOR_NET_WRITE: /* assert something? */ - /* from drbd_make_request_common only */ + /* from __drbd_make_request only */ - hlist_add_head(&req->colision, tl_hash_slot(mdev, req->sector)); /* corresponding hlist_del is in _req_may_be_done() */ + D_ASSERT(drbd_interval_empty(&req->i)); + drbd_insert_interval(&mdev->write_requests, &req->i); /* NOTE * In case the req ended up on the transfer log before being @@ -570,7 +502,7 @@ * * _req_add_to_epoch(req); this has to be after the * _maybe_start_new_epoch(req); which happened in - * drbd_make_request_common, because we now may set the bit + * __drbd_make_request, because we now may set the bit * again ourselves to close the current epoch. * * Add req to the (now) current epoch (barrier). */ @@ -580,44 +512,57 @@ * hurting performance. 
*/ set_bit(UNPLUG_REMOTE, &mdev->flags); - /* see drbd_make_request_common, + /* see __drbd_make_request, * just after it grabs the req_lock */ - D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0); + D_ASSERT(test_bit(CREATE_BARRIER, &mdev->tconn->flags) == 0); - req->epoch = mdev->newest_tle->br_number; - list_add_tail(&req->tl_requests, - &mdev->newest_tle->requests); + req->epoch = mdev->tconn->newest_tle->br_number; /* increment size of current epoch */ - mdev->newest_tle->n_req++; + mdev->tconn->newest_tle->n_writes++; /* queue work item to send data */ D_ASSERT(req->rq_state & RQ_NET_PENDING); req->rq_state |= RQ_NET_QUEUED; req->w.cb = w_send_dblock; - drbd_queue_work(&mdev->data.work, &req->w); + drbd_queue_work(&mdev->tconn->data.work, &req->w); /* close the epoch, in case it outgrew the limit */ - if (mdev->newest_tle->n_req >= mdev->net_conf->max_epoch_size) + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + p = nc->max_epoch_size; + rcu_read_unlock(); + if (mdev->tconn->newest_tle->n_writes >= p) queue_barrier(mdev); break; - case send_canceled: + case QUEUE_FOR_SEND_OOS: + req->rq_state |= RQ_NET_QUEUED; + req->w.cb = w_send_out_of_sync; + drbd_queue_work(&mdev->tconn->data.work, &req->w); + break; + + case OOS_HANDED_TO_NETWORK: + /* actually the same */ + case SEND_CANCELED: /* treat it the same */ - case send_failed: + case SEND_FAILED: /* real cleanup will be done from tl_clear. just update flags * so it is no longer marked as on the worker queue */ req->rq_state &= ~RQ_NET_QUEUED; /* if we did it right, tl_clear should be scheduled only after * this, so this should not be necessary! */ - _req_may_be_done(req, m); + _req_may_be_done_not_susp(req, m); break; - case handed_over_to_network: + case HANDED_OVER_TO_NETWORK: /* assert something? 
*/ + if (bio_data_dir(req->master_bio) == WRITE) + atomic_add(req->i.size >> 9, &mdev->ap_in_flight); + if (bio_data_dir(req->master_bio) == WRITE && - mdev->net_conf->wire_protocol == DRBD_PROT_A) { + !(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK))) { /* this is what is dangerous about protocol A: * pretend it was successfully written on the peer. */ if (req->rq_state & RQ_NET_PENDING) { @@ -632,39 +577,48 @@ req->rq_state &= ~RQ_NET_QUEUED; req->rq_state |= RQ_NET_SENT; /* because _drbd_send_zc_bio could sleep, and may want to - * dereference the bio even after the "write_acked_by_peer" and - * "completed_ok" events came in, once we return from + * dereference the bio even after the "WRITE_ACKED_BY_PEER" and + * "COMPLETED_OK" events came in, once we return from * _drbd_send_zc_bio (drbd_send_dblock), we have to check * whether it is done already, and end it. */ - _req_may_be_done(req, m); + _req_may_be_done_not_susp(req, m); break; - case connection_lost_while_pending: + case READ_RETRY_REMOTE_CANCELED: + req->rq_state &= ~RQ_NET_QUEUED; + /* fall through, in case we raced with drbd_disconnect */ + case CONNECTION_LOST_WHILE_PENDING: /* transfer log cleanup after connection loss */ /* assert something? */ if (req->rq_state & RQ_NET_PENDING) dec_ap_pending(mdev); + + p = !(req->rq_state & RQ_WRITE) && req->rq_state & RQ_NET_PENDING; + req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); req->rq_state |= RQ_NET_DONE; + if (req->rq_state & RQ_NET_SENT && req->rq_state & RQ_WRITE) + atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); + /* if it is still queued, we may not complete it here. * it will be canceled soon. 
*/ - if (!(req->rq_state & RQ_NET_QUEUED)) - _req_may_be_done(req, m); + if (!(req->rq_state & RQ_NET_QUEUED)) { + if (p) + goto goto_read_retry_local; + _req_may_be_done(req, m); /* Allowed while state.susp */ + } break; - case write_acked_by_peer_and_sis: + case WRITE_ACKED_BY_PEER_AND_SIS: req->rq_state |= RQ_NET_SIS; - case conflict_discarded_by_peer: + case DISCARD_WRITE: /* for discarded conflicting writes of multiple primaries, * there is no need to keep anything in the tl, potential * node crashes are covered by the activity log. */ - if (what == conflict_discarded_by_peer) - dev_alert(DEV, "Got DiscardAck packet %llus +%u!" - " DRBD is not a random data generator!\n", - (unsigned long long)req->sector, req->size); req->rq_state |= RQ_NET_DONE; /* fall through */ - case write_acked_by_peer: + case WRITE_ACKED_BY_PEER: + D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK); /* protocol C; successfully written on peer. * Nothing to do here. * We want to keep the tl in place for all protocols, to cater @@ -675,51 +629,129 @@ * request could set NET_DONE right here, and not wait for the * P_BARRIER_ACK, but that is an unnecessary optimization. */ + goto ack_common; /* this makes it effectively the same as for: */ - case recv_acked_by_peer: + case RECV_ACKED_BY_PEER: + D_ASSERT(req->rq_state & RQ_EXP_RECEIVE_ACK); /* protocol B; pretends to be successfully written on peer. - * see also notes above in handed_over_to_network about + * see also notes above in HANDED_OVER_TO_NETWORK about * protocol != C */ + ack_common: req->rq_state |= RQ_NET_OK; D_ASSERT(req->rq_state & RQ_NET_PENDING); dec_ap_pending(mdev); + atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); req->rq_state &= ~RQ_NET_PENDING; - _req_may_be_done(req, m); + _req_may_be_done_not_susp(req, m); break; - case neg_acked: + case POSTPONE_WRITE: + D_ASSERT(req->rq_state & RQ_EXP_WRITE_ACK); + /* If this node has already detected the write conflict, the + * worker will be waiting on misc_wait. 
Wake it up once this + * request has completed locally. + */ + D_ASSERT(req->rq_state & RQ_NET_PENDING); + req->rq_state |= RQ_POSTPONED; + _req_may_be_done_not_susp(req, m); + break; + + case NEG_ACKED: /* assert something? */ - if (req->rq_state & RQ_NET_PENDING) + if (req->rq_state & RQ_NET_PENDING) { dec_ap_pending(mdev); + if (req->rq_state & RQ_WRITE) + atomic_sub(req->i.size >> 9, &mdev->ap_in_flight); + } req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING); req->rq_state |= RQ_NET_DONE; - _req_may_be_done(req, m); - /* else: done by handed_over_to_network */ + + if (!(req->rq_state & RQ_WRITE)) + goto goto_read_retry_local; + + _req_may_be_done_not_susp(req, m); + /* else: done by HANDED_OVER_TO_NETWORK */ + break; + + goto_read_retry_local: + if (!drbd_may_do_local_read(mdev, req->i.sector, req->i.size)) { + _req_may_be_done_not_susp(req, m); + break; + } + D_ASSERT(!(req->rq_state & RQ_LOCAL_PENDING)); + req->rq_state |= RQ_LOCAL_PENDING; + + get_ldev(mdev); + req->w.cb = w_restart_disk_io; + drbd_queue_work(&mdev->tconn->data.work, &req->w); + break; + + case FAIL_FROZEN_DISK_IO: + if (!(req->rq_state & RQ_LOCAL_COMPLETED)) + break; + + _req_may_be_done(req, m); /* Allowed while state.susp */ + break; + + case RESTART_FROZEN_DISK_IO: + if (!(req->rq_state & RQ_LOCAL_COMPLETED)) + break; + + req->rq_state &= ~RQ_LOCAL_COMPLETED; + + rv = MR_READ; + if (bio_data_dir(req->master_bio) == WRITE) + rv = MR_WRITE; + + get_ldev(mdev); + req->w.cb = w_restart_disk_io; + drbd_queue_work(&mdev->tconn->data.work, &req->w); break; - case barrier_acked: + case RESEND: + /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK + before the connection loss (B&C only); only P_BARRIER_ACK was missing. + Trowing them out of the TL here by pretending we got a BARRIER_ACK + We ensure that the peer was not rebooted */ + if (!(req->rq_state & RQ_NET_OK)) { + if (req->w.cb) { + drbd_queue_work(&mdev->tconn->data.work, &req->w); + rv = req->rq_state & RQ_WRITE ? 
MR_WRITE : MR_READ; + } + break; + } + /* else, fall through to BARRIER_ACKED */ + + case BARRIER_ACKED: + if (!(req->rq_state & RQ_WRITE)) + break; + if (req->rq_state & RQ_NET_PENDING) { - /* barrier came in before all requests have been acked. + /* barrier came in before all requests were acked. * this is bad, because if the connection is lost now, * we won't be able to clean them up... */ - dev_err(DEV, "FIXME (barrier_acked but pending)\n"); - trace_drbd_req(req, nothing, "FIXME (barrier_acked but pending)"); - list_move(&req->tl_requests, &mdev->out_of_sequence_requests); + dev_err(DEV, "FIXME (BARRIER_ACKED but pending)\n"); + list_move(&req->tl_requests, &mdev->tconn->out_of_sequence_requests); } - D_ASSERT(req->rq_state & RQ_NET_SENT); - req->rq_state |= RQ_NET_DONE; - _req_may_be_done(req, m); + if ((req->rq_state & RQ_NET_MASK) != 0) { + req->rq_state |= RQ_NET_DONE; + if (!(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK))) + atomic_sub(req->i.size>>9, &mdev->ap_in_flight); + } + _req_may_be_done(req, m); /* Allowed while state.susp */ break; - case data_received: + case DATA_RECEIVED: D_ASSERT(req->rq_state & RQ_NET_PENDING); dec_ap_pending(mdev); req->rq_state &= ~RQ_NET_PENDING; req->rq_state |= (RQ_NET_OK|RQ_NET_DONE); - _req_may_be_done(req, m); + _req_may_be_done_not_susp(req, m); break; }; + + return rv; } /* we may do a local read if: @@ -729,39 +761,98 @@ * since size may be bigger than BM_BLOCK_SIZE, * we may need to check several bits. 
*/ -STATIC int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size) +STATIC bool drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int size) { unsigned long sbnr, ebnr; sector_t esector, nr_sectors; if (mdev->state.disk == D_UP_TO_DATE) - return 1; - if (mdev->state.disk >= D_OUTDATED) - return 0; - if (mdev->state.disk < D_INCONSISTENT) - return 0; - /* state.disk == D_INCONSISTENT We will have a look at the BitMap */ - nr_sectors = drbd_get_capacity(mdev->this_bdev); + return true; + if (mdev->state.disk != D_INCONSISTENT) + return false; esector = sector + (size >> 9) - 1; - + nr_sectors = drbd_get_capacity(mdev->this_bdev); D_ASSERT(sector < nr_sectors); D_ASSERT(esector < nr_sectors); sbnr = BM_SECT_TO_BIT(sector); ebnr = BM_SECT_TO_BIT(esector); - return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr); + return drbd_bm_count_bits(mdev, sbnr, ebnr) == 0; +} + +static bool remote_due_to_read_balancing(struct drbd_conf *mdev, sector_t sector) +{ + enum drbd_read_balancing rbm; + struct backing_dev_info *bdi; + int stripe_shift; + + if (mdev->state.pdsk < D_UP_TO_DATE) + return false; + + rcu_read_lock(); + rbm = rcu_dereference(mdev->ldev->disk_conf)->read_balancing; + rcu_read_unlock(); + + switch (rbm) { + case RB_CONGESTED_REMOTE: + bdi = &mdev->ldev->backing_bdev->bd_disk->queue->backing_dev_info; + return bdi_read_congested(bdi); + case RB_LEAST_PENDING: + return atomic_read(&mdev->local_cnt) > + atomic_read(&mdev->ap_pending_cnt) + atomic_read(&mdev->rs_pending_cnt); + case RB_32K_STRIPING: /* stripe_shift = 15 */ + case RB_64K_STRIPING: + case RB_128K_STRIPING: + case RB_256K_STRIPING: + case RB_512K_STRIPING: + case RB_1M_STRIPING: /* stripe_shift = 20 */ + stripe_shift = (rbm - RB_32K_STRIPING + 15); + return (sector >> (stripe_shift - 9)) & 1; + case RB_ROUND_ROBIN: + return test_and_change_bit(READ_BALANCE_RR, &mdev->flags); + case RB_PREFER_REMOTE: + return true; + case RB_PREFER_LOCAL: + default: + return false; + } } 
-STATIC int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) +/* + * complete_conflicting_writes - wait for any conflicting write requests + * + * The write_requests tree contains all active write requests which we + * currently know about. Wait for any requests to complete which conflict with + * the new one. + */ +static int complete_conflicting_writes(struct drbd_conf *mdev, + sector_t sector, int size) +{ + for(;;) { + struct drbd_interval *i; + int err; + + i = drbd_find_overlap(&mdev->write_requests, sector, size); + if (!i) + return 0; + err = drbd_wait_misc(mdev, i); + if (err) + return err; + } +} + +int __drbd_make_request(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time) { const int rw = bio_rw(bio); const int size = bio->bi_size; const sector_t sector = bio->bi_sector; struct drbd_tl_epoch *b = NULL; struct drbd_request *req; - int local, remote; - int err = -EIO; + struct net_conf *nc; + int local, remote, send_oos = 0; + int err; + int ret = 0; /* allocate outside of all locks; */ req = drbd_req_new(mdev, bio); @@ -773,8 +864,7 @@ bio_endio(bio, -ENOMEM); return 0; } - - trace_drbd_bio(mdev, "Rq", bio, 0, req); + req->start_time = start_time; local = get_ldev(mdev); if (!local) { @@ -786,7 +876,8 @@ } else { /* READ || READA */ if (local) { - if (!drbd_may_do_local_read(mdev, sector, size)) { + if (!drbd_may_do_local_read(mdev, sector, size) || + remote_due_to_read_balancing(mdev, sector)) { /* we could kick the syncer to * sync this extent asap, wait for * it, then continue locally. @@ -819,15 +910,19 @@ * resync extent to finish, and, if necessary, pulls in the target * extent into the activity log, which involves further disk io because * of transactional on-disk meta data updates. 
*/ - if (rw == WRITE && local) - drbd_al_begin_io(mdev, sector); + if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) { + req->rq_state |= RQ_IN_ACT_LOG; + drbd_al_begin_io(mdev, &req->i); + } - remote = remote && (mdev->state.pdsk == D_UP_TO_DATE || - (mdev->state.pdsk == D_INCONSISTENT && - mdev->state.conn >= C_CONNECTED)); + remote = remote && drbd_should_do_remote(mdev->state); + send_oos = rw == WRITE && drbd_should_send_out_of_sync(mdev->state); + D_ASSERT(!(remote && send_oos)); - if (!(local || remote)) { - dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); + if (!(local || remote) && !drbd_suspended(mdev)) { + if (DRBD_ratelimit(5*HZ, 3)) + dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); + err = -EIO; goto fail_free_complete; } @@ -837,9 +932,9 @@ * but there is a race between testing the bit and pointer outside the * spinlock, and grabbing the spinlock. * if we lost that race, we retry. */ - if (rw == WRITE && remote && - mdev->unused_spare_tle == NULL && - test_bit(CREATE_BARRIER, &mdev->flags)) { + if (rw == WRITE && (remote || send_oos) && + mdev->tconn->unused_spare_tle == NULL && + test_bit(CREATE_BARRIER, &mdev->tconn->flags)) { allocate_barrier: b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_NOIO); if (!b) { @@ -850,31 +945,56 @@ } /* GOOD, everything prepared, grab the spin_lock */ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); - if (remote) { - remote = (mdev->state.pdsk == D_UP_TO_DATE || - (mdev->state.pdsk == D_INCONSISTENT && - mdev->state.conn >= C_CONNECTED)); - if (!remote) + if (rw == WRITE) { + err = complete_conflicting_writes(mdev, sector, size); + if (err) { + if (err != -ERESTARTSYS) + _conn_request_state(mdev->tconn, + NS(conn, C_TIMEOUT), + CS_HARD); + spin_unlock_irq(&mdev->tconn->req_lock); + err = -EIO; + goto fail_free_complete; + } + } + + if (drbd_suspended(mdev)) { + /* If we got suspended, use the retry mechanism in + drbd_make_request() to restart processing of 
this + bio. In the next call to drbd_make_request + we sleep in inc_ap_bio() */ + ret = 1; + spin_unlock_irq(&mdev->tconn->req_lock); + goto fail_free_complete; + } + + if (remote || send_oos) { + remote = drbd_should_do_remote(mdev->state); + send_oos = rw == WRITE && drbd_should_send_out_of_sync(mdev->state); + D_ASSERT(!(remote && send_oos)); + + if (!(remote || send_oos)) dev_warn(DEV, "lost connection while grabbing the req_lock!\n"); if (!(local || remote)) { dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); + err = -EIO; goto fail_free_complete; } } - if (b && mdev->unused_spare_tle == NULL) { - mdev->unused_spare_tle = b; + if (b && mdev->tconn->unused_spare_tle == NULL) { + mdev->tconn->unused_spare_tle = b; b = NULL; } - if (rw == WRITE && remote && - mdev->unused_spare_tle == NULL && - test_bit(CREATE_BARRIER, &mdev->flags)) { + if (rw == WRITE && (remote || send_oos) && + mdev->tconn->unused_spare_tle == NULL && + test_bit(CREATE_BARRIER, &mdev->tconn->flags)) { /* someone closed the current epoch * while we were grabbing the spinlock */ - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); goto allocate_barrier; } @@ -892,13 +1012,13 @@ * barrier packet. To get the write ordering right, we only have to * make sure that, if this is a write request and it triggered a * barrier packet, this request is queued within the same spinlock. 
*/ - if (remote && mdev->unused_spare_tle && - test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) { - _tl_add_barrier(mdev, mdev->unused_spare_tle); - mdev->unused_spare_tle = NULL; + if ((remote || send_oos) && mdev->tconn->unused_spare_tle && + test_and_clear_bit(CREATE_BARRIER, &mdev->tconn->flags)) { + _tl_add_barrier(mdev->tconn, mdev->tconn->unused_spare_tle); + mdev->tconn->unused_spare_tle = NULL; } else { D_ASSERT(!(remote && rw == WRITE && - test_bit(CREATE_BARRIER, &mdev->flags))); + test_bit(CREATE_BARRIER, &mdev->tconn->flags))); } /* NOTE @@ -917,37 +1037,11 @@ /* mark them early for readability. * this just sets some state flags. */ if (remote) - _req_mod(req, to_be_send); + _req_mod(req, TO_BE_SENT); if (local) - _req_mod(req, to_be_submitted); + _req_mod(req, TO_BE_SUBMITTED); - /* check this request on the collision detection hash tables. - * if we have a conflict, just complete it here. - * THINK do we want to check reads, too? (I don't think so...) */ - if (rw == WRITE && _req_conflicts(req)) { - /* this is a conflicting request. - * even though it may have been only _partially_ - * overlapping with one of the currently pending requests, - * without even submitting or sending it, we will - * pretend that it was successfully served right now. - */ - if (local) { - bio_put(req->private_bio); - req->private_bio = NULL; - drbd_al_complete_io(mdev, req->sector); - put_ldev(mdev); - local = 0; - } - if (remote) - dec_ap_pending(mdev); - _drbd_end_io_acct(mdev, req); - /* THINK: do we want to fail it (-EIO), or pretend success? */ - bio_endio(req->master_bio, 0); - req->master_bio = NULL; - dec_ap_bio(mdev); - drbd_req_free(req); - remote = 0; - } + list_add_tail(&req->tl_requests, &mdev->tconn->newest_tle->requests); /* NOTE remote first: to get the concurrent write detection right, * we must register the request before start of local IO. */ @@ -957,189 +1051,124 @@ * or READ, but not in sync. */ _req_mod(req, (rw == WRITE) - ? 
queue_for_net_write - : queue_for_net_read); + ? QUEUE_FOR_NET_WRITE + : QUEUE_FOR_NET_READ); + } + if (send_oos && drbd_set_out_of_sync(mdev, sector, size)) + _req_mod(req, QUEUE_FOR_SEND_OOS); + + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + if (remote && + nc->on_congestion != OC_BLOCK && mdev->tconn->agreed_pro_version >= 96) { + int congested = 0; + + if (nc->cong_fill && + atomic_read(&mdev->ap_in_flight) >= nc->cong_fill) { + dev_info(DEV, "Congestion-fill threshold reached\n"); + congested = 1; + } + + if (mdev->act_log->used >= nc->cong_extents) { + dev_info(DEV, "Congestion-extents threshold reached\n"); + congested = 1; + } + + if (congested) { + queue_barrier(mdev); /* last barrier, after mirrored writes */ + + if (nc->on_congestion == OC_PULL_AHEAD) + _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL); + else /*nc->on_congestion == OC_DISCONNECT */ + _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL); + } } - spin_unlock_irq(&mdev->req_lock); + rcu_read_unlock(); + + spin_unlock_irq(&mdev->tconn->req_lock); kfree(b); /* if someone else has beaten us to it... */ if (local) { req->private_bio->bi_bdev = mdev->ldev->backing_bdev; - trace_drbd_bio(mdev, "Pri", req->private_bio, 0, NULL); - - if (FAULT_ACTIVE(mdev, rw == WRITE ? DRBD_FAULT_DT_WR - : rw == READ ? DRBD_FAULT_DT_RD - : DRBD_FAULT_DT_RA)) + /* State may have changed since we grabbed our reference on the + * mdev->ldev member. Double check, and short-circuit to endio. + * In case the last activity log transaction failed to get on + * stable storage, and this is a WRITE, we may not even submit + * this bio. */ + if (get_ldev(mdev)) { + if (drbd_insert_fault(mdev, rw == WRITE ? DRBD_FAULT_DT_WR + : rw == READ ? 
DRBD_FAULT_DT_RD + : DRBD_FAULT_DT_RA)) + bio_endio(req->private_bio, -EIO); + else + generic_make_request(req->private_bio); + put_ldev(mdev); + } else bio_endio(req->private_bio, -EIO); - else - generic_make_request(req->private_bio); } - /* we need to plug ALWAYS since we possibly need to kick lo_dev. - * we plug after submit, so we won't miss an unplug event */ - drbd_plug_device(mdev); - return 0; fail_free_complete: - if (rw == WRITE && local) - drbd_al_complete_io(mdev, sector); + if (req->rq_state & RQ_IN_ACT_LOG) + drbd_al_complete_io(mdev, &req->i); fail_and_free_req: if (local) { bio_put(req->private_bio); req->private_bio = NULL; put_ldev(mdev); } - bio_endio(bio, err); + if (!ret) + bio_endio(bio, err); + drbd_req_free(req); dec_ap_bio(mdev); kfree(b); - return 0; -} - -/* helper function for drbd_make_request - * if we can determine just by the mdev (state) that this request will fail, - * return 1 - * otherwise return 0 - */ -static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write) -{ - /* Unconfigured */ - if (mdev->state.conn == C_DISCONNECTING && - mdev->state.disk == D_DISKLESS) - return 1; - - if (mdev->state.role != R_PRIMARY && - (!allow_oos || is_write)) { - if (DRBD_ratelimit(5*HZ, 5)) { - dev_err(DEV, "Process %s[%u] tried to %s; " - "since we are not in Primary state, " - "we cannot allow this\n", - current->comm, current->pid, - is_write ? "WRITE" : "READ"); - } - return 1; - } - - /* - * Paranoia: we might have been primary, but sync target, or - * even diskless, then lost the connection. - * This should have been handled (panic? suspend?) somewhere - * else. But maybe it was not, so check again here. - * Caution: as long as we do not have a read/write lock on mdev, - * to serialize state changes, this is racy, since we may lose - * the connection *after* we test for the cstate. 
- */ - if (mdev->state.disk < D_UP_TO_DATE && mdev->state.pdsk < D_UP_TO_DATE) { - if (DRBD_ratelimit(5*HZ, 5)) - dev_err(DEV, "Sorry, I have no access to good data anymore.\n"); - return 1; - } - - return 0; + return ret; } -int drbd_make_request_26(struct request_queue *q, struct bio *bio) +MAKE_REQUEST_TYPE drbd_make_request(struct request_queue *q, struct bio *bio) { - unsigned int s_enr, e_enr; struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; + unsigned long start_time; - if (drbd_fail_request_early(mdev, bio_data_dir(bio) & WRITE)) { - bio_endio(bio, -EPERM); - return 0; - } - - /* Reject barrier requests if we know the underlying device does - * not support them. - * XXX: Need to get this info from peer as well some how so we - * XXX: reject if EITHER side/data/metadata area does not support them. - * - * because of those XXX, this is not yet enabled, - * i.e. in drbd_init_set_defaults we set the NO_BARRIER_SUPP bit. - */ - if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && test_bit(NO_BARRIER_SUPP, &mdev->flags))) { - /* dev_warn(DEV, "Rejecting barrier request as underlying device does not support\n"); */ + /* We never supported BIO_RW_BARRIER. + * We don't need to, anymore, either: starting with kernel 2.6.36, + * we have REQ_FUA and REQ_FLUSH, which will be handled transparently + * by the block layer. 
*/ + if (unlikely(bio->bi_rw & DRBD_REQ_HARDBARRIER)) { bio_endio(bio, -EOPNOTSUPP); - return 0; + MAKE_REQUEST_RETURN; } + start_time = jiffies; + /* * what we "blindly" assume: */ D_ASSERT(bio->bi_size > 0); - D_ASSERT((bio->bi_size & 0x1ff) == 0); - D_ASSERT(bio->bi_idx == 0); + D_ASSERT(IS_ALIGNED(bio->bi_size, 512)); - /* to make some things easier, force alignment of requests within the - * granularity of our hash tables */ - s_enr = bio->bi_sector >> HT_SHIFT; - e_enr = (bio->bi_sector+(bio->bi_size>>9)-1) >> HT_SHIFT; - - if (likely(s_enr == e_enr)) { - inc_ap_bio(mdev, 1); - return drbd_make_request_common(mdev, bio); - } - - /* can this bio be split generically? - * Maybe add our own split-arbitrary-bios function. */ - if (bio->bi_vcnt != 1 || bio->bi_idx != 0 || bio->bi_size > DRBD_MAX_SEGMENT_SIZE) { - /* rather error out here than BUG in bio_split */ - dev_err(DEV, "bio would need to, but cannot, be split: " - "(vcnt=%u,idx=%u,size=%u,sector=%llu)\n", - bio->bi_vcnt, bio->bi_idx, bio->bi_size, - (unsigned long long)bio->bi_sector); - bio_endio(bio, -EINVAL); - } else { - /* This bio crosses some boundary, so we have to split it. */ - struct bio_pair *bp; - /* works for the "do not cross hash slot boundaries" case - * e.g. sector 262269, size 4096 - * s_enr = 262269 >> 6 = 4097 - * e_enr = (262269+8-1) >> 6 = 4098 - * HT_SHIFT = 6 - * sps = 64, mask = 63 - * first_sectors = 64 - (262269 & 63) = 3 - */ - const sector_t sect = bio->bi_sector; - const int sps = 1 << HT_SHIFT; /* sectors per slot */ - const int mask = sps - 1; - const sector_t first_sectors = sps - (sect & mask); - bp = bio_split(bio, -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - bio_split_pool, -#endif - first_sectors); + do { + inc_ap_bio(mdev); + } while (__drbd_make_request(mdev, bio, start_time)); - /* we need to get a "reference count" (ap_bio_cnt) - * to avoid races with the disconnect/reconnect/suspend code. 
- * In case we need to split the bio here, we need to get two references - * atomically, otherwise we might deadlock when trying to submit the - * second one! */ - inc_ap_bio(mdev, 2); - - D_ASSERT(e_enr == s_enr + 1); - - drbd_make_request_common(mdev, &bp->bio1); - drbd_make_request_common(mdev, &bp->bio2); - bio_pair_release(bp); - } - return 0; + MAKE_REQUEST_RETURN; } -/* This is called by bio_add_page(). With this function we reduce - * the number of BIOs that span over multiple DRBD_MAX_SEGMENT_SIZEs - * units (was AL_EXTENTs). +/* This is called by bio_add_page(). + * + * q->max_hw_sectors and other global limits are already enforced there. + * + * We need to call down to our lower level device, + * in case it has special restrictions. * - * we do the calculation within the lower 32bit of the byte offsets, - * since we don't care for actual offset, but only check whether it - * would cross "activity log extent" boundaries. + * We also may need to enforce configured max-bio-bvecs limits. * * As long as the BIO is empty we have to allow at least one bvec, - * regardless of size and offset. so the resulting bio may still - * cross extent boundaries. those are dealt with (bio_split) in - * drbd_make_request_26. + * regardless of size and offset, so no need to ask lower levels. 
*/ int drbd_merge_bvec(struct request_queue *q, #ifdef HAVE_bvec_merge_data @@ -1150,22 +1179,14 @@ struct bio_vec *bvec) { struct drbd_conf *mdev = (struct drbd_conf *) q->queuedata; - unsigned int bio_offset = - (unsigned int)bvm->bi_sector << 9; /* 32 bit */ unsigned int bio_size = bvm->bi_size; - int limit, backing_limit; + int limit = DRBD_MAX_BIO_SIZE; + int backing_limit; - limit = DRBD_MAX_SEGMENT_SIZE - - ((bio_offset & (DRBD_MAX_SEGMENT_SIZE-1)) + bio_size); - if (limit < 0) - limit = 0; - if (bio_size == 0) { - if (limit <= bvec->bv_len) - limit = bvec->bv_len; - } else if (limit && get_ldev(mdev)) { + if (bio_size && get_ldev(mdev)) { struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue; - if (b->merge_bvec_fn && mdev->ldev->dc.use_bmbv) { + if (b->merge_bvec_fn) { backing_limit = b->merge_bvec_fn(b, bvm, bvec); limit = min(limit, backing_limit); } @@ -1173,3 +1194,54 @@ } return limit; } + +void request_timer_fn(unsigned long data) +{ + struct drbd_conf *mdev = (struct drbd_conf *) data; + struct drbd_tconn *tconn = mdev->tconn; + struct drbd_request *req; /* oldest request */ + struct list_head *le; + struct net_conf *nc; + unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ + + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + ent = nc ? 
nc->timeout * HZ/10 * nc->ko_count : 0; + + if (get_ldev(mdev)) { + dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10; + put_ldev(mdev); + } + rcu_read_unlock(); + + et = min_not_zero(dt, ent); + + if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED)) + return; /* Recurring timer stopped */ + + spin_lock_irq(&tconn->req_lock); + le = &tconn->oldest_tle->requests; + if (list_empty(le)) { + spin_unlock_irq(&tconn->req_lock); + mod_timer(&mdev->request_timer, jiffies + et); + return; + } + + le = le->prev; + req = list_entry(le, struct drbd_request, tl_requests); + if (ent && req->rq_state & RQ_NET_PENDING) { + if (time_is_before_eq_jiffies(req->start_time + ent)) { + dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n"); + _drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); + } + } + if (dt && req->rq_state & RQ_LOCAL_PENDING) { + if (time_is_before_eq_jiffies(req->start_time + dt)) { + dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n"); + __drbd_chk_io_error(mdev, 1); + } + } + nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et; + spin_unlock_irq(&tconn->req_lock); + mod_timer(&mdev->request_timer, nt); +} diff -Nru drbd8-8.3.7/drbd/drbd_req.h drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_req.h --- drbd8-8.3.7/drbd/drbd_req.h 2009-11-25 09:06:43.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_req.h 2012-02-02 14:09:14.000000000 +0000 @@ -57,7 +57,7 @@ * * It may me handed over to the local disk subsystem. * It may be completed by the local disk subsystem, - * either sucessfully or with io-error. + * either successfully or with io-error. * In case it is a READ request, and it failed locally, * it may be retried remotely. * @@ -77,33 +77,41 @@ */ enum drbd_req_event { - created, - to_be_send, - to_be_submitted, + CREATED, + TO_BE_SENT, + TO_BE_SUBMITTED, /* XXX yes, now I am inconsistent... 
- * these two are not "events" but "actions" + * these are not "events" but "actions" * oh, well... */ - queue_for_net_write, - queue_for_net_read, - - send_canceled, - send_failed, - handed_over_to_network, - connection_lost_while_pending, - recv_acked_by_peer, - write_acked_by_peer, - write_acked_by_peer_and_sis, /* and set_in_sync */ - conflict_discarded_by_peer, - neg_acked, - barrier_acked, /* in protocol A and B */ - data_received, /* (remote read) */ - - read_completed_with_error, - read_ahead_completed_with_error, - write_completed_with_error, - completed_ok, - nothing, /* for tracing only */ + QUEUE_FOR_NET_WRITE, + QUEUE_FOR_NET_READ, + QUEUE_FOR_SEND_OOS, + + SEND_CANCELED, + SEND_FAILED, + HANDED_OVER_TO_NETWORK, + OOS_HANDED_TO_NETWORK, + CONNECTION_LOST_WHILE_PENDING, + READ_RETRY_REMOTE_CANCELED, + RECV_ACKED_BY_PEER, + WRITE_ACKED_BY_PEER, + WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */ + DISCARD_WRITE, + POSTPONE_WRITE, + NEG_ACKED, + BARRIER_ACKED, /* in protocol A and B */ + DATA_RECEIVED, /* (remote read) */ + + READ_COMPLETED_WITH_ERROR, + READ_AHEAD_COMPLETED_WITH_ERROR, + WRITE_COMPLETED_WITH_ERROR, + ABORT_DISK_IO, + COMPLETED_OK, + RESEND, + FAIL_FROZEN_DISK_IO, + RESTART_FROZEN_DISK_IO, + NOTHING, }; /* encoding of request states for now. we don't actually need that many bits. @@ -112,18 +120,21 @@ * same time, so we should hold the request lock anyways. 
*/ enum drbd_req_state_bits { - /* 210 - * 000: no local possible - * 001: to be submitted + /* 3210 + * 0000: no local possible + * 0001: to be submitted * UNUSED, we could map: 011: submitted, completion still pending - * 110: completed ok - * 010: completed with error + * 0110: completed ok + * 0010: completed with error + * 1001: Aborted (before completion) + * 1x10: Aborted and completed -> free */ __RQ_LOCAL_PENDING, __RQ_LOCAL_COMPLETED, __RQ_LOCAL_OK, + __RQ_LOCAL_ABORTED, - /* 76543 + /* 87654 * 00000: no network possible * 00001: to be send * 00011: to be send, on worker queue @@ -132,8 +143,8 @@ * recv_ack (B) or implicit "ack" (A), * still waiting for the barrier ack. * master_bio may already be completed and invalidated. - * 11100: write_acked (C), - * data_received (for remote read, any protocol) + * 11100: write acked (C), + * data received (for remote read, any protocol) * or finally the barrier ack has arrived (B,A)... * request can be freed * 01100: neg-acked (write, protocol C) @@ -182,13 +193,29 @@ /* keep this last, its for the RQ_NET_MASK */ __RQ_NET_MAX, + + /* Set when this is a write, clear for a read */ + __RQ_WRITE, + + /* Should call drbd_al_complete_io() for this request... 
*/ + __RQ_IN_ACT_LOG, + + /* The peer has sent a retry ACK */ + __RQ_POSTPONED, + + /* We expect a receive ACK (wire proto B) */ + __RQ_EXP_RECEIVE_ACK, + + /* We expect a write ACK (wite proto C) */ + __RQ_EXP_WRITE_ACK, }; #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED) #define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK) +#define RQ_LOCAL_ABORTED (1UL << __RQ_LOCAL_ABORTED) -#define RQ_LOCAL_MASK ((RQ_LOCAL_OK << 1)-1) /* 0x07 */ +#define RQ_LOCAL_MASK ((RQ_LOCAL_ABORTED << 1)-1) #define RQ_NET_PENDING (1UL << __RQ_NET_PENDING) #define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED) @@ -200,85 +227,27 @@ /* 0x1f8 */ #define RQ_NET_MASK (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK) -/* epoch entries */ -static inline -struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector) -{ - BUG_ON(mdev->ee_hash_s == 0); - return mdev->ee_hash + - ((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s); -} - -/* transfer log (drbd_request objects) */ -static inline -struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector) -{ - BUG_ON(mdev->tl_hash_s == 0); - return mdev->tl_hash + - ((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s); -} - -/* application reads (drbd_request objects) */ -static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector) -{ - return mdev->app_reads_hash - + ((unsigned int)(sector) % APP_R_HSIZE); -} - -/* when we receive the answer for a read request, - * verify that we actually know about it */ -static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev, - u64 id, sector_t sector) -{ - struct hlist_head *slot = ar_hash_slot(mdev, sector); - struct hlist_node *n; - struct drbd_request *req; - - hlist_for_each_entry(req, n, slot, colision) { - if ((unsigned long)req == (unsigned long)id) { - D_ASSERT(req->sector == sector); - return req; - } - } - return NULL; -} +#define RQ_WRITE (1UL << __RQ_WRITE) +#define RQ_IN_ACT_LOG (1UL << __RQ_IN_ACT_LOG) 
+#define RQ_POSTPONED (1UL << __RQ_POSTPONED) +#define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK) +#define RQ_EXP_WRITE_ACK (1UL << __RQ_EXP_WRITE_ACK) + +/* For waking up the frozen transfer log mod_req() has to return if the request + should be counted in the epoch object*/ +#define MR_WRITE 1 +#define MR_READ 2 -static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev, - struct bio *bio_src) +static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src) { struct bio *bio; - struct drbd_request *req = - mempool_alloc(drbd_request_mempool, GFP_NOIO); - if (likely(req)) { - bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */ - - req->rq_state = 0; - req->mdev = mdev; - req->master_bio = bio_src; - req->private_bio = bio; - req->epoch = 0; - req->sector = bio->bi_sector; - req->size = bio->bi_size; - req->start_time = jiffies; - INIT_HLIST_NODE(&req->colision); - INIT_LIST_HEAD(&req->tl_requests); - INIT_LIST_HEAD(&req->w.list); - - bio->bi_private = req; - bio->bi_end_io = drbd_endio_pri; - bio->bi_next = NULL; - } - return req; -} + bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */ -static inline void drbd_req_free(struct drbd_request *req) -{ - mempool_free(req, drbd_request_mempool); -} + req->private_bio = bio; -static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2) -{ - return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9))); + bio->bi_private = req; + bio->bi_end_io = drbd_request_endio; + bio->bi_next = NULL; } /* Short lived temporary struct on the stack. 
@@ -291,36 +260,65 @@ extern void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m); -extern void __req_mod(struct drbd_request *req, enum drbd_req_event what, +extern int __req_mod(struct drbd_request *req, enum drbd_req_event what, struct bio_and_error *m); extern void complete_master_bio(struct drbd_conf *mdev, struct bio_and_error *m); +extern void request_timer_fn(unsigned long data); +extern void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what); +extern void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what); /* use this if you don't want to deal with calling complete_master_bio() * outside the spinlock, e.g. when walking some list on cleanup. */ -static inline void _req_mod(struct drbd_request *req, enum drbd_req_event what) +static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what) { - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; struct bio_and_error m; + int rv; /* __req_mod possibly frees req, do not touch req after that! */ - __req_mod(req, what, &m); + rv = __req_mod(req, what, &m); if (m.bio) complete_master_bio(mdev, &m); + + return rv; } /* completion of master bio is outside of spinlock. - * If you need it irqsave, do it your self! */ -static inline void req_mod(struct drbd_request *req, + * If you need it irqsave, do it your self! + * Which means: don't use from bio endio callback. 
*/ +static inline int req_mod(struct drbd_request *req, enum drbd_req_event what) { - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; struct bio_and_error m; - spin_lock_irq(&mdev->req_lock); - __req_mod(req, what, &m); - spin_unlock_irq(&mdev->req_lock); + int rv; + + spin_lock_irq(&mdev->tconn->req_lock); + rv = __req_mod(req, what, &m); + spin_unlock_irq(&mdev->tconn->req_lock); if (m.bio) complete_master_bio(mdev, &m); + + return rv; +} + +static inline bool drbd_should_do_remote(union drbd_dev_state s) +{ + return s.pdsk == D_UP_TO_DATE || + (s.pdsk >= D_INCONSISTENT && + s.conn >= C_WF_BITMAP_T && + s.conn < C_AHEAD); + /* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T. + That is equivalent since before 96 IO was frozen in the C_WF_BITMAP* + states. */ } +static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s) +{ + return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S; + /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary + since we enter state C_AHEAD only if proto >= 96 */ +} + #endif diff -Nru drbd8-8.3.7/drbd/drbd_state.c drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_state.c --- drbd8-8.3.7/drbd/drbd_state.c 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_state.c 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,1762 @@ +/* + drbd_state.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. + Copyright (C) 1999-2008, Philipp Reisner . + Copyright (C) 2002-2008, Lars Ellenberg . + + Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev + from Logicworks, Inc. for making SDP replication support possible. + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. 
+ + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include "drbd_int.h" +#include "drbd_req.h" + +/* in drbd_main.c */ +extern void tl_abort_disk_io(struct drbd_conf *mdev); + +struct after_state_chg_work { + struct drbd_work w; + union drbd_state os; + union drbd_state ns; + enum chg_state_flags flags; + struct completion *done; +}; + +enum sanitize_state_warnings { + NO_WARNING, + ABORTED_ONLINE_VERIFY, + ABORTED_RESYNC, + CONNECTION_LOST_NEGOTIATING, + IMPLICITLY_UPGRADED_DISK, + IMPLICITLY_UPGRADED_PDSK, +}; + +STATIC int w_after_state_ch(struct drbd_work *w, int unused); +STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, enum chg_state_flags flags); +STATIC enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); +STATIC enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); +STATIC enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); +STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, + enum sanitize_state_warnings *warn); + +static inline bool is_susp(union drbd_state s) +{ + return s.susp || s.susp_nod || s.susp_fen; +} + +bool conn_all_vols_unconf(struct drbd_tconn *tconn) +{ + struct drbd_conf *mdev; + bool rv = true; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (mdev->state.disk != D_DISKLESS || + mdev->state.conn != C_STANDALONE || + mdev->state.role != R_SECONDARY) { + rv = false; + break; + } + } + rcu_read_unlock(); + + return rv; +} + +/* Unfortunately the states 
where not correctly ordered, when + they where defined. therefore can not use max_t() here. */ +static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2) +{ + if (role1 == R_PRIMARY || role2 == R_PRIMARY) + return R_PRIMARY; + if (role1 == R_SECONDARY || role2 == R_SECONDARY) + return R_SECONDARY; + return R_UNKNOWN; +} +static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2) +{ + if (role1 == R_UNKNOWN || role2 == R_UNKNOWN) + return R_UNKNOWN; + if (role1 == R_SECONDARY || role2 == R_SECONDARY) + return R_SECONDARY; + return R_PRIMARY; +} + +enum drbd_role conn_highest_role(struct drbd_tconn *tconn) +{ + enum drbd_role role = R_UNKNOWN; + struct drbd_conf *mdev; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) + role = max_role(role, mdev->state.role); + rcu_read_unlock(); + + return role; +} + +enum drbd_role conn_highest_peer(struct drbd_tconn *tconn) +{ + enum drbd_role peer = R_UNKNOWN; + struct drbd_conf *mdev; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) + peer = max_role(peer, mdev->state.peer); + rcu_read_unlock(); + + return peer; +} + +enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn) +{ + enum drbd_disk_state ds = D_DISKLESS; + struct drbd_conf *mdev; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) + ds = max_t(enum drbd_disk_state, ds, mdev->state.disk); + rcu_read_unlock(); + + return ds; +} + +enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn) +{ + enum drbd_disk_state ds = D_MASK; + struct drbd_conf *mdev; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) + ds = min_t(enum drbd_disk_state, ds, mdev->state.disk); + rcu_read_unlock(); + + return ds; +} + +enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn) +{ + enum drbd_disk_state ds = D_DISKLESS; + struct drbd_conf *mdev; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, 
mdev, vnr) + ds = max_t(enum drbd_disk_state, ds, mdev->state.pdsk); + rcu_read_unlock(); + + return ds; +} + +enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn) +{ + enum drbd_conns conn = C_MASK; + struct drbd_conf *mdev; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) + conn = min_t(enum drbd_conns, conn, mdev->state.conn); + rcu_read_unlock(); + + return conn; +} + +/** + * cl_wide_st_chg() - true if the state change is a cluster wide one + * @mdev: DRBD device. + * @os: old (current) state. + * @ns: new (wanted) state. + */ +STATIC int cl_wide_st_chg(struct drbd_conf *mdev, + union drbd_state os, union drbd_state ns) +{ + return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED && + ((os.role != R_PRIMARY && ns.role == R_PRIMARY) || + (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || + (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) || + (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))) || + (os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) || + (os.conn == C_CONNECTED && ns.conn == C_VERIFY_S) || + (os.conn == C_CONNECTED && ns.conn == C_WF_REPORT_PARAMS); +} + +static union drbd_state +apply_mask_val(union drbd_state os, union drbd_state mask, union drbd_state val) +{ + union drbd_state ns; + ns.i = (os.i & ~mask.i) | val.i; + return ns; +} + +enum drbd_state_rv +drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f, + union drbd_state mask, union drbd_state val) +{ + unsigned long flags; + union drbd_state ns; + enum drbd_state_rv rv; + + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + ns = apply_mask_val(drbd_read_state(mdev), mask, val); + rv = _drbd_set_state(mdev, ns, f, NULL); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + + return rv; +} + +/** + * drbd_force_state() - Impose a change which happens outside our control on our state + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. 
+ */ +void drbd_force_state(struct drbd_conf *mdev, + union drbd_state mask, union drbd_state val) +{ + drbd_change_state(mdev, CS_HARD, mask, val); +} + +STATIC enum drbd_state_rv +_req_st_cond(struct drbd_conf *mdev, union drbd_state mask, + union drbd_state val) +{ + union drbd_state os, ns; + unsigned long flags; + enum drbd_state_rv rv; + + if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags)) + return SS_CW_SUCCESS; + + if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags)) + return SS_CW_FAILED_BY_PEER; + + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + os = drbd_read_state(mdev); + ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); + rv = is_valid_transition(os, ns); + if (rv == SS_SUCCESS) + rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ + + if (!cl_wide_st_chg(mdev, os, ns)) + rv = SS_CW_NO_NEED; + if (rv == SS_UNKNOWN_ERROR) { + rv = is_valid_state(mdev, ns); + if (rv == SS_SUCCESS) { + rv = is_valid_soft_transition(os, ns); + if (rv == SS_SUCCESS) + rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ + } + } + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + + return rv; +} + +/** + * drbd_req_state() - Perform an eventually cluster wide state change + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. + * @f: flags + * + * Should not be called directly, use drbd_request_state() or + * _drbd_request_state(). 
+ */ +STATIC enum drbd_state_rv +drbd_req_state(struct drbd_conf *mdev, union drbd_state mask, + union drbd_state val, enum chg_state_flags f) +{ + struct completion done; + unsigned long flags; + union drbd_state os, ns; + enum drbd_state_rv rv; + + init_completion(&done); + + if (f & CS_SERIALIZE) + mutex_lock(mdev->state_mutex); + + ns = val; /* assign debug info, if any */ + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + os = drbd_read_state(mdev); + ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); + rv = is_valid_transition(os, ns); + if (rv < SS_SUCCESS) { + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + goto abort; + } + + if (cl_wide_st_chg(mdev, os, ns)) { + rv = is_valid_state(mdev, ns); + if (rv == SS_SUCCESS) + rv = is_valid_soft_transition(os, ns); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + + if (rv < SS_SUCCESS) { + if (f & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + goto abort; + } + + if (drbd_send_state_req(mdev, mask, val)) { + rv = SS_CW_FAILED_BY_PEER; + if (f & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + goto abort; + } + + wait_event(mdev->state_wait, + (rv = _req_st_cond(mdev, mask, val))); + + if (rv < SS_SUCCESS) { + if (f & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + goto abort; + } + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + ns = apply_mask_val(drbd_read_state(mdev), mask, val); + rv = _drbd_set_state(mdev, ns, f, &done); + } else { + rv = _drbd_set_state(mdev, ns, f, &done); + } + + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + + if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) { + D_ASSERT(current != mdev->tconn->worker.task); + wait_for_completion(&done); + } + +abort: + if (f & CS_SERIALIZE) + mutex_unlock(mdev->state_mutex); + + return rv; +} + +/** + * _drbd_request_state() - Request a state change (with flags) + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. 
+ * @f: flags + * + * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE + * flag, or when logging of failed state change requests is not desired. + */ +enum drbd_state_rv +_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask, + union drbd_state val, enum chg_state_flags f) +{ + enum drbd_state_rv rv; + + wait_event(mdev->state_wait, + (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE); + + return rv; +} + +/* pretty print of drbd internal state */ + +#define STATE_FMT " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n" +#define STATE_ARGS(tag, s) \ + tag, \ + drbd_conn_str(s.conn), \ + drbd_role_str(s.role), \ + drbd_role_str(s.peer), \ + drbd_disk_str(s.disk), \ + drbd_disk_str(s.pdsk), \ + is_susp(s) ? 's' : 'r', \ + s.aftr_isp ? 'a' : '-', \ + s.peer_isp ? 'p' : '-', \ + s.user_isp ? 'u' : '-', \ + s.susp_fen ? 'F' : '-', \ + s.susp_nod ? 'N' : '-' + +void print_st(struct drbd_conf *mdev, const char *tag, union drbd_state s) +{ + dev_err(DEV, STATE_FMT, STATE_ARGS(tag, s)); +} + + +void print_st_err(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, enum drbd_state_rv err) +{ + if (err == SS_IN_TRANSIENT_STATE) + return; + dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err)); + print_st(mdev, " state", os); + print_st(mdev, "wanted", ns); +} + +static long print_state_change(char *pb, union drbd_state os, union drbd_state ns, + enum chg_state_flags flags) +{ + char *pbp; + pbp = pb; + *pbp = 0; + + if (ns.role != os.role && flags & CS_DC_ROLE) + pbp += sprintf(pbp, "role( %s -> %s ) ", + drbd_role_str(os.role), + drbd_role_str(ns.role)); + if (ns.peer != os.peer && flags & CS_DC_PEER) + pbp += sprintf(pbp, "peer( %s -> %s ) ", + drbd_role_str(os.peer), + drbd_role_str(ns.peer)); + if (ns.conn != os.conn && flags & CS_DC_CONN) + pbp += sprintf(pbp, "conn( %s -> %s ) ", + drbd_conn_str(os.conn), + drbd_conn_str(ns.conn)); + if (ns.disk != os.disk && flags & CS_DC_DISK) + pbp += 
sprintf(pbp, "disk( %s -> %s ) ", + drbd_disk_str(os.disk), + drbd_disk_str(ns.disk)); + if (ns.pdsk != os.pdsk && flags & CS_DC_PDSK) + pbp += sprintf(pbp, "pdsk( %s -> %s ) ", + drbd_disk_str(os.pdsk), + drbd_disk_str(ns.pdsk)); + + return pbp - pb; +} + +static void drbd_pr_state_change(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns, + enum chg_state_flags flags) +{ + char pb[300]; + char *pbp = pb; + + pbp += print_state_change(pbp, os, ns, flags ^ CS_DC_MASK); + + if (ns.aftr_isp != os.aftr_isp) + pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ", + os.aftr_isp, + ns.aftr_isp); + if (ns.peer_isp != os.peer_isp) + pbp += sprintf(pbp, "peer_isp( %d -> %d ) ", + os.peer_isp, + ns.peer_isp); + if (ns.user_isp != os.user_isp) + pbp += sprintf(pbp, "user_isp( %d -> %d ) ", + os.user_isp, + ns.user_isp); + + if (pbp != pb) + dev_info(DEV, "%s\n", pb); +} + +static void conn_pr_state_change(struct drbd_tconn *tconn, union drbd_state os, union drbd_state ns, + enum chg_state_flags flags) +{ + char pb[300]; + char *pbp = pb; + + pbp += print_state_change(pbp, os, ns, flags); + + if (is_susp(ns) != is_susp(os) && flags & CS_DC_SUSP) + pbp += sprintf(pbp, "susp( %d -> %d ) ", + is_susp(os), + is_susp(ns)); + + if (pbp != pb) + conn_info(tconn, "%s\n", pb); +} + + +/** + * is_valid_state() - Returns an SS_ error code if ns is not valid + * @mdev: DRBD device. + * @ns: State to consider. 
+ */ +STATIC enum drbd_state_rv +is_valid_state(struct drbd_conf *mdev, union drbd_state ns) +{ + /* See drbd_state_sw_errors in drbd_strings.c */ + + enum drbd_fencing_p fp; + enum drbd_state_rv rv = SS_SUCCESS; + struct net_conf *nc; + + rcu_read_lock(); + fp = FP_DONT_CARE; + if (get_ldev(mdev)) { + fp = rcu_dereference(mdev->ldev->disk_conf)->fencing; + put_ldev(mdev); + } + + nc = rcu_dereference(mdev->tconn->net_conf); + if (nc) { + if (!nc->two_primaries && ns.role == R_PRIMARY) { + if (ns.peer == R_PRIMARY) + rv = SS_TWO_PRIMARIES; + else if (conn_highest_peer(mdev->tconn) == R_PRIMARY) + rv = SS_O_VOL_PEER_PRI; + } + } + + if (rv <= 0) + /* already found a reason to abort */; + else if (ns.role == R_SECONDARY && mdev->open_cnt) + rv = SS_DEVICE_IN_USE; + + else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE) + rv = SS_NO_UP_TO_DATE_DISK; + + else if (fp >= FP_RESOURCE && + ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN) + rv = SS_PRIMARY_NOP; + + else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT) + rv = SS_NO_UP_TO_DATE_DISK; + + else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT) + rv = SS_NO_LOCAL_DISK; + + else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT) + rv = SS_NO_REMOTE_DISK; + + else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) + rv = SS_NO_UP_TO_DATE_DISK; + + else if ((ns.conn == C_CONNECTED || + ns.conn == C_WF_BITMAP_S || + ns.conn == C_SYNC_SOURCE || + ns.conn == C_PAUSED_SYNC_S) && + ns.disk == D_OUTDATED) + rv = SS_CONNECTED_OUTDATES; + + else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && + (nc->verify_alg[0] == 0)) + rv = SS_NO_VERIFY_ALG; + + else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && + mdev->tconn->agreed_pro_version < 88) + rv = SS_NOT_SUPPORTED; + + else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN) + rv = SS_CONNECTED_OUTDATES; + + rcu_read_unlock(); + + return 
rv; +} + +/** + * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible + * This function limits state transitions that may be declined by DRBD. I.e. + * user requests (aka soft transitions). + * @mdev: DRBD device. + * @ns: new state. + * @os: old state. + */ +STATIC enum drbd_state_rv +is_valid_soft_transition(union drbd_state os, union drbd_state ns) +{ + enum drbd_state_rv rv = SS_SUCCESS; + + if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) && + os.conn > C_CONNECTED) + rv = SS_RESYNC_RUNNING; + + if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE) + rv = SS_ALREADY_STANDALONE; + + if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS) + rv = SS_IS_DISKLESS; + + if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED) + rv = SS_NO_NET_CONFIG; + + if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING) + rv = SS_LOWER_THAN_OUTDATED; + + if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED) + rv = SS_IN_TRANSIENT_STATE; + + /* if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS) + rv = SS_IN_TRANSIENT_STATE; */ + + if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED) + rv = SS_NEED_CONNECTION; + + if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && + ns.conn != os.conn && os.conn > C_CONNECTED) + rv = SS_RESYNC_RUNNING; + + if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) && + os.conn < C_CONNECTED) + rv = SS_NEED_CONNECTION; + + if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE) + && os.conn < C_WF_REPORT_PARAMS) + rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... 
*/ + + return rv; +} + +STATIC enum drbd_state_rv +is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc) +{ + /* no change -> nothing to do, at least for the connection part */ + if (oc == nc) + return SS_NOTHING_TO_DO; + + /* disconnect of an unconfigured connection does not make sense */ + if (oc == C_STANDALONE && nc == C_DISCONNECTING) + return SS_ALREADY_STANDALONE; + + /* from C_STANDALONE, we start with C_UNCONNECTED */ + if (oc == C_STANDALONE && nc != C_UNCONNECTED) + return SS_NEED_CONNECTION; + + /* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */ + if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING) + return SS_IN_TRANSIENT_STATE; + + /* After C_DISCONNECTING only C_STANDALONE may follow */ + if (oc == C_DISCONNECTING && nc != C_STANDALONE) + return SS_IN_TRANSIENT_STATE; + + return SS_SUCCESS; +} + + +/** + * is_valid_transition() - Returns an SS_ error code if the state transition is not possible + * This limits hard state transitions. Hard state transitions are facts there are + * imposed on DRBD by the environment. E.g. disk broke or network broke down. + * But those hard state transitions are still not allowed to do everything. + * @ns: new state. + * @os: old state. 
+ */ +STATIC enum drbd_state_rv +is_valid_transition(union drbd_state os, union drbd_state ns) +{ + enum drbd_state_rv rv; + + rv = is_valid_conn_transition(os.conn, ns.conn); + + /* we cannot fail (again) if we already detached */ + if (ns.disk == D_FAILED && os.disk == D_DISKLESS) + rv = SS_IS_DISKLESS; + + return rv; +} + +static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn) +{ + static const char *msg_table[] = { + [NO_WARNING] = "", + [ABORTED_ONLINE_VERIFY] = "Online-verify aborted.", + [ABORTED_RESYNC] = "Resync aborted.", + [CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!", + [IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk", + [IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk", + }; + + if (warn != NO_WARNING) + dev_warn(DEV, "%s\n", msg_table[warn]); +} + +/** + * sanitize_state() - Resolves implicitly necessary additional changes to a state transition + * @mdev: DRBD device. + * @os: old state. + * @ns: new state. + * @warn_sync_abort: + * + * When we loose connection, we have to set the state of the peers disk (pdsk) + * to D_UNKNOWN. This rule and many more along those lines are in this function. 
+ */ +STATIC union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns, + enum sanitize_state_warnings *warn) +{ + enum drbd_fencing_p fp; + enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; + + if (warn) + *warn = NO_WARNING; + + fp = FP_DONT_CARE; + if (get_ldev(mdev)) { + rcu_read_lock(); + fp = rcu_dereference(mdev->ldev->disk_conf)->fencing; + rcu_read_unlock(); + put_ldev(mdev); + } + + /* Implications from connection to peer and peer_isp */ + if (ns.conn < C_CONNECTED) { + ns.peer_isp = 0; + ns.peer = R_UNKNOWN; + if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT) + ns.pdsk = D_UNKNOWN; + } + + /* Clear the aftr_isp when becoming unconfigured */ + if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY) + ns.aftr_isp = 0; + + /* An implication of the disk states onto the connection state */ + /* Abort resync if a disk fails/detaches */ + if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) { + if (warn) + *warn = ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ? 
+ ABORTED_ONLINE_VERIFY : ABORTED_RESYNC; + ns.conn = C_CONNECTED; + } + + /* Connection breaks down before we finished "Negotiating" */ + if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING && + get_ldev_if_state(mdev, D_NEGOTIATING)) { + if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) { + ns.disk = mdev->new_state_tmp.disk; + ns.pdsk = mdev->new_state_tmp.pdsk; + } else { + if (warn) + *warn = CONNECTION_LOST_NEGOTIATING; + ns.disk = D_DISKLESS; + ns.pdsk = D_UNKNOWN; + } + put_ldev(mdev); + } + + /* D_CONSISTENT and D_OUTDATED vanish when we get connected */ + if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) { + if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED) + ns.disk = D_UP_TO_DATE; + if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED) + ns.pdsk = D_UP_TO_DATE; + } + + /* Implications of the connection stat on the disk states */ + disk_min = D_DISKLESS; + disk_max = D_UP_TO_DATE; + pdsk_min = D_INCONSISTENT; + pdsk_max = D_UNKNOWN; + switch ((enum drbd_conns)ns.conn) { + case C_WF_BITMAP_T: + case C_PAUSED_SYNC_T: + case C_STARTING_SYNC_T: + case C_WF_SYNC_UUID: + case C_BEHIND: + disk_min = D_INCONSISTENT; + disk_max = D_OUTDATED; + pdsk_min = D_UP_TO_DATE; + pdsk_max = D_UP_TO_DATE; + break; + case C_VERIFY_S: + case C_VERIFY_T: + disk_min = D_UP_TO_DATE; + disk_max = D_UP_TO_DATE; + pdsk_min = D_UP_TO_DATE; + pdsk_max = D_UP_TO_DATE; + break; + case C_CONNECTED: + disk_min = D_DISKLESS; + disk_max = D_UP_TO_DATE; + pdsk_min = D_DISKLESS; + pdsk_max = D_UP_TO_DATE; + break; + case C_WF_BITMAP_S: + case C_PAUSED_SYNC_S: + case C_STARTING_SYNC_S: + case C_AHEAD: + disk_min = D_UP_TO_DATE; + disk_max = D_UP_TO_DATE; + pdsk_min = D_INCONSISTENT; + pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. 
But explicit outdate necessary*/ + break; + case C_SYNC_TARGET: + disk_min = D_INCONSISTENT; + disk_max = D_INCONSISTENT; + pdsk_min = D_UP_TO_DATE; + pdsk_max = D_UP_TO_DATE; + break; + case C_SYNC_SOURCE: + disk_min = D_UP_TO_DATE; + disk_max = D_UP_TO_DATE; + pdsk_min = D_INCONSISTENT; + pdsk_max = D_INCONSISTENT; + break; + case C_STANDALONE: + case C_DISCONNECTING: + case C_UNCONNECTED: + case C_TIMEOUT: + case C_BROKEN_PIPE: + case C_NETWORK_FAILURE: + case C_PROTOCOL_ERROR: + case C_TEAR_DOWN: + case C_WF_CONNECTION: + case C_WF_REPORT_PARAMS: + case C_MASK: + break; + } + if (ns.disk > disk_max) + ns.disk = disk_max; + + if (ns.disk < disk_min) { + if (warn) + *warn = IMPLICITLY_UPGRADED_DISK; + ns.disk = disk_min; + } + if (ns.pdsk > pdsk_max) + ns.pdsk = pdsk_max; + + if (ns.pdsk < pdsk_min) { + if (warn) + *warn = IMPLICITLY_UPGRADED_PDSK; + ns.pdsk = pdsk_min; + } + + if (fp == FP_STONITH && + (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED)) + ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ + + if (mdev->tconn->res_opts.on_no_data == OND_SUSPEND_IO && + (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) + ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ + + if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { + if (ns.conn == C_SYNC_SOURCE) + ns.conn = C_PAUSED_SYNC_S; + if (ns.conn == C_SYNC_TARGET) + ns.conn = C_PAUSED_SYNC_T; + } else { + if (ns.conn == C_PAUSED_SYNC_S) + ns.conn = C_SYNC_SOURCE; + if (ns.conn == C_PAUSED_SYNC_T) + ns.conn = C_SYNC_TARGET; + } + + return ns; +} + +void drbd_resume_al(struct drbd_conf *mdev) +{ + if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags)) + dev_info(DEV, "Resumed AL updates\n"); +} + +/* helper for __drbd_set_state */ +static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs) +{ + if (mdev->tconn->agreed_pro_version < 90) + mdev->ov_start_sector = 0; + mdev->rs_total = 
drbd_bm_bits(mdev); + mdev->ov_position = 0; + if (cs == C_VERIFY_T) { + /* starting online verify from an arbitrary position + * does not fit well into the existing protocol. + * on C_VERIFY_T, we initialize ov_left and friends + * implicitly in receive_DataRequest once the + * first P_OV_REQUEST is received */ + mdev->ov_start_sector = ~(sector_t)0; + } else { + unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector); + if (bit >= mdev->rs_total) { + mdev->ov_start_sector = + BM_BIT_TO_SECT(mdev->rs_total - 1); + mdev->rs_total = 1; + } else + mdev->rs_total -= bit; + mdev->ov_position = mdev->ov_start_sector; + } + mdev->ov_left = mdev->rs_total; +} + +/** + * __drbd_set_state() - Set a new DRBD state + * @mdev: DRBD device. + * @ns: new state. + * @flags: Flags + * @done: Optional completion, that will get completed after the after_state_ch() finished + * + * Caller needs to hold req_lock, and global_state_lock. Do not call directly. + */ +enum drbd_state_rv +__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns, + enum chg_state_flags flags, struct completion *done) +{ + union drbd_state os; + enum drbd_state_rv rv = SS_SUCCESS; + enum sanitize_state_warnings ssw; + struct after_state_chg_work *ascw; + + os = drbd_read_state(mdev); + + ns = sanitize_state(mdev, ns, &ssw); + if (ns.i == os.i) + return SS_NOTHING_TO_DO; + + rv = is_valid_transition(os, ns); + if (rv < SS_SUCCESS) + return rv; + + if (!(flags & CS_HARD)) { + /* pre-state-change checks ; only look at ns */ + /* See drbd_state_sw_errors in drbd_strings.c */ + + rv = is_valid_state(mdev, ns); + if (rv < SS_SUCCESS) { + /* If the old state was illegal as well, then let + this happen...*/ + + if (is_valid_state(mdev, os) == rv) + rv = is_valid_soft_transition(os, ns); + } else + rv = is_valid_soft_transition(os, ns); + } + + if (rv < SS_SUCCESS) { + if (flags & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + return rv; + } + + print_sanitize_warnings(mdev, ssw); + + drbd_pr_state_change(mdev, 
os, ns, flags); + + /* Display changes to the susp* flags that where caused by the call to + sanitize_state(). Only display it here if we where not called from + _conn_request_state() */ + if (!(flags & CS_DC_SUSP)) + conn_pr_state_change(mdev->tconn, os, ns, (flags & ~CS_DC_MASK) | CS_DC_SUSP); + + /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference + * on the ldev here, to be sure the transition -> D_DISKLESS resp. + * drbd_ldev_destroy() won't happen before our corresponding + * after_state_ch works run, where we put_ldev again. */ + if ((os.disk != D_FAILED && ns.disk == D_FAILED) || + (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) + atomic_inc(&mdev->local_cnt); + + /* assignment inclusive debug info about what code path + * initiated this state change. */ + mdev->state.i = ns.i; + mdev->tconn->susp = ns.susp; + mdev->tconn->susp_nod = ns.susp_nod; + mdev->tconn->susp_fen = ns.susp_fen; + + if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) + drbd_print_uuids(mdev, "attached to UUIDs"); + + wake_up(&mdev->misc_wait); + wake_up(&mdev->state_wait); + wake_up(&mdev->tconn->ping_wait); + + /* aborted verify run. 
log the last position */ + if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) && + ns.conn < C_CONNECTED) { + mdev->ov_start_sector = + BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left); + dev_info(DEV, "Online Verify reached sector %llu\n", + (unsigned long long)mdev->ov_start_sector); + } + + if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) && + (ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)) { + dev_info(DEV, "Syncer continues.\n"); + mdev->rs_paused += (long)jiffies + -(long)mdev->rs_mark_time[mdev->rs_last_mark]; + if (ns.conn == C_SYNC_TARGET) + mod_timer(&mdev->resync_timer, jiffies); + } + + if ((os.conn == C_SYNC_TARGET || os.conn == C_SYNC_SOURCE) && + (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) { + dev_info(DEV, "Resync suspended\n"); + mdev->rs_mark_time[mdev->rs_last_mark] = jiffies; + } + + if (os.conn == C_CONNECTED && + (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) { + unsigned long now = jiffies; + int i; + + set_ov_position(mdev, ns.conn); + mdev->rs_start = now; + mdev->rs_last_events = 0; + mdev->rs_last_sect_ev = 0; + mdev->ov_last_oos_size = 0; + mdev->ov_last_oos_start = 0; + + for (i = 0; i < DRBD_SYNC_MARKS; i++) { + mdev->rs_mark_left[i] = mdev->ov_left; + mdev->rs_mark_time[i] = now; + } + + drbd_rs_controller_reset(mdev); + + if (ns.conn == C_VERIFY_S) { + dev_info(DEV, "Starting Online Verify from sector %llu\n", + (unsigned long long)mdev->ov_position); + mod_timer(&mdev->resync_timer, jiffies); + } + } + + if (get_ldev(mdev)) { + u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND| + MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE| + MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY); + + mdf &= ~MDF_AL_CLEAN; + if (test_bit(CRASHED_PRIMARY, &mdev->flags)) + mdf |= MDF_CRASHED_PRIMARY; + if (mdev->state.role == R_PRIMARY || + (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY)) + mdf |= MDF_PRIMARY_IND; + if (mdev->state.conn > C_WF_REPORT_PARAMS) + mdf |= MDF_CONNECTED_IND; + if 
(mdev->state.disk > D_INCONSISTENT) + mdf |= MDF_CONSISTENT; + if (mdev->state.disk > D_OUTDATED) + mdf |= MDF_WAS_UP_TO_DATE; + if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT) + mdf |= MDF_PEER_OUT_DATED; + if (mdf != mdev->ldev->md.flags) { + mdev->ldev->md.flags = mdf; + drbd_md_mark_dirty(mdev); + } + if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT) + drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]); + put_ldev(mdev); + } + + /* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */ + if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT && + os.peer == R_SECONDARY && ns.peer == R_PRIMARY) + set_bit(CONSIDER_RESYNC, &mdev->flags); + + /* Receiver should clean up itself */ + if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) + drbd_thread_stop_nowait(&mdev->tconn->receiver); + + /* Now the receiver finished cleaning up itself, it should die */ + if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) + drbd_thread_stop_nowait(&mdev->tconn->receiver); + + /* Upon network failure, we need to restart the receiver. 
*/ + if (os.conn > C_WF_CONNECTION && + ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) + drbd_thread_restart_nowait(&mdev->tconn->receiver); + + /* Resume AL writing if we get a connection */ + if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) + drbd_resume_al(mdev); + + ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC); + if (ascw) { + ascw->os = os; + ascw->ns = ns; + ascw->flags = flags; + ascw->w.cb = w_after_state_ch; + ascw->w.mdev = mdev; + ascw->done = done; + drbd_queue_work(&mdev->tconn->data.work, &ascw->w); + } else { + dev_err(DEV, "Could not kmalloc an ascw\n"); + } + + return rv; +} + +STATIC int w_after_state_ch(struct drbd_work *w, int unused) +{ + struct after_state_chg_work *ascw = + container_of(w, struct after_state_chg_work, w); + struct drbd_conf *mdev = w->mdev; + + after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags); + if (ascw->flags & CS_WAIT_COMPLETE) { + D_ASSERT(ascw->done != NULL); + complete(ascw->done); + } + kfree(ascw); + + return 0; +} + +static void abw_start_sync(struct drbd_conf *mdev, int rv) +{ + if (rv) { + dev_err(DEV, "Writing the bitmap failed not starting resync.\n"); + _drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE); + return; + } + + switch (mdev->state.conn) { + case C_STARTING_SYNC_T: + _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE); + break; + case C_STARTING_SYNC_S: + drbd_start_resync(mdev, C_SYNC_SOURCE); + break; + } +} + +static int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, + int (*io_fn)(struct drbd_conf *), + char *why, enum bm_flag flags) +{ + int rv; + + D_ASSERT(current == mdev->tconn->worker.task); + + /* open coded non-blocking drbd_suspend_io(mdev); */ + set_bit(SUSPEND_IO, &mdev->flags); + + drbd_bm_lock(mdev, why, flags); + rv = io_fn(mdev); + drbd_bm_unlock(mdev); + + drbd_resume_io(mdev); + + return rv; +} + +/** + * after_state_ch() - Perform after state change actions that may sleep + * @mdev: DRBD device. + * @os: old state. + * @ns: new state. 
+ * @flags: Flags + */ +STATIC void after_state_ch(struct drbd_conf *mdev, union drbd_state os, + union drbd_state ns, enum chg_state_flags flags) +{ + struct sib_info sib; + + sib.sib_reason = SIB_STATE_CHANGE; + sib.os = os; + sib.ns = ns; + + if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { + clear_bit(CRASHED_PRIMARY, &mdev->flags); + if (mdev->p_uuid) + mdev->p_uuid[UI_FLAGS] &= ~((u64)2); + } + + /* Inform userspace about the change... */ + drbd_bcast_event(mdev, &sib); + + if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) && + (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) + drbd_khelper(mdev, "pri-on-incon-degr"); + + /* Here we have the actions that are performed after a + state change. This function might sleep */ + + if (ns.susp_nod) { + enum drbd_req_event what = NOTHING; + + if (os.conn < C_CONNECTED && conn_lowest_conn(mdev->tconn) >= C_CONNECTED) + what = RESEND; + + if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) && + conn_lowest_disk(mdev->tconn) > D_NEGOTIATING) + what = RESTART_FROZEN_DISK_IO; + + if (what != NOTHING) { + spin_lock_irq(&mdev->tconn->req_lock); + _tl_restart(mdev->tconn, what); + _drbd_set_state(_NS(mdev, susp_nod, 0), CS_VERBOSE, NULL); + spin_unlock_irq(&mdev->tconn->req_lock); + } + } + + /* Became sync source. With protocol >= 96, we still need to send out + * the sync uuid now. Need to do that before any drbd_send_state, or + * the other side may go "paused sync" before receiving the sync uuids, + * which is unexpected. */ + if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && + (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && + mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) { + drbd_gen_and_send_sync_uuid(mdev); + put_ldev(mdev); + } + + /* Do not change the order of the if above and the two below... 
*/ + if (os.pdsk == D_DISKLESS && + ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) { /* attach on the peer */ + drbd_send_uuids(mdev); + drbd_send_state(mdev, ns); + } + /* No point in queuing send_bitmap if we don't have a connection + * anymore, so check also the _current_ state, not only the new state + * at the time this work was queued. */ + if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S && + mdev->state.conn == C_WF_BITMAP_S) + drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, + "send_bitmap (WFBitMapS)", + BM_LOCKED_TEST_ALLOWED); + + /* Lost contact to peer's copy of the data */ + if ((os.pdsk >= D_INCONSISTENT && + os.pdsk != D_UNKNOWN && + os.pdsk != D_OUTDATED) + && (ns.pdsk < D_INCONSISTENT || + ns.pdsk == D_UNKNOWN || + ns.pdsk == D_OUTDATED)) { + if (get_ldev(mdev)) { + if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && + mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { + if (drbd_suspended(mdev)) { + set_bit(NEW_CUR_UUID, &mdev->flags); + } else { + drbd_uuid_new_current(mdev); + drbd_send_uuids(mdev); + } + } + put_ldev(mdev); + } + } + + if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { + if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY && + mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { + drbd_uuid_new_current(mdev); + drbd_send_uuids(mdev); + } + /* D_DISKLESS Peer becomes secondary */ + if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) + /* We may still be Primary ourselves. + * No harm done if the bitmap still changes, + * redirtied pages will follow later. */ + drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, + "demote diskless peer", BM_LOCKED_SET_ALLOWED); + put_ldev(mdev); + } + + /* Write out all changed bits on demote. 
+ * Though, no need to da that just yet + * if there is a resync going on still */ + if (os.role == R_PRIMARY && ns.role == R_SECONDARY && + mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) { + /* No changes to the bitmap expected this time, so assert that, + * even though no harm was done if it did change. */ + drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, + "demote", BM_LOCKED_TEST_ALLOWED); + put_ldev(mdev); + } + + /* Last part of the attaching process ... */ + if (ns.conn >= C_CONNECTED && + os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { + drbd_send_sizes(mdev, 0, 0); /* to start sync... */ + drbd_send_uuids(mdev); + drbd_send_state(mdev, ns); + } + + /* We want to pause/continue resync, tell peer. */ + if (ns.conn >= C_CONNECTED && + ((os.aftr_isp != ns.aftr_isp) || + (os.user_isp != ns.user_isp))) + drbd_send_state(mdev, ns); + + /* In case one of the isp bits got set, suspend other devices. */ + if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && + (ns.aftr_isp || ns.peer_isp || ns.user_isp)) + suspend_other_sg(mdev); + + /* Make sure the peer gets informed about eventual state + changes (ISP bits) while we were in WFReportParams. */ + if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) + drbd_send_state(mdev, ns); + + if (os.conn != C_AHEAD && ns.conn == C_AHEAD) + drbd_send_state(mdev, ns); + + /* We are in the progress to start a full sync... */ + if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || + (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) + /* no other bitmap changes expected during this phase */ + drbd_queue_bitmap_io(mdev, + &drbd_bmio_set_n_write, &abw_start_sync, + "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED); + + /* We are invalidating our self... 
*/ + if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && + os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) + /* other bitmap operation expected during this phase */ + drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, + "set_n_write from invalidate", BM_LOCKED_MASK); + + /* first half of local IO error, failure to attach, + * or administrative detach */ + if (os.disk != D_FAILED && ns.disk == D_FAILED) { + enum drbd_io_error_p eh; + int was_io_error; + /* corresponding get_ldev was in __drbd_set_state, to serialize + * our cleanup here with the transition to D_DISKLESS, + * so it is safe to dreference ldev here. */ + rcu_read_lock(); + eh = rcu_dereference(mdev->ldev->disk_conf)->on_io_error; + rcu_read_unlock(); + was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags); + + /* Immediately allow completion of all application IO, that waits + for completion from the local disk. */ + tl_abort_disk_io(mdev); + + /* current state still has to be D_FAILED, + * there is only one way out: to D_DISKLESS, + * and that may only happen after our put_ldev below. */ + if (mdev->state.disk != D_FAILED) + dev_err(DEV, + "ASSERT FAILED: disk is %s during detach\n", + drbd_disk_str(mdev->state.disk)); + + if (ns.conn >= C_CONNECTED) + drbd_send_state(mdev, ns); + + drbd_rs_cancel_all(mdev); + + /* In case we want to get something to stable storage still, + * this may be the last chance. + * Following put_ldev may transition to D_DISKLESS. */ + drbd_md_sync(mdev); + put_ldev(mdev); + + if (was_io_error && eh == EP_CALL_HELPER) + drbd_khelper(mdev, "local-io-error"); + } + + /* second half of local IO error, failure to attach, + * or administrative detach, + * after local_cnt references have reached zero again */ + if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) { + /* We must still be diskless, + * re-attach has to be serialized with this! 
*/ + if (mdev->state.disk != D_DISKLESS) + dev_err(DEV, + "ASSERT FAILED: disk is %s while going diskless\n", + drbd_disk_str(mdev->state.disk)); + + mdev->rs_total = 0; + mdev->rs_failed = 0; + atomic_set(&mdev->rs_pending_cnt, 0); + + if (ns.conn >= C_CONNECTED) + drbd_send_state(mdev, ns); + /* corresponding get_ldev in __drbd_set_state + * this may finaly trigger drbd_ldev_destroy. */ + put_ldev(mdev); + } + + /* Notify peer that I had a local IO error and did not detach. */ + if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED) + drbd_send_state(mdev, ns); + + /* Disks got bigger while they were detached */ + if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && + test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) { + if (ns.conn == C_CONNECTED) + resync_after_online_grow(mdev); + } + + /* A resync finished or aborted, wake paused devices... */ + if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) || + (os.peer_isp && !ns.peer_isp) || + (os.user_isp && !ns.user_isp)) + resume_next_sg(mdev); + + /* sync target done with resync. Explicitly notify peer, even though + * it should (at least for non-empty resyncs) already know itself. */ + if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) + drbd_send_state(mdev, ns); + + /* This triggers bitmap writeout of potentially still unwritten pages + * if the resync finished cleanly, or aborted because of peer disk + * failure, or because of connection loss. + * For resync aborted because of local disk failure, we cannot do + * any bitmap writeout anymore. + * No harm done if some bits change during this phase. 
+ */ + if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) { + drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, + "write from resync_finished", BM_LOCKED_SET_ALLOWED); + put_ldev(mdev); + } + + if (ns.disk == D_DISKLESS && + ns.conn == C_STANDALONE && + ns.role == R_SECONDARY) { + if (os.aftr_isp != ns.aftr_isp) + resume_next_sg(mdev); + } + + drbd_md_sync(mdev); +} + +struct after_conn_state_chg_work { + struct drbd_work w; + enum drbd_conns oc; + union drbd_state ns_min; + union drbd_state ns_max; /* new, max state, over all mdevs */ + enum chg_state_flags flags; +}; + +STATIC int w_after_conn_state_ch(struct drbd_work *w, int unused) +{ + struct after_conn_state_chg_work *acscw = + container_of(w, struct after_conn_state_chg_work, w); + struct drbd_tconn *tconn = w->tconn; + enum drbd_conns oc = acscw->oc; + union drbd_state ns_max = acscw->ns_max; + union drbd_state ns_min = acscw->ns_min; + struct drbd_conf *mdev; + int vnr; + + kfree(acscw); + + /* Upon network configuration, we need to start the receiver */ + if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED) + drbd_thread_start(&tconn->receiver); + + if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) { + struct net_conf *old_conf; + + mutex_lock(&tconn->conf_update); + old_conf = tconn->net_conf; + tconn->my_addr_len = 0; + tconn->peer_addr_len = 0; + rcu_assign_pointer(tconn->net_conf, NULL); + conn_free_crypto(tconn); + mutex_unlock(&tconn->conf_update); + + synchronize_rcu(); + kfree(old_conf); + } + + if (ns_max.susp_fen) { + /* case1: The outdate peer handler is successful: */ + if (ns_max.pdsk <= D_OUTDATED) { + tl_clear(tconn); + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + if (test_bit(NEW_CUR_UUID, &mdev->flags)) { + drbd_uuid_new_current(mdev); + clear_bit(NEW_CUR_UUID, &mdev->flags); + } + } + rcu_read_unlock(); + conn_request_state(tconn, + (union drbd_state) { { .susp_fen = 1 } }, + (union drbd_state) { { .susp_fen = 0 } }, + CS_VERBOSE); 
+ } + /* case2: The connection was established again: */ + if (ns_min.conn >= C_CONNECTED) { + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) + clear_bit(NEW_CUR_UUID, &mdev->flags); + rcu_read_unlock(); + spin_lock_irq(&tconn->req_lock); + _tl_restart(tconn, RESEND); + _conn_request_state(tconn, + (union drbd_state) { { .susp_fen = 1 } }, + (union drbd_state) { { .susp_fen = 0 } }, + CS_VERBOSE); + spin_unlock_irq(&tconn->req_lock); + } + } + kref_put(&tconn->kref, &conn_destroy); + return 0; +} + +void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, enum chg_state_flags *pf) +{ + enum chg_state_flags flags = ~0; + struct drbd_conf *mdev; + int vnr, first_vol = 1; + union drbd_dev_state os, cs = { + { .role = R_SECONDARY, + .peer = R_UNKNOWN, + .conn = tconn->cstate, + .disk = D_DISKLESS, + .pdsk = D_UNKNOWN, + } }; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + os = mdev->state; + + if (first_vol) { + cs = os; + first_vol = 0; + continue; + } + + if (cs.role != os.role) + flags &= ~CS_DC_ROLE; + + if (cs.peer != os.peer) + flags &= ~CS_DC_PEER; + + if (cs.conn != os.conn) + flags &= ~CS_DC_CONN; + + if (cs.disk != os.disk) + flags &= ~CS_DC_DISK; + + if (cs.pdsk != os.pdsk) + flags &= ~CS_DC_PDSK; + } + rcu_read_unlock(); + + *pf |= CS_DC_MASK; + *pf &= flags; + (*pcs).i = cs.i; +} + +static enum drbd_state_rv +conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags flags) +{ + enum drbd_state_rv rv = SS_SUCCESS; + union drbd_state ns, os; + struct drbd_conf *mdev; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + os = drbd_read_state(mdev); + ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL); + + if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED) + ns.disk = os.disk; + + if (ns.i == os.i) + continue; + + rv = is_valid_transition(os, ns); + if (rv < 
SS_SUCCESS) + break; + + if (!(flags & CS_HARD)) { + rv = is_valid_state(mdev, ns); + if (rv < SS_SUCCESS) { + if (is_valid_state(mdev, os) == rv) + rv = is_valid_soft_transition(os, ns); + } else + rv = is_valid_soft_transition(os, ns); + } + if (rv < SS_SUCCESS) + break; + } + rcu_read_unlock(); + + if (rv < SS_SUCCESS && flags & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); + + return rv; +} + +void +conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags) +{ + union drbd_state ns, os, ns_max = { }; + union drbd_state ns_min = { + { .role = R_MASK, + .peer = R_MASK, + .conn = val.conn, + .disk = D_MASK, + .pdsk = D_MASK + } }; + struct drbd_conf *mdev; + enum drbd_state_rv rv; + int vnr, number_of_volumes = 0; + + if (mask.conn == C_MASK) + tconn->cstate = val.conn; + + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + number_of_volumes++; + os = drbd_read_state(mdev); + ns = apply_mask_val(os, mask, val); + ns = sanitize_state(mdev, ns, NULL); + + if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED) + ns.disk = os.disk; + + rv = __drbd_set_state(mdev, ns, flags, NULL); + if (rv < SS_SUCCESS) + BUG(); + + ns.i = mdev->state.i; + ns_max.role = max_role(ns.role, ns_max.role); + ns_max.peer = max_role(ns.peer, ns_max.peer); + ns_max.conn = max_t(enum drbd_conns, ns.conn, ns_max.conn); + ns_max.disk = max_t(enum drbd_disk_state, ns.disk, ns_max.disk); + ns_max.pdsk = max_t(enum drbd_disk_state, ns.pdsk, ns_max.pdsk); + + ns_min.role = min_role(ns.role, ns_min.role); + ns_min.peer = min_role(ns.peer, ns_min.peer); + ns_min.conn = min_t(enum drbd_conns, ns.conn, ns_min.conn); + ns_min.disk = min_t(enum drbd_disk_state, ns.disk, ns_min.disk); + ns_min.pdsk = min_t(enum drbd_disk_state, ns.pdsk, ns_min.pdsk); + } + rcu_read_unlock(); + + if (number_of_volumes == 0) { + ns_min = ns_max = (union drbd_state) { { + 
.role = R_SECONDARY, + .peer = R_UNKNOWN, + .conn = val.conn, + .disk = D_DISKLESS, + .pdsk = D_UNKNOWN + } }; + } + + ns_min.susp = ns_max.susp = tconn->susp; + ns_min.susp_nod = ns_max.susp_nod = tconn->susp_nod; + ns_min.susp_fen = ns_max.susp_fen = tconn->susp_fen; + + *pns_min = ns_min; + *pns_max = ns_max; +} + +static enum drbd_state_rv +_conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val) +{ + enum drbd_state_rv rv; + + if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags)) + return SS_CW_SUCCESS; + + if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags)) + return SS_CW_FAILED_BY_PEER; + + spin_lock_irq(&tconn->req_lock); + rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR; + + if (rv == SS_UNKNOWN_ERROR) + rv = conn_is_valid_transition(tconn, mask, val, 0); + + if (rv == SS_SUCCESS) + rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ + + spin_unlock_irq(&tconn->req_lock); + + return rv; +} + +static enum drbd_state_rv +conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags f) +{ + enum drbd_state_rv rv; + + spin_unlock_irq(&tconn->req_lock); + mutex_lock(&tconn->cstate_mutex); + + if (conn_send_state_req(tconn, mask, val)) { + rv = SS_CW_FAILED_BY_PEER; + /* if (f & CS_VERBOSE) + print_st_err(mdev, os, ns, rv); */ + goto abort; + } + + wait_event(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val))); + +abort: + mutex_unlock(&tconn->cstate_mutex); + spin_lock_irq(&tconn->req_lock); + + return rv; +} + +enum drbd_state_rv +_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags flags) +{ + enum drbd_state_rv rv = SS_SUCCESS; + struct after_conn_state_chg_work *acscw; + enum drbd_conns oc = tconn->cstate; + union drbd_state ns_max, ns_min, os; + + rv = is_valid_conn_transition(oc, val.conn); + if (rv < SS_SUCCESS) + goto abort; + + rv = conn_is_valid_transition(tconn, 
mask, val, flags); + if (rv < SS_SUCCESS) + goto abort; + + if (oc == C_WF_REPORT_PARAMS && val.conn == C_DISCONNECTING && + !(flags & (CS_LOCAL_ONLY | CS_HARD))) { + rv = conn_cl_wide(tconn, mask, val, flags); + if (rv < SS_SUCCESS) + goto abort; + } + + conn_old_common_state(tconn, &os, &flags); + flags |= CS_DC_SUSP; + conn_set_state(tconn, mask, val, &ns_min, &ns_max, flags); + conn_pr_state_change(tconn, os, ns_max, flags); + + acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC); + if (acscw) { + acscw->oc = os.conn; + acscw->ns_min = ns_min; + acscw->ns_max = ns_max; + acscw->flags = flags; + acscw->w.cb = w_after_conn_state_ch; + kref_get(&tconn->kref); + acscw->w.tconn = tconn; + drbd_queue_work(&tconn->data.work, &acscw->w); + } else { + conn_err(tconn, "Could not kmalloc an acscw\n"); + } + + return rv; + abort: + if (flags & CS_VERBOSE) { + conn_err(tconn, "State change failed: %s\n", drbd_set_st_err_str(rv)); + conn_err(tconn, " state = { cs:%s }\n", drbd_conn_str(oc)); + conn_err(tconn, "wanted = { cs:%s }\n", drbd_conn_str(val.conn)); + } + return rv; +} + +enum drbd_state_rv +conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags flags) +{ + enum drbd_state_rv rv; + + spin_lock_irq(&tconn->req_lock); + rv = _conn_request_state(tconn, mask, val, flags); + spin_unlock_irq(&tconn->req_lock); + + return rv; +} diff -Nru drbd8-8.3.7/drbd/drbd_state.h drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_state.h --- drbd8-8.3.7/drbd/drbd_state.h 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_state.h 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,162 @@ +#ifndef DRBD_STATE_H +#define DRBD_STATE_H + +struct drbd_conf; +struct drbd_tconn; + +/** + * DOC: DRBD State macros + * + * These macros are used to express state changes in easily readable form. + * + * The NS macros expand to a mask and a value, that can be bit ored onto the + * current state as soon as the spinlock (req_lock) was taken. 
+ * + * The _NS macros are used for state functions that get called with the + * spinlock. These macros expand directly to the new state value. + * + * Besides the basic forms NS() and _NS() additional _?NS[23] are defined + * to express state changes that affect more than one aspect of the state. + * + * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY) + * Means that the network connection was established and that the peer + * is in secondary role. + */ +#define role_MASK R_MASK +#define peer_MASK R_MASK +#define disk_MASK D_MASK +#define pdsk_MASK D_MASK +#define conn_MASK C_MASK +#define susp_MASK 1 +#define user_isp_MASK 1 +#define aftr_isp_MASK 1 +#define susp_nod_MASK 1 +#define susp_fen_MASK 1 + +#define NS(T, S) \ + ({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T = (S); val; }) +#define NS2(T1, S1, T2, S2) \ + ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ + mask.T2 = T2##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ + val.T2 = (S2); val; }) +#define NS3(T1, S1, T2, S2, T3, S3) \ + ({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \ + mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \ + ({ union drbd_state val; val.i = 0; val.T1 = (S1); \ + val.T2 = (S2); val.T3 = (S3); val; }) + +#define _NS(D, T, S) \ + D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T = (S); __ns; }) +#define _NS2(D, T1, S1, T2, S2) \ + D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \ + __ns.T2 = (S2); __ns; }) +#define _NS3(D, T1, S1, T2, S2, T3, S3) \ + D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \ + __ns.T2 = (S2); __ns.T3 = (S3); __ns; }) + + +enum chg_state_flags { + CS_HARD = 1 << 0, + CS_VERBOSE = 1 << 1, + CS_WAIT_COMPLETE = 1 << 2, + CS_SERIALIZE = 1 << 3, + CS_ORDERED = CS_WAIT_COMPLETE + CS_SERIALIZE, + CS_LOCAL_ONLY = 1 << 4, /* Do not consider a device pair wide state change */ + CS_DC_ROLE = 1 
<< 5, /* DC = display as connection state change */ + CS_DC_PEER = 1 << 6, + CS_DC_CONN = 1 << 7, + CS_DC_DISK = 1 << 8, + CS_DC_PDSK = 1 << 9, + CS_DC_SUSP = 1 << 10, + CS_DC_MASK = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK, + CS_IGN_OUTD_FAIL = 1 << 11, +}; + +/* drbd_dev_state and drbd_state are different types. This is to stress the + small difference. There is no suspended flag (.susp), and no suspended + while fence handler runs flas (susp_fen). */ +union drbd_dev_state { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + unsigned role:2 ; /* 3/4 primary/secondary/unknown */ + unsigned peer:2 ; /* 3/4 primary/secondary/unknown */ + unsigned conn:5 ; /* 17/32 cstates */ + unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ + unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ + unsigned _unused:1 ; + unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ + unsigned peer_isp:1 ; + unsigned user_isp:1 ; + unsigned _pad:11; /* 0 unused */ +#elif defined(__BIG_ENDIAN_BITFIELD) + unsigned _pad:11; + unsigned user_isp:1 ; + unsigned peer_isp:1 ; + unsigned aftr_isp:1 ; /* isp .. 
imposed sync pause */ + unsigned _unused:1 ; + unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ + unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ + unsigned conn:5 ; /* 17/32 cstates */ + unsigned peer:2 ; /* 3/4 primary/secondary/unknown */ + unsigned role:2 ; /* 3/4 primary/secondary/unknown */ +#else +# error "this endianess is not supported" +#endif + }; + unsigned int i; +}; + +extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev, + enum chg_state_flags f, + union drbd_state mask, + union drbd_state val); +extern void drbd_force_state(struct drbd_conf *, union drbd_state, + union drbd_state); +extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *, + union drbd_state, + union drbd_state, + enum chg_state_flags); +extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state, + enum chg_state_flags, + struct completion *done); +extern void print_st_err(struct drbd_conf *, union drbd_state, + union drbd_state, int); + +enum drbd_state_rv +_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags flags); + +enum drbd_state_rv +conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val, + enum chg_state_flags flags); + +extern void drbd_resume_al(struct drbd_conf *mdev); +extern bool conn_all_vols_unconf(struct drbd_tconn *tconn); + +/** + * drbd_request_state() - Reqest a state change + * @mdev: DRBD device. + * @mask: mask of state bits to change. + * @val: value of new state bits. + * + * This is the most graceful way of requesting a state change. It is verbose + * quite verbose in case the state change is not possible, and all those + * state changes are globally serialized. 
+ */ +static inline int drbd_request_state(struct drbd_conf *mdev, + union drbd_state mask, + union drbd_state val) +{ + return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED); +} + +enum drbd_role conn_highest_role(struct drbd_tconn *tconn); +enum drbd_role conn_highest_peer(struct drbd_tconn *tconn); +enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn); +enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn); +enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn); +enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn); + +#endif diff -Nru drbd8-8.3.7/drbd/drbd_strings.c drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_strings.c --- drbd8-8.3.7/drbd/drbd_strings.c 2009-08-26 13:27:50.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_strings.c 2012-02-02 14:09:14.000000000 +0000 @@ -48,6 +48,8 @@ [C_PAUSED_SYNC_T] = "PausedSyncT", [C_VERIFY_S] = "VerifyS", [C_VERIFY_T] = "VerifyT", + [C_AHEAD] = "Ahead", + [C_BEHIND] = "Behind", }; static const char *drbd_role_s_names[] = { @@ -70,7 +72,7 @@ static const char *drbd_state_sw_errors[] = { [-SS_TWO_PRIMARIES] = "Multiple primaries not allowed by config", - [-SS_NO_UP_TO_DATE_DISK] = "Refusing to be Primary without at least one UpToDate disk", + [-SS_NO_UP_TO_DATE_DISK] = "Need access to UpToDate data", [-SS_NO_LOCAL_DISK] = "Can not resync without local disk", [-SS_NO_REMOTE_DISK] = "Can not resync without remote disk", [-SS_CONNECTED_OUTDATES] = "Refusing to be Outdated while Connected", @@ -87,12 +89,13 @@ [-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated", [-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change", [-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted", + [-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config", }; const char *drbd_conn_str(enum drbd_conns s) { /* enums are unsigned... */ - return s > C_PAUSED_SYNC_T ? "TOO_LARGE" : drbd_conn_s_names[s]; + return s > C_BEHIND ? 
"TOO_LARGE" : drbd_conn_s_names[s]; } const char *drbd_role_str(enum drbd_role s) @@ -105,7 +108,7 @@ return s > D_UP_TO_DATE ? "TOO_LARGE" : drbd_disk_s_names[s]; } -const char *drbd_set_st_err_str(enum drbd_state_ret_codes err) +const char *drbd_set_st_err_str(enum drbd_state_rv err) { return err <= SS_AFTER_LAST_ERROR ? "TOO_SMALL" : err > SS_TWO_PRIMARIES ? "TOO_LARGE" diff -Nru drbd8-8.3.7/drbd/drbd_tracing.c drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_tracing.c --- drbd8-8.3.7/drbd/drbd_tracing.c 2009-11-25 09:06:43.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_tracing.c 1970-01-01 00:00:00.000000000 +0000 @@ -1,756 +0,0 @@ -/* - drbd_tracing.c - - This file is part of DRBD by Philipp Reisner and Lars Ellenberg. - - Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. - Copyright (C) 2003-2008, Philipp Reisner . - Copyright (C) 2003-2008, Lars Ellenberg . - - drbd is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - drbd is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with drbd; see the file COPYING. If not, write to - the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
- - */ - -#include -#include -#include -#include "drbd_int.h" -#include "drbd_tracing.h" -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Philipp Reisner, Lars Ellenberg"); -MODULE_DESCRIPTION("DRBD tracepoint probes"); -MODULE_PARM_DESC(trace_mask, "Bitmap of events to trace see drbd_tracing.c"); -MODULE_PARM_DESC(trace_level, "Current tracing level (changeable in /sys)"); -MODULE_PARM_DESC(trace_devs, "Bitmap of devices to trace (changeable in /sys)"); - -unsigned int trace_mask = 0; /* Bitmap of events to trace */ -int trace_level; /* Current trace level */ -int trace_devs; /* Bitmap of devices to trace */ - -module_param(trace_mask, uint, 0444); -module_param(trace_level, int, 0644); -module_param(trace_devs, int, 0644); - -enum { - TRACE_PACKET = 0x0001, - TRACE_RQ = 0x0002, - TRACE_UUID = 0x0004, - TRACE_RESYNC = 0x0008, - TRACE_EE = 0x0010, - TRACE_UNPLUG = 0x0020, - TRACE_NL = 0x0040, - TRACE_AL_EXT = 0x0080, - TRACE_INT_RQ = 0x0100, - TRACE_MD_IO = 0x0200, - TRACE_EPOCH = 0x0400, -}; - -/* Buffer printing support - * dbg_print_flags: used for Flags arg to drbd_print_buffer - * - DBGPRINT_BUFFADDR; if set, each line starts with the - * virtual address of the line being output. If clear, - * each line starts with the offset from the beginning - * of the buffer. */ -enum dbg_print_flags { - DBGPRINT_BUFFADDR = 0x0001, -}; - -/* Macro stuff */ -STATIC char *nl_packet_name(int packet_type) -{ -/* Generate packet type strings */ -#define NL_PACKET(name, number, fields) \ - [P_ ## name] = # name, -#define NL_INTEGER Argh! -#define NL_BIT Argh! -#define NL_INT64 Argh! -#define NL_STRING Argh! - - static char *nl_tag_name[P_nl_after_last_packet] = { -#include "linux/drbd_nl.h" - }; - - return (packet_type < sizeof(nl_tag_name)/sizeof(nl_tag_name[0])) ? 
- nl_tag_name[packet_type] : "*Unknown*"; -} -/* /Macro stuff */ - -static inline int is_mdev_trace(struct drbd_conf *mdev, unsigned int level) -{ - return trace_level >= level && ((1 << mdev_to_minor(mdev)) & trace_devs); -} - -static void probe_drbd_unplug(struct drbd_conf *mdev, char *msg) -{ - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - dev_info(DEV, "%s, ap_bio_count=%d\n", msg, atomic_read(&mdev->ap_bio_cnt)); -} - -static void probe_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index) -{ - static char *uuid_str[UI_EXTENDED_SIZE] = { - [UI_CURRENT] = "CURRENT", - [UI_BITMAP] = "BITMAP", - [UI_HISTORY_START] = "HISTORY_START", - [UI_HISTORY_END] = "HISTORY_END", - [UI_SIZE] = "SIZE", - [UI_FLAGS] = "FLAGS", - }; - - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - if (index >= UI_EXTENDED_SIZE) { - dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n"); - return; - } - - dev_info(DEV, " uuid[%s] now %016llX\n", - uuid_str[index], - (unsigned long long)mdev->ldev->md.uuid[index]); -} - -static void probe_drbd_md_io(struct drbd_conf *mdev, int rw, - struct drbd_backing_dev *bdev) -{ - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - dev_info(DEV, " %s metadata superblock now\n", - rw == READ ? 
"Reading" : "Writing"); -} - -static void probe_drbd_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, char* msg) -{ - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - dev_info(DEV, "EE %s sec=%llus size=%u e=%p\n", - msg, (unsigned long long)e->sector, e->size, e); -} - -static void probe_drbd_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch, - enum epoch_event ev) -{ - static char *epoch_event_str[] = { - [EV_PUT] = "put", - [EV_GOT_BARRIER_NR] = "got_barrier_nr", - [EV_BARRIER_DONE] = "barrier_done", - [EV_BECAME_LAST] = "became_last", - [EV_TRACE_FLUSH] = "issuing_flush", - [EV_TRACE_ADD_BARRIER] = "added_barrier", - [EV_TRACE_SETTING_BI] = "just set barrier_in_next_epoch", - }; - - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - ev &= ~EV_CLEANUP; - - switch (ev) { - case EV_TRACE_ALLOC: - dev_info(DEV, "Allocate epoch %p/xxxx { } nr_epochs=%d\n", epoch, mdev->epochs); - break; - case EV_TRACE_FREE: - dev_info(DEV, "Freeing epoch %p/%d { size=%d } nr_epochs=%d\n", - epoch, epoch->barrier_nr, atomic_read(&epoch->epoch_size), - mdev->epochs); - break; - default: - dev_info(DEV, "Update epoch %p/%d { size=%d active=%d %c%c n%c%c } ev=%s\n", - epoch, epoch->barrier_nr, atomic_read(&epoch->epoch_size), - atomic_read(&epoch->active), - test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) ? 'n' : '-', - test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) ? 'b' : '-', - test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) ? 'i' : '-', - test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) ? 
'd' : '-', - epoch_event_str[ev]); - } -} - -static void probe_drbd_netlink(void *data, int is_req) -{ - struct cn_msg *msg = data; - - if (is_req) { - struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)msg->data; - - printk(KERN_INFO "drbd%d: " - "Netlink: << %s (%d) - seq: %x, ack: %x, len: %x\n", - nlp->drbd_minor, - nl_packet_name(nlp->packet_type), - nlp->packet_type, - msg->seq, msg->ack, msg->len); - } else { - struct drbd_nl_cfg_reply *nlp = (struct drbd_nl_cfg_reply *)msg->data; - - printk(KERN_INFO "drbd%d: " - "Netlink: >> %s (%d) - seq: %x, ack: %x, len: %x\n", - nlp->minor, - nlp->packet_type == P_nl_after_last_packet ? - "Empty-Reply" : nl_packet_name(nlp->packet_type), - nlp->packet_type, - msg->seq, msg->ack, msg->len); - } -} - -static void probe_drbd_actlog(struct drbd_conf *mdev, sector_t sector, char* msg) -{ - unsigned int enr = (sector >> (AL_EXTENT_SHIFT-9)); - - if (!is_mdev_trace(mdev, TRACE_LVL_ALWAYS)) - return; - - dev_info(DEV, "%s (sec=%llus, al_enr=%u, rs_enr=%d)\n", - msg, (unsigned long long) sector, enr, - (int)BM_SECT_TO_EXT(sector)); -} - -/** - * drbd_print_buffer() - Hexdump arbitrary binary data into a buffer - * @prefix: String is output at the beginning of each line output. - * @flags: Currently only defined flag: DBGPRINT_BUFFADDR; if set, each - * line starts with the virtual address of the line being - * output. If clear, each line starts with the offset from the - * beginning of the buffer. - * @size: Indicates the size of each entry in the buffer. 
Supported - * values are sizeof(char), sizeof(short) and sizeof(int) - * @buffer: Start address of buffer - * @buffer_va: Virtual address of start of buffer (normally the same - * as Buffer, but having it separate allows it to hold - * file address for example) - * @length: length of buffer - */ -static void drbd_print_buffer(const char *prefix, unsigned int flags, int size, - const void *buffer, const void *buffer_va, - unsigned int length) - -#define LINE_SIZE 16 -#define LINE_ENTRIES (int)(LINE_SIZE/size) -{ - const unsigned char *pstart; - const unsigned char *pstart_va; - const unsigned char *pend; - char bytes_str[LINE_SIZE*3+8], ascii_str[LINE_SIZE+8]; - char *pbytes = bytes_str, *pascii = ascii_str; - int offset = 0; - long sizemask; - int field_width; - int index; - const unsigned char *pend_str; - const unsigned char *p; - int count; - - /* verify size parameter */ - if (size != sizeof(char) && - size != sizeof(short) && - size != sizeof(int)) { - printk(KERN_DEBUG "drbd_print_buffer: " - "ERROR invalid size %d\n", size); - return; - } - - sizemask = size-1; - field_width = size*2; - - /* Adjust start/end to be on appropriate boundary for size */ - buffer = (const char *)((long)buffer & ~sizemask); - pend = (const unsigned char *) - (((long)buffer + length + sizemask) & ~sizemask); - - if (flags & DBGPRINT_BUFFADDR) { - /* Move start back to nearest multiple of line size, - * if printing address. 
This results in nicely formatted output - * with addresses being on line size (16) byte boundaries */ - pstart = (const unsigned char *)((long)buffer & ~(LINE_SIZE-1)); - } else { - pstart = (const unsigned char *)buffer; - } - - /* Set value of start VA to print if addresses asked for */ - pstart_va = (const unsigned char *)buffer_va - - ((const unsigned char *)buffer-pstart); - - /* Calculate end position to nicely align right hand side */ - pend_str = pstart + (((pend-pstart) + LINE_SIZE-1) & ~(LINE_SIZE-1)); - - /* Init strings */ - *pbytes = *pascii = '\0'; - - /* Start at beginning of first line */ - p = pstart; - count = 0; - - while (p < pend_str) { - if (p < (const unsigned char *)buffer || p >= pend) { - /* Before start of buffer or after end- print spaces */ - pbytes += sprintf(pbytes, "%*c ", field_width, ' '); - pascii += sprintf(pascii, "%*c", size, ' '); - p += size; - } else { - /* Add hex and ascii to strings */ - int val; - switch (size) { - default: - case 1: - val = *(unsigned char *)p; - break; - case 2: - val = *(unsigned short *)p; - break; - case 4: - val = *(unsigned int *)p; - break; - } - - pbytes += sprintf(pbytes, "%0*x ", field_width, val); - - for (index = size; index; index--) { - *pascii++ = isprint(*p) ? *p : '.'; - p++; - } - } - - count++; - - if (count == LINE_ENTRIES || p >= pend_str) { - /* Null terminate and print record */ - *pascii = '\0'; - printk(KERN_DEBUG "%s%8.8lx: %*s|%*s|\n", - prefix, - (flags & DBGPRINT_BUFFADDR) - ? 
(long)pstart_va:(long)offset, - LINE_ENTRIES*(field_width+1), bytes_str, - LINE_SIZE, ascii_str); - - /* Move onto next line */ - pstart_va += (p-pstart); - pstart = p; - count = 0; - offset += LINE_SIZE; - - /* Re-init strings */ - pbytes = bytes_str; - pascii = ascii_str; - *pbytes = *pascii = '\0'; - } - } -} - -static void probe_drbd_resync(struct drbd_conf *mdev, int level, const char *fmt, va_list args) -{ - char str[256]; - - if (!is_mdev_trace(mdev, level)) - return; - - if (vsnprintf(str, 256, fmt, args) >= 256) - str[255] = 0; - - printk(KERN_INFO "%s %s: %s", dev_driver_string(disk_to_dev(mdev->vdisk)), - dev_name(disk_to_dev(mdev->vdisk)), str); -} - -static void probe_drbd_bio(struct drbd_conf *mdev, const char *pfx, struct bio *bio, int complete, - struct drbd_request *r) -{ -#if defined(CONFIG_LBDAF) || defined(CONFIG_LBD) -#define SECTOR_FORMAT "%Lx" -#else -#define SECTOR_FORMAT "%lx" -#endif -#define SECTOR_SHIFT 9 - - unsigned long lowaddr = (unsigned long)(bio->bi_sector << SECTOR_SHIFT); - char *faddr = (char *)(lowaddr); - char rb[sizeof(void *)*2+6] = { 0, }; - struct bio_vec *bvec; - int segno; - - const int rw = bio->bi_rw; - const int biorw = (rw & (RW_MASK|RWA_MASK)); - const int biobarrier = (rw & (1<>>", - pfx, - biorw == WRITE ? "Write" : "Read", - biobarrier ? " : B" : "", - biosync ? " : S" : "", - bio, - rb, - complete ? (bio_flagged(bio, BIO_UPTODATE) ? "Success, " : "Failed, ") : "", - bio->bi_sector << SECTOR_SHIFT, - bio->bi_size); - - if (trace_level >= TRACE_LVL_METRICS && - ((biorw == WRITE) ^ complete)) { - printk(KERN_DEBUG " ind page offset length\n"); - __bio_for_each_segment(bvec, bio, segno, 0) { - printk(KERN_DEBUG " [%d] %p %8.8x %8.8x\n", segno, - bvec->bv_page, bvec->bv_offset, bvec->bv_len); - - if (trace_level >= TRACE_LVL_ALL) { - char *bvec_buf; - unsigned long flags; - - bvec_buf = bvec_kmap_irq(bvec, &flags); - - drbd_print_buffer(" ", DBGPRINT_BUFFADDR, 1, - bvec_buf, - faddr, - (bvec->bv_len <= 0x80) - ? 
bvec->bv_len : 0x80); - - bvec_kunmap_irq(bvec_buf, &flags); - - if (bvec->bv_len > 0x40) - printk(KERN_DEBUG " ....\n"); - - faddr += bvec->bv_len; - } - } - } -} - -static void probe_drbd_req(struct drbd_request *req, enum drbd_req_event what, char *msg) -{ - static const char *rq_event_names[] = { - [created] = "created", - [to_be_send] = "to_be_send", - [to_be_submitted] = "to_be_submitted", - [queue_for_net_write] = "queue_for_net_write", - [queue_for_net_read] = "queue_for_net_read", - [send_canceled] = "send_canceled", - [send_failed] = "send_failed", - [handed_over_to_network] = "handed_over_to_network", - [connection_lost_while_pending] = - "connection_lost_while_pending", - [recv_acked_by_peer] = "recv_acked_by_peer", - [write_acked_by_peer] = "write_acked_by_peer", - [neg_acked] = "neg_acked", - [conflict_discarded_by_peer] = "conflict_discarded_by_peer", - [barrier_acked] = "barrier_acked", - [data_received] = "data_received", - [read_completed_with_error] = "read_completed_with_error", - [read_ahead_completed_with_error] = "reada_completed_with_error", - [write_completed_with_error] = "write_completed_with_error", - [completed_ok] = "completed_ok", - }; - - struct drbd_conf *mdev = req->mdev; - - const int rw = (req->master_bio == NULL || - bio_data_dir(req->master_bio) == WRITE) ? - 'W' : 'R'; - const unsigned long s = req->rq_state; - - if (what != nothing) { - dev_info(DEV, "__req_mod(%p %c ,%s)\n", req, rw, rq_event_names[what]); - } else { - dev_info(DEV, "%s %p %c L%c%c%cN%c%c%c%c%c %u (%llus +%u) %s\n", - msg, req, rw, - s & RQ_LOCAL_PENDING ? 'p' : '-', - s & RQ_LOCAL_COMPLETED ? 'c' : '-', - s & RQ_LOCAL_OK ? 'o' : '-', - s & RQ_NET_PENDING ? 'p' : '-', - s & RQ_NET_QUEUED ? 'q' : '-', - s & RQ_NET_SENT ? 's' : '-', - s & RQ_NET_DONE ? 'd' : '-', - s & RQ_NET_OK ? 
'o' : '-', - req->epoch, - (unsigned long long)req->sector, - req->size, - drbd_conn_str(mdev->state.conn)); - } -} - - -#define drbd_peer_str drbd_role_str -#define drbd_pdsk_str drbd_disk_str - -#define PSM(A) \ -do { \ - if (mask.A) { \ - int i = snprintf(p, len, " " #A "( %s )", \ - drbd_##A##_str(val.A)); \ - if (i >= len) \ - return op; \ - p += i; \ - len -= i; \ - } \ -} while (0) - -STATIC char *dump_st(char *p, int len, union drbd_state mask, union drbd_state val) -{ - char *op = p; - *p = '\0'; - PSM(role); - PSM(peer); - PSM(conn); - PSM(disk); - PSM(pdsk); - - return op; -} - -#define INFOP(fmt, args...) \ -do { \ - if (trace_level >= TRACE_LVL_ALL) { \ - dev_info(DEV, "%s:%d: %s [%d] %s %s " fmt , \ - file, line, current->comm, current->pid, \ - sockname, recv ? "<<<" : ">>>" , \ - ## args); \ - } else { \ - dev_info(DEV, "%s %s " fmt, sockname, \ - recv ? "<<<" : ">>>" , \ - ## args); \ - } \ -} while (0) - -STATIC char *_dump_block_id(u64 block_id, char *buff) -{ - if (is_syncer_block_id(block_id)) - strcpy(buff, "SyncerId"); - else - sprintf(buff, "%llx", (unsigned long long)block_id); - - return buff; -} - -static void probe_drbd_packet(struct drbd_conf *mdev, struct socket *sock, - int recv, union p_polymorph *p, char *file, int line) -{ - char *sockname = sock == mdev->meta.socket ? "meta" : "data"; - int cmd = (recv == 2) ? 
p->header.command : be16_to_cpu(p->header.command); - char tmp[300]; - union drbd_state m, v; - - switch (cmd) { - case P_HAND_SHAKE: - INFOP("%s (protocol %u-%u)\n", cmdname(cmd), - be32_to_cpu(p->handshake.protocol_min), - be32_to_cpu(p->handshake.protocol_max)); - break; - - case P_BITMAP: /* don't report this */ - case P_COMPRESSED_BITMAP: /* don't report this */ - break; - - case P_DATA: - INFOP("%s (sector %llus, id %s, seq %u, f %x)\n", cmdname(cmd), - (unsigned long long)be64_to_cpu(p->data.sector), - _dump_block_id(p->data.block_id, tmp), - be32_to_cpu(p->data.seq_num), - be32_to_cpu(p->data.dp_flags) - ); - break; - - case P_DATA_REPLY: - case P_RS_DATA_REPLY: - INFOP("%s (sector %llus, id %s)\n", cmdname(cmd), - (unsigned long long)be64_to_cpu(p->data.sector), - _dump_block_id(p->data.block_id, tmp) - ); - break; - - case P_RECV_ACK: - case P_WRITE_ACK: - case P_RS_WRITE_ACK: - case P_DISCARD_ACK: - case P_NEG_ACK: - case P_NEG_RS_DREPLY: - INFOP("%s (sector %llus, size %u, id %s, seq %u)\n", - cmdname(cmd), - (long long)be64_to_cpu(p->block_ack.sector), - be32_to_cpu(p->block_ack.blksize), - _dump_block_id(p->block_ack.block_id, tmp), - be32_to_cpu(p->block_ack.seq_num) - ); - break; - - case P_DATA_REQUEST: - case P_RS_DATA_REQUEST: - INFOP("%s (sector %llus, size %u, id %s)\n", cmdname(cmd), - (long long)be64_to_cpu(p->block_req.sector), - be32_to_cpu(p->block_req.blksize), - _dump_block_id(p->block_req.block_id, tmp) - ); - break; - - case P_BARRIER: - case P_BARRIER_ACK: - INFOP("%s (barrier %u)\n", cmdname(cmd), p->barrier.barrier); - break; - - case P_SYNC_PARAM: - case P_SYNC_PARAM89: - INFOP("%s (rate %u, verify-alg \"%.64s\", csums-alg \"%.64s\")\n", - cmdname(cmd), be32_to_cpu(p->rs_param_89.rate), - p->rs_param_89.verify_alg, p->rs_param_89.csums_alg); - break; - - case P_UUIDS: - INFOP("%s Curr:%016llX, Bitmap:%016llX, " - "HisSt:%016llX, HisEnd:%016llX\n", - cmdname(cmd), - (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_CURRENT]), - 
(unsigned long long)be64_to_cpu(p->uuids.uuid[UI_BITMAP]), - (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_START]), - (unsigned long long)be64_to_cpu(p->uuids.uuid[UI_HISTORY_END])); - break; - - case P_SIZES: - INFOP("%s (d %lluMiB, u %lluMiB, c %lldMiB, " - "max bio %x, q order %x)\n", - cmdname(cmd), - (long long)(be64_to_cpu(p->sizes.d_size)>>(20-9)), - (long long)(be64_to_cpu(p->sizes.u_size)>>(20-9)), - (long long)(be64_to_cpu(p->sizes.c_size)>>(20-9)), - be32_to_cpu(p->sizes.max_segment_size), - be32_to_cpu(p->sizes.queue_order_type)); - break; - - case P_STATE: - v.i = be32_to_cpu(p->state.state); - m.i = 0xffffffff; - dump_st(tmp, sizeof(tmp), m, v); - INFOP("%s (s %x {%s})\n", cmdname(cmd), v.i, tmp); - break; - - case P_STATE_CHG_REQ: - m.i = be32_to_cpu(p->req_state.mask); - v.i = be32_to_cpu(p->req_state.val); - dump_st(tmp, sizeof(tmp), m, v); - INFOP("%s (m %x v %x {%s})\n", cmdname(cmd), m.i, v.i, tmp); - break; - - case P_STATE_CHG_REPLY: - INFOP("%s (ret %x)\n", cmdname(cmd), - be32_to_cpu(p->req_state_reply.retcode)); - break; - - case P_PING: - case P_PING_ACK: - /* - * Dont trace pings at summary level - */ - if (trace_level < TRACE_LVL_ALL) - break; - /* fall through... 
*/ - default: - INFOP("%s (%u)\n", cmdname(cmd), cmd); - break; - } -} - - -static int __init drbd_trace_init(void) -{ - int ret; - - if (trace_mask & TRACE_UNPLUG) { - ret = register_trace_drbd_unplug(probe_drbd_unplug); - WARN_ON(ret); - } - if (trace_mask & TRACE_UUID) { - ret = register_trace_drbd_uuid(probe_drbd_uuid); - WARN_ON(ret); - } - if (trace_mask & TRACE_EE) { - ret = register_trace_drbd_ee(probe_drbd_ee); - WARN_ON(ret); - } - if (trace_mask & TRACE_PACKET) { - ret = register_trace_drbd_packet(probe_drbd_packet); - WARN_ON(ret); - } - if (trace_mask & TRACE_MD_IO) { - ret = register_trace_drbd_md_io(probe_drbd_md_io); - WARN_ON(ret); - } - if (trace_mask & TRACE_EPOCH) { - ret = register_trace_drbd_epoch(probe_drbd_epoch); - WARN_ON(ret); - } - if (trace_mask & TRACE_NL) { - ret = register_trace_drbd_netlink(probe_drbd_netlink); - WARN_ON(ret); - } - if (trace_mask & TRACE_AL_EXT) { - ret = register_trace_drbd_actlog(probe_drbd_actlog); - WARN_ON(ret); - } - if (trace_mask & TRACE_RQ) { - ret = register_trace_drbd_bio(probe_drbd_bio); - WARN_ON(ret); - } - if (trace_mask & TRACE_INT_RQ) { - ret = register_trace_drbd_req(probe_drbd_req); - WARN_ON(ret); - } - if (trace_mask & TRACE_RESYNC) { - ret = register_trace__drbd_resync(probe_drbd_resync); - WARN_ON(ret); - } - return 0; -} - -module_init(drbd_trace_init); - -static void __exit drbd_trace_exit(void) -{ - if (trace_mask & TRACE_UNPLUG) - unregister_trace_drbd_unplug(probe_drbd_unplug); - if (trace_mask & TRACE_UUID) - unregister_trace_drbd_uuid(probe_drbd_uuid); - if (trace_mask & TRACE_EE) - unregister_trace_drbd_ee(probe_drbd_ee); - if (trace_mask & TRACE_PACKET) - unregister_trace_drbd_packet(probe_drbd_packet); - if (trace_mask & TRACE_MD_IO) - unregister_trace_drbd_md_io(probe_drbd_md_io); - if (trace_mask & TRACE_EPOCH) - unregister_trace_drbd_epoch(probe_drbd_epoch); - if (trace_mask & TRACE_NL) - unregister_trace_drbd_netlink(probe_drbd_netlink); - if (trace_mask & TRACE_AL_EXT) - 
unregister_trace_drbd_actlog(probe_drbd_actlog); - if (trace_mask & TRACE_RQ) - unregister_trace_drbd_bio(probe_drbd_bio); - if (trace_mask & TRACE_INT_RQ) - unregister_trace_drbd_req(probe_drbd_req); - if (trace_mask & TRACE_RESYNC) - unregister_trace__drbd_resync(probe_drbd_resync); - - tracepoint_synchronize_unregister(); -} - -module_exit(drbd_trace_exit); diff -Nru drbd8-8.3.7/drbd/drbd_tracing.h drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_tracing.h --- drbd8-8.3.7/drbd/drbd_tracing.h 2009-11-25 09:06:43.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_tracing.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,87 +0,0 @@ -/* - drbd_tracing.h - - This file is part of DRBD by Philipp Reisner and Lars Ellenberg. - - Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. - Copyright (C) 2003-2008, Philipp Reisner . - Copyright (C) 2003-2008, Lars Ellenberg . - - drbd is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - drbd is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with drbd; see the file COPYING. If not, write to - the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
- - */ - -#ifndef DRBD_TRACING_H -#define DRBD_TRACING_H - -#include -#include "drbd_int.h" -#include "drbd_req.h" - -enum { - TRACE_LVL_ALWAYS = 0, - TRACE_LVL_SUMMARY, - TRACE_LVL_METRICS, - TRACE_LVL_ALL, - TRACE_LVL_MAX -}; - -DECLARE_TRACE(drbd_unplug, - TP_PROTO(struct drbd_conf *mdev, char* msg), - TP_ARGS(mdev, msg)); - -DECLARE_TRACE(drbd_uuid, - TP_PROTO(struct drbd_conf *mdev, enum drbd_uuid_index index), - TP_ARGS(mdev, index)); - -DECLARE_TRACE(drbd_ee, - TP_PROTO(struct drbd_conf *mdev, struct drbd_epoch_entry *e, char* msg), - TP_ARGS(mdev, e, msg)); - -DECLARE_TRACE(drbd_md_io, - TP_PROTO(struct drbd_conf *mdev, int rw, struct drbd_backing_dev *bdev), - TP_ARGS(mdev, rw, bdev)); - -DECLARE_TRACE(drbd_epoch, - TP_PROTO(struct drbd_conf *mdev, struct drbd_epoch *epoch, enum epoch_event ev), - TP_ARGS(mdev, epoch, ev)); - -DECLARE_TRACE(drbd_netlink, - TP_PROTO(void *data, int is_req), - TP_ARGS(data, is_req)); - -DECLARE_TRACE(drbd_actlog, - TP_PROTO(struct drbd_conf *mdev, sector_t sector, char* msg), - TP_ARGS(mdev, sector, msg)); - -DECLARE_TRACE(drbd_bio, - TP_PROTO(struct drbd_conf *mdev, const char *pfx, struct bio *bio, int complete, - struct drbd_request *r), - TP_ARGS(mdev, pfx, bio, complete, r)); - -DECLARE_TRACE(drbd_req, - TP_PROTO(struct drbd_request *req, enum drbd_req_event what, char *msg), - TP_ARGS(req, what, msg)); - -DECLARE_TRACE(drbd_packet, - TP_PROTO(struct drbd_conf *mdev, struct socket *sock, - int recv, union p_polymorph *p, char *file, int line), - TP_ARGS(mdev, sock, recv, p, file, line)); - -DECLARE_TRACE(_drbd_resync, - TP_PROTO(struct drbd_conf *mdev, int level, const char *fmt, va_list args), - TP_ARGS(mdev, level, fmt, args)); - -#endif diff -Nru drbd8-8.3.7/drbd/drbd_vli.h drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_vli.h --- drbd8-8.3.7/drbd/drbd_vli.h 2009-07-27 08:47:43.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_vli.h 2012-02-02 14:09:14.000000000 +0000 @@ -32,7 +32,7 @@ * the bitmap transfer time can 
take much too long, * if transmitted in plain text. * - * We try to reduce the transfered bitmap information + * We try to reduce the transferred bitmap information * by encoding runlengths of bit polarity. * * We never actually need to encode a "zero" (runlengths are positive). diff -Nru drbd8-8.3.7/drbd/drbd_worker.c drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_worker.c --- drbd8-8.3.7/drbd/drbd_worker.c 2010-01-07 09:09:34.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_worker.c 2012-02-02 14:09:14.000000000 +0000 @@ -27,41 +27,27 @@ #include #include #include -#include #include #include #include #include #include #include -#ifdef HAVE_LINUX_SCATTERLIST_H -/* 2.6.11 (suse 9.3, fc4) does not include requisites - * from linux/scatterlist.h :( */ -#include -#include #include -#endif #include "drbd_int.h" #include "drbd_req.h" -#include "drbd_tracing.h" - -#define SLEEP_TIME (HZ/10) - -STATIC int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel); +STATIC int w_make_ov_request(struct drbd_work *w, int cancel); -/* defined here: - drbd_md_io_complete - drbd_endio_write_sec - drbd_endio_read_sec - drbd_endio_pri - - * more endio handlers: - atodb_endio in drbd_actlog.c - drbd_bm_async_io_complete in drbd_bitmap.c +/* endio handlers: + * drbd_md_io_complete (defined here) + * drbd_request_endio (defined here) + * drbd_peer_request_endio (defined here) + * bm_async_io_complete (defined in drbd_bitmap.c) + * * For all these callbacks, note the following: * The callbacks will be called in irq context by the IDE drivers, * and in Softirqs/Tasklets/BH context by the SCSI drivers. @@ -72,7 +58,7 @@ /* About the global_state_lock Each state transition on an device holds a read lock. In case we have - to evaluate the sync after dependencies, we grab a write lock, because + to evaluate the resync after dependencies, we grab a write lock, because we need stable states on all devices for that. 
*/ rwlock_t global_state_lock; @@ -82,171 +68,179 @@ BIO_ENDIO_TYPE drbd_md_io_complete BIO_ENDIO_ARGS(struct bio *bio, int error) { struct drbd_md_io *md_io; + struct drbd_conf *mdev; BIO_ENDIO_FN_START; md_io = (struct drbd_md_io *)bio->bi_private; + mdev = container_of(md_io, struct drbd_conf, md_io); + md_io->error = error; - trace_drbd_bio(md_io->mdev, "Md", bio, 1, NULL); + /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able + * to timeout on the lower level device, and eventually detach from it. + * If this io completion runs after that timeout expired, this + * drbd_md_put_buffer() may allow us to finally try and re-attach. + * During normal operation, this only puts that extra reference + * down to 1 again. + * Make sure we first drop the reference, and only then signal + * completion, or we may (in drbd_al_read_log()) cycle so fast into the + * next drbd_md_sync_page_io(), that we trigger the + * ASSERT(atomic_read(&mdev->md_io_in_use) == 1) there. + */ + drbd_md_put_buffer(mdev); + md_io->done = 1; + wake_up(&mdev->misc_wait); + bio_put(bio); + put_ldev(mdev); - complete(&md_io->event); BIO_ENDIO_FN_RETURN; } /* reads on behalf of the partner, * "submitted" by the receiver */ -BIO_ENDIO_TYPE drbd_endio_read_sec BIO_ENDIO_ARGS(struct bio *bio, int error) __releases(local) +void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local) { unsigned long flags = 0; - struct drbd_epoch_entry *e = NULL; - struct drbd_conf *mdev; - int uptodate = bio_flagged(bio, BIO_UPTODATE); - - e = bio->bi_private; - mdev = e->mdev; - - BIO_ENDIO_FN_START; - if (error) - dev_warn(DEV, "read: error=%d s=%llus\n", error, - (unsigned long long)e->sector); - if (!error && !uptodate) { - dev_warn(DEV, "read: setting error to -EIO s=%llus\n", - (unsigned long long)e->sector); - /* strange behavior of some lower level drivers... - * fail the request by clearing the uptodate flag, - * but do not return any error?! 
*/ - error = -EIO; - } + struct drbd_conf *mdev = peer_req->w.mdev; - D_ASSERT(e->block_id != ID_VACANT); - - trace_drbd_bio(mdev, "Sec", bio, 1, NULL); - - spin_lock_irqsave(&mdev->req_lock, flags); - mdev->read_cnt += e->size >> 9; - list_del(&e->w.list); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + mdev->read_cnt += peer_req->i.size >> 9; + list_del(&peer_req->w.list); if (list_empty(&mdev->read_ee)) wake_up(&mdev->ee_wait); - spin_unlock_irqrestore(&mdev->req_lock, flags); + if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) + __drbd_chk_io_error(mdev, false); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - drbd_chk_io_error(mdev, error, FALSE); - drbd_queue_work(&mdev->data.work, &e->w); + drbd_queue_work(&mdev->tconn->data.work, &peer_req->w); put_ldev(mdev); +} - trace_drbd_ee(mdev, e, "read completed"); - BIO_ENDIO_FN_RETURN; +static int is_failed_barrier(int ee_flags) +{ + return (ee_flags & (EE_IS_BARRIER|EE_WAS_ERROR|EE_RESUBMITTED)) + == (EE_IS_BARRIER|EE_WAS_ERROR); } /* writes on behalf of the partner, or resync writes, - * "submitted" by the receiver. - */ -BIO_ENDIO_TYPE drbd_endio_write_sec BIO_ENDIO_ARGS(struct bio *bio, int error) __releases(local) + * "submitted" by the receiver, final stage. 
*/ +static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local) { unsigned long flags = 0; - struct drbd_epoch_entry *e = NULL; - struct drbd_conf *mdev; - sector_t e_sector; + struct drbd_conf *mdev = peer_req->w.mdev; + struct drbd_interval i; int do_wake; - int is_syncer_req; + u64 block_id; int do_al_complete_io; - int uptodate = bio_flagged(bio, BIO_UPTODATE); - int is_barrier = bio_rw_flagged(bio, BIO_RW_BARRIER); - - e = bio->bi_private; - mdev = e->mdev; - BIO_ENDIO_FN_START; - if (error) - dev_warn(DEV, "write: error=%d s=%llus\n", error, - (unsigned long long)e->sector); - if (!error && !uptodate) { - dev_warn(DEV, "write: setting error to -EIO s=%llus\n", - (unsigned long long)e->sector); - /* strange behavior of some lower level drivers... - * fail the request by clearing the uptodate flag, - * but do not return any error?! */ - error = -EIO; - } - - /* error == -ENOTSUPP would be a better test, - * alas it is not reliable */ - if (error && is_barrier && e->flags & EE_IS_BARRIER) { - drbd_bump_write_ordering(mdev, WO_bdev_flush); - spin_lock_irqsave(&mdev->req_lock, flags); - list_del(&e->w.list); - e->w.cb = w_e_reissue; + /* if this is a failed barrier request, disable use of barriers, + * and schedule for resubmission */ + if (is_failed_barrier(peer_req->flags)) { + drbd_bump_write_ordering(mdev->tconn, WO_bdev_flush); + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + list_del(&peer_req->w.list); + peer_req->flags = (peer_req->flags & ~EE_WAS_ERROR) | EE_RESUBMITTED; + peer_req->w.cb = w_e_reissue; /* put_ldev actually happens below, once we come here again. 
*/ __release(local); - spin_unlock_irqrestore(&mdev->req_lock, flags); - drbd_queue_work(&mdev->data.work, &e->w); - BIO_ENDIO_FN_RETURN; + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); + drbd_queue_work(&mdev->tconn->data.work, &peer_req->w); + return; } - D_ASSERT(e->block_id != ID_VACANT); - - trace_drbd_bio(mdev, "Sec", bio, 1, NULL); - - spin_lock_irqsave(&mdev->req_lock, flags); - mdev->writ_cnt += e->size >> 9; - is_syncer_req = is_syncer_block_id(e->block_id); - - /* after we moved e to done_ee, + /* after we moved peer_req to done_ee, * we may no longer access it, * it may be freed/reused already! * (as soon as we release the req_lock) */ - e_sector = e->sector; - do_al_complete_io = e->flags & EE_CALL_AL_COMPLETE_IO; - - list_del(&e->w.list); /* has been on active_ee or sync_ee */ - list_add_tail(&e->w.list, &mdev->done_ee); + i = peer_req->i; + do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO; + block_id = peer_req->block_id; + + spin_lock_irqsave(&mdev->tconn->req_lock, flags); + mdev->writ_cnt += peer_req->i.size >> 9; + list_del(&peer_req->w.list); /* has been on active_ee or sync_ee */ + list_add_tail(&peer_req->w.list, &mdev->done_ee); + + /* + * Do not remove from the write_requests tree here: we did not send the + * Ack yet and did not wake possibly waiting conflicting requests. + * Removed from the tree from "drbd_process_done_ee" within the + * appropriate w.cb (e_end_block/e_end_resync_block) or from + * _drbd_clear_done_ee. + */ - trace_drbd_ee(mdev, e, "write completed"); + do_wake = list_empty(block_id == ID_SYNCER ? &mdev->sync_ee : &mdev->active_ee); - /* No hlist_del_init(&e->colision) here, we did not send the Ack yet, - * neither did we wake possibly waiting conflicting requests. - * done from "drbd_process_done_ee" within the appropriate w.cb - * (e_end_block/e_end_resync_block) or from _drbd_clear_done_ee */ - - do_wake = is_syncer_req - ? 
list_empty(&mdev->sync_ee) - : list_empty(&mdev->active_ee); + if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) + __drbd_chk_io_error(mdev, false); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); - if (error) - __drbd_chk_io_error(mdev, FALSE); - spin_unlock_irqrestore(&mdev->req_lock, flags); - - if (is_syncer_req) - drbd_rs_complete_io(mdev, e_sector); + if (block_id == ID_SYNCER) + drbd_rs_complete_io(mdev, i.sector); if (do_wake) wake_up(&mdev->ee_wait); if (do_al_complete_io) - drbd_al_complete_io(mdev, e_sector); + drbd_al_complete_io(mdev, &i); - wake_asender(mdev); + wake_asender(mdev->tconn); put_ldev(mdev); +} +/* writes on behalf of the partner, or resync writes, + * "submitted" by the receiver. + */ +BIO_ENDIO_TYPE drbd_peer_request_endio BIO_ENDIO_ARGS(struct bio *bio, int error) +{ + struct drbd_peer_request *peer_req = bio->bi_private; + struct drbd_conf *mdev = peer_req->w.mdev; + int uptodate = bio_flagged(bio, BIO_UPTODATE); + int is_write = bio_data_dir(bio) == WRITE; + + BIO_ENDIO_FN_START; + if (error && DRBD_ratelimit(5*HZ, 5)) + dev_warn(DEV, "%s: error=%d s=%llus\n", + is_write ? "write" : "read", error, + (unsigned long long)peer_req->i.sector); + if (!error && !uptodate) { + if (DRBD_ratelimit(5*HZ, 5)) + dev_warn(DEV, "%s: setting error to -EIO s=%llus\n", + is_write ? "write" : "read", + (unsigned long long)peer_req->i.sector); + /* strange behavior of some lower level drivers... + * fail the request by clearing the uptodate flag, + * but do not return any error?! 
*/ + error = -EIO; + } + + if (error) + set_bit(__EE_WAS_ERROR, &peer_req->flags); + + bio_put(bio); /* no need for the bio anymore */ + if (atomic_dec_and_test(&peer_req->pending_bios)) { + if (is_write) + drbd_endio_write_sec_final(peer_req); + else + drbd_endio_read_sec_final(peer_req); + } BIO_ENDIO_FN_RETURN; } /* read, readA or write requests on R_PRIMARY coming from drbd_make_request */ -BIO_ENDIO_TYPE drbd_endio_pri BIO_ENDIO_ARGS(struct bio *bio, int error) +BIO_ENDIO_TYPE drbd_request_endio BIO_ENDIO_ARGS(struct bio *bio, int error) { unsigned long flags; struct drbd_request *req = bio->bi_private; - struct drbd_conf *mdev = req->mdev; + struct drbd_conf *mdev = req->w.mdev; struct bio_and_error m; enum drbd_req_event what; int uptodate = bio_flagged(bio, BIO_UPTODATE); BIO_ENDIO_FN_START; - if (error) - dev_warn(DEV, "p %s: error=%d\n", - bio_data_dir(bio) == WRITE ? "write" : "read", error); if (!error && !uptodate) { dev_warn(DEV, "p %s: setting error to -EIO\n", bio_data_dir(bio) == WRITE ? "write" : "read"); @@ -256,74 +250,78 @@ error = -EIO; } - trace_drbd_bio(mdev, "Pri", bio, 1, NULL); - /* to avoid recursion in __req_mod */ if (unlikely(error)) { what = (bio_data_dir(bio) == WRITE) - ? write_completed_with_error - : (bio_rw(bio) == READA) - ? read_completed_with_error - : read_ahead_completed_with_error; + ? WRITE_COMPLETED_WITH_ERROR + : (bio_rw(bio) == READ) + ? READ_COMPLETED_WITH_ERROR + : READ_AHEAD_COMPLETED_WITH_ERROR; } else - what = completed_ok; + what = COMPLETED_OK; bio_put(req->private_bio); req->private_bio = ERR_PTR(error); - spin_lock_irqsave(&mdev->req_lock, flags); + /* not req_mod(), we need irqsave here! 
*/ + spin_lock_irqsave(&mdev->tconn->req_lock, flags); __req_mod(req, what, &m); - spin_unlock_irqrestore(&mdev->req_lock, flags); + spin_unlock_irqrestore(&mdev->tconn->req_lock, flags); if (m.bio) complete_master_bio(mdev, &m); BIO_ENDIO_FN_RETURN; } -int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel) -{ - struct drbd_request *req = container_of(w, struct drbd_request, w); - - /* NOTE: mdev->ldev can be NULL by the time we get here! */ - /* D_ASSERT(mdev->ldev->dc.on_io_error != EP_PASS_ON); */ - - /* the only way this callback is scheduled is from _req_may_be_done, - * when it is done and had a local write error, see comments there */ - drbd_req_free(req); - - return TRUE; -} - -int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_read_retry_remote(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; /* We should not detach for read io-error, * but try to WRITE the P_DATA_REPLY to the failed location, * to give the disk the chance to relocate that block */ - spin_lock_irq(&mdev->req_lock); - if (cancel || - mdev->state.conn < C_CONNECTED || - mdev->state.pdsk <= D_INCONSISTENT) { - _req_mod(req, send_canceled); - spin_unlock_irq(&mdev->req_lock); - dev_alert(DEV, "WE ARE LOST. 
Local IO failure, no peer.\n"); - return 1; + spin_lock_irq(&mdev->tconn->req_lock); + if (cancel || mdev->state.pdsk != D_UP_TO_DATE) { + _req_mod(req, READ_RETRY_REMOTE_CANCELED); + spin_unlock_irq(&mdev->tconn->req_lock); + return 0; } - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); - return w_send_read_req(mdev, w, 0); + return w_send_read_req(w, 0); } -int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, + struct drbd_peer_request *peer_req, void *digest) { - ERR_IF(cancel) return 1; - dev_err(DEV, "resync inactive, but callback triggered??\n"); - return 1; /* Simply ignore this! */ + struct hash_desc desc; + struct scatterlist sg; + struct page *page = peer_req->pages; + struct page *tmp; + unsigned len; + + desc.tfm = tfm; + desc.flags = 0; + + sg_init_table(&sg, 1); + crypto_hash_init(&desc); + + while ((tmp = page_chain_next(page))) { + /* all but the last page will be fully used */ + sg_set_page(&sg, page, PAGE_SIZE, 0); + crypto_hash_update(&desc, &sg, sg.length); + page = tmp; + } + /* and now the last, possibly only partially used page */ + len = peer_req->i.size & (PAGE_SIZE - 1); + sg_set_page(&sg, page, len ?: PAGE_SIZE, 0); + crypto_hash_update(&desc, &sg, sg.length); + crypto_hash_final(&desc, digest); } -void drbd_csum(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest) +void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *bio, void *digest) { struct hash_desc desc; struct scatterlist sg; @@ -343,169 +341,286 @@ crypto_hash_final(&desc, digest); } -STATIC int w_e_send_csum(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +/* MAYBE merge common code with w_e_end_ov_req */ +STATIC int w_e_send_csum(struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *peer_req = container_of(w, 
struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; int digest_size; void *digest; - int ok; + int err = 0; - D_ASSERT(e->block_id == DRBD_MAGIC + 0xbeef); - - if (unlikely(cancel)) { - drbd_free_ee(mdev, e); - return 1; - } + if (unlikely(cancel)) + goto out; - if (likely(drbd_bio_uptodate(e->private_bio))) { - digest_size = crypto_hash_digestsize(mdev->csums_tfm); - digest = kmalloc(digest_size, GFP_NOIO); - if (digest) { - drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest); + if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0)) + goto out; - inc_rs_pending(mdev); - ok = drbd_send_drequest_csum(mdev, - e->sector, - e->size, - digest, - digest_size, - P_CSUM_RS_REQUEST); - kfree(digest); - } else { - dev_err(DEV, "kmalloc() of digest failed.\n"); - ok = 0; - } - } else - ok = 1; + digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm); + digest = kmalloc(digest_size, GFP_NOIO); + if (digest) { + sector_t sector = peer_req->i.sector; + unsigned int size = peer_req->i.size; + drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest); + /* Free peer_req and pages before send. + * In case we block on congestion, we could otherwise run into + * some distributed deadlock, if the other side blocks on + * congestion as well, because our receiver blocks in + * drbd_alloc_pages due to pp_in_use > max_buffers. 
*/ + drbd_free_peer_req(mdev, peer_req); + peer_req = NULL; + inc_rs_pending(mdev); + err = drbd_send_drequest_csum(mdev, sector, size, + digest, digest_size, + P_CSUM_RS_REQUEST); + kfree(digest); + } else { + dev_err(DEV, "kmalloc() of digest failed.\n"); + err = -ENOMEM; + } - drbd_free_ee(mdev, e); +out: + if (peer_req) + drbd_free_peer_req(mdev, peer_req); - if (unlikely(!ok)) + if (unlikely(err)) dev_err(DEV, "drbd_send_drequest(..., csum) failed\n"); - return ok; + return err; } #define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN) STATIC int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size) { - struct drbd_epoch_entry *e; + struct drbd_peer_request *peer_req; if (!get_ldev(mdev)) - return 0; + return -EIO; + + if (drbd_rs_should_slow_down(mdev, sector)) + goto defer; /* GFP_TRY, because if there is no memory available right now, this may * be rescheduled for later. It is "only" background resync, after all. */ - e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY); - if (!e) { - put_ldev(mdev); - return 2; - } + peer_req = drbd_alloc_peer_req(mdev, ID_SYNCER /* unused */, sector, + size, GFP_TRY); + if (!peer_req) + goto defer; + + peer_req->w.cb = w_e_send_csum; + spin_lock_irq(&mdev->tconn->req_lock); + list_add(&peer_req->w.list, &mdev->read_ee); + spin_unlock_irq(&mdev->tconn->req_lock); + + atomic_add(size >> 9, &mdev->rs_sect_ev); + if (drbd_submit_peer_request(mdev, peer_req, READ, DRBD_FAULT_RS_RD) == 0) + return 0; - spin_lock_irq(&mdev->req_lock); - list_add(&e->w.list, &mdev->read_ee); - spin_unlock_irq(&mdev->req_lock); + /* If it failed because of ENOMEM, retry should help. If it failed + * because bio_add_page failed (probably broken lower level driver), + * retry may or may not help. + * If it does not, you may need to force disconnect. 
*/ + spin_lock_irq(&mdev->tconn->req_lock); + list_del(&peer_req->w.list); + spin_unlock_irq(&mdev->tconn->req_lock); - e->private_bio->bi_end_io = drbd_endio_read_sec; - e->private_bio->bi_rw = READ; - e->w.cb = w_e_send_csum; + drbd_free_peer_req(mdev, peer_req); +defer: + put_ldev(mdev); + return -EAGAIN; +} - mdev->read_cnt += size >> 9; - drbd_generic_make_request(mdev, DRBD_FAULT_RS_RD, e->private_bio); +int w_resync_timer(struct drbd_work *w, int cancel) +{ + struct drbd_conf *mdev = w->mdev; + switch (mdev->state.conn) { + case C_VERIFY_S: + w_make_ov_request(w, cancel); + break; + case C_SYNC_TARGET: + w_make_resync_request(w, cancel); + break; + } - return 1; + return 0; } void resync_timer_fn(unsigned long data) { - unsigned long flags; struct drbd_conf *mdev = (struct drbd_conf *) data; - int queue; - spin_lock_irqsave(&mdev->req_lock, flags); + if (list_empty(&mdev->resync_work.list)) + drbd_queue_work(&mdev->tconn->data.work, &mdev->resync_work); +} - if (likely(!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags))) { - queue = 1; - if (mdev->state.conn == C_VERIFY_S) - mdev->resync_work.cb = w_make_ov_request; - else - mdev->resync_work.cb = w_make_resync_request; - } else { - queue = 0; - mdev->resync_work.cb = w_resync_inactive; +static void fifo_set(struct fifo_buffer *fb, int value) +{ + int i; + + for (i = 0; i < fb->size; i++) + fb->values[i] = value; +} + +static int fifo_push(struct fifo_buffer *fb, int value) +{ + int ov; + + ov = fb->values[fb->head_index]; + fb->values[fb->head_index++] = value; + + if (fb->head_index >= fb->size) + fb->head_index = 0; + + return ov; +} + +static void fifo_add_val(struct fifo_buffer *fb, int value) +{ + int i; + + for (i = 0; i < fb->size; i++) + fb->values[i] += value; +} + +struct fifo_buffer *fifo_alloc(int fifo_size) +{ + struct fifo_buffer *fb; + + fb = kzalloc(sizeof(struct fifo_buffer) + sizeof(int) * fifo_size, GFP_KERNEL); + if (!fb) + return NULL; + + fb->head_index = 0; + fb->size = fifo_size; + 
fb->total = 0; + + return fb; +} + +STATIC int drbd_rs_controller(struct drbd_conf *mdev) +{ + struct disk_conf *dc; + unsigned int sect_in; /* Number of sectors that came in since the last turn */ + unsigned int want; /* The number of sectors we want in the proxy */ + int req_sect; /* Number of sectors to request in this turn */ + int correction; /* Number of sectors more we need in the proxy*/ + int cps; /* correction per invocation of drbd_rs_controller() */ + int steps; /* Number of time steps to plan ahead */ + int curr_corr; + int max_sect; + struct fifo_buffer *plan; + + sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */ + mdev->rs_in_flight -= sect_in; + + dc = rcu_dereference(mdev->ldev->disk_conf); + plan = rcu_dereference(mdev->rs_plan_s); + + steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */ + + if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */ + want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps; + } else { /* normal path */ + want = dc->c_fill_target ? 
dc->c_fill_target : + sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10); } - spin_unlock_irqrestore(&mdev->req_lock, flags); + correction = want - mdev->rs_in_flight - plan->total; + + /* Plan ahead */ + cps = correction / steps; + fifo_add_val(plan, cps); + plan->total += cps * steps; + + /* What we do in this step */ + curr_corr = fifo_push(plan, 0); + plan->total -= curr_corr; + + req_sect = sect_in + curr_corr; + if (req_sect < 0) + req_sect = 0; + + max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ; + if (req_sect > max_sect) + req_sect = max_sect; + + /* + dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n", + sect_in, mdev->rs_in_flight, want, correction, + steps, cps, mdev->rs_planed, curr_corr, req_sect); + */ + + return req_sect; +} + +STATIC int drbd_rs_number_requests(struct drbd_conf *mdev) +{ + int number; + + rcu_read_lock(); + if (rcu_dereference(mdev->rs_plan_s)->size) { + number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9); + mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; + } else { + mdev->c_sync_rate = rcu_dereference(mdev->ldev->disk_conf)->resync_rate; + number = SLEEP_TIME * mdev->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ); + } + rcu_read_unlock(); - /* harmless race: list_empty outside data.work.q_lock */ - if (list_empty(&mdev->resync_work.list) && queue) - drbd_queue_work(&mdev->data.work, &mdev->resync_work); + /* ignore the amount of pending requests, the resync controller should + * throttle down to incoming reply rate soon enough anyways. 
*/ + return number; } -int w_make_resync_request(struct drbd_conf *mdev, - struct drbd_work *w, int cancel) +int w_make_resync_request(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; unsigned long bit; sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); - int max_segment_size = queue_max_segment_size(mdev->rq_queue); - int number, i, size, pe, mx; + int max_bio_size; + int number, rollback_i, size; int align, queued, sndbuf; + int i = 0; - PARANOIA_BUG_ON(w != &mdev->resync_work); +#ifdef PARANOIA + BUG_ON(w != &mdev->resync_work); +#endif if (unlikely(cancel)) - return 1; + return 0; - if (unlikely(mdev->state.conn < C_CONNECTED)) { - dev_err(DEV, "Confused in w_make_resync_request()! cstate < Connected"); + if (mdev->rs_total == 0) { + /* empty resync? */ + drbd_resync_finished(mdev); return 0; } - if (mdev->state.conn != C_SYNC_TARGET) - dev_err(DEV, "%s in w_make_resync_request\n", - drbd_conn_str(mdev->state.conn)); - if (!get_ldev(mdev)) { /* Since we only need to access mdev->rsync a get_ldev_if_state(mdev,D_FAILED) would be sufficient, but to continue resync with a broken disk makes no sense at all */ dev_err(DEV, "Disk broke down during resync!\n"); - mdev->resync_work.cb = w_resync_inactive; - return 1; + return 0; } - number = SLEEP_TIME * mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ); - pe = atomic_read(&mdev->rs_pending_cnt); - - mutex_lock(&mdev->data.mutex); - if (mdev->data.socket) - mx = mdev->data.socket->sk->sk_rcvbuf / sizeof(struct p_block_req); - else - mx = 1; - mutex_unlock(&mdev->data.mutex); - - /* For resync rates >160MB/sec, allow more pending RS requests */ - if (number > mx) - mx = number; - - /* Limit the number of pending RS requests to no more than the peer's receive buffer */ - if ((pe + number) > mx) { - number = mx - pe; - } + max_bio_size = queue_max_hw_sectors(mdev->rq_queue) << 9; + number = drbd_rs_number_requests(mdev); + if (number == 0) + goto requeue; for (i = 0; i < 
number; i++) { /* Stop generating RS requests, when half of the send buffer is filled */ - mutex_lock(&mdev->data.mutex); - if (mdev->data.socket) { - queued = mdev->data.socket->sk->sk_wmem_queued; - sndbuf = mdev->data.socket->sk->sk_sndbuf; + mutex_lock(&mdev->tconn->data.mutex); + if (mdev->tconn->data.socket) { + queued = mdev->tconn->data.socket->sk->sk_wmem_queued; + sndbuf = mdev->tconn->data.socket->sk->sk_sndbuf; } else { queued = 1; sndbuf = 0; } - mutex_unlock(&mdev->data.mutex); + mutex_unlock(&mdev->tconn->data.mutex); if (queued > sndbuf / 2) goto requeue; @@ -513,16 +628,16 @@ size = BM_BLOCK_SIZE; bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo); - if (bit == -1UL) { + if (bit == DRBD_END_OF_BITMAP) { mdev->bm_resync_fo = drbd_bm_bits(mdev); - mdev->resync_work.cb = w_resync_inactive; put_ldev(mdev); - return 1; + return 0; } sector = BM_BIT_TO_SECT(bit); - if (drbd_try_rs_begin_io(mdev, sector)) { + if (drbd_rs_should_slow_down(mdev, sector) || + drbd_try_rs_begin_io(mdev, sector)) { mdev->bm_resync_fo = bit; goto requeue; } @@ -533,22 +648,17 @@ goto next_sector; } -#if DRBD_MAX_SEGMENT_SIZE > BM_BLOCK_SIZE +#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE /* try to find some adjacent bits. * we stop if we have already the maximum req size. * * Additionally always align bigger requests, in order to * be prepared for all stripe sizes of software RAIDs. - * - * we _do_ care about the agreed-upon q->max_segment_size - * here, as splitting up the requests on the other side is more - * difficult. the consequence is, that on lvm and md and other - * "indirect" devices, this is dead code, since - * q->max_segment_size will be PAGE_SIZE. 
*/ align = 1; + rollback_i = i; for (;;) { - if (size + BM_BLOCK_SIZE > max_segment_size) + if (size + BM_BLOCK_SIZE > max_bio_size) break; /* Be always aligned */ @@ -580,25 +690,33 @@ /* adjust very last sectors, in case we are oddly sized */ if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) { + if (mdev->tconn->agreed_pro_version >= 89 && mdev->tconn->csums_tfm) { switch (read_for_csum(mdev, sector, size)) { - case 0: /* Disk failure*/ + case -EIO: /* Disk failure */ put_ldev(mdev); - return 0; - case 2: /* Allocation failed */ + return -EIO; + case -EAGAIN: /* allocation failed, or ldev busy */ drbd_rs_complete_io(mdev, sector); mdev->bm_resync_fo = BM_SECT_TO_BIT(sector); + i = rollback_i; goto requeue; - /* case 1: everything ok */ + case 0: + /* everything ok */ + break; + default: + BUG(); } } else { + int err; + inc_rs_pending(mdev); - if (!drbd_send_drequest(mdev, P_RS_DATA_REQUEST, - sector, size, ID_SYNCER)) { + err = drbd_send_drequest(mdev, P_RS_DATA_REQUEST, + sector, size, ID_SYNCER); + if (err) { dev_err(DEV, "drbd_send_drequest() failed, aborting...\n"); dec_rs_pending(mdev); put_ldev(mdev); - return 0; + return err; } } } @@ -610,19 +728,20 @@ * resync data block, and the last bit is cleared. * until then resync "work" is "inactive" ... 
*/ - mdev->resync_work.cb = w_resync_inactive; put_ldev(mdev); - return 1; + return 0; } requeue: + mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); put_ldev(mdev); - return 1; + return 0; } -STATIC int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +STATIC int w_make_ov_request(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; int number, i, size; sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); @@ -630,27 +749,18 @@ if (unlikely(cancel)) return 1; - if (unlikely(mdev->state.conn < C_CONNECTED)) { - dev_err(DEV, "Confused in w_make_ov_request()! cstate < Connected"); - return 0; - } - - number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ); - if (atomic_read(&mdev->rs_pending_cnt) > number) - goto requeue; - - number -= atomic_read(&mdev->rs_pending_cnt); + number = drbd_rs_number_requests(mdev); sector = mdev->ov_position; for (i = 0; i < number; i++) { if (sector >= capacity) { - mdev->resync_work.cb = w_resync_inactive; return 1; } size = BM_BLOCK_SIZE; - if (drbd_try_rs_begin_io(mdev, sector)) { + if (drbd_rs_should_slow_down(mdev, sector) || + drbd_try_rs_begin_io(mdev, sector)) { mdev->ov_position = sector; goto requeue; } @@ -659,7 +769,7 @@ size = (capacity-sector)<<9; inc_rs_pending(mdev); - if (!drbd_send_ov_request(mdev, sector, size)) { + if (drbd_send_ov_request(mdev, sector, size)) { dec_rs_pending(mdev); return 0; } @@ -668,27 +778,39 @@ mdev->ov_position = sector; requeue: + mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9)); mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME); return 1; } - -int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_ov_finished(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; kfree(w); - ov_oos_print(mdev); + ov_out_of_sync_print(mdev); drbd_resync_finished(mdev); - return 1; + return 0; } -STATIC int 
w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +STATIC int w_resync_finished(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; kfree(w); drbd_resync_finished(mdev); - return 1; + return 0; +} + +STATIC void ping_peer(struct drbd_conf *mdev) +{ + struct drbd_tconn *tconn = mdev->tconn; + + clear_bit(GOT_PING_ACK, &tconn->flags); + request_ping(tconn); + wait_event(tconn->ping_wait, + test_bit(GOT_PING_ACK, &tconn->flags) || mdev->state.conn < C_CONNECTED); } int drbd_resync_finished(struct drbd_conf *mdev) @@ -698,6 +820,7 @@ union drbd_state os, ns; struct drbd_work *w; char *khelper_cmd = NULL; + int verify_done = 0; /* Remove all elements from the resync LRU. Since future actions * might set bits in the (main) bitmap, then the entries in the @@ -708,13 +831,12 @@ * queue (or even the read operations for those packets * is not finished by now). Retry in 100ms. */ - drbd_kick_lo(mdev); - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 10); + schedule_timeout_interruptible(HZ / 10); w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC); if (w) { w->cb = w_resync_finished; - drbd_queue_work(&mdev->data.work, w); + w->mdev = mdev; + drbd_queue_work(&mdev->tconn->data.work, w); return 1; } dev_err(DEV, "Warn failed to drbd_rs_del_all() and to kmalloc(w).\n"); @@ -730,8 +852,12 @@ if (!get_ldev(mdev)) goto out; - spin_lock_irq(&mdev->req_lock); - os = mdev->state; + ping_peer(mdev); + + spin_lock_irq(&mdev->tconn->req_lock); + os = drbd_read_state(mdev); + + verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T); /* This protects us against multiple calls (that can happen in the presence of application IO), and against connectivity loss just before we arrive here. */ @@ -742,8 +868,7 @@ ns.conn = C_CONNECTED; dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n", - (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) ? - "Online verify " : "Resync", + verify_done ? 
"Online verify " : "Resync", dt + mdev->rs_paused, mdev->rs_paused, dbdt); n_oos = drbd_bm_total_weight(mdev); @@ -760,13 +885,13 @@ if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) khelper_cmd = "after-resync-target"; - if (mdev->csums_tfm && mdev->rs_total) { + if (mdev->tconn->csums_tfm && mdev->rs_total) { const unsigned long s = mdev->rs_same_csum; const unsigned long t = mdev->rs_total; const int ratio = (t == 0) ? 0 : (t < 100000) ? ((s*100)/t) : (s/(t/100)); - dev_info(DEV, "%u %% had equal check sums, eliminated: %luK; " + dev_info(DEV, "%u %% had equal checksums, eliminated: %luK; " "transferred %luK total %luK\n", ratio, Bit2KB(mdev->rs_same_csum), @@ -801,32 +926,33 @@ } } - drbd_uuid_set_bm(mdev, 0UL); - - if (mdev->p_uuid) { - /* Now the two UUID sets are equal, update what we - * know of the peer. */ - int i; - for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) - mdev->p_uuid[i] = mdev->ldev->md.uuid[i]; + if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) { + /* for verify runs, we don't update uuids here, + * so there would be nothing to report. */ + drbd_uuid_set_bm(mdev, 0UL); + drbd_print_uuids(mdev, "updated UUIDs"); + if (mdev->p_uuid) { + /* Now the two UUID sets are equal, update what we + * know of the peer. 
*/ + int i; + for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++) + mdev->p_uuid[i] = mdev->ldev->md.uuid[i]; + } } } - DRBD_STATE_DEBUG_INIT_VAL(ns); _drbd_set_state(mdev, ns, CS_VERBOSE, NULL); out_unlock: - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); put_ldev(mdev); out: mdev->rs_total = 0; mdev->rs_failed = 0; mdev->rs_paused = 0; - mdev->ov_start_sector = 0; + if (verify_done) + mdev->ov_start_sector = 0; - if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) { - dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n"); - drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished"); - } + drbd_md_sync(mdev); if (khelper_cmd) drbd_khelper(mdev, khelper_cmd); @@ -835,15 +961,19 @@ } /* helper */ -static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_entry *e) +static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_peer_request *peer_req) { - if (drbd_bio_has_active_page(e->private_bio)) { + if (drbd_peer_req_has_active_page(peer_req)) { /* This might happen if sendpage() has not finished */ - spin_lock_irq(&mdev->req_lock); - list_add_tail(&e->w.list, &mdev->net_ee); - spin_unlock_irq(&mdev->req_lock); + int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT; + atomic_add(i, &mdev->pp_in_use_by_net); + atomic_sub(i, &mdev->pp_in_use); + spin_lock_irq(&mdev->tconn->req_lock); + list_add_tail(&peer_req->w.list, &mdev->net_ee); + spin_unlock_irq(&mdev->tconn->req_lock); + wake_up(&drbd_pp_wait); } else - drbd_free_ee(mdev, e); + drbd_free_peer_req(mdev, peer_req); } /** @@ -852,182 +982,203 @@ * @w: work object. 
* @cancel: The connection will be closed anyways */ -int w_e_end_data_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_e_end_data_req(struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); - int ok; + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; + int err; if (unlikely(cancel)) { - drbd_free_ee(mdev, e); + drbd_free_peer_req(mdev, peer_req); dec_unacked(mdev); - return 1; + return 0; } - if (likely(drbd_bio_uptodate(e->private_bio))) { - ok = drbd_send_block(mdev, P_DATA_REPLY, e); + if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { + err = drbd_send_block(mdev, P_DATA_REPLY, peer_req); } else { if (DRBD_ratelimit(5*HZ, 5)) dev_err(DEV, "Sending NegDReply. sector=%llus.\n", - (unsigned long long)e->sector); + (unsigned long long)peer_req->i.sector); - ok = drbd_send_ack(mdev, P_NEG_DREPLY, e); + err = drbd_send_ack(mdev, P_NEG_DREPLY, peer_req); } dec_unacked(mdev); - move_to_net_ee_or_free(mdev, e); + move_to_net_ee_or_free(mdev, peer_req); - if (unlikely(!ok)) + if (unlikely(err)) dev_err(DEV, "drbd_send_block() failed\n"); - return ok; + return err; } /** - * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUESTRS + * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST * @mdev: DRBD device. * @w: work object. 
* @cancel: The connection will be closed anyways */ -int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_e_end_rsdata_req(struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); - int ok; + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; + int err; if (unlikely(cancel)) { - drbd_free_ee(mdev, e); + drbd_free_peer_req(mdev, peer_req); dec_unacked(mdev); - return 1; + return 0; } if (get_ldev_if_state(mdev, D_FAILED)) { - drbd_rs_complete_io(mdev, e->sector); + drbd_rs_complete_io(mdev, peer_req->i.sector); put_ldev(mdev); } - if (likely(drbd_bio_uptodate(e->private_bio))) { + if (mdev->state.conn == C_AHEAD) { + err = drbd_send_ack(mdev, P_RS_CANCEL, peer_req); + } else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { if (likely(mdev->state.pdsk >= D_INCONSISTENT)) { inc_rs_pending(mdev); - ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); + err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); } else { if (DRBD_ratelimit(5*HZ, 5)) dev_err(DEV, "Not sending RSDataReply, " "partner DISKLESS!\n"); - ok = 1; + err = 0; } } else { if (DRBD_ratelimit(5*HZ, 5)) dev_err(DEV, "Sending NegRSDReply. 
sector %llus.\n", - (unsigned long long)e->sector); + (unsigned long long)peer_req->i.sector); - ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); + err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); /* update resync data with failure */ - drbd_rs_failed_io(mdev, e->sector, e->size); + drbd_rs_failed_io(mdev, peer_req->i.sector, peer_req->i.size); } dec_unacked(mdev); - move_to_net_ee_or_free(mdev, e); + move_to_net_ee_or_free(mdev, peer_req); - if (unlikely(!ok)) + if (unlikely(err)) dev_err(DEV, "drbd_send_block() failed\n"); - return ok; + return err; } -int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_e_end_csum_rs_req(struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; struct digest_info *di; int digest_size; void *digest = NULL; - int ok, eq = 0; + int err, eq = 0; if (unlikely(cancel)) { - drbd_free_ee(mdev, e); + drbd_free_peer_req(mdev, peer_req); dec_unacked(mdev); - return 1; + return 0; } - drbd_rs_complete_io(mdev, e->sector); + if (get_ldev(mdev)) { + drbd_rs_complete_io(mdev, peer_req->i.sector); + put_ldev(mdev); + } - di = (struct digest_info *)(unsigned long)e->block_id; + di = peer_req->digest; - if (likely(drbd_bio_uptodate(e->private_bio))) { + if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { /* quick hack to try to avoid a race against reconfiguration. 
* a real fix would be much more involved, * introducing more locking mechanisms */ - if (mdev->csums_tfm) { - digest_size = crypto_hash_digestsize(mdev->csums_tfm); + if (mdev->tconn->csums_tfm) { + digest_size = crypto_hash_digestsize(mdev->tconn->csums_tfm); D_ASSERT(digest_size == di->digest_size); digest = kmalloc(digest_size, GFP_NOIO); } if (digest) { - drbd_csum(mdev, mdev->csums_tfm, e->private_bio, digest); + drbd_csum_ee(mdev, mdev->tconn->csums_tfm, peer_req, digest); eq = !memcmp(digest, di->digest, digest_size); kfree(digest); } if (eq) { - drbd_set_in_sync(mdev, e->sector, e->size); - mdev->rs_same_csum++; - ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e); + drbd_set_in_sync(mdev, peer_req->i.sector, peer_req->i.size); + /* rs_same_csums unit is BM_BLOCK_SIZE */ + mdev->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT; + err = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, peer_req); } else { inc_rs_pending(mdev); - e->block_id = ID_SYNCER; - ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e); + peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */ + peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */ + kfree(di); + err = drbd_send_block(mdev, P_RS_DATA_REPLY, peer_req); } } else { - ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); + err = drbd_send_ack(mdev, P_NEG_RS_DREPLY, peer_req); if (DRBD_ratelimit(5*HZ, 5)) dev_err(DEV, "Sending NegDReply. 
I guess it gets messy.\n"); } dec_unacked(mdev); + move_to_net_ee_or_free(mdev, peer_req); - kfree(di); - - move_to_net_ee_or_free(mdev, e); - - if (unlikely(!ok)) + if (unlikely(err)) dev_err(DEV, "drbd_send_block/ack() failed\n"); - return ok; + return err; } -int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_e_end_ov_req(struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; + sector_t sector = peer_req->i.sector; + unsigned int size = peer_req->i.size; int digest_size; void *digest; - int ok = 1; + int err = 0; if (unlikely(cancel)) goto out; - if (unlikely(!drbd_bio_uptodate(e->private_bio))) - goto out; - - digest_size = crypto_hash_digestsize(mdev->verify_tfm); + digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm); /* FIXME if this allocation fails, online verify will not terminate! */ digest = kmalloc(digest_size, GFP_NOIO); - if (digest) { - drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest); - inc_rs_pending(mdev); - ok = drbd_send_drequest_csum(mdev, e->sector, e->size, - digest, digest_size, P_OV_REPLY); - if (!ok) - dec_rs_pending(mdev); - kfree(digest); + if (!digest) { + err = -ENOMEM; + goto out; } -out: - drbd_free_ee(mdev, e); + if (!(peer_req->flags & EE_WAS_ERROR)) + drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest); + else + memset(digest, 0, digest_size); - dec_unacked(mdev); + /* Free peer_req and pages before send. + * In case we block on congestion, we could otherwise run into + * some distributed deadlock, if the other side blocks on + * congestion as well, because our receiver blocks in + * drbd_alloc_pages due to pp_in_use > max_buffers. 
*/ + drbd_free_peer_req(mdev, peer_req); + peer_req = NULL; + + inc_rs_pending(mdev); + err = drbd_send_drequest_csum(mdev, sector, size, digest, digest_size, P_OV_REPLY); + if (err) + dec_rs_pending(mdev); + kfree(digest); - return ok; +out: + if (peer_req) + drbd_free_peer_req(mdev, peer_req); + dec_unacked(mdev); + return err; } -void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size) +void drbd_ov_out_of_sync_found(struct drbd_conf *mdev, sector_t sector, int size) { if (mdev->ov_last_oos_start + mdev->ov_last_oos_size == sector) { mdev->ov_last_oos_size += size>>9; @@ -1036,110 +1187,142 @@ mdev->ov_last_oos_size = size>>9; } drbd_set_out_of_sync(mdev, sector, size); - set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags); } -int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_e_end_ov_reply(struct drbd_work *w, int cancel) { - struct drbd_epoch_entry *e = container_of(w, struct drbd_epoch_entry, w); + struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w); + struct drbd_conf *mdev = w->mdev; struct digest_info *di; - int digest_size; void *digest; - int ok, eq = 0; + sector_t sector = peer_req->i.sector; + unsigned int size = peer_req->i.size; + int digest_size; + int err, eq = 0; if (unlikely(cancel)) { - drbd_free_ee(mdev, e); + drbd_free_peer_req(mdev, peer_req); dec_unacked(mdev); - return 1; + return 0; } /* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all * the resync lru has been cleaned up already */ - drbd_rs_complete_io(mdev, e->sector); + if (get_ldev(mdev)) { + drbd_rs_complete_io(mdev, peer_req->i.sector); + put_ldev(mdev); + } - di = (struct digest_info *)(unsigned long)e->block_id; + di = peer_req->digest; - if (likely(drbd_bio_uptodate(e->private_bio))) { - digest_size = crypto_hash_digestsize(mdev->verify_tfm); + if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) { + digest_size = crypto_hash_digestsize(mdev->tconn->verify_tfm); digest = kmalloc(digest_size, 
GFP_NOIO); if (digest) { - drbd_csum(mdev, mdev->verify_tfm, e->private_bio, digest); + drbd_csum_ee(mdev, mdev->tconn->verify_tfm, peer_req, digest); D_ASSERT(digest_size == di->digest_size); eq = !memcmp(digest, di->digest, digest_size); kfree(digest); } - } else { - ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e); - if (DRBD_ratelimit(5*HZ, 5)) - dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n"); } - dec_unacked(mdev); - - kfree(di); - + /* Free peer_req and pages before send. + * In case we block on congestion, we could otherwise run into + * some distributed deadlock, if the other side blocks on + * congestion as well, because our receiver blocks in + * drbd_alloc_pages due to pp_in_use > max_buffers. */ + drbd_free_peer_req(mdev, peer_req); if (!eq) - drbd_ov_oos_found(mdev, e->sector, e->size); + drbd_ov_out_of_sync_found(mdev, sector, size); else - ov_oos_print(mdev); + ov_out_of_sync_print(mdev); - ok = drbd_send_ack_ex(mdev, P_OV_RESULT, e->sector, e->size, - eq ? ID_IN_SYNC : ID_OUT_OF_SYNC); + err = drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, + eq ? 
ID_IN_SYNC : ID_OUT_OF_SYNC); + + dec_unacked(mdev); - drbd_free_ee(mdev, e); + --mdev->ov_left; - if (--mdev->ov_left == 0) { - ov_oos_print(mdev); + /* let's advance progress step marks only for every other megabyte */ + if ((mdev->ov_left & 0x200) == 0x200) + drbd_advance_rs_marks(mdev, mdev->ov_left); + + if (mdev->ov_left == 0) { + ov_out_of_sync_print(mdev); drbd_resync_finished(mdev); } - return ok; + return err; } -int w_prev_work_done(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_prev_work_done(struct drbd_work *w, int cancel) { struct drbd_wq_barrier *b = container_of(w, struct drbd_wq_barrier, w); + complete(&b->done); - return 1; + return 0; } -int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_send_barrier(struct drbd_work *w, int cancel) { + struct drbd_socket *sock; struct drbd_tl_epoch *b = container_of(w, struct drbd_tl_epoch, w); - struct p_barrier *p = &mdev->data.sbuf.barrier; - int ok = 1; + struct drbd_conf *mdev = w->mdev; + struct p_barrier *p; /* really avoid racing with tl_clear. w.cb may have been referenced * just before it was reassigned and re-queued, so double check that. * actually, this race was harmless, since we only try to send the * barrier packet here, and otherwise do nothing with the object. * but compare with the head of w_clear_epoch */ - spin_lock_irq(&mdev->req_lock); + spin_lock_irq(&mdev->tconn->req_lock); if (w->cb != w_send_barrier || mdev->state.conn < C_CONNECTED) cancel = 1; - spin_unlock_irq(&mdev->req_lock); + spin_unlock_irq(&mdev->tconn->req_lock); if (cancel) - return 1; - - if (!drbd_get_data_sock(mdev)) return 0; + + sock = &mdev->tconn->data; + p = drbd_prepare_command(mdev, sock); + if (!p) + return -EIO; p->barrier = b->br_number; /* inc_ap_pending was done where this was queued. * dec_ap_pending will be done in got_BarrierAck * or (on connection loss) in w_clear_epoch. 
*/ - ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER, - (struct p_header *)p, sizeof(*p), 0); - drbd_put_data_sock(mdev); - - return ok; + return drbd_send_command(mdev, sock, P_BARRIER, sizeof(*p), NULL, 0); } -int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_send_write_hint(struct drbd_work *w, int cancel) { + struct drbd_conf *mdev = w->mdev; + struct drbd_socket *sock; + if (cancel) - return 1; - return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE); + return 0; + sock = &mdev->tconn->data; + if (!drbd_prepare_command(mdev, sock)) + return -EIO; + return drbd_send_command(mdev, sock, P_UNPLUG_REMOTE, 0, NULL, 0); +} + +int w_send_out_of_sync(struct drbd_work *w, int cancel) +{ + struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; + int err; + + if (unlikely(cancel)) { + req_mod(req, SEND_CANCELED); + return 0; + } + + err = drbd_send_out_of_sync(mdev, req); + req_mod(req, OOS_HANDED_TO_NETWORK); + + return err; } /** @@ -1148,20 +1331,21 @@ * @w: work object. * @cancel: The connection will be closed anyways */ -int w_send_dblock(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_send_dblock(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); - int ok; + struct drbd_conf *mdev = w->mdev; + int err; if (unlikely(cancel)) { - req_mod(req, send_canceled); - return 1; + req_mod(req, SEND_CANCELED); + return 0; } - ok = drbd_send_dblock(mdev, req); - req_mod(req, ok ? handed_over_to_network : send_failed); + err = drbd_send_dblock(mdev, req); + req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); - return ok; + return err; } /** @@ -1170,39 +1354,56 @@ * @w: work object. 
* @cancel: The connection will be closed anyways */ -int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel) +int w_send_read_req(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); - int ok; + struct drbd_conf *mdev = w->mdev; + int err; if (unlikely(cancel)) { - req_mod(req, send_canceled); - return 1; + req_mod(req, SEND_CANCELED); + return 0; } - ok = drbd_send_drequest(mdev, P_DATA_REQUEST, req->sector, req->size, - (unsigned long)req); + err = drbd_send_drequest(mdev, P_DATA_REQUEST, req->i.sector, req->i.size, + (unsigned long)req); - if (!ok) { - /* ?? we set C_TIMEOUT or C_BROKEN_PIPE in drbd_send(); - * so this is probably redundant */ - if (mdev->state.conn >= C_CONNECTED) - drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); - } - req_mod(req, ok ? handed_over_to_network : send_failed); + req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); + + return err; +} + +int w_restart_disk_io(struct drbd_work *w, int cancel) +{ + struct drbd_request *req = container_of(w, struct drbd_request, w); + struct drbd_conf *mdev = w->mdev; - return ok; + if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) + drbd_al_begin_io(mdev, &req->i); + + drbd_req_make_private_bio(req, req->master_bio); + req->private_bio->bi_bdev = mdev->ldev->backing_bdev; + generic_make_request(req->private_bio); + + return 0; } STATIC int _drbd_may_sync_now(struct drbd_conf *mdev) { struct drbd_conf *odev = mdev; + int resync_after; while (1) { - if (odev->sync_conf.after == -1) + if (!odev->ldev) + return 1; + rcu_read_lock(); + resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; + rcu_read_unlock(); + if (resync_after == -1) + return 1; + odev = minor_to_mdev(resync_after); + if (!expect(odev)) return 1; - odev = minor_to_mdev(odev->sync_conf.after); - ERR_IF(!odev) return 1; if ((odev->state.conn >= C_SYNC_SOURCE && odev->state.conn <= C_PAUSED_SYNC_T) || 
odev->state.aftr_isp || odev->state.peer_isp || @@ -1222,16 +1423,15 @@ struct drbd_conf *odev; int i, rv = 0; - for (i = 0; i < minor_count; i++) { - odev = minor_to_mdev(i); - if (!odev) - continue; + rcu_read_lock(); + idr_for_each_entry(&minors, odev, i) { if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) continue; if (!_drbd_may_sync_now(odev)) rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL) != SS_NOTHING_TO_DO); } + rcu_read_unlock(); return rv; } @@ -1247,10 +1447,8 @@ struct drbd_conf *odev; int i, rv = 0; - for (i = 0; i < minor_count; i++) { - odev = minor_to_mdev(i); - if (!odev) - continue; + rcu_read_lock(); + idr_for_each_entry(&minors, odev, i) { if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS) continue; if (odev->state.aftr_isp) { @@ -1260,6 +1458,7 @@ != SS_NOTHING_TO_DO) ; } } + rcu_read_unlock(); return rv; } @@ -1277,46 +1476,86 @@ write_unlock_irq(&global_state_lock); } -static int sync_after_error(struct drbd_conf *mdev, int o_minor) +/* caller must hold global_state_lock */ +enum drbd_ret_code drbd_resync_after_valid(struct drbd_conf *mdev, int o_minor) { struct drbd_conf *odev; + int resync_after; if (o_minor == -1) return NO_ERROR; if (o_minor < -1 || minor_to_mdev(o_minor) == NULL) - return ERR_SYNC_AFTER; + return ERR_RESYNC_AFTER; /* check for loops */ odev = minor_to_mdev(o_minor); while (1) { if (odev == mdev) - return ERR_SYNC_AFTER_CYCLE; + return ERR_RESYNC_AFTER_CYCLE; + rcu_read_lock(); + resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after; + rcu_read_unlock(); /* dependency chain ends here, no cycles. 
*/ - if (odev->sync_conf.after == -1) + if (resync_after == -1) return NO_ERROR; /* follow the dependency chain */ - odev = minor_to_mdev(odev->sync_conf.after); + odev = minor_to_mdev(resync_after); } } -int drbd_alter_sa(struct drbd_conf *mdev, int na) +/* caller must hold global_state_lock */ +void drbd_resync_after_changed(struct drbd_conf *mdev) { int changes; - int retcode; - write_lock_irq(&global_state_lock); - retcode = sync_after_error(mdev, na); - if (retcode == NO_ERROR) { - mdev->sync_conf.after = na; - do { - changes = _drbd_pause_after(mdev); - changes |= _drbd_resume_next(mdev); - } while (changes); + do { + changes = _drbd_pause_after(mdev); + changes |= _drbd_resume_next(mdev); + } while (changes); +} + +void drbd_rs_controller_reset(struct drbd_conf *mdev) +{ + struct fifo_buffer *plan; + + atomic_set(&mdev->rs_sect_in, 0); + atomic_set(&mdev->rs_sect_ev, 0); + mdev->rs_in_flight = 0; + + /* Updating the RCU protected object in place is necessary since + this function gets called from atomic context. 
+ It is valid since all other updates also lead to an completely + empty fifo */ + rcu_read_lock(); + plan = rcu_dereference(mdev->rs_plan_s); + plan->total = 0; + fifo_set(plan, 0); + rcu_read_unlock(); +} + +void start_resync_timer_fn(unsigned long data) +{ + struct drbd_conf *mdev = (struct drbd_conf *) data; + + drbd_queue_work(&mdev->tconn->data.work, &mdev->start_resync_work); +} + +int w_start_resync(struct drbd_work *w, int cancel) +{ + struct drbd_conf *mdev = w->mdev; + + if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) { + dev_warn(DEV, "w_start_resync later...\n"); + mdev->start_resync_timer.expires = jiffies + HZ/10; + add_timer(&mdev->start_resync_timer); + return 0; } - write_unlock_irq(&global_state_lock); - return retcode; + + drbd_start_resync(mdev, C_SYNC_SOURCE); + clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags); + return 0; } /** @@ -1332,52 +1571,71 @@ union drbd_state ns; int r; - if (mdev->state.conn >= C_SYNC_SOURCE) { + if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) { dev_err(DEV, "Resync already running!\n"); return; } - trace_drbd_resync(mdev, TRACE_LVL_SUMMARY, "Resync starting: side=%s\n", - side == C_SYNC_TARGET ? "SyncTarget" : "SyncSource"); - - /* In case a previous resync run was aborted by an IO error/detach on the peer. */ - drbd_rs_cancel_all(mdev); + if (mdev->state.conn < C_AHEAD) { + /* In case a previous resync run was aborted by an IO error/detach on the peer. */ + drbd_rs_cancel_all(mdev); + /* This should be done when we abort the resync. We definitely do not + want to have this for connections going back and forth between + Ahead/Behind and SyncSource/SyncTarget */ + } + + if (!test_bit(B_RS_H_DONE, &mdev->flags)) { + if (side == C_SYNC_TARGET) { + /* Since application IO was locked out during C_WF_BITMAP_T and + C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET + we check that we might make the data inconsistent. 
*/ + r = drbd_khelper(mdev, "before-resync-target"); + r = (r >> 8) & 0xff; + if (r > 0) { + dev_info(DEV, "before-resync-target handler returned %d, " + "dropping connection.\n", r); + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); + return; + } + } else /* C_SYNC_SOURCE */ { + r = drbd_khelper(mdev, "before-resync-source"); + r = (r >> 8) & 0xff; + if (r > 0) { + if (r == 3) { + dev_info(DEV, "before-resync-source handler returned %d, " + "ignoring. Old userland tools?", r); + } else { + dev_info(DEV, "before-resync-source handler returned %d, " + "dropping connection.\n", r); + conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD); + return; + } + } + } + } - if (side == C_SYNC_TARGET) { - /* Since application IO was locked out during C_WF_BITMAP_T and - C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET - we check that we might make the data inconsistent. */ - r = drbd_khelper(mdev, "before-resync-target"); - r = (r >> 8) & 0xff; - if (r > 0) { - dev_info(DEV, "before-resync-target handler returned %d, " - "dropping connection.\n", r); - drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + if (current == mdev->tconn->worker.task) { + /* The worker should not sleep waiting for state_mutex, + that can take long */ + if (!mutex_trylock(mdev->state_mutex)) { + set_bit(B_RS_H_DONE, &mdev->flags); + mdev->start_resync_timer.expires = jiffies + HZ/5; + add_timer(&mdev->start_resync_timer); return; } + } else { + mutex_lock(mdev->state_mutex); } + clear_bit(B_RS_H_DONE, &mdev->flags); - drbd_state_lock(mdev); - + write_lock_irq(&global_state_lock); if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { - drbd_state_unlock(mdev); + write_unlock_irq(&global_state_lock); + mutex_unlock(mdev->state_mutex); return; } - if (side == C_SYNC_TARGET) { - mdev->bm_resync_fo = 0; - } else /* side == C_SYNC_SOURCE */ { - u64 uuid; - - get_random_bytes(&uuid, sizeof(u64)); - drbd_uuid_set(mdev, UI_BITMAP, uuid); - 
drbd_send_sync_uuid(mdev, uuid); - - D_ASSERT(mdev->state.disk == D_UP_TO_DATE); - } - - write_lock_irq(&global_state_lock); - ns = mdev->state; + ns = drbd_read_state(mdev); ns.aftr_isp = !_drbd_may_sync_now(mdev); @@ -1388,42 +1646,75 @@ else /* side == C_SYNC_SOURCE */ ns.pdsk = D_INCONSISTENT; - DRBD_STATE_DEBUG_INIT_VAL(ns); r = __drbd_set_state(mdev, ns, CS_VERBOSE, NULL); - ns = mdev->state; + ns = drbd_read_state(mdev); if (ns.conn < C_CONNECTED) r = SS_UNKNOWN_ERROR; if (r == SS_SUCCESS) { - mdev->rs_total = - mdev->rs_mark_left = drbd_bm_total_weight(mdev); + unsigned long tw = drbd_bm_total_weight(mdev); + unsigned long now = jiffies; + int i; + mdev->rs_failed = 0; mdev->rs_paused = 0; - mdev->rs_start = - mdev->rs_mark_time = jiffies; mdev->rs_same_csum = 0; + mdev->rs_last_events = 0; + mdev->rs_last_sect_ev = 0; + mdev->rs_total = tw; + mdev->rs_start = now; + for (i = 0; i < DRBD_SYNC_MARKS; i++) { + mdev->rs_mark_left[i] = tw; + mdev->rs_mark_time[i] = now; + } _drbd_pause_after(mdev); } write_unlock_irq(&global_state_lock); - drbd_state_unlock(mdev); - put_ldev(mdev); if (r == SS_SUCCESS) { dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n", drbd_conn_str(ns.conn), (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10), (unsigned long) mdev->rs_total); + if (side == C_SYNC_TARGET) + mdev->bm_resync_fo = 0; - if (mdev->rs_total == 0) { - /* Peer still reachable? Beware of failing before-resync-target handlers! */ - request_ping(mdev); - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(mdev->net_conf->ping_timeo*HZ/9); /* 9 instead 10 */ + /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid + * with w_send_oos, or the sync target will get confused as to + * how much bits to resync. We cannot do that always, because for an + * empty resync and protocol < 95, we need to do it here, as we call + * drbd_resync_finished from here in that case. 
+ * We drbd_gen_and_send_sync_uuid here for protocol < 96, + * and from after_state_ch otherwise. */ + if (side == C_SYNC_SOURCE && mdev->tconn->agreed_pro_version < 96) + drbd_gen_and_send_sync_uuid(mdev); + + if (mdev->tconn->agreed_pro_version < 95 && mdev->rs_total == 0) { + /* This still has a race (about when exactly the peers + * detect connection loss) that can lead to a full sync + * on next handshake. In 8.3.9 we fixed this with explicit + * resync-finished notifications, but the fix + * introduces a protocol change. Sleeping for some + * time longer than the ping interval + timeout on the + * SyncSource, to give the SyncTarget the chance to + * detect connection loss, then waiting for a ping + * response (implicit in drbd_resync_finished) reduces + * the race considerably, but does not solve it. */ + if (side == C_SYNC_SOURCE) { + struct net_conf *nc; + int timeo; + + rcu_read_lock(); + nc = rcu_dereference(mdev->tconn->net_conf); + timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9; + rcu_read_unlock(); + schedule_timeout_interruptible(timeo); + } drbd_resync_finished(mdev); - return; } + drbd_rs_controller_reset(mdev); /* ns.conn may already be != mdev->state.conn, * we may have been paused in between, or become paused until * the timer triggers. 
@@ -1433,51 +1724,61 @@ drbd_md_sync(mdev); } + put_ldev(mdev); + mutex_unlock(mdev->state_mutex); } int drbd_worker(struct drbd_thread *thi) { - struct drbd_conf *mdev = thi->mdev; + struct drbd_tconn *tconn = thi->tconn; struct drbd_work *w = NULL; + struct drbd_conf *mdev; + struct net_conf *nc; LIST_HEAD(work_list); - int intr = 0, i; + int vnr, intr = 0; + int cork; - sprintf(current->comm, "drbd%d_worker", mdev_to_minor(mdev)); + while (get_t_state(thi) == RUNNING) { + drbd_thread_current_set_cpu(thi); - while (get_t_state(thi) == Running) { - drbd_thread_current_set_cpu(mdev); + if (down_trylock(&tconn->data.work.s)) { + mutex_lock(&tconn->data.mutex); - if (down_trylock(&mdev->data.work.s)) { - mutex_lock(&mdev->data.mutex); - if (mdev->data.socket && !mdev->net_conf->no_cork) - drbd_tcp_uncork(mdev->data.socket); - mutex_unlock(&mdev->data.mutex); - - intr = down_interruptible(&mdev->data.work.s); - - mutex_lock(&mdev->data.mutex); - if (mdev->data.socket && !mdev->net_conf->no_cork) - drbd_tcp_cork(mdev->data.socket); - mutex_unlock(&mdev->data.mutex); + rcu_read_lock(); + nc = rcu_dereference(tconn->net_conf); + cork = nc ? nc->tcp_cork : 0; + rcu_read_unlock(); + + if (tconn->data.socket && cork) + drbd_tcp_uncork(tconn->data.socket); + mutex_unlock(&tconn->data.mutex); + + intr = down_interruptible(&tconn->data.work.s); + + mutex_lock(&tconn->data.mutex); + if (tconn->data.socket && cork) + drbd_tcp_cork(tconn->data.socket); + mutex_unlock(&tconn->data.mutex); } if (intr) { - D_ASSERT(intr == -EINTR); flush_signals(current); - ERR_IF (get_t_state(thi) == Running) + if (get_t_state(thi) == RUNNING) { + conn_warn(tconn, "Worker got an unexpected signal\n"); continue; + } break; } - if (get_t_state(thi) != Running) + if (get_t_state(thi) != RUNNING) break; /* With this break, we have done a down() but not consumed the entry from the list. The cleanup code takes care of this... 
*/ w = NULL; - spin_lock_irq(&mdev->data.work.q_lock); - ERR_IF(list_empty(&mdev->data.work.q)) { + spin_lock_irq(&tconn->data.work.q_lock); + if (list_empty(&tconn->data.work.q)) { /* something terribly wrong in our logic. * we were able to down() the semaphore, * but the list is empty... doh. @@ -1489,57 +1790,52 @@ * * I'll try to get away just starting over this loop. */ - spin_unlock_irq(&mdev->data.work.q_lock); + conn_warn(tconn, "Work list unexpectedly empty\n"); + spin_unlock_irq(&tconn->data.work.q_lock); continue; } - w = list_entry(mdev->data.work.q.next, struct drbd_work, list); + w = list_entry(tconn->data.work.q.next, struct drbd_work, list); list_del_init(&w->list); - spin_unlock_irq(&mdev->data.work.q_lock); + spin_unlock_irq(&tconn->data.work.q_lock); - if (!w->cb(mdev, w, mdev->state.conn < C_CONNECTED)) { + if (w->cb(w, tconn->cstate < C_WF_REPORT_PARAMS)) { /* dev_warn(DEV, "worker: a callback failed! \n"); */ - if (mdev->state.conn >= C_CONNECTED) - drbd_force_state(mdev, - NS(conn, C_NETWORK_FAILURE)); + if (tconn->cstate >= C_WF_REPORT_PARAMS) + conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD); } } - D_ASSERT(test_bit(DEVICE_DYING, &mdev->flags)); - D_ASSERT(test_bit(CONFIG_PENDING, &mdev->flags)); - - spin_lock_irq(&mdev->data.work.q_lock); - i = 0; - while (!list_empty(&mdev->data.work.q)) { - list_splice_init(&mdev->data.work.q, &work_list); - spin_unlock_irq(&mdev->data.work.q_lock); + + spin_lock_irq(&tconn->data.work.q_lock); + while (!list_empty(&tconn->data.work.q)) { + list_splice_init(&tconn->data.work.q, &work_list); + spin_unlock_irq(&tconn->data.work.q_lock); while (!list_empty(&work_list)) { w = list_entry(work_list.next, struct drbd_work, list); list_del_init(&w->list); - w->cb(mdev, w, 1); - i++; /* dead debugging code */ + w->cb(w, 1); } - spin_lock_irq(&mdev->data.work.q_lock); + spin_lock_irq(&tconn->data.work.q_lock); } - sema_init(&mdev->data.work.s, 0); + sema_init(&tconn->data.work.s, 0); /* DANGEROUS 
race: if someone did queue his work within the spinlock, * but up() ed outside the spinlock, we could get an up() on the * semaphore without corresponding list entry. * So don't do that. */ - spin_unlock_irq(&mdev->data.work.q_lock); + spin_unlock_irq(&tconn->data.work.q_lock); - D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); - /* _drbd_set_state only uses stop_nowait. - * wait here for the Exiting receiver. */ - drbd_thread_stop(&mdev->receiver); - drbd_mdev_cleanup(mdev); - - dev_info(DEV, "worker terminated\n"); - - clear_bit(DEVICE_DYING, &mdev->flags); - clear_bit(CONFIG_PENDING, &mdev->flags); - wake_up(&mdev->state_wait); + rcu_read_lock(); + idr_for_each_entry(&tconn->volumes, mdev, vnr) { + D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); + kref_get(&mdev->kref); + rcu_read_unlock(); + drbd_mdev_cleanup(mdev); + kref_put(&mdev->kref, &drbd_minor_destroy); + rcu_read_lock(); + } + rcu_read_unlock(); return 0; } diff -Nru drbd8-8.3.7/drbd/drbd_wrappers.h drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_wrappers.h --- drbd8-8.3.7/drbd/drbd_wrappers.h 2010-01-07 09:09:34.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/drbd_wrappers.h 2012-02-02 14:09:14.000000000 +0000 @@ -5,40 +5,38 @@ #include #include -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -# error "use a 2.6 kernel, please" +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) +# error "At least kernel version 2.6.18 (with patches) required" #endif -#include -#ifndef bio_rw_flagged -#define bio_rw_flagged(bio, flag) ((bio)->bi_rw & (1 << (flag))) +/* The history of blkdev_issue_flush() + + It had 2 arguments before fbd9b09a177a481eda256447c881f014f29034fe, + after it had 4 arguments. (With that commit came BLKDEV_IFL_WAIT) + + It had 4 arguments before dd3932eddf428571762596e17b65f5dc92ca361b, + after it got 3 arguments. (With that commit came BLKDEV_DISCARD_SECURE + and BLKDEV_IFL_WAIT disappeared again.) 
*/ +#include +#ifndef BLKDEV_IFL_WAIT +#ifndef BLKDEV_DISCARD_SECURE +/* before fbd9b09a177 */ +#define blkdev_issue_flush(b, gfpf, s) blkdev_issue_flush(b, s) +#endif +/* after dd3932eddf4 no define at all */ +#else +/* between fbd9b09a177 and dd3932eddf4 */ +#define blkdev_issue_flush(b, gfpf, s) blkdev_issue_flush(b, gfpf, s, BLKDEV_IFL_WAIT) #endif +#include +#include #include +#include /* for the proc_create wrapper */ #include -/* struct page has a union in 2.6.15 ... - * an anonymous union and struct since 2.6.16 - * or in fc5 "2.6.15" */ -#include -#ifndef page_private -# define page_private(page) ((page)->private) -# define set_page_private(page, v) ((page)->private = (v)) -#endif - -/* mutex was not available before 2.6.16. - * various vendors provide various degrees of backports. - * we provide the missing parts ourselves, if neccessary. - * this one is for RHEL/Centos 4 */ -#if defined(mutex_lock) && !defined(mutex_is_locked) -#define mutex_is_locked(m) (atomic_read(&(m)->count) != 1) -#endif - -/* see get_sb_bdev and bd_claim */ -extern char *drbd_sec_holder; - #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) static inline unsigned short queue_logical_block_size(struct request_queue *q) { @@ -53,9 +51,9 @@ return queue_logical_block_size(bdev_get_queue(bdev)); } -static inline unsigned int queue_max_segment_size(struct request_queue *q) +static inline unsigned int queue_max_hw_sectors(struct request_queue *q) { - return q->max_segment_size; + return q->max_hw_sectors; } static inline unsigned int queue_max_sectors(struct request_queue *q) @@ -73,9 +71,21 @@ static inline sector_t drbd_get_capacity(struct block_device *bdev) { /* return bdev ? get_capacity(bdev->bd_disk) : 0; */ - return bdev ? bdev->bd_inode->i_size >> 9 : 0; + return bdev ? i_size_read(bdev->bd_inode) >> 9 : 0; } +#ifdef COMPAT_HAVE_VOID_MAKE_REQUEST +/* in Commit 5a7bbad27a410350e64a2d7f5ec18fc73836c14f (between Linux-3.1 and 3.2) + make_request() becomes type void. 
Before it had type int. */ +#define MAKE_REQUEST_TYPE void +#define MAKE_REQUEST_RETURN return +#else +#define MAKE_REQUEST_TYPE int +#define MAKE_REQUEST_RETURN return 0 +#endif + +#include "drbd_int.h" + /* sets the number of 512 byte sectors of our virtual device */ static inline void drbd_set_my_capacity(struct drbd_conf *mdev, sector_t size) @@ -85,20 +95,58 @@ mdev->this_bdev->bd_inode->i_size = (loff_t)size << 9; } -#define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE) +#ifndef COMPAT_HAVE_FMODE_T +typedef unsigned __bitwise__ fmode_t; +#endif -static inline int drbd_bio_has_active_page(struct bio *bio) +#ifndef COMPAT_HAVE_BLKDEV_GET_BY_PATH +/* see kernel 2.6.37, + * d4d7762 block: clean up blkdev_get() wrappers and their users + * e525fd8 block: make blkdev_get/put() handle exclusive access + * and kernel 2.6.28 + * 30c40d2 [PATCH] propagate mode through open_bdev_excl/close_bdev_excl + * Also note that there is no FMODE_EXCL before + * 86d434d [PATCH] eliminate use of ->f_flags in block methods + */ +#ifndef COMPAT_HAVE_OPEN_BDEV_EXCLUSIVE +#ifndef FMODE_EXCL +#define FMODE_EXCL 0 +#endif +static inline +struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) +{ + /* drbd does not open readonly, but try to be correct, anyways */ + return open_bdev_excl(path, (mode & FMODE_WRITE) ? 0 : MS_RDONLY, holder); +} +static inline +void close_bdev_exclusive(struct block_device *bdev, fmode_t mode) +{ + /* mode ignored. 
*/ + close_bdev_excl(bdev); +} +#endif +static inline struct block_device *blkdev_get_by_path(const char *path, + fmode_t mode, void *holder) { - struct bio_vec *bvec; - int i; + return open_bdev_exclusive(path, mode, holder); +} - __bio_for_each_segment(bvec, bio, i, 0) { - if (page_count(bvec->bv_page) > 1) - return 1; - } +static inline int drbd_blkdev_put(struct block_device *bdev, fmode_t mode) +{ + /* blkdev_put != close_bdev_exclusive, in general, so this is obviously + * not correct, and there should be some if (mode & FMODE_EXCL) ... + * But this is the only way it is used in DRBD, + * and for <= 2.6.27, there is no FMODE_EXCL anyways. */ + close_bdev_exclusive(bdev, mode); + /* blkdev_put seems to not have useful return values, + * close_bdev_exclusive is void. */ return 0; } +#define blkdev_put(b, m) drbd_blkdev_put(b, m) +#endif + +#define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE) #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) /* Before Linux-2.6.24 bie_endio() had the size of the bio as second argument. 
@@ -117,9 +165,8 @@ /* bi_end_io handlers */ extern BIO_ENDIO_TYPE drbd_md_io_complete BIO_ENDIO_ARGS(struct bio *bio, int error); -extern BIO_ENDIO_TYPE drbd_endio_read_sec BIO_ENDIO_ARGS(struct bio *bio, int error); -extern BIO_ENDIO_TYPE drbd_endio_write_sec BIO_ENDIO_ARGS(struct bio *bio, int error); -extern BIO_ENDIO_TYPE drbd_endio_pri BIO_ENDIO_ARGS(struct bio *bio, int error); +extern BIO_ENDIO_TYPE drbd_peer_request_endio BIO_ENDIO_ARGS(struct bio *bio, int error); +extern BIO_ENDIO_TYPE drbd_request_endio BIO_ENDIO_ARGS(struct bio *bio, int error); #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) #define part_inc_in_flight(A, B) part_inc_in_flight(A) @@ -148,14 +195,6 @@ #define sg_init_table(S,N) ({}) -#ifdef NEED_SG_SET_BUF -static inline void sg_set_buf(struct scatterlist *sg, const void *buf, - unsigned int buflen) -{ - sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); -} -#endif - #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28) @@ -178,17 +217,11 @@ #endif static inline void drbd_kobject_uevent(struct drbd_conf *mdev) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10) -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,15) - kobject_uevent(disk_to_kobj(mdev->vdisk), KOBJ_CHANGE, NULL); -#else kobject_uevent(disk_to_kobj(mdev->vdisk), KOBJ_CHANGE); /* rhel4 / sles9 and older don't have this at all, * which means user space (udev) won't get events about possible changes of * corresponding resource + disk names after the initial drbd minor creation. 
*/ -#endif -#endif } @@ -208,39 +241,28 @@ return; } - if (FAULT_ACTIVE(mdev, fault_type)) + if (drbd_insert_fault(mdev, fault_type)) bio_endio(bio, -EIO); else generic_make_request(bio); } -static inline void drbd_plug_device(struct drbd_conf *mdev) +static inline int drbd_backing_bdev_events(struct drbd_conf *mdev) { - struct request_queue *q; - q = bdev_get_queue(mdev->this_bdev); - - spin_lock_irq(q->queue_lock); - -/* XXX the check on !blk_queue_plugged is redundant, - * implicitly checked in blk_plug_device */ - - if (!blk_queue_plugged(q)) { - blk_plug_device(q); - del_timer(&q->unplug_timer); - /* unplugging should not happen automatically... */ - } - spin_unlock_irq(q->queue_lock); -} - -#ifdef DEFINE_SOCK_CREATE_KERN -#define sock_create_kern sock_create -#endif - -#ifdef USE_KMEM_CACHE_S -#define kmem_cache kmem_cache_s + struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk; +#if defined(__disk_stat_inc) + /* older kernel */ + return (int)disk_stat_read(disk, sectors[0]) + + (int)disk_stat_read(disk, sectors[1]); +#else + /* recent kernel */ + return (int)part_stat_read(&disk->part0, sectors[0]) + + (int)part_stat_read(&disk->part0, sectors[1]); #endif +} -#ifdef DEFINE_KERNEL_SOCK_SHUTDOWN +#ifndef COMPAT_HAVE_SOCK_SHUTDOWN +#define COMPAT_HAVE_SOCK_SHUTDOWN 1 enum sock_shutdown_cmd { SHUT_RD = 0, SHUT_WR = 1, @@ -263,72 +285,6 @@ #define drbd_unregister_blkdev unregister_blkdev #endif -#ifdef NEED_BACKPORT_OF_ATOMIC_ADD - -#if defined(__x86_64__) - -static __inline__ int atomic_add_return(int i, atomic_t *v) -{ - int __i = i; - __asm__ __volatile__( - LOCK_PREFIX "xaddl %0, %1;" - :"=r"(i) - :"m"(v->counter), "0"(i)); - return i + __i; -} - -static __inline__ int atomic_sub_return(int i, atomic_t *v) -{ - return atomic_add_return(-i, v); -} - -#define atomic_inc_return(v) (atomic_add_return(1,v)) -#define atomic_dec_return(v) (atomic_sub_return(1,v)) - -#elif defined(__i386__) || defined(__arch_um__) - -static __inline__ int 
atomic_add_return(int i, atomic_t *v) -{ - int __i; -#ifdef CONFIG_M386 - unsigned long flags; - if(unlikely(boot_cpu_data.x86==3)) - goto no_xadd; -#endif - /* Modern 486+ processor */ - __i = i; - __asm__ __volatile__( - LOCK_PREFIX "xaddl %0, %1;" - :"=r"(i) - :"m"(v->counter), "0"(i)); - return i + __i; - -#ifdef CONFIG_M386 -no_xadd: /* Legacy 386 processor */ - local_irq_save(flags); - __i = atomic_read(v); - atomic_set(v, i + __i); - local_irq_restore(flags); - return i + __i; -#endif -} - -static __inline__ int atomic_sub_return(int i, atomic_t *v) -{ - return atomic_add_return(-i, v); -} - -#define atomic_inc_return(v) (atomic_add_return(1,v)) -#define atomic_dec_return(v) (atomic_sub_return(1,v)) - -#else -# error "You need to copy/past atomic_inc_return()/atomic_dec_return() here" -# error "for your architecture. (Hint: Kernels after 2.6.10 have those" -# error "by default! Using a later kernel might be less effort!)" -#endif - -#endif - #if !defined(CRYPTO_ALG_ASYNC) /* With Linux-2.6.19 the crypto API changed! 
*/ /* This is not a generic backport of the new api, it just implements @@ -449,33 +405,12 @@ #endif -static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm) -{ -#ifdef CRYPTO_ALG_TYPE_HASH_MASK - /* see include/linux/crypto.h */ - return !((crypto_tfm_alg_type(tfm) ^ CRYPTO_ALG_TYPE_HASH) - & CRYPTO_ALG_TYPE_HASH_MASK); -#else - return crypto_tfm_alg_type(tfm) == CRYPTO_ALG_TYPE_HASH; -#endif -} - - -#ifdef NEED_BACKPORT_OF_KZALLOC -static inline void *kzalloc(size_t size, int flags) -{ - void *rv = kmalloc(size, flags); - if (rv) - memset(rv, 0, size); - - return rv; -} -#endif - /* see upstream commit 2d3854a37e8b767a51aba38ed6d22817b0631e33 */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) #ifndef cpumask_bits +#ifndef COMPAT_HAVE_NR_CPU_IDS #define nr_cpu_ids NR_CPUS +#endif #define nr_cpumask_bits nr_cpu_ids typedef cpumask_t cpumask_var_t[1]; @@ -623,19 +558,6 @@ # define __cond_lock(x,c) (c) #endif -#ifndef KERNEL_HAS_GFP_T -#define KERNEL_HAS_GFP_T -typedef unsigned gfp_t; -#endif - - -/* struct kvec didn't exist before 2.6.8, this is an ugly - * #define to work around it ... - jt */ - -#ifndef KERNEL_HAS_KVEC -#define kvec iovec -#endif - #ifndef net_random #define random32 net_random #endif @@ -651,43 +573,530 @@ * this "backport" does not close the race that lead to the API change, * but only provides an equivalent function call. 
*/ -#ifndef KERNEL_HAS_PROC_CREATE -static inline struct proc_dir_entry *proc_create(const char *name, +#ifndef COMPAT_HAVE_PROC_CREATE_DATA +static inline struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, struct proc_dir_entry *parent, - struct file_operations *proc_fops) + struct file_operations *proc_fops, void *data) { struct proc_dir_entry *pde = create_proc_entry(name, mode, parent); - if (pde) + if (pde) { pde->proc_fops = proc_fops; + pde->data = data; + } return pde; } #endif +#ifndef COMPAT_HAVE_BLK_QUEUE_MAX_HW_SECTORS +static inline void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max) +{ + blk_queue_max_sectors(q, max); +} +#elif defined(COMPAT_USE_BLK_QUEUE_MAX_SECTORS_ANYWAYS) + /* For kernel versions 2.6.31 to 2.6.33 inclusive, even though + * blk_queue_max_hw_sectors is present, we actually need to use + * blk_queue_max_sectors to set max_hw_sectors. :-( + * RHEL6 2.6.32 chose to be different and already has eliminated + * blk_queue_max_sectors as upstream 2.6.34 did. + */ +#define blk_queue_max_hw_sectors(q, max) blk_queue_max_sectors(q, max) +#endif + +#ifndef COMPAT_HAVE_BLK_QUEUE_MAX_SEGMENTS +static inline void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments) +{ + blk_queue_max_phys_segments(q, max_segments); + blk_queue_max_hw_segments(q, max_segments); +#define BLK_MAX_SEGMENTS MAX_HW_SEGMENTS /* or max MAX_PHYS_SEGMENTS. Probably does not matter */ +} +#endif + +#ifndef COMPAT_HAVE_BOOL_TYPE +typedef _Bool bool; +enum { + false = 0, + true = 1 +}; +#endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) -#define TP_PROTO(args...) args -#define TP_ARGS(args...) 
args +/* REQ_* and BIO_RW_* flags have been moved around in the tree, + * and have finally been "merged" with + * 7b6d91daee5cac6402186ff224c3af39d79f4a0e and + * 7cc015811ef8992dfcce314d0ed9642bc18143d1 + * We communicate between different systems, + * so we have to somehow semantically map the bi_rw flags + * bi_rw (some kernel version) -> data packet flags -> bi_rw (other kernel version) + */ + +/* RHEL 6.1 backported FLUSH/FUA as BIO_RW_FLUSH/FUA + * and at that time also introduced the defines BIO_FLUSH/FUA. + * There is also REQ_FLUSH/FUA, but these do NOT share + * the same value space as the bio rw flags, yet. + */ +#ifdef BIO_FLUSH + +#define DRBD_REQ_FLUSH (1UL << BIO_RW_FLUSH) +#define DRBD_REQ_FUA (1UL << BIO_RW_FUA) +#define DRBD_REQ_HARDBARRIER (1UL << BIO_RW_BARRIER) +#define DRBD_REQ_DISCARD (1UL << BIO_RW_DISCARD) +#define DRBD_REQ_SYNC (1UL << BIO_RW_SYNCIO) +#define DRBD_REQ_UNPLUG (1UL << BIO_RW_UNPLUG) + +#elif defined(REQ_FLUSH) /* introduced in 2.6.36, + * now equivalent to bi_rw */ + +#define DRBD_REQ_SYNC REQ_SYNC +#define DRBD_REQ_UNPLUG REQ_UNPLUG +#define DRBD_REQ_FLUSH REQ_FLUSH +#define DRBD_REQ_FUA REQ_FUA +#define DRBD_REQ_DISCARD REQ_DISCARD +/* REQ_HARDBARRIER has been around for a long time, + * without being directly related to bi_rw. + * so the ifdef is only usful inside the ifdef REQ_FLUSH! + * commit 7cc0158 (v2.6.36-rc1) made it a bi_rw flag, ... */ +#ifdef REQ_HARDBARRIER +#define DRBD_REQ_HARDBARRIER REQ_HARDBARRIER +#else +/* ... but REQ_HARDBARRIER was removed again in 02e031c (v2.6.37-rc4). 
*/ +#define DRBD_REQ_HARDBARRIER 0 +#endif + +#else /* "older", and hopefully not + * "partially backported" kernel */ + +#if defined(BIO_RW_SYNC) +/* see upstream commits + * 213d9417fec62ef4c3675621b9364a667954d4dd, + * 93dbb393503d53cd226e5e1f0088fe8f4dbaa2b8 + * later, the defines even became an enum ;-) */ +#define DRBD_REQ_SYNC (1UL << BIO_RW_SYNC) +#define DRBD_REQ_UNPLUG (1UL << BIO_RW_SYNC) +#else +/* cannot test on defined(BIO_RW_SYNCIO), it may be an enum */ +#define DRBD_REQ_SYNC (1UL << BIO_RW_SYNCIO) +#define DRBD_REQ_UNPLUG (1UL << BIO_RW_UNPLUG) +#endif + +#define DRBD_REQ_FLUSH (1UL << BIO_RW_BARRIER) +/* REQ_FUA has been around for a longer time, + * without a direct equivalent in bi_rw. */ +#define DRBD_REQ_FUA (1UL << BIO_RW_BARRIER) +#define DRBD_REQ_HARDBARRIER (1UL << BIO_RW_BARRIER) + +/* we don't support DISCARDS yet, anyways. + * cannot test on defined(BIO_RW_DISCARD), it may be an enum */ +#define DRBD_REQ_DISCARD 0 +#endif + +/* this results in: + bi_rw -> dp_flags + +< 2.6.28 + SYNC -> SYNC|UNPLUG + BARRIER -> FUA|FLUSH + there is no DISCARD +2.6.28 + SYNC -> SYNC|UNPLUG + BARRIER -> FUA|FLUSH + DISCARD -> DISCARD +2.6.29 + SYNCIO -> SYNC + UNPLUG -> UNPLUG + BARRIER -> FUA|FLUSH + DISCARD -> DISCARD +2.6.36 + SYNC -> SYNC + UNPLUG -> UNPLUG + FUA -> FUA + FLUSH -> FLUSH + DISCARD -> DISCARD +-------------------------------------- + dp_flags -> bi_rw +< 2.6.28 + SYNC -> SYNC (and unplug) + UNPLUG -> SYNC (and unplug) + FUA -> BARRIER + FLUSH -> BARRIER + there is no DISCARD, + it will be silently ignored on the receiving side. 
+2.6.28 + SYNC -> SYNC (and unplug) + UNPLUG -> SYNC (and unplug) + FUA -> BARRIER + FLUSH -> BARRIER + DISCARD -> DISCARD + (if that fails, we handle it like any other IO error) +2.6.29 + SYNC -> SYNCIO + UNPLUG -> UNPLUG + FUA -> BARRIER + FLUSH -> BARRIER + DISCARD -> DISCARD +2.6.36 + SYNC -> SYNC + UNPLUG -> UNPLUG + FUA -> FUA + FLUSH -> FLUSH + DISCARD -> DISCARD + +NOTE: DISCARDs likely need some work still. We should actually never see +DISCARD requests, as our queue does not announce QUEUE_FLAG_DISCARD yet. +*/ + +#ifndef CONFIG_DYNAMIC_DEBUG +/* At least in 2.6.34 the function macro dynamic_dev_dbg() is broken when compiling + without CONFIG_DYNAMIC_DEBUG. It has 'format' in the argument list, it references + to 'fmt' in its body. */ +#ifdef dynamic_dev_dbg +#undef dynamic_dev_dbg +#define dynamic_dev_dbg(dev, fmt, ...) \ + do { if (0) dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); } while (0) +#endif +#endif + +#ifndef min_not_zero +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) +#endif + +/* Introduced with 2.6.26. 
See include/linux/jiffies.h */ +#ifndef time_is_before_eq_jiffies +#define time_is_before_jiffies(a) time_after(jiffies, a) +#define time_is_after_jiffies(a) time_before(jiffies, a) +#define time_is_before_eq_jiffies(a) time_after_eq(jiffies, a) +#define time_is_after_eq_jiffies(a) time_before_eq(jiffies, a) +#endif + +#ifdef COMPAT_BIO_SPLIT_HAS_BIO_SPLIT_POOL_PARAMETER +#define bio_split(bi, first_sectors) bio_split(bi, bio_split_pool, first_sectors) +#endif + +#ifndef COMPAT_HAVE_BIOSET_CREATE_FRONT_PAD +/* see comments in compat/tests/have_bioset_create_front_pad.c */ +#ifdef COMPAT_BIOSET_CREATE_HAS_THREE_PARAMETERS +#define bioset_create(pool_size, front_pad) bioset_create(pool_size, pool_size, 1) +#else +#define bioset_create(pool_size, front_pad) bioset_create(pool_size, 1) +#endif +#endif + + +#if !(defined(COMPAT_HAVE_RB_AUGMENT_FUNCTIONS) && \ + defined(AUGMENTED_RBTREE_SYMBOLS_EXPORTED)) + +/* + * Make sure the replacements for the augmented rbtree helper functions do not + * clash with functions the kernel implements but does not export. 
+ */ +#define rb_augment_f drbd_rb_augment_f +#define rb_augment_path drbd_rb_augment_path +#define rb_augment_insert drbd_rb_augment_insert +#define rb_augment_erase_begin drbd_rb_augment_erase_begin +#define rb_augment_erase_end drbd_rb_augment_erase_end + +typedef void (*rb_augment_f)(struct rb_node *node, void *data); + +static inline void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data) +{ + struct rb_node *parent; + +up: + func(node, data); + parent = rb_parent(node); + if (!parent) + return; + + if (node == parent->rb_left && parent->rb_right) + func(parent->rb_right, data); + else if (parent->rb_left) + func(parent->rb_left, data); -#undef DECLARE_TRACE -#define DECLARE_TRACE(name, proto, args) \ - static inline void _do_trace_##name(struct tracepoint *tp, proto) \ - { } \ - static inline void trace_##name(proto) \ - { } \ - static inline int register_trace_##name(void (*probe)(proto)) \ - { \ - return -ENOSYS; \ - } \ - static inline int unregister_trace_##name(void (*probe)(proto)) \ - { \ - return -ENOSYS; \ + node = parent; + goto up; +} + +/* + * after inserting @node into the tree, update the tree to account for + * both the new entry and any damage done by rebalance + */ +static inline void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data) +{ + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + + rb_augment_path(node, func, data); +} + +/* + * before removing the node, find the deepest node on the rebalance path + * that will still be there after @node gets removed + */ +static inline struct rb_node *rb_augment_erase_begin(struct rb_node *node) +{ + struct rb_node *deepest; + + if (!node->rb_right && !node->rb_left) + deepest = rb_parent(node); + else if (!node->rb_right) + deepest = node->rb_left; + else if (!node->rb_left) + deepest = node->rb_right; + else { + deepest = rb_next(node); + if (deepest->rb_right) + deepest = deepest->rb_right; + else if 
(rb_parent(deepest) != node) + deepest = rb_parent(deepest); } -#undef DEFINE_TRACE -#define DEFINE_TRACE(name) + return deepest; +} + +/* + * after removal, update the tree to account for the removed entry + * and any rebalance damage. + */ +static inline void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data) +{ + if (node) + rb_augment_path(node, func, data); +} +#endif + +/* + * In commit c4945b9e (v2.6.39-rc1), the little-endian bit operations have been + * renamed to be less weird. + */ +#ifndef COMPAT_HAVE_FIND_NEXT_ZERO_BIT_LE +#define find_next_zero_bit_le(addr, size, offset) \ + generic_find_next_zero_le_bit(addr, size, offset) +#define find_next_bit_le(addr, size, offset) \ + generic_find_next_le_bit(addr, size, offset) +#define test_bit_le(nr, addr) \ + generic_test_le_bit(nr, addr) +#define __test_and_set_bit_le(nr, addr) \ + generic___test_and_set_le_bit(nr, addr) +#define __test_and_clear_bit_le(nr, addr) \ + generic___test_and_clear_le_bit(nr, addr) +#endif + +#ifndef IDR_GET_NEXT_EXPORTED +/* Body in compat/idr.c */ +extern void *idr_get_next(struct idr *idp, int *nextidp); +#endif + +/* #ifndef COMPAT_HAVE_LIST_ENTRY_RCU */ +#ifndef list_entry_rcu +#ifndef rcu_dereference_raw +/* see c26d34a rcu: Add lockdep-enabled variants of rcu_dereference() */ +#define rcu_dereference_raw(p) rcu_dereference(p) +#endif +#define list_entry_rcu(ptr, type, member) \ + ({typeof (*ptr) *__ptr = (typeof (*ptr) __force *)ptr; \ + container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ + }) +#endif + +/* + * Introduced in 930631ed (v2.6.19-rc1). + */ +#ifndef DIV_ROUND_UP +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) +#endif + +/* + * IS_ALIGNED() was added to in mainline commit 0c0e6195 (and + * improved in f10db627); 2.6.24-rc1. + */ +#ifndef IS_ALIGNED +#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) +#endif + +/* + * NLA_TYPE_MASK and nla_type() were added to in mainline + * commit 8f4c1f9b; v2.6.24-rc1. 
Before that, none of the nlattr->nla_type + * flags had a special meaning. + */ + +#ifndef NLA_TYPE_MASK +#define NLA_TYPE_MASK ~0 + +static inline int nla_type(const struct nlattr *nla) +{ + return nla->nla_type & NLA_TYPE_MASK; +} + +#endif + +/* + * nlmsg_hdr was added to in mainline commit b529ccf2 + * (v2.6.22-rc1). + */ + +#ifndef COMPAT_HAVE_NLMSG_HDR +static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) +{ + return (struct nlmsghdr *)skb->data; +} +#endif + +/* + * genlmsg_reply() was added to in mainline commit 81878d27 + * (v2.6.20-rc2). + */ + +#ifndef COMPAT_HAVE_GENLMSG_REPLY +#include + +static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info) +{ + return genlmsg_unicast(skb, info->snd_pid); +} +#endif + +/* + * genlmsg_msg_size() and genlmsg_total_size() were added to + * in mainline commit 17db952c (v2.6.19-rc1). + */ + +#ifndef COMPAT_HAVE_GENLMSG_MSG_SIZE +#include +#include + +static inline int genlmsg_msg_size(int payload) +{ + return GENL_HDRLEN + payload; +} + +static inline int genlmsg_total_size(int payload) +{ + return NLMSG_ALIGN(genlmsg_msg_size(payload)); +} +#endif + +/* + * genlmsg_new() was added to in mainline commit 3dabc715 + * (v2.6.20-rc2). + */ + +#ifndef COMPAT_HAVE_GENLMSG_NEW +#include + +static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags) +{ + return nlmsg_new(genlmsg_total_size(payload), flags); +} +#endif + +/* + * genlmsg_put() was introduced in mainline commit 482a8524 (v2.6.15-rc1) and + * changed in 17c157c8 (v2.6.20-rc2). genlmsg_put_reply() was introduced in + * 17c157c8. We replace the compat_genlmsg_put() from 482a8524. 
+ */ + +#ifndef COMPAT_HAVE_GENLMSG_PUT_REPLY +#include + +static inline void *compat_genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, + struct genl_family *family, int flags, + u8 cmd) +{ + struct nlmsghdr *nlh; + struct genlmsghdr *hdr; + + nlh = nlmsg_put(skb, pid, seq, family->id, GENL_HDRLEN + + family->hdrsize, flags); + if (nlh == NULL) + return NULL; + + hdr = nlmsg_data(nlh); + hdr->cmd = cmd; + hdr->version = family->version; + hdr->reserved = 0; + + return (char *) hdr + GENL_HDRLEN; +} + +#define genlmsg_put compat_genlmsg_put + +static inline void *genlmsg_put_reply(struct sk_buff *skb, + struct genl_info *info, + struct genl_family *family, + int flags, u8 cmd) +{ + return genlmsg_put(skb, info->snd_pid, info->snd_seq, family, + flags, cmd); +} +#endif + +/* + * compat_genlmsg_multicast() got a gfp_t parameter in mainline commit d387f6ad + * (v2.6.19-rc1). + */ + +#ifdef COMPAT_NEED_GENLMSG_MULTICAST_WRAPPER +#include + +static inline int compat_genlmsg_multicast(struct sk_buff *skb, u32 pid, + unsigned int group, gfp_t flags) +{ + return genlmsg_multicast(skb, pid, group); +} + +#define genlmsg_multicast compat_genlmsg_multicast + +#endif + +/* + * Dynamic generic netlink multicast groups were introduced in mainline commit + * 2dbba6f7 (v2.6.23-rc1). Before that, netlink had a fixed number of 32 + * multicast groups. Use an arbitrary hard-coded group number for that case. 
+ */ + +#ifndef COMPAT_HAVE_CTRL_ATTR_MCAST_GROUPS + +struct genl_multicast_group { + struct genl_family *family; /* private */ + struct list_head list; /* private */ + char name[GENL_NAMSIZ]; + u32 id; +}; + +static inline int genl_register_mc_group(struct genl_family *family, + struct genl_multicast_group *grp) +{ + grp->id = 1; + return 0; +} + +static inline void genl_unregister_mc_group(struct genl_family *family, + struct genl_multicast_group *grp) +{ +} #endif +/* pr_warning was introduced with 2.6.37 (commit 968ab183) + */ +#ifndef pr_fmt +#define pr_fmt(fmt) fmt +#endif + +#ifndef pr_warning +#define pr_warning(fmt, ...) \ + printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +#endif + +#ifndef COMPAT_HAVE_IS_ERR_OR_NULL +static inline long __must_check IS_ERR_OR_NULL(const void *ptr) +{ + return !ptr || IS_ERR_VALUE((unsigned long)ptr); +} +#endif #endif diff -Nru drbd8-8.3.7/drbd/linux/connector.h drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/connector.h --- drbd8-8.3.7/drbd/linux/connector.h 2009-07-27 08:47:42.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/connector.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,186 +0,0 @@ -/* - * connector.h - * - * 2004-2005 Copyright (c) Evgeniy Polyakov - * All rights reserved. - * - * Modified by Philipp Reiser to work on older 2.6.x kernels. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __CONNECTOR_H -#define __CONNECTOR_H -#define DRBD_CONNECTOR_BACKPORT_HEADER - -#include - -#define NETLINK_CONNECTOR 11 - -#define CN_IDX_CONNECTOR 0xffffffff -#define CN_VAL_CONNECTOR 0xffffffff - -/* - * Process Events connector unique ids -- used for message routing - */ -#define CN_IDX_PROC 0x1 -#define CN_VAL_PROC 0x1 -#define CN_IDX_CIFS 0x2 -#define CN_VAL_CIFS 0x1 - -#define CN_NETLINK_USERS 1 - -/* - * Maximum connector's message size. - */ -#define CONNECTOR_MAX_MSG_SIZE 1024 - -/* - * idx and val are unique identifiers which - * are used for message routing and - * must be registered in connector.h for in-kernel usage. - */ - -struct cb_id { - __u32 idx; - __u32 val; -}; - -struct cn_msg { - struct cb_id id; - - __u32 seq; - __u32 ack; - - __u16 len; /* Length of the following data */ - __u16 flags; - __u8 data[0]; -}; - -/* - * Notify structure - requests notification about - * registering/unregistering idx/val in range [first, first+range]. - */ -struct cn_notify_req { - __u32 first; - __u32 range; -}; - -/* - * Main notification control message - * *_notify_num - number of appropriate cn_notify_req structures after - * this struct. - * group - notification receiver's idx. - * len - total length of the attached data. 
- */ -struct cn_ctl_msg { - __u32 idx_notify_num; - __u32 val_notify_num; - __u32 group; - __u32 len; - __u8 data[0]; -}; - -#ifdef __KERNEL__ -#include - -#ifndef KERNEL_HAS_GFP_T -#define KERNEL_HAS_GFP_T -typedef unsigned gfp_t; -#endif - -#include - -#include -#include - -#include - -#define CN_CBQ_NAMELEN 32 - -struct cn_queue_dev { - atomic_t refcnt; - unsigned char name[CN_CBQ_NAMELEN]; - - struct workqueue_struct *cn_queue; - - struct list_head queue_list; - spinlock_t queue_lock; - - int netlink_groups; - struct sock *nls; -}; - -struct cn_callback_id { - unsigned char name[CN_CBQ_NAMELEN]; - struct cb_id id; -}; - -struct cn_callback_data { - void (*destruct_data) (void *); - void *ddata; - - void *callback_priv; - void (*callback) (void *); - - void *free; -}; - -struct cn_callback_entry { - struct list_head callback_entry; - struct cn_callback *cb; - struct work_struct work; - struct cn_queue_dev *pdev; - - struct cn_callback_id id; - struct cn_callback_data data; - - int seq, group; - struct sock *nls; -}; - -struct cn_ctl_entry { - struct list_head notify_entry; - struct cn_ctl_msg *msg; -}; - -struct cn_dev { - struct cb_id id; - - u32 seq, groups; - struct sock *nls; - void (*input) (struct sock * sk, int len); - - struct cn_queue_dev *cbdev; -}; - -int cn_add_callback(struct cb_id *, char *, void (*callback) (void *)); -void cn_del_callback(struct cb_id *); -int cn_netlink_send(struct cn_msg *, u32, gfp_t); - -int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)); -void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); - -struct cn_queue_dev *cn_queue_alloc_dev(char *name, struct sock *); -void cn_queue_free_dev(struct cn_queue_dev *dev); - -int cn_cb_equal(struct cb_id *, struct cb_id *); - -void cn_queue_wrapper(void *data); - -extern int cn_already_initialized; - -#endif /* __KERNEL__ */ -#endif /* __CONNECTOR_H */ diff -Nru drbd8-8.3.7/drbd/linux/drbd.h 
drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/drbd.h --- drbd8-8.3.7/drbd/linux/drbd.h 2009-11-25 09:06:43.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/drbd.h 2012-02-02 14:09:14.000000000 +0000 @@ -25,7 +25,6 @@ */ #ifndef DRBD_H #define DRBD_H -#include #include #include @@ -38,9 +37,9 @@ #include #include -/* Altough the Linux source code makes a difference between +/* Although the Linux source code makes a difference between generic endianness and the bitfields' endianness, there is no - architecture as of Linux-2.6.24-rc4 where the bitfileds' endianness + architecture as of Linux-2.6.24-rc4 where the bitfields' endianness does not match the generic endianness. */ #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -53,7 +52,6 @@ #endif - enum drbd_io_error_p { EP_PASS_ON, /* FIXME should the better be named "Ignore"? */ EP_CALL_HELPER, @@ -61,7 +59,8 @@ }; enum drbd_fencing_p { - FP_DONT_CARE, + FP_NOT_AVAIL = -1, /* Not a policy */ + FP_DONT_CARE = 0, FP_RESOURCE, FP_STONITH }; @@ -86,8 +85,33 @@ ASB_VIOLENTLY }; +enum drbd_on_no_data { + OND_IO_ERROR, + OND_SUSPEND_IO +}; + +enum drbd_on_congestion { + OC_BLOCK, + OC_PULL_AHEAD, + OC_DISCONNECT, +}; + +enum drbd_read_balancing { + RB_PREFER_LOCAL, + RB_PREFER_REMOTE, + RB_ROUND_ROBIN, + RB_LEAST_PENDING, + RB_CONGESTED_REMOTE, + RB_32K_STRIPING, + RB_64K_STRIPING, + RB_128K_STRIPING, + RB_256K_STRIPING, + RB_512K_STRIPING, + RB_1M_STRIPING, +}; + /* KEEP the order, do not delete or insert. Only append. 
*/ -enum drbd_ret_codes { +enum drbd_ret_code { ERR_CODE_BASE = 100, NO_ERROR = 101, ERR_LOCAL_ADDR = 102, @@ -96,8 +120,8 @@ ERR_OPEN_MD_DISK = 105, ERR_DISK_NOT_BDEV = 107, ERR_MD_NOT_BDEV = 108, - ERR_DISK_TO_SMALL = 111, - ERR_MD_DISK_TO_SMALL = 112, + ERR_DISK_TOO_SMALL = 111, + ERR_MD_DISK_TOO_SMALL = 112, ERR_BDCLAIM_DISK = 114, ERR_BDCLAIM_MD_DISK = 115, ERR_MD_IDX_INVALID = 116, @@ -114,8 +138,8 @@ ERR_INTR = 129, /* EINTR */ ERR_RESIZE_RESYNC = 130, ERR_NO_PRIMARY = 131, - ERR_SYNC_AFTER = 132, - ERR_SYNC_AFTER_CYCLE = 133, + ERR_RESYNC_AFTER = 132, + ERR_RESYNC_AFTER_CYCLE = 133, ERR_PAUSE_IS_SET = 134, ERR_PAUSE_IS_CLEAR = 135, ERR_PACKET_NR = 137, @@ -134,6 +158,19 @@ ERR_DATA_NOT_CURRENT = 150, ERR_CONNECTED = 151, /* DRBD 8.3 only */ ERR_PERM = 152, + ERR_NEED_APV_93 = 153, + ERR_STONITH_AND_PROT_A = 154, + ERR_CONG_NOT_PROTO_A = 155, + ERR_PIC_AFTER_DEP = 156, + ERR_PIC_PEER_DEP = 157, + ERR_RES_NOT_KNOWN = 158, + ERR_RES_IN_USE = 159, + ERR_MINOR_CONFIGURED = 160, + ERR_MINOR_EXISTS = 161, + ERR_INVALID_REQUEST = 162, + ERR_NEED_APV_100 = 163, + ERR_NEED_ALLOW_TWO_PRI = 164, + ERR_MD_UNCLEAN = 165, /* insert new ones above this line */ AFTER_LAST_ERR_CODE @@ -163,7 +200,7 @@ /* These temporal states are all used on the way * from >= C_CONNECTED to Unconnected. * The 'disconnect reason' states - * I do not allow to change beween them. */ + * I do not allow to change between them. */ C_TIMEOUT, C_BROKEN_PIPE, C_NETWORK_FAILURE, @@ -174,7 +211,7 @@ C_WF_REPORT_PARAMS, /* we have a socket */ C_CONNECTED, /* we have introduced each other */ C_STARTING_SYNC_S, /* starting full sync by admin request. */ - C_STARTING_SYNC_T, /* stariing full sync by admin request. */ + C_STARTING_SYNC_T, /* starting full sync by admin request. 
*/ C_WF_BITMAP_S, C_WF_BITMAP_T, C_WF_SYNC_UUID, @@ -187,6 +224,10 @@ C_VERIFY_T, C_PAUSED_SYNC_S, C_PAUSED_SYNC_T, + + C_AHEAD, + C_BEHIND, + C_MASK = 31 }; @@ -211,7 +252,7 @@ * pointed out by Maxim Uvarov q * even though we transmit as "cpu_to_be32(state)", * the offsets of the bitfields still need to be swapped - * on different endianess. + * on different endianness. */ struct { #if defined(__LITTLE_ENDIAN_BITFIELD) @@ -220,13 +261,17 @@ unsigned conn:5 ; /* 17/32 cstates */ unsigned disk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ unsigned pdsk:4 ; /* 8/16 from D_DISKLESS to D_UP_TO_DATE */ - unsigned susp:1 ; /* 2/2 IO suspended no/yes */ + unsigned susp:1 ; /* 2/2 IO suspended no/yes (by user) */ unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ unsigned peer_isp:1 ; unsigned user_isp:1 ; - unsigned _pad:11; /* 0 unused */ + unsigned susp_nod:1 ; /* IO suspended because no data */ + unsigned susp_fen:1 ; /* IO suspended because fence peer handler runs*/ + unsigned _pad:9; /* 0 unused */ #elif defined(__BIG_ENDIAN_BITFIELD) - unsigned _pad:11; /* 0 unused */ + unsigned _pad:9; + unsigned susp_fen:1 ; + unsigned susp_nod:1 ; unsigned user_isp:1 ; unsigned peer_isp:1 ; unsigned aftr_isp:1 ; /* isp .. imposed sync pause */ @@ -237,20 +282,13 @@ unsigned peer:2 ; /* 3/4 primary/secondary/unknown */ unsigned role:2 ; /* 3/4 primary/secondary/unknown */ #else -# error "this endianess is not supported" -#endif -#ifndef DRBD_DEBUG_STATE_CHANGES -#define DRBD_DEBUG_STATE_CHANGES 0 -#endif -#if DRBD_DEBUG_STATE_CHANGES - unsigned int line; - const char *func; +# error "this endianness is not supported" #endif }; unsigned int i; }; -enum drbd_state_ret_codes { +enum drbd_state_rv { SS_CW_NO_NEED = 4, SS_CW_SUCCESS = 3, SS_NOTHING_TO_DO = 2, @@ -274,14 +312,15 @@ SS_NOT_SUPPORTED = -17, /* drbd-8.2 only */ SS_IN_TRANSIENT_STATE = -18, /* Retry after the next state change */ SS_CONCURRENT_ST_CHG = -19, /* Concurrent cluster side state change! 
*/ - SS_AFTER_LAST_ERROR = -20, /* Keep this at bottom */ + SS_O_VOL_PEER_PRI = -20, + SS_AFTER_LAST_ERROR = -21, /* Keep this at bottom */ }; /* from drbd_strings.c */ extern const char *drbd_conn_str(enum drbd_conns); extern const char *drbd_role_str(enum drbd_role); extern const char *drbd_disk_str(enum drbd_disk_state); -extern const char *drbd_set_st_err_str(enum drbd_state_ret_codes); +extern const char *drbd_set_st_err_str(enum drbd_state_rv); #define SHARED_SECRET_MAX 64 @@ -291,7 +330,8 @@ #define MDF_FULL_SYNC (1 << 3) #define MDF_WAS_UP_TO_DATE (1 << 4) #define MDF_PEER_OUT_DATED (1 << 5) -#define MDF_CRASHED_PRIMARY (1 << 6) +#define MDF_CRASHED_PRIMARY (1 << 6) +#define MDF_AL_CLEAN (1 << 7) enum drbd_uuid_index { UI_CURRENT, @@ -311,42 +351,23 @@ #define UUID_JUST_CREATED ((__u64)4) +/* magic numbers used in meta data and network packets */ #define DRBD_MAGIC 0x83740267 -#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC) +#define DRBD_MAGIC_BIG 0x835a +#define DRBD_MAGIC_100 0x8620ec20 + +#define DRBD_MD_MAGIC_07 (DRBD_MAGIC+3) +#define DRBD_MD_MAGIC_08 (DRBD_MAGIC+4) +#define DRBD_MD_MAGIC_84_UNCLEAN (DRBD_MAGIC+5) + + +/* how I came up with this magic? 
+ * base64 decode "actlog==" ;) */ +#define DRBD_AL_MAGIC 0x69cb65a2 /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 #define DRBD_MD_INDEX_FLEX_EXT -2 #define DRBD_MD_INDEX_FLEX_INT -3 -/* Start of the new netlink/connector stuff */ - -#define DRBD_NL_CREATE_DEVICE 0x01 -#define DRBD_NL_SET_DEFAULTS 0x02 - -/* The following line should be moved over to linux/connector.h - * when the time comes */ -#ifndef CN_IDX_DRBD -# define CN_IDX_DRBD 0x4 -/* Ubuntu "intrepid ibex" release defined CN_IDX_DRBD as 0x6 */ -#endif -#define CN_VAL_DRBD 0x1 - -/* For searching a vacant cn_idx value */ -#define CN_IDX_STEP 6977 - -struct drbd_nl_cfg_req { - int packet_type; - unsigned int drbd_minor; - int flags; - unsigned short tag_list[]; -}; - -struct drbd_nl_cfg_reply { - int packet_type; - unsigned int minor; - int ret_code; /* enum ret_code or set_st_err_t */ - unsigned short tag_list[]; /* only used with get_* calls */ -}; - #endif diff -Nru drbd8-8.3.7/drbd/linux/drbd_config.h drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/drbd_config.h --- drbd8-8.3.7/drbd/linux/drbd_config.h 2010-01-13 16:14:27.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/drbd_config.h 2012-02-02 14:09:14.000000000 +0000 @@ -22,10 +22,20 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.7" -#define API_VERSION 88 +/* Necessary to build the external module against >= Linux-2.6.33 */ +#ifdef REL_VERSION +#undef REL_VERSION +#undef API_VERSION +#undef PRO_VERSION_MIN +#undef PRO_VERSION_MAX +#endif + +/* End of external module for 2.6.33 stuff */ + +#define REL_VERSION "8.4.1" +#define API_VERSION 1 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 91 +#define PRO_VERSION_MAX 100 #ifndef __CHECKER__ /* for a sparse run, we need all STATICs */ #define DBG_ALL_SYMBOLS /* no static functs, improves quality of OOPS traces */ @@ -45,60 +55,8 @@ /* Enable fault insertion code */ #define DRBD_ENABLE_FAULTS -/* RedHat's 2.6.9 kernels have the gfp_t type. 
Mainline has this feature - * since 2.6.16. If you build for RedHat enable the line below. */ -#define KERNEL_HAS_GFP_T - -/* kernel.org has atomic_add_return since 2.6.10. some vendor kernels - * have it backported, though. Others don't. */ -//#define NEED_BACKPORT_OF_ATOMIC_ADD - -/* 2.6.something has deprecated kmem_cache_t - * some older still use it. - * some have it defined as struct kmem_cache_s, some as struct kmem_cache */ -//#define USE_KMEM_CACHE_S - -/* 2.6.something has sock_create_kern (SE-linux security context stuff) - * some older distribution kernels don't. */ -//#define DEFINE_SOCK_CREATE_KERN - -/* 2.6.24 and later have kernel_sock_shutdown. - * some older distribution kernels may also have a backport. */ -//#define DEFINE_KERNEL_SOCK_SHUTDOWN - -/* in older kernels (vanilla < 2.6.16) struct netlink_skb_parms has a - * member called dst_groups. Later it is called dst_group (without 's'). */ -//#define DRBD_NL_DST_GROUPS - -/* in older kernels (vanilla < 2.6.14) is no kzalloc() */ -//#define NEED_BACKPORT_OF_KZALLOC - -// some vendor kernels have it, some don't -//#define NEED_SG_SET_BUF -#define HAVE_LINUX_SCATTERLIST_H - -/* 2.6.29 and up no longer have swabb.h */ -//#define HAVE_LINUX_BYTEORDER_SWABB_H - -/* some vendor kernel have it backported. */ -#define HAVE_SET_CPUS_ALLOWED_PTR - -/* Some vendor kernels < 2.6.7 might define msleep in one or - * another way .. */ - -#define KERNEL_HAS_MSLEEP - -/* Some other kernels < 2.6.8 do not have struct kvec, - * others do.. */ - -#define KERNEL_HAS_KVEC - -/* Actually availabe since 2.6.25, but venders have backported... 
- */ -#define KERNEL_HAS_PROC_CREATE - -/* In 2.6.32 we finally fixed connector to pass netlink_skb_parms to the callback - */ -#define KERNEL_HAS_CN_SKB_PARMS +#ifdef __KERNEL__ +#include "compat.h" +#endif #endif diff -Nru drbd8-8.3.7/drbd/linux/drbd_genl.h drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/drbd_genl.h --- drbd8-8.3.7/drbd/linux/drbd_genl.h 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/drbd_genl.h 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,364 @@ +/* + * General overview: + * full generic netlink message: + * |nlmsghdr|genlmsghdr| + * + * payload: + * |optional fixed size family header| + * + * sequence of netlink attributes: + * I chose to have all "top level" attributes NLA_NESTED, + * corresponding to some real struct. + * So we have a sequence of |tla, len| + * + * nested nla sequence: + * may be empty, or contain a sequence of netlink attributes + * representing the struct fields. + * + * The tag number of any field (regardless of containing struct) + * will be available as T_ ## field_name, + * so you cannot have the same field name in two differnt structs. + * + * The tag numbers themselves are per struct, though, + * so should always begin at 1 (not 0, that is the special "NLA_UNSPEC" type, + * which we won't use here). + * The tag numbers are used as index in the respective nla_policy array. 
+ * + * GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy + * genl_magic_struct.h + * generates the struct declaration, + * generates an entry in the tla enum, + * genl_magic_func.h + * generates an entry in the static tla policy + * with .type = NLA_NESTED + * generates the static _nl_policy definition, + * and static conversion functions + * + * genl_magic_func.h + * + * GENL_mc_group(group) + * genl_magic_struct.h + * does nothing + * genl_magic_func.h + * defines and registers the mcast group, + * and provides a send helper + * + * GENL_notification(op_name, op_num, mcast_group, tla list) + * These are notifications to userspace. + * + * genl_magic_struct.h + * generates an entry in the genl_ops enum, + * genl_magic_func.h + * does nothing + * + * mcast group: the name of the mcast group this notification should be + * expected on + * tla list: the list of expected top level attributes, + * for documentation and sanity checking. + * + * GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations" + * These are requests from userspace. + * + * _op and _notification share the same "number space", + * op_nr will be assigned to "genlmsghdr->cmd" + * + * genl_magic_struct.h + * generates an entry in the genl_ops enum, + * genl_magic_func.h + * generates an entry in the static genl_ops array, + * and static register/unregister functions to + * genl_register_family_with_ops(). + * + * flags and handler: + * GENL_op_init( .doit = x, .dumpit = y, .flags = something) + * GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM + * tla list: the list of expected top level attributes, + * for documentation and sanity checking. + */ + +/* + * STRUCTS + */ + +/* this is sent kernel -> userland on various error conditions, and contains + * informational textual info, which is supposedly human readable. + * The computer relevant return code is in the drbd_genlmsghdr. 
+ */ +GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, + /* "arbitrary" size strings, nla_policy.len = 0 */ + __str_field(1, DRBD_GENLA_F_MANDATORY, info_text, 0) +) + +/* Configuration requests typically need a context to operate on. + * Possible keys are device minor (fits in the drbd_genlmsghdr), + * the replication link (aka connection) name, + * and/or the replication group (aka resource) name, + * and the volume id within the resource. */ +GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, + __u32_field(1, DRBD_GENLA_F_MANDATORY, ctx_volume) + __str_field(2, DRBD_GENLA_F_MANDATORY, ctx_resource_name, 128) + __bin_field(3, DRBD_GENLA_F_MANDATORY, ctx_my_addr, 128) + __bin_field(4, DRBD_GENLA_F_MANDATORY, ctx_peer_addr, 128) +) + +GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, + __str_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT, backing_dev, 128) + __str_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev, 128) + __s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT, meta_dev_idx) + + /* use the resize command to try and change the disk_size */ + __u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, disk_size) + /* we could change the max_bio_bvecs, + * but it won't propagate through the stack */ + __u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, max_bio_bvecs) + + __u32_field_def(6, DRBD_GENLA_F_MANDATORY, on_io_error, DRBD_ON_IO_ERROR_DEF) + __u32_field_def(7, DRBD_GENLA_F_MANDATORY, fencing, DRBD_FENCING_DEF) + + __u32_field_def(8, DRBD_GENLA_F_MANDATORY, resync_rate, DRBD_RESYNC_RATE_DEF) + __s32_field_def(9, DRBD_GENLA_F_MANDATORY, resync_after, DRBD_MINOR_NUMBER_DEF) + __u32_field_def(10, DRBD_GENLA_F_MANDATORY, al_extents, DRBD_AL_EXTENTS_DEF) + __u32_field_def(11, DRBD_GENLA_F_MANDATORY, c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF) + __u32_field_def(12, DRBD_GENLA_F_MANDATORY, c_delay_target, DRBD_C_DELAY_TARGET_DEF) + __u32_field_def(13, DRBD_GENLA_F_MANDATORY, c_fill_target, DRBD_C_FILL_TARGET_DEF) + __u32_field_def(14, DRBD_GENLA_F_MANDATORY, 
c_max_rate, DRBD_C_MAX_RATE_DEF) + __u32_field_def(15, DRBD_GENLA_F_MANDATORY, c_min_rate, DRBD_C_MIN_RATE_DEF) + + __flg_field_def(16, DRBD_GENLA_F_MANDATORY, disk_barrier, DRBD_DISK_BARRIER_DEF) + __flg_field_def(17, DRBD_GENLA_F_MANDATORY, disk_flushes, DRBD_DISK_FLUSHES_DEF) + __flg_field_def(18, DRBD_GENLA_F_MANDATORY, disk_drain, DRBD_DISK_DRAIN_DEF) + __flg_field_def(19, DRBD_GENLA_F_MANDATORY, md_flushes, DRBD_MD_FLUSHES_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, disk_timeout, DRBD_DISK_TIMEOUT_DEF) + __u32_field_def(21, 0 /* OPTIONAL */, read_balancing, DRBD_READ_BALANCING_DEF) +) + +GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts, + __str_field_def(1, DRBD_GENLA_F_MANDATORY, cpu_mask, 32) + __u32_field_def(2, DRBD_GENLA_F_MANDATORY, on_no_data, DRBD_ON_NO_DATA_DEF) +) + +GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, + __str_field_def(1, DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE, + shared_secret, SHARED_SECRET_MAX) + __str_field_def(2, DRBD_GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field_def(3, DRBD_GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field_def(4, DRBD_GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field_def(5, DRBD_GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __u32_field_def(6, DRBD_GENLA_F_MANDATORY, wire_protocol, DRBD_PROTOCOL_DEF) + __u32_field_def(7, DRBD_GENLA_F_MANDATORY, connect_int, DRBD_CONNECT_INT_DEF) + __u32_field_def(8, DRBD_GENLA_F_MANDATORY, timeout, DRBD_TIMEOUT_DEF) + __u32_field_def(9, DRBD_GENLA_F_MANDATORY, ping_int, DRBD_PING_INT_DEF) + __u32_field_def(10, DRBD_GENLA_F_MANDATORY, ping_timeo, DRBD_PING_TIMEO_DEF) + __u32_field_def(11, DRBD_GENLA_F_MANDATORY, sndbuf_size, DRBD_SNDBUF_SIZE_DEF) + __u32_field_def(12, DRBD_GENLA_F_MANDATORY, rcvbuf_size, DRBD_RCVBUF_SIZE_DEF) + __u32_field_def(13, DRBD_GENLA_F_MANDATORY, ko_count, DRBD_KO_COUNT_DEF) + __u32_field_def(14, DRBD_GENLA_F_MANDATORY, max_buffers, DRBD_MAX_BUFFERS_DEF) + __u32_field_def(15, 
DRBD_GENLA_F_MANDATORY, max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF) + __u32_field_def(16, DRBD_GENLA_F_MANDATORY, unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) + __u32_field_def(17, DRBD_GENLA_F_MANDATORY, after_sb_0p, DRBD_AFTER_SB_0P_DEF) + __u32_field_def(18, DRBD_GENLA_F_MANDATORY, after_sb_1p, DRBD_AFTER_SB_1P_DEF) + __u32_field_def(19, DRBD_GENLA_F_MANDATORY, after_sb_2p, DRBD_AFTER_SB_2P_DEF) + __u32_field_def(20, DRBD_GENLA_F_MANDATORY, rr_conflict, DRBD_RR_CONFLICT_DEF) + __u32_field_def(21, DRBD_GENLA_F_MANDATORY, on_congestion, DRBD_ON_CONGESTION_DEF) + __u32_field_def(22, DRBD_GENLA_F_MANDATORY, cong_fill, DRBD_CONG_FILL_DEF) + __u32_field_def(23, DRBD_GENLA_F_MANDATORY, cong_extents, DRBD_CONG_EXTENTS_DEF) + __flg_field_def(24, DRBD_GENLA_F_MANDATORY, two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF) + __flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, discard_my_data) + __flg_field_def(26, DRBD_GENLA_F_MANDATORY, tcp_cork, DRBD_TCP_CORK_DEF) + __flg_field_def(27, DRBD_GENLA_F_MANDATORY, always_asbp, DRBD_ALWAYS_ASBP_DEF) + __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative) + __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) +) + +GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, + __flg_field(1, DRBD_GENLA_F_MANDATORY, assume_uptodate) +) + +GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, + __u64_field(1, DRBD_GENLA_F_MANDATORY, resize_size) + __flg_field(2, DRBD_GENLA_F_MANDATORY, resize_force) + __flg_field(3, DRBD_GENLA_F_MANDATORY, no_resync) +) + +GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, + /* the reason of the broadcast, + * if this is an event triggered broadcast. */ + __u32_field(1, DRBD_GENLA_F_MANDATORY, sib_reason) + __u32_field(2, DRBD_F_REQUIRED, current_state) + __u64_field(3, DRBD_GENLA_F_MANDATORY, capacity) + __u64_field(4, DRBD_GENLA_F_MANDATORY, ed_uuid) + + /* These are for broadcast from after state change work. 
+ * prev_state and new_state are from the moment the state change took + * place, new_state is not neccessarily the same as current_state, + * there may have been more state changes since. Which will be + * broadcasted soon, in their respective after state change work. */ + __u32_field(5, DRBD_GENLA_F_MANDATORY, prev_state) + __u32_field(6, DRBD_GENLA_F_MANDATORY, new_state) + + /* if we have a local disk: */ + __bin_field(7, DRBD_GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64))) + __u32_field(8, DRBD_GENLA_F_MANDATORY, disk_flags) + __u64_field(9, DRBD_GENLA_F_MANDATORY, bits_total) + __u64_field(10, DRBD_GENLA_F_MANDATORY, bits_oos) + /* and in case resync or online verify is active */ + __u64_field(11, DRBD_GENLA_F_MANDATORY, bits_rs_total) + __u64_field(12, DRBD_GENLA_F_MANDATORY, bits_rs_failed) + + /* for pre and post notifications of helper execution */ + __str_field(13, DRBD_GENLA_F_MANDATORY, helper, 32) + __u32_field(14, DRBD_GENLA_F_MANDATORY, helper_exit_code) +) + +GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, + __u64_field(1, DRBD_GENLA_F_MANDATORY, ov_start_sector) +) + +GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, + __flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm) +) + +GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms, + __u32_field(1, DRBD_F_REQUIRED, timeout_type) +) + +GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, + __flg_field(1, DRBD_GENLA_F_MANDATORY, force_disconnect) +) + +GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms, + __flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach) +) + +/* + * Notifications and commands (genlmsghdr->cmd) + */ +GENL_mc_group(events) + + /* kernel -> userspace announcement of changes */ +GENL_notification( + DRBD_EVENT, 1, events, + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_DISK_CONF, 
DRBD_GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY) +) + + /* query kernel for specific or all info */ +GENL_op( + DRBD_ADM_GET_STATUS, 2, + GENL_op_init( + .doit = drbd_adm_get_status, + .dumpit = drbd_adm_get_status_all, + /* anyone may ask for the status, + * it is broadcasted anyways */ + ), + /* To select the object .doit. + * Or a subset of objects in .dumpit. */ + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY) +) + + /* add DRBD minor devices as volumes to resources */ +GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_add_minor), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) + + /* add or delete resources */ +GENL_op(DRBD_ADM_NEW_RESOURCE, 7, GENL_doit(drbd_adm_new_resource), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) + +GENL_op(DRBD_ADM_RESOURCE_OPTS, 9, + GENL_doit(drbd_adm_resource_opts), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_CONNECT, 10, + GENL_doit(drbd_adm_connect), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_CHG_NET_OPTS, 29, + GENL_doit(drbd_adm_net_opts), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED) +) + +GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) + +GENL_op(DRBD_ADM_ATTACH, 12, + GENL_doit(drbd_adm_attach), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_F_REQUIRED) +) + 
+GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28, + GENL_doit(drbd_adm_disk_opts), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_OPTS, DRBD_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_RESIZE, 13, + GENL_doit(drbd_adm_resize), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_PRIMARY, 14, + GENL_doit(drbd_adm_set_role), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_SECONDARY, 15, + GENL_doit(drbd_adm_set_role), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_NEW_C_UUID, 16, + GENL_doit(drbd_adm_new_c_uuid), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_START_OV, 17, + GENL_doit(drbd_adm_start_ov), + GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY) +) + +GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY)) + +GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_PAUSE_SYNC, 21, GENL_doit(drbd_adm_pause_sync), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_RESUME_SYNC, 22, GENL_doit(drbd_adm_resume_sync), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_SUSPEND_IO, 23, GENL_doit(drbd_adm_suspend_io), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_RESUME_IO, 24, 
GENL_doit(drbd_adm_resume_io), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) +GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)) diff -Nru drbd8-8.3.7/drbd/linux/drbd_genl_api.h drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/drbd_genl_api.h --- drbd8-8.3.7/drbd/linux/drbd_genl_api.h 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/drbd_genl_api.h 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,55 @@ +#ifndef DRBD_GENL_STRUCT_H +#define DRBD_GENL_STRUCT_H + +/** + * struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests + * @minor: + * For admin requests (user -> kernel): which minor device to operate on. + * For (unicast) replies or informational (broadcast) messages + * (kernel -> user): which minor device the information is about. + * If we do not operate on minors, but on connections or resources, + * the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT + * is used instead. + * @flags: possible operation modifiers (relevant only for user->kernel): + * DRBD_GENL_F_SET_DEFAULTS + * @volume: + * When creating a new minor (adding it to a resource), the resource needs + * to know which volume number within the resource this is supposed to be. + * The volume number corresponds to the same volume number on the remote side, + * whereas the minor number on the remote side may be different + * (union with flags). 
+ * @ret_code: kernel->userland unicast cfg reply return code (union with flags); + */ +struct drbd_genlmsghdr { + __u32 minor; + union { + __u32 flags; + __s32 ret_code; + }; +}; + +/* To be used in drbd_genlmsghdr.flags */ +enum { + DRBD_GENL_F_SET_DEFAULTS = 1, +}; + +enum drbd_state_info_bcast_reason { + SIB_GET_STATUS_REPLY = 1, + SIB_STATE_CHANGE = 2, + SIB_HELPER_PRE = 3, + SIB_HELPER_POST = 4, + SIB_SYNC_PROGRESS = 5, +}; + +/* hack around predefined gcc/cpp "linux=1", + * we cannot possibly include <1/drbd_genl.h> */ +#undef linux + +#include +#define GENL_MAGIC_VERSION API_VERSION +#define GENL_MAGIC_FAMILY drbd +#define GENL_MAGIC_FAMILY_HDRSZ sizeof(struct drbd_genlmsghdr) +#define GENL_MAGIC_INCLUDE_FILE +#include + +#endif diff -Nru drbd8-8.3.7/drbd/linux/drbd_limits.h drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/drbd_limits.h --- drbd8-8.3.7/drbd/linux/drbd_limits.h 2009-09-29 07:51:14.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/drbd_limits.h 2012-02-02 14:09:14.000000000 +0000 @@ -17,121 +17,202 @@ #define DRBD_MINOR_COUNT_MIN 1 #define DRBD_MINOR_COUNT_MAX 255 +#define DRBD_MINOR_COUNT_DEF 32 +#define DRBD_MINOR_COUNT_SCALE '1' + +#define DRBD_VOLUME_MAX 65535 #define DRBD_DIALOG_REFRESH_MIN 0 #define DRBD_DIALOG_REFRESH_MAX 600 +#define DRBD_DIALOG_REFRESH_SCALE '1' /* valid port number */ #define DRBD_PORT_MIN 1 #define DRBD_PORT_MAX 0xffff +#define DRBD_PORT_SCALE '1' /* startup { */ /* if you want more than 3.4 days, disable */ #define DRBD_WFC_TIMEOUT_MIN 0 #define DRBD_WFC_TIMEOUT_MAX 300000 #define DRBD_WFC_TIMEOUT_DEF 0 +#define DRBD_WFC_TIMEOUT_SCALE '1' #define DRBD_DEGR_WFC_TIMEOUT_MIN 0 #define DRBD_DEGR_WFC_TIMEOUT_MAX 300000 #define DRBD_DEGR_WFC_TIMEOUT_DEF 0 +#define DRBD_DEGR_WFC_TIMEOUT_SCALE '1' #define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0 #define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000 #define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0 +#define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1' /* }*/ /* net { */ /* timeout, unit centi seconds - * more 
than one minute timeout is not usefull */ + * more than one minute timeout is not useful */ #define DRBD_TIMEOUT_MIN 1 #define DRBD_TIMEOUT_MAX 600 #define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ +#define DRBD_TIMEOUT_SCALE '1' + + /* If backing disk takes longer than disk_timeout, mark the disk as failed */ +#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */ +#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */ +#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */ +#define DRBD_DISK_TIMEOUT_SCALE '1' /* active connection retries when C_WF_CONNECTION */ #define DRBD_CONNECT_INT_MIN 1 #define DRBD_CONNECT_INT_MAX 120 #define DRBD_CONNECT_INT_DEF 10 /* seconds */ +#define DRBD_CONNECT_INT_SCALE '1' /* keep-alive probes when idle */ #define DRBD_PING_INT_MIN 1 #define DRBD_PING_INT_MAX 120 #define DRBD_PING_INT_DEF 10 +#define DRBD_PING_INT_SCALE '1' /* timeout for the ping packets.*/ #define DRBD_PING_TIMEO_MIN 1 -#define DRBD_PING_TIMEO_MAX 100 +#define DRBD_PING_TIMEO_MAX 300 #define DRBD_PING_TIMEO_DEF 5 +#define DRBD_PING_TIMEO_SCALE '1' /* max number of write requests between write barriers */ #define DRBD_MAX_EPOCH_SIZE_MIN 1 #define DRBD_MAX_EPOCH_SIZE_MAX 20000 #define DRBD_MAX_EPOCH_SIZE_DEF 2048 +#define DRBD_MAX_EPOCH_SIZE_SCALE '1' - /* I don't think that a tcp send buffer of more than 10M is usefull */ + /* I don't think that a tcp send buffer of more than 10M is useful */ #define DRBD_SNDBUF_SIZE_MIN 0 #define DRBD_SNDBUF_SIZE_MAX (10<<20) #define DRBD_SNDBUF_SIZE_DEF 0 +#define DRBD_SNDBUF_SIZE_SCALE '1' #define DRBD_RCVBUF_SIZE_MIN 0 #define DRBD_RCVBUF_SIZE_MAX (10<<20) #define DRBD_RCVBUF_SIZE_DEF 0 +#define DRBD_RCVBUF_SIZE_SCALE '1' /* @4k PageSize -> 128kB - 512MB */ #define DRBD_MAX_BUFFERS_MIN 32 #define DRBD_MAX_BUFFERS_MAX 131072 #define DRBD_MAX_BUFFERS_DEF 2048 +#define DRBD_MAX_BUFFERS_SCALE '1' /* @4k PageSize -> 4kB - 512MB */ #define DRBD_UNPLUG_WATERMARK_MIN 1 #define DRBD_UNPLUG_WATERMARK_MAX 131072 #define DRBD_UNPLUG_WATERMARK_DEF 
(DRBD_MAX_BUFFERS_DEF/16) +#define DRBD_UNPLUG_WATERMARK_SCALE '1' /* 0 is disabled. * 200 should be more than enough even for very short timeouts */ #define DRBD_KO_COUNT_MIN 0 #define DRBD_KO_COUNT_MAX 200 -#define DRBD_KO_COUNT_DEF 0 +#define DRBD_KO_COUNT_DEF 7 +#define DRBD_KO_COUNT_SCALE '1' /* } */ /* syncer { */ /* FIXME allow rate to be zero? */ -#define DRBD_RATE_MIN 1 +#define DRBD_RESYNC_RATE_MIN 1 /* channel bonding 10 GbE, or other hardware */ -#define DRBD_RATE_MAX (4 << 20) -#define DRBD_RATE_DEF 250 /* kb/second */ - - /* less than 7 would hit performance unneccessarily. - * 3833 is the largest prime that still does fit - * into 64 sectors of activity log */ +#define DRBD_RESYNC_RATE_MAX (4 << 20) +#define DRBD_RESYNC_RATE_DEF 250 +#define DRBD_RESYNC_RATE_SCALE 'k' /* kilobytes */ + + /* less than 7 would hit performance unnecessarily. + * 919 slots context information per transaction, + * 32k activity log, 4k transaction size, + * one transaction in flight: + * 919 * 7 = 6433 */ #define DRBD_AL_EXTENTS_MIN 7 -#define DRBD_AL_EXTENTS_MAX 3833 -#define DRBD_AL_EXTENTS_DEF 127 - -#define DRBD_AFTER_MIN -1 -#define DRBD_AFTER_MAX 255 -#define DRBD_AFTER_DEF -1 +#define DRBD_AL_EXTENTS_MAX 6433 +#define DRBD_AL_EXTENTS_DEF 1237 +#define DRBD_AL_EXTENTS_SCALE '1' + +#define DRBD_MINOR_NUMBER_MIN -1 +#define DRBD_MINOR_NUMBER_MAX ((1 << 20) - 1) +#define DRBD_MINOR_NUMBER_DEF -1 +#define DRBD_MINOR_NUMBER_SCALE '1' /* } */ /* drbdsetup XY resize -d Z * you are free to reduce the device size to nothing, if you want to. * the upper limit with 64bit kernel, enough ram and flexible meta data - * is 16 TB, currently. */ + * is 1 PiB, currently. */ /* DRBD_MAX_SECTORS */ -#define DRBD_DISK_SIZE_SECT_MIN 0 -#define DRBD_DISK_SIZE_SECT_MAX (16 * (2LLU << 30)) -#define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... 
*/ +#define DRBD_DISK_SIZE_MIN 0 +#define DRBD_DISK_SIZE_MAX (1 * (2LLU << 40)) +#define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */ +#define DRBD_DISK_SIZE_SCALE 's' /* sectors */ -#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON +#define DRBD_ON_IO_ERROR_DEF EP_DETACH #define DRBD_FENCING_DEF FP_DONT_CARE #define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT #define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT +#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR +#define DRBD_ON_CONGESTION_DEF OC_BLOCK +#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL #define DRBD_MAX_BIO_BVECS_MIN 0 #define DRBD_MAX_BIO_BVECS_MAX 128 #define DRBD_MAX_BIO_BVECS_DEF 0 +#define DRBD_MAX_BIO_BVECS_SCALE '1' + +#define DRBD_C_PLAN_AHEAD_MIN 0 +#define DRBD_C_PLAN_AHEAD_MAX 300 +#define DRBD_C_PLAN_AHEAD_DEF 20 +#define DRBD_C_PLAN_AHEAD_SCALE '1' + +#define DRBD_C_DELAY_TARGET_MIN 1 +#define DRBD_C_DELAY_TARGET_MAX 100 +#define DRBD_C_DELAY_TARGET_DEF 10 +#define DRBD_C_DELAY_TARGET_SCALE '1' + +#define DRBD_C_FILL_TARGET_MIN 0 +#define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */ +#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */ +#define DRBD_C_FILL_TARGET_SCALE 's' /* sectors */ + +#define DRBD_C_MAX_RATE_MIN 250 +#define DRBD_C_MAX_RATE_MAX (4 << 20) +#define DRBD_C_MAX_RATE_DEF 102400 +#define DRBD_C_MAX_RATE_SCALE 'k' /* kilobytes */ + +#define DRBD_C_MIN_RATE_MIN 0 +#define DRBD_C_MIN_RATE_MAX (4 << 20) +#define DRBD_C_MIN_RATE_DEF 250 +#define DRBD_C_MIN_RATE_SCALE 'k' /* kilobytes */ + +#define DRBD_CONG_FILL_MIN 0 +#define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */ +#define DRBD_CONG_FILL_DEF 0 +#define DRBD_CONG_FILL_SCALE 's' /* sectors */ + +#define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN +#define DRBD_CONG_EXTENTS_MAX DRBD_AL_EXTENTS_MAX +#define DRBD_CONG_EXTENTS_DEF DRBD_AL_EXTENTS_DEF +#define DRBD_CONG_EXTENTS_SCALE DRBD_AL_EXTENTS_SCALE + 
+#define DRBD_PROTOCOL_DEF DRBD_PROT_C + +#define DRBD_DISK_BARRIER_DEF 1 +#define DRBD_DISK_FLUSHES_DEF 1 +#define DRBD_DISK_DRAIN_DEF 1 +#define DRBD_MD_FLUSHES_DEF 1 +#define DRBD_TCP_CORK_DEF 1 + +#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 +#define DRBD_ALWAYS_ASBP_DEF 0 +#define DRBD_USE_RLE_DEF 1 -#undef RANGE #endif diff -Nru drbd8-8.3.7/drbd/linux/drbd_nl.h drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/drbd_nl.h --- drbd8-8.3.7/drbd/linux/drbd_nl.h 2010-01-07 09:09:34.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/drbd_nl.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,138 +0,0 @@ -/* - PAKET( name, - TYPE ( pn, pr, member ) - ... - ) - - You may never reissue one of the pn arguments -*/ - -#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64) -#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined" -#endif - -NL_PACKET(primary, 1, - NL_BIT( 1, T_MAY_IGNORE, overwrite_peer) -) - -NL_PACKET(secondary, 2, ) - -NL_PACKET(disk_conf, 3, - NL_INT64( 2, T_MAY_IGNORE, disk_size) - NL_STRING( 3, T_MANDATORY, backing_dev, 128) - NL_STRING( 4, T_MANDATORY, meta_dev, 128) - NL_INTEGER( 5, T_MANDATORY, meta_dev_idx) - NL_INTEGER( 6, T_MAY_IGNORE, on_io_error) - NL_INTEGER( 7, T_MAY_IGNORE, fencing) - NL_BIT( 37, T_MAY_IGNORE, use_bmbv) - NL_BIT( 53, T_MAY_IGNORE, no_disk_flush) - NL_BIT( 54, T_MAY_IGNORE, no_md_flush) - /* 55 max_bio_size was available in 8.2.6rc2 */ - NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs) - NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier) - NL_BIT( 58, T_MAY_IGNORE, no_disk_drain) -) - -NL_PACKET(detach, 4, ) - -NL_PACKET(net_conf, 5, - NL_STRING( 8, T_MANDATORY, my_addr, 128) - NL_STRING( 9, T_MANDATORY, peer_addr, 128) - NL_STRING( 10, T_MAY_IGNORE, shared_secret, SHARED_SECRET_MAX) - NL_STRING( 11, T_MAY_IGNORE, cram_hmac_alg, SHARED_SECRET_MAX) - NL_STRING( 44, T_MAY_IGNORE, integrity_alg, SHARED_SECRET_MAX) - NL_INTEGER( 14, T_MAY_IGNORE, timeout) - 
NL_INTEGER( 15, T_MANDATORY, wire_protocol) - NL_INTEGER( 16, T_MAY_IGNORE, try_connect_int) - NL_INTEGER( 17, T_MAY_IGNORE, ping_int) - NL_INTEGER( 18, T_MAY_IGNORE, max_epoch_size) - NL_INTEGER( 19, T_MAY_IGNORE, max_buffers) - NL_INTEGER( 20, T_MAY_IGNORE, unplug_watermark) - NL_INTEGER( 21, T_MAY_IGNORE, sndbuf_size) - NL_INTEGER( 22, T_MAY_IGNORE, ko_count) - NL_INTEGER( 24, T_MAY_IGNORE, after_sb_0p) - NL_INTEGER( 25, T_MAY_IGNORE, after_sb_1p) - NL_INTEGER( 26, T_MAY_IGNORE, after_sb_2p) - NL_INTEGER( 39, T_MAY_IGNORE, rr_conflict) - NL_INTEGER( 40, T_MAY_IGNORE, ping_timeo) - NL_INTEGER( 67, T_MAY_IGNORE, rcvbuf_size) - /* 59 addr_family was available in GIT, never released */ - NL_BIT( 60, T_MANDATORY, mind_af) - NL_BIT( 27, T_MAY_IGNORE, want_lose) - NL_BIT( 28, T_MAY_IGNORE, two_primaries) - NL_BIT( 41, T_MAY_IGNORE, always_asbp) - NL_BIT( 61, T_MAY_IGNORE, no_cork) - NL_BIT( 62, T_MANDATORY, auto_sndbuf_size) -) - -NL_PACKET(disconnect, 6, ) - -NL_PACKET(resize, 7, - NL_INT64( 29, T_MAY_IGNORE, resize_size) - NL_BIT( 68, T_MAY_IGNORE, resize_force) -) - -NL_PACKET(syncer_conf, 8, - NL_INTEGER( 30, T_MAY_IGNORE, rate) - NL_INTEGER( 31, T_MAY_IGNORE, after) - NL_INTEGER( 32, T_MAY_IGNORE, al_extents) - NL_STRING( 52, T_MAY_IGNORE, verify_alg, SHARED_SECRET_MAX) - NL_STRING( 51, T_MAY_IGNORE, cpu_mask, 32) - NL_STRING( 64, T_MAY_IGNORE, csums_alg, SHARED_SECRET_MAX) - NL_BIT( 65, T_MAY_IGNORE, use_rle) -) - -NL_PACKET(invalidate, 9, ) -NL_PACKET(invalidate_peer, 10, ) -NL_PACKET(pause_sync, 11, ) -NL_PACKET(resume_sync, 12, ) -NL_PACKET(suspend_io, 13, ) -NL_PACKET(resume_io, 14, ) -NL_PACKET(outdate, 15, ) -NL_PACKET(get_config, 16, ) -NL_PACKET(get_state, 17, - NL_INTEGER( 33, T_MAY_IGNORE, state_i) -) - -NL_PACKET(get_uuids, 18, - NL_STRING( 34, T_MAY_IGNORE, uuids, (UI_SIZE*sizeof(__u64))) - NL_INTEGER( 35, T_MAY_IGNORE, uuids_flags) -) - -NL_PACKET(get_timeout_flag, 19, - NL_BIT( 36, T_MAY_IGNORE, use_degraded) -) - -NL_PACKET(call_helper, 20, - 
NL_STRING( 38, T_MAY_IGNORE, helper, 32) -) - -/* Tag nr 42 already allocated in drbd-8.1 development. */ - -NL_PACKET(sync_progress, 23, - NL_INTEGER( 43, T_MAY_IGNORE, sync_progress) -) - -NL_PACKET(dump_ee, 24, - NL_STRING( 45, T_MAY_IGNORE, dump_ee_reason, 32) - NL_STRING( 46, T_MAY_IGNORE, seen_digest, SHARED_SECRET_MAX) - NL_STRING( 47, T_MAY_IGNORE, calc_digest, SHARED_SECRET_MAX) - NL_INT64( 48, T_MAY_IGNORE, ee_sector) - NL_INT64( 49, T_MAY_IGNORE, ee_block_id) - NL_STRING( 50, T_MAY_IGNORE, ee_data, 32 << 10) -) - -NL_PACKET(start_ov, 25, - NL_INT64( 66, T_MAY_IGNORE, start_sector) -) - -NL_PACKET(new_c_uuid, 26, - NL_BIT( 63, T_MANDATORY, clear_bm) -) - -#undef NL_PACKET -#undef NL_INTEGER -#undef NL_INT64 -#undef NL_BIT -#undef NL_STRING - diff -Nru drbd8-8.3.7/drbd/linux/drbd_tag_magic.h drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/drbd_tag_magic.h --- drbd8-8.3.7/drbd/linux/drbd_tag_magic.h 2009-07-27 08:47:43.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/drbd_tag_magic.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,83 +0,0 @@ -#ifndef DRBD_TAG_MAGIC_H -#define DRBD_TAG_MAGIC_H - -#define TT_END 0 -#define TT_REMOVED 0xE000 - -/* declare packet_type enums */ -enum packet_types { -#define NL_PACKET(name, number, fields) P_ ## name = number, -#define NL_INTEGER(pn, pr, member) -#define NL_INT64(pn, pr, member) -#define NL_BIT(pn, pr, member) -#define NL_STRING(pn, pr, member, len) -#include "drbd_nl.h" - P_nl_after_last_packet, -}; - -/* These struct are used to deduce the size of the tag lists: */ -#define NL_PACKET(name, number, fields) \ - struct name ## _tag_len_struct { fields }; -#define NL_INTEGER(pn, pr, member) \ - int member; int tag_and_len ## member; -#define NL_INT64(pn, pr, member) \ - __u64 member; int tag_and_len ## member; -#define NL_BIT(pn, pr, member) \ - unsigned char member:1; int tag_and_len ## member; -#define NL_STRING(pn, pr, member, len) \ - unsigned char member[len]; int member ## _len; \ - int tag_and_len ## member; 
-#include "linux/drbd_nl.h" - -/* declate tag-list-sizes */ -static const int tag_list_sizes[] = { -#define NL_PACKET(name, number, fields) 2 fields , -#define NL_INTEGER(pn, pr, member) + 4 + 4 -#define NL_INT64(pn, pr, member) + 4 + 8 -#define NL_BIT(pn, pr, member) + 4 + 1 -#define NL_STRING(pn, pr, member, len) + 4 + (len) -#include "drbd_nl.h" -}; - -/* The two highest bits are used for the tag type */ -#define TT_MASK 0xC000 -#define TT_INTEGER 0x0000 -#define TT_INT64 0x4000 -#define TT_BIT 0x8000 -#define TT_STRING 0xC000 -/* The next bit indicates if processing of the tag is mandatory */ -#define T_MANDATORY 0x2000 -#define T_MAY_IGNORE 0x0000 -#define TN_MASK 0x1fff -/* The remaining 13 bits are used to enumerate the tags */ - -#define tag_type(T) ((T) & TT_MASK) -#define tag_number(T) ((T) & TN_MASK) - -/* declare tag enums */ -#define NL_PACKET(name, number, fields) fields -enum drbd_tags { -#define NL_INTEGER(pn, pr, member) T_ ## member = pn | TT_INTEGER | pr , -#define NL_INT64(pn, pr, member) T_ ## member = pn | TT_INT64 | pr , -#define NL_BIT(pn, pr, member) T_ ## member = pn | TT_BIT | pr , -#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING | pr , -#include "drbd_nl.h" -}; - -struct tag { - const char *name; - int type_n_flags; - int max_len; -}; - -/* declare tag names */ -#define NL_PACKET(name, number, fields) fields -static const struct tag tag_descriptions[] = { -#define NL_INTEGER(pn, pr, member) [ pn ] = { #member, TT_INTEGER | pr, sizeof(int) }, -#define NL_INT64(pn, pr, member) [ pn ] = { #member, TT_INT64 | pr, sizeof(__u64) }, -#define NL_BIT(pn, pr, member) [ pn ] = { #member, TT_BIT | pr, sizeof(int) }, -#define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING | pr, (len) }, -#include "drbd_nl.h" -}; - -#endif diff -Nru drbd8-8.3.7/drbd/linux/genl_magic_func.h drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/genl_magic_func.h --- drbd8-8.3.7/drbd/linux/genl_magic_func.h 1970-01-01 00:00:00.000000000 +0000 +++ 
drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/genl_magic_func.h 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,422 @@ +#ifndef GENL_MAGIC_FUNC_H +#define GENL_MAGIC_FUNC_H + +#include + +/* + * Magic: declare tla policy {{{1 + * Magic: declare nested policies + * {{{2 + */ +#undef GENL_mc_group +#define GENL_mc_group(group) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + [tag_name] = { .type = NLA_NESTED }, + +static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] \ + __attribute__((unused)) = { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ +{ s_fields }; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, \ + __put, __is_signed) \ + [attr_nr] = { .type = nla_type }, + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \ + __get, __put, __is_signed) \ + [attr_nr] = { .type = nla_type, \ + .len = maxlen - (nla_type == NLA_NUL_STRING) }, + +#include GENL_MAGIC_INCLUDE_FILE + +#ifndef __KERNEL__ +#ifndef pr_info +#define pr_info(args...) fprintf(stderr, args); +#endif +#endif + +#ifdef GENL_MAGIC_DEBUG +static void dprint_field(const char *dir, int nla_type, + const char *name, void *valp) +{ + __u64 val = valp ? 
*(__u32 *)valp : 1; + switch (nla_type) { + case NLA_U8: val = (__u8)val; + case NLA_U16: val = (__u16)val; + case NLA_U32: val = (__u32)val; + pr_info("%s attr %s: %d 0x%08x\n", dir, + name, (int)val, (unsigned)val); + break; + case NLA_U64: + val = *(__u64*)valp; + pr_info("%s attr %s: %lld 0x%08llx\n", dir, + name, (long long)val, (unsigned long long)val); + break; + case NLA_FLAG: + if (val) + pr_info("%s attr %s: set\n", dir, name); + break; + } +} + +static void dprint_array(const char *dir, int nla_type, + const char *name, const char *val, unsigned len) +{ + switch (nla_type) { + case NLA_NUL_STRING: + if (len && val[len-1] == '\0') + len--; + pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val); + break; + default: + /* we can always show 4 byte, + * thats what nlattr are aligned to. */ + pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n", + dir, name, len, val[0], val[1], val[2], val[3]); + } +} + +#define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b); + +/* Name is a member field name of the struct s. + * If s is NULL (only parsing, no copy requested in *_from_attrs()), + * nla is supposed to point to the attribute containing the information + * corresponding to that struct member. */ +#define DPRINT_FIELD(dir, nla_type, name, s, nla) \ + do { \ + if (s) \ + dprint_field(dir, nla_type, #name, &s->name); \ + else if (nla) \ + dprint_field(dir, nla_type, #name, \ + (nla_type == NLA_FLAG) ? 
NULL \ + : nla_data(nla)); \ + } while (0) + +#define DPRINT_ARRAY(dir, nla_type, name, s, nla) \ + do { \ + if (s) \ + dprint_array(dir, nla_type, #name, \ + s->name, s->name ## _len); \ + else if (nla) \ + dprint_array(dir, nla_type, #name, \ + nla_data(nla), nla_len(nla)); \ + } while (0) +#else +#define DPRINT_TLA(a, op, b) do {} while (0) +#define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0) +#define DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0) +#endif + +/* + * Magic: provide conversion functions {{{1 + * populate struct from attribute table: + * {{{2 + */ + +/* processing of generic netlink messages is serialized. + * use one static buffer for parsing of nested attributes */ +static struct nlattr *nested_attr_tb[128]; + +#ifndef BUILD_BUG_ON +/* Force a compilation error if condition is true */ +#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition)) +/* Force a compilation error if condition is true, but also produce a + result (of value 0 and type size_t), so the expression can be used + e.g. in a structure initializer (or where-ever else comma expressions + aren't permitted). 
*/ +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) +#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) +#endif + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static int __ ## s_name ## _from_attrs(struct s_name *s, \ + struct genl_info *info, bool exclude_invariants) \ +{ \ + const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1; \ + struct nlattr *tla = info->attrs[tag_number]; \ + struct nlattr **ntb = nested_attr_tb; \ + struct nlattr *nla; \ + int err; \ + BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb)); \ + if (!tla) \ + return -ENOMSG; \ + DPRINT_TLA(#s_name, "<=-", #tag_name); \ + err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \ + if (err) \ + return err; \ + \ + s_fields \ + return 0; \ +} __attribute__((unused)) \ +static int s_name ## _from_attrs(struct s_name *s, \ + struct genl_info *info) \ +{ \ + return __ ## s_name ## _from_attrs(s, info, false); \ +} __attribute__((unused)) \ +static int s_name ## _from_attrs_for_change(struct s_name *s, \ + struct genl_info *info) \ +{ \ + return __ ## s_name ## _from_attrs(s, info, true); \ +} __attribute__((unused)) \ + +#define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...) 
\ + nla = ntb[attr_nr]; \ + if (nla) { \ + if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \ + pr_info("<< must not change invariant attr: %s\n", #name); \ + return -EEXIST; \ + } \ + assignment; \ + } else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) { \ + /* attribute missing from payload, */ \ + /* which was expected */ \ + } else if ((attr_flag) & DRBD_F_REQUIRED) { \ + pr_info("<< missing attr: %s\n", #name); \ + return -ENOMSG; \ + } + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ + __assign(attr_nr, attr_flag, name, nla_type, type, \ + if (s) \ + s->name = __get(nla); \ + DPRINT_FIELD("<<", nla_type, name, s, nla)) + +/* validate_nla() already checked nla_len <= maxlen appropriately. */ +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ + __assign(attr_nr, attr_flag, name, nla_type, type, \ + if (s) \ + s->name ## _len = \ + __get(s->name, nla, maxlen); \ + DPRINT_ARRAY("<<", nla_type, name, s, nla)) + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +/* + * Magic: define op number to op name mapping {{{1 + * {{{2 + */ +static const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) +__attribute__ ((unused)); +static const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) +{ + switch (cmd) { +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ + case op_num: return #op_name; +#include GENL_MAGIC_INCLUDE_FILE + default: + return "unknown"; + } +} + +#ifdef __KERNEL__ +#include +/* + * Magic: define genl_ops {{{1 + * {{{2 + */ + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ +{ \ + handler \ + .cmd = op_name, \ + .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), \ +}, + +#define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops) +static struct genl_ops ZZZ_genl_ops[] 
__read_mostly = { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +/* + * Define the genl_family, multicast groups, {{{1 + * and provide register/unregister functions. + * {{{2 + */ +#define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) +static struct genl_family ZZZ_genl_family __read_mostly = { + .id = GENL_ID_GENERATE, + .name = __stringify(GENL_MAGIC_FAMILY), + .version = GENL_MAGIC_VERSION, +#ifdef GENL_MAGIC_FAMILY_HDRSZ + .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), +#endif + .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, +}; + +/* + * Magic: define multicast groups + * Magic: define multicast group registration helper + */ +#undef GENL_mc_group +#define GENL_mc_group(group) \ +static struct genl_multicast_group \ +CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = { \ + .name = #group, \ +}; \ +static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ + struct sk_buff *skb, gfp_t flags) \ +{ \ + unsigned int group_id = \ + CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id; \ + if (!group_id) \ + return -EINVAL; \ + return genlmsg_multicast(skb, 0, group_id, flags); \ +} + +#include GENL_MAGIC_INCLUDE_FILE + +int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void) +{ + int err = genl_register_family_with_ops(&ZZZ_genl_family, + ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops)); + if (err) + return err; +#undef GENL_mc_group +#define GENL_mc_group(group) \ + err = genl_register_mc_group(&ZZZ_genl_family, \ + &CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group)); \ + if (err) \ + goto fail; \ + else \ + pr_info("%s: mcg %s: %u\n", #group, \ + __stringify(GENL_MAGIC_FAMILY), \ + CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id); + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_mc_group +#define GENL_mc_group(group) + return 0; +fail: + genl_unregister_family(&ZZZ_genl_family); + return err; +} + +void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void) +{ + genl_unregister_family(&ZZZ_genl_family); 
+} + +/* + * Magic: provide conversion functions {{{1 + * populate skb from struct. + * {{{2 + */ + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s, \ + const bool exclude_sensitive) \ +{ \ + struct nlattr *tla = nla_nest_start(skb, tag_number); \ + if (!tla) \ + goto nla_put_failure; \ + DPRINT_TLA(#s_name, "-=>", #tag_name); \ + s_fields \ + nla_nest_end(skb, tla); \ + return 0; \ + \ +nla_put_failure: \ + if (tla) \ + nla_nest_cancel(skb, tla); \ + return -EMSGSIZE; \ +} \ +static inline int s_name ## _to_priv_skb(struct sk_buff *skb, \ + struct s_name *s) \ +{ \ + return s_name ## _to_skb(skb, s, 0); \ +} \ +static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ + struct s_name *s) \ +{ \ + return s_name ## _to_skb(skb, s, 1); \ +} + + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ + if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ + DPRINT_FIELD(">>", nla_type, name, s, NULL); \ + __put(skb, attr_nr, s->name); \ + } + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ + if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) { \ + DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ + __put(skb, attr_nr, min_t(int, maxlen, \ + s->name ## _len + (nla_type == NLA_NUL_STRING)),\ + s->name); \ + } + +#include GENL_MAGIC_INCLUDE_FILE + + +/* Functions for initializing structs to default values. 
*/ + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) +#undef __u32_field_def +#define __u32_field_def(attr_nr, attr_flag, name, default) \ + x->name = default; +#undef __s32_field_def +#define __s32_field_def(attr_nr, attr_flag, name, default) \ + x->name = default; +#undef __flg_field_def +#define __flg_field_def(attr_nr, attr_flag, name, default) \ + x->name = default; +#undef __str_field_def +#define __str_field_def(attr_nr, attr_flag, name, maxlen) \ + memset(x->name, 0, sizeof(x->name)); \ + x->name ## _len = 0; +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static void set_ ## s_name ## _defaults(struct s_name *x) __attribute__((unused)); \ +static void set_ ## s_name ## _defaults(struct s_name *x) { \ +s_fields \ +} + +#include GENL_MAGIC_INCLUDE_FILE + +#endif /* __KERNEL__ */ + +/* }}}1 */ +#endif /* GENL_MAGIC_FUNC_H */ +/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */ diff -Nru drbd8-8.3.7/drbd/linux/genl_magic_struct.h drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/genl_magic_struct.h --- drbd8-8.3.7/drbd/linux/genl_magic_struct.h 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/genl_magic_struct.h 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,278 @@ +#ifndef GENL_MAGIC_STRUCT_H +#define GENL_MAGIC_STRUCT_H + +#ifndef GENL_MAGIC_FAMILY +# error "you need to define GENL_MAGIC_FAMILY before inclusion" +#endif + +#ifndef GENL_MAGIC_VERSION +# error "you need to define GENL_MAGIC_VERSION before inclusion" +#endif + +#ifndef GENL_MAGIC_INCLUDE_FILE +# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion" +#endif + +#include +#include +#include + +#define CONCAT__(a,b) a ## b +#define CONCAT_(a,b) CONCAT__(a,b) + +extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void); +extern void 
CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); + +/* + * Extension of genl attribute validation policies {{{2 + */ + +/* + * @DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not + * know about. This flag can be set in nlattr->nla_type to indicate that this + * attribute must not be ignored. + * + * We check and remove this flag in drbd_nla_check_mandatory() before + * validating the attribute types and lengths via nla_parse_nested(). + */ +#define DRBD_GENLA_F_MANDATORY (1 << 14) + +/* + * Flags specific to drbd and not visible at the netlink layer, used in + * _from_attrs and _to_skb: + * + * @DRBD_F_REQUIRED: Attribute is required; a request without this attribute is + * invalid. + * + * @DRBD_F_SENSITIVE: Attribute includes sensitive information and must not be + * included in unpriviledged get requests or broadcasts. + * + * @DRBD_F_INVARIANT: Attribute is set when an object is initially created, but + * cannot subsequently be changed. + */ +#define DRBD_F_REQUIRED (1 << 0) +#define DRBD_F_SENSITIVE (1 << 1) +#define DRBD_F_INVARIANT (1 << 2) + +#define __nla_type(x) ((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY)) + +/* }}}1 + * MAGIC + * multi-include macro expansion magic starts here + */ + +/* MAGIC helpers {{{2 */ + +/* possible field types */ +#define __flg_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U8, char, \ + nla_get_u8, NLA_PUT_U8, false) +#define __u8_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ + nla_get_u8, NLA_PUT_U8, false) +#define __u16_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U16, __u16, \ + nla_get_u16, NLA_PUT_U16, false) +#define __u32_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ + nla_get_u32, NLA_PUT_U32, false) +#define __s32_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U32, __s32, \ + nla_get_u32, NLA_PUT_U32, true) 
+#define __u64_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ + nla_get_u64, NLA_PUT_U64, false) +#define __str_field(attr_nr, attr_flag, name, maxlen) \ + __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ + nla_strlcpy, NLA_PUT, false) +#define __bin_field(attr_nr, attr_flag, name, maxlen) \ + __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ + nla_memcpy, NLA_PUT, false) + +/* fields with default values */ +#define __flg_field_def(attr_nr, attr_flag, name, default) \ + __flg_field(attr_nr, attr_flag, name) +#define __u32_field_def(attr_nr, attr_flag, name, default) \ + __u32_field(attr_nr, attr_flag, name) +#define __s32_field_def(attr_nr, attr_flag, name, default) \ + __s32_field(attr_nr, attr_flag, name) +#define __str_field_def(attr_nr, attr_flag, name, maxlen) \ + __str_field(attr_nr, attr_flag, name, maxlen) + +#define GENL_op_init(args...) args +#define GENL_doit(handler) \ + .doit = handler, \ + .flags = GENL_ADMIN_PERM, +#define GENL_dumpit(handler) \ + .dumpit = handler, \ + .flags = GENL_ADMIN_PERM, + +/* }}}1 + * Magic: define the enum symbols for genl_ops + * Magic: define the enum symbols for top level attributes + * Magic: define the enum symbols for nested attributes + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +#undef GENL_mc_group +#define GENL_mc_group(group) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) \ + op_name = op_num, + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ + op_name = op_num, + +enum { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + tag_name = tag_number, + +enum { +#include 
GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +enum { \ + s_fields \ +}; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, \ + __get, __put, __is_signed) \ + T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)), + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, \ + maxlen, __get, __put, __is_signed) \ + T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)), + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 + * Magic: compile time assert unique numbers for operations + * Magic: -"- unique numbers for top level attributes + * Magic: -"- unique numbers for nested attributes + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) \ + case op_name: + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) \ + case op_name: + +static inline void ct_assert_unique_operations(void) +{ + switch (0) { +#include GENL_MAGIC_INCLUDE_FILE + ; + } +} + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + case tag_number: + +static inline void ct_assert_unique_top_level_attributes(void) +{ + switch (0) { +#include GENL_MAGIC_INCLUDE_FILE + ; + } +} + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ +{ \ + switch (0) { \ + s_fields \ + ; \ + } \ +} + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ + case attr_nr: + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ 
+ case attr_nr: + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 + * Magic: declare structs + * struct { + * fields + * }; + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +struct s_name { s_fields }; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + __is_signed) \ + type name; + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, __is_signed) \ + type name[maxlen]; \ + __u32 name ## _len; + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +enum { \ + s_fields \ +}; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put, \ + is_signed) \ + F_ ## name ## _IS_SIGNED = is_signed, + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, \ + __get, __put, is_signed) \ + F_ ## name ## _IS_SIGNED = is_signed, + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 */ +#endif /* GENL_MAGIC_STRUCT_H */ +/* vim: set foldmethod=marker nofoldenable : */ diff -Nru drbd8-8.3.7/drbd/linux/hardirq.h drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/hardirq.h --- drbd8-8.3.7/drbd/linux/hardirq.h 2009-07-27 08:47:42.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/hardirq.h 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -/* Just an empty file. 
*/ diff -Nru drbd8-8.3.7/drbd/linux/lru_cache.h drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/lru_cache.h --- drbd8-8.3.7/drbd/linux/lru_cache.h 2009-11-25 09:06:43.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/lru_cache.h 2012-02-02 14:09:14.000000000 +0000 @@ -32,29 +32,28 @@ #include /* for memset */ #include -/* { compatibility crap */ - -/* needs to be included here, - * because of various old kernel compatibility wrappers */ -#include -#ifdef USE_KMEM_CACHE_S -#define kmem_cache kmem_cache_s -#endif - -#ifdef NEED_BACKPORT_OF_KZALLOC -static inline void *kzalloc(size_t size, int flags) +/* Compatibility code */ +#include "compat.h" +#ifndef COMPAT_HAVE_CLEAR_BIT_UNLOCK +static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr) { - void *rv = kmalloc(size, flags); - if (rv) - memset(rv, 0, size); - - return rv; +#if defined(__x86_64__) || defined(__i386__) || defined(__arch_um__) + barrier(); +#else + smp_mb(); /* Be on the save side for alpha, and others */ +#endif + clear_bit(nr, addr); } -#undef NEED_BACKPORT_OF_KZALLOC #endif - -/* } compatibility crap */ - +#ifndef COMPAT_HAVE_BOOL_TYPE +typedef _Bool bool; +enum { + false = 0, + true = 1 +}; +#define COMPAT_HAVE_BOOL_TYPE +#endif +/* End of Compatibility code */ /* This header file (and its .c file; kernel-doc of functions see there) @@ -88,7 +87,7 @@ usually the condition is softened to regions that _may_ have been target of in-flight WRITE IO, e.g. by only lazily clearing the on-disk write-intent bitmap, trading frequency of meta data transactions against amount of - (possibly unneccessary) resync traffic. + (possibly unnecessary) resync traffic. If we set a hard limit on the area that may be "hot" at any given time, we limit the amount of resync traffic needed for crash recovery. 
@@ -163,16 +162,16 @@ * .list is on one of three lists: * in_use: currently in use (refcnt > 0, lc_number != LC_FREE) * lru: unused but ready to be reused or recycled - * (ts_refcnt == 0, lc_number != LC_FREE), + * (lc_refcnt == 0, lc_number != LC_FREE), * free: unused but ready to be recycled - * (ts_refcnt == 0, lc_number == LC_FREE), + * (lc_refcnt == 0, lc_number == LC_FREE), * * an element is said to be "in the active set", * if either on "in_use" or "lru", i.e. lc_number != LC_FREE. * * DRBD currently (May 2009) only uses 61 elements on the resync lru_cache * (total memory usage 2 pages), and up to 3833 elements on the act_log - * lru_cache, totalling ~215 kB for 64bit architechture, ~53 pages. + * lru_cache, totalling ~215 kB for 64bit architecture, ~53 pages. * * We usually do not actually free these objects again, but only "recycle" * them, as the change "index: -old_label, +LC_FREE" would need a transaction @@ -184,15 +183,17 @@ struct hlist_node colision; struct list_head list; /* LRU list or free list */ unsigned refcnt; - /* back "pointer" into ts_cache->element[index], - * for paranoia, and for "ts_element_to_index" */ + /* back "pointer" into lc_cache->element[index], + * for paranoia, and for "lc_element_to_index" */ unsigned lc_index; /* if we want to track a larger set of objects, * it needs to become arch independend u64 */ unsigned lc_number; - /* special label when on free list */ #define LC_FREE (~0U) + + /* for pending changes */ + unsigned lc_new_number; }; struct lru_cache { @@ -200,6 +201,7 @@ struct list_head lru; struct list_head free; struct list_head in_use; + struct list_head to_be_changed; /* the pre-created kmem cache to allocate the objects from */ struct kmem_cache *lc_cache; @@ -210,26 +212,27 @@ size_t element_off; /* number of elements (indices) */ - unsigned int nr_elements; + unsigned int nr_elements; /* Arbitrary limit on maximum tracked objects. Practical limit is much * lower due to allocation failures, probably. 
For typical use cases, * nr_elements should be a few thousand at most. - * This also limits the maximum value of ts_element.ts_index, allowing the - * 8 high bits of .ts_index to be overloaded with flags in the future. */ + * This also limits the maximum value of lc_element.lc_index, allowing the + * 8 high bits of .lc_index to be overloaded with flags in the future. */ #define LC_MAX_ACTIVE (1<<24) + /* allow to accumulate a few (index:label) changes, + * but no more than max_pending_changes */ + unsigned int max_pending_changes; + /* number of elements currently on to_be_changed list */ + unsigned int pending_changes; + /* statistics */ - unsigned used; /* number of lelements currently on in_use list */ - unsigned long hits, misses, starving, dirty, changed; + unsigned used; /* number of elements currently on in_use list */ + unsigned long hits, misses, starving, locked, changed; /* see below: flag-bits for lru_cache */ unsigned long flags; - /* when changing the label of an index element */ - unsigned int new_number; - - /* for paranoia when changing the label of an index element */ - struct lc_element *changing_element; void *lc_private; const char *name; @@ -245,10 +248,15 @@ /* debugging aid, to catch concurrent access early. * user needs to guarantee exclusive access by proper locking! */ __LC_PARANOIA, - /* if we need to change the set, but currently there is a changing - * transaction pending, we are "dirty", and must deferr further - * changing requests */ + + /* annotate that the set is "dirty", possibly accumulating further + * changes, until a transaction is finally triggered */ __LC_DIRTY, + + /* Locked, no further changes allowed. + * Also used to serialize changing transactions. 
*/ + __LC_LOCKED, + /* if we need to change the set, but currently there is no free nor * unused element available, we are "starving", and must not give out * further references, to guarantee that eventually some refcnt will @@ -260,9 +268,11 @@ }; #define LC_PARANOIA (1<<__LC_PARANOIA) #define LC_DIRTY (1<<__LC_DIRTY) +#define LC_LOCKED (1<<__LC_LOCKED) #define LC_STARVING (1<<__LC_STARVING) extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned max_pending_changes, unsigned e_count, size_t e_size, size_t e_off); extern void lc_reset(struct lru_cache *lc); extern void lc_destroy(struct lru_cache *lc); @@ -273,7 +283,7 @@ extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr); extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e); -extern void lc_changed(struct lru_cache *lc, struct lc_element *e); +extern void lc_committed(struct lru_cache *lc); struct seq_file; extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); @@ -282,32 +292,40 @@ void (*detail) (struct seq_file *, struct lc_element *)); /** - * lc_try_lock - can be used to stop lc_get() from changing the tracked set + * lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set * @lc: the lru cache to operate on * - * Note that the reference counts and order on the active and lru lists may - * still change. Returns true if we aquired the lock. + * Allows (expects) the set to be "dirty". Note that the reference counts and + * order on the active and lru lists may still change. Used to serialize + * changing transactions. Returns true if we aquired the lock. 
*/ -static inline int lc_try_lock(struct lru_cache *lc) +static inline int lc_try_lock_for_transaction(struct lru_cache *lc) { - return !test_and_set_bit(__LC_DIRTY, &lc->flags); + return !test_and_set_bit(__LC_LOCKED, &lc->flags); } /** + * lc_try_lock - variant to stop lc_get() from changing the tracked set + * @lc: the lru cache to operate on + * + * Note that the reference counts and order on the active and lru lists may + * still change. Only works on a "clean" set. Returns true if we aquired the + * lock, which means there are no pending changes, and any further attempt to + * change the set will not succeed until the next lc_unlock(). + */ +extern int lc_try_lock(struct lru_cache *lc); + +/** * lc_unlock - unlock @lc, allow lc_get() to change the set again * @lc: the lru cache to operate on */ static inline void lc_unlock(struct lru_cache *lc) { clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_LOCKED, &lc->flags); } -static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) -{ - struct lc_element *e = lc_find(lc, enr); - return e && e->refcnt; -} +extern bool lc_is_used(struct lru_cache *lc, unsigned int enr); #define lc_entry(ptr, type, member) \ container_of(ptr, type, member) diff -Nru drbd8-8.3.7/drbd/linux/memcontrol.h drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/memcontrol.h --- drbd8-8.3.7/drbd/linux/memcontrol.h 2009-07-27 08:47:43.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/memcontrol.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,3 +0,0 @@ -/* just an empty file - * memcontrol.h did not exist prior to 2.6.25. - * but it needs more recent kernels for mm_inline.h to work. */ diff -Nru drbd8-8.3.7/drbd/linux/mutex.h drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/mutex.h --- drbd8-8.3.7/drbd/linux/mutex.h 2009-07-27 08:47:42.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/mutex.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,37 +0,0 @@ -/* "Backport" of the mutex to older Linux-2.6.x kernels. 
- */ -#ifndef __LINUX_MUTEX_H -#define __LINUX_MUTEX_H - -#include - -struct mutex { - struct semaphore sem; -}; - -static inline void mutex_init(struct mutex *m) -{ - sema_init(&m->sem, 1); -} - -static inline void mutex_lock(struct mutex *m) -{ - down(&m->sem); -} - -static inline int mutex_lock_interruptible(struct mutex *m) -{ - return down_interruptible(&m->sem); -} - -static inline void mutex_unlock(struct mutex *m) -{ - up(&m->sem); -} - -static inline int mutex_is_locked(struct mutex *lock) -{ - return atomic_read(&lock->sem.count) != 1; -} - -#endif diff -Nru drbd8-8.3.7/drbd/linux/tracepoint.h drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/tracepoint.h --- drbd8-8.3.7/drbd/linux/tracepoint.h 2009-07-27 08:47:43.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/linux/tracepoint.h 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -struct tracepoint; diff -Nru drbd8-8.3.7/drbd/lru_cache.c drbd8-8.4.1+git55a81dc~cmd1/drbd/lru_cache.c --- drbd8-8.3.7/drbd/lru_cache.c 2009-11-25 09:06:43.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd/lru_cache.c 2012-02-02 14:09:14.000000000 +0000 @@ -39,8 +39,8 @@ } while (0) #define RETURN(x...) 
do { \ - clear_bit(__LC_PARANOIA, &lc->flags); \ - smp_mb__after_clear_bit(); return x ; } while (0) + clear_bit_unlock(__LC_PARANOIA, &lc->flags); \ + return x ; } while (0) /* BUG() if e is not one of the elements tracked by lc */ #define PARANOIA_LC_ELEMENT(lc, e) do { \ @@ -50,9 +50,40 @@ BUG_ON(i >= lc_->nr_elements); \ BUG_ON(lc_->lc_element[i] != e_); } while (0) + +/* We need to atomically + * - try to grab the lock (set LC_LOCKED) + * - only if there is no pending transaction + * (neither LC_DIRTY nor LC_STARVING is set) + * Because of PARANOIA_ENTRY() above abusing lc->flags as well, + * it is not sufficient to just say + * return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED); + */ +int lc_try_lock(struct lru_cache *lc) +{ + unsigned long val; + do { + val = cmpxchg(&lc->flags, 0, LC_LOCKED); + } while (unlikely (val == LC_PARANOIA)); + /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */ + return 0 == val; +#if 0 + /* Alternative approach, spin in case someone enters or leaves a + * PARANOIA_ENTRY()/RETURN() section. */ + unsigned long old, new, val; + do { + old = lc->flags & LC_PARANOIA; + new = old | LC_LOCKED; + val = cmpxchg(&lc->flags, old, new); + } while (unlikely (val == (old ^ LC_PARANOIA))); + return old == val; +#endif +} + /** * lc_create - prepares to track objects in an active set * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details + * @max_pending_changes: maximum changes to accumulate until a transaction is required * @e_count: number of elements allowed to be active simultaneously * @e_size: size of the tracked objects * @e_off: offset to the &struct lc_element member in a tracked object @@ -61,6 +92,7 @@ * or NULL on (allocation) failure. 
*/ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned max_pending_changes, unsigned e_count, size_t e_size, size_t e_off) { struct hlist_head *slot = NULL; @@ -79,7 +111,7 @@ if (e_count > LC_MAX_ACTIVE) return NULL; - slot = kzalloc(e_count * sizeof(struct hlist_head*), GFP_KERNEL); + slot = kcalloc(e_count, sizeof(struct hlist_head), GFP_KERNEL); if (!slot) goto out_fail; element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL); @@ -93,12 +125,13 @@ INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); INIT_LIST_HEAD(&lc->free); + INIT_LIST_HEAD(&lc->to_be_changed); lc->name = name; lc->element_size = e_size; lc->element_off = e_off; lc->nr_elements = e_count; - lc->new_number = LC_FREE; + lc->max_pending_changes = max_pending_changes; lc->lc_cache = cache; lc->lc_element = element; lc->lc_slot = slot; @@ -112,6 +145,7 @@ e = p + e_off; e->lc_index = i; e->lc_number = LC_FREE; + e->lc_new_number = LC_FREE; list_add(&e->list, &lc->free); element[i] = e; } @@ -170,15 +204,15 @@ INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); INIT_LIST_HEAD(&lc->free); + INIT_LIST_HEAD(&lc->to_be_changed); lc->used = 0; lc->hits = 0; lc->misses = 0; lc->starving = 0; - lc->dirty = 0; + lc->locked = 0; lc->changed = 0; + lc->pending_changes = 0; lc->flags = 0; - lc->changing_element = NULL; - lc->new_number = LC_FREE; memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); for (i = 0; i < lc->nr_elements; i++) { @@ -189,6 +223,7 @@ /* re-init it */ e->lc_index = i; e->lc_number = LC_FREE; + e->lc_new_number = LC_FREE; list_add(&e->list, &lc->free); } } @@ -203,14 +238,14 @@ /* NOTE: * total calls to lc_get are * (starving + hits + misses) - * misses include "dirty" count (update from an other thread in + * misses include "locked" count (update from an other thread in * progress) and "changed", when this in fact lead to an successful * update of the cache. 
*/ return seq_printf(seq, "\t%s: used:%u/%u " - "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", + "hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", lc->name, lc->used, lc->nr_elements, - lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); + lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); } static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) @@ -219,16 +254,8 @@ } -/** - * lc_find - find element by label, if present in the hash table - * @lc: The lru_cache object - * @enr: element number - * - * Returns the pointer to an element, if the element with the requested - * "label" or element number is present in the hash table, - * or NULL if not found. Does not change the refcnt. - */ -struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) +static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, + bool include_changing) { struct hlist_node *n; struct lc_element *e; @@ -236,29 +263,48 @@ BUG_ON(!lc); BUG_ON(!lc->nr_elements); hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { - if (e->lc_number == enr) + /* "about to be changed" elements, pending transaction commit, + * are hashed by their "new number". "Normal" elements have + * lc_number == lc_new_number. */ + if (e->lc_new_number != enr) + continue; + if (e->lc_new_number == e->lc_number || include_changing) return e; + break; } return NULL; } -/* returned element will be "recycled" immediately */ -static struct lc_element *lc_evict(struct lru_cache *lc) +/** + * lc_find - find element by label, if present in the hash table + * @lc: The lru_cache object + * @enr: element number + * + * Returns the pointer to an element, if the element with the requested + * "label" or element number is present in the hash table, + * or NULL if not found. Does not change the refcnt. + * Ignores elements that are "about to be used", i.e. not yet in the active + * set, but still pending transaction commit. 
+ */ +struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) { - struct list_head *n; - struct lc_element *e; - - if (list_empty(&lc->lru)) - return NULL; - - n = lc->lru.prev; - e = list_entry(n, struct lc_element, list); - - PARANOIA_LC_ELEMENT(lc, e); + return __lc_find(lc, enr, 0); +} - list_del(&e->list); - hlist_del(&e->colision); - return e; +/** + * lc_is_used - find element by label + * @lc: The lru_cache object + * @enr: element number + * + * Returns true, if the element with the requested "label" or element number is + * present in the hash table, and is used (refcnt > 0). + * Also finds elements that are not _currently_ used but only "about to be + * used", i.e. on the "to_be_changed" list, pending transaction commit. + */ +bool lc_is_used(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e = __lc_find(lc, enr, 1); + return e && e->refcnt; } /** @@ -275,22 +321,34 @@ PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt); - e->lc_number = LC_FREE; + e->lc_number = e->lc_new_number = LC_FREE; hlist_del_init(&e->colision); list_move(&e->list, &lc->free); RETURN(); } -static struct lc_element *lc_get_unused_element(struct lru_cache *lc) +static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number) { struct list_head *n; + struct lc_element *e; - if (list_empty(&lc->free)) - return lc_evict(lc); + if (!list_empty(&lc->free)) + n = lc->free.next; + else if (!list_empty(&lc->lru)) + n = lc->lru.prev; + else + return NULL; + + e = list_entry(n, struct lc_element, list); + PARANOIA_LC_ELEMENT(lc, e); + + e->lc_new_number = new_number; + if (!hlist_unhashed(&e->colision)) + __hlist_del(&e->colision); + hlist_add_head(&e->colision, lc_hash_slot(lc, new_number)); + list_move(&e->list, &lc->to_be_changed); - n = lc->free.next; - list_del(n); - return list_entry(n, struct lc_element, list); + return e; } static int lc_unused_element_available(struct lru_cache *lc) @@ -303,45 +361,7 @@ return 0; } - -/** - * lc_get - 
get element by label, maybe change the active set - * @lc: the lru cache to operate on - * @enr: the label to look up - * - * Finds an element in the cache, increases its usage count, - * "touches" and returns it. - * - * In case the requested number is not present, it needs to be added to the - * cache. Therefore it is possible that an other element becomes evicted from - * the cache. In either case, the user is notified so he is able to e.g. keep - * a persistent log of the cache changes, and therefore the objects in use. - * - * Return values: - * NULL - * The cache was marked %LC_STARVING, - * or the requested label was not in the active set - * and a changing transaction is still pending (@lc was marked %LC_DIRTY). - * Or no unused or free element could be recycled (@lc will be marked as - * %LC_STARVING, blocking further lc_get() operations). - * - * pointer to the element with the REQUESTED element number. - * In this case, it can be used right away - * - * pointer to an UNUSED element with some different element number, - * where that different number may also be %LC_FREE. - * - * In this case, the cache is marked %LC_DIRTY (blocking further changes), - * and the returned element pointer is removed from the lru list and - * hash collision chains. The user now should do whatever housekeeping - * is necessary. - * Then he must call lc_changed(lc,element_pointer), to finish - * the change. - * - * NOTE: The user needs to check the lc_number on EACH use, so he recognizes - * any cache set change. - */ -struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) +static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool may_change) { struct lc_element *e; @@ -351,8 +371,12 @@ RETURN(NULL); } - e = lc_find(lc, enr); - if (e) { + e = __lc_find(lc, enr, 1); + /* if lc_new_number != lc_number, + * this enr is currently being pulled in already, + * and will be available once the pending transaction + * has been committed. 
*/ + if (e && e->lc_new_number == e->lc_number) { ++lc->hits; if (e->refcnt++ == 0) lc->used++; @@ -361,6 +385,26 @@ } ++lc->misses; + if (!may_change) + RETURN(NULL); + + /* It has been found above, but on the "to_be_changed" list, not yet + * committed. Don't pull it in twice, wait for the transaction, then + * try again */ + if (e) + RETURN(NULL); + + /* To avoid races with lc_try_lock(), first, mark us dirty + * (using test_and_set_bit, as it implies memory barriers), ... */ + test_and_set_bit(__LC_DIRTY, &lc->flags); + + /* ... only then check if it is locked anyways. If lc_unlock clears + * the dirty bit again, that's not a problem, we will come here again. + */ + if (test_bit(__LC_LOCKED, &lc->flags)) { + ++lc->locked; + RETURN(NULL); + } /* In case there is nothing available and we can not kick out * the LRU element, we have to wait ... @@ -370,71 +414,109 @@ RETURN(NULL); } - /* it was not present in the active set. - * we are going to recycle an unused (or even "free") element. - * user may need to commit a transaction to record that change. - * we serialize on flags & TF_DIRTY */ - if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { - ++lc->dirty; + /* It was not present in the active set. We are going to recycle an + * unused (or even "free") element, but we won't accumulate more than + * max_pending_changes changes. */ + if (lc->pending_changes >= lc->max_pending_changes) RETURN(NULL); - } - e = lc_get_unused_element(lc); + e = lc_prepare_for_change(lc, enr); BUG_ON(!e); clear_bit(__LC_STARVING, &lc->flags); BUG_ON(++e->refcnt != 1); lc->used++; - - lc->changing_element = e; - lc->new_number = enr; + lc->pending_changes++; RETURN(e); } -/* similar to lc_get, - * but only gets a new reference on an existing element. - * you either get the requested element, or NULL. - * will be consolidated into one function. 
+/** + * lc_get - get element by label, maybe change the active set + * @lc: the lru cache to operate on + * @enr: the label to look up + * + * Finds an element in the cache, increases its usage count, + * "touches" and returns it. + * + * In case the requested number is not present, it needs to be added to the + * cache. Therefore it is possible that an other element becomes evicted from + * the cache. In either case, the user is notified so he is able to e.g. keep + * a persistent log of the cache changes, and therefore the objects in use. + * + * Return values: + * NULL + * The cache was marked %LC_STARVING, + * or the requested label was not in the active set + * and a changing transaction is still pending (@lc was marked %LC_DIRTY). + * Or no unused or free element could be recycled (@lc will be marked as + * %LC_STARVING, blocking further lc_get() operations). + * + * pointer to the element with the REQUESTED element number. + * In this case, it can be used right away + * + * pointer to an UNUSED element with some different element number, + * where that different number may also be %LC_FREE. + * + * In this case, the cache is marked %LC_DIRTY, + * so lc_try_lock() will no longer succeed. + * The returned element pointer is moved to the "to_be_changed" list, + * and registered with the new element number on the hash collision chains, + * so it is possible to pick it up from lc_is_used(). + * Up to "max_pending_changes" (see lc_create()) can be accumulated. + * The user now should do whatever housekeeping is necessary, + * typically serialize on lc_try_lock_for_transaction(), then call + * lc_committed(lc) and lc_unlock(), to finish the change. + * + * NOTE: The user needs to check the lc_number on EACH use, so he recognizes + * any cache set change. 
*/ -struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) +struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) { - struct lc_element *e; - - PARANOIA_ENTRY(); - if (lc->flags & LC_STARVING) { - ++lc->starving; - RETURN(NULL); - } + return __lc_get(lc, enr, 1); +} - e = lc_find(lc, enr); - if (e) { - ++lc->hits; - if (e->refcnt++ == 0) - lc->used++; - list_move(&e->list, &lc->in_use); /* Not evictable... */ - } - RETURN(e); +/** + * lc_try_get - get element by label, if present; do not change the active set + * @lc: the lru cache to operate on + * @enr: the label to look up + * + * Finds an element in the cache, increases its usage count, + * "touches" and returns it. + * + * Return values: + * NULL + * The cache was marked %LC_STARVING, + * or the requested label was not in the active set + * + * pointer to the element with the REQUESTED element number. + * In this case, it can be used right away + */ +struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) +{ + return __lc_get(lc, enr, 0); } /** - * lc_changed - tell @lc that the change has been recorded + * lc_committed - tell @lc that pending changes have been recorded * @lc: the lru cache to operate on - * @e: the element pending label change + * + * User is expected to serialize on explicit lc_try_lock_for_transaction() + * before the transaction is started, and later needs to lc_unlock() explicitly + * as well. 
*/ -void lc_changed(struct lru_cache *lc, struct lc_element *e) +void lc_committed(struct lru_cache *lc) { + struct lc_element *e, *tmp; + PARANOIA_ENTRY(); - BUG_ON(e != lc->changing_element); - PARANOIA_LC_ELEMENT(lc, e); - ++lc->changed; - e->lc_number = lc->new_number; - list_add(&e->list, &lc->in_use); - hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); - lc->changing_element = NULL; - lc->new_number = LC_FREE; - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) { + /* count number of changes, not number of transactions */ + ++lc->changed; + e->lc_number = e->lc_new_number; + list_move(&e->list, &lc->in_use); + } + lc->pending_changes = 0; RETURN(); } @@ -453,13 +535,12 @@ PARANOIA_ENTRY(); PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt == 0); - BUG_ON(e == lc->changing_element); + BUG_ON(e->lc_number != e->lc_new_number); if (--e->refcnt == 0) { /* move it to the front of LRU. */ list_move(&e->list, &lc->lru); lc->used--; - clear_bit(__LC_STARVING, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_STARVING, &lc->flags); } RETURN(e->refcnt); } @@ -499,16 +580,24 @@ void lc_set(struct lru_cache *lc, unsigned int enr, int index) { struct lc_element *e; + struct list_head *lh; if (index < 0 || index >= lc->nr_elements) return; e = lc_element_by_index(lc, index); - e->lc_number = enr; + BUG_ON(e->lc_number != e->lc_new_number); + BUG_ON(e->refcnt != 0); + e->lc_number = e->lc_new_number = enr; hlist_del_init(&e->colision); - hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); - list_move(&e->list, e->refcnt ? 
&lc->in_use : &lc->lru); + if (enr == LC_FREE) + lh = &lc->free; + else { + hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); + lh = &lc->lru; + } + list_move(&e->list, lh); } /** @@ -538,18 +627,3 @@ } } } - -EXPORT_SYMBOL(lc_create); -EXPORT_SYMBOL(lc_reset); -EXPORT_SYMBOL(lc_destroy); -EXPORT_SYMBOL(lc_set); -EXPORT_SYMBOL(lc_del); -EXPORT_SYMBOL(lc_try_get); -EXPORT_SYMBOL(lc_find); -EXPORT_SYMBOL(lc_get); -EXPORT_SYMBOL(lc_put); -EXPORT_SYMBOL(lc_changed); -EXPORT_SYMBOL(lc_element_by_index); -EXPORT_SYMBOL(lc_index_of); -EXPORT_SYMBOL(lc_seq_printf_stats); -EXPORT_SYMBOL(lc_seq_dump_details); diff -Nru drbd8-8.3.7/drbd-kernel.spec.in drbd8-8.4.1+git55a81dc~cmd1/drbd-kernel.spec.in --- drbd8-8.3.7/drbd-kernel.spec.in 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd-kernel.spec.in 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,110 @@ +Name: drbd-kernel +Summary: Kernel driver for DRBD +Version: @PACKAGE_VERSION@ +Release: 1%{?dist} +Source: http://oss.linbit.com/%{name}/8.3/drbd-%{version}.tar.gz +License: GPLv2+ +Group: System Environment/Kernel +URL: http://www.drbd.org/ +BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) +%if ! %{defined suse_version} +BuildRequires: redhat-rpm-config +%endif +%if %{defined kernel_module_package_buildreqs} +BuildRequires: %kernel_module_package_buildreqs +%endif + +%description +This module is the kernel-dependent driver for DRBD. This is split out so +that multiple kernel driver versions can be installed, one for each +installed kernel. + +%prep +%setup -q -n drbd-%{version} + +%if %{defined suse_kernel_module_package} +# Support also sles10, where kernel_module_package was not yet defined. +# In sles11, suse_k_m_p became a wrapper around k_m_p. + +%if 0%{?suse_version} < 1110 +# We need to exclude some flavours on sles10 etc, +# or we hit an rpm internal buffer limit. 
+%suse_kernel_module_package -n drbd -p preamble -f filelist-suse kdump kdumppae vmi vmipae um +%else +%suse_kernel_module_package -n drbd -p preamble -f filelist-suse +%endif +%else +# Concept stolen from sles kernel-module-subpackage: +# include the kernel version in the package version, +# so we can have more than one kmod-drbd. +# Needed, because even though kABI is still "compatible" in RHEL 6.0 to 6.1, +# the actual functionality differs very much: 6.1 does no longer do BARRIERS, +# but wants FLUSH/FUA instead. +# For convenience, we want both 6.0 and 6.1 in the same repository, +# and have yum/rpm figure out via dependencies, which kmod version should be installed. +# This is a dirty hack, non generic, and should probably be enclosed in some "if-on-rhel6". +%define _this_kmp_version %{version}_%(echo %kernel_version | sed -r 'y/-/_/; s/\.el.\.(x86_64|i.86)$//;') +%kernel_module_package -v %_this_kmp_version -n drbd -p preamble -f filelist-redhat +%endif + +%build +rm -rf obj +mkdir obj +ln -s ../scripts obj/ + +for flavor in %flavors_to_build; do + cp -r drbd obj/$flavor + #make -C %{kernel_source $flavor} M=$PWD/obj/$flavor + make -C obj/$flavor %{_smp_mflags} all KDIR=%{kernel_source $flavor} +done + +%install +export INSTALL_MOD_PATH=$RPM_BUILD_ROOT + +%if %{defined kernel_module_package_moddir} +export INSTALL_MOD_DIR=%{kernel_module_package_moddir drbd} +%else +%if %{defined suse_kernel_module_package} +export INSTALL_MOD_DIR=updates +%else +export INSTALL_MOD_DIR=extra/drbd +%endif +%endif + +# Very likely kernel_module_package_moddir did ignore the parameter, +# so we just append it here. The weak-modules magic expects that location. 
+[ $INSTALL_MOD_DIR = extra ] && INSTALL_MOD_DIR=extra/drbd + +for flavor in %flavors_to_build ; do + make -C %{kernel_source $flavor} modules_install \ + M=$PWD/obj/$flavor + kernelrelease=$(make -s -C %{kernel_source $flavor} kernelrelease) + mv obj/$flavor/.kernel.config.gz obj/k-config-$kernelrelease.gz +done + +%if %{defined suse_kernel_module_package} +# On SUSE, putting the modules into the default path determined by +# %kernel_module_package_moddir is enough to give them priority over +# shipped modules. +rm -f drbd.conf +%else +mkdir -p $RPM_BUILD_ROOT/etc/depmod.d +echo "override drbd * weak-updates" \ + > $RPM_BUILD_ROOT/etc/depmod.d/drbd.conf +%endif + +%clean +rm -rf %{buildroot} + +%changelog +* Tue Dec 20 2011 Philipp Reisner - 8.4.1-1 +- New upstream release. + +* Mon Jul 18 2011 Philipp Reisner - 8.4.0-1 +- New upstream release. + +* Fri Jan 28 2011 Philipp Reisner - 8.3.10-1 +- New upstream release. + +* Wed Nov 25 2010 Andreas Gruenbacher - 8.3.9-1 +- Convert to a Kernel Module Package. diff -Nru drbd8-8.3.7/drbd-km.spec.in drbd8-8.4.1+git55a81dc~cmd1/drbd-km.spec.in --- drbd8-8.3.7/drbd-km.spec.in 2010-01-07 09:09:33.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd-km.spec.in 2012-02-02 14:09:14.000000000 +0000 @@ -10,7 +10,7 @@ Name: @PACKAGE_TARNAME@-km Summary: DRBD driver for Linux Version: @PACKAGE_VERSION@ -Release: 12@RPM_DIST_TAG@ +Release: 1@RPM_DIST_TAG@ Source: http://oss.linbit.com/%{name}/8.3/drbd-%{version}.tar.gz License: GPLv2+ ExclusiveOS: linux @@ -100,6 +100,30 @@ %changelog +* Tue Dec 20 2011 Philipp Reisner - 8.4.1-1 +- New upstream release. + +* Mon Jul 18 2011 Philipp Reisner - 8.4.0-1 +- New upstream release. + +* Fri Jan 28 2011 Philipp Reisner - 8.3.10-1 +- New upstream release. + +* Fri Oct 22 2010 Philipp Reisner - 8.3.9-1 +- New upstream release. + +* Wed Jun 2 2010 Philipp Reisner - 8.3.8-1 +- New upstream release. + +* Thu Jan 13 2010 Philipp Reisner - 8.3.7-1 +- New upstream release. 
+ +* Thu Nov 8 2009 Philipp Reisner - 8.3.6-1 +- New upstream release. + +* Thu Oct 27 2009 Philipp Reisner - 8.3.5-1 +- New upstream release. + * Wed Oct 21 2009 Florian Haas - 8.3.4-12 - Packaging makeover. diff -Nru drbd8-8.3.7/drbd.spec.in drbd8-8.4.1+git55a81dc~cmd1/drbd.spec.in --- drbd8-8.3.7/drbd.spec.in 2010-01-13 16:16:02.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd.spec.in 2012-02-02 14:09:14.000000000 +0000 @@ -21,6 +21,7 @@ %bcond_without bashcompletion # --with xen is ignored on any non-x86 architecture %bcond_without xen +%bcond_without legacy_utils %ifnarch %{ix86} x86_64 %global _without_xen --without-xen %endif @@ -49,9 +50,12 @@ %if %{with pacemaker} Requires: %{name}-pacemaker = %{version} %endif -%if %{with rgmanager} -Requires: %{name}-rgmanager = %{version} -%endif +## %if %{with rgmanager} +## ## No. +## ## We don't want to annoy the majority of our userbase on pacemaker +## ## by pulling in the full rgmanager stack via drbd-rgmanager as well. +## Requires: %{name}-rgmanager = %{version} +## %endif %if %{with heartbeat} Requires: %{name}-heartbeat = %{version} %endif @@ -77,6 +81,12 @@ %package utils Summary: Management utilities for DRBD Group: System Environment/Kernel +# We used to have one monolithic userland package. +# Since all other packages require drbd-utils, +# it should be sufficient to add the conflict here. +Conflicts: drbd < 8.3.6 +# These exist in centos extras: +Conflicts: drbd82 drbd83 @RPM_REQ_CHKCONFIG_POST@ @RPM_REQ_CHKCONFIG_PREUN@ @@ -92,6 +102,11 @@ /sbin/drbdsetup /sbin/drbdadm /sbin/drbdmeta +%if %{with legacy_utils} +%dir /lib/drbd/ +/lib/drbd/drbdsetup-83 +/lib/drbd/drbdadm-83 +%endif %{_initddir}/%{name} %{_sbindir}/drbd-overview %dir %{_prefix}/lib/%{name} @@ -179,12 +194,32 @@ %{_prefix}/lib/ocf/resource.d/linbit/drbd %endif # with pacemaker +# Dependencies for drbd-rgmanager are particularly awful. 
On RHEL 5 +# and prior (and corresponding Fedora releases), %{_datadir}/cluster +# was owned by rgmanager version 2, so we have to depend on that. +# +# With Red Hat Cluster 3.0.1 (around Fedora 12), the DRBD resource +# agent was merged in, and it became part of the resource-agents 3 +# package (which of course is different from resource-agents on all +# other platforms -- go figure). So for resource-agents >= 3, we must +# generally conflict. +# +# Then for RHEL 6, Red Hat in all their glory decided to keep the +# packaging scheme, but kicked DRBD out of the resource-agents +# package. Thus, for RHEL 6 specifically, we must not conflict with +# resource-agents >=3, but instead require it. +# +# The saga continues: +# In RHEL 6.1 they have listed the drbd resource agent as valid agent, +# but do not include it in their resource-agents package. -> So we +# drop any dependency regarding rgmanager's version. +# +# All of this for exactly two (2) files. %if %{with rgmanager} %package rgmanager Summary: Red Hat Cluster Suite agent for DRBD Group: System Environment/Base -Requires: %{name}-utils = %{version}-%{release}, rgmanager < 3 -Conflicts: resource-agents >= 3 +Requires: %{name}-utils = %{version}-%{release} @RPM_SUBPACKAGE_NOARCH@ %description rgmanager @@ -255,6 +290,7 @@ %{?_without_heartbeat} \ %{?_with_rgmanager} \ %{?_without_bashcompletion} \ + %{?_without_legacy_utils} \ --with-initdir=%{_initddir} make %{?_smp_mflags} @@ -281,6 +317,21 @@ %changelog +* Tue Dec 20 2011 Philipp Reisner - 8.4.1-1 +- New upstream release. + +* Wed Jul 15 2011 Philipp Reisner - 8.4.0-1 +- New upstream release. + +* Fri Jan 28 2011 Philipp Reisner - 8.3.10-1 +- New upstream release. + +* Fri Oct 22 2010 Philipp Reisner - 8.3.9-1 +- New upstream release. + +* Wed Jun 2 2010 Philipp Reisner - 8.3.8-1 +- New upstream release. + * Thu Jan 13 2010 Philipp Reisner - 8.3.7-1 - New upstream release. 
diff -Nru drbd8-8.3.7/drbd_config.h drbd8-8.4.1+git55a81dc~cmd1/drbd_config.h --- drbd8-8.3.7/drbd_config.h 2010-01-13 16:14:27.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/drbd_config.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,104 +0,0 @@ -/* - drbd_config.h - DRBD's compile time configuration. - - drbd is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - drbd is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with drbd; see the file COPYING. If not, write to - the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef DRBD_CONFIG_H -#define DRBD_CONFIG_H - -extern const char *drbd_buildtag(void); - -#define REL_VERSION "8.3.7" -#define API_VERSION 88 -#define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 91 - -#ifndef __CHECKER__ /* for a sparse run, we need all STATICs */ -#define DBG_ALL_SYMBOLS /* no static functs, improves quality of OOPS traces */ -#endif - -/* drbd_assert_breakpoint() function -#define DBG_ASSERTS - */ - -/* Dump all cstate changes */ -#define DUMP_MD 2 - -/* some extra checks -#define PARANOIA - */ - -/* Enable fault insertion code */ -#define DRBD_ENABLE_FAULTS - -/* RedHat's 2.6.9 kernels have the gfp_t type. Mainline has this feature - * since 2.6.16. If you build for RedHat enable the line below. */ -#define KERNEL_HAS_GFP_T - -/* kernel.org has atomic_add_return since 2.6.10. some vendor kernels - * have it backported, though. Others don't. */ -//#define NEED_BACKPORT_OF_ATOMIC_ADD - -/* 2.6.something has deprecated kmem_cache_t - * some older still use it. 
- * some have it defined as struct kmem_cache_s, some as struct kmem_cache */ -//#define USE_KMEM_CACHE_S - -/* 2.6.something has sock_create_kern (SE-linux security context stuff) - * some older distribution kernels don't. */ -//#define DEFINE_SOCK_CREATE_KERN - -/* 2.6.24 and later have kernel_sock_shutdown. - * some older distribution kernels may also have a backport. */ -//#define DEFINE_KERNEL_SOCK_SHUTDOWN - -/* in older kernels (vanilla < 2.6.16) struct netlink_skb_parms has a - * member called dst_groups. Later it is called dst_group (without 's'). */ -//#define DRBD_NL_DST_GROUPS - -/* in older kernels (vanilla < 2.6.14) is no kzalloc() */ -//#define NEED_BACKPORT_OF_KZALLOC - -// some vendor kernels have it, some don't -//#define NEED_SG_SET_BUF -#define HAVE_LINUX_SCATTERLIST_H - -/* 2.6.29 and up no longer have swabb.h */ -//#define HAVE_LINUX_BYTEORDER_SWABB_H - -/* some vendor kernel have it backported. */ -#define HAVE_SET_CPUS_ALLOWED_PTR - -/* Some vendor kernels < 2.6.7 might define msleep in one or - * another way .. */ - -#define KERNEL_HAS_MSLEEP - -/* Some other kernels < 2.6.8 do not have struct kvec, - * others do.. */ - -#define KERNEL_HAS_KVEC - -/* Actually availabe since 2.6.25, but venders have backported... 
- */ -#define KERNEL_HAS_PROC_CREATE - -/* In 2.6.32 we finally fixed connector to pass netlink_skb_parms to the callback - */ -#define KERNEL_HAS_CN_SKB_PARMS - -#endif diff -Nru drbd8-8.3.7/filelist-redhat drbd8-8.4.1+git55a81dc~cmd1/filelist-redhat --- drbd8-8.3.7/filelist-redhat 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/filelist-redhat 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,11 @@ +%defattr(644,root,root,755) +%doc COPYING +%doc ChangeLog +%if 0%(grep -q "release 5" /etc/redhat-release && echo 1) +/lib/modules/%verrel%variant +%doc obj/k-config-%verrel%variant.gz +%else +/lib/modules/%verrel%dotvariant +%doc obj/k-config-%verrel%dotvariant.gz +%endif +%config /etc/depmod.d/drbd.conf diff -Nru drbd8-8.3.7/filelist-suse drbd8-8.4.1+git55a81dc~cmd1/filelist-suse --- drbd8-8.3.7/filelist-suse 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/filelist-suse 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,11 @@ +%defattr(-,root,root) +%doc COPYING +%doc ChangeLog +%if %{defined 3} +# on sles10, _suse_kernel_module_subpackage takes 3 arguments still +/lib/modules/%3-%1 +%doc obj/k-config-%3-%1.gz +%else +/lib/modules/%2-%1 +%doc obj/k-config-%2-%1.gz +%endif diff -Nru drbd8-8.3.7/preamble drbd8-8.4.1+git55a81dc~cmd1/preamble --- drbd8-8.3.7/preamble 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/preamble 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,22 @@ +# always require a suitable userland +Requires: drbd-utils = %{version} + +%if %{defined suse_kernel_module_package} +%if 0%{?sles_version} == 10 +%{expand:%(cat %_sourcedir/drbd/preamble-sles10)} + +%else +%if 0%{?sles_version} == 11 +%{expand:%(cat %_sourcedir/drbd/preamble-sles11)} + +%endif +%endif +%else +%if 0%((test -e /etc/redhat-release && grep -q "release 5" /etc/redhat-release) && echo 1) +%{expand:%(cat %_sourcedir/drbd/preamble-rhel5)} + +# CentOS: +Conflicts: kmod-drbd82 kmod-drbd83 + +%endif +%endif diff -Nru 
drbd8-8.3.7/preamble-rhel5 drbd8-8.4.1+git55a81dc~cmd1/preamble-rhel5 --- drbd8-8.3.7/preamble-rhel5 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/preamble-rhel5 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,100 @@ +Provides: drbd-km-2.6.18_238.1.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_238.1.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_238.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_238.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_194.32.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_194.32.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_194.26.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_194.26.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_194.17.4.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_194.17.4.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_194.17.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_194.17.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_194.11.4.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_194.11.4.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_194.11.3.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_194.11.3.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_194.11.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_194.11.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_194.8.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_194.8.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_194.3.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_194.3.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_194.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_194.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_164.15.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_164.15.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_164.11.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_164.11.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_164.10.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_164.10.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_164.9.1.el5%variant = 8.3.10 +Obsoletes: 
drbd-km-2.6.18_164.9.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_164.6.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_164.6.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_164.2.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_164.2.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_164.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_164.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_128.7.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_128.7.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_128.4.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_128.4.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_128.2.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_128.2.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_128.1.16.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_128.1.16.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_128.1.14.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_128.1.14.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_128.1.10.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_128.1.10.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_128.1.6.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_128.1.6.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_128.1.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_128.1.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_128.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_128.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_92.1.22.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_92.1.22.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_92.1.18.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_92.1.18.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_92.1.13.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_92.1.13.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_92.1.10.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_92.1.10.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_92.1.6.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_92.1.6.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_92.1.1.el5%variant = 8.3.10 +Obsoletes: 
drbd-km-2.6.18_92.1.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_92.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_92.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_53.1.21.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_53.1.21.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_53.1.19.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_53.1.19.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_53.1.14.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_53.1.14.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_53.1.13.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_53.1.13.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_53.1.6.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_53.1.6.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_53.1.4.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_53.1.4.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_53.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_53.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_8.1.15.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_8.1.15.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_8.1.14.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_8.1.14.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_8.1.8.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_8.1.8.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_8.1.6.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_8.1.6.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_8.1.4.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_8.1.4.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_8.1.3.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_8.1.3.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_8.1.1.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_8.1.1.el5%variant < 8.3.10 +Provides: drbd-km-2.6.18_8.el5%variant = 8.3.10 +Obsoletes: drbd-km-2.6.18_8.el5%variant < 8.3.10 diff -Nru drbd8-8.3.7/preamble-sles10 drbd8-8.4.1+git55a81dc~cmd1/preamble-sles10 --- drbd8-8.3.7/preamble-sles10 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/preamble-sles10 2012-02-02 14:09:14.000000000 +0000 @@ 
-0,0 +1,52 @@ +Provides: drbd-km-2.6.16.60_0.60.1_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.60.1_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.59.1_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.59.1_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.58.1_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.58.1_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.54.5_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.54.5_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.42.7_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.42.7_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.42.5_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.42.5_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.42.4_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.42.4_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.39.3_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.39.3_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.37_f594963d_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.37_f594963d_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.34_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.34_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.33_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.33_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.31_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.31_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.30_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.30_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.29_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.29_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.27_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.27_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.25_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.25_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.23_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.23_%1 < 8.3.10 +Provides: drbd-km-2.6.16.60_0.21_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.60_0.21_%1 < 8.3.10 +Provides: drbd-km-2.6.16.54_0.2.5_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.54_0.2.5_%1 < 8.3.10 +Provides: drbd-km-2.6.16.54_0.2.3_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.54_0.2.3_%1 < 8.3.10 +Provides: drbd-km-2.6.16.53_0.16_%1 = 8.3.10 +Obsoletes: 
drbd-km-2.6.16.53_0.16_%1 < 8.3.10 +Provides: drbd-km-2.6.16_53_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16_53_%1 < 8.3.10 +Provides: drbd-km-2.6.16.46_0.14_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.46_0.14_%1 < 8.3.10 +Provides: drbd-km-2.6.16.46_0.12_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.46_0.12_%1 < 8.3.10 +Provides: drbd-km-2.6.16.21_0.15_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.21_0.15_%1 < 8.3.10 +Provides: drbd-km-2.6.16.21_0.8_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.16.21_0.8_%1 < 8.3.10 diff -Nru drbd8-8.3.7/preamble-sles11 drbd8-8.4.1+git55a81dc~cmd1/preamble-sles11 --- drbd8-8.3.7/preamble-sles11 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/preamble-sles11 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,37 @@ +# SLES 11 SP1 +Provides: drbd-km-2.6.32.27_0.2_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.32.27_0.2_%1 < 8.3.10 +Provides: drbd-km-2.6.32.24_0.2_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.32.24_0.2_%1 < 8.3.10 +Provides: drbd-km-2.6.32.23_0.3_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.32.23_0.3_%1 < 8.3.10 +Provides: drbd-km-2.6.32.19_0.3_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.32.19_0.3_%1 < 8.3.10 +Provides: drbd-km-2.6.32.19_0.2_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.32.19_0.2_%1 < 8.3.10 +Provides: drbd-km-2.6.32.13_0.5_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.32.13_0.5_%1 < 8.3.10 +Provides: drbd-km-2.6.32.13_0.4_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.32.13_0.4_%1 < 8.3.10 +Provides: drbd-km-2.6.32.12_0.7_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.32.12_0.7_%1 < 8.3.10 + +# SLES 11 +Provides: drbd-km-2.6.27.45_0.1_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.27.45_0.1_%1 < 8.3.10 +Provides: drbd-km-2.6.27.42_0.1_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.27.42_0.1_%1 < 8.3.10 +Provides: drbd-km-2.6.27.39_0.3_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.27.39_0.3_%1 < 8.3.10 +Provides: drbd-km-2.6.27.37_0.1_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.27.37_0.1_%1 < 8.3.10 +Provides: drbd-km-2.6.27.29_0.1_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.27.29_0.1_%1 < 8.3.10 +Provides: drbd-km-2.6.27.25_0.1_%1 = 8.3.10 
+Obsoletes: drbd-km-2.6.27.25_0.1_%1 < 8.3.10 +Provides: drbd-km-2.6.27.23_0.1_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.27.23_0.1_%1 < 8.3.10 +Provides: drbd-km-2.6.27.21_0.1_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.27.21_0.1_%1 < 8.3.10 +Provides: drbd-km-2.6.27.19_5_%1 = 8.3.10 +Obsoletes: drbd-km-2.6.27.19_5_%1 < 8.3.10 diff -Nru drbd8-8.3.7/rpm-macro-fixes/README drbd8-8.4.1+git55a81dc~cmd1/rpm-macro-fixes/README --- drbd8-8.3.7/rpm-macro-fixes/README 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/rpm-macro-fixes/README 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,20 @@ +macros.kernel-source.sles11-sp1.diff: + + Patch needed on SUSE products in order to allow building kernel module + packages for a specific kernel version. + See the patch for more detailed documentation. + +macros.kernel-source.sles11.diff: + + Same thing for sles11 (no sp1) + +suse_macros.sles10.diff: + + Similar thing for sles10 + +kmodtool.rhel5.diff + + Add filelist tag substitution capabilities to rhel5 kmodtool, + and drop the dependency on a ...-kmod-common package, + similar to what rhel6 does. 
+ diff -Nru drbd8-8.3.7/rpm-macro-fixes/kmodtool.rhel5.diff drbd8-8.4.1+git55a81dc~cmd1/rpm-macro-fixes/kmodtool.rhel5.diff --- drbd8-8.3.7/rpm-macro-fixes/kmodtool.rhel5.diff 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/rpm-macro-fixes/kmodtool.rhel5.diff 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,30 @@ +--- /usr/lib/rpm/redhat/kmodtool ++++ /usr/lib/rpm/redhat/kmodtool +@@ -65,12 +65,19 @@ + { + local variant="${1}" + local dashvariant="${variant:+-${variant}}" ++ local dotvariant="${variant:+.${variant}}" ++ + case "$verrel" in + *.el*) kdep="kernel${dashvariant}-%{_target_cpu} = ${verrel}" ;; + *.EL*) kdep="kernel${dashvariant}-%{_target_cpu} = ${verrel}" ;; + *) kdep="kernel-%{_target_cpu} = ${verrel}${variant}" ;; + esac + ++ echo "%global verrel $verrel" ++ echo "%global variant ${variant:-%nil}" ++ echo "%global dashvariant ${dashvariant:-%nil}" ++ echo "%global dotvariant ${dotvariant:-%nil}" ++ + echo "%package -n kmod-${kmod_name}${dashvariant}" + + if [ -z "$kmp_provides_summary" ]; then +@@ -100,7 +107,6 @@ + fi + + cat <= %{?epoch:%{epoch}:}%{version} + Requires(post): /sbin/depmod + Requires(postun): /sbin/depmod + EOF diff -Nru drbd8-8.3.7/rpm-macro-fixes/macros.kernel-source.sles11-sp1.diff drbd8-8.4.1+git55a81dc~cmd1/rpm-macro-fixes/macros.kernel-source.sles11-sp1.diff --- drbd8-8.3.7/rpm-macro-fixes/macros.kernel-source.sles11-sp1.diff 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/rpm-macro-fixes/macros.kernel-source.sles11-sp1.diff 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,48 @@ +By default, the %kernel_module_package will build packages for all kernel +flavors it finds in /usr/src/linux-obj: this directory contains symlinks to the +latest kernel-$flavor-devel packages installed. + +This default can be overridden by defining the %kernel_version macro on the +rpmbuild command line. 
For example, you can build against version +2.6.32.19-0.2 with: + + rpmbuild --define 'kernel_version 2.6.32.19-0.2' + +When doing that, rpmbuild will iterate over the kernels defined in +/usr/src/linux-%kernel_version-obj, instead. + +It is not possible to iterate over all installed kernel-$flavor-devel packages +in one rpmbuild command: rpm only allows to build a single sub-package with a +given name (for example, drbd-kmp-default), and cannot build two separate +drbd-kmp-default sub-packages with different versions. + + Andreas Gruenbacher + +--- /etc/rpm/macros.kernel-source.orig ++++ /etc/rpm/macros.kernel-source +@@ -9,14 +9,14 @@ + echo "%%define _suse_kernel_module_subpackage(n:v:r:f:p:) %%{expand:%%(cd %_sourcedir; cat $subpkg; echo %%%%nil)}" \ + flavors_to_build= \ + flavors="%*" \ +- for flavor in $(ls /usr/src/linux-obj/%_target_cpu 2>/dev/null); do \ ++ for flavor in $(ls /usr/src/linux-%{?kernel_version:%kernel_version-}obj/%_target_cpu 2>/dev/null); do \ + case " $flavors " in \ + (*" $flavor "*) \ + [ -n "%{-X}" ] && continue ;; \ + (*) \ + [ -z "%{-X}" -a -n "$flavors" ] && continue ;; \ + esac \ +- krel=$(make -s -C /usr/src/linux-obj/%_target_cpu/$flavor kernelrelease) \ ++ krel=$(make -s -C /usr/src/linux-%{?kernel_version:%kernel_version-}obj/%_target_cpu/$flavor kernelrelease) \ + kver=${krel%%-*} \ + [ -e /boot/symsets-$kver-$flavor.tar.gz ] || continue \ + flavors_to_build="$flavors_to_build $flavor" \ +@@ -24,7 +24,7 @@ + done \ + echo "%%global flavors_to_build${flavors_to_build:-%%nil}" \ + echo "%%{expand:%%(test -z '%flavors_to_build' && echo %%%%internal_kmp_error)}" \ +- echo "%%global kernel_source() /usr/src/linux-obj/%_target_cpu/%%%%{1}" \ ++ echo "%%global kernel_source() /usr/src/linux-%{?kernel_version:%kernel_version-}obj/%_target_cpu/%%%%{1}" \ + echo "%%global kernel_module_package_moddir() updates" \ + \ + echo "%package -n %{-n*}%{!-n:%name}-kmp-_dummy_" \ diff -Nru 
drbd8-8.3.7/rpm-macro-fixes/macros.kernel-source.sles11.diff drbd8-8.4.1+git55a81dc~cmd1/rpm-macro-fixes/macros.kernel-source.sles11.diff --- drbd8-8.3.7/rpm-macro-fixes/macros.kernel-source.sles11.diff 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/rpm-macro-fixes/macros.kernel-source.sles11.diff 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,30 @@ +See comment in macros.kernel-source.sles11-sp1.diff + +--- /etc/rpm/macros.kernel-source.orig ++++ /etc/rpm/macros.kernel-source +@@ -9,14 +9,14 @@ + echo "%%define _suse_kernel_module_subpackage(n:v:r:f:p:) %%{expand:%%(cd %_sourcedir; cat $subpkg; echo %%%%nil)}" \ + flavors_to_build= \ + flavors="%*" \ +- for flavor in $(ls /usr/src/linux-obj/%_target_cpu 2>/dev/null); do \ ++ for flavor in $(ls /usr/src/linux-%{?kernel_version:%kernel_version-}obj/%_target_cpu 2>/dev/null); do \ + case " $flavors " in \ + (*" $flavor "*) \ + [ -n "%{-X}" ] && continue ;; \ + (*) \ + [ -z "%{-X}" -a -n "$flavors" ] && continue ;; \ + esac \ +- krel=$(make -s -C /usr/src/linux-obj/%_target_cpu/$flavor kernelrelease) \ ++ krel=$(make -s -C /usr/src/linux-%{?kernel_version:%kernel_version-}obj/%_target_cpu/$flavor kernelrelease) \ + kver=${krel%%-*} \ + [ -e /boot/symsets-$kver-$flavor.tar.gz ] || continue \ + flavors_to_build="$flavors_to_build $flavor" \ +@@ -24,7 +24,7 @@ + done \ + echo "%%global flavors_to_build${flavors_to_build:-%%nil}" \ + echo "%%{expand:%%(test -z '%flavors_to_build' && echo %%%%internal_kmp_error)}" \ +- echo "%%global kernel_source() /usr/src/linux-obj/%_target_cpu/%%%%{1}" \ ++ echo "%%global kernel_source() /usr/src/linux-%{?kernel_version:%kernel_version-}obj/%_target_cpu/%%%%{1}" \ + \ + echo "%package -n %{-n*}%{!-n:%name}-kmp-_dummy_" \ + echo "Version: %version" \ diff -Nru drbd8-8.3.7/rpm-macro-fixes/macros.rhel5.diff drbd8-8.4.1+git55a81dc~cmd1/rpm-macro-fixes/macros.rhel5.diff --- drbd8-8.3.7/rpm-macro-fixes/macros.rhel5.diff 1970-01-01 00:00:00.000000000 +0000 +++ 
drbd8-8.4.1+git55a81dc~cmd1/rpm-macro-fixes/macros.rhel5.diff 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,13 @@ +--- /usr/lib/rpm/redhat/macros.orig ++++ /usr/lib/rpm/redhat/macros +@@ -170,8 +170,8 @@ + + %kernel_module_package(n:v:r:s:f:xp:) %{expand:%( \ + %define kmodtool %{-s*}%{!-s:/usr/lib/rpm/redhat/kmodtool} \ +- %define kmp_version %{-v*}%{!-v:%{version}} \ +- %define kmp_release %{-r*}%{!-r:%{release}} \ ++ %global kmp_version %{-v*}%{!-v:%{version}} \ ++ %global kmp_release %{-r*}%{!-r:%{release}} \ + %define latest_kernel %(rpm -q --qf '%{VERSION}-%{RELEASE}\\\\n' `rpm -q kernel-devel | /usr/lib/rpm/redhat/rpmsort -r | head -n 1` | head -n 1) \ + %{!?kernel_version:%{expand:%%global kernel_version %{latest_kernel}}} \ + %global kverrel %(%{kmodtool} verrel %{?kernel_version} 2>/dev/null) \ diff -Nru drbd8-8.3.7/rpm-macro-fixes/suse_macros.sles10.diff drbd8-8.4.1+git55a81dc~cmd1/rpm-macro-fixes/suse_macros.sles10.diff --- drbd8-8.3.7/rpm-macro-fixes/suse_macros.sles10.diff 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/rpm-macro-fixes/suse_macros.sles10.diff 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 +1,46 @@ +See comment in macros.kernel-source.sles11-sp1.diff + +--- /usr/lib/rpm/suse_macros.orig ++++ /usr/lib/rpm/suse_macros +@@ -473,12 +473,12 @@ + + # Defines %flavors_to_build as a side effect. 
+ %suse_kernel_module_package(n:v:r:s:f:xp:) \ +-%{expand:%( \ ++%{expand:%{expand:%( \ ++ ( \ + subpkg=%{-s*}%{!-s:/usr/lib/rpm/rpm-suse-kernel-module-subpackage} \ + echo "%%define _suse_kernel_module_subpackage(n:v:r:f:p:) %%{expand:%%(cd %_sourcedir; cat $subpkg; echo %%%%nil)}" \ +- flavors="%{-x:%*}%{!-x:$(ls /usr/src/linux-obj/%_target_cpu 2>/dev/null)}" \ ++ flavors="%{-x:%*}%{!-x:$(ls /usr/src/linux-%{?kernel_version:%kernel_version-}obj/%_target_cpu 2>/dev/null)}" \ + flavors_to_build= \ +- kver=$(rpm -q --qf '%{VERSION}-%{RELEASE}' kernel-source) \ + for flavor in $flavors; do \ + if [ -z "%{-x}" ]; then \ + case " %* " in \ +@@ -486,19 +486,23 @@ + continue ;; \ + esac \ + fi \ +- krel=$(make -s -C /usr/src/linux-obj/%_target_cpu/$flavor kernelrelease) \ ++ krel=$(make -s -C /usr/src/linux-%{?kernel_version:%kernel_version-}obj/%_target_cpu/$flavor kernelrelease) \ ++ kver=${krel%%-*} \ + [ -e /boot/symsets-$krel.tar.gz ] || continue \ + flavors_to_build="$flavors_to_build $flavor" \ + echo "%%_suse_kernel_module_subpackage -n %{-n*}%{!-n:%name}-kmp -v %{-v*}%{!-v:%version} -r %{-r*}%{!-r:%release} %{-p} $flavor $krel $kver" \ + done \ + echo "%%global flavors_to_build${flavors_to_build:-%%nil}" \ ++ echo "%%global kernel_source() /usr/src/linux-%{?kernel_version:%kernel_version-}obj/%_target_cpu/%%%%{1}" \ ++ echo "%%global kernel_module_package_moddir() updates" \ + \ + echo "%package -n %{-n*}%{!-n:%name}-kmp-_dummy_" \ + echo "Version: %version" \ + echo "Summary: %summary" \ + echo "Group: %group" \ + echo "%description -n %{-n*}%{!-n:%name}-kmp-_dummy_" \ +- )} ++ ) | sed -e 's/%%/%%%%/g' \ ++ )}} + + %suse_version 1010 + %sles_version 10 diff -Nru drbd8-8.3.7/rpm-macro-fixes/symset-table.diff drbd8-8.4.1+git55a81dc~cmd1/rpm-macro-fixes/symset-table.diff --- drbd8-8.3.7/rpm-macro-fixes/symset-table.diff 1970-01-01 00:00:00.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/rpm-macro-fixes/symset-table.diff 2012-02-02 14:09:14.000000000 +0000 @@ -0,0 
+1,54 @@ +symsets-xyz-tar.gz contain only the current symsets, +and potentially compatible symsets. + +To be compatible by definition means to be a subset of the current symset. + +If we scan through the symsets in ascending order of their size in bytes, +the first symset to match a particular symbol will be the "oldest", +"most compatible". + +This way, even if the most recent kernel version provides some new +symset containing new symbols, a kernel module package built +against it will still only require the weakest symset(s) necessary, +so will stay compatible on the rpm dependency level with all older +kernels that provide the actually used symbols. + +Without the sorting and filtering, the resulting kmp would require +all symsets the respective symbols are defined in, including the +latest symset, even if only a subset of the contained symbols is +actually used. Thus the kmp may become "incompatible" on the rpm +level with older kernel versions, even though it works just fine +with "weak-modules" on the actual symbol version level. + +--- /usr/lib/rpm/symset-table ++++ /usr/lib/rpm/symset-table +@@ -21,15 +21,26 @@ + + for symsets in *; do + krel=${symsets#symsets-} +- for symset in $symsets/*; do ++ for symset in $(ls -Sr $symsets/* ); do + class=${symset##*/} ; class=${class%.*} + hash=${symset##*.} + awk ' + BEGIN { FS = "\t" ; OFS = "\t" } + { sub(/0x0*/, "", $1) +- print krel "/" $1 "/" $2, class, hash } ++ print krel "/" $1, $2, class, hash } + ' krel="$krel" class="$class" hash="$hash" $symset +- done ++ done \ ++ | awk ' ++ # Filter out duplicate symbols. Since we went through the symset ++ # files in increasing size order, each symbol will remain in the ++ # table with the oldest symset it is defined in. 
++ BEGIN { FS = "\t" ; OFS = "\t" } ++ { if ($2 in seen) ++ next ++ seen[$2]=1 ++ print $1 "/" $2, $3, $4 } ++ ' \ ++ | sort -t $'\t' -k 1,1 + done + + # vim:shiftwidth=4 softtabstop=4 diff -Nru drbd8-8.3.7/scripts/Makefile drbd8-8.4.1+git55a81dc~cmd1/scripts/Makefile --- drbd8-8.3.7/scripts/Makefile 2012-09-03 23:12:22.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/scripts/Makefile 2012-09-03 22:37:14.000000000 +0000 @@ -45,7 +45,7 @@ WITH_XEN = yes WITH_PACEMAKER = yes WITH_HEARTBEAT = yes -WITH_RGMANAGER = yes +WITH_RGMANAGER = no WITH_BASHCOMPLETION = yes # variables meant to be overridden from the make command line diff -Nru drbd8-8.3.7/scripts/adjust_drbd_config_h.sh drbd8-8.4.1+git55a81dc~cmd1/scripts/adjust_drbd_config_h.sh --- drbd8-8.3.7/scripts/adjust_drbd_config_h.sh 2010-01-07 09:09:34.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/scripts/adjust_drbd_config_h.sh 1970-01-01 00:00:00.000000000 +0000 @@ -1,212 +0,0 @@ -#!/bin/bash -# drbd_config.h auto edit magic for 2.4 kernels ... - -# expects KDIR in the environment to be set correctly! - -set -e -sorry() { - cat <<___ - Sorry, automagic adjustment of drbd_config.h failed. - For well known 2.6. kernels, no adjustment to the shipped drbd_config is necessary. - You need to verify it yourself. -___ -} -trap "sorry" 0 -grep_q() { grep "$@" /dev/null &>/dev/null ; } - -# PARANOIA: -test -e ./linux/drbd_config.h || { - echo >&2 "oops, invoked in unexpected directory..." - exit 1 -} - -if [[ -z $KDIR ]] ; then - echo >&2 "You did not tell me which kernel I should check" - echo >&2 "So I'm taking a guess..." - O= - KDIR_BEST_GUESS=/lib/modules/`uname -r`/source - O_BEST_GUESS=/lib/modules/`uname -r`/build - test -d $KDIR_BEST_GUESS && KDIR=$KDIR_BEST_GUESS - test -d $O_BEST_GUESS && O=$O_BEST_GUESS -fi -test -n "$KDIR" - -# ok, now we have a KDIR; cd into it, in case we detect relative pathes -pushd $KDIR - -KDIR=${KDIR%/} -if test -z "$O"; then - ## just in case... 
- ## detect if $KDIR points to something which is actually $O ... - X=$( make no-such-makefile-target 2>/dev/null | - sed -ne '/ -C .* O=.* no-such-makefile-target$/p' | tr -s ' ' ) - if [[ -n $X ]]; then - KDIR=${X##* -C }; KDIR=${KDIR%% *}; KDIR=$(cd $KDIR && pwd) - O=${X##* O=}; O=${O%% *}; O=$(cd $KDIR && cd $O && pwd) - else - O=$KDIR; - fi -else - O=${O%/} -fi - -# some paranoia: check that all files are where we expect them -ls > /dev/null \ -$KDIR/{Makefile,include/linux/{gfp,types,slab,net}.h} -ls > /dev/null \ -$O/{.config,Makefile,include/linux/version.h} -test -e $O/include/asm/atomic.h || -test -e $O/include/asm/arch/atomic.h || -test -e $O/include2/asm/atomic.h || -test -e $KDIR/include/asm-generic/atomic.h || -exit 1 - -if grep_q "^PATCHLEVEL *= *6" $KDIR/Makefile ; then - # do we have gfp_t? - if grep_q "typedef.*gfp_t" $KDIR/include/linux/gfp.h $KDIR/include/linux/types.h; then - have_gfp_t=1 - else - have_gfp_t=0 - fi - # stupid vendor kernels grrr... - have_atomic_add=0 - # btw, don't ask why I don't use grep -qs $a $b $c - # it simply does not work always... 
- for f in $O/include/asm/atomic.h \ - $O/include/asm/arch/atomic.h \ - $O/include2/asm/atomic.h \ - $O/include/asm/atomic_32.h \ - $O/include2/asm/atomic_32.h \ - $O/include/asm/arch/atomic_32.h \ - $KDIR/include/asm-generic/atomic.h - do - if grep_q "atomic_add_return" $f; then - have_atomic_add=1 - break - fi - done - if grep_q "typedef.*kmem_cache_s" $KDIR/include/linux/slab.h ; then - have_kmem_cache_s=1 - else - have_kmem_cache_s=0 - fi - if grep_q "sock_create_kern" $KDIR/include/linux/net.h ; then - have_sock_create_kern=1 - else - have_sock_create_kern=0 - fi - if grep_q "kernel_sock_shutdown" $KDIR/include/linux/net.h ; then - have_kernel_sock_shutdown=1 - else - have_kernel_sock_shutdown=0 - fi - if grep_q "dst_groups" $KDIR/include/linux/netlink.h ; then - have_nl_dst_groups=1 - else - have_nl_dst_groups=0 - fi - if grep_q "kzalloc" $KDIR/include/linux/slab.h ; then - need_backport_of_kzalloc=0 - else - need_backport_of_kzalloc=1 - fi - if test -e $KDIR/include/linux/scatterlist.h ; then - have_linux_scatterlist_h=1 - if grep_q "sg_set_buf" $KDIR/include/linux/scatterlist.h ; then - need_sg_set_buf=0 - else - need_sg_set_buf=1 - fi - else - have_linux_scatterlist_h=0 - need_sg_set_buf=1 - fi - if grep_q "msleep" $KDIR/include/linux/delay.h ; then - have_msleep=1 - else - have_msleep=0 - fi - if grep_q "kvec" $KDIR/include/linux/uio.h ; then - have_kvec=1 - else - have_kvec=0 - fi - if test -e $KDIR/include/linux/byteorder/swabb.h ; then - have_linux_byteorder_swabb_h=1 - else - have_linux_byteorder_swabb_h=0 - fi - if grep_q "proc_create(" $KDIR/include/linux/proc_fs.h ; then - have_proc_create=1 - else - have_proc_create=0 - fi - if grep_q "set_cpus_allowed_ptr(" $KDIR/include/linux/sched.h ; then - have_set_cpus_allowed_ptr=1 - else - have_set_cpus_allowed_ptr=0 - fi - if grep_q "netlink_skb_parms" $KDIR/include/linux/connector.h ; then - have_netlink_skb_parms=1 - else - have_netlink_skb_parms=0 - fi -else - # not a 2.6. kernel. 
just leave it alone... - exit 0 -fi - -# and back do drbd source -popd - -test -e ./linux/drbd_config.h.orig || cp ./linux/drbd_config.h{,.orig} - -perl -pe " - s{.*(#define KERNEL_HAS_GFP_T.*)} - { ( $have_gfp_t ? '' : '//' ) . \$1}e; - s{.*(#define NEED_BACKPORT_OF_ATOMIC_ADD.*)} - { ( $have_atomic_add ? '//' : '' ) . \$1}e; - s{.*(#define USE_KMEM_CACHE_S.*)} - { ( $have_kmem_cache_s ? '' : '//' ) . \$1}e; - s{.*(#define DEFINE_SOCK_CREATE_KERN.*)} - { ( $have_sock_create_kern ? '//' : '' ) . \$1}e; - s{.*(#define DEFINE_KERNEL_SOCK_SHUTDOWN.*)} - { ( $have_kernel_sock_shutdown ? '//' : '' ) . \$1}e; - s{.*(#define DRBD_NL_DST_GROUPS.*)} - { ( $have_nl_dst_groups ? '' : '//' ) . \$1}e; - s{.*(#define NEED_BACKPORT_OF_KZALLOC.*)} - { ( $need_backport_of_kzalloc ? '' : '//' ) . \$1}e; - s{.*(#define NEED_SG_SET_BUF.*)} - { ( $need_sg_set_buf ? '' : '//' ) . \$1}e; - s{.*(#define HAVE_LINUX_SCATTERLIST_H.*)} - { ( $have_linux_scatterlist_h ? '' : '//' ) . \$1}e; - s{.*(#define KERNEL_HAS_MSLEEP.*)} - { ( $have_msleep ? '' : '//' ) . \$1}e; - s{.*(#define KERNEL_HAS_KVEC.*)} - { ( $have_kvec ? '' : '//' ) . \$1}e; - s{.*(#define HAVE_LINUX_BYTEORDER_SWABB_H.*)} - { ( $have_linux_byteorder_swabb_h ? '' : '//' ) . \$1}e; - s{.*(#define KERNEL_HAS_PROC_CREATE.*)} - { ( $have_proc_create ? '' : '//' ) . \$1}e; - s{.*(#define HAVE_SET_CPUS_ALLOWED_PTR.*)} - { ( $have_set_cpus_allowed_ptr ? '' : '//' ) . \$1}e; - s{.*(#define KERNEL_HAS_CN_SKB_PARMS.*)} - { ( $have_netlink_skb_parms ? '' : '//' ) . \$1}e; - " \ - < ./linux/drbd_config.h \ - > ./linux/drbd_config.h.new - -if ! 
DIFF=$(diff -s -U0 ./linux/drbd_config.h{,.new}) ; then - mv ./linux/drbd_config.h{.new,} - sed -e 's/^/ /' <<___ - -Adjusted drbd_config.h: -$DIFF - -___ -else - rm ./linux/drbd_config.h.new - echo -e "\n Using unmodified drbd_config.h\n" -fi -trap - 0 -exit 0 diff -Nru drbd8-8.3.7/scripts/crm-fence-peer.sh drbd8-8.4.1+git55a81dc~cmd1/scripts/crm-fence-peer.sh --- drbd8-8.3.7/scripts/crm-fence-peer.sh 2010-01-07 09:09:34.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/scripts/crm-fence-peer.sh 2012-02-02 14:09:14.000000000 +0000 @@ -12,7 +12,7 @@ s/ *\bid="[^"]*"// # remove id tag # print each attribute on its own line, by : attr - h # rememver the current rest line + h # remember the current (tail of the) line # remove all but the first attribute, and print, s/^\([^[:space:]]*[[:space:]][^= ]*="[^"]*"\).*$/\1/p g # then restore the remembered line, @@ -155,26 +155,59 @@ local peer_state check_peer_node_reachable set_states_from_proc_drbd - case $peer_state/$DRBD_disk in - reachable/*) - cibadmin -C -o constraints -X "$new_constraint" && - drbd_fence_peer_exit_code=4 rc=0 + : == DEBUG == DRBD_peer=${DRBD_peer[*]} === + case "${DRBD_peer[*]}" in + *Secondary*|*Primary*) + # WTF? We are supposed to fence the peer, + # but the replication link is just fine? + echo WARNING "peer is not Unknown, did not place the constraint!" + rc=0 + return ;; - */UpToDate) + esac + : == DEBUG == CTS_mode=$CTS_mode == + : == DEBUG == DRBD_disk_all_consistent=$DRBD_disk_all_consistent == + : == DEBUG == DRBD_disk_all_uptodate=$DRBD_disk_all_uptodate == + : == DEBUG == $peer_state/${DRBD_disk[*]}/$unreachable_peer_is == + if [[ ${#DRBD_disk[*]} = 0 ]]; then + # Someone called this script, without the corresponding drbd + # resource being configured. That's not very useful. + echo WARNING "could not determine my disk state: did not place the constraint!" 
+ rc=0 + # keep drbd_fence_peer_exit_code at "generic error", + # which will cause a "script is broken" message in case it was + # indeed called as handler from within drbd + elif [[ $peer_state = reachable ]] && $DRBD_disk_all_consistent; then + cibadmin -C -o constraints -X "$new_constraint" && + drbd_fence_peer_exit_code=4 rc=0 && + echo INFO "peer is $peer_state, my disk is ${DRBD_disk[*]}: placed constraint '$id_prefix-$master_id'" + elif $DRBD_disk_all_uptodate ; then # We could differentiate between unreachable, # and DC-unreachable. In the latter case, placing the # constraint will fail anyways, and drbd_fence_peer_exit_code # will stay at "generic error". cibadmin -C -o constraints -X "$new_constraint" && - drbd_fence_peer_exit_code=5 rc=0 - ;; - *) - echo WARNING "did not place the constraint!" + drbd_fence_peer_exit_code=5 rc=0 && + echo INFO "peer is not reachable, my disk is UpToDate: placed constraint '$id_prefix-$master_id'" + elif [[ $peer_state = unreachable ]] && [[ $unreachable_peer_is = outdated ]] && $DRBD_disk_all_consistent; then + # If the peer is not reachable, but we are only Consistent, we + # may need some way to still allow promotion. + # Easy way out: --force primary with drbdsetup. + # But that would not place the constraint, nor outdate the + # peer. With this --unreachable-peer-is-outdated, we still try + # to set the constraint. Next promotion attempt will find the + # "correct" constraint, consider the peer as successfully + # fenced, and continue. + cibadmin -C -o constraints -X "$new_constraint" && + drbd_fence_peer_exit_code=5 rc=0 && + echo WARNING "peer is unreachable, my disk is only Consistent: --unreachable-peer-is-outdated FORCED constraint '$id_prefix-$master_id'" && + echo WARNING "This MAY RISK DATA INTEGRITY" + else + echo WARNING "peer is $peer_state, my disk is ${DRBD_disk[*]}: did not place the constraint!" 
drbd_fence_peer_exit_code=5 rc=0 # I'd like to return 6 here, otherwise pacemaker will retry # forever to promote, even though 6 is not strictly correct. - ;; - esac + fi } # drbd_peer_fencing fence|unfence @@ -220,7 +253,7 @@ # and try to go online with stale data. # Exactly what this "fence" hanler should prevent. # But setting contraints in a cluster partition with - # "no-quorum-policy=ignore" will usually succeed. + # "no-quorum-policy=ignore" will usually succeed. # # So we need to differentiate between node reachable or # not, and DRBD "Consistent" or "UpToDate". @@ -228,7 +261,7 @@ try_place_constraint elif [[ "$have_constraint" = "$(set +x; echo "$new_constraint" | sed_rsc_location_suitable_for_string_compare "$id_prefix-$master_id")" ]]; then - : "identical constraint already placed" + echo INFO "suitable constraint already placed: '$id_prefix-$master_id'" drbd_fence_peer_exit_code=4 rc=0 else @@ -245,7 +278,7 @@ # better data than us, and wants us outdated. fi - if [ $rc != 0 ]; then + if [[ $rc != 0 ]]; then # at least we tried. # maybe it was already in place? echo WARNING "DATA INTEGRITY at RISK: could not place the fencing constraint!" @@ -311,6 +344,20 @@ let "cibtimeout = cibtimeout * 5 / 4" done state_lines=$(echo "$cib_xml" | grep '&2 "invalid logfacility: $lf" + return + ;; + esac + exec > >(2>&- ; logger -t "$PROG[$$]" -p $lf.info) 2>&1 +} if [[ $- != *x* ]]; then - exec > >(2>&- ; logger -t "$PROG[$$]" -p local5.info) 2>&1 + # you may override with --logfacility below + redirect_to_logger local5 fi # clean environment just in case. 
-unset fencing_attribute id_prefix timeout dc_timeout +unset fencing_attribute id_prefix timeout dc_timeout unreachable_peer_is +CTS_mode=false suicide_on_failure_if_primary=false # poor mans command line argument parsing, # allow for command line overrides while [[ $# != 0 ]]; do case $1 in + --logfacility=*) + redirect_to_logger ${1#*=} + ;; + --logfacility) + redirect_to_logger $2 + shift + ;; --resource=*) DRBD_RESOURCE=${1#*=} ;; @@ -435,6 +543,16 @@ dc_timeout=$2 shift ;; + --CTS-mode) + CTS_mode=true + ;; + --unreachable-peer-is-outdated) + # This is NOT to be scripted. + # Or people will put this into the handler definition in + # drbd.conf, and all this nice work was useless. + test -t 0 && + unreachable_peer_is=outdated + ;; # --suicide-on-failure-if-primary) # suicide_on_failure_if_primary=true # ;; @@ -449,14 +567,16 @@ done # DRBD_RESOURCE: from environment # master_id: parsed from cib -# apply defaults: -: ${fencing_attribute:="#uname"} -: ${id_prefix:="drbd-fence-by-handler"} -: ${role:="Master"} + +: "== unreachable_peer_is == ${unreachable_peer_is:=unknown}" +# apply defaults: +: "== fencing_attribute == ${fencing_attribute:="#uname"}" +: "== id_prefix == ${id_prefix:="drbd-fence-by-handler"}" +: "== role == ${role:="Master"}" # defaults suitable for single-primary no-stonith. -: ${timeout:=1} -: ${dc_timeout:=$[20+timeout]} +: "== timeout == ${timeout:=1}" +: "== dc_timeout == ${dc_timeout:=$[20+timeout]}" # check envars normally passed in by drbdadm # TODO DRBD_CONF is also passed in. we may need to use it in the @@ -471,6 +591,17 @@ fi done +# Fixup id-prefix to include the resource name +# There may be multiple drbd instances part of the same M/S Group, pointing to +# the same master-id. Still they need to all have their own constraint, to be +# able to unfence independently when they finish their resync independently. +# Be nice to people who already explicitly configure an id prefix containing +# the resource name. 
+if [[ $id_prefix != *"-$DRBD_RESOURCE" ]] ; then + id_prefix="$id_prefix-$DRBD_RESOURCE" + : "== id_prefix == ${id_prefix}" +fi + # make sure it contains what we expect HOSTNAME=$(uname -n) diff -Nru drbd8-8.3.7/scripts/drbd drbd8-8.4.1+git55a81dc~cmd1/scripts/drbd --- drbd8-8.3.7/scripts/drbd 2010-01-07 09:09:34.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/scripts/drbd 2012-02-02 14:09:14.000000000 +0000 @@ -3,7 +3,8 @@ # chkconfig: - 70 08 # description: Loads and unloads the drbd module # -# Copright 2001-2008 LINBIT Information Technologies +# Copyright 2001-2010 LINBIT +# # Philipp Reisner, Lars Ellenberg # ### BEGIN INIT INFO @@ -12,8 +13,10 @@ # Required-Stop: $local_fs $network $syslog # Should-Start: sshd multipathd # Should-Stop: sshd multipathd -# Default-Start: -# Default-Stop: +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# X-Start-Before: heartbeat corosync +# X-Stop-After: heartbeat corosync # Short-Description: Control drbd resources. ### END INIT INFO @@ -44,41 +47,13 @@ { [ -e "$PROC_DRBD" ] && return - $MODPROBE -s drbd `$DRBDADM sh-mod-parms` $ADD_MOD_PARAM || { + $MODPROBE -s drbd $ADD_MOD_PARAM || { echo "Can not load the drbd module."$'\n'; exit 20 } # tell klogd to reload module symbol information ... [ -e /var/run/klogd.pid ] && [ -x /sbin/klogd ] && /sbin/klogd -i } -function adjust_with_progress -{ - IFS_O=$IFS - NEWLINE=' -' - IFS=$NEWLINE - local res - - COMMANDS=`$DRBDADM -d -n res adjust all` || exit 20 - echo -n "[ " - - for CMD in $COMMANDS; do - case "$CMD" in - res=*) eval "$CMD";; - *\ disk\ *) echo -n "d($res) " ;; - *\ syncer\ *) echo -n "s($res) " ;; - *\ net\ *) echo -n "n($res) " ;; - *) echo ".. " ;; - esac - if ! 
eval "$CMD"; then - echo -e "\n[$res] cmd $CMD failed - continuing!\n " - fi - done - echo -n "]" - - IFS=$IFS_O -} - drbd_pretty_status() { local proc_drbd=$1 @@ -137,6 +112,21 @@ ) | column -t } +# Try to settle regardless of udev version or presence, +# so "/etc/init.d/drbd stop" is able to rmmod, without interfering +# temporary module references caused by udev scanning the devices. +# But don't wait too long. +_udev_settle() +{ + if udevadm version ; then + # ok, we have udevadm, use it. + udevadm settle --timeout=5 + else + # if udevsettle is not there, + # no matter. + udevsettle --timeout=5 + fi +} case "$1" in start) @@ -144,19 +134,27 @@ # file, or we need to ask the user about registering this installation # at http://usage.drbd.org, we call drbdadm here without any IO # redirection. - $DRBDADM sh-nop + # If "no op" has a non-zero exit code, the config is unusable, + # and every other command will fail. log_daemon_msg "Starting DRBD resources" + if ! out=$($DRBDADM sh-nop 2>&1) ; then + printf "\n%s\n" "$out" >&2 + log_end_msg 1 + exit 1 + fi assure_module_is_loaded - adjust_with_progress + + $DRBDADM adjust-with-progress all + [[ $? -gt 1 ]] && exit 20 # make sure udev has time to create the device files - for RESOURCE in `$DRBDADM sh-resources`; do - for DEVICE in `$DRBDADM sh-dev $RESOURCE`; do - UDEV_TIMEOUT_LOCAL=$UDEV_TIMEOUT - while [ ! -e $DEVICE ] && [ $UDEV_TIMEOUT_LOCAL -gt 0 ] ; do - sleep 1 + # FIXME this probably should, on platforms that have it, + # use udevadm settle --timeout=X --exit-if-exists=$DEVICE + for DEVICE in `$DRBDADM sh-dev all`; do + UDEV_TIMEOUT_LOCAL=$UDEV_TIMEOUT + while [ ! -e $DEVICE ] && [ $UDEV_TIMEOUT_LOCAL -gt 0 ] ; do + sleep 1 UDEV_TIMEOUT_LOCAL=$(( $UDEV_TIMEOUT_LOCAL-1 )) - done done done @@ -169,7 +167,9 @@ stop) $DRBDADM sh-nop log_daemon_msg "Stopping all DRBD resources" - if [ -e $PROC_DRBD ] ; then + for try in 1 2; do + if [ -e $PROC_DRBD ] ; then + [[ $try = 2 ]] && echo "Retrying once..." 
# bypass drbdadm and drbd config file and everything, # to avoid leaving devices around that are not referenced by # the current config file, or in case the current config file @@ -182,10 +182,14 @@ *" not mounted") :;; *) echo "$M" >&2 ;; esac - $DRBDSETUP "$d" down done - $RMMOD drbd - fi + for res in $(drbdsetup all show | sed -ne 's/^resource \(.*\) {$/\1/p'); do + drbdsetup "$res" down + done + _udev_settle &> /dev/null + $RMMOD drbd && break + fi + done [ -f /var/lock/subsys/drbd ] && rm /var/lock/subsys/drbd log_end_msg 0 ;; @@ -208,13 +212,8 @@ log_end_msg 0 ;; restart|force-reload) - $DRBDADM sh-nop - log_daemon_msg "Restarting all DRBD resources" - $DRBDADM down all - $RMMOD drbd - assure_module_is_loaded - $DRBDADM up all - log_end_msg 0 + ( . $0 stop ) + ( . $0 start ) ;; *) echo "Usage: /etc/init.d/drbd {start|stop|status|reload|restart|force-reload}" diff -Nru drbd8-8.3.7/scripts/drbd-overview.pl drbd8-8.4.1+git55a81dc~cmd1/scripts/drbd-overview.pl --- drbd8-8.3.7/scripts/drbd-overview.pl 2009-06-09 11:33:03.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/scripts/drbd-overview.pl 2012-02-02 14:09:14.000000000 +0000 @@ -24,21 +24,40 @@ # sets $drbd{minor}->{name} (and possibly ->{ll_dev}) sub map_minor_to_resource_names() { - my $drbdadm_sh_status = `drbdadm sh-status`; + my @drbdadm_sh_status = `drbdadm sh-status`; + my ($ll_res, $ll_dev, $ll_minor, $conf_res, $conf_vnr, $minor, $name, $vnr); - while ($drbdadm_sh_status =~ m{ - \n - _stacked_on=(.*?)\n - (?:_stacked_on_device=(.*)\n - _stacked_on_minor=(\d*)\n)? - _minor=(.*?)\n - _res_name=(.*?)\n - }xg) - { - $drbd{$4}{name} = $5; - $minor_of_name{$5} = $4; - $drbd{$4}{ll_dev} = defined($2) ? 
$3 : $1 - if $1; + for (@drbdadm_sh_status) { + # volumes only present in >= 8.4 + # some things generated by drbdadm + + /^_conf_res_name=(.*)\n/ and $conf_res = $1, $name = $conf_res; + /^_conf_volume=(\d+)\n/ and $conf_vnr = $1; + + /^_stacked_on=(.*?)\n/ and $ll_res = $1; + # not always present: + /^_stacked_on_device=(.*)\n/ and $ll_dev = $1; + /^_stacked_on_minor=(\d+)\n/ and $ll_minor = $1; + + # rest generated by drbdsetup + /^_minor=(.*?)\n/ and $minor = $1; + /^_res_name=(.+?)\n/ and $name = $1; + /^_volume=(\d+)\n/ and $vnr = $1; + + /^_sh_status_process/ or next; + + $drbd{$minor}{name} = $name; + if (defined $conf_vnr) { + # >= 8.4, append /volume to resource name. + # If both are present, they should be the same. But + # just in case, prefer the kernel volume number, if it + # is present and positive. Else, use the volume number + # from the config. + $drbd{$minor}{name} .= defined $vnr ? "/$vnr" : "/$conf_vnr"; + } + $minor_of_name{$name} = $minor; + $drbd{$minor}{ll_dev} = defined($ll_dev) ? $ll_minor : $ll_res + if $ll_res; } # fix up hack for git versions 8.3.1 > x > 8.3.0: @@ -103,6 +122,7 @@ }; } close PD; + for (values %drbd) { $_->{state} ||= "Unconfigured . . . ."; } } # sets $drbd{minor}->{pv_info} @@ -196,16 +216,20 @@ # parent $_ = ; close(V) or warn "virsh dumpxml exit code: $?\n"; - while (m{]*>\s* - \s* - ]*>.*}gs) { + m{} or next; + my $dev = $1; + if ($dev !~ /^\d+$/) { + my @stat = stat("/dev/drbd$dev") or next; + $dev = $stat[6] & 0xff; + } + m{ $info{$dom}->{state} eq 'running' ? "\*$dom" : "_$dom", - vdev => $2, - bus => $3, + vdev => $1, + bus => $2, }; } } @@ -256,7 +280,7 @@ $out[$line] = [ sprintf("%3u:%s", $m, $t->{name} || "??not-found??"), - $t->{ll_dev} ? "^^$t->{ll_dev}" : "", + defined($t->{ll_dev}) ? 
"^^$t->{ll_dev}" : "", split(/\s+/, $t->{state}), @used_by ]; diff -Nru drbd8-8.3.7/scripts/drbd.conf.example drbd8-8.4.1+git55a81dc~cmd1/scripts/drbd.conf.example --- drbd8-8.3.7/scripts/drbd.conf.example 2010-01-07 09:09:34.000000000 +0000 +++ drbd8-8.4.1+git55a81dc~cmd1/scripts/drbd.conf.example 2012-02-02 14:09:14.000000000 +0000 @@ -1,624 +1,170 @@ -# -# drbd.conf example -# -# parameters you _need_ to change are the hostname, device, disk, -# meta-disk, address and port in the "on {}" sections. -# -# you ought to know about the protocol, and the various timeouts. -# -# you probably want to set the rate in the syncer sections - -# -# NOTE common pitfall: -# rate is given in units of _byte_ not bit -# - -# -# increase timeout and maybe ping-int in net{}, if you see -# problems with "connection lost/connection established" -# (or change your setup to reduce network latency; make sure full -# duplex behaves as such; check average roundtrip times while -# network is saturated; and so on ...) -# - -skip { - As you can see, you can also comment chunks of text - with a 'skip[optional nonsense]{ skipped text }' section. - This comes in handy, if you just want to comment out - some 'resource {...}' section: - just precede it with 'skip'. - - The basic format of option assignment is -