diff -Nru dm-writeboost-2.1.1/ChangeLog dm-writeboost-2.2.6/ChangeLog --- dm-writeboost-2.1.1/ChangeLog 2016-01-04 13:25:39.000000000 +0000 +++ dm-writeboost-2.2.6/ChangeLog 2016-09-19 06:15:04.000000000 +0000 @@ -1,3 +1,60 @@ +2016-09-19 Akira Hayakawa + + * v2.2.6 + * Clarify producer-consumer pattern + * Fix build error with 3.10 kernel + * Fix build error with 3.14 kernel + +2016-09-12 Akira Hayakawa + + * v2.2.5 + * Fix read-caching data corruption issue + * Insert memory barriers + * Code cleanup + +2016-08-28 Akira Hayakawa + + * v2.2.4 + * Fix update_sb_record_interval + * Throttle writeback when there are only few empty segments in the + caching device + * Remove experimental from read-caching + +2016-08-02 Akira Hayakawa + + * v2.2.3 + * Rename write_through_mode to write_around_mode because it's more + precise + * Reformat the caching device when it's write_around_mode + +2016-07-30 Akira Hayakawa + + * v2.2.2 + * Use kmap_atomic() to access the bio payload + * Fix doc (clear_stat) + +2016-07-18 Akira Hayakawa + + * v2.2.1 + * Unsupport TRIM + * Fixes (fail if partial read from caching device fails etc.) + +2016-05-01 Akira Hayakawa + + * v2.2.0 + * Remove partial writeback in foreground. This results in writing + back cached data strictly from the older ones, which makes cache + device corruption safer + * Fix build error for kernel 4.6. per_bio_data_size is renamed to + per_io_data_size + * Remove SECTOR_SHIFT + +2016-03-05 Akira Hayakawa + + * v2.1.2 + * Remove blockup mechanism + * Use vmalloc for read_cache_cell's buffer + 2016-01-04 Akira Hayakawa * v2.1.1 diff -Nru dm-writeboost-2.1.1/debian/changelog dm-writeboost-2.2.6/debian/changelog --- dm-writeboost-2.1.1/debian/changelog 2017-02-06 13:55:59.000000000 +0000 +++ dm-writeboost-2.2.6/debian/changelog 2017-07-14 11:07:17.000000000 +0000 @@ -1,8 +1,55 @@ -dm-writeboost (2.1.1-1ubuntu1) xenial; urgency=medium +dm-writeboost (2.2.6-1~16.04.1) xenial; urgency=low - * Add kernel 4.6/4.8 compat code (LP: #1662107) + * Backport to xenial to support linux-hwe 4.10 kernels. + (LP: #1704280) - -- Stefan Bader Mon, 06 Feb 2017 11:39:32 +0100 + -- Andy Whitcroft Fri, 14 Jul 2017 12:07:17 +0100 + +dm-writeboost (2.2.6-1) unstable; urgency=medium + + * New upstream release [September 2016]. + + fixed FTBFS with Linux 4.8 (Closes: #838547). + + -- Dmitry Smirnov Thu, 22 Sep 2016 21:26:58 +1000 + +dm-writeboost (2.2.5-1) unstable; urgency=medium + + * New upstream release [September 2016]. + + -- Dmitry Smirnov Tue, 13 Sep 2016 07:59:37 +1000 + +dm-writeboost (2.2.4-1) unstable; urgency=medium + + * New upstream release [August 2016]. + + -- Dmitry Smirnov Sun, 28 Aug 2016 21:02:43 +1000 + +dm-writeboost (2.2.3-1) unstable; urgency=medium + + * New upstream release [August 2016]. + + -- Dmitry Smirnov Tue, 02 Aug 2016 22:41:47 +1000 + +dm-writeboost (2.2.1-1) unstable; urgency=medium + + * New upstream release [July 2016]. + * Vcs-Git URL to HTTPS. + + -- Dmitry Smirnov Mon, 18 Jul 2016 19:31:34 +1000 + +dm-writeboost (2.2.0-1) unstable; urgency=medium + + * New upstream release [May 2016]. + * Standards-Version: 3.9.8. + + -- Dmitry Smirnov Sun, 01 May 2016 21:26:05 +1000 + +dm-writeboost (2.1.2-1) unstable; urgency=medium + + * New upstream release [March 2016]. + * Standards-Version: 3.9.7. 
+ + -- Dmitry Smirnov Sun, 06 Mar 2016 00:16:26 +1100 dm-writeboost (2.1.1-1) unstable; urgency=medium diff -Nru dm-writeboost-2.1.1/debian/control dm-writeboost-2.2.6/debian/control --- dm-writeboost-2.1.1/debian/control 2015-07-08 17:06:56.000000000 +0000 +++ dm-writeboost-2.2.6/debian/control 2017-07-14 11:07:17.000000000 +0000 @@ -1,12 +1,13 @@ Source: dm-writeboost Section: kernel Priority: optional -Maintainer: Dmitry Smirnov +Maintainer: Ubuntu Developers +XSBC-Original-Maintainer: Dmitry Smirnov Build-Depends: debhelper (>= 9), dkms -Standards-Version: 3.9.6 +Standards-Version: 3.9.8 Homepage: https://github.com/akiradeveloper/dm-writeboost -Vcs-Browser: http://anonscm.debian.org/cgit/collab-maint/dm-writeboost.git -Vcs-Git: git://anonscm.debian.org/collab-maint/dm-writeboost.git +Vcs-Browser: https://anonscm.debian.org/cgit/collab-maint/dm-writeboost.git +Vcs-Git: https://anonscm.debian.org/git/collab-maint/dm-writeboost.git Package: dm-writeboost-dkms Architecture: all diff -Nru dm-writeboost-2.1.1/debian/patches/add-compat-4.6.patch dm-writeboost-2.2.6/debian/patches/add-compat-4.6.patch --- dm-writeboost-2.1.1/debian/patches/add-compat-4.6.patch 2017-02-06 13:54:32.000000000 +0000 +++ dm-writeboost-2.2.6/debian/patches/add-compat-4.6.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,38 +0,0 @@ -Description: Add compat code for kernel 4.6+ - Rename per_bio_data_size -> per_io_data_size -Forwarded: yes - -Index: dm-writeboost-2.1.1/src/dm-writeboost-target.c -=================================================================== ---- dm-writeboost-2.1.1.orig/src/dm-writeboost-target.c -+++ dm-writeboost-2.1.1/src/dm-writeboost-target.c -@@ -881,6 +881,11 @@ enum PBD_FLAG { - PBD_READ_SEG = 2, - }; - -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0) -+#define PER_BIO_DATA_SIZE per_io_data_size -+#else -+#define PER_BIO_DATA_SIZE per_bio_data_size -+#endif - struct per_bio_data { - enum PBD_FLAG type; - union { -@@ -888,7 +893,7 @@ struct per_bio_data { - struct segment_header *seg; - }; - }; --#define per_bio_data(wb, bio) ((struct per_bio_data *)dm_per_bio_data((bio), (wb)->ti->per_bio_data_size)) -+#define per_bio_data(wb, bio) ((struct per_bio_data *)dm_per_bio_data((bio), (wb)->ti->PER_BIO_DATA_SIZE)) - - static void reserve_read_cache_cell(struct wb_device *, struct bio *); - static int process_read(struct wb_device *wb, struct bio *bio) -@@ -1461,7 +1466,7 @@ static int init_core_struct(struct dm_ta - ti->num_flush_bios = 1; - ti->num_discard_bios = 1; - ti->discard_zeroes_data_unsupported = true; -- ti->per_bio_data_size = sizeof(struct per_bio_data); -+ ti->PER_BIO_DATA_SIZE = sizeof(struct per_bio_data); - - wb = kzalloc(sizeof(*wb), GFP_KERNEL); - if (!wb) { diff -Nru dm-writeboost-2.1.1/debian/patches/add-compat-4.8.patch dm-writeboost-2.2.6/debian/patches/add-compat-4.8.patch --- dm-writeboost-2.1.1/debian/patches/add-compat-4.8.patch 2017-02-06 13:54:41.000000000 +0000 +++ dm-writeboost-2.2.6/debian/patches/add-compat-4.8.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,209 +0,0 @@ -Description: Fix FTBS of DKMS package with 4.8+ kernels - Add compat code to handle rename/split of bi_rw. 
-Author: Stefan Bader -Index: dm-writeboost-2.1.1/src/dm-writeboost-daemon.c -=================================================================== ---- dm-writeboost-2.1.1.orig/src/dm-writeboost-daemon.c -+++ dm-writeboost-2.1.1/src/dm-writeboost-daemon.c -@@ -85,7 +85,11 @@ void flush_proc(struct work_struct *work - - struct dm_io_request io_req = { - .client = wb->io_client, -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - .bi_rw = WRITE, -+#else -+ .bi_op = REQ_OP_WRITE, -+#endif - .notify.fn = NULL, - .mem.type = DM_IO_VMA, - .mem.ptr.addr = rambuf->data, -@@ -142,7 +146,11 @@ static void submit_writeback_io(struct w - if (writeback_io->data_bits == 255) { - struct dm_io_request io_req_w = { - .client = wb->io_client, -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - .bi_rw = WRITE, -+#else -+ .bi_op = REQ_OP_WRITE, -+#endif - .notify.fn = writeback_endio, - .notify.context = wb, - .mem.type = DM_IO_VMA, -@@ -168,7 +176,11 @@ static void submit_writeback_io(struct w - - io_req_w = (struct dm_io_request) { - .client = wb->io_client, -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - .bi_rw = WRITE, -+#else -+ .bi_op = REQ_OP_WRITE, -+#endif - .notify.fn = writeback_endio, - .notify.context = wb, - .mem.type = DM_IO_VMA, -@@ -267,7 +279,11 @@ static void prepare_writeback_ios(struct - - struct dm_io_request io_req_r = { - .client = wb->io_client, -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - .bi_rw = READ, -+#else -+ .bi_op = REQ_OP_READ, -+#endif - .notify.fn = NULL, - .mem.type = DM_IO_VMA, - .mem.ptr.addr = writeback_seg->buf, -@@ -467,7 +483,12 @@ static void update_superblock_record(str - - io_req = (struct dm_io_request) { - .client = wb->io_client, -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - .bi_rw = WRITE_FUA, -+#else -+ .bi_op = REQ_OP_WRITE, -+ .bi_op_flags = WRITE_FUA, -+#endif - .notify.fn = NULL, - .mem.type = DM_IO_KMEM, - .mem.ptr.addr = buf, -Index: dm-writeboost-2.1.1/src/dm-writeboost-metadata.c -=================================================================== ---- dm-writeboost-2.1.1.orig/src/dm-writeboost-metadata.c -+++ dm-writeboost-2.1.1/src/dm-writeboost-metadata.c -@@ -321,7 +321,11 @@ static int read_superblock_header(struct - - io_req_sup = (struct dm_io_request) { - .client = wb->io_client, -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - .bi_rw = READ, -+#else -+ .bi_op = REQ_OP_READ, -+#endif - .notify.fn = NULL, - .mem.type = DM_IO_KMEM, - .mem.ptr.addr = buf, -@@ -385,7 +389,12 @@ static int format_superblock_header(stru - - io_req_sup = (struct dm_io_request) { - .client = wb->io_client, -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - .bi_rw = WRITE_FUA, -+#else -+ .bi_op = REQ_OP_WRITE, -+ .bi_op_flags = WRITE_FUA, -+#endif - .notify.fn = NULL, - .mem.type = DM_IO_KMEM, - .mem.ptr.addr = buf, -@@ -479,7 +488,11 @@ static int format_all_segment_headers(st - for (i = 0; i < wb->nr_segments; i++) { - struct dm_io_request io_req_seg = { - .client = wb->io_client, -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - .bi_rw = WRITE, -+#else -+ .bi_op = REQ_OP_WRITE, -+#endif - .notify.fn = format_segmd_endio, - .notify.context = &context, - .mem.type = DM_IO_KMEM, -@@ -653,7 +666,11 @@ static int read_superblock_record(struct - - io_req = (struct dm_io_request) { - .client = wb->io_client, -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - .bi_rw = READ, -+#else -+ .bi_op = REQ_OP_READ, -+#endif - .notify.fn = NULL, - .mem.type = DM_IO_KMEM, - .mem.ptr.addr = buf, -@@ -682,7 +699,11 @@ static int read_whole_segment(void *buf, - { - struct 
dm_io_request io_req = { - .client = wb->io_client, -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - .bi_rw = READ, -+#else -+ .bi_op = REQ_OP_READ, -+#endif - .notify.fn = NULL, - .mem.type = DM_IO_VMA, - .mem.ptr.addr = buf, -@@ -778,7 +799,11 @@ static int read_segment_header(void *buf - { - struct dm_io_request io_req = { - .client = wb->io_client, -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - .bi_rw = READ, -+#else -+ .bi_op = REQ_OP_READ, -+#endif - .notify.fn = NULL, - .mem.type = DM_IO_KMEM, - .mem.ptr.addr = buf, -Index: dm-writeboost-2.1.1/src/dm-writeboost-target.c -=================================================================== ---- dm-writeboost-2.1.1.orig/src/dm-writeboost-target.c -+++ dm-writeboost-2.1.1/src/dm-writeboost-target.c -@@ -106,9 +106,16 @@ int wb_io_internal(struct wb_device *wb, - eb = *err_bits; - - format_dev_t(buf, dev); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - DMERR("%s() I/O error(%d), bits(%lu), dev(%s), sector(%llu), rw(%d)", - caller, err, eb, - buf, (unsigned long long) regions->sector, io_req->bi_rw); -+#else -+ DMERR("%s() I/O error(%d), bits(%lu), dev(%s), sector(%llu), op(%d), op_flags(%d)", -+ caller, err, eb, -+ buf, (unsigned long long) regions->sector, -+ io_req->bi_op, io_req->bi_op_flags); -+#endif - } - - return err; -@@ -553,7 +560,11 @@ static void writeback_buffered_mb(struct - memcpy(buf, src, 1 << SECTOR_SHIFT); - io_req = (struct dm_io_request) { - .client = wb->io_client, -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - .bi_rw = WRITE, -+#else -+ .bi_op = REQ_OP_WRITE, -+#endif - .notify.fn = NULL, - .mem.type = DM_IO_KMEM, - .mem.ptr.addr = buf, -@@ -802,7 +813,11 @@ static int do_process_write(struct wb_de - * bio with REQ_FUA has data. - * For such bio, we first treat it like a normal bio and then as a REQ_FLUSH bio. - */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - if (bio->bi_rw & REQ_FUA) { -+#else -+ if (bio->bi_opf & REQ_FUA) { -+#endif - queue_barrier_io(wb, bio); - return DM_MAPIO_SUBMITTED; - } -@@ -963,10 +978,18 @@ static int writeboost_map(struct dm_targ - struct per_bio_data *pbd = per_bio_data(wb, bio); - pbd->type = PBD_NONE; - -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - if (bio->bi_rw & REQ_DISCARD) -+#else -+ if (bio_op(bio) == REQ_OP_DISCARD) -+#endif - return process_discard_bio(wb, bio); - -+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0) - if (bio->bi_rw & REQ_FLUSH) -+#else -+ if (bio->bi_opf & REQ_PREFLUSH) -+#endif - return process_flush_bio(wb, bio); - - return process_bio(wb, bio); diff -Nru dm-writeboost-2.1.1/debian/patches/series dm-writeboost-2.2.6/debian/patches/series --- dm-writeboost-2.1.1/debian/patches/series 2017-02-06 13:51:17.000000000 +0000 +++ dm-writeboost-2.2.6/debian/patches/series 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -add-compat-4.6.patch -add-compat-4.8.patch diff -Nru dm-writeboost-2.1.1/doc/dm-writeboost-readme.txt dm-writeboost-2.2.6/doc/dm-writeboost-readme.txt --- dm-writeboost-2.1.1/doc/dm-writeboost-readme.txt 2016-01-04 13:25:39.000000000 +0000 +++ dm-writeboost-2.2.6/doc/dm-writeboost-readme.txt 2016-09-19 06:15:04.000000000 +0000 @@ -1,21 +1,20 @@ dm-writeboost ============= dm-writeboost target provides block-level log-structured caching. -All cache data, writes and reads, are written to the cache device in sequential -manner. +All writes and reads are written to the caching device in sequential manner. 
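The two Debian compat patches removed above (per_bio_data_size -> per_io_data_size for 4.6, and the bi_rw -> bi_op/bi_op_flags split for 4.8) are no longer needed because upstream 2.2.x carries the same version gating itself; see the WB_IO_READ/WB_IO_WRITE/WB_IO_WRITE_FUA and req_is_write() macros added to dm-writeboost.h later in this diff. Below is a minimal userspace sketch of that compile-time pattern only; the struct layout and the numeric op values are stand-ins, not the kernel's definitions.

    #include <stdio.h>

    /* Stand-ins for the kernel's version macros so this builds in userspace. */
    #define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c))
    #ifndef LINUX_VERSION_CODE
    #define LINUX_VERSION_CODE KERNEL_VERSION(4, 8, 0)  /* pretend: building on 4.8 */
    #endif

    /* Illustrative request shape; field names mimic the bio API split,
     * but the values used below are made up for the demo. */
    struct io_request {
    #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
        int op;        /* 4.8+: the operation (read/write) ... */
        int op_flags;  /* ... and its modifier flags (e.g. FUA) are separate */
    #else
        int rw;        /* older kernels: one combined bi_rw-style field */
    #endif
    };

    #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
    #define WB_IO_READ       .op = 0, .op_flags = 0
    #define WB_IO_WRITE      .op = 1, .op_flags = 0
    #define WB_IO_WRITE_FUA  .op = 1, .op_flags = 1
    #define req_is_write(r)  ((r)->op == 1)
    #else
    #define WB_IO_READ       .rw = 0
    #define WB_IO_WRITE      .rw = 1
    #define WB_IO_WRITE_FUA  .rw = 3
    #define req_is_write(r)  ((r)->rw & 1)
    #endif

    int main(void)
    {
        struct io_request flush_log   = { WB_IO_WRITE_FUA };
        struct io_request read_header = { WB_IO_READ };

        printf("flush_log is a %s, read_header is a %s\n",
               req_is_write(&flush_log) ? "write" : "read",
               req_is_write(&read_header) ? "write" : "read");
        return 0;
    }

With the field selection hidden behind the designated-initializer macros, the request-building code (for example the WB_IO_WRITE_FUA initializer used for the superblock record) compiles unchanged on either side of the 4.8 boundary, which is why the out-of-tree compat patches and their series file are dropped.
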
Mechanism ========= -Control three layers (RAM buffer, cache device and backing device) ------------------------------------------------------------------- -dm-writeboost controls three different layers - RAM buffer (rambuf), cache +Control three layers (RAM buffer, caching device and backing device) +-------------------------------------------------------------------- +dm-writeboost controls three different layers - RAM buffer (rambuf), caching device (cache_dev, e.g SSD) and backing device (backing_dev, e.g. HDD). All data are first stored in the RAM buffer and when the RAM buffer is full, dm-writeboost adds metadata block (with checksum) on the RAM buffer to create a -"log". Afterward, the log is written to the cache device as background -processing in sequential manner and thereafter it's written back to the backing -device in background as well. +"log". Afterward, the log is written to the caching device sequentially by a +background thread and thereafter written back to the backing device in the +background as well. dm-writeboost vs dm-cache or bcache @@ -34,20 +33,20 @@ it may not be the best when the ave. I/O size is very large in your workload. However, if the splitting overhead aside, dm-writeboost is always the best of all because it caches data in sequential manner - the most efficient I/O pattern -for the SSD cache device in terms of performance. +yet for the SSD caching device in terms of performance. It's known from experiments that dm-writeboost performs no good when you create a dm-writeboost'd device in virtual environment like KVM. So, keep in mind to -use this driver in the host (or physical) machine. +use this driver in a physical machine. How To Use dm-writeboost ======================== -Trigger cache device reformat ------------------------------ -The cache device is triggered reformating only if the first one sector of the -cache device is zeroed out. Note that this operation should be omitted when -you resume the cache device. +Trigger caching device reformat +------------------------------- +The caching device is triggered reformating only if the first one sector of the +caching device is zeroed out. Note that this operation should be omitted when +you resume the caching device. e.g. dd if=/dev/zero of=/dev/mapper/wbdev oflag=direct bs=512 count=1 Construct dm-writeboost'd device @@ -77,23 +76,26 @@ Shut down the system -------------------- On shutting down the system, you don't need to do anything at all. The data -and metadata is safely saved on the cache device. But, if you want to do +and metadata is safely saved on the caching device. But, if you want to do deconstruct the device manually, use dmsetup remove. Resume after system reboot -------------------------- To resume your caching device of the on-disk state, run dmsetup create command -with the same parameter but DO NOT zero out the first sector of the cache device. -This replays the logs on the cache device to rebuild the internal data structures. +with the same parameter but DO NOT zero out the first sector of the caching device. +This replays the logs on the caching device to rebuild the internal data structures. 
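The replay step described above is what makes resume safe: the segments on the caching device are scanned, only logs that validate (checksum and id continuity, as in find_max_id() and apply_valid_segments() in dm-writeboost-metadata.c further down) are applied, and the newest valid id becomes the starting point. The following is a toy userspace model of that idea only; the real on-disk segment layout differs.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define NR_SEGMENTS  4
    #define PAYLOAD_SIZE 16

    struct segment {
        uint64_t id;                    /* monotonically increasing log id */
        uint32_t checksum;              /* checksum over the payload */
        uint8_t  payload[PAYLOAD_SIZE]; /* cached blocks in the real module */
    };

    static uint32_t csum(const uint8_t *p, size_t n)
    {
        uint32_t s = 17;
        while (n--)
            s = s * 31 + *p++;
        return s;
    }

    int main(void)
    {
        struct segment segs[NR_SEGMENTS];
        uint64_t max_id = 0;
        int i;

        /* Build a fake log; segment index 2 will be "torn" (bad checksum). */
        for (i = 0; i < NR_SEGMENTS; i++) {
            memset(segs[i].payload, 'A' + i, PAYLOAD_SIZE);
            segs[i].id = i + 1;
            segs[i].checksum = csum(segs[i].payload, PAYLOAD_SIZE);
        }
        segs[2].payload[0] ^= 0xff;     /* simulate a partial write */

        /* Replay: apply segments in order while they still validate. */
        for (i = 0; i < NR_SEGMENTS; i++) {
            if (segs[i].checksum != csum(segs[i].payload, PAYLOAD_SIZE)) {
                printf("segment id=%llu invalid, stop replay\n",
                       (unsigned long long)segs[i].id);
                break;
            }
            max_id = segs[i].id;        /* the module rebuilds its hash table here */
        }
        printf("last_flushed_segment_id = %llu\n", (unsigned long long)max_id);
        return 0;
    }

In the module the same scan both sets last_flushed_segment_id and re-registers the surviving metablocks in the hash table, which is why no extra bookkeeping is needed across a reboot.
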
-Remove cache device -------------------- -If you want to detach your cache device for some reasons (you don't like -dm-writeboost anymore or you try to upgrade the cache device to a newly +Remove caching device +--------------------- +If you want to detach your caching device for some reasons (you don't like +dm-writeboost anymore or you try to upgrade the caching device to a newly perchased device) the safest way to do this is clean the dirty data up from your -cache device first and then deconstrust the dm-writeboost'd device. -You can use drop_caches message to forcibly clean up your cache device. +caching device first and then deconstrust the dm-writeboost'd device. +You can do this by first suspend/resuming the device to drop all transient data +from RAM buffer and then sending drop_caches message to drop dirty cache blocks +from the caching device. e.g. +dmsetup suspend wbdev; dmsetup resume wbdev dmsetup message wbdev 0 drop_caches dmsetup remove wbdev @@ -107,7 +109,7 @@ nr_max_batched_writeback accepts: 1..32 - default: 8 + default: 32 As optimization, dm-writeboost writes back $nr_max_batched_writeback segments simultaneously. The dirty caches in the segments are sorted in ascending order of the destination address and then written back. Setting large value can boost @@ -126,16 +128,15 @@ default: 0 (disabled) Sync all the volatile data every $sync_data_interval second. 0 means disabled. -read_cache_threshold (int) [Experimental] +read_cache_threshold (int) accepts: 0..127 default: 0 (read caching disabled) More than $read_cache_threshold * 4KB consecutive reads won't be staged. -write_through_mode (bool) +write_around_mode (bool) accepts: 0..1 default: 0 -By enabling this, dm-writeboost never cache dirty data by writing data directly -to the backing device. +By enabling this, dm-writeboost writes data directly to the backing device. Messages -------- @@ -153,9 +154,9 @@ (2) Others drop_caches - Wait for all dirty data on the cache device to be written back to the backing + Wait for all dirty data on the caching device to be written back to the backing device. This is interruptible. -clear_stats +clear_stat Clear the statistic info (see `Status`). Status diff -Nru dm-writeboost-2.1.1/Makefile dm-writeboost-2.2.6/Makefile --- dm-writeboost-2.1.1/Makefile 2016-01-04 13:25:39.000000000 +0000 +++ dm-writeboost-2.2.6/Makefile 2016-09-19 06:15:04.000000000 +0000 @@ -1,4 +1,4 @@ -MODULE_VERSION ?= 2.1.1 +MODULE_VERSION ?= 2.2.6 DKMS_DIR := /usr/src/dm-writeboost-$(MODULE_VERSION) DKMS_KEY := -m dm-writeboost -v $(MODULE_VERSION) diff -Nru dm-writeboost-2.1.1/README.md dm-writeboost-2.2.6/README.md --- dm-writeboost-2.1.1/README.md 2016-01-04 13:25:39.000000000 +0000 +++ dm-writeboost-2.2.6/README.md 2016-09-19 06:15:04.000000000 +0000 @@ -36,15 +36,15 @@ for system shutdown because dm-writeboost is even durable even against sudden power failure. 
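The nr_max_batched_writeback text above notes that the dirty caches of a whole batch of segments are sorted in ascending order of destination address before being written back, so the sweep over the backing device is close to sequential. Below is a userspace sketch of that ordering only; the module keeps the IOs in an RB-tree via compare_writeback_io() (see dm-writeboost-daemon.c below), and qsort() merely stands in for it here.

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct writeback_io {
        uint64_t sector;  /* destination sector on the backing device */
        uint64_t id;      /* log id; larger means newer data */
    };

    static int cmp_writeback_io(const void *pa, const void *pb)
    {
        const struct writeback_io *a = pa, *b = pb;
        if (a->sector != b->sector)
            return a->sector < b->sector ? -1 : 1;
        /* same sector cached twice: issue the newer one last so it wins */
        return a->id < b->id ? -1 : (a->id > b->id);
    }

    int main(void)
    {
        struct writeback_io batch[] = {
            { 4096, 7 }, { 8, 5 }, { 520, 6 }, { 8, 9 }, { 16384, 5 },
        };
        size_t i, n = sizeof(batch) / sizeof(batch[0]);

        qsort(batch, n, sizeof(batch[0]), cmp_writeback_io);

        for (i = 0; i < n; i++)
            printf("write back sector %llu (from log id %llu)\n",
                   (unsigned long long)batch[i].sector,
                   (unsigned long long)batch[i].id);
        return 0;
    }

Using the id as the tie-break also means that when the same sector is dirty in two segments of the batch, the newer data is written last and therefore survives on the backing device.
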
## Distribution Packages -- Debian: [Stretch](https://packages.debian.org/testing/dm-writeboost-dkms), [Sid](https://packages.debian.org/sid/dm-writeboost-dkms) -- Ubuntu: [Wily](http://packages.ubuntu.com/wily/dm-writeboost-dkms) +- Debian: [Stretch](https://packages.debian.org/source/testing/dm-writeboost), [Sid](https://packages.debian.org/source/sid/dm-writeboost) +- Ubuntu: [Yakkety](http://packages.ubuntu.com/yakkety/kernel/dm-writeboost-dkms), [Xenial](http://packages.ubuntu.com/xenial/dm-writeboost-dkms), [Wily](http://packages.ubuntu.com/wily/dm-writeboost-dkms) - [Tanglu](http://packages.tanglu.org/ja/dasyatis/kernel/dm-writeboost-dkms) - Momonga ## Related Projects * https://github.com/akiradeveloper/dm-writeboost-tools: Tools to help users analyze the state of the cache device * https://gitlab.com/onlyjob/writeboost: A management tool including init script -* https://github.com/jthornber/device-mapper-test-suite: Testing framework written in Ruby +* https://github.com/akiradeveloper/writeboost-test-suite: Testing framework written in Scala ## Related works * Y. Hu and Q. Yang -- DCD Disk Caching Disk: A New Approach for Boosting I/O Performance (1995) diff -Nru dm-writeboost-2.1.1/src/dkms.conf dm-writeboost-2.2.6/src/dkms.conf --- dm-writeboost-2.1.1/src/dkms.conf 2016-01-04 13:25:39.000000000 +0000 +++ dm-writeboost-2.2.6/src/dkms.conf 2016-09-19 06:15:04.000000000 +0000 @@ -1,5 +1,5 @@ PACKAGE_NAME="dm-writeboost" -PACKAGE_VERSION="2.1.1" +PACKAGE_VERSION="2.2.6" BUILT_MODULE_NAME="dm-writeboost" DEST_MODULE_LOCATION="/kernel/drivers/md" MAKE="make all KERNEL_TREE=$kernel_source_dir" diff -Nru dm-writeboost-2.1.1/src/dm-writeboost-daemon.c dm-writeboost-2.2.6/src/dm-writeboost-daemon.c --- dm-writeboost-2.1.1/src/dm-writeboost-daemon.c 2016-01-04 13:25:39.000000000 +0000 +++ dm-writeboost-2.2.6/src/dm-writeboost-daemon.c 2016-09-19 06:15:04.000000000 +0000 @@ -52,72 +52,92 @@ /*----------------------------------------------------------------------------*/ -static void process_deferred_barriers(struct wb_device *wb, struct flush_job *job) +static void process_deferred_barriers(struct wb_device *wb, struct rambuffer *rambuf) { - int r = 0; - bool has_barrier = !bio_list_empty(&job->barrier_ios); - - /* Make all the preceding data persistent. */ - if (has_barrier) - maybe_IO(blkdev_issue_flush(wb->cache_dev->bdev, GFP_NOIO, NULL)); - - /* Ack the chained barrier requests. */ + bool has_barrier = !bio_list_empty(&rambuf->barrier_ios); if (has_barrier) { struct bio *bio; - while ((bio = bio_list_pop(&job->barrier_ios))) { - if (is_live(wb)) - bio_endio_compat(bio, 0); - else - bio_endio_compat(bio, -EIO); - } + + /* Make all the preceding data persistent. */ + int err = blkdev_issue_flush(wb->cache_dev->bdev, GFP_NOIO, NULL); + + /* Ack the chained barrier requests. 
*/ + while ((bio = bio_list_pop(&rambuf->barrier_ios))) + bio_endio_compat(bio, err); } } -void flush_proc(struct work_struct *work) +static bool should_flush(struct wb_device *wb) { - int r = 0; + return atomic64_read(&wb->last_queued_segment_id) > + atomic64_read(&wb->last_flushed_segment_id); +} - struct flush_job *job = container_of(work, struct flush_job, work); - struct rambuffer *rambuf = container_of(job, struct rambuffer, job); +static void do_flush_proc(struct wb_device *wb) +{ + struct segment_header *seg; + struct rambuffer *rambuf; + u64 id; + struct dm_io_request io_req; + struct dm_io_region region; - struct wb_device *wb = job->wb; - struct segment_header *seg = job->seg; + if (!should_flush(wb)) { + schedule_timeout_interruptible(msecs_to_jiffies(1000)); + return; + } + + id = atomic64_read(&wb->last_flushed_segment_id) + 1; - struct dm_io_request io_req = { + smp_rmb(); + + rambuf = get_rambuffer_by_id(wb, id); + seg = rambuf->seg; + + io_req = (struct dm_io_request) { + WB_IO_WRITE, .client = wb->io_client, - .bi_rw = WRITE, .notify.fn = NULL, .mem.type = DM_IO_VMA, .mem.ptr.addr = rambuf->data, }; - struct dm_io_region region = { + region = (struct dm_io_region) { .bdev = wb->cache_dev->bdev, .sector = seg->start_sector, .count = (seg->length + 1) << 3, }; - maybe_IO(wb_io(&io_req, 1, ®ion, NULL, false)); + if (wb_io(&io_req, 1, ®ion, NULL, false)) + return; /* * Deferred ACK for barrier requests * To serialize barrier ACK in logging we wait for the previous segment * to be persistently written (if needed). */ - wait_for_flushing(wb, SUB_ID(seg->id, 1)); - process_deferred_barriers(wb, job); + process_deferred_barriers(wb, rambuf); /* * We can count up the last_flushed_segment_id only after segment * is written persistently. Counting up the id is serialized. 
*/ + smp_wmb(); atomic64_inc(&wb->last_flushed_segment_id); wake_up(&wb->flush_wait_queue); } +int flush_daemon_proc(void *data) +{ + struct wb_device *wb = data; + while (!kthread_should_stop()) + do_flush_proc(wb); + return 0; +} + void wait_for_flushing(struct wb_device *wb, u64 id) { wait_event(wb->flush_wait_queue, atomic64_read(&wb->last_flushed_segment_id) >= id); + smp_rmb(); } /*----------------------------------------------------------------------------*/ @@ -135,14 +155,12 @@ static void submit_writeback_io(struct wb_device *wb, struct writeback_io *writeback_io) { - int r; - - BUG_ON(!writeback_io->data_bits); + ASSERT(writeback_io->data_bits > 0); if (writeback_io->data_bits == 255) { struct dm_io_request io_req_w = { + WB_IO_WRITE, .client = wb->io_client, - .bi_rw = WRITE, .notify.fn = writeback_endio, .notify.context = wb, .mem.type = DM_IO_VMA, @@ -153,9 +171,8 @@ .sector = writeback_io->sector, .count = 1 << 3, }; - maybe_IO(wb_io(&io_req_w, 1, ®ion_w, NULL, false)); - if (r) - writeback_endio(0, wb); + if (wb_io(&io_req_w, 1, ®ion_w, NULL, false)) + writeback_endio(1, wb); } else { u8 i; for (i = 0; i < 8; i++) { @@ -167,21 +184,20 @@ continue; io_req_w = (struct dm_io_request) { + WB_IO_WRITE, .client = wb->io_client, - .bi_rw = WRITE, .notify.fn = writeback_endio, .notify.context = wb, .mem.type = DM_IO_VMA, - .mem.ptr.addr = writeback_io->data + (i << SECTOR_SHIFT), + .mem.ptr.addr = writeback_io->data + (i << 9), }; region_w = (struct dm_io_region) { .bdev = wb->backing_dev->bdev, .sector = writeback_io->sector + i, .count = 1, }; - maybe_IO(wb_io(&io_req_w, 1, ®ion_w, NULL, false)); - if (r) - writeback_endio(0, wb); + if (wb_io(&io_req_w, 1, ®ion_w, NULL, false)) + writeback_endio(1, wb); } } } @@ -209,8 +225,8 @@ */ static bool compare_writeback_io(struct writeback_io *a, struct writeback_io *b) { - BUG_ON(!a); - BUG_ON(!b); + ASSERT(a); + ASSERT(b); if (a->sector < b->sector) return true; if (a->id < b->id) @@ -254,20 +270,13 @@ rb_insert_color(&writeback_io->rb_node, &wb->writeback_tree); } -/* - * Read the data to writeback IOs and add them into the RB-tree to sort. 
- */ -static void prepare_writeback_ios(struct wb_device *wb, struct writeback_segment *writeback_seg, - size_t *writeback_io_count) +static int fill_writeback_seg(struct wb_device *wb, struct writeback_segment *writeback_seg) { - int r = 0; - u8 i; - struct segment_header *seg = writeback_seg->seg; struct dm_io_request io_req_r = { + WB_IO_READ, .client = wb->io_client, - .bi_rw = READ, .notify.fn = NULL, .mem.type = DM_IO_VMA, .mem.ptr.addr = writeback_seg->buf, @@ -282,14 +291,21 @@ * dm_io() allows region.count = 0 * so we don't need to skip here in case of seg->length = 0 */ - maybe_IO(wb_io(&io_req_r, 1, ®ion_r, NULL, false)); + return wb_io(&io_req_r, 1, ®ion_r, NULL, false); +} +static void prepare_writeback_ios(struct wb_device *wb, struct writeback_segment *writeback_seg, + size_t *writeback_io_count) +{ + struct segment_header *seg = writeback_seg->seg; + + u8 i; for (i = 0; i < seg->length; i++) { struct writeback_io *writeback_io; struct metablock *mb = seg->mb_array + i; struct dirtiness dirtiness = read_mb_dirtiness(wb, seg, mb); - BUG_ON(!dirtiness.data_bits); + ASSERT(dirtiness.data_bits > 0); if (!dirtiness.is_dirty) continue; @@ -304,7 +320,7 @@ } } -static void mark_clean_seg(struct wb_device *wb, struct segment_header *seg) +void mark_clean_seg(struct wb_device *wb, struct segment_header *seg) { u8 i; for (i = 0; i < seg->length; i++) { @@ -314,54 +330,66 @@ } } -static void do_writeback_segs(struct wb_device *wb) +/* + * Try writeback some specified segs and returns if all writeback ios succeeded. + */ +static bool try_writeback_segs(struct wb_device *wb) { - int r; - size_t k; struct writeback_segment *writeback_seg; - size_t writeback_io_count = 0; + u32 k; /* Create RB-tree */ wb->writeback_tree = RB_ROOT; - for (k = 0; k < wb->num_writeback_segs; k++) { + for (k = 0; k < wb->nr_cur_batched_writeback; k++) { writeback_seg = *(wb->writeback_segs + k); + + if (fill_writeback_seg(wb, writeback_seg)) + return false; + prepare_writeback_ios(wb, writeback_seg, &writeback_io_count); } + atomic_set(&wb->writeback_io_count, writeback_io_count); atomic_set(&wb->writeback_fail_count, 0); /* Pop rbnodes out of the tree and submit writeback I/Os */ submit_writeback_ios(wb); wait_event(wb->writeback_io_wait_queue, !atomic_read(&wb->writeback_io_count)); - if (atomic_read(&wb->writeback_fail_count)) - mark_dead(wb); - maybe_IO(blkdev_issue_flush(wb->backing_dev->bdev, GFP_NOIO, NULL)); - /* A segment after written back is clean */ - for (k = 0; k < wb->num_writeback_segs; k++) { - writeback_seg = *(wb->writeback_segs + k); - mark_clean_seg(wb, writeback_seg->seg); - } - atomic64_add(wb->num_writeback_segs, &wb->last_writeback_segment_id); + return atomic_read(&wb->writeback_fail_count) == 0; +} + +static bool do_writeback_segs(struct wb_device *wb) +{ + if (!try_writeback_segs(wb)) + return false; + + blkdev_issue_flush(wb->backing_dev->bdev, GFP_NOIO, NULL); + return true; } /* * Calculate the number of segments to write back. 
*/ -static u32 calc_nr_writeback(struct wb_device *wb) +void update_nr_empty_segs(struct wb_device *wb) { - u32 nr_writeback_candidates, nr_max_batch; + wb->nr_empty_segs = + atomic64_read(&wb->last_writeback_segment_id) + wb->nr_segments + - wb->current_seg->id; +} - nr_writeback_candidates = atomic64_read(&wb->last_flushed_segment_id) - - atomic64_read(&wb->last_writeback_segment_id); - if (!nr_writeback_candidates) - return 0; +static u32 calc_nr_writeback(struct wb_device *wb) +{ + u32 nr_writeback_candidates = + atomic64_read(&wb->last_flushed_segment_id) + - atomic64_read(&wb->last_writeback_segment_id); - nr_max_batch = ACCESS_ONCE(wb->nr_max_batched_writeback); - if (wb->nr_cur_batched_writeback != nr_max_batch) + u32 nr_max_batch = ACCESS_ONCE(wb->nr_max_batched_writeback); + if (wb->nr_writeback_segs != nr_max_batch) try_alloc_writeback_ios(wb, nr_max_batch, GFP_NOIO | __GFP_NOWARN); - return min(nr_writeback_candidates, wb->nr_cur_batched_writeback); + + return min3(nr_writeback_candidates, wb->nr_writeback_segs, wb->nr_empty_segs + 1); } static bool should_writeback(struct wb_device *wb) @@ -373,29 +401,40 @@ static void do_writeback_proc(struct wb_device *wb) { - u32 k, nr_writeback; + u32 k, nr_writeback_tbd; if (!should_writeback(wb)) { schedule_timeout_interruptible(msecs_to_jiffies(1000)); return; } - nr_writeback = calc_nr_writeback(wb); - if (!nr_writeback) { + nr_writeback_tbd = calc_nr_writeback(wb); + if (!nr_writeback_tbd) { schedule_timeout_interruptible(msecs_to_jiffies(1000)); return; } + smp_rmb(); + /* Store segments into writeback_segs */ - for (k = 0; k < nr_writeback; k++) { + for (k = 0; k < nr_writeback_tbd; k++) { struct writeback_segment *writeback_seg = *(wb->writeback_segs + k); writeback_seg->seg = get_segment_header_by_id(wb, atomic64_read(&wb->last_writeback_segment_id) + 1 + k); } - wb->num_writeback_segs = nr_writeback; + wb->nr_cur_batched_writeback = nr_writeback_tbd; + + if (!do_writeback_segs(wb)) + return; - do_writeback_segs(wb); + /* A segment after written back is clean */ + for (k = 0; k < wb->nr_cur_batched_writeback; k++) { + struct writeback_segment *writeback_seg = *(wb->writeback_segs + k); + mark_clean_seg(wb, writeback_seg->seg); + } + smp_wmb(); + atomic64_add(wb->nr_cur_batched_writeback, &wb->last_writeback_segment_id); wake_up(&wb->writeback_wait_queue); } @@ -417,6 +456,7 @@ wake_up_process(wb->writeback_daemon); wait_event(wb->writeback_wait_queue, atomic64_read(&wb->last_writeback_segment_id) >= id); + smp_rmb(); wb->urge_writeback = false; } @@ -442,6 +482,8 @@ old = new; + update_nr_empty_segs(wb); + schedule_timeout_interruptible(msecs_to_jiffies(intvl)); } return 0; @@ -451,8 +493,6 @@ static void update_superblock_record(struct wb_device *wb) { - int r = 0; - struct superblock_record_device o; void *buf; struct dm_io_request io_req; @@ -466,8 +506,8 @@ memcpy(buf, &o, sizeof(o)); io_req = (struct dm_io_request) { + WB_IO_WRITE_FUA, .client = wb->io_client, - .bi_rw = WRITE_FUA, .notify.fn = NULL, .mem.type = DM_IO_KMEM, .mem.ptr.addr = buf, @@ -477,7 +517,7 @@ .sector = (1 << 11) - 1, .count = 1, }; - maybe_IO(wb_io(&io_req, 1, ®ion, NULL, false)); + wb_io(&io_req, 1, ®ion, NULL, false); mempool_free(buf, wb->buf_1_pool); } @@ -507,8 +547,6 @@ int data_synchronizer_proc(void *data) { - int r = 0; - struct wb_device *wb = data; unsigned long intvl; @@ -522,7 +560,7 @@ } flush_current_buffer(wb); - maybe_IO(blkdev_issue_flush(wb->cache_dev->bdev, GFP_NOIO, NULL)); + blkdev_issue_flush(wb->cache_dev->bdev, GFP_NOIO, NULL); 
schedule_timeout_interruptible(msecs_to_jiffies(intvl)); } return 0; diff -Nru dm-writeboost-2.1.1/src/dm-writeboost-daemon.h dm-writeboost-2.2.6/src/dm-writeboost-daemon.h --- dm-writeboost-2.1.1/src/dm-writeboost-daemon.h 2016-01-04 13:25:39.000000000 +0000 +++ dm-writeboost-2.2.6/src/dm-writeboost-daemon.h 2016-09-19 06:15:04.000000000 +0000 @@ -22,7 +22,7 @@ /*----------------------------------------------------------------------------*/ -void flush_proc(struct work_struct *); +int flush_daemon_proc(void *); void wait_for_flushing(struct wb_device *, u64 id); /*----------------------------------------------------------------------------*/ @@ -32,8 +32,10 @@ /*----------------------------------------------------------------------------*/ +void update_nr_empty_segs(struct wb_device *); int writeback_daemon_proc(void *); void wait_for_writeback(struct wb_device *, u64 id); +void mark_clean_seg(struct wb_device *, struct segment_header *seg); /*----------------------------------------------------------------------------*/ diff -Nru dm-writeboost-2.1.1/src/dm-writeboost.h dm-writeboost-2.2.6/src/dm-writeboost.h --- dm-writeboost-2.1.1/src/dm-writeboost.h 2016-01-04 13:25:39.000000000 +0000 +++ dm-writeboost-2.2.6/src/dm-writeboost.h 2016-09-19 06:15:04.000000000 +0000 @@ -104,7 +104,6 @@ */ /* - FROM ------------------------------------ */ __le64 id; - /* TODO Add timestamp? */ __le32 checksum; /* * The number of metablocks in this segment header to be considered in @@ -150,22 +149,12 @@ /*----------------------------------------------------------------------------*/ /* - * Foreground queues this object and flush daemon later pops one job to submit - * logging write to the cache device. - */ -struct flush_job { - struct work_struct work; - struct wb_device *wb; - struct segment_header *seg; - struct bio_list barrier_ios; /* List of deferred bios */ -}; - -/* * RAM buffer is a buffer that any dirty data are first written into. */ struct rambuffer { + struct segment_header *seg; void *data; - struct flush_job job; + struct bio_list barrier_ios; /* List of deferred bios */ }; /*----------------------------------------------------------------------------*/ @@ -245,13 +234,7 @@ #define STATLEN (1 << 4) enum WB_FLAG { - /* - * This flag is set when either one of the underlying devices returned - * EIO and we must immediately block up the whole to avoid further - * damage. - */ - WB_DEAD = 0, - WB_CREATED = 1, + WB_CREATED = 0, }; #define SEGMENT_SIZE_ORDER 10 @@ -266,7 +249,7 @@ struct dm_dev *backing_dev; /* Slow device (HDD) */ struct dm_dev *cache_dev; /* Fast device (SSD) */ - bool write_through_mode; + bool write_around_mode; unsigned nr_ctr_args; const char **ctr_args; @@ -336,23 +319,23 @@ struct rambuffer *rambuf_pool; + atomic64_t last_queued_segment_id; + /*--------------------------------------------------------------------*/ /******************** * One-shot Writeback ********************/ - wait_queue_head_t writeback_mb_wait_queue; struct dm_kcopyd_client *copier; /*--------------------------------------------------------------------*/ - /**************** - * Buffer Flusher - ****************/ + /************** + * Flush Daemon + **************/ - mempool_t *flush_job_pool; - struct workqueue_struct *flusher_wq; + struct task_struct *flush_daemon; /* * Wait for a specified segment to be flushed. 
Non-interruptible @@ -407,13 +390,15 @@ atomic_t writeback_io_count; atomic_t writeback_fail_count; - u32 nr_cur_batched_writeback; u32 nr_max_batched_writeback; /* Tunable */ + u32 nr_max_batched_writeback_saved; struct rb_root writeback_tree; - u32 num_writeback_segs; /* Number of segments to write back */ + u32 nr_writeback_segs; struct writeback_segment **writeback_segs; + u32 nr_cur_batched_writeback; /* Number of segments to be written back */ + u32 nr_empty_segs; /*--------------------------------------------------------------------*/ @@ -423,6 +408,7 @@ struct task_struct *writeback_modulator; u8 writeback_threshold; /* Tunable */ + u8 writeback_threshold_saved; /*--------------------------------------------------------------------*/ @@ -432,6 +418,7 @@ struct task_struct *sb_record_updater; unsigned long update_sb_record_interval; /* Tunable */ + unsigned long update_sb_record_interval_saved; /*--------------------------------------------------------------------*/ @@ -441,6 +428,7 @@ struct task_struct *data_synchronizer; unsigned long sync_data_interval; /* Tunable */ + unsigned long sync_data_interval_saved; /*--------------------------------------------------------------------*/ @@ -448,9 +436,12 @@ * Read Caching **************/ + u32 nr_read_cache_cells; + u32 nr_read_cache_cells_saved; struct work_struct read_cache_work; struct read_cache_cells *read_cache_cells; u32 read_cache_threshold; /* Tunable */ + u32 read_cache_threshold_saved; /*--------------------------------------------------------------------*/ @@ -468,6 +459,11 @@ /*----------------------------------------------------------------------------*/ +struct write_io { + void *data; /* 4KB */ + u8 data_bits; +}; + void acquire_new_seg(struct wb_device *, u64 id); void cursor_init(struct wb_device *); void flush_current_buffer(struct wb_device *); @@ -475,10 +471,12 @@ void dec_nr_dirty_caches(struct wb_device *); bool mark_clean_mb(struct wb_device *, struct metablock *); struct dirtiness read_mb_dirtiness(struct wb_device *, struct segment_header *, struct metablock *); -void prepare_overwrite(struct wb_device *, struct segment_header *, struct metablock *old_mb, bool overwrite_fullsize); +int prepare_overwrite(struct wb_device *, struct segment_header *, struct metablock *old_mb, struct write_io *, u8 overwrite_bits); /*----------------------------------------------------------------------------*/ +#define ASSERT(cond) BUG_ON(!(cond)) + #define check_buffer_alignment(buf) \ do_check_buffer_alignment(buf, #buf, __func__) void do_check_buffer_alignment(void *, const char *, const char *); @@ -500,45 +498,21 @@ /*----------------------------------------------------------------------------*/ -/* - * Device blockup (Marking the device as dead) - * ------------------------------------------- - * - * I/O error on cache device blocks up the whole system. - * After the system is blocked up, cache device is dead, all I/Os to cache - * device are ignored as if it becomes /dev/null. - */ -#define mark_dead(wb) set_bit(WB_DEAD, &wb->flags) -#define is_live(wb) likely(!test_bit(WB_DEAD, &wb->flags)) - -/* - * This macro wraps I/Os to cache device to add context of failure. 
- */ -#define maybe_IO(proc) \ - do { \ - r = 0; \ - if (is_live(wb)) {\ - r = proc; \ - } else { \ - r = -EIO; \ - break; \ - } \ - \ - if (r == -EIO) { \ - mark_dead(wb); \ - DMERR("device is marked as dead"); \ - break; \ - } else if (r == -ENOMEM) { \ - DMERR("I/O failed by ENOMEM"); \ - schedule_timeout_interruptible(msecs_to_jiffies(1000));\ - continue; \ - } else if (r == -EOPNOTSUPP) { \ - break; \ - } else if (r) { \ - WARN_ONCE(1, "I/O failed for unknown reason err(%d)", r); \ - break; \ - } \ - } while (r) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0) +#define req_is_write(req) op_is_write((req)->bi_op) +#define bio_is_barrier(bio) ((bio)->bi_opf & REQ_PREFLUSH) +#define bio_is_fua(bio) ((bio)->bi_opf & REQ_FUA) +#define WB_IO_WRITE .bi_op = REQ_OP_WRITE, .bi_op_flags = 0 +#define WB_IO_READ .bi_op = REQ_OP_READ, .bi_op_flags = 0 +#define WB_IO_WRITE_FUA .bi_op = REQ_OP_WRITE, .bi_op_flags = REQ_FUA +#else +#define req_is_write(req) ((req)->bi_rw == WRITE) +#define bio_is_barrier(bio) ((bio)->bi_rw & REQ_FLUSH) +#define bio_is_fua(bio) ((bio)->bi_rw & REQ_FUA) +#define WB_IO_WRITE .bi_rw = WRITE +#define WB_IO_READ .bi_rw = READ +#define WB_IO_WRITE_FUA .bi_rw = WRITE_FUA +#endif /*----------------------------------------------------------------------------*/ diff -Nru dm-writeboost-2.1.1/src/dm-writeboost-metadata.c dm-writeboost-2.2.6/src/dm-writeboost-metadata.c --- dm-writeboost-2.1.1/src/dm-writeboost-metadata.c 2016-01-04 13:25:39.000000000 +0000 +++ dm-writeboost-2.2.6/src/dm-writeboost-metadata.c 2016-09-19 06:15:04.000000000 +0000 @@ -249,7 +249,7 @@ struct ht_head *ht_get_head(struct wb_device *wb, struct lookup_key *key) { u32 idx; - div_u64_rem(key->sector, wb->htsize, &idx); + div_u64_rem(key->sector >> 3, wb->htsize, &idx); return large_array_at(wb->htable, idx); } @@ -277,6 +277,7 @@ hlist_del(&mb->ht_list); hlist_add_head(&mb->ht_list, &head->ht_list); + BUG_ON(key->sector & 7); // should be 4KB aligned mb->sector = key->sector; }; @@ -310,7 +311,7 @@ static int read_superblock_header(struct superblock_header_device *sup, struct wb_device *wb) { - int r = 0; + int err = 0; struct dm_io_request io_req_sup; struct dm_io_region region_sup; @@ -320,8 +321,8 @@ check_buffer_alignment(buf); io_req_sup = (struct dm_io_request) { + WB_IO_READ, .client = wb->io_client, - .bi_rw = READ, .notify.fn = NULL, .mem.type = DM_IO_KMEM, .mem.ptr.addr = buf, @@ -331,15 +332,15 @@ .sector = 0, .count = 1, }; - r = wb_io(&io_req_sup, 1, ®ion_sup, NULL, false); - if (r) + err = wb_io(&io_req_sup, 1, ®ion_sup, NULL, false); + if (err) goto bad_io; memcpy(sup, buf, sizeof(*sup)); bad_io: mempool_free(buf, wb->buf_1_pool); - return r; + return err; } /* @@ -348,27 +349,28 @@ */ static int audit_cache_device(struct wb_device *wb) { - int r = 0; + int err = 0; struct superblock_header_device sup; - r = read_superblock_header(&sup, wb); - if (r) { + err = read_superblock_header(&sup, wb); + if (err) { DMERR("read_superblock_header failed"); - return r; + return err; } wb->do_format = false; - if (le32_to_cpu(sup.magic) != WB_MAGIC) { + if (le32_to_cpu(sup.magic) != WB_MAGIC || + wb->write_around_mode) { /* write-around mode should discard all caches */ wb->do_format = true; DMERR("Superblock Header: Magic number invalid"); return 0; } - return r; + return err; } static int format_superblock_header(struct wb_device *wb) { - int r = 0; + int err = 0; struct dm_io_request io_req_sup; struct dm_io_region region_sup; @@ -384,8 +386,8 @@ memcpy(buf, &sup, sizeof(sup)); io_req_sup = (struct 
dm_io_request) { + WB_IO_WRITE_FUA, .client = wb->io_client, - .bi_rw = WRITE_FUA, .notify.fn = NULL, .mem.type = DM_IO_KMEM, .mem.ptr.addr = buf, @@ -395,13 +397,13 @@ .sector = 0, .count = 1, }; - r = wb_io(&io_req_sup, 1, ®ion_sup, NULL, false); - if (r) + err = wb_io(&io_req_sup, 1, ®ion_sup, NULL, false); + if (err) goto bad_io; bad_io: mempool_free(buf, wb->buf_1_pool); - return r; + return err; } struct format_segmd_context { @@ -435,13 +437,13 @@ */ static int do_zeroing_region(struct wb_device *wb, struct dm_io_region *region) { - int r; + int err; struct zeroing_context zc; zc.error = 0; init_completion(&zc.complete); - r = dm_kcopyd_zero(wb->copier, 1, region, 0, zeroing_complete, &zc); - if (r) - return r; + err = dm_kcopyd_zero(wb->copier, 1, region, 0, zeroing_complete, &zc); + if (err) + return err; wait_for_completion(&zc.complete); return zc.error; } @@ -458,7 +460,7 @@ static int format_all_segment_headers(struct wb_device *wb) { - int r = 0; + int err = 0; struct dm_dev *dev = wb->cache_dev; u32 i; @@ -473,13 +475,11 @@ atomic64_set(&context.count, wb->nr_segments); context.err = 0; - /* - * Submit all the writes asynchronously. - */ + /* Submit all the writes asynchronously. */ for (i = 0; i < wb->nr_segments; i++) { struct dm_io_request io_req_seg = { + WB_IO_WRITE, .client = wb->io_client, - .bi_rw = WRITE, .notify.fn = format_segmd_endio, .notify.context = &context, .mem.type = DM_IO_KMEM, @@ -490,12 +490,12 @@ .sector = calc_segment_header_start(wb, i), .count = (1 << 3), }; - r = wb_io(&io_req_seg, 1, ®ion_seg, NULL, false); - if (r) + err = wb_io(&io_req_seg, 1, ®ion_seg, NULL, false); + if (err) break; } - if (r) + if (err) goto bad; /* Wait for all the writes complete. */ @@ -504,12 +504,15 @@ if (context.err) { DMERR("I/O failed"); - r = -EIO; + err = -EIO; + goto bad; } + err = blkdev_issue_flush(dev->bdev, GFP_KERNEL, NULL); + bad: mempool_free(buf, wb->buf_8_pool); - return r; + return err; } /* @@ -517,27 +520,22 @@ */ static int format_cache_device(struct wb_device *wb) { - int r = 0; - struct dm_dev *dev = wb->cache_dev; - - r = zeroing_full_superblock(wb); - if (r) { + int err = zeroing_full_superblock(wb); + if (err) { DMERR("zeroing_full_superblock failed"); - return r; + return err; } - r = format_superblock_header(wb); /* First 512B */ - if (r) { - DMERR("format_superblock_header failed"); - return r; - } - r = format_all_segment_headers(wb); - if (r) { + err = format_all_segment_headers(wb); + if (err) { DMERR("format_all_segment_headers failed"); - return r; + return err; } - r = blkdev_issue_flush(dev->bdev, GFP_KERNEL, NULL); - - return r; + err = format_superblock_header(wb); /* First 512B */ + if (err) { + DMERR("format_superblock_header failed"); + return err; + } + return err; } /* @@ -548,30 +546,30 @@ */ static int might_format_cache_device(struct wb_device *wb) { - int r = 0; + int err = 0; - r = audit_cache_device(wb); - if (r) { + err = audit_cache_device(wb); + if (err) { DMERR("audit_cache_device failed"); - return r; + return err; } if (wb->do_format) { - r = format_cache_device(wb); - if (r) { + err = format_cache_device(wb); + if (err) { DMERR("format_cache_device failed"); - return r; + return err; } } - return r; + return err; } /*----------------------------------------------------------------------------*/ static int init_rambuf_pool(struct wb_device *wb) { - int r = 0; + int err = 0; size_t i; wb->rambuf_pool = kmalloc(sizeof(struct rambuffer) * NR_RAMBUF_POOL, GFP_KERNEL); @@ -579,24 +577,24 @@ return -ENOMEM; for (i = 0; i < 
NR_RAMBUF_POOL; i++) { - void *alloced = vmalloc(1 << (SEGMENT_SIZE_ORDER + SECTOR_SHIFT)); + void *alloced = vmalloc(1 << (SEGMENT_SIZE_ORDER + 9)); if (!alloced) { size_t j; DMERR("Failed to allocate rambuf->data"); for (j = 0; j < i; j++) { vfree(wb->rambuf_pool[j].data); } - r = -ENOMEM; + err = -ENOMEM; goto bad_alloc_data; } wb->rambuf_pool[i].data = alloced; } - return r; + return err; bad_alloc_data: kfree(wb->rambuf_pool); - return r; + return err; } static void free_rambuf_pool(struct wb_device *wb) @@ -607,6 +605,13 @@ kfree(wb->rambuf_pool); } +struct rambuffer *get_rambuffer_by_id(struct wb_device *wb, u64 id) +{ + u32 tmp32; + div_u64_rem(id - 1, NR_RAMBUF_POOL, &tmp32); + return wb->rambuf_pool + tmp32; +} + /*----------------------------------------------------------------------------*/ /* @@ -616,19 +621,19 @@ */ static int init_devices(struct wb_device *wb) { - int r = 0; + int err = 0; - r = might_format_cache_device(wb); - if (r) - return r; + err = might_format_cache_device(wb); + if (err) + return err; - r = init_rambuf_pool(wb); - if (r) { + err = init_rambuf_pool(wb); + if (err) { DMERR("init_rambuf_pool failed"); - return r; + return err; } - return r; + return err; } static void free_devices(struct wb_device *wb) @@ -641,7 +646,7 @@ static int read_superblock_record(struct superblock_record_device *record, struct wb_device *wb) { - int r = 0; + int err = 0; struct dm_io_request io_req; struct dm_io_region region; @@ -652,8 +657,8 @@ check_buffer_alignment(buf); io_req = (struct dm_io_request) { + WB_IO_READ, .client = wb->io_client, - .bi_rw = READ, .notify.fn = NULL, .mem.type = DM_IO_KMEM, .mem.ptr.addr = buf, @@ -663,15 +668,15 @@ .sector = (1 << 11) - 1, .count = 1, }; - r = wb_io(&io_req, 1, ®ion, NULL, false); - if (r) + err = wb_io(&io_req, 1, ®ion, NULL, false); + if (err) goto bad_io; memcpy(record, buf, sizeof(*record)); bad_io: mempool_free(buf, wb->buf_1_pool); - return r; + return err; } /* @@ -681,8 +686,8 @@ struct segment_header *seg) { struct dm_io_request io_req = { + WB_IO_READ, .client = wb->io_client, - .bi_rw = READ, .notify.fn = NULL, .mem.type = DM_IO_VMA, .mem.ptr.addr = buf, @@ -712,7 +717,7 @@ struct segment_header_device *dest = rambuffer; u32 i; - BUG_ON((src->length) != (wb->cursor - src->start_idx)); + ASSERT((src->length) == (wb->cursor - src->start_idx)); for (i = 0; i < src->length; i++) { struct metablock *mb = src->mb_array + i; @@ -732,8 +737,8 @@ /* * Apply @i-th metablock in @src to @seg */ -static void apply_metablock_device(struct wb_device *wb, struct segment_header *seg, - struct segment_header_device *src, u8 i) +static int apply_metablock_device(struct wb_device *wb, struct segment_header *seg, + struct segment_header_device *src, u8 i) { struct lookup_key key; struct ht_head *head; @@ -751,23 +756,70 @@ head = ht_get_head(wb, &key); found = ht_lookup(wb, head, &key); if (found) { - bool overwrite_fullsize = (mb->dirtiness.data_bits == 255); - prepare_overwrite(wb, mb_to_seg(wb, found), found, overwrite_fullsize); + int err = 0; + u8 i; + struct write_io wio; + void *buf = mempool_alloc(wb->buf_8_pool, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + wio = (struct write_io) { + .data = buf, + .data_bits = 0, + }; + err = prepare_overwrite(wb, mb_to_seg(wb, found), found, &wio, mb->dirtiness.data_bits); + if (err) + goto fail_out; + + for (i = 0; i < 8; i++) { + struct dm_io_request io_req; + struct dm_io_region region; + if (!(wio.data_bits & (1 << i))) + continue; + + io_req = (struct dm_io_request) { + WB_IO_WRITE, + 
.client = wb->io_client, + .notify.fn = NULL, + .mem.type = DM_IO_KMEM, + .mem.ptr.addr = wio.data + (i << 9), + }; + region = (struct dm_io_region) { + .bdev = wb->backing_dev->bdev, + .sector = mb->sector + i, + .count = 1, + }; + err = wb_io(&io_req, 1, ®ion, NULL, true); + if (err) + break; + } + +fail_out: + mempool_free(buf, wb->buf_8_pool); + if (err) + return err; } ht_register(wb, head, mb, &key); if (mb->dirtiness.is_dirty) inc_nr_dirty_caches(wb); + + return 0; } -static void apply_segment_header_device(struct wb_device *wb, struct segment_header *seg, - struct segment_header_device *src) +static int apply_segment_header_device(struct wb_device *wb, struct segment_header *seg, + struct segment_header_device *src) { + int err = 0; u8 i; seg->length = src->length; - for (i = 0; i < src->length; i++) - apply_metablock_device(wb, seg, src, i); + for (i = 0; i < src->length; i++) { + err = apply_metablock_device(wb, seg, src, i); + if (err) + break; + } + return err; } /* @@ -777,8 +829,8 @@ struct segment_header *seg) { struct dm_io_request io_req = { + WB_IO_READ, .client = wb->io_client, - .bi_rw = READ, .notify.fn = NULL, .mem.type = DM_IO_KMEM, .mem.ptr.addr = buf, @@ -797,7 +849,7 @@ */ static int do_find_max_id(struct wb_device *wb, u64 *max_id) { - int r = 0; + int err = 0; u32 k; void *buf = mempool_alloc(wb->buf_8_pool, GFP_KERNEL); @@ -809,18 +861,17 @@ for (k = 0; k < wb->nr_segments; k++) { struct segment_header *seg = segment_at(wb, k); struct segment_header_device *header; - r = read_segment_header(buf, wb, seg); - if (r) { - kfree(buf); - return r; - } + err = read_segment_header(buf, wb, seg); + if (err) + goto out; header = buf; if (le64_to_cpu(header->id) > *max_id) *max_id = le64_to_cpu(header->id); } +out: mempool_free(buf, wb->buf_8_pool); - return r; + return err; } static int find_max_id(struct wb_device *wb, u64 *max_id) @@ -850,12 +901,12 @@ */ static int do_apply_valid_segments(struct wb_device *wb, u64 *max_id) { - int r = 0; + int err = 0; struct segment_header *seg; struct segment_header_device *header; u32 i, start_idx; - void *rambuf = vmalloc(1 << (SEGMENT_SIZE_ORDER + SECTOR_SHIFT)); + void *rambuf = vmalloc(1 << (SEGMENT_SIZE_ORDER + 9)); if (!rambuf) return -ENOMEM; @@ -871,8 +922,8 @@ div_u64_rem(i, wb->nr_segments, &k); seg = segment_at(wb, k); - r = read_whole_segment(rambuf, wb, seg); - if (r) + err = read_whole_segment(rambuf, wb, seg); + if (err) break; header = rambuf; @@ -900,12 +951,15 @@ } /* This segment is correct and we apply */ - apply_segment_header_device(wb, seg, header); + err = apply_segment_header_device(wb, seg, header); + if (err) + break; + *max_id = le64_to_cpu(header->id); } vfree(rambuf); - return r; + return err; } static int apply_valid_segments(struct wb_device *wb, u64 *max_id) @@ -923,17 +977,18 @@ static int infer_last_writeback_id(struct wb_device *wb) { - int r = 0; + int err = 0; + u64 inferred_last_writeback_id; u64 record_id; + struct superblock_record_device uninitialized_var(record); - r = read_superblock_record(&record, wb); - if (r) - return r; - - atomic64_set(&wb->last_writeback_segment_id, - atomic64_read(&wb->last_flushed_segment_id) > wb->nr_segments ? - atomic64_read(&wb->last_flushed_segment_id) - wb->nr_segments : 0); + err = read_superblock_record(&record, wb); + if (err) + return err; + + inferred_last_writeback_id = + SUB_ID(atomic64_read(&wb->last_flushed_segment_id), wb->nr_segments); /* * If last_writeback_id is recorded on the super block @@ -941,10 +996,15 @@ * written back before. 
*/ record_id = le64_to_cpu(record.last_writeback_segment_id); - if (record_id > atomic64_read(&wb->last_writeback_segment_id)) - atomic64_set(&wb->last_writeback_segment_id, record_id); + if (record_id > inferred_last_writeback_id) { + u64 id; + for (id = inferred_last_writeback_id + 1; id <= record_id; id++) + mark_clean_seg(wb, get_segment_header_by_id(wb, id)); + inferred_last_writeback_id = record_id; + } - return r; + atomic64_set(&wb->last_writeback_segment_id, inferred_last_writeback_id); + return err; } /* @@ -961,28 +1021,31 @@ */ static int replay_log_on_cache(struct wb_device *wb) { - int r = 0; + int err = 0; u64 max_id; - r = find_max_id(wb, &max_id); - if (r) { + err = find_max_id(wb, &max_id); + if (err) { DMERR("find_max_id failed"); - return r; + return err; } - r = apply_valid_segments(wb, &max_id); - if (r) { + err = apply_valid_segments(wb, &max_id); + if (err) { DMERR("apply_valid_segments failed"); - return r; + return err; } /* Setup last_flushed_segment_id */ atomic64_set(&wb->last_flushed_segment_id, max_id); + /* Setup last_queued_segment_id */ + atomic64_set(&wb->last_queued_segment_id, max_id); + /* Setup last_writeback_segment_id */ infer_last_writeback_id(wb); - return r; + return err; } /* @@ -1000,12 +1063,12 @@ */ static int recover_cache(struct wb_device *wb) { - int r = 0; + int err = 0; - r = replay_log_on_cache(wb); - if (r) { + err = replay_log_on_cache(wb); + if (err) { DMERR("replay_log_on_cache failed"); - return r; + return err; } prepare_first_seg(wb); @@ -1026,7 +1089,7 @@ if (!writeback_seg->ios) goto bad_ios; - writeback_seg->buf = vmalloc((1 << (SEGMENT_SIZE_ORDER + SECTOR_SHIFT)) - (1 << 12)); + writeback_seg->buf = vmalloc((1 << (SEGMENT_SIZE_ORDER + 9)) - (1 << 12)); if (!writeback_seg->buf) goto bad_buf; @@ -1073,7 +1136,7 @@ */ int try_alloc_writeback_ios(struct wb_device *wb, size_t nr_batch, gfp_t gfp) { - int r = 0; + int err = 0; size_t i; struct writeback_segment **writeback_segs = kzalloc( @@ -1104,9 +1167,9 @@ /* And then swap by new values */ wb->writeback_segs = writeback_segs; - wb->nr_cur_batched_writeback = nr_batch; + wb->nr_writeback_segs = nr_batch; - return r; + return err; } /*----------------------------------------------------------------------------*/ @@ -1116,7 +1179,7 @@ wb->name = kthread_create( \ name##_proc, wb, "dmwb_" #name); \ if (IS_ERR(wb->name)) { \ - r = PTR_ERR(wb->name); \ + err = PTR_ERR(wb->name); \ wb->name = NULL; \ DMERR("couldn't spawn " #name); \ goto bad_##name; \ @@ -1134,26 +1197,26 @@ */ static int init_metadata(struct wb_device *wb) { - int r = 0; + int err = 0; - r = init_segment_header_array(wb); - if (r) { + err = init_segment_header_array(wb); + if (err) { DMERR("init_segment_header_array failed"); goto bad_alloc_segment_header_array; } - r = ht_empty_init(wb); - if (r) { + err = ht_empty_init(wb); + if (err) { DMERR("ht_empty_init failed"); goto bad_alloc_ht; } - return r; + return err; bad_alloc_ht: free_segment_header_array(wb); bad_alloc_segment_header_array: - return r; + return err; } static void free_metadata(struct wb_device *wb) @@ -1164,13 +1227,13 @@ static int init_writeback_daemon(struct wb_device *wb) { - int r = 0; + int err = 0; size_t nr_batch; atomic_set(&wb->writeback_fail_count, 0); atomic_set(&wb->writeback_io_count, 0); - nr_batch = 8; + nr_batch = 32; wb->nr_max_batched_writeback = nr_batch; if (try_alloc_writeback_ios(wb, nr_batch, GFP_KERNEL)) return -ENOMEM; @@ -1184,23 +1247,22 @@ wb->force_drop = false; CREATE_DAEMON(writeback_daemon); - return r; + return err; 
bad_writeback_daemon: free_writeback_ios(wb); - return r; + return err; } -static int init_flusher(struct wb_device *wb) +static int init_flush_daemon(struct wb_device *wb) { - wb->flusher_wq = create_singlethread_workqueue("dmwb_flusher"); - if (!wb->flusher_wq) { - DMERR("Failed to allocate flusher_wq"); - return -ENOMEM; - } - + int err = 0; init_waitqueue_head(&wb->flush_wait_queue); - return 0; + CREATE_DAEMON(flush_daemon); + return err; + +bad_flush_daemon: + return err; } static int init_flush_barrier_work(struct wb_device *wb) @@ -1217,96 +1279,96 @@ static int init_writeback_modulator(struct wb_device *wb) { - int r = 0; + int err = 0; wb->writeback_threshold = 0; CREATE_DAEMON(writeback_modulator); - return r; + return err; bad_writeback_modulator: - return r; + return err; } static int init_sb_record_updater(struct wb_device *wb) { - int r = 0; + int err = 0; wb->update_sb_record_interval = 0; CREATE_DAEMON(sb_record_updater); - return r; + return err; bad_sb_record_updater: - return r; + return err; } static int init_data_synchronizer(struct wb_device *wb) { - int r = 0; + int err = 0; wb->sync_data_interval = 0; CREATE_DAEMON(data_synchronizer); - return r; + return err; bad_data_synchronizer: - return r; + return err; } int resume_cache(struct wb_device *wb) { - int r = 0; + int err = 0; wb->nr_segments = calc_nr_segments(wb->cache_dev, wb); wb->nr_caches_inseg = (1 << (SEGMENT_SIZE_ORDER - 3)) - 1; wb->nr_caches = wb->nr_segments * wb->nr_caches_inseg; - r = init_devices(wb); - if (r) + err = init_devices(wb); + if (err) goto bad_devices; - r = init_metadata(wb); - if (r) + err = init_metadata(wb); + if (err) goto bad_metadata; - r = init_writeback_daemon(wb); - if (r) { + err = init_writeback_daemon(wb); + if (err) { DMERR("init_writeback_daemon failed"); goto bad_writeback_daemon; } - r = recover_cache(wb); - if (r) { + err = recover_cache(wb); + if (err) { DMERR("recover_cache failed"); goto bad_recover; } - r = init_flusher(wb); - if (r) { - DMERR("init_flusher failed"); - goto bad_flusher; + err = init_flush_daemon(wb); + if (err) { + DMERR("init_flush_daemon failed"); + goto bad_flush_daemon; } - r = init_flush_barrier_work(wb); - if (r) { + err = init_flush_barrier_work(wb); + if (err) { DMERR("init_flush_barrier_work failed"); goto bad_flush_barrier_work; } - r = init_writeback_modulator(wb); - if (r) { + err = init_writeback_modulator(wb); + if (err) { DMERR("init_writeback_modulator failed"); goto bad_modulator; } - r = init_sb_record_updater(wb); - if (r) { + err = init_sb_record_updater(wb); + if (err) { DMERR("init_sb_recorder failed"); goto bad_updater; } - r = init_data_synchronizer(wb); - if (r) { + err = init_data_synchronizer(wb); + if (err) { DMERR("init_data_synchronizer failed"); goto bad_synchronizer; } - return r; + return err; bad_synchronizer: kthread_stop(wb->sb_record_updater); @@ -1315,8 +1377,8 @@ bad_modulator: destroy_workqueue(wb->barrier_wq); bad_flush_barrier_work: - destroy_workqueue(wb->flusher_wq); -bad_flusher: + kthread_stop(wb->flush_daemon); +bad_flush_daemon: bad_recover: kthread_stop(wb->writeback_daemon); free_writeback_ios(wb); @@ -1325,7 +1387,7 @@ bad_metadata: free_devices(wb); bad_devices: - return r; + return err; } void free_cache(struct wb_device *wb) @@ -1340,7 +1402,7 @@ destroy_workqueue(wb->barrier_wq); - destroy_workqueue(wb->flusher_wq); + kthread_stop(wb->flush_daemon); kthread_stop(wb->writeback_daemon); free_writeback_ios(wb); diff -Nru dm-writeboost-2.1.1/src/dm-writeboost-metadata.h 
dm-writeboost-2.2.6/src/dm-writeboost-metadata.h --- dm-writeboost-2.1.1/src/dm-writeboost-metadata.h 2016-01-04 13:25:39.000000000 +0000 +++ dm-writeboost-2.2.6/src/dm-writeboost-metadata.h 2016-09-19 06:15:04.000000000 +0000 @@ -24,6 +24,7 @@ struct segment_header * get_segment_header_by_id(struct wb_device *, u64 segment_id); +struct rambuffer *get_rambuffer_by_id(struct wb_device *wb, u64 id); sector_t calc_mb_start_sector(struct wb_device *, struct segment_header *, u32 mb_idx); u8 mb_idx_inseg(struct wb_device *, u32 mb_idx); diff -Nru dm-writeboost-2.1.1/src/dm-writeboost-target.c dm-writeboost-2.2.6/src/dm-writeboost-target.c --- dm-writeboost-2.1.1/src/dm-writeboost-target.c 2016-01-04 13:25:39.000000000 +0000 +++ dm-writeboost-2.2.6/src/dm-writeboost-target.c 2016-09-19 06:15:04.000000000 +0000 @@ -28,28 +28,18 @@ /*----------------------------------------------------------------------------*/ -void bio_endio_compat(struct bio *bio, int error) -{ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0) - bio->bi_error = error; - bio_endio(bio); -#else - bio_endio(bio, error); -#endif -} - -/*----------------------------------------------------------------------------*/ - void do_check_buffer_alignment(void *buf, const char *name, const char *caller) { unsigned long addr = (unsigned long) buf; - if (!IS_ALIGNED(addr, 1 << SECTOR_SHIFT)) { + if (!IS_ALIGNED(addr, 1 << 9)) { DMCRIT("@%s in %s is not sector-aligned. I/O buffer must be sector-aligned.", name, caller); BUG(); } } +/*----------------------------------------------------------------------------*/ + struct wb_io { struct work_struct work; int err; @@ -78,7 +68,7 @@ .regions = regions, .num_regions = num_regions, }; - BUG_ON(io_req->notify.fn); + ASSERT(io_req->notify.fn == NULL); INIT_WORK_ONSTACK(&io.work, wb_io_fn); queue_work(wb->io_wq, &io.work); @@ -92,9 +82,7 @@ err = dm_io(io_req, num_regions, regions, err_bits); } - /* - * err_bits can be NULL. - */ + /* err_bits can be NULL. */ if (err || (err_bits && *err_bits)) { char buf[BDEVNAME_SIZE]; dev_t dev = regions->bdev->bd_dev; @@ -106,9 +94,10 @@ eb = *err_bits; format_dev_t(buf, dev); - DMERR("%s() I/O error(%d), bits(%lu), dev(%s), sector(%llu), rw(%d)", + DMERR("%s() I/O error(%d), bits(%lu), dev(%s), sector(%llu), %s", caller, err, eb, - buf, (unsigned long long) regions->sector, io_req->bi_rw); + buf, (unsigned long long) regions->sector, + req_is_write(io_req) ? 
"write" : "read"); } return err; @@ -116,17 +105,25 @@ sector_t dm_devsize(struct dm_dev *dev) { - return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; + return i_size_read(dev->bdev->bd_inode) >> 9; } /*----------------------------------------------------------------------------*/ -#if LINUX_VERSION_CODE <= KERNEL_VERSION(3,14,0) -#define bi_sector(bio) (bio)->bi_sector -#define bi_size(bio) (bio)->bi_size +void bio_endio_compat(struct bio *bio, int error) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,3,0) + bio->bi_error = error; + bio_endio(bio); #else + bio_endio(bio, error); +#endif +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,14,0) #define bi_sector(bio) (bio)->bi_iter.bi_sector -#define bi_size(bio) (bio)->bi_iter.bi_size +#else +#define bi_sector(bio) (bio)->bi_sector #endif static void bio_remap(struct bio *bio, struct dm_dev *dev, sector_t sector) @@ -135,24 +132,24 @@ bi_sector(bio) = sector; } -static u8 do_io_offset(sector_t sector) +static u8 calc_offset(sector_t sector) { u32 tmp32; div_u64_rem(sector, 1 << 3, &tmp32); return tmp32; } -static u8 io_offset(struct bio *bio) +static u8 bio_calc_offset(struct bio *bio) { - return do_io_offset(bi_sector(bio)); + return calc_offset(bi_sector(bio)); } -static bool io_fullsize(struct bio *bio) +static bool bio_is_fullsize(struct bio *bio) { return bio_sectors(bio) == (1 << 3); } -static bool io_write(struct bio *bio) +static bool bio_is_write(struct bio *bio) { return bio_data_dir(bio) == WRITE; } @@ -193,14 +190,114 @@ return count; } +void inc_nr_dirty_caches(struct wb_device *wb) +{ + ASSERT(wb); + atomic64_inc(&wb->nr_dirty_caches); +} + +void dec_nr_dirty_caches(struct wb_device *wb) +{ + ASSERT(wb); + if (atomic64_dec_and_test(&wb->nr_dirty_caches)) + wake_up_interruptible(&wb->wait_drop_caches); +} + +static bool taint_mb(struct wb_device *wb, struct metablock *mb, u8 data_bits) +{ + unsigned long flags; + bool flipped = false; + + ASSERT(data_bits > 0); + spin_lock_irqsave(&wb->mb_lock, flags); + if (!mb->dirtiness.is_dirty) { + mb->dirtiness.is_dirty = true; + flipped = true; + } + mb->dirtiness.data_bits |= data_bits; + spin_unlock_irqrestore(&wb->mb_lock, flags); + + return flipped; +} + +bool mark_clean_mb(struct wb_device *wb, struct metablock *mb) +{ + unsigned long flags; + bool flipped = false; + + spin_lock_irqsave(&wb->mb_lock, flags); + if (mb->dirtiness.is_dirty) { + mb->dirtiness.is_dirty = false; + flipped = true; + } + spin_unlock_irqrestore(&wb->mb_lock, flags); + + return flipped; +} + +/* + * Read the dirtiness of a metablock at the moment. + */ +struct dirtiness read_mb_dirtiness(struct wb_device *wb, struct segment_header *seg, + struct metablock *mb) +{ + unsigned long flags; + struct dirtiness retval; + + spin_lock_irqsave(&wb->mb_lock, flags); + retval = mb->dirtiness; + spin_unlock_irqrestore(&wb->mb_lock, flags); + + return retval; +} + +/*----------------------------------------------------------------------------*/ + +void cursor_init(struct wb_device *wb) +{ + wb->cursor = wb->current_seg->start_idx; + wb->current_seg->length = 0; +} + /* - * Prepare the RAM buffer for segment write. + * Advance the cursor and return the old cursor. + * After returned, nr_inflight_ios is incremented to wait for this write to complete. 
*/ +static u32 advance_cursor(struct wb_device *wb) +{ + u32 old; + if (wb->cursor == wb->nr_caches) + wb->cursor = 0; + old = wb->cursor; + wb->cursor++; + wb->current_seg->length++; + BUG_ON(wb->current_seg->length > wb->nr_caches_inseg); + atomic_inc(&wb->current_seg->nr_inflight_ios); + return old; +} + +static bool needs_queue_seg(struct wb_device *wb) +{ + bool rambuf_no_space = !mb_idx_inseg(wb, wb->cursor); + return rambuf_no_space; +} + +/*----------------------------------------------------------------------------*/ + +static void copy_barrier_requests(struct rambuffer *rambuf, struct wb_device *wb) +{ + bio_list_init(&rambuf->barrier_ios); + bio_list_merge(&rambuf->barrier_ios, &wb->barrier_ios); + bio_list_init(&wb->barrier_ios); +} + static void prepare_rambuffer(struct rambuffer *rambuf, struct wb_device *wb, struct segment_header *seg) { + rambuf->seg = seg; prepare_segment_header_device(rambuf->data, wb, seg); + copy_barrier_requests(rambuf, wb); } static void init_rambuffer(struct wb_device *wb) @@ -211,27 +308,16 @@ /* * Acquire a new RAM buffer for the new segment. */ -static void acquire_new_rambuffer(struct wb_device *wb, u64 id) +static void __acquire_new_rambuffer(struct wb_device *wb, u64 id) { - struct rambuffer *next_rambuf; - u32 tmp32; - wait_for_flushing(wb, SUB_ID(id, NR_RAMBUF_POOL)); - div_u64_rem(id - 1, NR_RAMBUF_POOL, &tmp32); - next_rambuf = wb->rambuf_pool + tmp32; - - wb->current_rambuf = next_rambuf; + wb->current_rambuf = get_rambuffer_by_id(wb, id); init_rambuffer(wb); } -/* - * Acquire the new segment and RAM buffer for the following writes. - * Guarantees all dirty caches in the segments are written back and - * all metablocks in it are invalidated (Linked to null head). - */ -void acquire_new_seg(struct wb_device *wb, u64 id) +static void __acquire_new_seg(struct wb_device *wb, u64 id) { struct segment_header *new_seg = get_segment_header_by_id(wb, id); @@ -256,8 +342,17 @@ */ new_seg->id = id; wb->current_seg = new_seg; +} - acquire_new_rambuffer(wb, id); +/* + * Acquire the new segment and RAM buffer for the following writes. + * Guarantees all dirty caches in the segments are written back and + * all metablocks in it are invalidated (Linked to null head). 
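 * As the code above suggests, the RAM buffers form a small ring of
 * NR_RAMBUF_POOL entries: __acquire_new_rambuffer() waits until the
 * segment that used the same slot (id - NR_RAMBUF_POOL) has been flushed
 * before handing the buffer out again, so a buffer should never be
 * overwritten while its previous contents are still queued for flushing.
 * (Illustration, assuming NR_RAMBUF_POOL == 8: acquiring the buffer for
 * segment 20 first waits for segment 12 to be flushed.)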
+ */ +void acquire_new_seg(struct wb_device *wb, u64 id) +{ + __acquire_new_rambuffer(wb, id); + __acquire_new_seg(wb, id); } static void prepare_new_seg(struct wb_device *wb) @@ -269,32 +364,15 @@ /*----------------------------------------------------------------------------*/ -static void copy_barrier_requests(struct flush_job *job, struct wb_device *wb) -{ - bio_list_init(&job->barrier_ios); - bio_list_merge(&job->barrier_ios, &wb->barrier_ios); - bio_list_init(&wb->barrier_ios); -} - -static void init_flush_job(struct flush_job *job, struct wb_device *wb) -{ - job->wb = wb; - job->seg = wb->current_seg; - - copy_barrier_requests(job, wb); -} - static void queue_flush_job(struct wb_device *wb) { - struct flush_job *job = &wb->current_rambuf->job; - wait_event(wb->inflight_ios_wq, !atomic_read(&wb->current_seg->nr_inflight_ios)); prepare_rambuffer(wb->current_rambuf, wb, wb->current_seg); - init_flush_job(job, wb); - INIT_WORK(&job->work, flush_proc); - queue_work(wb->flusher_wq, &job->work); + smp_wmb(); + atomic64_inc(&wb->last_queued_segment_id); + wake_up_process(wb->flush_daemon); } static void queue_current_buffer(struct wb_device *wb) @@ -303,10 +381,15 @@ prepare_new_seg(wb); } -void cursor_init(struct wb_device *wb) +/* + * queue_current_buffer if the RAM buffer can't make space any more. + */ +static void might_queue_current_buffer(struct wb_device *wb) { - wb->cursor = wb->current_seg->start_idx; - wb->current_seg->length = 0; + if (needs_queue_seg(wb)) { + update_nr_empty_segs(wb); + queue_current_buffer(wb); + } } /* @@ -320,8 +403,6 @@ old_seg = wb->current_seg; queue_current_buffer(wb); - - cursor_init(wb); /* FIXME this looks dup call */ mutex_unlock(&wb->io_lock); wait_for_flushing(wb, old_seg->id); @@ -360,458 +441,750 @@ /*----------------------------------------------------------------------------*/ -void inc_nr_dirty_caches(struct wb_device *wb) -{ - BUG_ON(!wb); - atomic64_inc(&wb->nr_dirty_caches); -} - -void dec_nr_dirty_caches(struct wb_device *wb) -{ - BUG_ON(!wb); - if (atomic64_dec_and_test(&wb->nr_dirty_caches)) - wake_up_interruptible(&wb->wait_drop_caches); -} +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,14,0) +#define bv_vec struct bio_vec +#define bv_page(vec) vec.bv_page +#define bv_offset(vec) vec.bv_offset +#define bv_len(vec) vec.bv_len +#define bv_it struct bvec_iter +#else +#define bv_vec struct bio_vec * +#define bv_page(vec) vec->bv_page +#define bv_offset(vec) vec->bv_offset +#define bv_len(vec) vec->bv_len +#define bv_it int +#endif -static bool taint_mb(struct wb_device *wb, struct metablock *mb, struct bio *bio) +/* + * Incoming bio may have multiple bio vecs as a result bvec merging. + * We shouldn't use bio_data directly to access to whole payload but + * should iterate over the vector. 
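 * Illustrative example (hypothetical layout, not from this diff): a 4KB
 * write may arrive as two bio vecs, e.g. 1KB in one page and 3KB in
 * another; bio_for_each_segment() visits both, each page is mapped with
 * kmap_atomic(), its bv_len bytes are copied, and the destination pointer
 * advances, so the ASSERT that the copied total equals
 * bio_sectors(bio) << 9 holds for any vector layout.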
+ */ +static void copy_bio_payload(void *buf, struct bio *bio) { - unsigned long flags; - bool flip = false; - - spin_lock_irqsave(&wb->mb_lock, flags); - if (!mb->dirtiness.is_dirty) { - mb->dirtiness.is_dirty = true; - flip = true; - } - - if (likely(io_fullsize(bio))) { - mb->dirtiness.data_bits = 255; - } else { - u8 i; - u8 acc_bits = 0; - for (i = io_offset(bio); i < (io_offset(bio) + bio_sectors(bio)); i++) - acc_bits += (1 << i); - - mb->dirtiness.data_bits |= acc_bits; + size_t sum = 0; + bv_vec vec; + bv_it it; + bio_for_each_segment(vec, bio, it) { + void *dst = kmap_atomic(bv_page(vec)); + size_t l = bv_len(vec); + memcpy(buf, dst + bv_offset(vec), l); + kunmap_atomic(dst); + buf += l; + sum += l; } - - BUG_ON(!bio_sectors(bio)); - BUG_ON(!mb->dirtiness.data_bits); - spin_unlock_irqrestore(&wb->mb_lock, flags); - - return flip; + ASSERT(sum == (bio_sectors(bio) << 9)); } -bool mark_clean_mb(struct wb_device *wb, struct metablock *mb) +/* + * Copy 512B buffer data to bio payload's i-th 512B area. + */ +static void __copy_to_bio_payload(struct bio *bio, void *buf, u8 i) { - unsigned long flags; - bool flip = false; + size_t head = 0; + size_t tail = head; - spin_lock_irqsave(&wb->mb_lock, flags); - if (mb->dirtiness.is_dirty) { - mb->dirtiness.is_dirty = false; - flip = true; + bv_vec vec; + bv_it it; + bio_for_each_segment(vec, bio, it) { + size_t l = bv_len(vec); + tail += l; + if ((i << 9) < tail) { + void *dst = kmap_atomic(bv_page(vec)); + size_t offset = (i << 9) - head; + BUG_ON((l - offset) < (1 << 9)); + memcpy(dst + bv_offset(vec) + offset, buf, 1 << 9); + kunmap_atomic(dst); + return; + } + head += l; } - spin_unlock_irqrestore(&wb->mb_lock, flags); - - return flip; + BUG(); } /* - * Read the dirtiness of a metablock at the moment. + * Copy 4KB buffer to bio payload with care to bio offset and copy bits. */ -struct dirtiness read_mb_dirtiness(struct wb_device *wb, struct segment_header *seg, - struct metablock *mb) +static void copy_to_bio_payload(struct bio *bio, void *buf, u8 copy_bits) { - unsigned long flags; - struct dirtiness retval; - - spin_lock_irqsave(&wb->mb_lock, flags); - retval = mb->dirtiness; - spin_unlock_irqrestore(&wb->mb_lock, flags); - - return retval; + u8 offset = bio_calc_offset(bio); + u8 i; + for (i = 0; i < bio_sectors(bio); i++) { + u8 i_offset = i + offset; + if (copy_bits & (1 << i_offset)) + __copy_to_bio_payload(bio, buf + (i_offset << 9), i); + } } /*----------------------------------------------------------------------------*/ -struct writeback_mb_context { - struct wb_device *wb; - atomic_t count; - int err; -}; - -static void writeback_mb_complete(int read_err, unsigned long write_err, void *__context) -{ - struct writeback_mb_context *context = __context; +struct lookup_result { + struct ht_head *head; /* Lookup head used */ + struct lookup_key key; /* Lookup key used */ - if (read_err || write_err) - context->err = 1; + struct segment_header *found_seg; + struct metablock *found_mb; - if (atomic_dec_and_test(&context->count)) - wake_up_active_wq(&context->wb->writeback_mb_wait_queue); -} + bool found; /* Cache hit? */ + bool on_buffer; /* Is the metablock found on the RAM buffer? */ +}; /* - * Write back a cache from cache device to the backing device. - * We don't need to make the data written back persistent because this segment - * will be reused only after writeback daemon wrote this segment back. + * Lookup a bio relevant cache data. + * In case of cache hit, nr_inflight_ios is incremented. 
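 * The lookup key is the 4KB-aligned sector of the bio
 * (calc_cache_alignment() presumably rounds down to the 8-sector block),
 * so, for example, bios starting at sector 1000 and at sector 1003 both
 * resolve to the metablock keyed by sector 1000. res->on_buffer then tells
 * whether that metablock still lives in the RAM buffer or has already been
 * flushed to the caching device.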
*/ -static void writeback_mb(struct wb_device *wb, struct segment_header *seg, - struct metablock *mb, u8 data_bits, bool thread) +static void cache_lookup(struct wb_device *wb, struct bio *bio, struct lookup_result *res) { - int r = 0; + res->key = (struct lookup_key) { + .sector = calc_cache_alignment(bi_sector(bio)), + }; + res->head = ht_get_head(wb, &res->key); - struct writeback_mb_context context; - context.wb = wb; - context.err = 0; + res->found_mb = ht_lookup(wb, res->head, &res->key); + if (res->found_mb) { + res->found_seg = mb_to_seg(wb, res->found_mb); + atomic_inc(&res->found_seg->nr_inflight_ios); + } - BUG_ON(!data_bits); + res->found = (res->found_mb != NULL); - if (data_bits == 255) { - struct dm_io_region src, dest; + res->on_buffer = false; + if (res->found) + res->on_buffer = is_on_buffer(wb, res->found_mb->idx); - atomic_set(&context.count, 1); + inc_stat(wb, bio_is_write(bio), res->found, res->on_buffer, bio_is_fullsize(bio)); +} - src = (struct dm_io_region) { - .bdev = wb->cache_dev->bdev, - .sector = calc_mb_start_sector(wb, seg, mb->idx), - .count = (1 << 3), - }; - dest = (struct dm_io_region) { - .bdev = wb->backing_dev->bdev, - .sector = mb->sector, - .count = (1 << 3), - }; - maybe_IO(dm_kcopyd_copy(wb->copier, &src, 1, &dest, 0, writeback_mb_complete, &context)); - if (r) - writeback_mb_complete(0, 0, &context); - } else { - u8 i; +static void dec_inflight_ios(struct wb_device *wb, struct segment_header *seg) +{ + if (atomic_dec_and_test(&seg->nr_inflight_ios)) + wake_up_active_wq(&wb->inflight_ios_wq); +} - u8 count = 0; - for (i = 0; i < 8; i++) - if (data_bits & (1 << i)) - count++; - - atomic_set(&context.count, count); - - for (i = 0; i < 8; i++) { - struct dm_io_region src, dest; - - if (!(data_bits & (1 << i))) - continue; - - src = (struct dm_io_region) { - .bdev = wb->cache_dev->bdev, - .sector = calc_mb_start_sector(wb, seg, mb->idx) + i, - .count = 1, - }; - dest = (struct dm_io_region) { - .bdev = wb->backing_dev->bdev, - .sector = mb->sector + i, - .count = 1, - }; - maybe_IO(dm_kcopyd_copy(wb->copier, &src, 1, &dest, 0, writeback_mb_complete, &context)); - if (r) - writeback_mb_complete(0, 0, &context); - } +/*----------------------------------------------------------------------------*/ + +static u8 to_mask(u8 offset, u8 count) +{ + u8 i; + u8 result = 0; + if (count == 8) { + result = 255; + } else { + for (i = 0; i < count; i++) + result |= (1 << (i + offset)); } + return result; +} - wait_event(wb->writeback_mb_wait_queue, !atomic_read(&context.count)); - if (context.err) - mark_dead(wb); +static int fill_payload_by_backing(struct wb_device *wb, struct bio *bio) +{ + struct dm_io_request io_req; + struct dm_io_region region; + + sector_t start = bi_sector(bio); + u8 offset = calc_offset(start); + u8 len = bio_sectors(bio); + u8 copy_bits = to_mask(offset, len); + + int err = 0; + void *buf = mempool_alloc(wb->buf_8_pool, GFP_NOIO); + if (!buf) + return -ENOMEM; + + io_req = (struct dm_io_request) { + WB_IO_READ, + .client = wb->io_client, + .notify.fn = NULL, + .mem.type = DM_IO_KMEM, + .mem.ptr.addr = buf + (offset << 9), + }; + region = (struct dm_io_region) { + .bdev = wb->backing_dev->bdev, + .sector = start, + .count = len, + }; + err = wb_io(&io_req, 1, ®ion, NULL, true); + if (err) + goto bad; + + copy_to_bio_payload(bio, buf, copy_bits); +bad: + mempool_free(buf, wb->buf_8_pool); + return err; } /* - * Write back a cache on the RAM buffer to backing device. 
- * Calling this function is really rare so the code needs not to be optimal. - * There is no need to write them back with FUA flag because the cache isn't - * flushed yet and thus isn't persistent. + * Get the reference to the 4KB-aligned data in RAM buffer. + * Since it only takes the reference caller need not to free the pointer. */ -static void writeback_buffered_mb(struct wb_device *wb, struct metablock *mb, u8 data_bits) +static void *ref_buffered_mb(struct wb_device *wb, struct metablock *mb) { - int r = 0; - sector_t offset = ((mb_idx_inseg(wb, mb->idx) + 1) << 3); - void *buf = mempool_alloc(wb->buf_1_pool, GFP_NOIO); + return wb->current_rambuf->data + (offset << 9); +} +/* + * Read cache block of the mb. + * Caller should free the returned pointer after used by mempool_alloc(). + */ +static void *read_mb(struct wb_device *wb, struct segment_header *seg, + struct metablock *mb, u8 data_bits) +{ u8 i; + void *result = mempool_alloc(wb->buf_8_pool, GFP_NOIO); + if (!result) + return NULL; + for (i = 0; i < 8; i++) { + int err = 0; struct dm_io_request io_req; struct dm_io_region region; - void *src; - sector_t dest; - if (!(data_bits & (1 << i))) continue; - src = wb->current_rambuf->data + ((offset + i) << SECTOR_SHIFT); - dest = mb->sector + i; - - memcpy(buf, src, 1 << SECTOR_SHIFT); io_req = (struct dm_io_request) { + WB_IO_READ, .client = wb->io_client, - .bi_rw = WRITE, .notify.fn = NULL, .mem.type = DM_IO_KMEM, - .mem.ptr.addr = buf, + .mem.ptr.addr = result + (i << 9), }; + region = (struct dm_io_region) { - .bdev = wb->backing_dev->bdev, - .sector = dest, + .bdev = wb->cache_dev->bdev, + .sector = calc_mb_start_sector(wb, seg, mb->idx) + i, .count = 1, }; - maybe_IO(wb_io(&io_req, 1, ®ion, NULL, true)); + + err = wb_io(&io_req, 1, ®ion, NULL, true); + if (err) { + mempool_free(result, wb->buf_8_pool); + return NULL; + } } - mempool_free(buf, wb->buf_1_pool); + return result; } -void prepare_overwrite(struct wb_device *wb, struct segment_header *seg, struct metablock *old_mb, bool overwrite_fullsize) +/*----------------------------------------------------------------------------*/ + +enum PBD_FLAG { + PBD_NONE = 0, + PBD_WILL_CACHE = 1, + PBD_READ_SEG = 2, +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0) +#define PER_BIO_DATA_SIZE per_io_data_size +#else +#define PER_BIO_DATA_SIZE per_bio_data_size +#endif +struct per_bio_data { + enum PBD_FLAG type; + union { + u32 cell_idx; + struct segment_header *seg; + }; +}; +#define per_bio_data(wb, bio) ((struct per_bio_data *)dm_per_bio_data((bio), (wb)->ti->PER_BIO_DATA_SIZE)) + +/*----------------------------------------------------------------------------*/ + +#define read_cache_cell_from_node(node) rb_entry((node), struct read_cache_cell, rb_node) + +static void read_cache_add(struct read_cache_cells *cells, struct read_cache_cell *cell) { - struct dirtiness dirtiness = read_mb_dirtiness(wb, seg, old_mb); + struct rb_node **rbp, *parent; + rbp = &cells->rb_root.rb_node; + parent = NULL; + while (*rbp) { + struct read_cache_cell *parent_cell; + parent = *rbp; + parent_cell = read_cache_cell_from_node(parent); + if (cell->sector < parent_cell->sector) + rbp = &(*rbp)->rb_left; + else + rbp = &(*rbp)->rb_right; + } + rb_link_node(&cell->rb_node, parent, rbp); + rb_insert_color(&cell->rb_node, &cells->rb_root); +} + +static struct read_cache_cell *lookup_read_cache_cell(struct wb_device *wb, sector_t sector) +{ + struct rb_node **rbp, *parent; + rbp = &wb->read_cache_cells->rb_root.rb_node; + parent = NULL; + while (*rbp) { + 
struct read_cache_cell *parent_cell; + parent = *rbp; + parent_cell = read_cache_cell_from_node(parent); + if (parent_cell->sector == sector) + return parent_cell; + + if (sector < parent_cell->sector) + rbp = &(*rbp)->rb_left; + else + rbp = &(*rbp)->rb_right; + } + return NULL; +} + +static void read_cache_cancel_cells(struct read_cache_cells *cells, u32 n) +{ + u32 i; + u32 last = cells->cursor + cells->seqcount; + if (last > cells->size) + last = cells->size; + for (i = cells->cursor; i < last; i++) { + struct read_cache_cell *cell = cells->array + i; + cell->cancelled = true; + } +} + +/* + * Track the forefront read address and cancel cells in case of over threshold. + * If the cell is cancelled foreground, we can save the memory copy in the background. + */ +static void read_cache_cancel_foreground(struct read_cache_cells *cells, + struct read_cache_cell *new_cell) +{ + if (new_cell->sector == (cells->last_sector + 8)) + cells->seqcount++; + else { + cells->seqcount = 1; + cells->over_threshold = false; + } + + if (cells->seqcount > cells->threshold) { + if (cells->over_threshold) + new_cell->cancelled = true; + else { + cells->over_threshold = true; + read_cache_cancel_cells(cells, cells->seqcount); + } + } + cells->last_sector = new_cell->sector; +} + +static bool reserve_read_cache_cell(struct wb_device *wb, struct bio *bio) +{ + struct per_bio_data *pbd; + struct read_cache_cells *cells = wb->read_cache_cells; + struct read_cache_cell *found, *new_cell; + + ASSERT(cells->threshold > 0); + + if (!ACCESS_ONCE(wb->read_cache_threshold)) + return false; + + if (!cells->cursor) + return false; /* - * First clean up the previous cache and write back the cache if needed. + * We only cache 4KB read data for following reasons: + * 1) Caching partial data (< 4KB) is likely meaningless. + * 2) Caching partial data makes the read-caching mechanism very hard. */ - bool needs_writeback_prev_cache = !overwrite_fullsize || !(dirtiness.data_bits == 255); + if (!bio_is_fullsize(bio)) + return false; /* - * Writeback works in background and may have cleaned up the metablock. - * If the metablock is clean we don't have to write back. + * We don't need to reserve the same address twice + * because it's either unchanged or invalidated. 
*/ - if (!dirtiness.is_dirty) - needs_writeback_prev_cache = false; + found = lookup_read_cache_cell(wb, bi_sector(bio)); + if (found) + return false; - if (overwrite_fullsize) - needs_writeback_prev_cache = false; + cells->cursor--; + new_cell = cells->array + cells->cursor; + new_cell->sector = bi_sector(bio); + read_cache_add(cells, new_cell); - if (unlikely(needs_writeback_prev_cache)) { - wait_for_flushing(wb, seg->id); - BUG_ON(!dirtiness.is_dirty); - writeback_mb(wb, seg, old_mb, dirtiness.data_bits, true); - } + pbd = per_bio_data(wb, bio); + pbd->type = PBD_WILL_CACHE; + pbd->cell_idx = cells->cursor; - if (mark_clean_mb(wb, old_mb)) - dec_nr_dirty_caches(wb); + /* Cancel the new_cell if needed */ + read_cache_cancel_foreground(cells, new_cell); - ht_del(wb, old_mb); + return true; } -/*----------------------------------------------------------------------------*/ +static void might_cancel_read_cache_cell(struct wb_device *wb, struct bio *bio) +{ + struct read_cache_cell *found; + found = lookup_read_cache_cell(wb, calc_cache_alignment(bi_sector(bio))); + if (found) + found->cancelled = true; +} -#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,14,0) -#define bv_vec struct bio_vec -#define bv_page(vec) vec.bv_page -#define bv_offset(vec) vec.bv_offset -#define bv_len(vec) vec.bv_len -#define bv_it struct bvec_iter -#else -#define bv_vec struct bio_vec * -#define bv_page(vec) vec->bv_page -#define bv_offset(vec) vec->bv_offset -#define bv_len(vec) vec->bv_len -#define bv_it int -#endif +static void read_cache_cell_copy_data(struct wb_device *wb, struct bio *bio, unsigned long error) +{ + struct per_bio_data *pbd = per_bio_data(wb, bio); + struct read_cache_cells *cells = wb->read_cache_cells; + struct read_cache_cell *cell = cells->array + pbd->cell_idx; + + ASSERT(pbd->type == PBD_WILL_CACHE); + + /* Data can be broken. So don't stage. */ + if (error) + cell->cancelled = true; + + /* + * We can omit copying if the cell is cancelled but + * copying for a non-cancelled cell isn't problematic. + */ + if (!cell->cancelled) + copy_bio_payload(cell->data, bio); + + if (atomic_dec_and_test(&cells->ack_count)) + queue_work(cells->wq, &wb->read_cache_work); +} /* - * Incoming bio may have multiple bio vecs as a result bvec merging. - * We shouldn't use bio_data directly to access to whole payload but - * should iterate over the vector. + * Get a read cache cell through simplified write path if the cell data isn't stale. */ -static void copy_bio_payload(void *buf, struct bio *bio) +static void inject_read_cache(struct wb_device *wb, struct read_cache_cell *cell) { - bv_vec vec; - bv_it it; - bio_for_each_segment(vec, bio, it) { - size_t l = bv_len(vec); - memcpy(buf, page_address(bv_page(vec)) + bv_offset(vec), l); - buf += l; + struct metablock *mb; + u32 _mb_idx_inseg; + struct segment_header *seg; + + struct lookup_key key = { + .sector = cell->sector, + }; + struct ht_head *head = ht_get_head(wb, &key); + + mutex_lock(&wb->io_lock); + /* + * if might_cancel_read_cache_cell() on the foreground + * cancelled this cell, the data is now stale. + */ + if (cell->cancelled) { + mutex_unlock(&wb->io_lock); + return; } + + might_queue_current_buffer(wb); + + seg = wb->current_seg; + _mb_idx_inseg = mb_idx_inseg(wb, advance_cursor(wb)); + + /* + * We should copy the cell data into the rambuf with lock held + * otherwise subsequent write data may be written first and then overwritten by + * the old data in the cell. 
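 * The byte offset (_mb_idx_inseg + 1) << 12 below places the 4KB block at
 * slot _mb_idx_inseg of the RAM buffer, apparently leaving the first 4KB
 * for the segment header that prepare_segment_header_device() writes at
 * rambuf->data. Illustration (hypothetical index): for _mb_idx_inseg == 2
 * the data lands at byte offset 3 << 12 == 12288 of the buffer, the same
 * layout write_on_rambuffer() uses for foreground writes.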
+ */ + memcpy(wb->current_rambuf->data + ((_mb_idx_inseg + 1) << 12), cell->data, 1 << 12); + + mb = seg->mb_array + _mb_idx_inseg; + ASSERT(!mb->dirtiness.is_dirty); + mb->dirtiness.data_bits = 255; + + ht_register(wb, head, mb, &key); + + mutex_unlock(&wb->io_lock); + + dec_inflight_ios(wb, seg); } -static void write_on_rambuffer(struct wb_device *wb, struct metablock *write_pos, struct bio *bio) +static void free_read_cache_cell_data(struct read_cache_cells *cells) { - sector_t start_sector = ((mb_idx_inseg(wb, write_pos->idx) + 1) << 3) + io_offset(bio); - size_t start_byte = start_sector << SECTOR_SHIFT; - copy_bio_payload(wb->current_rambuf->data + start_byte, bio); + u32 i; + for (i = 0; i < cells->size; i++) { + struct read_cache_cell *cell = cells->array + i; + vfree(cell->data); + } } -/* - * Advance the cursor and return the old cursor. - * After returned, nr_inflight_ios is incremented to wait for this write to complete. - */ -static u32 advance_cursor(struct wb_device *wb) +static struct read_cache_cells *alloc_read_cache_cells(struct wb_device *wb, u32 n) { - u32 old; - if (wb->cursor == wb->nr_caches) - wb->cursor = 0; - old = wb->cursor; - wb->cursor++; - wb->current_seg->length++; - BUG_ON(wb->current_seg->length > wb->nr_caches_inseg); - atomic_inc(&wb->current_seg->nr_inflight_ios); - return old; + struct read_cache_cells *cells; + u32 i; + cells = kmalloc(sizeof(struct read_cache_cells), GFP_KERNEL); + if (!cells) + return NULL; + + cells->size = n; + cells->threshold = UINT_MAX; /* Default: every read will be cached */ + cells->last_sector = ~0; + cells->seqcount = 0; + cells->over_threshold = false; + cells->array = kmalloc(sizeof(struct read_cache_cell) * n, GFP_KERNEL); + if (!cells->array) + goto bad_cells_array; + + for (i = 0; i < cells->size; i++) { + struct read_cache_cell *cell = cells->array + i; + cell->data = vmalloc(1 << 12); + if (!cell->data) { + u32 j; + for (j = 0; j < i; j++) { + cell = cells->array + j; + vfree(cell->data); + } + goto bad_cell_data; + } + } + + cells->wq = create_singlethread_workqueue("dmwb_read_cache"); + if (!cells->wq) + goto bad_wq; + + return cells; + +bad_wq: + free_read_cache_cell_data(cells); +bad_cell_data: + kfree(cells->array); +bad_cells_array: + kfree(cells); + return NULL; } -static bool needs_queue_seg(struct wb_device *wb) +static void free_read_cache_cells(struct wb_device *wb) { - bool rambuf_no_space = !mb_idx_inseg(wb, wb->cursor); - return rambuf_no_space; + struct read_cache_cells *cells = wb->read_cache_cells; + destroy_workqueue(cells->wq); /* This drains wq. So, must precede the others */ + free_read_cache_cell_data(cells); + kfree(cells->array); + kfree(cells); } -/* - * queue_current_buffer if the RAM buffer can't make space any more. 
- */ -static void might_queue_current_buffer(struct wb_device *wb) +static void reinit_read_cache_cells(struct wb_device *wb) { - if (needs_queue_seg(wb)) - queue_current_buffer(wb); + struct read_cache_cells *cells = wb->read_cache_cells; + u32 i, cur_threshold; + + mutex_lock(&wb->io_lock); + cells->rb_root = RB_ROOT; + cells->cursor = cells->size; + atomic_set(&cells->ack_count, cells->size); + for (i = 0; i < cells->size; i++) { + struct read_cache_cell *cell = cells->array + i; + cell->cancelled = false; + } + cur_threshold = ACCESS_ONCE(wb->read_cache_threshold); + if (cur_threshold && (cur_threshold != cells->threshold)) { + cells->threshold = cur_threshold; + cells->over_threshold = false; + } + mutex_unlock(&wb->io_lock); } /* - * Process bio with REQ_DISCARD - * We only discard sectors on only the backing store because blocks on cache - * device are unlikely to be discarded. As discarding blocks is likely to be - * operated long after writing the block is likely to be written back before that. + * Cancel cells [first, last) */ -static int process_discard_bio(struct wb_device *wb, struct bio *bio) +static void visit_and_cancel_cells(struct rb_node *first, struct rb_node *last) { - bio_remap(bio, wb->backing_dev, bi_sector(bio)); - return DM_MAPIO_REMAPPED; + struct rb_node *rbp = first; + while (rbp != last) { + struct read_cache_cell *cell = read_cache_cell_from_node(rbp); + cell->cancelled = true; + rbp = rb_next(rbp); + } } /* - * Process bio with REQ_FLUSH + * Find out sequence from cells and cancel them if larger than threshold. */ -static int process_flush_bio(struct wb_device *wb, struct bio *bio) +static void read_cache_cancel_background(struct read_cache_cells *cells) { - /* In device-mapper bio with REQ_FLUSH is for sure to have no data. */ - BUG_ON(bi_size(bio)); - queue_barrier_io(wb, bio); - return DM_MAPIO_SUBMITTED; + struct rb_node *rbp = rb_first(&cells->rb_root); + struct rb_node *seqhead = rbp; + sector_t last_sector = ~0; + u32 seqcount = 0; + + while (rbp) { + struct read_cache_cell *cell = read_cache_cell_from_node(rbp); + if (cell->sector == (last_sector + 8)) + seqcount++; + else { + if (seqcount > cells->threshold) + visit_and_cancel_cells(seqhead, rbp); + seqcount = 1; + seqhead = rbp; + } + last_sector = cell->sector; + rbp = rb_next(rbp); + } + if (seqcount > cells->threshold) + visit_and_cancel_cells(seqhead, rbp); } -struct lookup_result { - struct ht_head *head; /* Lookup head used */ - struct lookup_key key; /* Lookup key used */ +static void read_cache_proc(struct work_struct *work) +{ + struct wb_device *wb = container_of(work, struct wb_device, read_cache_work); + struct read_cache_cells *cells = wb->read_cache_cells; + u32 i; - struct segment_header *found_seg; - struct metablock *found_mb; + read_cache_cancel_background(cells); - bool found; /* Cache hit? */ - bool on_buffer; /* Is the metablock found on the RAM buffer? */ -}; + for (i = 0; i < cells->size; i++) { + struct read_cache_cell *cell = cells->array + i; + inject_read_cache(wb, cell); + } -/* - * Lookup a bio relevant cache data. - * In case of cache hit, nr_inflight_ios is incremented. 
- */ -static void cache_lookup(struct wb_device *wb, struct bio *bio, struct lookup_result *res) + reinit_read_cache_cells(wb); +} + +static int init_read_cache_cells(struct wb_device *wb) { - res->key = (struct lookup_key) { - .sector = calc_cache_alignment(bi_sector(bio)), - }; - res->head = ht_get_head(wb, &res->key); + struct read_cache_cells *cells; + INIT_WORK(&wb->read_cache_work, read_cache_proc); + cells = alloc_read_cache_cells(wb, wb->nr_read_cache_cells); + if (!cells) + return -ENOMEM; + wb->read_cache_cells = cells; + reinit_read_cache_cells(wb); + return 0; +} - res->found_mb = ht_lookup(wb, res->head, &res->key); - if (res->found_mb) { - res->found_seg = mb_to_seg(wb, res->found_mb); - atomic_inc(&res->found_seg->nr_inflight_ios); +/*----------------------------------------------------------------------------*/ + +static void initialize_write_io(struct write_io *wio, struct bio *bio) +{ + u8 offset = bio_calc_offset(bio); + sector_t count = bio_sectors(bio); + copy_bio_payload(wio->data + (offset << 9), bio); + wio->data_bits = to_mask(offset, count); +} + +static void memcpy_masked(void *to, u8 protect_bits, void *from, u8 copy_bits) +{ + u8 i; + for (i = 0; i < 8; i++) { + bool will_copy = copy_bits & (1 << i); + bool protected = protect_bits & (1 << i); + if (will_copy && (!protected)) { + size_t offset = (i << 9); + memcpy(to + offset, from + offset, 1 << 9); + } } +} - res->found = (res->found_mb != NULL); +int prepare_overwrite(struct wb_device *wb, struct segment_header *seg, struct metablock *old_mb, struct write_io* wio, u8 overwrite_bits) +{ + struct dirtiness dirtiness = read_mb_dirtiness(wb, seg, old_mb); + + bool needs_merge_prev_cache = !(overwrite_bits == 255) || !(dirtiness.data_bits == 255); + + if (!dirtiness.is_dirty) + needs_merge_prev_cache = false; + + if (overwrite_bits == 255) + needs_merge_prev_cache = false; + + if (unlikely(needs_merge_prev_cache)) { + void *buf; + + wait_for_flushing(wb, seg->id); + ASSERT(dirtiness.is_dirty); + + buf = read_mb(wb, seg, old_mb, dirtiness.data_bits); + if (!buf) + return -EIO; + + /* newer data should be prioritized */ + memcpy_masked(wio->data, wio->data_bits, buf, dirtiness.data_bits); + wio->data_bits |= dirtiness.data_bits; + mempool_free(buf, wb->buf_8_pool); + } + + if (mark_clean_mb(wb, old_mb)) + dec_nr_dirty_caches(wb); - res->on_buffer = false; - if (res->found) - res->on_buffer = is_on_buffer(wb, res->found_mb->idx); + ht_del(wb, old_mb); - inc_stat(wb, io_write(bio), res->found, res->on_buffer, io_fullsize(bio)); + return 0; } /* - * Get new place to write. + * Get a new place to write. 
*/ static struct metablock *prepare_new_write_pos(struct wb_device *wb) { struct metablock *ret = wb->current_seg->mb_array + mb_idx_inseg(wb, advance_cursor(wb)); - BUG_ON(ret->dirtiness.is_dirty); + ASSERT(!ret->dirtiness.is_dirty); ret->dirtiness.data_bits = 0; - BUG_ON(ret->dirtiness.data_bits); return ret; } -static void dec_inflight_ios(struct wb_device *wb, struct segment_header *seg) +static void write_on_rambuffer(struct wb_device *wb, struct metablock *write_pos, struct write_io *wio) { - if (atomic_dec_and_test(&seg->nr_inflight_ios)) - wake_up_active_wq(&wb->inflight_ios_wq); + size_t mb_offset = (mb_idx_inseg(wb, write_pos->idx) + 1) << 12; + void *mb_data = wb->current_rambuf->data + mb_offset; + if (wio->data_bits == 255) + memcpy(mb_data, wio->data, 1 << 12); + else + memcpy_masked(mb_data, 0, wio->data, wio->data_bits); } -static void might_cancel_read_cache_cell(struct wb_device *, struct bio *); -static struct metablock *prepare_write_pos(struct wb_device *wb, struct bio *bio) +static int do_process_write(struct wb_device *wb, struct bio *bio) { - struct metablock *ret; + int err = 0; + + struct metablock *write_pos = NULL; struct lookup_result res; + struct write_io wio; + wio.data = mempool_alloc(wb->buf_8_pool, GFP_NOIO); + if (!wio.data) + return -ENOMEM; + initialize_write_io(&wio, bio); + mutex_lock(&wb->io_lock); cache_lookup(wb, bio, &res); + if (res.found) { if (unlikely(res.on_buffer)) { - /* Overwrite on the ram buffer */ - mutex_unlock(&wb->io_lock); - return res.found_mb; + write_pos = res.found_mb; + goto do_write; } else { - /* - * Invalidate the old cache on the cache device because - * we can't overwrite cache block on the cache device. - */ - prepare_overwrite(wb, res.found_seg, res.found_mb, io_fullsize(bio)); + err = prepare_overwrite(wb, res.found_seg, res.found_mb, &wio, wio.data_bits); dec_inflight_ios(wb, res.found_seg); + if (err) + goto out; } } else might_cancel_read_cache_cell(wb, bio); - /* Write on a new position on the ram buffer */ - might_queue_current_buffer(wb); - ret = prepare_new_write_pos(wb); + write_pos = prepare_new_write_pos(wb); - ht_register(wb, res.head, ret, &res.key); +do_write: + ASSERT(write_pos); + write_on_rambuffer(wb, write_pos, &wio); - mutex_unlock(&wb->io_lock); + if (taint_mb(wb, write_pos, wio.data_bits)) + inc_nr_dirty_caches(wb); - return ret; + ht_register(wb, res.head, write_pos, &res.key); + +out: + mutex_unlock(&wb->io_lock); + mempool_free(wio.data, wb->buf_8_pool); + return err; } -/* - * Write bio data to RAM buffer. - */ -static int do_process_write(struct wb_device *wb, struct metablock *write_pos, struct bio *bio) +static int complete_process_write(struct wb_device *wb, struct bio *bio) { - if (taint_mb(wb, write_pos, bio)) - inc_nr_dirty_caches(wb); - - write_on_rambuffer(wb, write_pos, bio); - dec_inflight_ios(wb, wb->current_seg); /* - * bio with REQ_FUA has data. - * For such bio, we first treat it like a normal bio and then as a REQ_FLUSH bio. + * bio with FUA flag has data. + * We first handle it as a normal write bio and then as a barrier bio. 
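 * In other words, by the time we get here the payload has already been
 * copied into the RAM buffer by do_process_write(); queue_barrier_io()
 * then defers completion of the bio, and copy_barrier_requests() attaches
 * it to the RAM buffer, so it is presumably only acknowledged once that
 * buffer has been flushed, which is what FUA requires.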
*/ - if (bio->bi_rw & REQ_FUA) { + if (bio_is_fua(bio)) { queue_barrier_io(wb, bio); return DM_MAPIO_SUBMITTED; } - if (is_live(wb)) - bio_endio_compat(bio, 0); - else - bio_endio_compat(bio, -EIO); - + bio_endio_compat(bio, 0); return DM_MAPIO_SUBMITTED; } @@ -837,22 +1210,24 @@ * 2) Wait for decrement outside the lock * * process_write: - * prepare_write_pos: + * do_process_write: * mutex_lock (to serialize write) * inc in_flight_ios # refcount on the dst segment * mutex_unlock * - * do_process_write: + * complete_process_write: * dec in_flight_ios * bio_endio(bio) */ static int process_write_wb(struct wb_device *wb, struct bio *bio) { - struct metablock *write_pos = prepare_write_pos(wb, bio); - return do_process_write(wb, write_pos, bio); + int err = do_process_write(wb, bio); + if (err) + return err; + return complete_process_write(wb, bio); } -static int process_write_wt(struct wb_device *wb, struct bio *bio) +static int process_write_wa(struct wb_device *wb, struct bio *bio) { struct lookup_result res; @@ -872,499 +1247,241 @@ static int process_write(struct wb_device *wb, struct bio *bio) { - return wb->write_through_mode ? process_write_wt(wb, bio) : process_write_wb(wb, bio); + return wb->write_around_mode ? process_write_wa(wb, bio) : process_write_wb(wb, bio); } -enum PBD_FLAG { - PBD_NONE = 0, - PBD_WILL_CACHE = 1, - PBD_READ_SEG = 2, -}; - -struct per_bio_data { - enum PBD_FLAG type; - union { - u32 cell_idx; - struct segment_header *seg; - }; +struct read_backing_async_context { + struct wb_device *wb; + struct bio *bio; }; -#define per_bio_data(wb, bio) ((struct per_bio_data *)dm_per_bio_data((bio), (wb)->ti->per_bio_data_size)) - -static void reserve_read_cache_cell(struct wb_device *, struct bio *); -static int process_read(struct wb_device *wb, struct bio *bio) -{ - struct lookup_result res; - struct dirtiness dirtiness; - - mutex_lock(&wb->io_lock); - cache_lookup(wb, bio, &res); - if (!res.found) - reserve_read_cache_cell(wb, bio); - mutex_unlock(&wb->io_lock); - - if (!res.found) { - bio_remap(bio, wb->backing_dev, bi_sector(bio)); - return DM_MAPIO_REMAPPED; - } - - dirtiness = read_mb_dirtiness(wb, res.found_seg, res.found_mb); - if (unlikely(res.on_buffer)) { - if (dirtiness.is_dirty) - writeback_buffered_mb(wb, res.found_mb, dirtiness.data_bits); - - dec_inflight_ios(wb, res.found_seg); - bio_remap(bio, wb->backing_dev, bi_sector(bio)); - return DM_MAPIO_REMAPPED; - } - - /* - * We need to wait for the segment to be flushed to the cache device. - * Without this, we might read the wrong data from the cache device. - */ - wait_for_flushing(wb, res.found_seg->id); - - if (likely(dirtiness.data_bits == 255)) { - struct per_bio_data *pbd = per_bio_data(wb, bio); - pbd->type = PBD_READ_SEG; - pbd->seg = res.found_seg; - - bio_remap(bio, wb->cache_dev, - calc_mb_start_sector(wb, res.found_seg, res.found_mb->idx) + - io_offset(bio)); - } else { - if (dirtiness.is_dirty) - writeback_mb(wb, res.found_seg, res.found_mb, dirtiness.data_bits, true); - if (mark_clean_mb(wb, res.found_mb)) - dec_nr_dirty_caches(wb); - dec_inflight_ios(wb, res.found_seg); - bio_remap(bio, wb->backing_dev, bi_sector(bio)); - } - - if (!is_live(wb)) - bio_io_error(bio); - - return DM_MAPIO_REMAPPED; -} - -static int process_bio(struct wb_device *wb, struct bio *bio) -{ - return io_write(bio) ? 
process_write(wb, bio) : process_read(wb, bio); -} - -static int writeboost_map(struct dm_target *ti, struct bio *bio) -{ - struct wb_device *wb = ti->private; - - struct per_bio_data *pbd = per_bio_data(wb, bio); - pbd->type = PBD_NONE; - - if (bio->bi_rw & REQ_DISCARD) - return process_discard_bio(wb, bio); - - if (bio->bi_rw & REQ_FLUSH) - return process_flush_bio(wb, bio); - - return process_bio(wb, bio); -} - -static void read_cache_cell_copy_data(struct wb_device *, struct bio*, int error); -static int writeboost_end_io(struct dm_target *ti, struct bio *bio, int error) -{ - struct wb_device *wb = ti->private; - struct per_bio_data *pbd = per_bio_data(wb, bio); - - switch (pbd->type) { - case PBD_NONE: - return 0; - case PBD_WILL_CACHE: - read_cache_cell_copy_data(wb, bio, error); - return 0; - case PBD_READ_SEG: - dec_inflight_ios(wb, pbd->seg); - return 0; - default: - BUG(); - } -} - -/*----------------------------------------------------------------------------*/ - -#define read_cache_cell_from_node(node) rb_entry((node), struct read_cache_cell, rb_node) - -static void read_cache_add(struct read_cache_cells *cells, struct read_cache_cell *cell) -{ - struct rb_node **rbp, *parent; - rbp = &cells->rb_root.rb_node; - parent = NULL; - while (*rbp) { - struct read_cache_cell *parent_cell; - parent = *rbp; - parent_cell = read_cache_cell_from_node(parent); - if (cell->sector < parent_cell->sector) - rbp = &(*rbp)->rb_left; - else - rbp = &(*rbp)->rb_right; - } - rb_link_node(&cell->rb_node, parent, rbp); - rb_insert_color(&cell->rb_node, &cells->rb_root); -} - -static struct read_cache_cell *lookup_read_cache_cell(struct wb_device *wb, sector_t sector) -{ - struct rb_node **rbp, *parent; - rbp = &wb->read_cache_cells->rb_root.rb_node; - parent = NULL; - while (*rbp) { - struct read_cache_cell *parent_cell; - parent = *rbp; - parent_cell = read_cache_cell_from_node(parent); - if (parent_cell->sector == sector) - return parent_cell; - - if (sector < parent_cell->sector) - rbp = &(*rbp)->rb_left; - else - rbp = &(*rbp)->rb_right; - } - return NULL; -} - -static void read_cache_cancel_cells(struct read_cache_cells *cells, u32 n) -{ - u32 i; - u32 last = cells->cursor + cells->seqcount; - if (last > cells->size) - last = cells->size; - for (i = cells->cursor; i < last; i++) { - struct read_cache_cell *cell = cells->array + i; - cell->cancelled = true; - } -} - -/* - * Track the forefront read address and cancel cells in case of over threshold. - * If the cell is cancelled foreground, we can save the memory copy in the background. - */ -static void read_cache_cancel_foreground(struct read_cache_cells *cells, - struct read_cache_cell *new_cell) -{ - if (new_cell->sector == (cells->last_sector + 8)) - cells->seqcount++; - else { - cells->seqcount = 1; - cells->over_threshold = false; - } - - if (cells->seqcount > cells->threshold) { - if (cells->over_threshold) - new_cell->cancelled = true; - else { - cells->over_threshold = true; - read_cache_cancel_cells(cells, cells->seqcount); - } - } - cells->last_sector = new_cell->sector; -} - -static void reserve_read_cache_cell(struct wb_device *wb, struct bio *bio) -{ - struct per_bio_data *pbd; - struct read_cache_cells *cells = wb->read_cache_cells; - struct read_cache_cell *found, *new_cell; - - BUG_ON(!cells->threshold); - - if (!ACCESS_ONCE(wb->read_cache_threshold)) - return; - - if (!cells->cursor) - return; - - /* - * We only cache 4KB read data for following reasons: - * 1) Caching partial data (< 4KB) is likely meaningless. 
- * 2) Caching partial data makes the read-caching mechanism very hard. - */ - if (!io_fullsize(bio)) - return; - - /* - * We don't need to reserve the same address twice - * because it's either unchanged or invalidated. - */ - found = lookup_read_cache_cell(wb, bi_sector(bio)); - if (found) - return; - - cells->cursor--; - new_cell = cells->array + cells->cursor; - new_cell->sector = bi_sector(bio); - read_cache_add(cells, new_cell); - - pbd = per_bio_data(wb, bio); - pbd->type = PBD_WILL_CACHE; - pbd->cell_idx = cells->cursor; - - /* Cancel the new_cell if needed */ - read_cache_cancel_foreground(cells, new_cell); -} -static void might_cancel_read_cache_cell(struct wb_device *wb, struct bio *bio) +static void read_backing_async_callback_onstack(unsigned long error, struct read_backing_async_context *ctx) { - struct read_cache_cell *found; - found = lookup_read_cache_cell(wb, calc_cache_alignment(bi_sector(bio))); - if (found) - found->cancelled = true; -} + ASSERT(bio_is_fullsize(ctx->bio)); -static void read_cache_cell_copy_data(struct wb_device *wb, struct bio *bio, int error) -{ - struct per_bio_data *pbd = per_bio_data(wb, bio); - struct read_cache_cells *cells = wb->read_cache_cells; - struct read_cache_cell *cell = cells->array + pbd->cell_idx; + read_cache_cell_copy_data(ctx->wb, ctx->bio, error); - /* Data can be broken. So don't stage. */ if (error) - cell->cancelled = true; - - /* - * We can omit copying if the cell is cancelled but - * copying for a non-cancelled cell isn't problematic. - */ - if (!cell->cancelled) - copy_bio_payload(cell->data, bio); - - if (atomic_dec_and_test(&cells->ack_count)) - queue_work(cells->wq, &wb->read_cache_work); + bio_io_error(ctx->bio); + else + bio_endio_compat(ctx->bio, 0); } -/* - * Get a read cache cell through simplified write path if the cell data isn't stale. - */ -static void inject_read_cache(struct wb_device *wb, struct read_cache_cell *cell) +static void read_backing_async_callback(unsigned long error, void *context) { - struct metablock *mb; - u32 _mb_idx_inseg; - struct ht_head *head; - struct segment_header *seg; + struct read_backing_async_context *ctx = context; + read_backing_async_callback_onstack(error, ctx); + kfree(ctx); +} - struct lookup_key key = { - .sector = cell->sector, - }; +static int read_backing_async(struct wb_device *wb, struct bio *bio) +{ + int err = 0; - mutex_lock(&wb->io_lock); - /* - * if might_cancel_read_cache_cell() on the foreground - * cancelled this cell, the data is now stale. - */ - if (cell->cancelled) { - mutex_unlock(&wb->io_lock); - return; - } + struct dm_io_request io_req; + struct dm_io_region region; - /* - * FIXME Why do we need to double-check here? 
- */ - head = ht_get_head(wb, &key); - mb = ht_lookup(wb, head, &key); - if (unlikely(mb)) { - mutex_unlock(&wb->io_lock); - return; - } + struct read_backing_async_context *ctx = kmalloc(sizeof(struct read_backing_async_context), GFP_NOIO); + if (!ctx) + return -ENOMEM; - might_queue_current_buffer(wb); + ctx->wb = wb; + ctx->bio = bio; - seg = wb->current_seg; - _mb_idx_inseg = mb_idx_inseg(wb, advance_cursor(wb)); - mb = seg->mb_array + _mb_idx_inseg; - BUG_ON(mb->dirtiness.is_dirty); - mb->dirtiness.data_bits = 255; + ASSERT(bio_is_fullsize(bio)); - ht_register(wb, head, mb, &key); + io_req = (struct dm_io_request) { + WB_IO_READ, + .client = wb->io_client, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,14,0) + .mem.type = DM_IO_BIO, + .mem.ptr.bio = bio, +#else + .mem.type = DM_IO_BVEC, + .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx, +#endif + .notify.fn = read_backing_async_callback, + .notify.context = ctx + }; + region = (struct dm_io_region) { + .bdev = wb->backing_dev->bdev, + .sector = bi_sector(bio), + .count = 8 + }; - mutex_unlock(&wb->io_lock); + err = wb_io(&io_req, 1, ®ion, NULL, false); + if (err) + kfree(ctx); - memcpy(wb->current_rambuf->data + ((_mb_idx_inseg + 1) << 12), cell->data, 1 << 12); - dec_inflight_ios(wb, seg); + return err; } -static void free_read_cache_cell_data(struct read_cache_cells *cells) +static int process_read(struct wb_device *wb, struct bio *bio) { - u32 i; - for (i = 0; i < cells->size; i++) { - struct read_cache_cell *cell = cells->array + i; - kfree(cell->data); - } -} + struct lookup_result res; + struct dirtiness dirtiness; + struct per_bio_data *pbd; -static struct read_cache_cells *alloc_read_cache_cells(struct wb_device *wb, u32 n) -{ - struct read_cache_cells *cells; - u32 i; - cells = kmalloc(sizeof(struct read_cache_cells), GFP_KERNEL); - if (!cells) - return NULL; + bool reserved = false; - cells->size = n; - cells->threshold = UINT_MAX; /* Default: every read will be cached */ - cells->last_sector = ~0; - cells->seqcount = 0; - cells->over_threshold = false; - cells->array = kmalloc(sizeof(struct read_cache_cell) * n, GFP_KERNEL); - if (!cells->array) - goto bad_cells_array; + mutex_lock(&wb->io_lock); + cache_lookup(wb, bio, &res); + if (!res.found) + reserved = reserve_read_cache_cell(wb, bio); + mutex_unlock(&wb->io_lock); - for (i = 0; i < cells->size; i++) { - struct read_cache_cell *cell = cells->array + i; - cell->data = kmalloc(1 << 12, GFP_KERNEL); - if (!cell->data) { - u32 j; - for (j = 0; j < i; j++) { - cell = cells->array + j; - kfree(cell->data); + if (!res.found) { + if (reserved) { + /* + * Remapping clone bio to the backing store leads to + * empty payload in clone_endio(). + * To avoid caching junk data, we need this workaround + * to call dm_io() to certainly fill the bio payload. 
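 * Concretely, read_backing_async() issues an asynchronous dm_io() read
 * from the backing device straight into the bio payload (DM_IO_BIO, or
 * DM_IO_BVEC on pre-3.14 kernels); its completion callback copies the
 * payload into the reserved cell via read_cache_cell_copy_data() and then
 * ends the bio itself. If submission fails, the on-stack variant of the
 * callback is invoked with an error so the cell is cancelled and the bio
 * is failed rather than left hanging.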
+ */ + if (read_backing_async(wb, bio)) { + struct read_backing_async_context ctx = { + .wb = wb, + .bio = bio + }; + read_backing_async_callback_onstack(1, &ctx); } - goto bad_cell_data; + return DM_MAPIO_SUBMITTED; + } else { + bio_remap(bio, wb->backing_dev, bi_sector(bio)); + return DM_MAPIO_REMAPPED; } } - cells->wq = create_singlethread_workqueue("dmwb_read_cache"); - if (!cells->wq) - goto bad_wq; + dirtiness = read_mb_dirtiness(wb, res.found_seg, res.found_mb); + if (unlikely(res.on_buffer)) { + int err = fill_payload_by_backing(wb, bio); + if (err) + goto read_buffered_mb_exit; - return cells; + if (dirtiness.is_dirty) + copy_to_bio_payload(bio, ref_buffered_mb(wb, res.found_mb), dirtiness.data_bits); -bad_wq: - free_read_cache_cell_data(cells); -bad_cell_data: - kfree(cells->array); -bad_cells_array: - kfree(cells); - return NULL; -} +read_buffered_mb_exit: + dec_inflight_ios(wb, res.found_seg); -static void free_read_cache_cells(struct wb_device *wb) -{ - struct read_cache_cells *cells = wb->read_cache_cells; - destroy_workqueue(cells->wq); /* This drains wq. So, must precede the others */ - free_read_cache_cell_data(cells); - kfree(cells->array); - kfree(cells); -} + if (unlikely(err)) + bio_io_error(bio); + else + bio_endio_compat(bio, 0); -static void reinit_read_cache_cells(struct wb_device *wb) -{ - struct read_cache_cells *cells = wb->read_cache_cells; - u32 i, cur_threshold; - for (i = 0; i < cells->size; i++) { - struct read_cache_cell *cell = cells->array + i; - cell->cancelled = false; + return DM_MAPIO_SUBMITTED; } - atomic_set(&cells->ack_count, cells->size); - mutex_lock(&wb->io_lock); - cells->rb_root = RB_ROOT; - cells->cursor = cells->size; - cur_threshold = ACCESS_ONCE(wb->read_cache_threshold); - if (cur_threshold && (cur_threshold != cells->threshold)) { - cells->threshold = cur_threshold; - cells->over_threshold = false; + /* + * We need to wait for the segment to be flushed to the cache device. + * Without this, we might read the wrong data from the cache device. + */ + wait_for_flushing(wb, res.found_seg->id); + + if (unlikely(dirtiness.data_bits != 255)) { + int err = fill_payload_by_backing(wb, bio); + if (err) + goto read_mb_exit; + + if (dirtiness.is_dirty) { + void *buf = read_mb(wb, res.found_seg, res.found_mb, dirtiness.data_bits); + if (!buf) { + err = -EIO; + goto read_mb_exit; + } + copy_to_bio_payload(bio, buf, dirtiness.data_bits); + mempool_free(buf, wb->buf_8_pool); + } + +read_mb_exit: + dec_inflight_ios(wb, res.found_seg); + + if (unlikely(err)) + bio_io_error(bio); + else + bio_endio_compat(bio, 0); + + return DM_MAPIO_SUBMITTED; } - mutex_unlock(&wb->io_lock); + + pbd = per_bio_data(wb, bio); + pbd->type = PBD_READ_SEG; + pbd->seg = res.found_seg; + + bio_remap(bio, wb->cache_dev, + calc_mb_start_sector(wb, res.found_seg, res.found_mb->idx) + + bio_calc_offset(bio)); + + return DM_MAPIO_REMAPPED; } -/* - * Cancel cells [first, last) - */ -static void visit_and_cancel_cells(struct rb_node *first, struct rb_node *last) +static int process_bio(struct wb_device *wb, struct bio *bio) { - struct rb_node *rbp = first; - while (rbp != last) { - struct read_cache_cell *cell = read_cache_cell_from_node(rbp); - cell->cancelled = true; - rbp = rb_next(rbp); - } + return bio_is_write(bio) ? process_write(wb, bio) : process_read(wb, bio); } -/* - * Find out sequence from cells and cancel them if larger than threshold. 
- */ -static void read_cache_cancel_background(struct read_cache_cells *cells) +static int process_barrier_bio(struct wb_device *wb, struct bio *bio) { - struct rb_node *rbp = rb_first(&cells->rb_root); - struct rb_node *seqhead = rbp; - sector_t last_sector = ~0; - u32 seqcount = 0; - - while (rbp) { - struct read_cache_cell *cell = read_cache_cell_from_node(rbp); - if (cell->sector == (last_sector + 8)) - seqcount++; - else { - if (seqcount > cells->threshold) - visit_and_cancel_cells(seqhead, rbp); - seqcount = 1; - seqhead = rbp; - } - last_sector = cell->sector; - rbp = rb_next(rbp); - } - if (seqcount > cells->threshold) - visit_and_cancel_cells(seqhead, rbp); + /* barrier bio doesn't have data */ + ASSERT(bio_sectors(bio) == 0); + queue_barrier_io(wb, bio); + return DM_MAPIO_SUBMITTED; } -static void read_cache_proc(struct work_struct *work) +static int writeboost_map(struct dm_target *ti, struct bio *bio) { - struct wb_device *wb = container_of(work, struct wb_device, read_cache_work); - struct read_cache_cells *cells = wb->read_cache_cells; - u32 i; + struct wb_device *wb = ti->private; - read_cache_cancel_background(cells); + struct per_bio_data *pbd = per_bio_data(wb, bio); + pbd->type = PBD_NONE; - for (i = 0; i < cells->size; i++) { - struct read_cache_cell *cell = cells->array + i; - inject_read_cache(wb, cell); - } - reinit_read_cache_cells(wb); + if (bio_is_barrier(bio)) + return process_barrier_bio(wb, bio); + + return process_bio(wb, bio); } -static int init_read_cache_cells(struct wb_device *wb) +static int writeboost_end_io(struct dm_target *ti, struct bio *bio, int error) { - struct read_cache_cells *cells; - INIT_WORK(&wb->read_cache_work, read_cache_proc); - cells = alloc_read_cache_cells(wb, 2048); /* 8MB */ - if (!cells) - return -ENOMEM; - wb->read_cache_cells = cells; - reinit_read_cache_cells(wb); - return 0; -} + struct wb_device *wb = ti->private; + struct per_bio_data *pbd = per_bio_data(wb, bio); -/*----------------------------------------------------------------------------*/ + switch (pbd->type) { + case PBD_NONE: + case PBD_WILL_CACHE: + return 0; + case PBD_READ_SEG: + dec_inflight_ios(wb, pbd->seg); + return 0; + default: + BUG(); + } +} static int consume_essential_argv(struct wb_device *wb, struct dm_arg_set *as) { - int r = 0; + int err = 0; struct dm_target *ti = wb->ti; - r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table), - &wb->backing_dev); - if (r) { + err = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table), + &wb->backing_dev); + if (err) { DMERR("Failed to get backing_dev"); - return r; + return err; } - r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table), - &wb->cache_dev); - if (r) { + err = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table), + &wb->cache_dev); + if (err) { DMERR("Failed to get cache_dev"); goto bad_get_cache; } - return r; + return err; bad_get_cache: dm_put_device(ti, wb->backing_dev); - return r; + return err; } #define consume_kv(name, nr, is_static) { \ @@ -1375,8 +1492,8 @@ DMERR("%s is a static option", #name); \ break; \ } \ - r = dm_read_arg(_args + (nr), as, &tmp, &ti->error); \ - if (r) { \ + err = dm_read_arg(_args + (nr), as, &tmp, &ti->error); \ + if (err) { \ DMERR("%s", ti->error); \ break; \ } \ @@ -1385,7 +1502,7 @@ static int do_consume_optional_argv(struct wb_device *wb, struct dm_arg_set *as, unsigned argc) { - int r = 0; + int err = 0; struct dm_target *ti = wb->ti; static struct dm_arg _args[] = { @@ -1394,7 +1511,8 @@ {0, 3600, "Invalid 
update_sb_record_interval"}, {0, 3600, "Invalid sync_data_interval"}, {0, 127, "Invalid read_cache_threshold"}, - {0, 1, "Invalid write_through_mode"}, + {0, 1, "Invalid write_around_mode"}, + {1, 2048, "Invalid nr_read_cache_cells"}, }; unsigned tmp; @@ -1402,16 +1520,17 @@ const char *key = dm_shift_arg(as); argc--; - r = -EINVAL; + err = -EINVAL; consume_kv(writeback_threshold, 0, false); consume_kv(nr_max_batched_writeback, 1, false); consume_kv(update_sb_record_interval, 2, false); consume_kv(sync_data_interval, 3, false); consume_kv(read_cache_threshold, 4, false); - consume_kv(write_through_mode, 5, true); + consume_kv(write_around_mode, 5, true); + consume_kv(nr_read_cache_cells, 6, true); - if (!r) { + if (!err) { argc--; } else { ti->error = "Invalid optional key"; @@ -1419,24 +1538,24 @@ } } - return r; + return err; } static int consume_optional_argv(struct wb_device *wb, struct dm_arg_set *as) { - int r = 0; + int err = 0; struct dm_target *ti = wb->ti; static struct dm_arg _args[] = { - {0, 12, "Invalid optional argc"}, + {0, 14, "Invalid optional argc"}, }; unsigned argc = 0; if (as->argc) { - r = dm_read_arg_group(_args, as, &argc, &ti->error); - if (r) { + err = dm_read_arg_group(_args, as, &argc, &ti->error); + if (err) { DMERR("%s", ti->error); - return r; + return err; } } @@ -1448,20 +1567,30 @@ static int init_core_struct(struct dm_target *ti) { - int r = 0; + int err = 0; struct wb_device *wb; - r = dm_set_target_max_io_len(ti, 1 << 3); - if (r) { + err = dm_set_target_max_io_len(ti, 1 << 3); + if (err) { DMERR("Failed to set max_io_len"); - return r; + return err; } - ti->flush_supported = true; ti->num_flush_bios = 1; - ti->num_discard_bios = 1; - ti->discard_zeroes_data_unsupported = true; - ti->per_bio_data_size = sizeof(struct per_bio_data); + ti->flush_supported = true; + + /* + * dm-writeboost does't support TRIM + * + * https://github.com/akiradeveloper/dm-writeboost/issues/110 + * - discarding backing data only violates DRAT + * - strictly discarding both cache blocks and backing data is nearly impossible + * considering cache hits may occur partially. 
+ */ + ti->num_discard_bios = 0; + ti->discards_supported = false; + + ti->PER_BIO_DATA_SIZE = sizeof(struct per_bio_data); wb = kzalloc(sizeof(*wb), GFP_KERNEL); if (!wb) { @@ -1471,48 +1600,47 @@ ti->private = wb; wb->ti = ti; - init_waitqueue_head(&wb->writeback_mb_wait_queue); wb->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(wb->copier)) { - r = PTR_ERR(wb->copier); + err = PTR_ERR(wb->copier); goto bad_kcopyd_client; } wb->buf_1_cachep = kmem_cache_create("dmwb_buf_1", - 1 << 9, 1 << SECTOR_SHIFT, SLAB_RED_ZONE, NULL); + 1 << 9, 1 << 9, SLAB_RED_ZONE, NULL); if (!wb->buf_1_cachep) { - r = -ENOMEM; + err = -ENOMEM; goto bad_buf_1_cachep; } wb->buf_1_pool = mempool_create_slab_pool(16, wb->buf_1_cachep); if (!wb->buf_1_pool) { - r = -ENOMEM; + err = -ENOMEM; goto bad_buf_1_pool; } wb->buf_8_cachep = kmem_cache_create("dmwb_buf_8", 1 << 12, 1 << 12, SLAB_RED_ZONE, NULL); if (!wb->buf_8_cachep) { - r = -ENOMEM; + err = -ENOMEM; goto bad_buf_8_cachep; } wb->buf_8_pool = mempool_create_slab_pool(16, wb->buf_8_cachep); if (!wb->buf_8_pool) { - r = -ENOMEM; + err = -ENOMEM; goto bad_buf_8_pool; } wb->io_wq = create_singlethread_workqueue("dmwb_io"); if (!wb->io_wq) { DMERR("Failed to allocate io_wq"); - r = -ENOMEM; + err = -ENOMEM; goto bad_io_wq; } wb->io_client = dm_io_client_create(); if (IS_ERR(wb->io_client)) { DMERR("Failed to allocate io_client"); - r = PTR_ERR(wb->io_client); + err = PTR_ERR(wb->io_client); goto bad_io_client; } @@ -1520,10 +1648,9 @@ init_waitqueue_head(&wb->inflight_ios_wq); spin_lock_init(&wb->mb_lock); atomic64_set(&wb->nr_dirty_caches, 0); - clear_bit(WB_DEAD, &wb->flags); clear_bit(WB_CREATED, &wb->flags); - return r; + return err; bad_io_client: destroy_workqueue(wb->io_wq); @@ -1539,7 +1666,7 @@ dm_kcopyd_client_destroy(wb->copier); bad_kcopyd_client: kfree(wb); - return r; + return err; } static void free_core_struct(struct wb_device *wb) @@ -1586,6 +1713,9 @@ kfree(wb->ctr_args); } +#define save_arg(name) wb->name##_saved = wb->name +#define restore_arg(name) if (wb->name##_saved) { wb->name = wb->name##_saved; } + /* * Create a writeboost device * @@ -1597,47 +1727,55 @@ */ static int writeboost_ctr(struct dm_target *ti, unsigned int argc, char **argv) { - int r = 0; + int err = 0; struct wb_device *wb; struct dm_arg_set as; as.argc = argc; as.argv = argv; - r = init_core_struct(ti); - if (r) { + err = init_core_struct(ti); + if (err) { ti->error = "init_core_struct failed"; - return r; + return err; } wb = ti->private; - r = copy_ctr_args(wb, argc - 2, (const char **)argv + 2); - if (r) { + err = copy_ctr_args(wb, argc - 2, (const char **)argv + 2); + if (err) { ti->error = "copy_ctr_args failed"; goto bad_ctr_args; } - r = consume_essential_argv(wb, &as); - if (r) { + err = consume_essential_argv(wb, &as); + if (err) { ti->error = "consume_essential_argv failed"; goto bad_essential_argv; } - r = resume_cache(wb); - if (r) { - ti->error = "resume_cache failed"; - goto bad_resume_cache; - } - - wb->read_cache_threshold = 0; /* Default: read-caching disabled */ - r = consume_optional_argv(wb, &as); - if (r) { + err = consume_optional_argv(wb, &as); + if (err) { ti->error = "consume_optional_argv failed"; goto bad_optional_argv; } - r = init_read_cache_cells(wb); - if (r) { + save_arg(writeback_threshold); + save_arg(nr_max_batched_writeback); + save_arg(update_sb_record_interval); + save_arg(sync_data_interval); + save_arg(read_cache_threshold); + save_arg(nr_read_cache_cells); + + err = resume_cache(wb); + if (err) { + ti->error = 
"resume_cache failed"; + goto bad_resume_cache; + } + + wb->nr_read_cache_cells = 2048; /* 8MB */ + restore_arg(nr_read_cache_cells); + err = init_read_cache_cells(wb); + if (err) { ti->error = "init_read_cache_cells failed"; goto bad_read_cache_cells; } @@ -1645,21 +1783,28 @@ clear_stat(wb); set_bit(WB_CREATED, &wb->flags); - return r; + + restore_arg(writeback_threshold); + restore_arg(nr_max_batched_writeback); + restore_arg(update_sb_record_interval); + restore_arg(sync_data_interval); + restore_arg(read_cache_threshold); + + return err; bad_read_cache_cells: -bad_optional_argv: free_cache(wb); bad_resume_cache: dm_put_device(ti, wb->cache_dev); dm_put_device(ti, wb->backing_dev); +bad_optional_argv: bad_essential_argv: free_ctr_args(wb); bad_ctr_args: free_core_struct(wb); ti->private = NULL; - return r; + return err; } static void writeboost_dtr(struct dm_target *ti) @@ -1687,10 +1832,9 @@ */ static void writeboost_postsuspend(struct dm_target *ti) { - int r = 0; struct wb_device *wb = ti->private; flush_current_buffer(wb); - maybe_IO(blkdev_issue_flush(wb->cache_dev->bdev, GFP_NOIO, NULL)); + blkdev_issue_flush(wb->cache_dev->bdev, GFP_NOIO, NULL); } static int writeboost_message(struct dm_target *ti, unsigned argc, char **argv) @@ -1707,12 +1851,12 @@ } if (!strcasecmp(argv[0], "drop_caches")) { - int r = 0; + int err = 0; wb->force_drop = true; - r = wait_event_interruptible(wb->wait_drop_caches, - !atomic64_read(&wb->nr_dirty_caches)); + err = wait_event_interruptible(wb->wait_drop_caches, + !atomic64_read(&wb->nr_dirty_caches)); wb->force_drop = false; - return r; + return err; } return do_consume_optional_argv(wb, &as, 2); @@ -1796,7 +1940,7 @@ static struct target_type writeboost_target = { .name = "writeboost", - .version = {2, 1, 1}, + .version = {2, 2, 6}, .module = THIS_MODULE, .map = writeboost_map, .end_io = writeboost_end_io, @@ -1811,15 +1955,15 @@ static int __init writeboost_module_init(void) { - int r = 0; + int err = 0; - r = dm_register_target(&writeboost_target); - if (r < 0) { + err = dm_register_target(&writeboost_target); + if (err < 0) { DMERR("Failed to register target"); - return r; + return err; } - return r; + return err; } static void __exit writeboost_module_exit(void)