16 files changed, 338 insertions, 153 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 3c955e10a61..c6083132c4b 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1025,6 +1025,8 @@ void dm_bufio_prefetch(struct dm_bufio_client *c,
 {
 	struct blk_plug plug;
 
+	BUG_ON(dm_bufio_in_request());
+
 	blk_start_plug(&plug);
 	dm_bufio_lock(c);
 
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index fbd3625f274..83e995fece8 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -83,6 +83,8 @@ struct cache_disk_superblock {
 	__le32 read_misses;
 	__le32 write_hits;
 	__le32 write_misses;
+
+	__le32 policy_version[CACHE_POLICY_VERSION_SIZE];
 } __packed;
 
 struct dm_cache_metadata {
@@ -109,6 +111,7 @@ struct dm_cache_metadata {
 	bool clean_when_opened:1;
 
 	char policy_name[CACHE_POLICY_NAME_SIZE];
+	unsigned policy_version[CACHE_POLICY_VERSION_SIZE];
 	size_t policy_hint_size;
 	struct dm_cache_statistics stats;
 };
@@ -268,7 +271,8 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
 	memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
 	disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
 	disk_super->version = cpu_to_le32(CACHE_VERSION);
-	memset(disk_super->policy_name, 0, CACHE_POLICY_NAME_SIZE);
+	memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
+	memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
 	disk_super->policy_hint_size = 0;
 
 	r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root,
@@ -284,7 +288,6 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
 	disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
 	disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
 	disk_super->cache_blocks = cpu_to_le32(0);
-	memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
 
 	disk_super->read_hits = cpu_to_le32(0);
 	disk_super->read_misses = cpu_to_le32(0);
@@ -478,6 +481,9 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd,
 	cmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
 	cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks));
 	strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name));
+	cmd->policy_version[0] = le32_to_cpu(disk_super->policy_version[0]);
+	cmd->policy_version[1] = le32_to_cpu(disk_super->policy_version[1]);
+	cmd->policy_version[2] = le32_to_cpu(disk_super->policy_version[2]);
 	cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size);
 
 	cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits);
@@ -572,6 +578,9 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
 	disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks));
 	disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks));
 	strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name));
+	disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]);
+	disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]);
+	disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]);
 
 	disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits);
 	disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses);
@@ -854,18 +863,43 @@ struct thunk {
 	bool hints_valid;
 };
 
+static bool policy_unchanged(struct dm_cache_metadata *cmd,
+			     struct dm_cache_policy *policy)
+{
+	const char *policy_name = dm_cache_policy_get_name(policy);
+	const unsigned *policy_version = dm_cache_policy_get_version(policy);
+	size_t policy_hint_size = dm_cache_policy_get_hint_size(policy);
+
+	/*
+	 * Ensure policy names match.
+	 */
+	if (strncmp(cmd->policy_name, policy_name, sizeof(cmd->policy_name)))
+		return false;
+
+	/*
+	 * Ensure policy major versions match.
+	 */
+	if (cmd->policy_version[0] != policy_version[0])
+		return false;
+
+	/*
+	 * Ensure policy hint sizes match.
+	 */
+	if (cmd->policy_hint_size != policy_hint_size)
+		return false;
+
+	return true;
+}
+
 static bool hints_array_initialized(struct dm_cache_metadata *cmd)
 {
 	return cmd->hint_root && cmd->policy_hint_size;
 }
 
 static bool hints_array_available(struct dm_cache_metadata *cmd,
-				  const char *policy_name)
+				  struct dm_cache_policy *policy)
 {
-	bool policy_names_match = !strncmp(cmd->policy_name, policy_name,
-					   sizeof(cmd->policy_name));
-
-	return cmd->clean_when_opened && policy_names_match &&
+	return cmd->clean_when_opened && policy_unchanged(cmd, policy) &&
 		hints_array_initialized(cmd);
 }
 
@@ -899,7 +933,8 @@ static int __load_mapping(void *context, uint64_t cblock, void *leaf)
 	return r;
 }
 
-static int __load_mappings(struct dm_cache_metadata *cmd, const char *policy_name,
+static int __load_mappings(struct dm_cache_metadata *cmd,
+			   struct dm_cache_policy *policy,
 			   load_mapping_fn fn, void *context)
 {
 	struct thunk thunk;
@@ -909,18 +944,19 @@ static int __load_mappings(struct dm_cache_metadata *cmd, const char *policy_nam
 
 	thunk.cmd = cmd;
 	thunk.respect_dirty_flags = cmd->clean_when_opened;
-	thunk.hints_valid = hints_array_available(cmd, policy_name);
+	thunk.hints_valid = hints_array_available(cmd, policy);
 
 	return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk);
 }
 
-int dm_cache_load_mappings(struct dm_cache_metadata *cmd, const char *policy_name,
+int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
+			   struct dm_cache_policy *policy,
 			   load_mapping_fn fn, void *context)
 {
 	int r;
 
 	down_read(&cmd->root_lock);
-	r = __load_mappings(cmd, policy_name, fn, context);
+	r = __load_mappings(cmd, policy, fn, context);
 	up_read(&cmd->root_lock);
 
 	return r;
@@ -979,7 +1015,7 @@ static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty
 		/* nothing to be done */
 		return 0;
 
-	value = pack_value(oblock, flags | (dirty ? M_DIRTY : 0));
+	value = pack_value(oblock, (flags & ~M_DIRTY) | (dirty ? M_DIRTY : 0));
 	__dm_bless_for_disk(&value);
 
 	r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock),
@@ -1070,13 +1106,15 @@ static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po
 	__le32 value;
 	size_t hint_size;
 	const char *policy_name = dm_cache_policy_get_name(policy);
+	const unsigned *policy_version = dm_cache_policy_get_version(policy);
 
 	if (!policy_name[0] ||
 	    (strlen(policy_name) > sizeof(cmd->policy_name) - 1))
 		return -EINVAL;
 
-	if (strcmp(cmd->policy_name, policy_name)) {
+	if (!policy_unchanged(cmd, policy)) {
 		strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
+		memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
 
 		hint_size = dm_cache_policy_get_hint_size(policy);
 		if (!hint_size)
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index 135864ea0ee..f45cef21f3d 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -89,7 +89,7 @@ typedef int (*load_mapping_fn)(void *context, dm_oblock_t oblock,
 			       dm_cblock_t cblock, bool dirty,
 			       uint32_t hint, bool hint_valid);
 int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
-			   const char *policy_name,
+			   struct dm_cache_policy *policy,
 			   load_mapping_fn fn,
 			   void *context);
 
diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c
index cc05d70b3cb..b04d1f904d0 100644
--- a/drivers/md/dm-cache-policy-cleaner.c
+++ b/drivers/md/dm-cache-policy-cleaner.c
@@ -17,7 +17,6 @@
 /*----------------------------------------------------------------*/
 
 #define DM_MSG_PREFIX "cache cleaner"
-#define CLEANER_VERSION "1.0.0"
 
 /* Cache entry struct. */
 struct wb_cache_entry {
@@ -434,6 +433,7 @@ static struct dm_cache_policy *wb_create(dm_cblock_t cache_size,
 
 static struct dm_cache_policy_type wb_policy_type = {
 	.name = "cleaner",
+	.version = {1, 0, 0},
 	.hint_size = 0,
 	.owner = THIS_MODULE,
 	.create = wb_create
@@ -446,7 +446,10 @@ static int __init wb_init(void)
 	if (r < 0)
 		DMERR("register failed %d", r);
 	else
-		DMINFO("version " CLEANER_VERSION " loaded");
+		DMINFO("version %u.%u.%u loaded",
+		       wb_policy_type.version[0],
+		       wb_policy_type.version[1],
+		       wb_policy_type.version[2]);
 
 	return r;
 }
diff --git a/drivers/md/dm-cache-policy-internal.h b/drivers/md/dm-cache-policy-internal.h
index 52a75beeced..0928abdc49f 100644
--- a/drivers/md/dm-cache-policy-internal.h
+++ b/drivers/md/dm-cache-policy-internal.h
@@ -117,6 +117,8 @@ void dm_cache_policy_destroy(struct dm_cache_policy *p);
  */
 const char *dm_cache_policy_get_name(struct dm_cache_policy *p);
 
+const unsigned *dm_cache_policy_get_version(struct dm_cache_policy *p);
+
 size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p);
 
 /*----------------------------------------------------------------*/
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 96415325507..dc112a7137f 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -14,7 +14,6 @@
 #include <linux/vmalloc.h>
 
 #define DM_MSG_PREFIX "cache-policy-mq"
-#define MQ_VERSION	"1.0.0"
 
 static struct kmem_cache *mq_entry_cache;
 
@@ -1133,6 +1132,7 @@ bad_cache_alloc:
 
 static struct dm_cache_policy_type mq_policy_type = {
 	.name = "mq",
+	.version = {1, 0, 0},
 	.hint_size = 4,
 	.owner = THIS_MODULE,
 	.create = mq_create
@@ -1140,6 +1140,7 @@ static struct dm_cache_policy_type mq_policy_type = {
 
 static struct dm_cache_policy_type default_policy_type = {
 	.name = "default",
+	.version = {1, 0, 0},
 	.hint_size = 4,
 	.owner = THIS_MODULE,
 	.create = mq_create
@@ -1164,7 +1165,10 @@ static int __init mq_init(void)
 
 	r = dm_cache_policy_register(&default_policy_type);
 	if (!r) {
-		DMINFO("version " MQ_VERSION " loaded");
+		DMINFO("version %u.%u.%u loaded",
+		       mq_policy_type.version[0],
+		       mq_policy_type.version[1],
+		       mq_policy_type.version[2]);
 		return 0;
 	}
 
diff --git a/drivers/md/dm-cache-policy.c b/drivers/md/dm-cache-policy.c
index 2cbf5fdaac5..21c03c570c0 100644
--- a/drivers/md/dm-cache-policy.c
+++ b/drivers/md/dm-cache-policy.c
@@ -150,6 +150,14 @@ const char *dm_cache_policy_get_name(struct dm_cache_policy *p)
 }
 EXPORT_SYMBOL_GPL(dm_cache_policy_get_name);
 
+const unsigned *dm_cache_policy_get_version(struct dm_cache_policy *p)
+{
+	struct dm_cache_policy_type *t = p->private;
+
+	return t->version;
+}
+EXPORT_SYMBOL_GPL(dm_cache_policy_get_version);
+
 size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p)
 {
 	struct dm_cache_policy_type *t = p->private;
diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h
index f0f51b26054..558bdfdabf5 100644
--- a/drivers/md/dm-cache-policy.h
+++ b/drivers/md/dm-cache-policy.h
@@ -196,6 +196,7 @@ struct dm_cache_policy {
  * We maintain a little register of the different policy types.
  */
 #define CACHE_POLICY_NAME_SIZE 16
+#define CACHE_POLICY_VERSION_SIZE 3
 
 struct dm_cache_policy_type {
 	/* For use by the register code only. */
@@ -206,6 +207,7 @@ struct dm_cache_policy_type {
 	 * what gets passed on the target line to select your policy.
 	 */
 	char name[CACHE_POLICY_NAME_SIZE];
+	unsigned version[CACHE_POLICY_VERSION_SIZE];
 
 	/*
 	 * Policies may store a hint for each each cache block.
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 0f4e84b15c3..66120bd46d1 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -142,6 +142,7 @@ struct cache {
 	spinlock_t lock;
 	struct bio_list deferred_bios;
 	struct bio_list deferred_flush_bios;
+	struct bio_list deferred_writethrough_bios;
 	struct list_head quiesced_migrations;
 	struct list_head completed_migrations;
 	struct list_head need_commit_migrations;
@@ -158,7 +159,7 @@ struct cache {
 	/*
 	 * origin_blocks entries, discarded if set.
 	 */
-	sector_t discard_block_size; /* a power of 2 times sectors per block */
+	uint32_t discard_block_size; /* a power of 2 times sectors per block */
 	dm_dblock_t discard_nr_blocks;
 	unsigned long *discard_bitset;
 
@@ -199,6 +200,11 @@ struct per_bio_data {
 	bool tick:1;
 	unsigned req_nr:2;
 	struct dm_deferred_entry *all_io_entry;
+
+	/* writethrough fields */
+	struct cache *cache;
+	dm_cblock_t cblock;
+	bio_end_io_t *saved_bi_end_io;
 };
 
 struct dm_cache_migration {
@@ -412,17 +418,24 @@ static bool block_size_is_power_of_two(struct cache *cache)
 	return cache->sectors_per_block_shift >= 0;
 }
 
+static dm_block_t block_div(dm_block_t b, uint32_t n)
+{
+	do_div(b, n);
+
+	return b;
+}
+
 static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
 {
-	sector_t discard_blocks = cache->discard_block_size;
+	uint32_t discard_blocks = cache->discard_block_size;
 	dm_block_t b = from_oblock(oblock);
 
 	if (!block_size_is_power_of_two(cache))
-		(void) sector_div(discard_blocks, cache->sectors_per_block);
+		discard_blocks = discard_blocks / cache->sectors_per_block;
 	else
 		discard_blocks >>= cache->sectors_per_block_shift;
 
-	(void) sector_div(b, discard_blocks);
+	b = block_div(b, discard_blocks);
 
 	return to_dblock(b);
 }
@@ -609,6 +622,56 @@ static void issue(struct cache *cache, struct bio *bio)
 	spin_unlock_irqrestore(&cache->lock, flags);
 }
 
+static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cache->lock, flags);
+	bio_list_add(&cache->deferred_writethrough_bios, bio);
+	spin_unlock_irqrestore(&cache->lock, flags);
+
+	wake_worker(cache);
+}
+
+static void writethrough_endio(struct bio *bio, int err)
+{
+	struct per_bio_data *pb = get_per_bio_data(bio);
+	bio->bi_end_io = pb->saved_bi_end_io;
+
+	if (err) {
+		bio_endio(bio, err);
+		return;
+	}
+
+	remap_to_cache(pb->cache, bio, pb->cblock);
+
+	/*
+	 * We can't issue this bio directly, since we're in interrupt
+	 * context.  So it get's put on a bio list for processing by the
+	 * worker thread.
+	 */
+	defer_writethrough_bio(pb->cache, bio);
+}
+
+/*
+ * When running in writethrough mode we need to send writes to clean blocks
+ * to both the cache and origin devices.  In future we'd like to clone the
+ * bio and send them in parallel, but for now we're doing them in
+ * series as this is easier.
+ */
+static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
+				       dm_oblock_t oblock, dm_cblock_t cblock)
+{
+	struct per_bio_data *pb = get_per_bio_data(bio);
+
+	pb->cache = cache;
+	pb->cblock = cblock;
+	pb->saved_bi_end_io = bio->bi_end_io;
+	bio->bi_end_io = writethrough_endio;
+
+	remap_to_origin_clear_discard(pb->cache, bio, oblock);
+}
+
 /*----------------------------------------------------------------
  * Migration processing
  *
@@ -1002,7 +1065,7 @@ static void process_discard_bio(struct cache *cache, struct bio *bio)
 	dm_block_t end_block = bio->bi_sector + bio_sectors(bio);
 	dm_block_t b;
 
-	(void) sector_div(end_block, cache->discard_block_size);
+	end_block = block_div(end_block, cache->discard_block_size);
 
 	for (b = start_block; b < end_block; b++)
 		set_discard(cache, to_dblock(b));
@@ -1070,14 +1133,9 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
 		inc_hit_counter(cache, bio);
 		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
 
-		if (is_writethrough_io(cache, bio, lookup_result.cblock)) {
-			/*
-			 * No need to mark anything dirty in write through mode.
-			 */
-			pb->req_nr == 0 ?
-				remap_to_cache(cache, bio, lookup_result.cblock) :
-				remap_to_origin_clear_discard(cache, bio, block);
-		} else
+		if (is_writethrough_io(cache, bio, lookup_result.cblock))
+			remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
+		else
 			remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
 
 		issue(cache, bio);
@@ -1086,17 +1144,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
 	case POLICY_MISS:
 		inc_miss_counter(cache, bio);
 		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-
-		if (pb->req_nr != 0) {
-			/*
-			 * This is a duplicate writethrough io that is no
-			 * longer needed because the block has been demoted.
-			 */
-			bio_endio(bio, 0);
-		} else {
-			remap_to_origin_clear_discard(cache, bio, block);
-			issue(cache, bio);
-		}
+		remap_to_origin_clear_discard(cache, bio, block);
+		issue(cache, bio);
 		break;
 
 	case POLICY_NEW:
@@ -1217,6 +1266,23 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
 		submit_bios ? generic_make_request(bio) : bio_io_error(bio);
 }
 
+static void process_deferred_writethrough_bios(struct cache *cache)
+{
+	unsigned long flags;
+	struct bio_list bios;
+	struct bio *bio;
+
+	bio_list_init(&bios);
+
+	spin_lock_irqsave(&cache->lock, flags);
+	bio_list_merge(&bios, &cache->deferred_writethrough_bios);
+	bio_list_init(&cache->deferred_writethrough_bios);
+	spin_unlock_irqrestore(&cache->lock, flags);
+
+	while ((bio = bio_list_pop(&bios)))
+		generic_make_request(bio);
+}
+
 static void writeback_some_dirty_blocks(struct cache *cache)
 {
 	int r = 0;
@@ -1313,6 +1379,7 @@ static int more_work(struct cache *cache)
 	else
 		return !bio_list_empty(&cache->deferred_bios) ||
 			!bio_list_empty(&cache->deferred_flush_bios) ||
+			!bio_list_empty(&cache->deferred_writethrough_bios) ||
 			!list_empty(&cache->quiesced_migrations) ||
 			!list_empty(&cache->completed_migrations) ||
 			!list_empty(&cache->need_commit_migrations);
@@ -1331,6 +1398,8 @@ static void do_worker(struct work_struct *ws)
 
 		writeback_some_dirty_blocks(cache);
 
+		process_deferred_writethrough_bios(cache);
+
 		if (commit_if_needed(cache)) {
 			process_deferred_flush_bios(cache, false);
 
@@ -1756,8 +1825,11 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca,
 	}
 
 	r = set_config_values(cache->policy, ca->policy_argc, ca->policy_argv);
-	if (r)
+	if (r) {
+		*error = "Error setting cache policy's config values";
 		dm_cache_policy_destroy(cache->policy);
+		cache->policy = NULL;
+	}
 
 	return r;
 }
@@ -1793,8 +1865,6 @@ static sector_t calculate_discard_block_size(sector_t cache_block_size,
 
 #define DEFAULT_MIGRATION_THRESHOLD (2048 * 100)
 
-static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio);
-
 static int cache_create(struct cache_args *ca, struct cache **result)
 {
 	int r = 0;
@@ -1821,9 +1891,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
 	memcpy(&cache->features, &ca->features, sizeof(cache->features));
 
-	if (cache->features.write_through)
-		ti->num_write_bios = cache_num_write_bios;
-
 	cache->callbacks.congested_fn = cache_is_congested;
 	dm_table_add_target_callbacks(ti->table, &cache->callbacks);
 
@@ -1835,7 +1902,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
 	/* FIXME: factor out this whole section */
 	origin_blocks = cache->origin_sectors = ca->origin_sectors;
-	(void) sector_div(origin_blocks, ca->block_size);
+	origin_blocks = block_div(origin_blocks, ca->block_size);
 	cache->origin_blocks = to_oblock(origin_blocks);
 
 	cache->sectors_per_block = ca->block_size;
@@ -1848,7 +1915,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 		dm_block_t cache_size = ca->cache_sectors;
 
 		cache->sectors_per_block_shift = -1;
-		(void) sector_div(cache_size, ca->block_size);
+		cache_size = block_div(cache_size, ca->block_size);
 		cache->cache_size = to_cblock(cache_size);
 	} else {
 		cache->sectors_per_block_shift = __ffs(ca->block_size);
@@ -1873,6 +1940,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 	spin_lock_init(&cache->lock);
 	bio_list_init(&cache->deferred_bios);
 	bio_list_init(&cache->deferred_flush_bios);
+	bio_list_init(&cache->deferred_writethrough_bios);
 	INIT_LIST_HEAD(&cache->quiesced_migrations);
 	INIT_LIST_HEAD(&cache->completed_migrations);
 	INIT_LIST_HEAD(&cache->need_commit_migrations);
@@ -2002,6 +2070,8 @@ static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		goto out;
 
 	r = cache_create(ca, &cache);
+	if (r)
+		goto out;
 
 	r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
 	if (r) {
@@ -2016,20 +2086,6 @@ out:
 	return r;
 }
 
-static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio)
-{
-	int r;
-	struct cache *cache = ti->private;
-	dm_oblock_t block = get_bio_block(cache, bio);
-	dm_cblock_t cblock;
-
-	r = policy_lookup(cache->policy, block, &cblock);
-	if (r < 0)
-		return 2;	/* assume the worst */
-
-	return (!r && !is_dirty(cache, cblock)) ? 2 : 1;
-}
-
 static int cache_map(struct dm_target *ti, struct bio *bio)
 {
 	struct cache *cache = ti->private;
@@ -2097,18 +2153,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 		inc_hit_counter(cache, bio);
 		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
 
-		if (is_writethrough_io(cache, bio, lookup_result.cblock)) {
-			/*
-			 * No need to mark anything dirty in write through mode.
-			 */
-			pb->req_nr == 0 ?
-				remap_to_cache(cache, bio, lookup_result.cblock) :
-				remap_to_origin_clear_discard(cache, bio, block);
-			cell_defer(cache, cell, false);
-		} else {
+		if (is_writethrough_io(cache, bio, lookup_result.cblock))
+			remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
+		else
 			remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
-			cell_defer(cache, cell, false);
-		}
+
+		cell_defer(cache, cell, false);
 		break;
 
 	case POLICY_MISS:
@@ -2319,8 +2369,7 @@ static int cache_preresume(struct dm_target *ti)
 	}
 
 	if (!cache->loaded_mappings) {
-		r = dm_cache_load_mappings(cache->cmd,
-					   dm_cache_policy_get_name(cache->policy),
+		r = dm_cache_load_mappings(cache->cmd, cache->policy,
 					   load_mapping, cache);
 		if (r) {
 			DMERR("could not load cache mappings");
@@ -2535,7 +2584,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type cache_target = {
 	.name = "cache",
-	.version = {1, 0, 0},
+	.version = {1, 1, 0},
 	.module = THIS_MODULE,
 	.ctr = cache_ctr,
 	.dtr = cache_dtr,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 009339d6282..004ad1652b7 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1577,6 +1577,11 @@ static bool data_dev_supports_discard(struct pool_c *pt)
 	return q && blk_queue_discard(q);
 }
 
+static bool is_factor(sector_t block_size, uint32_t n)
+{
+	return !sector_div(block_size, n);
+}
+
 /*
  * If discard_passdown was enabled verify that the data device
  * supports discards.  Disable discard_passdown if not.
@@ -1602,7 +1607,7 @@ static void disable_passdown_if_not_supported(struct pool_c *pt)
 	else if (data_limits->discard_granularity > block_size)
 		reason = "discard granularity larger than a block";
 
-	else if (block_size & (data_limits->discard_granularity - 1))
+	else if (!is_factor(block_size, data_limits->discard_granularity))
 		reason = "discard granularity not a factor of block size";
 
 	if (reason) {
@@ -2544,7 +2549,7 @@ static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 6, 1},
+	.version = {1, 7, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -2831,7 +2836,7 @@ static int thin_iterate_devices(struct dm_target *ti,
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 7, 1},
+	.version = {1, 8, 0},
 	.module	= THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 6ad538375c3..a746f1d21c6 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -93,6 +93,13 @@ struct dm_verity_io {
 	 */
 };
 
+struct dm_verity_prefetch_work {
+	struct work_struct work;
+	struct dm_verity *v;
+	sector_t block;
+	unsigned n_blocks;
+};
+
 static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io)
 {
 	return (struct shash_desc *)(io + 1);
@@ -424,15 +431,18 @@ static void verity_end_io(struct bio *bio, int error)
  * The root buffer is not prefetched, it is assumed that it will be cached
  * all the time.
  */
-static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
+static void verity_prefetch_io(struct work_struct *work)
 {
+	struct dm_verity_prefetch_work *pw =
+		container_of(work, struct dm_verity_prefetch_work, work);
+	struct dm_verity *v = pw->v;
 	int i;
 
 	for (i = v->levels - 2; i >= 0; i--) {
 		sector_t hash_block_start;
 		sector_t hash_block_end;
-		verity_hash_at_level(v, io->block, i, &hash_block_start, NULL);
-		verity_hash_at_level(v, io->block + io->n_blocks - 1, i, &hash_block_end, NULL);
+		verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL);
+		verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL);
 		if (!i) {
 			unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster);
 
@@ -452,6 +462,25 @@ no_prefetch_cluster:
 		dm_bufio_prefetch(v->bufio, hash_block_start,
 				  hash_block_end - hash_block_start + 1);
 	}
+
+	kfree(pw);
+}
+
+static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
+{
+	struct dm_verity_prefetch_work *pw;
+
+	pw = kmalloc(sizeof(struct dm_verity_prefetch_work),
+		GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+
+	if (!pw)
+		return;
+
+	INIT_WORK(&pw->work, verity_prefetch_io);
+	pw->v = v;
+	pw->block = io->block;
+	pw->n_blocks = io->n_blocks;
+	queue_work(v->verify_wq, &pw->work);
 }
 
 /*
@@ -498,7 +527,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
 	memcpy(io->io_vec, bio_iovec(bio),
 	       io->io_vec_size * sizeof(struct bio_vec));
 
-	verity_prefetch_io(v, io);
+	verity_submit_prefetch(v, io);
 
 	generic_make_request(bio);
 
@@ -858,7 +887,7 @@ bad:
 
 static struct target_type verity_target = {
 	.name		= "verity",
-	.version	= {1, 1, 1},
+	.version	= {1, 2, 0},
 	.module		= THIS_MODULE,
 	.ctr		= verity_ctr,
 	.dtr		= verity_dtr,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index fcb878f8879..aeceedfc530 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7663,10 +7663,8 @@ static int remove_and_add_spares(struct mddev *mddev)
 				removed++;
 			}
 		}
-	if (removed)
-		sysfs_notify(&mddev->kobj, NULL,
-			     "degraded");
-
+	if (removed && mddev->kobj.sd)
+		sysfs_notify(&mddev->kobj, NULL, "degraded");
 
 	rdev_for_each(rdev, mddev) {
 		if (rdev->raid_disk >= 0 &&
diff --git a/drivers/md/md.h b/drivers/md/md.h
index eca59c3074e..d90fb1a879e 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -506,7 +506,7 @@ static inline char * mdname (struct mddev * mddev)
 static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char nm[20];
-	if (!test_bit(Replacement, &rdev->flags)) {
+	if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) {
 		sprintf(nm, "rd%d", rdev->raid_disk);
 		return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm);
 	} else
@@ -516,7 +516,7 @@ static inline int sysfs_link_rdev(struct mddev *mddev, struct md_rdev *rdev)
 static inline void sysfs_unlink_rdev(struct mddev *mddev, struct md_rdev *rdev)
 {
 	char nm[20];
-	if (!test_bit(Replacement, &rdev->flags)) {
+	if (!test_bit(Replacement, &rdev->flags) && mddev->kobj.sd) {
 		sprintf(nm, "rd%d", rdev->raid_disk);
 		sysfs_remove_link(&mddev->kobj, nm);
 	}
diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index c4f28133ef8..b88757cd0d1 100644
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -139,15 +139,8 @@ struct child {
 	struct btree_node *n;
 };
 
-static struct dm_btree_value_type le64_type = {
-	.context = NULL,
-	.size = sizeof(__le64),
-	.inc = NULL,
-	.dec = NULL,
-	.equal = NULL
-};
-
-static int init_child(struct dm_btree_info *info, struct btree_node *parent,
+static int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt,
+		      struct btree_node *parent,
 		      unsigned index, struct child *result)
 {
 	int r, inc;
@@ -164,7 +157,7 @@ static int init_child(struct dm_btree_info *info, struct btree_node *parent,
 	result->n = dm_block_data(result->block);
 
 	if (inc)
-		inc_children(info->tm, result->n, &le64_type);
+		inc_children(info->tm, result->n, vt);
 
 	*((__le64 *) value_ptr(parent, index)) =
 		cpu_to_le64(dm_block_location(result->block));
@@ -236,7 +229,7 @@ static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
 }
 
 static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
-		      unsigned left_index)
+		      struct dm_btree_value_type *vt, unsigned left_index)
 {
 	int r;
 	struct btree_node *parent;
@@ -244,11 +237,11 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
 
 	parent = dm_block_data(shadow_current(s));
 
-	r = init_child(info, parent, left_index, &left);
+	r = init_child(info, vt, parent, left_index, &left);
 	if (r)
 		return r;
 
-	r = init_child(info, parent, left_index + 1, &right);
+	r = init_child(info, vt, parent, left_index + 1, &right);
 	if (r) {
 		exit_child(info, &left);
 		return r;
@@ -368,7 +361,7 @@ static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent,
 }
 
 static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
-		      unsigned left_index)
+		      struct dm_btree_value_type *vt, unsigned left_index)
 {
 	int r;
 	struct btree_node *parent = dm_block_data(shadow_current(s));
@@ -377,17 +370,17 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
 	/*
 	 * FIXME: fill out an array?
 	 */
-	r = init_child(info, parent, left_index, &left);
+	r = init_child(info, vt, parent, left_index, &left);
 	if (r)
 		return r;
 
-	r = init_child(info, parent, left_index + 1, &center);
+	r = init_child(info, vt, parent, left_index + 1, &center);
 	if (r) {
 		exit_child(info, &left);
 		return r;
 	}
 
-	r = init_child(info, parent, left_index + 2, &right);
+	r = init_child(info, vt, parent, left_index + 2, &right);
 	if (r) {
 		exit_child(info, &left);
 		exit_child(info, &center);
@@ -434,7 +427,8 @@ static int get_nr_entries(struct dm_transaction_manager *tm,
 }
 
 static int rebalance_children(struct shadow_spine *s,
-			      struct dm_btree_info *info, uint64_t key)
+			      struct dm_btree_info *info,
+			      struct dm_btree_value_type *vt, uint64_t key)
 {
 	int i, r, has_left_sibling, has_right_sibling;
 	uint32_t child_entries;
@@ -472,13 +466,13 @@ static int rebalance_children(struct shadow_spine *s,
 	has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);
 
 	if (!has_left_sibling)
-		r = rebalance2(s, info, i);
+		r = rebalance2(s, info, vt, i);
 
 	else if (!has_right_sibling)
-		r = rebalance2(s, info, i - 1);
+		r = rebalance2(s, info, vt, i - 1);
 
 	else
-		r = rebalance3(s, info, i - 1);
+		r = rebalance3(s, info, vt, i - 1);
 
 	return r;
 }
@@ -529,7 +523,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
 		if (le32_to_cpu(n->header.flags) & LEAF_NODE)
 			return do_leaf(n, key, index);
 
-		r = rebalance_children(s, info, key);
+		r = rebalance_children(s, info, vt, key);
 		if (r)
 			break;
 
@@ -550,6 +544,14 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
 	return r;
 }
 
+static struct dm_btree_value_type le64_type = {
+	.context = NULL,
+	.size = sizeof(__le64),
+	.inc = NULL,
+	.dec = NULL,
+	.equal = NULL
+};
+
 int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
 		    uint64_t *keys, dm_block_t *new_root)
 {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3ee2912889e..24909eb13fe 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -671,9 +671,11 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			bi->bi_next = NULL;
 			if (rrdev)
 				set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
-			trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
-					      bi, disk_devt(conf->mddev->gendisk),
-					      sh->dev[i].sector);
+
+			if (conf->mddev->gendisk)
+				trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
+						      bi, disk_devt(conf->mddev->gendisk),
+						      sh->dev[i].sector);
 			generic_make_request(bi);
 		}
 		if (rrdev) {
@@ -701,9 +703,10 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			rbi->bi_io_vec[0].bv_offset = 0;
 			rbi->bi_size = STRIPE_SIZE;
 			rbi->bi_next = NULL;
-			trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
-					      rbi, disk_devt(conf->mddev->gendisk),
-					      sh->dev[i].sector);
+			if (conf->mddev->gendisk)
+				trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
+						      rbi, disk_devt(conf->mddev->gendisk),
+						      sh->dev[i].sector);
 			generic_make_request(rbi);
 		}
 		if (!rdev && !rrdev) {
@@ -2280,17 +2283,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 	int level = conf->level;
 
 	if (rcw) {
-		/* if we are not expanding this is a proper write request, and
-		 * there will be bios with new data to be drained into the
-		 * stripe cache
-		 */
-		if (!expand) {
-			sh->reconstruct_state = reconstruct_state_drain_run;
-			set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
-		} else
-			sh->reconstruct_state = reconstruct_state_run;
-
-		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
 
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
@@ -2303,6 +2295,21 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 				s->locked++;
 			}
 		}
+		/* if we are not expanding this is a proper write request, and
+		 * there will be bios with new data to be drained into the
+		 * stripe cache
+		 */
+		if (!expand) {
+			if (!s->locked)
+				/* False alarm, nothing to do */
+				return;
+			sh->reconstruct_state = reconstruct_state_drain_run;
+			set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
+		} else
+			sh->reconstruct_state = reconstruct_state_run;
+
+		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
+
 		if (s->locked + conf->max_degraded == disks)
 			if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
 				atomic_inc(&conf->pending_full_writes);
@@ -2311,11 +2318,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 		BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
 			test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
 
-		sh->reconstruct_state = reconstruct_state_prexor_drain_run;
-		set_bit(STRIPE_OP_PREXOR, &s->ops_request);
-		set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
-		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
-
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
 			if (i == pd_idx)
@@ -2330,6 +2332,13 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 				s->locked++;
 			}
 		}
+		if (!s->locked)
+			/* False alarm - nothing to do */
+			return;
+		sh->reconstruct_state = reconstruct_state_prexor_drain_run;
+		set_bit(STRIPE_OP_PREXOR, &s->ops_request);
+		set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
+		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
 	}
 
 	/* keep the parity disk(s) locked while asynchronous operations
@@ -2564,6 +2573,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
 	int i;
 
 	clear_bit(STRIPE_SYNCING, &sh->state);
+	if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
+		wake_up(&conf->wait_for_overlap);
 	s->syncing = 0;
 	s->replacing = 0;
 	/* There is nothing more to do for sync/check/repair.
@@ -2737,6 +2748,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 {
 	int i;
 	struct r5dev *dev;
+	int discard_pending = 0;
 
 	for (i = disks; i--; )
 		if (sh->dev[i].written) {
@@ -2765,9 +2777,23 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 						STRIPE_SECTORS,
 					 !test_bit(STRIPE_DEGRADED, &sh->state),
 						0);
-			}
-		} else if (test_bit(R5_Discard, &sh->dev[i].flags))
-			clear_bit(R5_Discard, &sh->dev[i].flags);
+			} else if (test_bit(R5_Discard, &dev->flags))
+				discard_pending = 1;
+		}
+	if (!discard_pending &&
+	    test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
+		clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
+		clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
+		if (sh->qd_idx >= 0) {
+			clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
+			clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags);
+		}
+		/* now that discard is done we can proceed with any sync */
+		clear_bit(STRIPE_DISCARD, &sh->state);
+		if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
+			set_bit(STRIPE_HANDLE, &sh->state);
+
+	}
 
 	if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
 		if (atomic_dec_and_test(&conf->pending_full_writes))
@@ -2826,8 +2852,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 	set_bit(STRIPE_HANDLE, &sh->state);
 	if (rmw < rcw && rmw > 0) {
 		/* prefer read-modify-write, but need to get some data */
-		blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d",
-				  (unsigned long long)sh->sector, rmw);
+		if (conf->mddev->queue)
+			blk_add_trace_msg(conf->mddev->queue,
+					  "raid5 rmw %llu %d",
+					  (unsigned long long)sh->sector, rmw);
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
 			if ((dev->towrite || i == sh->pd_idx) &&
@@ -2877,7 +2905,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 				}
 			}
 		}
-		if (rcw)
+		if (rcw && conf->mddev->queue)
 			blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
 					  (unsigned long long)sh->sector,
 					  rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
@@ -3417,9 +3445,15 @@ static void handle_stripe(struct stripe_head *sh)
 		return;
 	}
 
-	if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
-		set_bit(STRIPE_SYNCING, &sh->state);
-		clear_bit(STRIPE_INSYNC, &sh->state);
+	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+		spin_lock(&sh->stripe_lock);
+		/* Cannot process 'sync' concurrently with 'discard' */
+		if (!test_bit(STRIPE_DISCARD, &sh->state) &&
+		    test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+			set_bit(STRIPE_SYNCING, &sh->state);
+			clear_bit(STRIPE_INSYNC, &sh->state);
+		}
+		spin_unlock(&sh->stripe_lock);
 	}
 	clear_bit(STRIPE_DELAYED, &sh->state);
 
@@ -3579,6 +3613,8 @@ static void handle_stripe(struct stripe_head *sh)
 	    test_bit(STRIPE_INSYNC, &sh->state)) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
 		clear_bit(STRIPE_SYNCING, &sh->state);
+		if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
+			wake_up(&conf->wait_for_overlap);
 	}
 
 	/* If the failed drives are just a ReadError, then we might need
@@ -3982,9 +4018,10 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		atomic_inc(&conf->active_aligned_reads);
 		spin_unlock_irq(&conf->device_lock);
 
-		trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
-				      align_bi, disk_devt(mddev->gendisk),
-				      raid_bio->bi_sector);
+		if (mddev->gendisk)
+			trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
+					      align_bi, disk_devt(mddev->gendisk),
+					      raid_bio->bi_sector);
 		generic_make_request(align_bi);
 		return 1;
 	} else {
@@ -4078,7 +4115,8 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
 		}
 		spin_unlock_irq(&conf->device_lock);
 	}
-	trace_block_unplug(mddev->queue, cnt, !from_schedule);
+	if (mddev->queue)
+		trace_block_unplug(mddev->queue, cnt, !from_schedule);
 	kfree(cb);
 }
 
@@ -4141,6 +4179,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 		sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
 		prepare_to_wait(&conf->wait_for_overlap, &w,
 				TASK_UNINTERRUPTIBLE);
+		set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
+		if (test_bit(STRIPE_SYNCING, &sh->state)) {
+			release_stripe(sh);
+			schedule();
+			goto again;
+		}
+		clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
 		spin_lock_irq(&sh->stripe_lock);
 		for (d = 0; d < conf->raid_disks; d++) {
 			if (d == sh->pd_idx || d == sh->qd_idx)
@@ -4153,6 +4198,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 				goto again;
 			}
 		}
+		set_bit(STRIPE_DISCARD, &sh->state);
 		finish_wait(&conf->wait_for_overlap, &w);
 		for (d = 0; d < conf->raid_disks; d++) {
 			if (d == sh->pd_idx || d == sh->qd_idx)
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 18b2c4a8a1f..b0b663b119a 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -221,10 +221,6 @@ struct stripe_head {
 	struct stripe_operations {
 		int 		     target, target2;
 		enum sum_check_flags zero_sum_result;
-		#ifdef CONFIG_MULTICORE_RAID456
-		unsigned long	     request;
-		wait_queue_head_t    wait_for_ops;
-		#endif
 	} ops;
 	struct r5dev {
 		/* rreq and rvec are used for the replacement device when
@@ -323,6 +319,7 @@ enum {
 	STRIPE_COMPUTE_RUN,
 	STRIPE_OPS_REQ_PENDING,
 	STRIPE_ON_UNPLUG_LIST,
+	STRIPE_DISCARD,
 };
 
 /*