diff options
-rw-r--r--  drivers/md/md.c     |  2
-rw-r--r--  drivers/md/raid1.c  | 38
-rw-r--r--  drivers/md/raid10.c | 29
-rw-r--r--  drivers/md/raid5.c  |  4
4 files changed, 47 insertions(+), 26 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c index 681d1099a2d..9b82377a833 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5268,8 +5268,8 @@ static void md_clean(struct mddev *mddev)  static void __md_stop_writes(struct mddev *mddev)  { +	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);  	if (mddev->sync_thread) { -		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);  		set_bit(MD_RECOVERY_INTR, &mddev->recovery);  		md_reap_sync_thread(mddev);  	} diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 55951182af7..6e17f8181c4 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -417,7 +417,17 @@ static void raid1_end_write_request(struct bio *bio, int error)  		r1_bio->bios[mirror] = NULL;  		to_put = bio; -		set_bit(R1BIO_Uptodate, &r1_bio->state); +		/* +		 * Do not set R1BIO_Uptodate if the current device is +		 * rebuilding or Faulty. This is because we cannot use +		 * such device for properly reading the data back (we could +		 * potentially use it, if the current write would have felt +		 * before rdev->recovery_offset, but for simplicity we don't +		 * check this here. +		 */ +		if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) && +		    !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags)) +			set_bit(R1BIO_Uptodate, &r1_bio->state);  		/* Maybe we can clear some bad blocks. */  		if (is_badblock(conf->mirrors[mirror].rdev, @@ -870,17 +880,17 @@ static void allow_barrier(struct r1conf *conf)  	wake_up(&conf->wait_barrier);  } -static void freeze_array(struct r1conf *conf) +static void freeze_array(struct r1conf *conf, int extra)  {  	/* stop syncio and normal IO and wait for everything to  	 * go quite.  	 * We increment barrier and nr_waiting, and then -	 * wait until nr_pending match nr_queued+1 +	 * wait until nr_pending match nr_queued+extra  	 * This is called in the context of one normal IO request  	 * that has failed. 
Thus any sync request that might be pending  	 * will be blocked by nr_pending, and we need to wait for  	 * pending IO requests to complete or be queued for re-try. -	 * Thus the number queued (nr_queued) plus this request (1) +	 * Thus the number queued (nr_queued) plus this request (extra)  	 * must match the number of pending IOs (nr_pending) before  	 * we continue.  	 */ @@ -888,7 +898,7 @@ static void freeze_array(struct r1conf *conf)  	conf->barrier++;  	conf->nr_waiting++;  	wait_event_lock_irq_cmd(conf->wait_barrier, -				conf->nr_pending == conf->nr_queued+1, +				conf->nr_pending == conf->nr_queued+extra,  				conf->resync_lock,  				flush_pending_writes(conf));  	spin_unlock_irq(&conf->resync_lock); @@ -1544,8 +1554,8 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)  		 * we wait for all outstanding requests to complete.  		 */  		synchronize_sched(); -		raise_barrier(conf); -		lower_barrier(conf); +		freeze_array(conf, 0); +		unfreeze_array(conf);  		clear_bit(Unmerged, &rdev->flags);  	}  	md_integrity_add_rdev(rdev, mddev); @@ -1595,11 +1605,11 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)  			 */  			struct md_rdev *repl =  				conf->mirrors[conf->raid_disks + number].rdev; -			raise_barrier(conf); +			freeze_array(conf, 0);  			clear_bit(Replacement, &repl->flags);  			p->rdev = repl;  			conf->mirrors[conf->raid_disks + number].rdev = NULL; -			lower_barrier(conf); +			unfreeze_array(conf);  			clear_bit(WantReplacement, &rdev->flags);  		} else  			clear_bit(WantReplacement, &rdev->flags); @@ -2195,7 +2205,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)  	 * frozen  	 */  	if (mddev->ro == 0) { -		freeze_array(conf); +		freeze_array(conf, 1);  		fix_read_error(conf, r1_bio->read_disk,  			       r1_bio->sector, r1_bio->sectors);  		unfreeze_array(conf); @@ -2780,8 +2790,8 @@ static int run(struct mddev *mddev)  		return PTR_ERR(conf);  	if (mddev->queue) -		
blk_queue_max_write_same_sectors(mddev->queue, -						 mddev->chunk_sectors); +		blk_queue_max_write_same_sectors(mddev->queue, 0); +  	rdev_for_each(rdev, mddev) {  		if (!mddev->gendisk)  			continue; @@ -2963,7 +2973,7 @@ static int raid1_reshape(struct mddev *mddev)  		return -ENOMEM;  	} -	raise_barrier(conf); +	freeze_array(conf, 0);  	/* ok, everything is stopped */  	oldpool = conf->r1bio_pool; @@ -2994,7 +3004,7 @@ static int raid1_reshape(struct mddev *mddev)  	conf->raid_disks = mddev->raid_disks = raid_disks;  	mddev->delta_disks = 0; -	lower_barrier(conf); +	unfreeze_array(conf);  	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);  	md_wakeup_thread(mddev->thread); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 59d4daa5f4c..6ddae2501b9 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -490,7 +490,17 @@ static void raid10_end_write_request(struct bio *bio, int error)  		sector_t first_bad;  		int bad_sectors; -		set_bit(R10BIO_Uptodate, &r10_bio->state); +		/* +		 * Do not set R10BIO_Uptodate if the current device is +		 * rebuilding or Faulty. This is because we cannot use +		 * such device for properly reading the data back (we could +		 * potentially use it, if the current write would have felt +		 * before rdev->recovery_offset, but for simplicity we don't +		 * check this here. +		 */ +		if (test_bit(In_sync, &rdev->flags) && +		    !test_bit(Faulty, &rdev->flags)) +			set_bit(R10BIO_Uptodate, &r10_bio->state);  		/* Maybe we can clear some bad blocks. */  		if (is_badblock(rdev, @@ -1055,17 +1065,17 @@ static void allow_barrier(struct r10conf *conf)  	wake_up(&conf->wait_barrier);  } -static void freeze_array(struct r10conf *conf) +static void freeze_array(struct r10conf *conf, int extra)  {  	/* stop syncio and normal IO and wait for everything to  	 * go quiet.  	 
* We increment barrier and nr_waiting, and then -	 * wait until nr_pending match nr_queued+1 +	 * wait until nr_pending match nr_queued+extra  	 * This is called in the context of one normal IO request  	 * that has failed. Thus any sync request that might be pending  	 * will be blocked by nr_pending, and we need to wait for  	 * pending IO requests to complete or be queued for re-try. -	 * Thus the number queued (nr_queued) plus this request (1) +	 * Thus the number queued (nr_queued) plus this request (extra)  	 * must match the number of pending IOs (nr_pending) before  	 * we continue.  	 */ @@ -1073,7 +1083,7 @@ static void freeze_array(struct r10conf *conf)  	conf->barrier++;  	conf->nr_waiting++;  	wait_event_lock_irq_cmd(conf->wait_barrier, -				conf->nr_pending == conf->nr_queued+1, +				conf->nr_pending == conf->nr_queued+extra,  				conf->resync_lock,  				flush_pending_writes(conf)); @@ -1837,8 +1847,8 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)  		 * we wait for all outstanding requests to complete.  		 
*/  		synchronize_sched(); -		raise_barrier(conf, 0); -		lower_barrier(conf); +		freeze_array(conf, 0); +		unfreeze_array(conf);  		clear_bit(Unmerged, &rdev->flags);  	}  	md_integrity_add_rdev(rdev, mddev); @@ -2612,7 +2622,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)  	r10_bio->devs[slot].bio = NULL;  	if (mddev->ro == 0) { -		freeze_array(conf); +		freeze_array(conf, 1);  		fix_read_error(conf, mddev, r10_bio);  		unfreeze_array(conf);  	} else @@ -3609,8 +3619,7 @@ static int run(struct mddev *mddev)  	if (mddev->queue) {  		blk_queue_max_discard_sectors(mddev->queue,  					      mddev->chunk_sectors); -		blk_queue_max_write_same_sectors(mddev->queue, -						 mddev->chunk_sectors); +		blk_queue_max_write_same_sectors(mddev->queue, 0);  		blk_queue_io_min(mddev->queue, chunk_size);  		if (conf->geo.raid_disks % conf->geo.near_copies)  			blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 753f318c898..05e4a105b9c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5466,7 +5466,7 @@ static int run(struct mddev *mddev)  		if (mddev->major_version == 0 &&  		    mddev->minor_version > 90)  			rdev->recovery_offset = reshape_offset; -			 +  		if (rdev->recovery_offset < reshape_offset) {  			/* We need to check old and new layout */  			if (!only_parity(rdev->raid_disk, @@ -5589,6 +5589,8 @@ static int run(struct mddev *mddev)  		 */  		mddev->queue->limits.discard_zeroes_data = 0; +		blk_queue_max_write_same_sectors(mddev->queue, 0); +  		rdev_for_each(rdev, mddev) {  			disk_stack_limits(mddev->gendisk, rdev->bdev,  					  rdev->data_offset << 9);  |