diff options
Diffstat (limited to 'drivers/md/raid5.c')
| -rw-r--r-- | drivers/md/raid5.c | 154 | 
1 files changed, 82 insertions, 72 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5af2d270908..24909eb13fe 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -671,9 +671,11 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)  			bi->bi_next = NULL;  			if (rrdev)  				set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); -			trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), -					      bi, disk_devt(conf->mddev->gendisk), -					      sh->dev[i].sector); + +			if (conf->mddev->gendisk) +				trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), +						      bi, disk_devt(conf->mddev->gendisk), +						      sh->dev[i].sector);  			generic_make_request(bi);  		}  		if (rrdev) { @@ -701,9 +703,10 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)  			rbi->bi_io_vec[0].bv_offset = 0;  			rbi->bi_size = STRIPE_SIZE;  			rbi->bi_next = NULL; -			trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), -					      rbi, disk_devt(conf->mddev->gendisk), -					      sh->dev[i].sector); +			if (conf->mddev->gendisk) +				trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), +						      rbi, disk_devt(conf->mddev->gendisk), +						      sh->dev[i].sector);  			generic_make_request(rbi);  		}  		if (!rdev && !rrdev) { @@ -1403,7 +1406,7 @@ static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu  			   &sh->ops.zero_sum_result, percpu->spare_page, &submit);  } -static void __raid_run_ops(struct stripe_head *sh, unsigned long ops_request) +static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)  {  	int overlap_clear = 0, i, disks = sh->disks;  	struct dma_async_tx_descriptor *tx = NULL; @@ -1468,36 +1471,6 @@ static void __raid_run_ops(struct stripe_head *sh, unsigned long ops_request)  	put_cpu();  } -#ifdef CONFIG_MULTICORE_RAID456 -static void async_run_ops(void *param, async_cookie_t cookie) -{ -	struct stripe_head *sh = param; -	unsigned long ops_request = sh->ops.request; - -	clear_bit_unlock(STRIPE_OPS_REQ_PENDING, &sh->state); -	wake_up(&sh->ops.wait_for_ops); - -	__raid_run_ops(sh, ops_request); -	release_stripe(sh); -} - -static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) -{ -	/* since handle_stripe can be called outside of raid5d context -	 * we need to ensure sh->ops.request is de-staged before another -	 * request arrives -	 */ -	wait_event(sh->ops.wait_for_ops, -		   !test_and_set_bit_lock(STRIPE_OPS_REQ_PENDING, &sh->state)); -	sh->ops.request = ops_request; - -	atomic_inc(&sh->count); -	async_schedule(async_run_ops, sh); -} -#else -#define raid_run_ops __raid_run_ops -#endif -  static int grow_one_stripe(struct r5conf *conf)  {  	struct stripe_head *sh; @@ -1506,9 +1479,6 @@ static int grow_one_stripe(struct r5conf *conf)  		return 0;  	sh->raid_conf = conf; -	#ifdef CONFIG_MULTICORE_RAID456 -	init_waitqueue_head(&sh->ops.wait_for_ops); -	#endif  	spin_lock_init(&sh->stripe_lock); @@ -1627,9 +1597,6 @@ static int resize_stripes(struct r5conf *conf, int newsize)  			break;  		nsh->raid_conf = conf; -		#ifdef CONFIG_MULTICORE_RAID456 -		init_waitqueue_head(&nsh->ops.wait_for_ops); -		#endif  		spin_lock_init(&nsh->stripe_lock);  		list_add(&nsh->lru, &newstripes); @@ -2316,17 +2283,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,  	int level = conf->level;  	if (rcw) { -		/* if we are not expanding this is a proper write request, and -		 * there will be bios with new data to be drained into the -		 * stripe cache -		 */ -		if (!expand) { -			sh->reconstruct_state = reconstruct_state_drain_run; -			set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); -		} else -			sh->reconstruct_state = reconstruct_state_run; - -		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);  		for (i = disks; i--; ) {  			struct r5dev *dev = &sh->dev[i]; @@ -2339,6 +2295,21 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,  				s->locked++;  			}  		} +		/* if we are not expanding this is a proper write request, and +		 * there will be bios with new data to be drained into the +		 * stripe cache +		 */ +		if (!expand) { +			if (!s->locked) +				/* False alarm, nothing to do */ +				return; +			sh->reconstruct_state = reconstruct_state_drain_run; +			set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); +		} else +			sh->reconstruct_state = reconstruct_state_run; + +		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); +  		if (s->locked + conf->max_degraded == disks)  			if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))  				atomic_inc(&conf->pending_full_writes); @@ -2347,11 +2318,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,  		BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||  			test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); -		sh->reconstruct_state = reconstruct_state_prexor_drain_run; -		set_bit(STRIPE_OP_PREXOR, &s->ops_request); -		set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); -		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request); -  		for (i = disks; i--; ) {  			struct r5dev *dev = &sh->dev[i];  			if (i == pd_idx) @@ -2366,6 +2332,13 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,  				s->locked++;  			}  		} +		if (!s->locked) +			/* False alarm - nothing to do */ +			return; +		sh->reconstruct_state = reconstruct_state_prexor_drain_run; +		set_bit(STRIPE_OP_PREXOR, &s->ops_request); +		set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); +		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);  	}  	/* keep the parity disk(s) locked while asynchronous operations @@ -2600,6 +2573,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,  	int i;  	clear_bit(STRIPE_SYNCING, &sh->state); +	if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) +		wake_up(&conf->wait_for_overlap);  	s->syncing = 0;  	s->replacing = 0;  	/* There is nothing more to do for sync/check/repair. @@ -2773,6 +2748,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,  {  	int i;  	struct r5dev *dev; +	int discard_pending = 0;  	for (i = disks; i--; )  		if (sh->dev[i].written) { @@ -2801,9 +2777,23 @@ static void handle_stripe_clean_event(struct r5conf *conf,  						STRIPE_SECTORS,  					 !test_bit(STRIPE_DEGRADED, &sh->state),  						0); -			} -		} else if (test_bit(R5_Discard, &sh->dev[i].flags)) -			clear_bit(R5_Discard, &sh->dev[i].flags); +			} else if (test_bit(R5_Discard, &dev->flags)) +				discard_pending = 1; +		} +	if (!discard_pending && +	    test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { +		clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags); +		clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); +		if (sh->qd_idx >= 0) { +			clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags); +			clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags); +		} +		/* now that discard is done we can proceed with any sync */ +		clear_bit(STRIPE_DISCARD, &sh->state); +		if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) +			set_bit(STRIPE_HANDLE, &sh->state); + +	}  	if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))  		if (atomic_dec_and_test(&conf->pending_full_writes)) @@ -2862,8 +2852,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,  	set_bit(STRIPE_HANDLE, &sh->state);  	if (rmw < rcw && rmw > 0) {  		/* prefer read-modify-write, but need to get some data */ -		blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d", -				  (unsigned long long)sh->sector, rmw); +		if (conf->mddev->queue) +			blk_add_trace_msg(conf->mddev->queue, +					  "raid5 rmw %llu %d", +					  (unsigned long long)sh->sector, rmw);  		for (i = disks; i--; ) {  			struct r5dev *dev = &sh->dev[i];  			if ((dev->towrite || i == sh->pd_idx) && @@ -2913,7 +2905,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,  				}  			}  		} -		if (rcw) +		if (rcw && conf->mddev->queue)  			blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",  					  (unsigned long long)sh->sector,  					  rcw, qread, test_bit(STRIPE_DELAYED, &sh->state)); @@ -3453,9 +3445,15 @@ static void handle_stripe(struct stripe_head *sh)  		return;  	} -	if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { -		set_bit(STRIPE_SYNCING, &sh->state); -		clear_bit(STRIPE_INSYNC, &sh->state); +	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { +		spin_lock(&sh->stripe_lock); +		/* Cannot process 'sync' concurrently with 'discard' */ +		if (!test_bit(STRIPE_DISCARD, &sh->state) && +		    test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { +			set_bit(STRIPE_SYNCING, &sh->state); +			clear_bit(STRIPE_INSYNC, &sh->state); +		} +		spin_unlock(&sh->stripe_lock);  	}  	clear_bit(STRIPE_DELAYED, &sh->state); @@ -3615,6 +3613,8 @@ static void handle_stripe(struct stripe_head *sh)  	    test_bit(STRIPE_INSYNC, &sh->state)) {  		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);  		clear_bit(STRIPE_SYNCING, &sh->state); +		if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags)) +			wake_up(&conf->wait_for_overlap);  	}  	/* If the failed drives are just a ReadError, then we might need @@ -4018,9 +4018,10 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)  		atomic_inc(&conf->active_aligned_reads);  		spin_unlock_irq(&conf->device_lock); -		trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), -				      align_bi, disk_devt(mddev->gendisk), -				      raid_bio->bi_sector); +		if (mddev->gendisk) +			trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), +					      align_bi, disk_devt(mddev->gendisk), +					      raid_bio->bi_sector);  		generic_make_request(align_bi);  		return 1;  	} else { @@ -4114,7 +4115,8 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)  		}  		spin_unlock_irq(&conf->device_lock);  	} -	trace_block_unplug(mddev->queue, cnt, !from_schedule); +	if (mddev->queue) +		trace_block_unplug(mddev->queue, cnt, !from_schedule);  	kfree(cb);  } @@ -4177,6 +4179,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)  		sh = get_active_stripe(conf, logical_sector, 0, 0, 0);  		prepare_to_wait(&conf->wait_for_overlap, &w,  				TASK_UNINTERRUPTIBLE); +		set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); +		if (test_bit(STRIPE_SYNCING, &sh->state)) { +			release_stripe(sh); +			schedule(); +			goto again; +		} +		clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);  		spin_lock_irq(&sh->stripe_lock);  		for (d = 0; d < conf->raid_disks; d++) {  			if (d == sh->pd_idx || d == sh->qd_idx) @@ -4189,6 +4198,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)  				goto again;  			}  		} +		set_bit(STRIPE_DISCARD, &sh->state);  		finish_wait(&conf->wait_for_overlap, &w);  		for (d = 0; d < conf->raid_disks; d++) {  			if (d == sh->pd_idx || d == sh->qd_idx)  |