Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 143
1 files changed, 103 insertions, 40 deletions
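In brief, the hunks below make several related changes: bios that raid5 completes itself now emit trace_block_bio_complete (in return_io(), raid5_align_endio(), make_request() and retry_aligned_read()); the trace_block_bio_remap, trace_block_unplug and blk_add_trace_msg calls are guarded so they are skipped when mddev->gendisk or mddev->queue is NULL, as can happen when the array is driven through dm-raid rather than an md device node; schedule_reconstruction() now returns early when no device was actually locked (a "false alarm" write); raid5_end_write_request() marks a device that had seen a read error as already rewritten (R5_ReWrite) when a later write into a bad-block range succeeds; handle_stripe_clean_event() clears R5_Discard and R5_UPTODATE on the parity device(s) only once every data device's discard has completed; and a new STRIPE_DISCARD stripe state serializes 'discard' against 'sync' on the same stripe. A userspace sketch of that last exclusion pattern follows the diff.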
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 3ee2912889e..4a7be455d6d 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -184,6 +184,8 @@ static void return_io(struct bio *return_bi)
 		return_bi = bi->bi_next;
 		bi->bi_next = NULL;
 		bi->bi_size = 0;
+		trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
+					 bi, 0);
 		bio_endio(bi, 0);
 		bi = return_bi;
 	}
@@ -671,9 +673,11 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			bi->bi_next = NULL;
 			if (rrdev)
 				set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
-			trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
-					      bi, disk_devt(conf->mddev->gendisk),
-					      sh->dev[i].sector);
+
+			if (conf->mddev->gendisk)
+				trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
+						      bi, disk_devt(conf->mddev->gendisk),
+						      sh->dev[i].sector);
 			generic_make_request(bi);
 		}
 		if (rrdev) {
@@ -701,9 +705,10 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 			rbi->bi_io_vec[0].bv_offset = 0;
 			rbi->bi_size = STRIPE_SIZE;
 			rbi->bi_next = NULL;
-			trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
-					      rbi, disk_devt(conf->mddev->gendisk),
-					      sh->dev[i].sector);
+			if (conf->mddev->gendisk)
+				trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
+						      rbi, disk_devt(conf->mddev->gendisk),
+						      sh->dev[i].sector);
 			generic_make_request(rbi);
 		}
 		if (!rdev && !rrdev) {
@@ -1882,8 +1887,15 @@ static void raid5_end_write_request(struct bio *bi, int error)
 					&rdev->mddev->recovery);
 		} else if (is_badblock(rdev, sh->sector,
 				       STRIPE_SECTORS,
-				       &first_bad, &bad_sectors))
+				       &first_bad, &bad_sectors)) {
 			set_bit(R5_MadeGood, &sh->dev[i].flags);
+			if (test_bit(R5_ReadError, &sh->dev[i].flags))
+				/* That was a successful write so make
+				 * sure it looks like we already did
+				 * a re-write.
+				 */
+				set_bit(R5_ReWrite, &sh->dev[i].flags);
+		}
 	}

 	rdev_dec_pending(rdev, conf->mddev);
@@ -2280,17 +2292,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 	int level = conf->level;

 	if (rcw) {
-		/* if we are not expanding this is a proper write request, and
-		 * there will be bios with new data to be drained into the
-		 * stripe cache
-		 */
-		if (!expand) {
-			sh->reconstruct_state = reconstruct_state_drain_run;
-			set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
-		} else
-			sh->reconstruct_state = reconstruct_state_run;
-
-		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);

 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
@@ -2303,6 +2304,21 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 				s->locked++;
 			}
 		}
+		/* if we are not expanding this is a proper write request, and
+		 * there will be bios with new data to be drained into the
+		 * stripe cache
+		 */
+		if (!expand) {
+			if (!s->locked)
+				/* False alarm, nothing to do */
+				return;
+			sh->reconstruct_state = reconstruct_state_drain_run;
+			set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
+		} else
+			sh->reconstruct_state = reconstruct_state_run;
+
+		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
+
 		if (s->locked + conf->max_degraded == disks)
 			if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
 				atomic_inc(&conf->pending_full_writes);
@@ -2311,11 +2327,6 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 		BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
 			test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));

-		sh->reconstruct_state = reconstruct_state_prexor_drain_run;
-		set_bit(STRIPE_OP_PREXOR, &s->ops_request);
-		set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
-		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
-
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
 			if (i == pd_idx)
@@ -2330,6 +2341,13 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 				s->locked++;
 			}
 		}
+		if (!s->locked)
+			/* False alarm - nothing to do */
+			return;
+		sh->reconstruct_state = reconstruct_state_prexor_drain_run;
+		set_bit(STRIPE_OP_PREXOR, &s->ops_request);
+		set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
+		set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
 	}

 	/* keep the parity disk(s) locked while asynchronous operations
@@ -2564,6 +2582,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
 	int i;

 	clear_bit(STRIPE_SYNCING, &sh->state);
+	if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
+		wake_up(&conf->wait_for_overlap);
 	s->syncing = 0;
 	s->replacing = 0;
 	/* There is nothing more to do for sync/check/repair.
@@ -2737,6 +2757,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 {
 	int i;
 	struct r5dev *dev;
+	int discard_pending = 0;

 	for (i = disks; i--; )
 		if (sh->dev[i].written) {
@@ -2765,9 +2786,23 @@ static void handle_stripe_clean_event(struct r5conf *conf,
 						STRIPE_SECTORS,
 					 !test_bit(STRIPE_DEGRADED, &sh->state),
 						0);
-			}
-		} else if (test_bit(R5_Discard, &sh->dev[i].flags))
-			clear_bit(R5_Discard, &sh->dev[i].flags);
+			} else if (test_bit(R5_Discard, &dev->flags))
+				discard_pending = 1;
+		}
+	if (!discard_pending &&
+	    test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
+		clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
+		clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
+		if (sh->qd_idx >= 0) {
+			clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
+			clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags);
+		}
+		/* now that discard is done we can proceed with any sync */
+		clear_bit(STRIPE_DISCARD, &sh->state);
+		if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
+			set_bit(STRIPE_HANDLE, &sh->state);
+
+	}

 	if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
 		if (atomic_dec_and_test(&conf->pending_full_writes))
@@ -2826,8 +2861,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 	set_bit(STRIPE_HANDLE, &sh->state);
 	if (rmw < rcw && rmw > 0) {
 		/* prefer read-modify-write, but need to get some data */
-		blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d",
-				  (unsigned long long)sh->sector, rmw);
+		if (conf->mddev->queue)
+			blk_add_trace_msg(conf->mddev->queue,
+					  "raid5 rmw %llu %d",
+					  (unsigned long long)sh->sector, rmw);
 		for (i = disks; i--; ) {
 			struct r5dev *dev = &sh->dev[i];
 			if ((dev->towrite || i == sh->pd_idx) &&
@@ -2877,7 +2914,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 				}
 			}
 		}
-		if (rcw)
+		if (rcw && conf->mddev->queue)
 			blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
 					  (unsigned long long)sh->sector,
 					  rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
@@ -3417,9 +3454,15 @@ static void handle_stripe(struct stripe_head *sh)
 		return;
 	}

-	if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
-		set_bit(STRIPE_SYNCING, &sh->state);
-		clear_bit(STRIPE_INSYNC, &sh->state);
+	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+		spin_lock(&sh->stripe_lock);
+		/* Cannot process 'sync' concurrently with 'discard' */
+		if (!test_bit(STRIPE_DISCARD, &sh->state) &&
+		    test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+			set_bit(STRIPE_SYNCING, &sh->state);
+			clear_bit(STRIPE_INSYNC, &sh->state);
+		}
+		spin_unlock(&sh->stripe_lock);
 	}

 	clear_bit(STRIPE_DELAYED, &sh->state);
@@ -3579,6 +3622,8 @@ static void handle_stripe(struct stripe_head *sh)
 	    test_bit(STRIPE_INSYNC, &sh->state)) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
 		clear_bit(STRIPE_SYNCING, &sh->state);
+		if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
+			wake_up(&conf->wait_for_overlap);
 	}

 	/* If the failed drives are just a ReadError, then we might need
@@ -3878,6 +3923,8 @@ static void raid5_align_endio(struct bio *bi, int error)
 	rdev_dec_pending(rdev, conf->mddev);

 	if (!error && uptodate) {
+		trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
+					 raid_bi, 0);
 		bio_endio(raid_bi, 0);
 		if (atomic_dec_and_test(&conf->active_aligned_reads))
 			wake_up(&conf->wait_for_stripe);
@@ -3982,9 +4029,10 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		atomic_inc(&conf->active_aligned_reads);
 		spin_unlock_irq(&conf->device_lock);

-		trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
-				      align_bi, disk_devt(mddev->gendisk),
-				      raid_bio->bi_sector);
+		if (mddev->gendisk)
+			trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
+					      align_bi, disk_devt(mddev->gendisk),
+					      raid_bio->bi_sector);
 		generic_make_request(align_bi);
 		return 1;
 	} else {
@@ -4078,7 +4126,8 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
 		}
 		spin_unlock_irq(&conf->device_lock);
 	}
-	trace_block_unplug(mddev->queue, cnt, !from_schedule);
+	if (mddev->queue)
+		trace_block_unplug(mddev->queue, cnt, !from_schedule);
 	kfree(cb);
 }

@@ -4141,6 +4190,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 		sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
 		prepare_to_wait(&conf->wait_for_overlap, &w,
 				TASK_UNINTERRUPTIBLE);
+		set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
+		if (test_bit(STRIPE_SYNCING, &sh->state)) {
+			release_stripe(sh);
+			schedule();
+			goto again;
+		}
+		clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
 		spin_lock_irq(&sh->stripe_lock);
 		for (d = 0; d < conf->raid_disks; d++) {
 			if (d == sh->pd_idx || d == sh->qd_idx)
@@ -4153,6 +4209,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
 				goto again;
 			}
 		}
+		set_bit(STRIPE_DISCARD, &sh->state);
 		finish_wait(&conf->wait_for_overlap, &w);
 		for (d = 0; d < conf->raid_disks; d++) {
 			if (d == sh->pd_idx || d == sh->qd_idx)
@@ -4336,6 +4393,8 @@ static void make_request(struct mddev *mddev, struct bio * bi)

 		if ( rw == WRITE )
 			md_write_end(mddev);
+		trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
+					 bi, 0);
 		bio_endio(bi, 0);
 	}
 }
@@ -4620,9 +4679,10 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
 		*skipped = 1;
 		return rv;
 	}
-	if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
-	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
-	    !conf->fullsync && sync_blocks >= STRIPE_SECTORS) {
+	if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
+	    !conf->fullsync &&
+	    !bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
+	    sync_blocks >= STRIPE_SECTORS) {
 		/* we can skip this block, and probably more */
 		sync_blocks /= STRIPE_SECTORS;
 		*skipped = 1;
@@ -4712,8 +4772,11 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 		handled++;
 	}
 	remaining = raid5_dec_bi_active_stripes(raid_bio);
-	if (remaining == 0)
+	if (remaining == 0) {
+		trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev),
+					 raid_bio, 0);
 		bio_endio(raid_bio, 0);
+	}
 	if (atomic_dec_and_test(&conf->active_aligned_reads))
 		wake_up(&conf->wait_for_stripe);
 	return handled;
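To make the 'sync' vs 'discard' interaction above easier to follow, here is a minimal userspace sketch of the same exclusion pattern, with pthreads standing in for sh->stripe_lock and conf->wait_for_overlap. Everything in it (struct stripe, try_start_sync() and friends) is illustrative scaffolding, not kernel API: a sync declines to start while a discard is marked on the stripe, and a discard waits until any running sync finishes.

/*
 * Userspace sketch (not kernel code) of the STRIPE_DISCARD/STRIPE_SYNCING
 * exclusion added by this patch.  A mutex stands in for sh->stripe_lock
 * and a condition variable for the wait_for_overlap wait queue.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct stripe {
	pthread_mutex_t lock;	/* stands in for sh->stripe_lock */
	pthread_cond_t wait;	/* stands in for conf->wait_for_overlap */
	bool discard;		/* stands in for STRIPE_DISCARD */
	bool syncing;		/* stands in for STRIPE_SYNCING */
};

/* handle_stripe() analog: start a sync only if no discard is in flight. */
static bool try_start_sync(struct stripe *sh)
{
	bool started = false;

	pthread_mutex_lock(&sh->lock);
	if (!sh->discard) {	/* cannot process 'sync' concurrently with 'discard' */
		sh->syncing = true;
		started = true;
	}
	pthread_mutex_unlock(&sh->lock);
	return started;
}

static void finish_sync(struct stripe *sh)
{
	pthread_mutex_lock(&sh->lock);
	sh->syncing = false;
	pthread_cond_broadcast(&sh->wait);	/* analog of wake_up() */
	pthread_mutex_unlock(&sh->lock);
}

/* make_discard_request() analog: wait out any running sync, then mark
 * the stripe as discarding so a later sync request backs off. */
static void start_discard(struct stripe *sh)
{
	pthread_mutex_lock(&sh->lock);
	while (sh->syncing)	/* analog of release_stripe(); schedule(); goto again */
		pthread_cond_wait(&sh->wait, &sh->lock);
	sh->discard = true;
	pthread_mutex_unlock(&sh->lock);
}

static void finish_discard(struct stripe *sh)
{
	pthread_mutex_lock(&sh->lock);
	sh->discard = false;	/* now that discard is done, sync may proceed */
	pthread_cond_broadcast(&sh->wait);
	pthread_mutex_unlock(&sh->lock);
}

int main(void)
{
	struct stripe sh = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.wait = PTHREAD_COND_INITIALIZER,
	};

	start_discard(&sh);
	printf("sync while discard pending: %s\n",
	       try_start_sync(&sh) ? "started (bug)" : "deferred (expected)");
	finish_discard(&sh);
	printf("sync after discard done:    %s\n",
	       try_start_sync(&sh) ? "started (expected)" : "deferred (bug)");
	finish_sync(&sh);
	return 0;
}

The kernel gets the same effect with test_bit/set_bit on sh->state under sh->stripe_lock, and uses R5_Overlap on the parity device together with the wait_for_overlap wait queue in place of the condition variable.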