diff options
Diffstat (limited to 'drivers/md/raid1.c')
| -rw-r--r-- | drivers/md/raid1.c | 174 | 
1 files changed, 131 insertions, 43 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index ede2461e79c..cc24f0cb7ee 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -135,7 +135,7 @@ out_free_pages:  			put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);  	j = -1;  out_free_bio: -	while ( ++j < pi->raid_disks ) +	while (++j < pi->raid_disks)  		bio_put(r1_bio->bios[j]);  	r1bio_pool_free(r1_bio, data);  	return NULL; @@ -164,7 +164,7 @@ static void put_all_bios(struct r1conf *conf, struct r1bio *r1_bio)  {  	int i; -	for (i = 0; i < conf->raid_disks; i++) { +	for (i = 0; i < conf->raid_disks * 2; i++) {  		struct bio **bio = r1_bio->bios + i;  		if (!BIO_SPECIAL(*bio))  			bio_put(*bio); @@ -185,7 +185,7 @@ static void put_buf(struct r1bio *r1_bio)  	struct r1conf *conf = r1_bio->mddev->private;  	int i; -	for (i=0; i<conf->raid_disks; i++) { +	for (i = 0; i < conf->raid_disks * 2; i++) {  		struct bio *bio = r1_bio->bios[i];  		if (bio->bi_end_io)  			rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev); @@ -277,13 +277,14 @@ static inline void update_head_pos(int disk, struct r1bio *r1_bio)  static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)  {  	int mirror; -	int raid_disks = r1_bio->mddev->raid_disks; +	struct r1conf *conf = r1_bio->mddev->private; +	int raid_disks = conf->raid_disks; -	for (mirror = 0; mirror < raid_disks; mirror++) +	for (mirror = 0; mirror < raid_disks * 2; mirror++)  		if (r1_bio->bios[mirror] == bio)  			break; -	BUG_ON(mirror == raid_disks); +	BUG_ON(mirror == raid_disks * 2);  	update_head_pos(mirror, r1_bio);  	return mirror; @@ -390,6 +391,11 @@ static void raid1_end_write_request(struct bio *bio, int error)  	if (!uptodate) {  		set_bit(WriteErrorSeen,  			&conf->mirrors[mirror].rdev->flags); +		if (!test_and_set_bit(WantReplacement, +				      &conf->mirrors[mirror].rdev->flags)) +			set_bit(MD_RECOVERY_NEEDED, & +				conf->mddev->recovery); +  		set_bit(R1BIO_WriteError, &r1_bio->state);  	} else {  		/* @@ -505,7 +511,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect  		start_disk = conf->last_used;  	} -	for (i = 0 ; i < conf->raid_disks ; i++) { +	for (i = 0 ; i < conf->raid_disks * 2 ; i++) {  		sector_t dist;  		sector_t first_bad;  		int bad_sectors; @@ -609,7 +615,7 @@ int md_raid1_congested(struct mddev *mddev, int bits)  		return 1;  	rcu_read_lock(); -	for (i = 0; i < mddev->raid_disks; i++) { +	for (i = 0; i < conf->raid_disks; i++) {  		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);  		if (rdev && !test_bit(Faulty, &rdev->flags)) {  			struct request_queue *q = bdev_get_queue(rdev->bdev); @@ -974,7 +980,7 @@ read_again:  	 */  	plugged = mddev_check_plugged(mddev); -	disks = conf->raid_disks; +	disks = conf->raid_disks * 2;   retry_write:  	blocked_rdev = NULL;  	rcu_read_lock(); @@ -988,7 +994,8 @@ read_again:  		}  		r1_bio->bios[i] = NULL;  		if (!rdev || test_bit(Faulty, &rdev->flags)) { -			set_bit(R1BIO_Degraded, &r1_bio->state); +			if (i < conf->raid_disks) +				set_bit(R1BIO_Degraded, &r1_bio->state);  			continue;  		} @@ -1263,6 +1270,25 @@ static int raid1_spare_active(struct mddev *mddev)  	 */  	for (i = 0; i < conf->raid_disks; i++) {  		struct md_rdev *rdev = conf->mirrors[i].rdev; +		struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev; +		if (repl +		    && repl->recovery_offset == MaxSector +		    && !test_bit(Faulty, &repl->flags) +		    && !test_and_set_bit(In_sync, &repl->flags)) { +			/* replacement has just become active */ +			if (!rdev || +			    !test_and_clear_bit(In_sync, &rdev->flags)) +				count++; +			if (rdev) { +				/* Replaced device not technically +				 * faulty, but we need to be sure +				 * it gets removed and never re-added +				 */ +				set_bit(Faulty, &rdev->flags); +				sysfs_notify_dirent_safe( +					rdev->sysfs_state); +			} +		}  		if (rdev  		    && !test_bit(Faulty, &rdev->flags)  		    && !test_and_set_bit(In_sync, &rdev->flags)) { @@ -1286,7 +1312,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)  	int mirror = 0;  	struct mirror_info *p;  	int first = 0; -	int last = mddev->raid_disks - 1; +	int last = conf->raid_disks - 1;  	if (mddev->recovery_disabled == conf->recovery_disabled)  		return -EBUSY; @@ -1294,8 +1320,9 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)  	if (rdev->raid_disk >= 0)  		first = last = rdev->raid_disk; -	for (mirror = first; mirror <= last; mirror++) -		if ( !(p=conf->mirrors+mirror)->rdev) { +	for (mirror = first; mirror <= last; mirror++) { +		p = conf->mirrors+mirror; +		if (!p->rdev) {  			disk_stack_limits(mddev->gendisk, rdev->bdev,  					  rdev->data_offset << 9); @@ -1322,21 +1349,35 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)  			rcu_assign_pointer(p->rdev, rdev);  			break;  		} +		if (test_bit(WantReplacement, &p->rdev->flags) && +		    p[conf->raid_disks].rdev == NULL) { +			/* Add this device as a replacement */ +			clear_bit(In_sync, &rdev->flags); +			set_bit(Replacement, &rdev->flags); +			rdev->raid_disk = mirror; +			err = 0; +			conf->fullsync = 1; +			rcu_assign_pointer(p[conf->raid_disks].rdev, rdev); +			break; +		} +	}  	md_integrity_add_rdev(rdev, mddev);  	print_conf(conf);  	return err;  } -static int raid1_remove_disk(struct mddev *mddev, int number) +static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)  {  	struct r1conf *conf = mddev->private;  	int err = 0; -	struct md_rdev *rdev; +	int number = rdev->raid_disk;  	struct mirror_info *p = conf->mirrors+ number; +	if (rdev != p->rdev) +		p = conf->mirrors + conf->raid_disks + number; +  	print_conf(conf); -	rdev = p->rdev; -	if (rdev) { +	if (rdev == p->rdev) {  		if (test_bit(In_sync, &rdev->flags) ||  		    atomic_read(&rdev->nr_pending)) {  			err = -EBUSY; @@ -1358,7 +1399,21 @@ static int raid1_remove_disk(struct mddev *mddev, int number)  			err = -EBUSY;  			p->rdev = rdev;  			goto abort; -		} +		} else if (conf->mirrors[conf->raid_disks + number].rdev) { +			/* We just removed a device that is being replaced. +			 * Move down the replacement.  We drain all IO before +			 * doing this to avoid confusion. +			 */ +			struct md_rdev *repl = +				conf->mirrors[conf->raid_disks + number].rdev; +			raise_barrier(conf); +			clear_bit(Replacement, &repl->flags); +			p->rdev = repl; +			conf->mirrors[conf->raid_disks + number].rdev = NULL; +			lower_barrier(conf); +			clear_bit(WantReplacement, &rdev->flags); +		} else +			clear_bit(WantReplacement, &rdev->flags);  		err = md_integrity_register(mddev);  	}  abort: @@ -1411,6 +1466,10 @@ static void end_sync_write(struct bio *bio, int error)  		} while (sectors_to_go > 0);  		set_bit(WriteErrorSeen,  			&conf->mirrors[mirror].rdev->flags); +		if (!test_and_set_bit(WantReplacement, +				      &conf->mirrors[mirror].rdev->flags)) +			set_bit(MD_RECOVERY_NEEDED, & +				mddev->recovery);  		set_bit(R1BIO_WriteError, &r1_bio->state);  	} else if (is_badblock(conf->mirrors[mirror].rdev,  			       r1_bio->sector, @@ -1441,8 +1500,13 @@ static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,  	if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))  		/* success */  		return 1; -	if (rw == WRITE) +	if (rw == WRITE) {  		set_bit(WriteErrorSeen, &rdev->flags); +		if (!test_and_set_bit(WantReplacement, +				      &rdev->flags)) +			set_bit(MD_RECOVERY_NEEDED, & +				rdev->mddev->recovery); +	}  	/* need to record an error - either for the block or the device */  	if (!rdev_set_badblocks(rdev, sector, sectors, 0))  		md_error(rdev->mddev, rdev); @@ -1493,7 +1557,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)  				}  			}  			d++; -			if (d == conf->raid_disks) +			if (d == conf->raid_disks * 2)  				d = 0;  		} while (!success && d != r1_bio->read_disk); @@ -1510,7 +1574,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)  			       mdname(mddev),  			       bdevname(bio->bi_bdev, b),  			       (unsigned long long)r1_bio->sector); -			for (d = 0; d < conf->raid_disks; d++) { +			for (d = 0; d < conf->raid_disks * 2; d++) {  				rdev = conf->mirrors[d].rdev;  				if (!rdev || test_bit(Faulty, &rdev->flags))  					continue; @@ -1536,7 +1600,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)  		/* write it back and re-read */  		while (d != r1_bio->read_disk) {  			if (d == 0) -				d = conf->raid_disks; +				d = conf->raid_disks * 2;  			d--;  			if (r1_bio->bios[d]->bi_end_io != end_sync_read)  				continue; @@ -1551,7 +1615,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)  		d = start;  		while (d != r1_bio->read_disk) {  			if (d == 0) -				d = conf->raid_disks; +				d = conf->raid_disks * 2;  			d--;  			if (r1_bio->bios[d]->bi_end_io != end_sync_read)  				continue; @@ -1584,7 +1648,7 @@ static int process_checks(struct r1bio *r1_bio)  	int primary;  	int i; -	for (primary = 0; primary < conf->raid_disks; primary++) +	for (primary = 0; primary < conf->raid_disks * 2; primary++)  		if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&  		    test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {  			r1_bio->bios[primary]->bi_end_io = NULL; @@ -1592,7 +1656,7 @@ static int process_checks(struct r1bio *r1_bio)  			break;  		}  	r1_bio->read_disk = primary; -	for (i = 0; i < conf->raid_disks; i++) { +	for (i = 0; i < conf->raid_disks * 2; i++) {  		int j;  		int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);  		struct bio *pbio = r1_bio->bios[primary]; @@ -1656,7 +1720,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)  {  	struct r1conf *conf = mddev->private;  	int i; -	int disks = conf->raid_disks; +	int disks = conf->raid_disks * 2;  	struct bio *bio, *wbio;  	bio = r1_bio->bios[r1_bio->read_disk]; @@ -1737,7 +1801,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,  				success = 1;  			else {  				d++; -				if (d == conf->raid_disks) +				if (d == conf->raid_disks * 2)  					d = 0;  			}  		} while (!success && d != read_disk); @@ -1753,7 +1817,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,  		start = d;  		while (d != read_disk) {  			if (d==0) -				d = conf->raid_disks; +				d = conf->raid_disks * 2;  			d--;  			rdev = conf->mirrors[d].rdev;  			if (rdev && @@ -1765,7 +1829,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,  		while (d != read_disk) {  			char b[BDEVNAME_SIZE];  			if (d==0) -				d = conf->raid_disks; +				d = conf->raid_disks * 2;  			d--;  			rdev = conf->mirrors[d].rdev;  			if (rdev && @@ -1887,7 +1951,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio  {  	int m;  	int s = r1_bio->sectors; -	for (m = 0; m < conf->raid_disks ; m++) { +	for (m = 0; m < conf->raid_disks * 2 ; m++) {  		struct md_rdev *rdev = conf->mirrors[m].rdev;  		struct bio *bio = r1_bio->bios[m];  		if (bio->bi_end_io == NULL) @@ -1909,7 +1973,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio  static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)  {  	int m; -	for (m = 0; m < conf->raid_disks ; m++) +	for (m = 0; m < conf->raid_disks * 2 ; m++)  		if (r1_bio->bios[m] == IO_MADE_GOOD) {  			struct md_rdev *rdev = conf->mirrors[m].rdev;  			rdev_clear_badblocks(rdev, @@ -2184,7 +2248,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp  	r1_bio->state = 0;  	set_bit(R1BIO_IsSync, &r1_bio->state); -	for (i=0; i < conf->raid_disks; i++) { +	for (i = 0; i < conf->raid_disks * 2; i++) {  		struct md_rdev *rdev;  		bio = r1_bio->bios[i]; @@ -2203,7 +2267,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp  		rdev = rcu_dereference(conf->mirrors[i].rdev);  		if (rdev == NULL ||  		    test_bit(Faulty, &rdev->flags)) { -			still_degraded = 1; +			if (i < conf->raid_disks) +				still_degraded = 1;  		} else if (!test_bit(In_sync, &rdev->flags)) {  			bio->bi_rw = WRITE;  			bio->bi_end_io = end_sync_write; @@ -2254,7 +2319,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp  		 * need to mark them bad on all write targets  		 */  		int ok = 1; -		for (i = 0 ; i < conf->raid_disks ; i++) +		for (i = 0 ; i < conf->raid_disks * 2 ; i++)  			if (r1_bio->bios[i]->bi_end_io == end_sync_write) {  				struct md_rdev *rdev =  					rcu_dereference(conf->mirrors[i].rdev); @@ -2323,7 +2388,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp  				len = sync_blocks<<9;  		} -		for (i=0 ; i < conf->raid_disks; i++) { +		for (i = 0 ; i < conf->raid_disks * 2; i++) {  			bio = r1_bio->bios[i];  			if (bio->bi_end_io) {  				page = bio->bi_io_vec[bio->bi_vcnt].bv_page; @@ -2356,7 +2421,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp  	 */  	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {  		atomic_set(&r1_bio->remaining, read_targets); -		for (i=0; i<conf->raid_disks; i++) { +		for (i = 0; i < conf->raid_disks * 2; i++) {  			bio = r1_bio->bios[i];  			if (bio->bi_end_io == end_sync_read) {  				md_sync_acct(bio->bi_bdev, nr_sectors); @@ -2393,7 +2458,8 @@ static struct r1conf *setup_conf(struct mddev *mddev)  	if (!conf)  		goto abort; -	conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, +	conf->mirrors = kzalloc(sizeof(struct mirror_info) +				* mddev->raid_disks * 2,  				 GFP_KERNEL);  	if (!conf->mirrors)  		goto abort; @@ -2405,7 +2471,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)  	conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL);  	if (!conf->poolinfo)  		goto abort; -	conf->poolinfo->raid_disks = mddev->raid_disks; +	conf->poolinfo->raid_disks = mddev->raid_disks * 2;  	conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,  					  r1bio_pool_free,  					  conf->poolinfo); @@ -2414,14 +2480,20 @@ static struct r1conf *setup_conf(struct mddev *mddev)  	conf->poolinfo->mddev = mddev; +	err = -EINVAL;  	spin_lock_init(&conf->device_lock);  	list_for_each_entry(rdev, &mddev->disks, same_set) {  		int disk_idx = rdev->raid_disk;  		if (disk_idx >= mddev->raid_disks  		    || disk_idx < 0)  			continue; -		disk = conf->mirrors + disk_idx; +		if (test_bit(Replacement, &rdev->flags)) +			disk = conf->mirrors + conf->raid_disks + disk_idx; +		else +			disk = conf->mirrors + disk_idx; +		if (disk->rdev) +			goto abort;  		disk->rdev = rdev;  		disk->head_position = 0; @@ -2437,11 +2509,27 @@ static struct r1conf *setup_conf(struct mddev *mddev)  	conf->pending_count = 0;  	conf->recovery_disabled = mddev->recovery_disabled - 1; +	err = -EIO;  	conf->last_used = -1; -	for (i = 0; i < conf->raid_disks; i++) { +	for (i = 0; i < conf->raid_disks * 2; i++) {  		disk = conf->mirrors + i; +		if (i < conf->raid_disks && +		    disk[conf->raid_disks].rdev) { +			/* This slot has a replacement. */ +			if (!disk->rdev) { +				/* No original, just make the replacement +				 * a recovering spare +				 */ +				disk->rdev = +					disk[conf->raid_disks].rdev; +				disk[conf->raid_disks].rdev = NULL; +			} else if (!test_bit(In_sync, &disk->rdev->flags)) +				/* Original is not in_sync - bad */ +				goto abort; +		} +  		if (!disk->rdev ||  		    !test_bit(In_sync, &disk->rdev->flags)) {  			disk->head_position = 0; @@ -2455,7 +2543,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)  			conf->last_used = i;  	} -	err = -EIO;  	if (conf->last_used < 0) {  		printk(KERN_ERR "md/raid1:%s: no operational mirrors\n",  		       mdname(mddev)); @@ -2665,7 +2752,7 @@ static int raid1_reshape(struct mddev *mddev)  	if (!newpoolinfo)  		return -ENOMEM;  	newpoolinfo->mddev = mddev; -	newpoolinfo->raid_disks = raid_disks; +	newpoolinfo->raid_disks = raid_disks * 2;  	newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,  				 r1bio_pool_free, newpoolinfo); @@ -2673,7 +2760,8 @@ static int raid1_reshape(struct mddev *mddev)  		kfree(newpoolinfo);  		return -ENOMEM;  	} -	newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks, GFP_KERNEL); +	newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks * 2, +			     GFP_KERNEL);  	if (!newmirrors) {  		kfree(newpoolinfo);  		mempool_destroy(newpool);  |