diff options
Diffstat (limited to 'drivers/md/md.c')
| -rw-r--r-- | drivers/md/md.c | 195 | 
1 files changed, 116 insertions, 79 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 0f4a70c43ff..9dd872000ce 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)  	else  		new->md_minor = MINOR(unit) >> MdpMinorShift; +	mutex_init(&new->open_mutex);  	mutex_init(&new->reconfig_mutex);  	INIT_LIST_HEAD(&new->disks);  	INIT_LIST_HEAD(&new->all_mddevs); @@ -1308,7 +1309,12 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)  	}  	if (mddev->level != LEVEL_MULTIPATH) {  		int role; -		role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); +		if (rdev->desc_nr < 0 || +		    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { +			role = 0xffff; +			rdev->desc_nr = -1; +		} else +			role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);  		switch(role) {  		case 0xffff: /* spare */  			break; @@ -1394,8 +1400,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)  		if (rdev2->desc_nr+1 > max_dev)  			max_dev = rdev2->desc_nr+1; -	if (max_dev > le32_to_cpu(sb->max_dev)) +	if (max_dev > le32_to_cpu(sb->max_dev)) { +		int bmask;  		sb->max_dev = cpu_to_le32(max_dev); +		rdev->sb_size = max_dev * 2 + 256; +		bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; +		if (rdev->sb_size & bmask) +			rdev->sb_size = (rdev->sb_size | bmask) + 1; +	}  	for (i=0; i<max_dev;i++)  		sb->dev_roles[i] = cpu_to_le16(0xfffe); @@ -1487,37 +1499,76 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)  static LIST_HEAD(pending_raid_disks); -static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev) +/* + * Try to register data integrity profile for an mddev + * + * This is called when an array is started and after a disk has been kicked + * from the array. It only succeeds if all working and active component devices + * are integrity capable with matching profiles. + */ +int md_integrity_register(mddev_t *mddev) +{ +	mdk_rdev_t *rdev, *reference = NULL; + +	if (list_empty(&mddev->disks)) +		return 0; /* nothing to do */ +	if (blk_get_integrity(mddev->gendisk)) +		return 0; /* already registered */ +	list_for_each_entry(rdev, &mddev->disks, same_set) { +		/* skip spares and non-functional disks */ +		if (test_bit(Faulty, &rdev->flags)) +			continue; +		if (rdev->raid_disk < 0) +			continue; +		/* +		 * If at least one rdev is not integrity capable, we can not +		 * enable data integrity for the md device. +		 */ +		if (!bdev_get_integrity(rdev->bdev)) +			return -EINVAL; +		if (!reference) { +			/* Use the first rdev as the reference */ +			reference = rdev; +			continue; +		} +		/* does this rdev's profile match the reference profile? */ +		if (blk_integrity_compare(reference->bdev->bd_disk, +				rdev->bdev->bd_disk) < 0) +			return -EINVAL; +	} +	/* +	 * All component devices are integrity capable and have matching +	 * profiles, register the common profile for the md device. +	 */ +	if (blk_integrity_register(mddev->gendisk, +			bdev_get_integrity(reference->bdev)) != 0) { +		printk(KERN_ERR "md: failed to register integrity for %s\n", +			mdname(mddev)); +		return -EINVAL; +	} +	printk(KERN_NOTICE "md: data integrity on %s enabled\n", +		mdname(mddev)); +	return 0; +} +EXPORT_SYMBOL(md_integrity_register); + +/* Disable data integrity if non-capable/non-matching disk is being added */ +void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev)  { -	struct mdk_personality *pers = mddev->pers; -	struct gendisk *disk = mddev->gendisk;  	struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); -	struct blk_integrity *bi_mddev = blk_get_integrity(disk); +	struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk); -	/* Data integrity passthrough not supported on RAID 4, 5 and 6 */ -	if (pers && pers->level >= 4 && pers->level <= 6) +	if (!bi_mddev) /* nothing to do */  		return; - -	/* If rdev is integrity capable, register profile for mddev */ -	if (!bi_mddev && bi_rdev) { -		if (blk_integrity_register(disk, bi_rdev)) -			printk(KERN_ERR "%s: %s Could not register integrity!\n", -			       __func__, disk->disk_name); -		else -			printk(KERN_NOTICE "Enabling data integrity on %s\n", -			       disk->disk_name); +	if (rdev->raid_disk < 0) /* skip spares */  		return; -	} - -	/* Check that mddev and rdev have matching profiles */ -	if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) { -		printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__, -		       disk->disk_name, rdev->bdev->bd_disk->disk_name); -		printk(KERN_NOTICE "Disabling data integrity on %s\n", -		       disk->disk_name); -		blk_integrity_unregister(disk); -	} +	if (bi_rdev && blk_integrity_compare(mddev->gendisk, +					     rdev->bdev->bd_disk) >= 0) +		return; +	printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev)); +	blk_integrity_unregister(mddev->gendisk);  } +EXPORT_SYMBOL(md_integrity_add_rdev);  static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)  { @@ -1591,7 +1642,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)  	/* May as well allow recovery to be retried once */  	mddev->recovery_disabled = 0; -	md_integrity_check(rdev, mddev);  	return 0;   fail: @@ -1756,9 +1806,10 @@ static void print_sb_1(struct mdp_superblock_1 *sb)  	__u8 *uuid;  	uuid = sb->set_uuid; -	printk(KERN_INFO "md:  SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x" -			":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n" -	       KERN_INFO "md:    Name: \"%s\" CT:%llu\n", +	printk(KERN_INFO +	       "md:  SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x" +	       ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n" +	       "md:    Name: \"%s\" CT:%llu\n",  		le32_to_cpu(sb->major_version),  		le32_to_cpu(sb->feature_map),  		uuid[0], uuid[1], uuid[2], uuid[3], @@ -1770,12 +1821,13 @@ static void print_sb_1(struct mdp_superblock_1 *sb)  		       & MD_SUPERBLOCK_1_TIME_SEC_MASK);  	uuid = sb->device_uuid; -	printk(KERN_INFO "md:       L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu" +	printk(KERN_INFO +	       "md:       L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"  			" RO:%llu\n" -	       KERN_INFO "md:     Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x" -			":%02x%02x%02x%02x%02x%02x\n" -	       KERN_INFO "md:       (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" -	       KERN_INFO "md:         (MaxDev:%u) \n", +	       "md:     Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x" +	                ":%02x%02x%02x%02x%02x%02x\n" +	       "md:       (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" +	       "md:         (MaxDev:%u) \n",  		le32_to_cpu(sb->level),  		(unsigned long long)le64_to_cpu(sb->size),  		le32_to_cpu(sb->raid_disks), @@ -1923,17 +1975,14 @@ repeat:  		/* otherwise we have to go forward and ... */  		mddev->events ++;  		if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ -			/* .. if the array isn't clean, insist on an odd 'events' */ -			if ((mddev->events&1)==0) { -				mddev->events++; +			/* .. if the array isn't clean, an 'even' event must also go +			 * to spares. */ +			if ((mddev->events&1)==0)  				nospares = 0; -			}  		} else { -			/* otherwise insist on an even 'events' (for clean states) */ -			if ((mddev->events&1)) { -				mddev->events++; +			/* otherwise an 'odd' event must go to spares */ +			if ((mddev->events&1))  				nospares = 0; -			}  		}  	} @@ -2655,6 +2704,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)  	ssize_t rv = len;  	struct mdk_personality *pers;  	void *priv; +	mdk_rdev_t *rdev;  	if (mddev->pers == NULL) {  		if (len == 0) @@ -2734,6 +2784,12 @@ level_store(mddev_t *mddev, const char *buf, size_t len)  	mddev_suspend(mddev);  	mddev->pers->stop(mddev);  	module_put(mddev->pers->owner); +	/* Invalidate devices that are now superfluous */ +	list_for_each_entry(rdev, &mddev->disks, same_set) +		if (rdev->raid_disk >= mddev->raid_disks) { +			rdev->raid_disk = -1; +			clear_bit(In_sync, &rdev->flags); +		}  	mddev->pers = pers;  	mddev->private = priv;  	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); @@ -3543,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)  		if (max < mddev->resync_min)  			return -EINVAL;  		if (max < mddev->resync_max && +		    mddev->ro == 0 &&  		    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))  			return -EBUSY; @@ -3683,17 +3740,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)  	mddev->array_sectors = sectors;  	set_capacity(mddev->gendisk, mddev->array_sectors); -	if (mddev->pers) { -		struct block_device *bdev = bdget_disk(mddev->gendisk, 0); - -		if (bdev) { -			mutex_lock(&bdev->bd_inode->i_mutex); -			i_size_write(bdev->bd_inode, -				     (loff_t)mddev->array_sectors << 9); -			mutex_unlock(&bdev->bd_inode->i_mutex); -			bdput(bdev); -		} -	} +	if (mddev->pers) +		revalidate_disk(mddev->gendisk);  	return len;  } @@ -4046,10 +4094,6 @@ static int do_md_run(mddev_t * mddev)  	}  	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); -	if (pers->level >= 4 && pers->level <= 6) -		/* Cannot support integrity (yet) */ -		blk_integrity_unregister(mddev->gendisk); -  	if (mddev->reshape_position != MaxSector &&  	    pers->start_reshape == NULL) {  		/* This personality cannot handle reshaping... */ @@ -4187,6 +4231,7 @@ static int do_md_run(mddev_t * mddev)  	md_wakeup_thread(mddev->thread);  	md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ +	revalidate_disk(mddev->gendisk);  	mddev->changed = 1;  	md_new_event(mddev);  	sysfs_notify_dirent(mddev->sysfs_state); @@ -4258,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)  	struct gendisk *disk = mddev->gendisk;  	mdk_rdev_t *rdev; +	mutex_lock(&mddev->open_mutex);  	if (atomic_read(&mddev->openers) > is_open) {  		printk("md: %s still in use.\n",mdname(mddev)); -		return -EBUSY; -	} - -	if (mddev->pers) { +		err = -EBUSY; +	} else if (mddev->pers) {  		if (mddev->sync_thread) {  			set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); @@ -4320,8 +4364,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)  		if (mode == 1)  			set_disk_ro(disk, 1);  		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); +		err = 0;  	} - +out: +	mutex_unlock(&mddev->open_mutex); +	if (err) +		return err;  	/*  	 * Free resources if final stop  	 */ @@ -4387,7 +4435,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)  	blk_integrity_unregister(disk);  	md_new_event(mddev);  	sysfs_notify_dirent(mddev->sysfs_state); -out:  	return err;  } @@ -5085,18 +5132,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)  			return -ENOSPC;  	}  	rv = mddev->pers->resize(mddev, num_sectors); -	if (!rv) { -		struct block_device *bdev; - -		bdev = bdget_disk(mddev->gendisk, 0); -		if (bdev) { -			mutex_lock(&bdev->bd_inode->i_mutex); -			i_size_write(bdev->bd_inode, -				     (loff_t)mddev->array_sectors << 9); -			mutex_unlock(&bdev->bd_inode->i_mutex); -			bdput(bdev); -		} -	} +	if (!rv) +		revalidate_disk(mddev->gendisk);  	return rv;  } @@ -5482,12 +5519,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)  	}  	BUG_ON(mddev != bdev->bd_disk->private_data); -	if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) +	if ((err = mutex_lock_interruptible(&mddev->open_mutex)))  		goto out;  	err = 0;  	atomic_inc(&mddev->openers); -	mddev_unlock(mddev); +	mutex_unlock(&mddev->open_mutex);  	check_disk_change(bdev);   out:  |