| author | Kyungmin Park <kyungmin.park@samsung.com> | 2008-11-19 16:28:06 +0100 | 
|---|---|---|
| committer | Stefan Roese <sr@denx.de> | 2008-11-19 20:34:39 +0100 | 
| commit | c91a719daa331b5856109313371e4ece5ec06d96 (patch) | |
| tree | e9ff31b1864712270c65f87eef7db26cb4c55ddd | |
| parent | f412fefa079c6aa9a9763f6869bf787ea6bf6e1b (diff) | |
UBI: Add basic UBI support to U-Boot (Part 5/8)
This patch adds basic UBI (Unsorted Block Image) support to U-Boot.
It's based on the Linux UBI version and basically has an "OS"
translation wrapper that turns most Linux-specific calls
(spin_lock() etc.) into no-ops. Some source code parts are guarded
by "#ifdef UBI_LINUX" so that they are compiled out in U-Boot. This
makes it easier to compare this version with the Linux version and
simplifies future UBI ports/bug-fixes from the Linux version.
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Stefan Roese <sr@denx.de>
| -rw-r--r-- | drivers/mtd/ubi/upd.c | 441 |
| -rw-r--r-- | drivers/mtd/ubi/vmt.c | 862 |
| -rw-r--r-- | drivers/mtd/ubi/vtbl.c | 837 |
| -rw-r--r-- | drivers/mtd/ubi/wl.c | 1670 |
4 files changed, 3810 insertions, 0 deletions
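The "OS" translation wrapper mentioned in the commit message is pulled in through the <ubi_uboot.h> include that each file below uses in place of the Linux kernel headers. As a rough, illustrative sketch of that approach (the macro names mirror the Linux primitives visible in the diff, but this is not a copy of the actual U-Boot header): since U-Boot runs single-threaded, locking and scheduling primitives can collapse to no-ops, while kernel allocators map onto the standard C allocator.

```c
/*
 * Illustrative sketch of an OS-translation shim in the spirit of
 * <ubi_uboot.h>. Not the real header - just the technique the
 * commit message describes.
 */
#include <stdlib.h>

/* U-Boot is single-threaded, so Linux locking becomes a no-op. */
#define spin_lock(lock)		do { } while (0)
#define spin_unlock(lock)	do { } while (0)
#define mutex_lock(m)		do { } while (0)
#define mutex_unlock(m)		do { } while (0)
#define cond_resched()		do { } while (0)

/* Kernel allocators map onto the standard C allocator. */
#define GFP_KERNEL		0
#define vmalloc(size)		malloc(size)
#define vfree(ptr)		free(ptr)
#define kfree(ptr)		free(ptr)
#define kzalloc(size, gfp)	calloc(1, (size))
```

With a shim of this shape, code such as ubi_more_update_data() below compiles largely unchanged, which is what keeps diffing against the Linux tree and back-porting fixes cheap.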
| diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c new file mode 100644 index 000000000..5f7ed7b2e --- /dev/null +++ b/drivers/mtd/ubi/upd.c @@ -0,0 +1,441 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + * + * Jan 2007: Alexander Schmidt, hacked per-volume update. + */ + +/* + * This file contains implementation of the volume update and atomic LEB change + * functionality. + * + * The update operation is based on the per-volume update marker which is + * stored in the volume table. The update marker is set before the update + * starts, and removed after the update has been finished. So if the update was + * interrupted by an unclean re-boot or due to some other reasons, the update + * marker stays on the flash media and UBI finds it when it attaches the MTD + * device next time. If the update marker is set for a volume, the volume is + * treated as damaged and most I/O operations are prohibited. Only a new update + * operation is allowed. + * + * Note, in general it is possible to implement the update operation as a + * transaction with a roll-back capability. + */ + +#ifdef UBI_LINUX +#include <linux/err.h> +#include <asm/uaccess.h> +#include <asm/div64.h> +#endif + +#include <ubi_uboot.h> +#include "ubi.h" + +/** + * set_update_marker - set update marker. + * @ubi: UBI device description object + * @vol: volume description object + * + * This function sets the update marker flag for volume @vol. Returns zero + * in case of success and a negative error code in case of failure. + */ +static int set_update_marker(struct ubi_device *ubi, struct ubi_volume *vol) +{ +	int err; +	struct ubi_vtbl_record vtbl_rec; + +	dbg_msg("set update marker for volume %d", vol->vol_id); + +	if (vol->upd_marker) { +		ubi_assert(ubi->vtbl[vol->vol_id].upd_marker); +		dbg_msg("already set"); +		return 0; +	} + +	memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id], +	       sizeof(struct ubi_vtbl_record)); +	vtbl_rec.upd_marker = 1; + +	mutex_lock(&ubi->volumes_mutex); +	err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec); +	mutex_unlock(&ubi->volumes_mutex); +	vol->upd_marker = 1; +	return err; +} + +/** + * clear_update_marker - clear update marker. + * @ubi: UBI device description object + * @vol: volume description object + * @bytes: new data size in bytes + * + * This function clears the update marker for volume @vol, sets new volume + * data size and clears the "corrupted" flag (static volumes only). Returns + * zero in case of success and a negative error code in case of failure. 
+ */ +static int clear_update_marker(struct ubi_device *ubi, struct ubi_volume *vol, +			       long long bytes) +{ +	int err; +	uint64_t tmp; +	struct ubi_vtbl_record vtbl_rec; + +	dbg_msg("clear update marker for volume %d", vol->vol_id); + +	memcpy(&vtbl_rec, &ubi->vtbl[vol->vol_id], +	       sizeof(struct ubi_vtbl_record)); +	ubi_assert(vol->upd_marker && vtbl_rec.upd_marker); +	vtbl_rec.upd_marker = 0; + +	if (vol->vol_type == UBI_STATIC_VOLUME) { +		vol->corrupted = 0; +		vol->used_bytes = tmp = bytes; +		vol->last_eb_bytes = do_div(tmp, vol->usable_leb_size); +		vol->used_ebs = tmp; +		if (vol->last_eb_bytes) +			vol->used_ebs += 1; +		else +			vol->last_eb_bytes = vol->usable_leb_size; +	} + +	mutex_lock(&ubi->volumes_mutex); +	err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec); +	mutex_unlock(&ubi->volumes_mutex); +	vol->upd_marker = 0; +	return err; +} + +/** + * ubi_start_update - start volume update. + * @ubi: UBI device description object + * @vol: volume description object + * @bytes: update bytes + * + * This function starts volume update operation. If @bytes is zero, the volume + * is just wiped out. Returns zero in case of success and a negative error code + * in case of failure. + */ +int ubi_start_update(struct ubi_device *ubi, struct ubi_volume *vol, +		     long long bytes) +{ +	int i, err; +	uint64_t tmp; + +	dbg_msg("start update of volume %d, %llu bytes", vol->vol_id, bytes); +	ubi_assert(!vol->updating && !vol->changing_leb); +	vol->updating = 1; + +	err = set_update_marker(ubi, vol); +	if (err) +		return err; + +	/* Before updating - wipe out the volume */ +	for (i = 0; i < vol->reserved_pebs; i++) { +		err = ubi_eba_unmap_leb(ubi, vol, i); +		if (err) +			return err; +	} + +	if (bytes == 0) { +		err = clear_update_marker(ubi, vol, 0); +		if (err) +			return err; +		err = ubi_wl_flush(ubi); +		if (!err) +			vol->updating = 0; +	} + +	vol->upd_buf = vmalloc(ubi->leb_size); +	if (!vol->upd_buf) +		return -ENOMEM; + +	tmp = bytes; +	vol->upd_ebs = !!do_div(tmp, vol->usable_leb_size); +	vol->upd_ebs += tmp; +	vol->upd_bytes = bytes; +	vol->upd_received = 0; +	return 0; +} + +/** + * ubi_start_leb_change - start atomic LEB change. + * @ubi: UBI device description object + * @vol: volume description object + * @req: operation request + * + * This function starts atomic LEB change operation. Returns zero in case of + * success and a negative error code in case of failure. + */ +int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, +			 const struct ubi_leb_change_req *req) +{ +	ubi_assert(!vol->updating && !vol->changing_leb); + +	dbg_msg("start changing LEB %d:%d, %u bytes", +		vol->vol_id, req->lnum, req->bytes); +	if (req->bytes == 0) +		return ubi_eba_atomic_leb_change(ubi, vol, req->lnum, NULL, 0, +						 req->dtype); + +	vol->upd_bytes = req->bytes; +	vol->upd_received = 0; +	vol->changing_leb = 1; +	vol->ch_lnum = req->lnum; +	vol->ch_dtype = req->dtype; + +	vol->upd_buf = vmalloc(req->bytes); +	if (!vol->upd_buf) +		return -ENOMEM; + +	return 0; +} + +/** + * write_leb - write update data. + * @ubi: UBI device description object + * @vol: volume description object + * @lnum: logical eraseblock number + * @buf: data to write + * @len: data size + * @used_ebs: how many logical eraseblocks will this volume contain (static + * volumes only) + * + * This function writes update data to corresponding logical eraseblock. In + * case of dynamic volume, this function checks if the data contains 0xFF bytes + * at the end. 
If yes, the 0xFF bytes are cut and not written. So if the whole + * buffer contains only 0xFF bytes, the LEB is left unmapped. + * + * The reason why we skip the trailing 0xFF bytes in case of dynamic volume is + * that we want to make sure that more data may be appended to the logical + * eraseblock in future. Indeed, writing 0xFF bytes may have side effects and + * this PEB won't be writable anymore. So if one writes the file-system image + * to the UBI volume where 0xFFs mean free space - UBI makes sure this free + * space is writable after the update. + * + * We do not do this for static volumes because they are read-only. But this + * also cannot be done because we have to store per-LEB CRC and the correct + * data length. + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, +		     void *buf, int len, int used_ebs) +{ +	int err; + +	if (vol->vol_type == UBI_DYNAMIC_VOLUME) { +		int l = ALIGN(len, ubi->min_io_size); + +		memset(buf + len, 0xFF, l - len); +		len = ubi_calc_data_len(ubi, buf, l); +		if (len == 0) { +			dbg_msg("all %d bytes contain 0xFF - skip", len); +			return 0; +		} + +		err = ubi_eba_write_leb(ubi, vol, lnum, buf, 0, len, UBI_UNKNOWN); +	} else { +		/* +		 * When writing static volume, and this is the last logical +		 * eraseblock, the length (@len) does not have to be aligned to +		 * the minimal flash I/O unit. The 'ubi_eba_write_leb_st()' +		 * function accepts exact (unaligned) length and stores it in +		 * the VID header. And it takes care of proper alignment by +		 * padding the buffer. Here we just make sure the padding will +		 * contain zeros, not random trash. +		 */ +		memset(buf + len, 0, vol->usable_leb_size - len); +		err = ubi_eba_write_leb_st(ubi, vol, lnum, buf, len, +					   UBI_UNKNOWN, used_ebs); +	} + +	return err; +} + +/** + * ubi_more_update_data - write more update data. + * @vol: volume description object + * @buf: write data (user-space memory buffer) + * @count: how much bytes to write + * + * This function writes more data to the volume which is being updated. It may + * be called arbitrary number of times until all the update data arriveis. This + * function returns %0 in case of success, number of bytes written during the + * last call if the whole volume update has been successfully finished, and a + * negative error code in case of failure. + */ +int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol, +			 const void __user *buf, int count) +{ +	uint64_t tmp; +	int lnum, offs, err = 0, len, to_write = count; + +	dbg_msg("write %d of %lld bytes, %lld already passed", +		count, vol->upd_bytes, vol->upd_received); + +	if (ubi->ro_mode) +		return -EROFS; + +	tmp = vol->upd_received; +	offs = do_div(tmp, vol->usable_leb_size); +	lnum = tmp; + +	if (vol->upd_received + count > vol->upd_bytes) +		to_write = count = vol->upd_bytes - vol->upd_received; + +	/* +	 * When updating volumes, we accumulate whole logical eraseblock of +	 * data and write it at once. +	 */ +	if (offs != 0) { +		/* +		 * This is a write to the middle of the logical eraseblock. We +		 * copy the data to our update buffer and wait for more data or +		 * flush it if the whole eraseblock is written or the update +		 * is finished. 
+		 */ + +		len = vol->usable_leb_size - offs; +		if (len > count) +			len = count; + +		err = copy_from_user(vol->upd_buf + offs, buf, len); +		if (err) +			return -EFAULT; + +		if (offs + len == vol->usable_leb_size || +		    vol->upd_received + len == vol->upd_bytes) { +			int flush_len = offs + len; + +			/* +			 * OK, we gathered either the whole eraseblock or this +			 * is the last chunk, it's time to flush the buffer. +			 */ +			ubi_assert(flush_len <= vol->usable_leb_size); +			err = write_leb(ubi, vol, lnum, vol->upd_buf, flush_len, +					vol->upd_ebs); +			if (err) +				return err; +		} + +		vol->upd_received += len; +		count -= len; +		buf += len; +		lnum += 1; +	} + +	/* +	 * If we've got more to write, let's continue. At this point we know we +	 * are starting from the beginning of an eraseblock. +	 */ +	while (count) { +		if (count > vol->usable_leb_size) +			len = vol->usable_leb_size; +		else +			len = count; + +		err = copy_from_user(vol->upd_buf, buf, len); +		if (err) +			return -EFAULT; + +		if (len == vol->usable_leb_size || +		    vol->upd_received + len == vol->upd_bytes) { +			err = write_leb(ubi, vol, lnum, vol->upd_buf, +					len, vol->upd_ebs); +			if (err) +				break; +		} + +		vol->upd_received += len; +		count -= len; +		lnum += 1; +		buf += len; +	} + +	ubi_assert(vol->upd_received <= vol->upd_bytes); +	if (vol->upd_received == vol->upd_bytes) { +		/* The update is finished, clear the update marker */ +		err = clear_update_marker(ubi, vol, vol->upd_bytes); +		if (err) +			return err; +		err = ubi_wl_flush(ubi); +		if (err == 0) { +			vol->updating = 0; +			err = to_write; +			vfree(vol->upd_buf); +		} +	} + +	return err; +} + +/** + * ubi_more_leb_change_data - accept more data for atomic LEB change. + * @vol: volume description object + * @buf: write data (user-space memory buffer) + * @count: how much bytes to write + * + * This function accepts more data to the volume which is being under the + * "atomic LEB change" operation. It may be called arbitrary number of times + * until all data arrives. This function returns %0 in case of success, number + * of bytes written during the last call if the whole "atomic LEB change" + * operation has been successfully finished, and a negative error code in case + * of failure. 
+ */ +int ubi_more_leb_change_data(struct ubi_device *ubi, struct ubi_volume *vol, +			     const void __user *buf, int count) +{ +	int err; + +	dbg_msg("write %d of %lld bytes, %lld already passed", +		count, vol->upd_bytes, vol->upd_received); + +	if (ubi->ro_mode) +		return -EROFS; + +	if (vol->upd_received + count > vol->upd_bytes) +		count = vol->upd_bytes - vol->upd_received; + +	err = copy_from_user(vol->upd_buf + vol->upd_received, buf, count); +	if (err) +		return -EFAULT; + +	vol->upd_received += count; + +	if (vol->upd_received == vol->upd_bytes) { +		int len = ALIGN((int)vol->upd_bytes, ubi->min_io_size); + +		memset(vol->upd_buf + vol->upd_bytes, 0xFF, len - vol->upd_bytes); +		len = ubi_calc_data_len(ubi, vol->upd_buf, len); +		err = ubi_eba_atomic_leb_change(ubi, vol, vol->ch_lnum, +						vol->upd_buf, len, UBI_UNKNOWN); +		if (err) +			return err; +	} + +	ubi_assert(vol->upd_received <= vol->upd_bytes); +	if (vol->upd_received == vol->upd_bytes) { +		vol->changing_leb = 0; +		err = count; +		vfree(vol->upd_buf); +	} + +	return err; +} diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c new file mode 100644 index 000000000..a87a2f367 --- /dev/null +++ b/drivers/mtd/ubi/vmt.c @@ -0,0 +1,862 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation;  either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file contains implementation of volume creation, deletion, updating and + * resizing. 
+ */ + +#ifdef UBI_LINUX +#include <linux/err.h> +#include <asm/div64.h> +#endif + +#include <ubi_uboot.h> +#include "ubi.h" + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +static void paranoid_check_volumes(struct ubi_device *ubi); +#else +#define paranoid_check_volumes(ubi) +#endif + +#ifdef UBI_LINUX +static ssize_t vol_attribute_show(struct device *dev, +				  struct device_attribute *attr, char *buf); + +/* Device attributes corresponding to files in '/<sysfs>/class/ubi/ubiX_Y' */ +static struct device_attribute attr_vol_reserved_ebs = +	__ATTR(reserved_ebs, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_type = +	__ATTR(type, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_name = +	__ATTR(name, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_corrupted = +	__ATTR(corrupted, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_alignment = +	__ATTR(alignment, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_usable_eb_size = +	__ATTR(usable_eb_size, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_data_bytes = +	__ATTR(data_bytes, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute attr_vol_upd_marker = +	__ATTR(upd_marker, S_IRUGO, vol_attribute_show, NULL); + +/* + * "Show" method for files in '/<sysfs>/class/ubi/ubiX_Y/'. + * + * Consider a situation: + * A. process 1 opens a sysfs file related to volume Y, say + *    /<sysfs>/class/ubi/ubiX_Y/reserved_ebs; + * B. process 2 removes volume Y; + * C. process 1 starts reading the /<sysfs>/class/ubi/ubiX_Y/reserved_ebs file; + * + * In this situation, this function will return %-ENODEV because it will find + * out that the volume was removed from the @ubi->volumes array. 
+ */ +static ssize_t vol_attribute_show(struct device *dev, +				  struct device_attribute *attr, char *buf) +{ +	int ret; +	struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); +	struct ubi_device *ubi; + +	ubi = ubi_get_device(vol->ubi->ubi_num); +	if (!ubi) +		return -ENODEV; + +	spin_lock(&ubi->volumes_lock); +	if (!ubi->volumes[vol->vol_id]) { +		spin_unlock(&ubi->volumes_lock); +		ubi_put_device(ubi); +		return -ENODEV; +	} +	/* Take a reference to prevent volume removal */ +	vol->ref_count += 1; +	spin_unlock(&ubi->volumes_lock); + +	if (attr == &attr_vol_reserved_ebs) +		ret = sprintf(buf, "%d\n", vol->reserved_pebs); +	else if (attr == &attr_vol_type) { +		const char *tp; + +		if (vol->vol_type == UBI_DYNAMIC_VOLUME) +			tp = "dynamic"; +		else +			tp = "static"; +		ret = sprintf(buf, "%s\n", tp); +	} else if (attr == &attr_vol_name) +		ret = sprintf(buf, "%s\n", vol->name); +	else if (attr == &attr_vol_corrupted) +		ret = sprintf(buf, "%d\n", vol->corrupted); +	else if (attr == &attr_vol_alignment) +		ret = sprintf(buf, "%d\n", vol->alignment); +	else if (attr == &attr_vol_usable_eb_size) +		ret = sprintf(buf, "%d\n", vol->usable_leb_size); +	else if (attr == &attr_vol_data_bytes) +		ret = sprintf(buf, "%lld\n", vol->used_bytes); +	else if (attr == &attr_vol_upd_marker) +		ret = sprintf(buf, "%d\n", vol->upd_marker); +	else +		/* This must be a bug */ +		ret = -EINVAL; + +	/* We've done the operation, drop volume and UBI device references */ +	spin_lock(&ubi->volumes_lock); +	vol->ref_count -= 1; +	ubi_assert(vol->ref_count >= 0); +	spin_unlock(&ubi->volumes_lock); +	ubi_put_device(ubi); +	return ret; +} +#endif + +/* Release method for volume devices */ +static void vol_release(struct device *dev) +{ +	struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); + +	kfree(vol); +} + +#ifdef UBI_LINUX +/** + * volume_sysfs_init - initialize sysfs for new volume. + * @ubi: UBI device description object + * @vol: volume description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + * + * Note, this function does not free allocated resources in case of failure - + * the caller does it. This is because this would cause release() here and the + * caller would oops. + */ +static int volume_sysfs_init(struct ubi_device *ubi, struct ubi_volume *vol) +{ +	int err; + +	err = device_create_file(&vol->dev, &attr_vol_reserved_ebs); +	if (err) +		return err; +	err = device_create_file(&vol->dev, &attr_vol_type); +	if (err) +		return err; +	err = device_create_file(&vol->dev, &attr_vol_name); +	if (err) +		return err; +	err = device_create_file(&vol->dev, &attr_vol_corrupted); +	if (err) +		return err; +	err = device_create_file(&vol->dev, &attr_vol_alignment); +	if (err) +		return err; +	err = device_create_file(&vol->dev, &attr_vol_usable_eb_size); +	if (err) +		return err; +	err = device_create_file(&vol->dev, &attr_vol_data_bytes); +	if (err) +		return err; +	err = device_create_file(&vol->dev, &attr_vol_upd_marker); +	return err; +} + +/** + * volume_sysfs_close - close sysfs for a volume. 
+ * @vol: volume description object + */ +static void volume_sysfs_close(struct ubi_volume *vol) +{ +	device_remove_file(&vol->dev, &attr_vol_upd_marker); +	device_remove_file(&vol->dev, &attr_vol_data_bytes); +	device_remove_file(&vol->dev, &attr_vol_usable_eb_size); +	device_remove_file(&vol->dev, &attr_vol_alignment); +	device_remove_file(&vol->dev, &attr_vol_corrupted); +	device_remove_file(&vol->dev, &attr_vol_name); +	device_remove_file(&vol->dev, &attr_vol_type); +	device_remove_file(&vol->dev, &attr_vol_reserved_ebs); +	device_unregister(&vol->dev); +} +#endif + +/** + * ubi_create_volume - create volume. + * @ubi: UBI device description object + * @req: volume creation request + * + * This function creates volume described by @req. If @req->vol_id id + * %UBI_VOL_NUM_AUTO, this function automatically assign ID to the new volume + * and saves it in @req->vol_id. Returns zero in case of success and a negative + * error code in case of failure. Note, the caller has to have the + * @ubi->volumes_mutex locked. + */ +int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) +{ +	int i, err, vol_id = req->vol_id, dont_free = 0; +	struct ubi_volume *vol; +	struct ubi_vtbl_record vtbl_rec; +	uint64_t bytes; +	dev_t dev; + +	if (ubi->ro_mode) +		return -EROFS; + +	vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); +	if (!vol) +		return -ENOMEM; + +	spin_lock(&ubi->volumes_lock); +	if (vol_id == UBI_VOL_NUM_AUTO) { +		/* Find unused volume ID */ +		dbg_msg("search for vacant volume ID"); +		for (i = 0; i < ubi->vtbl_slots; i++) +			if (!ubi->volumes[i]) { +				vol_id = i; +				break; +			} + +		if (vol_id == UBI_VOL_NUM_AUTO) { +			dbg_err("out of volume IDs"); +			err = -ENFILE; +			goto out_unlock; +		} +		req->vol_id = vol_id; +	} + +	dbg_msg("volume ID %d, %llu bytes, type %d, name %s", +		vol_id, (unsigned long long)req->bytes, +		(int)req->vol_type, req->name); + +	/* Ensure that this volume does not exist */ +	err = -EEXIST; +	if (ubi->volumes[vol_id]) { +		dbg_err("volume %d already exists", vol_id); +		goto out_unlock; +	} + +	/* Ensure that the name is unique */ +	for (i = 0; i < ubi->vtbl_slots; i++) +		if (ubi->volumes[i] && +		    ubi->volumes[i]->name_len == req->name_len && +		    !strcmp(ubi->volumes[i]->name, req->name)) { +			dbg_err("volume \"%s\" exists (ID %d)", req->name, i); +			goto out_unlock; +		} + +        /* Calculate how many eraseblocks are requested */ +	vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment; +	bytes = req->bytes; +	if (do_div(bytes, vol->usable_leb_size)) +		vol->reserved_pebs = 1; +	vol->reserved_pebs += bytes; + +	/* Reserve physical eraseblocks */ +	if (vol->reserved_pebs > ubi->avail_pebs) { +		dbg_err("not enough PEBs, only %d available", ubi->avail_pebs); +		err = -ENOSPC; +		goto out_unlock; +	} +	ubi->avail_pebs -= vol->reserved_pebs; +	ubi->rsvd_pebs += vol->reserved_pebs; +	spin_unlock(&ubi->volumes_lock); + +	vol->vol_id    = vol_id; +	vol->alignment = req->alignment; +	vol->data_pad  = ubi->leb_size % vol->alignment; +	vol->vol_type  = req->vol_type; +	vol->name_len  = req->name_len; +	memcpy(vol->name, req->name, vol->name_len + 1); +	vol->ubi = ubi; + +	/* +	 * Finish all pending erases because there may be some LEBs belonging +	 * to the same volume ID. 
+	 */ +	err = ubi_wl_flush(ubi); +	if (err) +		goto out_acc; + +	vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int), GFP_KERNEL); +	if (!vol->eba_tbl) { +		err = -ENOMEM; +		goto out_acc; +	} + +	for (i = 0; i < vol->reserved_pebs; i++) +		vol->eba_tbl[i] = UBI_LEB_UNMAPPED; + +	if (vol->vol_type == UBI_DYNAMIC_VOLUME) { +		vol->used_ebs = vol->reserved_pebs; +		vol->last_eb_bytes = vol->usable_leb_size; +		vol->used_bytes = +			(long long)vol->used_ebs * vol->usable_leb_size; +	} else { +		bytes = vol->used_bytes; +		vol->last_eb_bytes = do_div(bytes, vol->usable_leb_size); +		vol->used_ebs = bytes; +		if (vol->last_eb_bytes) +			vol->used_ebs += 1; +		else +			vol->last_eb_bytes = vol->usable_leb_size; +	} + +	/* Register character device for the volume */ +	cdev_init(&vol->cdev, &ubi_vol_cdev_operations); +	vol->cdev.owner = THIS_MODULE; +	dev = MKDEV(MAJOR(ubi->cdev.dev), vol_id + 1); +	err = cdev_add(&vol->cdev, dev, 1); +	if (err) { +		ubi_err("cannot add character device"); +		goto out_mapping; +	} + +	err = ubi_create_gluebi(ubi, vol); +	if (err) +		goto out_cdev; + +	vol->dev.release = vol_release; +	vol->dev.parent = &ubi->dev; +	vol->dev.devt = dev; +	vol->dev.class = ubi_class; + +	sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id); +	err = device_register(&vol->dev); +	if (err) { +		ubi_err("cannot register device"); +		goto out_gluebi; +	} + +	err = volume_sysfs_init(ubi, vol); +	if (err) +		goto out_sysfs; + +	/* Fill volume table record */ +	memset(&vtbl_rec, 0, sizeof(struct ubi_vtbl_record)); +	vtbl_rec.reserved_pebs = cpu_to_be32(vol->reserved_pebs); +	vtbl_rec.alignment     = cpu_to_be32(vol->alignment); +	vtbl_rec.data_pad      = cpu_to_be32(vol->data_pad); +	vtbl_rec.name_len      = cpu_to_be16(vol->name_len); +	if (vol->vol_type == UBI_DYNAMIC_VOLUME) +		vtbl_rec.vol_type = UBI_VID_DYNAMIC; +	else +		vtbl_rec.vol_type = UBI_VID_STATIC; +	memcpy(vtbl_rec.name, vol->name, vol->name_len + 1); + +	err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); +	if (err) +		goto out_sysfs; + +	spin_lock(&ubi->volumes_lock); +	ubi->volumes[vol_id] = vol; +	ubi->vol_count += 1; +	spin_unlock(&ubi->volumes_lock); + +	paranoid_check_volumes(ubi); +	return 0; + +out_sysfs: +	/* +	 * We have registered our device, we should not free the volume* +	 * description object in this function in case of an error - it is +	 * freed by the release function. +	 * +	 * Get device reference to prevent the release function from being +	 * called just after sysfs has been closed. +	 */ +	dont_free = 1; +	get_device(&vol->dev); +	volume_sysfs_close(vol); +out_gluebi: +	if (ubi_destroy_gluebi(vol)) +		dbg_err("cannot destroy gluebi for volume %d:%d", +			ubi->ubi_num, vol_id); +out_cdev: +	cdev_del(&vol->cdev); +out_mapping: +	kfree(vol->eba_tbl); +out_acc: +	spin_lock(&ubi->volumes_lock); +	ubi->rsvd_pebs -= vol->reserved_pebs; +	ubi->avail_pebs += vol->reserved_pebs; +out_unlock: +	spin_unlock(&ubi->volumes_lock); +	if (dont_free) +		put_device(&vol->dev); +	else +		kfree(vol); +	ubi_err("cannot create volume %d, error %d", vol_id, err); +	return err; +} + +/** + * ubi_remove_volume - remove volume. + * @desc: volume descriptor + * + * This function removes volume described by @desc. The volume has to be opened + * in "exclusive" mode. Returns zero in case of success and a negative error + * code in case of failure. The caller has to have the @ubi->volumes_mutex + * locked. 
+ */ +int ubi_remove_volume(struct ubi_volume_desc *desc) +{ +	struct ubi_volume *vol = desc->vol; +	struct ubi_device *ubi = vol->ubi; +	int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs; + +	dbg_msg("remove UBI volume %d", vol_id); +	ubi_assert(desc->mode == UBI_EXCLUSIVE); +	ubi_assert(vol == ubi->volumes[vol_id]); + +	if (ubi->ro_mode) +		return -EROFS; + +	spin_lock(&ubi->volumes_lock); +	if (vol->ref_count > 1) { +		/* +		 * The volume is busy, probably someone is reading one of its +		 * sysfs files. +		 */ +		err = -EBUSY; +		goto out_unlock; +	} +	ubi->volumes[vol_id] = NULL; +	spin_unlock(&ubi->volumes_lock); + +	err = ubi_destroy_gluebi(vol); +	if (err) +		goto out_err; + +	err = ubi_change_vtbl_record(ubi, vol_id, NULL); +	if (err) +		goto out_err; + +	for (i = 0; i < vol->reserved_pebs; i++) { +		err = ubi_eba_unmap_leb(ubi, vol, i); +		if (err) +			goto out_err; +	} + +	kfree(vol->eba_tbl); +	vol->eba_tbl = NULL; +	cdev_del(&vol->cdev); +	volume_sysfs_close(vol); + +	spin_lock(&ubi->volumes_lock); +	ubi->rsvd_pebs -= reserved_pebs; +	ubi->avail_pebs += reserved_pebs; +	i = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs; +	if (i > 0) { +		i = ubi->avail_pebs >= i ? i : ubi->avail_pebs; +		ubi->avail_pebs -= i; +		ubi->rsvd_pebs += i; +		ubi->beb_rsvd_pebs += i; +		if (i > 0) +			ubi_msg("reserve more %d PEBs", i); +	} +	ubi->vol_count -= 1; +	spin_unlock(&ubi->volumes_lock); + +	paranoid_check_volumes(ubi); +	return 0; + +out_err: +	ubi_err("cannot remove volume %d, error %d", vol_id, err); +	spin_lock(&ubi->volumes_lock); +	ubi->volumes[vol_id] = vol; +out_unlock: +	spin_unlock(&ubi->volumes_lock); +	return err; +} + +/** + * ubi_resize_volume - re-size volume. + * @desc: volume descriptor + * @reserved_pebs: new size in physical eraseblocks + * + * This function re-sizes the volume and returns zero in case of success, and a + * negative error code in case of failure. The caller has to have the + * @ubi->volumes_mutex locked. 
+ */ +int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) +{ +	int i, err, pebs, *new_mapping; +	struct ubi_volume *vol = desc->vol; +	struct ubi_device *ubi = vol->ubi; +	struct ubi_vtbl_record vtbl_rec; +	int vol_id = vol->vol_id; + +	if (ubi->ro_mode) +		return -EROFS; + +	dbg_msg("re-size volume %d to from %d to %d PEBs", +		vol_id, vol->reserved_pebs, reserved_pebs); + +	if (vol->vol_type == UBI_STATIC_VOLUME && +	    reserved_pebs < vol->used_ebs) { +		dbg_err("too small size %d, %d LEBs contain data", +			reserved_pebs, vol->used_ebs); +		return -EINVAL; +	} + +	/* If the size is the same, we have nothing to do */ +	if (reserved_pebs == vol->reserved_pebs) +		return 0; + +	new_mapping = kmalloc(reserved_pebs * sizeof(int), GFP_KERNEL); +	if (!new_mapping) +		return -ENOMEM; + +	for (i = 0; i < reserved_pebs; i++) +		new_mapping[i] = UBI_LEB_UNMAPPED; + +	spin_lock(&ubi->volumes_lock); +	if (vol->ref_count > 1) { +		spin_unlock(&ubi->volumes_lock); +		err = -EBUSY; +		goto out_free; +	} +	spin_unlock(&ubi->volumes_lock); + +	/* Reserve physical eraseblocks */ +	pebs = reserved_pebs - vol->reserved_pebs; +	if (pebs > 0) { +		spin_lock(&ubi->volumes_lock); +		if (pebs > ubi->avail_pebs) { +			dbg_err("not enough PEBs: requested %d, available %d", +				pebs, ubi->avail_pebs); +			spin_unlock(&ubi->volumes_lock); +			err = -ENOSPC; +			goto out_free; +		} +		ubi->avail_pebs -= pebs; +		ubi->rsvd_pebs += pebs; +		for (i = 0; i < vol->reserved_pebs; i++) +			new_mapping[i] = vol->eba_tbl[i]; +		kfree(vol->eba_tbl); +		vol->eba_tbl = new_mapping; +		spin_unlock(&ubi->volumes_lock); +	} + +	/* Change volume table record */ +	memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); +	vtbl_rec.reserved_pebs = cpu_to_be32(reserved_pebs); +	err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); +	if (err) +		goto out_acc; + +	if (pebs < 0) { +		for (i = 0; i < -pebs; i++) { +			err = ubi_eba_unmap_leb(ubi, vol, reserved_pebs + i); +			if (err) +				goto out_acc; +		} +		spin_lock(&ubi->volumes_lock); +		ubi->rsvd_pebs += pebs; +		ubi->avail_pebs -= pebs; +		pebs = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs; +		if (pebs > 0) { +			pebs = ubi->avail_pebs >= pebs ? pebs : ubi->avail_pebs; +			ubi->avail_pebs -= pebs; +			ubi->rsvd_pebs += pebs; +			ubi->beb_rsvd_pebs += pebs; +			if (pebs > 0) +				ubi_msg("reserve more %d PEBs", pebs); +		} +		for (i = 0; i < reserved_pebs; i++) +			new_mapping[i] = vol->eba_tbl[i]; +		kfree(vol->eba_tbl); +		vol->eba_tbl = new_mapping; +		spin_unlock(&ubi->volumes_lock); +	} + +	vol->reserved_pebs = reserved_pebs; +	if (vol->vol_type == UBI_DYNAMIC_VOLUME) { +		vol->used_ebs = reserved_pebs; +		vol->last_eb_bytes = vol->usable_leb_size; +		vol->used_bytes = +			(long long)vol->used_ebs * vol->usable_leb_size; +	} + +	paranoid_check_volumes(ubi); +	return 0; + +out_acc: +	if (pebs > 0) { +		spin_lock(&ubi->volumes_lock); +		ubi->rsvd_pebs -= pebs; +		ubi->avail_pebs += pebs; +		spin_unlock(&ubi->volumes_lock); +	} +out_free: +	kfree(new_mapping); +	return err; +} + +/** + * ubi_add_volume - add volume. + * @ubi: UBI device description object + * @vol: volume description object + * + * This function adds an existing volume and initializes all its data + * structures. Returns zero in case of success and a negative error code in + * case of failure. 
+ */ +int ubi_add_volume(struct ubi_device *ubi, struct ubi_volume *vol) +{ +	int err, vol_id = vol->vol_id; +	dev_t dev; + +	dbg_msg("add volume %d", vol_id); +	ubi_dbg_dump_vol_info(vol); + +	/* Register character device for the volume */ +	cdev_init(&vol->cdev, &ubi_vol_cdev_operations); +	vol->cdev.owner = THIS_MODULE; +	dev = MKDEV(MAJOR(ubi->cdev.dev), vol->vol_id + 1); +	err = cdev_add(&vol->cdev, dev, 1); +	if (err) { +		ubi_err("cannot add character device for volume %d, error %d", +			vol_id, err); +		return err; +	} + +	err = ubi_create_gluebi(ubi, vol); +	if (err) +		goto out_cdev; + +	vol->dev.release = vol_release; +	vol->dev.parent = &ubi->dev; +	vol->dev.devt = dev; +	vol->dev.class = ubi_class; +	sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id); +	err = device_register(&vol->dev); +	if (err) +		goto out_gluebi; + +	err = volume_sysfs_init(ubi, vol); +	if (err) { +		cdev_del(&vol->cdev); +		err = ubi_destroy_gluebi(vol); +		volume_sysfs_close(vol); +		return err; +	} + +	paranoid_check_volumes(ubi); +	return 0; + +out_gluebi: +	err = ubi_destroy_gluebi(vol); +out_cdev: +	cdev_del(&vol->cdev); +	return err; +} + +/** + * ubi_free_volume - free volume. + * @ubi: UBI device description object + * @vol: volume description object + * + * This function frees all resources for volume @vol but does not remove it. + * Used only when the UBI device is detached. + */ +void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol) +{ +	int err; + +	dbg_msg("free volume %d", vol->vol_id); + +	ubi->volumes[vol->vol_id] = NULL; +	err = ubi_destroy_gluebi(vol); +	cdev_del(&vol->cdev); +	volume_sysfs_close(vol); +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_check_volume - check volume information. + * @ubi: UBI device description object + * @vol_id: volume ID + */ +static void paranoid_check_volume(struct ubi_device *ubi, int vol_id) +{ +	int idx = vol_id2idx(ubi, vol_id); +	int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker; +	const struct ubi_volume *vol; +	long long n; +	const char *name; + +	spin_lock(&ubi->volumes_lock); +	reserved_pebs = be32_to_cpu(ubi->vtbl[vol_id].reserved_pebs); +	vol = ubi->volumes[idx]; + +	if (!vol) { +		if (reserved_pebs) { +			ubi_err("no volume info, but volume exists"); +			goto fail; +		} +		spin_unlock(&ubi->volumes_lock); +		return; +	} + +	if (vol->exclusive) { +		/* +		 * The volume may be being created at the moment, do not check +		 * it (e.g., it may be in the middle of ubi_create_volume(). 
+		 */ +		spin_unlock(&ubi->volumes_lock); +		return; +	} + +	if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 || +	    vol->name_len < 0) { +		ubi_err("negative values"); +		goto fail; +	} +	if (vol->alignment > ubi->leb_size || vol->alignment == 0) { +		ubi_err("bad alignment"); +		goto fail; +	} + +	n = vol->alignment & (ubi->min_io_size - 1); +	if (vol->alignment != 1 && n) { +		ubi_err("alignment is not multiple of min I/O unit"); +		goto fail; +	} + +	n = ubi->leb_size % vol->alignment; +	if (vol->data_pad != n) { +		ubi_err("bad data_pad, has to be %lld", n); +		goto fail; +	} + +	if (vol->vol_type != UBI_DYNAMIC_VOLUME && +	    vol->vol_type != UBI_STATIC_VOLUME) { +		ubi_err("bad vol_type"); +		goto fail; +	} + +	if (vol->upd_marker && vol->corrupted) { +		dbg_err("update marker and corrupted simultaneously"); +		goto fail; +	} + +	if (vol->reserved_pebs > ubi->good_peb_count) { +		ubi_err("too large reserved_pebs"); +		goto fail; +	} + +	n = ubi->leb_size - vol->data_pad; +	if (vol->usable_leb_size != ubi->leb_size - vol->data_pad) { +		ubi_err("bad usable_leb_size, has to be %lld", n); +		goto fail; +	} + +	if (vol->name_len > UBI_VOL_NAME_MAX) { +		ubi_err("too long volume name, max is %d", UBI_VOL_NAME_MAX); +		goto fail; +	} + +	if (!vol->name) { +		ubi_err("NULL volume name"); +		goto fail; +	} + +	n = strnlen(vol->name, vol->name_len + 1); +	if (n != vol->name_len) { +		ubi_err("bad name_len %lld", n); +		goto fail; +	} + +	n = (long long)vol->used_ebs * vol->usable_leb_size; +	if (vol->vol_type == UBI_DYNAMIC_VOLUME) { +		if (vol->corrupted) { +			ubi_err("corrupted dynamic volume"); +			goto fail; +		} +		if (vol->used_ebs != vol->reserved_pebs) { +			ubi_err("bad used_ebs"); +			goto fail; +		} +		if (vol->last_eb_bytes != vol->usable_leb_size) { +			ubi_err("bad last_eb_bytes"); +			goto fail; +		} +		if (vol->used_bytes != n) { +			ubi_err("bad used_bytes"); +			goto fail; +		} +	} else { +		if (vol->used_ebs < 0 || vol->used_ebs > vol->reserved_pebs) { +			ubi_err("bad used_ebs"); +			goto fail; +		} +		if (vol->last_eb_bytes < 0 || +		    vol->last_eb_bytes > vol->usable_leb_size) { +			ubi_err("bad last_eb_bytes"); +			goto fail; +		} +		if (vol->used_bytes < 0 || vol->used_bytes > n || +		    vol->used_bytes < n - vol->usable_leb_size) { +			ubi_err("bad used_bytes"); +			goto fail; +		} +	} + +	alignment  = be32_to_cpu(ubi->vtbl[vol_id].alignment); +	data_pad   = be32_to_cpu(ubi->vtbl[vol_id].data_pad); +	name_len   = be16_to_cpu(ubi->vtbl[vol_id].name_len); +	upd_marker = ubi->vtbl[vol_id].upd_marker; +	name       = &ubi->vtbl[vol_id].name[0]; +	if (ubi->vtbl[vol_id].vol_type == UBI_VID_DYNAMIC) +		vol_type = UBI_DYNAMIC_VOLUME; +	else +		vol_type = UBI_STATIC_VOLUME; + +	if (alignment != vol->alignment || data_pad != vol->data_pad || +	    upd_marker != vol->upd_marker || vol_type != vol->vol_type || +	    name_len!= vol->name_len || strncmp(name, vol->name, name_len)) { +		ubi_err("volume info is different"); +		goto fail; +	} + +	spin_unlock(&ubi->volumes_lock); +	return; + +fail: +	ubi_err("paranoid check failed for volume %d", vol_id); +	ubi_dbg_dump_vol_info(vol); +	ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id); +	spin_unlock(&ubi->volumes_lock); +	BUG(); +} + +/** + * paranoid_check_volumes - check information about all volumes. 
+ * @ubi: UBI device description object + */ +static void paranoid_check_volumes(struct ubi_device *ubi) +{ +	int i; + +	for (i = 0; i < ubi->vtbl_slots; i++) +		paranoid_check_volume(ubi, i); +} +#endif diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c new file mode 100644 index 000000000..9264ac68e --- /dev/null +++ b/drivers/mtd/ubi/vtbl.c @@ -0,0 +1,837 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file includes volume table manipulation code. The volume table is an + * on-flash table containing volume meta-data like name, number of reserved + * physical eraseblocks, type, etc. The volume table is stored in the so-called + * "layout volume". + * + * The layout volume is an internal volume which is organized as follows. It + * consists of two logical eraseblocks - LEB 0 and LEB 1. Each logical + * eraseblock stores one volume table copy, i.e. LEB 0 and LEB 1 duplicate each + * other. This redundancy guarantees robustness to unclean reboots. The volume + * table is basically an array of volume table records. Each record contains + * full information about the volume and protected by a CRC checksum. + * + * The volume table is changed, it is first changed in RAM. Then LEB 0 is + * erased, and the updated volume table is written back to LEB 0. Then same for + * LEB 1. This scheme guarantees recoverability from unclean reboots. + * + * In this UBI implementation the on-flash volume table does not contain any + * information about how many data static volumes contain. This information may + * be found from the scanning data. + * + * But it would still be beneficial to store this information in the volume + * table. For example, suppose we have a static volume X, and all its physical + * eraseblocks became bad for some reasons. Suppose we are attaching the + * corresponding MTD device, the scanning has found no logical eraseblocks + * corresponding to the volume X. According to the volume table volume X does + * exist. So we don't know whether it is just empty or all its physical + * eraseblocks went bad. So we cannot alarm the user about this corruption. + * + * The volume table also stores so-called "update marker", which is used for + * volume updates. Before updating the volume, the update marker is set, and + * after the update operation is finished, the update marker is cleared. So if + * the update operation was interrupted (e.g. by an unclean reboot) - the + * update marker is still there and we know that the volume's contents is + * damaged. 
+ */ + +#ifdef UBI_LINUX +#include <linux/crc32.h> +#include <linux/err.h> +#include <asm/div64.h> +#endif + +#include <ubi_uboot.h> +#include "ubi.h" + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +static void paranoid_vtbl_check(const struct ubi_device *ubi); +#else +#define paranoid_vtbl_check(ubi) +#endif + +/* Empty volume table record */ +static struct ubi_vtbl_record empty_vtbl_record; + +/** + * ubi_change_vtbl_record - change volume table record. + * @ubi: UBI device description object + * @idx: table index to change + * @vtbl_rec: new volume table record + * + * This function changes volume table record @idx. If @vtbl_rec is %NULL, empty + * volume table record is written. The caller does not have to calculate CRC of + * the record as it is done by this function. Returns zero in case of success + * and a negative error code in case of failure. + */ +int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, +			   struct ubi_vtbl_record *vtbl_rec) +{ +	int i, err; +	uint32_t crc; +	struct ubi_volume *layout_vol; + +	ubi_assert(idx >= 0 && idx < ubi->vtbl_slots); +	layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)]; + +	if (!vtbl_rec) +		vtbl_rec = &empty_vtbl_record; +	else { +		crc = crc32(UBI_CRC32_INIT, vtbl_rec, UBI_VTBL_RECORD_SIZE_CRC); +		vtbl_rec->crc = cpu_to_be32(crc); +	} + +	memcpy(&ubi->vtbl[idx], vtbl_rec, sizeof(struct ubi_vtbl_record)); +	for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { +		err = ubi_eba_unmap_leb(ubi, layout_vol, i); +		if (err) +			return err; + +		err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0, +					ubi->vtbl_size, UBI_LONGTERM); +		if (err) +			return err; +	} + +	paranoid_vtbl_check(ubi); +	return 0; +} + +/** + * vtbl_check - check if volume table is not corrupted and contains sensible + *              data. + * @ubi: UBI device description object + * @vtbl: volume table + * + * This function returns zero if @vtbl is all right, %1 if CRC is incorrect, + * and %-EINVAL if it contains inconsistent data. 
+ */ +static int vtbl_check(const struct ubi_device *ubi, +		      const struct ubi_vtbl_record *vtbl) +{ +	int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len; +	int upd_marker, err; +	uint32_t crc; +	const char *name; + +	for (i = 0; i < ubi->vtbl_slots; i++) { +		cond_resched(); + +		reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs); +		alignment = be32_to_cpu(vtbl[i].alignment); +		data_pad = be32_to_cpu(vtbl[i].data_pad); +		upd_marker = vtbl[i].upd_marker; +		vol_type = vtbl[i].vol_type; +		name_len = be16_to_cpu(vtbl[i].name_len); +		name = (const char *) &vtbl[i].name[0]; + +		crc = crc32(UBI_CRC32_INIT, &vtbl[i], UBI_VTBL_RECORD_SIZE_CRC); +		if (be32_to_cpu(vtbl[i].crc) != crc) { +			ubi_err("bad CRC at record %u: %#08x, not %#08x", +				 i, crc, be32_to_cpu(vtbl[i].crc)); +			ubi_dbg_dump_vtbl_record(&vtbl[i], i); +			return 1; +		} + +		if (reserved_pebs == 0) { +			if (memcmp(&vtbl[i], &empty_vtbl_record, +						UBI_VTBL_RECORD_SIZE)) { +				err = 2; +				goto bad; +			} +			continue; +		} + +		if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 || +		    name_len < 0) { +			err = 3; +			goto bad; +		} + +		if (alignment > ubi->leb_size || alignment == 0) { +			err = 4; +			goto bad; +		} + +		n = alignment & (ubi->min_io_size - 1); +		if (alignment != 1 && n) { +			err = 5; +			goto bad; +		} + +		n = ubi->leb_size % alignment; +		if (data_pad != n) { +			dbg_err("bad data_pad, has to be %d", n); +			err = 6; +			goto bad; +		} + +		if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { +			err = 7; +			goto bad; +		} + +		if (upd_marker != 0 && upd_marker != 1) { +			err = 8; +			goto bad; +		} + +		if (reserved_pebs > ubi->good_peb_count) { +			dbg_err("too large reserved_pebs, good PEBs %d", +				ubi->good_peb_count); +			err = 9; +			goto bad; +		} + +		if (name_len > UBI_VOL_NAME_MAX) { +			err = 10; +			goto bad; +		} + +		if (name[0] == '\0') { +			err = 11; +			goto bad; +		} + +		if (name_len != strnlen(name, name_len + 1)) { +			err = 12; +			goto bad; +		} +	} + +	/* Checks that all names are unique */ +	for (i = 0; i < ubi->vtbl_slots - 1; i++) { +		for (n = i + 1; n < ubi->vtbl_slots; n++) { +			int len1 = be16_to_cpu(vtbl[i].name_len); +			int len2 = be16_to_cpu(vtbl[n].name_len); + +			if (len1 > 0 && len1 == len2 && +			    !strncmp((char *)vtbl[i].name, (char *)vtbl[n].name, len1)) { +				ubi_err("volumes %d and %d have the same name" +					" \"%s\"", i, n, vtbl[i].name); +				ubi_dbg_dump_vtbl_record(&vtbl[i], i); +				ubi_dbg_dump_vtbl_record(&vtbl[n], n); +				return -EINVAL; +			} +		} +	} + +	return 0; + +bad: +	ubi_err("volume table check failed: record %d, error %d", i, err); +	ubi_dbg_dump_vtbl_record(&vtbl[i], i); +	return -EINVAL; +} + +/** + * create_vtbl - create a copy of volume table. + * @ubi: UBI device description object + * @si: scanning information + * @copy: number of the volume table copy + * @vtbl: contents of the volume table + * + * This function returns zero in case of success and a negative error code in + * case of failure. 
+ */ +static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si, +		       int copy, void *vtbl) +{ +	int err, tries = 0; +	static struct ubi_vid_hdr *vid_hdr; +	struct ubi_scan_volume *sv; +	struct ubi_scan_leb *new_seb, *old_seb = NULL; + +	ubi_msg("create volume table (copy #%d)", copy + 1); + +	vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); +	if (!vid_hdr) +		return -ENOMEM; + +	/* +	 * Check if there is a logical eraseblock which would have to contain +	 * this volume table copy was found during scanning. It has to be wiped +	 * out. +	 */ +	sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID); +	if (sv) +		old_seb = ubi_scan_find_seb(sv, copy); + +retry: +	new_seb = ubi_scan_get_free_peb(ubi, si); +	if (IS_ERR(new_seb)) { +		err = PTR_ERR(new_seb); +		goto out_free; +	} + +	vid_hdr->vol_type = UBI_VID_DYNAMIC; +	vid_hdr->vol_id = cpu_to_be32(UBI_LAYOUT_VOLUME_ID); +	vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT; +	vid_hdr->data_size = vid_hdr->used_ebs = +			     vid_hdr->data_pad = cpu_to_be32(0); +	vid_hdr->lnum = cpu_to_be32(copy); +	vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum); +	vid_hdr->leb_ver = cpu_to_be32(old_seb ? old_seb->leb_ver + 1: 0); + +	/* The EC header is already there, write the VID header */ +	err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr); +	if (err) +		goto write_error; + +	/* Write the layout volume contents */ +	err = ubi_io_write_data(ubi, vtbl, new_seb->pnum, 0, ubi->vtbl_size); +	if (err) +		goto write_error; + +	/* +	 * And add it to the scanning information. Don't delete the old +	 * @old_seb as it will be deleted and freed in 'ubi_scan_add_used()'. +	 */ +	err = ubi_scan_add_used(ubi, si, new_seb->pnum, new_seb->ec, +				vid_hdr, 0); +	kfree(new_seb); +	ubi_free_vid_hdr(ubi, vid_hdr); +	return err; + +write_error: +	if (err == -EIO && ++tries <= 5) { +		/* +		 * Probably this physical eraseblock went bad, try to pick +		 * another one. +		 */ +		list_add_tail(&new_seb->u.list, &si->corr); +		goto retry; +	} +	kfree(new_seb); +out_free: +	ubi_free_vid_hdr(ubi, vid_hdr); +	return err; + +} + +/** + * process_lvol - process the layout volume. + * @ubi: UBI device description object + * @si: scanning information + * @sv: layout volume scanning information + * + * This function is responsible for reading the layout volume, ensuring it is + * not corrupted, and recovering from corruptions if needed. Returns volume + * table in case of success and a negative error code in case of failure. + */ +static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi, +					    struct ubi_scan_info *si, +					    struct ubi_scan_volume *sv) +{ +	int err; +	struct rb_node *rb; +	struct ubi_scan_leb *seb; +	struct ubi_vtbl_record *leb[UBI_LAYOUT_VOLUME_EBS] = { NULL, NULL }; +	int leb_corrupted[UBI_LAYOUT_VOLUME_EBS] = {1, 1}; + +	/* +	 * UBI goes through the following steps when it changes the layout +	 * volume: +	 * a. erase LEB 0; +	 * b. write new data to LEB 0; +	 * c. erase LEB 1; +	 * d. write new data to LEB 1. +	 * +	 * Before the change, both LEBs contain the same data. +	 * +	 * Due to unclean reboots, the contents of LEB 0 may be lost, but there +	 * should LEB 1. So it is OK if LEB 0 is corrupted while LEB 1 is not. +	 * Similarly, LEB 1 may be lost, but there should be LEB 0. And +	 * finally, unclean reboots may result in a situation when neither LEB +	 * 0 nor LEB 1 are corrupted, but they are different. In this case, LEB +	 * 0 contains more recent information. +	 * +	 * So the plan is to first check LEB 0. Then +	 * a. 
if LEB 0 is OK, it must be containing the most resent data; then +	 *    we compare it with LEB 1, and if they are different, we copy LEB +	 *    0 to LEB 1; +	 * b. if LEB 0 is corrupted, but LEB 1 has to be OK, and we copy LEB 1 +	 *    to LEB 0. +	 */ + +	dbg_msg("check layout volume"); + +	/* Read both LEB 0 and LEB 1 into memory */ +	ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) { +		leb[seb->lnum] = vmalloc(ubi->vtbl_size); +		if (!leb[seb->lnum]) { +			err = -ENOMEM; +			goto out_free; +		} +		memset(leb[seb->lnum], 0, ubi->vtbl_size); + +		err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0, +				       ubi->vtbl_size); +		if (err == UBI_IO_BITFLIPS || err == -EBADMSG) +			/* +			 * Scrub the PEB later. Note, -EBADMSG indicates an +			 * uncorrectable ECC error, but we have our own CRC and +			 * the data will be checked later. If the data is OK, +			 * the PEB will be scrubbed (because we set +			 * seb->scrub). If the data is not OK, the contents of +			 * the PEB will be recovered from the second copy, and +			 * seb->scrub will be cleared in +			 * 'ubi_scan_add_used()'. +			 */ +			seb->scrub = 1; +		else if (err) +			goto out_free; +	} + +	err = -EINVAL; +	if (leb[0]) { +		leb_corrupted[0] = vtbl_check(ubi, leb[0]); +		if (leb_corrupted[0] < 0) +			goto out_free; +	} + +	if (!leb_corrupted[0]) { +		/* LEB 0 is OK */ +		if (leb[1]) +			leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size); +		if (leb_corrupted[1]) { +			ubi_warn("volume table copy #2 is corrupted"); +			err = create_vtbl(ubi, si, 1, leb[0]); +			if (err) +				goto out_free; +			ubi_msg("volume table was restored"); +		} + +		/* Both LEB 1 and LEB 2 are OK and consistent */ +		vfree(leb[1]); +		return leb[0]; +	} else { +		/* LEB 0 is corrupted or does not exist */ +		if (leb[1]) { +			leb_corrupted[1] = vtbl_check(ubi, leb[1]); +			if (leb_corrupted[1] < 0) +				goto out_free; +		} +		if (leb_corrupted[1]) { +			/* Both LEB 0 and LEB 1 are corrupted */ +			ubi_err("both volume tables are corrupted"); +			goto out_free; +		} + +		ubi_warn("volume table copy #1 is corrupted"); +		err = create_vtbl(ubi, si, 0, leb[1]); +		if (err) +			goto out_free; +		ubi_msg("volume table was restored"); + +		vfree(leb[0]); +		return leb[1]; +	} + +out_free: +	vfree(leb[0]); +	vfree(leb[1]); +	return ERR_PTR(err); +} + +/** + * create_empty_lvol - create empty layout volume. + * @ubi: UBI device description object + * @si: scanning information + * + * This function returns volume table contents in case of success and a + * negative error code in case of failure. + */ +static struct ubi_vtbl_record *create_empty_lvol(struct ubi_device *ubi, +						 struct ubi_scan_info *si) +{ +	int i; +	struct ubi_vtbl_record *vtbl; + +	vtbl = vmalloc(ubi->vtbl_size); +	if (!vtbl) +		return ERR_PTR(-ENOMEM); +	memset(vtbl, 0, ubi->vtbl_size); + +	for (i = 0; i < ubi->vtbl_slots; i++) +		memcpy(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE); + +	for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { +		int err; + +		err = create_vtbl(ubi, si, i, vtbl); +		if (err) { +			vfree(vtbl); +			return ERR_PTR(err); +		} +	} + +	return vtbl; +} + +/** + * init_volumes - initialize volume information for existing volumes. + * @ubi: UBI device description object + * @si: scanning information + * @vtbl: volume table + * + * This function allocates volume description objects for existing volumes. + * Returns zero in case of success and a negative error code in case of + * failure. 
+ */ +static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, +			const struct ubi_vtbl_record *vtbl) +{ +	int i, reserved_pebs = 0; +	struct ubi_scan_volume *sv; +	struct ubi_volume *vol; + +	for (i = 0; i < ubi->vtbl_slots; i++) { +		cond_resched(); + +		if (be32_to_cpu(vtbl[i].reserved_pebs) == 0) +			continue; /* Empty record */ + +		vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); +		if (!vol) +			return -ENOMEM; + +		vol->reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs); +		vol->alignment = be32_to_cpu(vtbl[i].alignment); +		vol->data_pad = be32_to_cpu(vtbl[i].data_pad); +		vol->vol_type = vtbl[i].vol_type == UBI_VID_DYNAMIC ? +					UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; +		vol->name_len = be16_to_cpu(vtbl[i].name_len); +		vol->usable_leb_size = ubi->leb_size - vol->data_pad; +		memcpy(vol->name, vtbl[i].name, vol->name_len); +		vol->name[vol->name_len] = '\0'; +		vol->vol_id = i; + +		if (vtbl[i].flags & UBI_VTBL_AUTORESIZE_FLG) { +			/* Auto re-size flag may be set only for one volume */ +			if (ubi->autoresize_vol_id != -1) { +				ubi_err("more then one auto-resize volume (%d " +					"and %d)", ubi->autoresize_vol_id, i); +				kfree(vol); +				return -EINVAL; +			} + +			ubi->autoresize_vol_id = i; +		} + +		ubi_assert(!ubi->volumes[i]); +		ubi->volumes[i] = vol; +		ubi->vol_count += 1; +		vol->ubi = ubi; +		reserved_pebs += vol->reserved_pebs; + +		/* +		 * In case of dynamic volume UBI knows nothing about how many +		 * data is stored there. So assume the whole volume is used. +		 */ +		if (vol->vol_type == UBI_DYNAMIC_VOLUME) { +			vol->used_ebs = vol->reserved_pebs; +			vol->last_eb_bytes = vol->usable_leb_size; +			vol->used_bytes = +				(long long)vol->used_ebs * vol->usable_leb_size; +			continue; +		} + +		/* Static volumes only */ +		sv = ubi_scan_find_sv(si, i); +		if (!sv) { +			/* +			 * No eraseblocks belonging to this volume found. We +			 * don't actually know whether this static volume is +			 * completely corrupted or just contains no data. And +			 * we cannot know this as long as data size is not +			 * stored on flash. So we just assume the volume is +			 * empty. FIXME: this should be handled. +			 */ +			continue; +		} + +		if (sv->leb_count != sv->used_ebs) { +			/* +			 * We found a static volume which misses several +			 * eraseblocks. Treat it as corrupted. 
+			 */ +			ubi_warn("static volume %d misses %d LEBs - corrupted", +				 sv->vol_id, sv->used_ebs - sv->leb_count); +			vol->corrupted = 1; +			continue; +		} + +		vol->used_ebs = sv->used_ebs; +		vol->used_bytes = +			(long long)(vol->used_ebs - 1) * vol->usable_leb_size; +		vol->used_bytes += sv->last_data_size; +		vol->last_eb_bytes = sv->last_data_size; +	} + +	/* And add the layout volume */ +	vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); +	if (!vol) +		return -ENOMEM; + +	vol->reserved_pebs = UBI_LAYOUT_VOLUME_EBS; +	vol->alignment = 1; +	vol->vol_type = UBI_DYNAMIC_VOLUME; +	vol->name_len = sizeof(UBI_LAYOUT_VOLUME_NAME) - 1; +	memcpy(vol->name, UBI_LAYOUT_VOLUME_NAME, vol->name_len + 1); +	vol->usable_leb_size = ubi->leb_size; +	vol->used_ebs = vol->reserved_pebs; +	vol->last_eb_bytes = vol->reserved_pebs; +	vol->used_bytes = +		(long long)vol->used_ebs * (ubi->leb_size - vol->data_pad); +	vol->vol_id = UBI_LAYOUT_VOLUME_ID; +	vol->ref_count = 1; + +	ubi_assert(!ubi->volumes[i]); +	ubi->volumes[vol_id2idx(ubi, vol->vol_id)] = vol; +	reserved_pebs += vol->reserved_pebs; +	ubi->vol_count += 1; +	vol->ubi = ubi; + +	if (reserved_pebs > ubi->avail_pebs) +		ubi_err("not enough PEBs, required %d, available %d", +			reserved_pebs, ubi->avail_pebs); +	ubi->rsvd_pebs += reserved_pebs; +	ubi->avail_pebs -= reserved_pebs; + +	return 0; +} + +/** + * check_sv - check volume scanning information. + * @vol: UBI volume description object + * @sv: volume scanning information + * + * This function returns zero if the volume scanning information is consistent + * to the data read from the volume tabla, and %-EINVAL if not. + */ +static int check_sv(const struct ubi_volume *vol, +		    const struct ubi_scan_volume *sv) +{ +	int err; + +	if (sv->highest_lnum >= vol->reserved_pebs) { +		err = 1; +		goto bad; +	} +	if (sv->leb_count > vol->reserved_pebs) { +		err = 2; +		goto bad; +	} +	if (sv->vol_type != vol->vol_type) { +		err = 3; +		goto bad; +	} +	if (sv->used_ebs > vol->reserved_pebs) { +		err = 4; +		goto bad; +	} +	if (sv->data_pad != vol->data_pad) { +		err = 5; +		goto bad; +	} +	return 0; + +bad: +	ubi_err("bad scanning information, error %d", err); +	ubi_dbg_dump_sv(sv); +	ubi_dbg_dump_vol_info(vol); +	return -EINVAL; +} + +/** + * check_scanning_info - check that scanning information. + * @ubi: UBI device description object + * @si: scanning information + * + * Even though we protect on-flash data by CRC checksums, we still don't trust + * the media. This function ensures that scanning information is consistent to + * the information read from the volume table. Returns zero if the scanning + * information is OK and %-EINVAL if it is not. 
+
+/**
+ * check_scanning_info - check the scanning information.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ *
+ * Even though we protect on-flash data by CRC checksums, we still don't trust
+ * the media. This function ensures that scanning information is consistent
+ * with the information read from the volume table. Returns zero if the
+ * scanning information is OK and %-EINVAL if it is not.
+ */
+static int check_scanning_info(const struct ubi_device *ubi,
+			       struct ubi_scan_info *si)
+{
+	int err, i;
+	struct ubi_scan_volume *sv;
+	struct ubi_volume *vol;
+
+	if (si->vols_found > UBI_INT_VOL_COUNT + ubi->vtbl_slots) {
+		ubi_err("scanning found %d volumes, maximum is %d + %d",
+			si->vols_found, UBI_INT_VOL_COUNT, ubi->vtbl_slots);
+		return -EINVAL;
+	}
+
+	if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT &&
+	    si->highest_vol_id < UBI_INTERNAL_VOL_START) {
+		ubi_err("too large volume ID %d found by scanning",
+			si->highest_vol_id);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
+		cond_resched();
+
+		sv = ubi_scan_find_sv(si, i);
+		vol = ubi->volumes[i];
+		if (!vol) {
+			if (sv)
+				ubi_scan_rm_volume(si, sv);
+			continue;
+		}
+
+		if (vol->reserved_pebs == 0) {
+			ubi_assert(i < ubi->vtbl_slots);
+
+			if (!sv)
+				continue;
+
+			/*
+			 * During scanning we found a volume which does not
+			 * exist according to the information in the volume
+			 * table. This must have happened due to an unclean
+			 * reboot while the volume was being removed. Discard
+			 * these eraseblocks.
+			 */
+			ubi_msg("finish volume %d removal", sv->vol_id);
+			ubi_scan_rm_volume(si, sv);
+		} else if (sv) {
+			err = check_sv(vol, sv);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ubi_read_volume_table - read the volume table.
+ * @ubi: UBI device description object
+ * @si: scanning information
+ *
+ * This function reads the volume table, checks it, recovers from errors if
+ * needed, or creates it if needed. Returns zero in case of success and a
+ * negative error code in case of failure.
+ */
+int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si)
+{
+	int i, err;
+	struct ubi_scan_volume *sv;
+
+	empty_vtbl_record.crc = cpu_to_be32(0xf116c36b);
+
+	/*
+	 * The number of supported volumes is limited by the eraseblock size
+	 * and by the UBI_MAX_VOLUMES constant.
+	 */
+	ubi->vtbl_slots = ubi->leb_size / UBI_VTBL_RECORD_SIZE;
+	if (ubi->vtbl_slots > UBI_MAX_VOLUMES)
+		ubi->vtbl_slots = UBI_MAX_VOLUMES;
+
+	ubi->vtbl_size = ubi->vtbl_slots * UBI_VTBL_RECORD_SIZE;
+	ubi->vtbl_size = ALIGN(ubi->vtbl_size, ubi->min_io_size);
+
+	sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID);
+	if (!sv) {
+		/*
+		 * No logical eraseblocks belonging to the layout volume were
+		 * found. This could mean that the flash is just empty. In
+		 * this case we create an empty layout volume.
+		 *
+		 * But if the flash is not empty, this must be corruption or
+		 * the MTD device just contains garbage.
+		 */
+		if (si->is_empty) {
+			ubi->vtbl = create_empty_lvol(ubi, si);
+			if (IS_ERR(ubi->vtbl))
+				return PTR_ERR(ubi->vtbl);
+		} else {
+			ubi_err("the layout volume was not found");
+			return -EINVAL;
+		}
+	} else {
+		if (sv->leb_count > UBI_LAYOUT_VOLUME_EBS) {
+			/* This must not happen with proper UBI images */
+			dbg_err("too many LEBs (%d) in layout volume",
+				sv->leb_count);
+			return -EINVAL;
+		}
+
+		ubi->vtbl = process_lvol(ubi, si, sv);
+		if (IS_ERR(ubi->vtbl))
+			return PTR_ERR(ubi->vtbl);
+	}
+
+	ubi->avail_pebs = ubi->good_peb_count;
+
+	/*
+	 * The layout volume is OK, initialize the corresponding in-RAM data
+	 * structures.
+	 */
+	err = init_volumes(ubi, si, ubi->vtbl);
+	if (err)
+		goto out_free;
+
+	/*
+	 * Make sure that the scanning information is consistent with the
+	 * information stored in the volume table.
+	 */ +	err = check_scanning_info(ubi, si); +	if (err) +		goto out_free; + +	return 0; + +out_free: +	vfree(ubi->vtbl); +	for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) +		if (ubi->volumes[i]) { +			kfree(ubi->volumes[i]); +			ubi->volumes[i] = NULL; +		} +	return err; +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_vtbl_check - check volume table. + * @ubi: UBI device description object + */ +static void paranoid_vtbl_check(const struct ubi_device *ubi) +{ +	if (vtbl_check(ubi, ubi->vtbl)) { +		ubi_err("paranoid check failed"); +		BUG(); +	} +} + +#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c new file mode 100644 index 000000000..2f9a5e365 --- /dev/null +++ b/drivers/mtd/ubi/wl.c @@ -0,0 +1,1670 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner + */ + +/* + * UBI wear-leveling unit. + * + * This unit is responsible for wear-leveling. It works in terms of physical + * eraseblocks and erase counters and knows nothing about logical eraseblocks, + * volumes, etc. From this unit's perspective all physical eraseblocks are of + * two types - used and free. Used physical eraseblocks are those that were + * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are + * those that were put by the 'ubi_wl_put_peb()' function. + * + * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter + * header. The rest of the physical eraseblock contains only 0xFF bytes. + * + * When physical eraseblocks are returned to the WL unit by means of the + * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is + * done asynchronously in context of the per-UBI device background thread, + * which is also managed by the WL unit. + * + * The wear-leveling is ensured by means of moving the contents of used + * physical eraseblocks with low erase counter to free physical eraseblocks + * with high erase counter. + * + * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick + * an "optimal" physical eraseblock. For example, when it is known that the + * physical eraseblock will be "put" soon because it contains short-term data, + * the WL unit may pick a free physical eraseblock with low erase counter, and + * so forth. + * + * If the WL unit fails to erase a physical eraseblock, it marks it as bad. + * + * This unit is also responsible for scrubbing. If a bit-flip is detected in a + * physical eraseblock, it has to be moved. Technically this is the same as + * moving it for wear-leveling reasons. + * + * As it was said, for the UBI unit all physical eraseblocks are either "free" + * or "used". 
Free eraseblocks are kept in the @wl->free RB-tree, while used
+ * eraseblocks are kept in a set of different RB-trees: @wl->used,
+ * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub.
+ *
+ * Note, in this implementation, we keep a small in-RAM object for each physical
+ * eraseblock. This is surely not a scalable solution. But it appears to be good
+ * enough for moderately large flashes and it is simple. In future, one may
+ * re-work this unit and make it more scalable.
+ *
+ * At the moment this unit does not utilize the sequence number, which was
+ * introduced relatively recently. But it would be wise to do this because the
+ * sequence number of a logical eraseblock characterizes how old it is. For
+ * example, when we move a PEB with low erase counter, and we need to pick the
+ * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
+ * pick a target PEB with an average EC if our PEB is not very "old". This is
+ * room for future re-work of the WL unit.
+ *
+ * FIXME: looks too complex, should be simplified (later).
+ */
+
+#ifdef UBI_LINUX
+#include <linux/slab.h>
+#include <linux/crc32.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#endif
+
+#include <ubi_uboot.h>
+#include "ubi.h"
+
+/* Number of physical eraseblocks reserved for wear-leveling purposes */
+#define WL_RESERVED_PEBS 1
+
+/*
+ * For how many erase cycles short-term, unknown, and long-term physical
+ * eraseblocks are protected.
+ */
+#define ST_PROTECTION 16
+#define U_PROTECTION  10
+#define LT_PROTECTION 4
+
+/*
+ * Maximum difference between two erase counters. If this threshold is
+ * exceeded, the WL unit starts moving data from used physical eraseblocks with
+ * low erase counter to free physical eraseblocks with high erase counter.
+ */
+#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
+
+/*
+ * When a physical eraseblock is moved, the WL unit has to pick the target
+ * physical eraseblock to move to. The simplest way would be just to pick the
+ * one with the highest erase counter. But in certain workloads this could lead
+ * to unlimited wear of one or a few physical eraseblocks. Indeed, imagine a
+ * situation when the picked physical eraseblock is constantly erased after the
+ * data is written to it. So, we have a constant which limits the highest erase
+ * counter of the free physical eraseblock to pick. Namely, the WL unit does
+ * not pick eraseblocks with erase counter greater than the lowest erase
+ * counter plus %WL_FREE_MAX_DIFF.
+ */
+#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
+
+/*
+ * Maximum number of consecutive background thread failures which is enough to
+ * switch to read-only mode.
+ */
+#define WL_MAX_FAILURES 32
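The %WL_FREE_MAX_DIFF bound can be illustrated in a few standalone lines; the threshold value of 4096 below is only an example configuration, not a value taken from the patch:

```c
#include <stdio.h>

#define UBI_WL_THRESHOLD 4096			/* example config value     */
#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)	/* same formula as wl.c     */

/* A free PEB is an acceptable wear-leveling target only while its
 * erase counter stays below lowest_free_ec + WL_FREE_MAX_DIFF. */
static int acceptable_target(int candidate_ec, int lowest_free_ec)
{
	return candidate_ec < lowest_free_ec + WL_FREE_MAX_DIFF;
}

int main(void)
{
	int lowest = 100;

	printf("EC 5000: %s\n", acceptable_target(5000, lowest) ? "ok" : "too worn");
	printf("EC 9000: %s\n", acceptable_target(9000, lowest) ? "ok" : "too worn");
	return 0;
}
```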
+/**
+ * struct ubi_wl_prot_entry - PEB protection entry.
+ * @rb_pnum: link in the @wl->prot.pnum RB-tree
+ * @rb_aec: link in the @wl->prot.aec RB-tree
+ * @abs_ec: the absolute erase counter value when the protection ends
+ * @e: the wear-leveling entry of the physical eraseblock under protection
+ *
+ * When the WL unit returns a physical eraseblock, the physical eraseblock is
+ * protected from being moved for some "time". For this reason, the physical
+ * eraseblock is not directly moved from the @wl->free tree to the @wl->used
+ * tree. There is one more tree in between where this physical eraseblock is
+ * temporarily stored (@wl->prot).
+ *
+ * All this protection stuff is needed because:
+ *  o we don't want to move physical eraseblocks just after we have given them
+ *    to the user; instead, we first want to let users fill them up with data;
+ *
+ *  o there is a chance that the user will put the physical eraseblock very
+ *    soon, so it makes sense not to move it for some time, but wait; this is
+ *    especially important in case of "short term" physical eraseblocks.
+ *
+ * Physical eraseblocks stay protected only for a limited time. But the "time"
+ * is measured in erase cycles in this case. This is implemented with the help
+ * of the absolute erase counter (@wl->abs_ec). When it reaches a certain
+ * value, the physical eraseblocks are moved from the protection trees
+ * (@wl->prot.*) to the @wl->used tree.
+ *
+ * Protected physical eraseblocks are searched by physical eraseblock number
+ * (when they are put) and by the absolute erase counter (to check if it is
+ * time to move them to the @wl->used tree). So there are actually 2 RB-trees
+ * storing the protected physical eraseblocks: @wl->prot.pnum and
+ * @wl->prot.aec. They are referred to as the "protection" trees. The
+ * first one is indexed by the physical eraseblock number. The second one is
+ * indexed by the absolute erase counter. Both trees store
+ * &struct ubi_wl_prot_entry objects.
+ *
+ * Each physical eraseblock has 2 main states: free and used. The former state
+ * corresponds to the @wl->free tree. The latter state is split up into several
+ * sub-states:
+ * o the WL movement is allowed (@wl->used tree);
+ * o the WL movement is temporarily prohibited (@wl->prot.pnum and
+ * @wl->prot.aec trees);
+ * o scrubbing is needed (@wl->scrub tree).
+ *
+ * Depending on the sub-state, wear-leveling entries of the used physical
+ * eraseblocks may be kept in one of those trees.
+ */
+struct ubi_wl_prot_entry {
+	struct rb_node rb_pnum;
+	struct rb_node rb_aec;
+	unsigned long long abs_ec;
+	struct ubi_wl_entry *e;
+};
+
+/**
+ * struct ubi_work - UBI work description data structure.
+ * @list: a link in the list of pending works
+ * @func: worker function
+ * @priv: private data of the worker function
+ *
+ * @e: physical eraseblock to erase
+ * @torture: if the physical eraseblock has to be tortured
+ *
+ * The @func pointer points to the worker function. If the @cancel argument is
+ * not zero, the worker has to free the resources and exit immediately. The
+ * worker has to return zero in case of success and a negative error code in
+ * case of failure.
+ */
+struct ubi_work {
+	struct list_head list;
+	int (*func)(struct ubi_device *ubi, struct ubi_work *wrk, int cancel);
+	/* The below fields are only relevant to erasure works */
+	struct ubi_wl_entry *e;
+	int torture;
+};
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
+static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec);
+static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
+				     struct rb_root *root);
+#else
+#define paranoid_check_ec(ubi, pnum, ec) 0
+#define paranoid_check_in_wl_tree(e, root)
+#endif
+
+/**
+ * wl_tree_add - add a wear-leveling entry to a WL RB-tree.
+ * @e: the wear-leveling entry to add
+ * @root: the root of the tree
+ *
+ * Note, we use (erase counter, physical eraseblock number) pairs as keys in
+ * the @ubi->used and @ubi->free RB-trees.
+ */ +static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root) +{ +	struct rb_node **p, *parent = NULL; + +	p = &root->rb_node; +	while (*p) { +		struct ubi_wl_entry *e1; + +		parent = *p; +		e1 = rb_entry(parent, struct ubi_wl_entry, rb); + +		if (e->ec < e1->ec) +			p = &(*p)->rb_left; +		else if (e->ec > e1->ec) +			p = &(*p)->rb_right; +		else { +			ubi_assert(e->pnum != e1->pnum); +			if (e->pnum < e1->pnum) +				p = &(*p)->rb_left; +			else +				p = &(*p)->rb_right; +		} +	} + +	rb_link_node(&e->rb, parent, p); +	rb_insert_color(&e->rb, root); +} + +/** + * do_work - do one pending work. + * @ubi: UBI device description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int do_work(struct ubi_device *ubi) +{ +	int err; +	struct ubi_work *wrk; + +	cond_resched(); + +	/* +	 * @ubi->work_sem is used to synchronize with the workers. Workers take +	 * it in read mode, so many of them may be doing works at a time. But +	 * the queue flush code has to be sure the whole queue of works is +	 * done, and it takes the mutex in write mode. +	 */ +	down_read(&ubi->work_sem); +	spin_lock(&ubi->wl_lock); +	if (list_empty(&ubi->works)) { +		spin_unlock(&ubi->wl_lock); +		up_read(&ubi->work_sem); +		return 0; +	} + +	wrk = list_entry(ubi->works.next, struct ubi_work, list); +	list_del(&wrk->list); +	ubi->works_count -= 1; +	ubi_assert(ubi->works_count >= 0); +	spin_unlock(&ubi->wl_lock); + +	/* +	 * Call the worker function. Do not touch the work structure +	 * after this call as it will have been freed or reused by that +	 * time by the worker function. +	 */ +	err = wrk->func(ubi, wrk, 0); +	if (err) +		ubi_err("work failed with error code %d", err); +	up_read(&ubi->work_sem); + +	return err; +} + +/** + * produce_free_peb - produce a free physical eraseblock. + * @ubi: UBI device description object + * + * This function tries to make a free PEB by means of synchronous execution of + * pending works. This may be needed if, for example the background thread is + * disabled. Returns zero in case of success and a negative error code in case + * of failure. + */ +static int produce_free_peb(struct ubi_device *ubi) +{ +	int err; + +	spin_lock(&ubi->wl_lock); +	while (!ubi->free.rb_node) { +		spin_unlock(&ubi->wl_lock); + +		dbg_wl("do one work synchronously"); +		err = do_work(ubi); +		if (err) +			return err; + +		spin_lock(&ubi->wl_lock); +	} +	spin_unlock(&ubi->wl_lock); + +	return 0; +} + +/** + * in_wl_tree - check if wear-leveling entry is present in a WL RB-tree. + * @e: the wear-leveling entry to check + * @root: the root of the tree + * + * This function returns non-zero if @e is in the @root RB-tree and zero if it + * is not. + */ +static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) +{ +	struct rb_node *p; + +	p = root->rb_node; +	while (p) { +		struct ubi_wl_entry *e1; + +		e1 = rb_entry(p, struct ubi_wl_entry, rb); + +		if (e->pnum == e1->pnum) { +			ubi_assert(e == e1); +			return 1; +		} + +		if (e->ec < e1->ec) +			p = p->rb_left; +		else if (e->ec > e1->ec) +			p = p->rb_right; +		else { +			ubi_assert(e->pnum != e1->pnum); +			if (e->pnum < e1->pnum) +				p = p->rb_left; +			else +				p = p->rb_right; +		} +	} + +	return 0; +} + +/** + * prot_tree_add - add physical eraseblock to protection trees. 
+ * @ubi: UBI device description object
+ * @e: the physical eraseblock to add
+ * @pe: protection entry object to use
+ * @abs_ec: absolute erase counter value when this physical eraseblock has
+ * to be removed from the protection trees.
+ *
+ * @wl->lock has to be locked.
+ */
+static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e,
+			  struct ubi_wl_prot_entry *pe, int abs_ec)
+{
+	struct rb_node **p, *parent = NULL;
+	struct ubi_wl_prot_entry *pe1;
+
+	pe->e = e;
+	pe->abs_ec = ubi->abs_ec + abs_ec;
+
+	p = &ubi->prot.pnum.rb_node;
+	while (*p) {
+		parent = *p;
+		pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum);
+
+		if (e->pnum < pe1->e->pnum)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&pe->rb_pnum, parent, p);
+	rb_insert_color(&pe->rb_pnum, &ubi->prot.pnum);
+
+	p = &ubi->prot.aec.rb_node;
+	parent = NULL;
+	while (*p) {
+		parent = *p;
+		pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec);
+
+		if (pe->abs_ec < pe1->abs_ec)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&pe->rb_aec, parent, p);
+	rb_insert_color(&pe->rb_aec, &ubi->prot.aec);
+}
+
+/**
+ * find_wl_entry - find wear-leveling entry closest to certain erase counter.
+ * @root: the RB-tree where to look for
+ * @max: highest possible erase counter
+ *
+ * This function looks for a wear-leveling entry with erase counter closest to
+ * @max and less than @max.
+ */
+static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max)
+{
+	struct rb_node *p;
+	struct ubi_wl_entry *e;
+
+	e = rb_entry(rb_first(root), struct ubi_wl_entry, rb);
+	max += e->ec;
+
+	p = root->rb_node;
+	while (p) {
+		struct ubi_wl_entry *e1;
+
+		e1 = rb_entry(p, struct ubi_wl_entry, rb);
+		if (e1->ec >= max)
+			p = p->rb_left;
+		else {
+			p = p->rb_right;
+			e = e1;
+		}
+	}
+
+	return e;
+}
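prot_tree_add() stamps the entry with an expiry value, ubi->abs_ec plus the protection period, and check_protection_over() later releases it once the device-wide counter catches up. A toy model of that window (standalone, with a made-up starting counter):

```c
#include <stdio.h>

#define ST_PROTECTION 16	/* same idea as the constants above */

struct prot { unsigned long long expires; };

/* pe->abs_ec = ubi->abs_ec + abs_ec in prot_tree_add() */
static void protect(struct prot *p, unsigned long long abs_ec, int period)
{
	p->expires = abs_ec + period;
}

/* check_protection_over() releases entries whose expiry value is no
 * longer greater than the current absolute erase counter. */
static int protection_over(const struct prot *p, unsigned long long abs_ec)
{
	return p->expires <= abs_ec;
}

int main(void)
{
	struct prot p;

	protect(&p, 100, ST_PROTECTION);
	printf("abs_ec 110: %d\n", protection_over(&p, 110));	/* 0 */
	printf("abs_ec 120: %d\n", protection_over(&p, 120));	/* 1 */
	return 0;
}
```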
+
+/**
+ * ubi_wl_get_peb - get a physical eraseblock.
+ * @ubi: UBI device description object
+ * @dtype: type of data which will be stored in this physical eraseblock
+ *
+ * This function returns a physical eraseblock in case of success and a
+ * negative error code in case of failure. Might sleep.
+ */
+int ubi_wl_get_peb(struct ubi_device *ubi, int dtype)
+{
+	int err, protect, medium_ec;
+	struct ubi_wl_entry *e, *first, *last;
+	struct ubi_wl_prot_entry *pe;
+
+	ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM ||
+		   dtype == UBI_UNKNOWN);
+
+	pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
+	if (!pe)
+		return -ENOMEM;
+
+retry:
+	spin_lock(&ubi->wl_lock);
+	if (!ubi->free.rb_node) {
+		if (ubi->works_count == 0) {
+			ubi_assert(list_empty(&ubi->works));
+			ubi_err("no free eraseblocks");
+			spin_unlock(&ubi->wl_lock);
+			kfree(pe);
+			return -ENOSPC;
+		}
+		spin_unlock(&ubi->wl_lock);
+
+		err = produce_free_peb(ubi);
+		if (err < 0) {
+			kfree(pe);
+			return err;
+		}
+		goto retry;
+	}
+
+	switch (dtype) {
+		case UBI_LONGTERM:
+			/*
+			 * For long term data we pick a physical eraseblock
+			 * with high erase counter. But the highest erase
+			 * counter we can pick is bounded by the lowest erase
+			 * counter plus %WL_FREE_MAX_DIFF.
+			 */
+			e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
+			protect = LT_PROTECTION;
+			break;
+		case UBI_UNKNOWN:
+			/*
+			 * For unknown data we pick a physical eraseblock with
+			 * medium erase counter. But we must by no means pick a
+			 * physical eraseblock whose erase counter is greater
+			 * than or equal to the lowest erase counter plus
+			 * %WL_FREE_MAX_DIFF.
+			 */
+			first = rb_entry(rb_first(&ubi->free),
+					 struct ubi_wl_entry, rb);
+			last = rb_entry(rb_last(&ubi->free),
+					struct ubi_wl_entry, rb);
+
+			if (last->ec - first->ec < WL_FREE_MAX_DIFF)
+				e = rb_entry(ubi->free.rb_node,
+						struct ubi_wl_entry, rb);
+			else {
+				medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
+				e = find_wl_entry(&ubi->free, medium_ec);
+			}
+			protect = U_PROTECTION;
+			break;
+		case UBI_SHORTTERM:
+			/*
+			 * For short term data we pick a physical eraseblock
+			 * with the lowest erase counter as we expect it will
+			 * be erased soon.
+			 */
+			e = rb_entry(rb_first(&ubi->free),
+				     struct ubi_wl_entry, rb);
+			protect = ST_PROTECTION;
+			break;
+		default:
+			protect = 0;
+			e = NULL;
+			BUG();
+	}
+
+	/*
+	 * Move the physical eraseblock to the protection trees where it will
+	 * be protected from being moved for some time.
+	 */
+	paranoid_check_in_wl_tree(e, &ubi->free);
+	rb_erase(&e->rb, &ubi->free);
+	prot_tree_add(ubi, e, pe, protect);
+
+	dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect);
+	spin_unlock(&ubi->wl_lock);
+
+	return e->pnum;
+}
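The dtype policy in ubi_wl_get_peb() can be modeled outside the kernel. The following standalone sketch flattens the free RB-tree into a sorted array of erase counters; the names and the WL_FREE_MAX_DIFF value are illustrative assumptions:

```c
#include <stdio.h>

#define WL_FREE_MAX_DIFF 8192	/* 2 * threshold; an example value */

enum dtype { LONGTERM, UNKNOWN, SHORTTERM };

/* Toy model of the switch (dtype) above. ecs[] plays the role of
 * the free tree, ordered by erase counter. */
static int pick_ec(const int *ecs, int n, enum dtype t)
{
	int i, medium, pick = ecs[0];
	int limit = ecs[0] + WL_FREE_MAX_DIFF;	/* find_wl_entry() bound */

	if (t == SHORTTERM)
		return ecs[0];		/* lowest EC: expected to be put soon */

	if (t == UNKNOWN) {
		if (ecs[n - 1] - ecs[0] < WL_FREE_MAX_DIFF)
			return ecs[n / 2];	/* whole tree is "close": take a middle entry */
		medium = (ecs[0] + WL_FREE_MAX_DIFF) / 2;
		limit = ecs[0] + medium;	/* aim for a medium EC */
	}

	for (i = 0; i < n; i++)		/* largest EC still below the bound */
		if (ecs[i] < limit)
			pick = ecs[i];
	return pick;
}

int main(void)
{
	int ecs[] = { 5, 80, 400, 9000, 20000 };

	printf("short %d, long %d, unknown %d\n",
	       pick_ec(ecs, 5, SHORTTERM), pick_ec(ecs, 5, LONGTERM),
	       pick_ec(ecs, 5, UNKNOWN));
	return 0;
}
```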
+
+/**
+ * prot_tree_del - remove a physical eraseblock from the protection trees.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock to remove
+ *
+ * This function removes PEB @pnum from the protection trees and returns zero
+ * in case of success, or %-ENODEV if the PEB was not found in the protection
+ * trees.
+ */
+static int prot_tree_del(struct ubi_device *ubi, int pnum)
+{
+	struct rb_node *p;
+	struct ubi_wl_prot_entry *pe = NULL;
+
+	p = ubi->prot.pnum.rb_node;
+	while (p) {
+
+		pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum);
+
+		if (pnum == pe->e->pnum)
+			goto found;
+
+		if (pnum < pe->e->pnum)
+			p = p->rb_left;
+		else
+			p = p->rb_right;
+	}
+
+	return -ENODEV;
+
+found:
+	ubi_assert(pe->e->pnum == pnum);
+	rb_erase(&pe->rb_aec, &ubi->prot.aec);
+	rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
+	kfree(pe);
+	return 0;
+}
+
+/**
+ * sync_erase - synchronously erase a physical eraseblock.
+ * @ubi: UBI device description object
+ * @e: the physical eraseblock to erase
+ * @torture: if the physical eraseblock has to be tortured
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture)
+{
+	int err;
+	struct ubi_ec_hdr *ec_hdr;
+	unsigned long long ec = e->ec;
+
+	dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec);
+
+	err = paranoid_check_ec(ubi, e->pnum, e->ec);
+	if (err > 0)
+		return -EINVAL;
+
+	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
+	if (!ec_hdr)
+		return -ENOMEM;
+
+	err = ubi_io_sync_erase(ubi, e->pnum, torture);
+	if (err < 0)
+		goto out_free;
+
+	ec += err;
+	if (ec > UBI_MAX_ERASECOUNTER) {
+		/*
+		 * Erase counter overflow. Upgrade UBI and use 64-bit
+		 * erase counters internally.
+		 */
+		ubi_err("erase counter overflow at PEB %d, EC %llu",
+			e->pnum, ec);
+		err = -EINVAL;
+		goto out_free;
+	}
+
+	dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec);
+
+	ec_hdr->ec = cpu_to_be64(ec);
+
+	err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr);
+	if (err)
+		goto out_free;
+
+	e->ec = ec;
+	spin_lock(&ubi->wl_lock);
+	if (e->ec > ubi->max_ec)
+		ubi->max_ec = e->ec;
+	spin_unlock(&ubi->wl_lock);
+
+out_free:
+	kfree(ec_hdr);
+	return err;
+}
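sync_erase() trusts the low-level erase routine to report how many erase operations actually took place (torture testing may erase a PEB several times) and adds that count to the erase counter before guarding against overflow. A toy version of that bookkeeping, with MAX_EC standing in for UBI_MAX_ERASECOUNTER:

```c
#include <stdint.h>
#include <stdio.h>

#define MAX_EC 0x7FFFFFFFULL	/* stand-in for UBI_MAX_ERASECOUNTER */

/* Mirrors the "ec += err" step: erasures_done is the count returned
 * by the (successful) low-level erase, always at least 1. */
static int bump_ec(uint64_t *ec, int erasures_done)
{
	*ec += erasures_done;
	if (*ec > MAX_EC)
		return -1;	/* overflow: would need 64-bit on-flash ECs */
	return 0;
}

int main(void)
{
	uint64_t ec = 41;

	if (bump_ec(&ec, 1) == 0)
		printf("new EC %llu\n", (unsigned long long)ec);	/* 42 */
	return 0;
}
```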
+
+/**
+ * check_protection_over - check if it is time to stop protecting some
+ * physical eraseblocks.
+ * @ubi: UBI device description object
+ *
+ * This function is called after each erase operation, when the absolute erase
+ * counter is incremented, to check whether some physical eraseblocks no
+ * longer have to be protected. These physical eraseblocks are moved from the
+ * protection trees to the used tree.
+ */
+static void check_protection_over(struct ubi_device *ubi)
+{
+	struct ubi_wl_prot_entry *pe;
+
+	/*
+	 * There may be several protected physical eraseblocks to remove;
+	 * process them all.
+	 */
+	while (1) {
+		spin_lock(&ubi->wl_lock);
+		if (!ubi->prot.aec.rb_node) {
+			spin_unlock(&ubi->wl_lock);
+			break;
+		}
+
+		pe = rb_entry(rb_first(&ubi->prot.aec),
+			      struct ubi_wl_prot_entry, rb_aec);
+
+		if (pe->abs_ec > ubi->abs_ec) {
+			spin_unlock(&ubi->wl_lock);
+			break;
+		}
+
+		dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu",
+		       pe->e->pnum, ubi->abs_ec, pe->abs_ec);
+		rb_erase(&pe->rb_aec, &ubi->prot.aec);
+		rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
+		wl_tree_add(pe->e, &ubi->used);
+		spin_unlock(&ubi->wl_lock);
+
+		kfree(pe);
+		cond_resched();
+	}
+}
+
+/**
+ * schedule_ubi_work - schedule a work.
+ * @ubi: UBI device description object
+ * @wrk: the work to schedule
+ *
+ * This function enqueues a work defined by @wrk to the tail of the pending
+ * works list.
+ */
+static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
+{
+	spin_lock(&ubi->wl_lock);
+	list_add_tail(&wrk->list, &ubi->works);
+	ubi_assert(ubi->works_count >= 0);
+	ubi->works_count += 1;
+	if (ubi->thread_enabled)
+		wake_up_process(ubi->bgt_thread);
+	spin_unlock(&ubi->wl_lock);
+}
+
+static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
+			int cancel);
+
+/**
+ * schedule_erase - schedule an erase work.
+ * @ubi: UBI device description object
+ * @e: the WL entry of the physical eraseblock to erase
+ * @torture: if the physical eraseblock has to be tortured
+ *
+ * This function returns zero in case of success and %-ENOMEM in case of
+ * failure.
+ */
+static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
+			  int torture)
+{
+	struct ubi_work *wl_wrk;
+
+	dbg_wl("schedule erasure of PEB %d, EC %d, torture %d",
+	       e->pnum, e->ec, torture);
+
+	wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
+	if (!wl_wrk)
+		return -ENOMEM;
+
+	wl_wrk->func = &erase_worker;
+	wl_wrk->e = e;
+	wl_wrk->torture = torture;
+
+	schedule_ubi_work(ubi, wl_wrk);
+	return 0;
+}
+
+/**
+ * wear_leveling_worker - wear-leveling worker function.
+ * @ubi: UBI device description object
+ * @wrk: the work object
+ * @cancel: non-zero if the worker has to free memory and exit
+ *
+ * This function copies a more worn out physical eraseblock to a less worn out
+ * one. Returns zero in case of success and a negative error code in case of
+ * failure.
+ */
+static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
+				int cancel)
+{
+	int err, put = 0, scrubbing = 0, protect = 0;
+	struct ubi_wl_prot_entry *uninitialized_var(pe);
+	struct ubi_wl_entry *e1, *e2;
+	struct ubi_vid_hdr *vid_hdr;
+
+	kfree(wrk);
+
+	if (cancel)
+		return 0;
+
+	vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
+	if (!vid_hdr)
+		return -ENOMEM;
+
+	mutex_lock(&ubi->move_mutex);
+	spin_lock(&ubi->wl_lock);
+	ubi_assert(!ubi->move_from && !ubi->move_to);
+	ubi_assert(!ubi->move_to_put);
+
+	if (!ubi->free.rb_node ||
+	    (!ubi->used.rb_node && !ubi->scrub.rb_node)) {
+		/*
+		 * No free physical eraseblocks? Well, they must be waiting in
+		 * the queue to be erased. Cancel movement - it will be
+		 * triggered again when a free physical eraseblock appears.
+		 *
+		 * No used physical eraseblocks? They must be temporarily
+		 * protected from being moved. They will be moved to the
+		 * @ubi->used tree later and the wear-leveling will be
+		 * triggered again.
+		 */
+		dbg_wl("cancel WL, a list is empty: free %d, used %d",
+		       !ubi->free.rb_node, !ubi->used.rb_node);
+		goto out_cancel;
+	}
+
+	if (!ubi->scrub.rb_node) {
+		/*
+		 * Now pick the least worn-out used physical eraseblock and a
+		 * highly worn-out free physical eraseblock. If the erase
+		 * counters differ enough, start wear-leveling.
+		 */
+		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb);
+		e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
+
+		if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
+			dbg_wl("no WL needed: min used EC %d, max free EC %d",
+			       e1->ec, e2->ec);
+			goto out_cancel;
+		}
+		paranoid_check_in_wl_tree(e1, &ubi->used);
+		rb_erase(&e1->rb, &ubi->used);
+		dbg_wl("move PEB %d EC %d to PEB %d EC %d",
+		       e1->pnum, e1->ec, e2->pnum, e2->ec);
+	} else {
+		/* Perform scrubbing */
+		scrubbing = 1;
+		e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb);
+		e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
+		paranoid_check_in_wl_tree(e1, &ubi->scrub);
+		rb_erase(&e1->rb, &ubi->scrub);
+		dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum);
+	}
+
+	paranoid_check_in_wl_tree(e2, &ubi->free);
+	rb_erase(&e2->rb, &ubi->free);
+	ubi->move_from = e1;
+	ubi->move_to = e2;
+	spin_unlock(&ubi->wl_lock);
+
+	/*
+	 * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum.
+	 * We so far do not know which logical eraseblock our physical
+	 * eraseblock (@e1) belongs to. We have to read the volume identifier
+	 * header first.
+	 *
+	 * Note, we are protected from this PEB being unmapped and erased. The
+	 * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB
+	 * which is being moved was unmapped.
+	 */
+
+	err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0);
+	if (err && err != UBI_IO_BITFLIPS) {
+		if (err == UBI_IO_PEB_FREE) {
+			/*
+			 * We are trying to move a PEB without a VID header.
+			 * UBI always writes VID headers shortly after the PEB
+			 * was given out, so we have a situation where it did
+			 * not have a chance to write it because it was
+			 * preempted. Just re-schedule the work, so that next
+			 * time it will likely have the VID header in place.
+			 */
+			dbg_wl("PEB %d has no VID header", e1->pnum);
+			goto out_not_moved;
+		}
+
+		ubi_err("error %d while reading VID header from PEB %d",
+			err, e1->pnum);
+		if (err > 0)
+			err = -EIO;
+		goto out_error;
+	}
+
+	err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
+	if (err) {
+
+		if (err < 0)
+			goto out_error;
+		if (err == 1)
+			goto out_not_moved;
+
+		/*
+		 * For some reason the LEB was not moved - it might be because
+		 * the volume is being deleted. We should prevent this PEB from
+		 * being selected for wear-leveling movement for some "time",
+		 * so put it to the protection tree.
+		 */
+
+		dbg_wl("cancelled moving PEB %d", e1->pnum);
+		pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
+		if (!pe) {
+			err = -ENOMEM;
+			goto out_error;
+		}
+
+		protect = 1;
+	}
+
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	spin_lock(&ubi->wl_lock);
+	if (protect)
+		prot_tree_add(ubi, e1, pe, protect);
+	if (!ubi->move_to_put)
+		wl_tree_add(e2, &ubi->used);
+	else
+		put = 1;
+	ubi->move_from = ubi->move_to = NULL;
+	ubi->move_to_put = ubi->wl_scheduled = 0;
+	spin_unlock(&ubi->wl_lock);
+
+	if (put) {
+		/*
+		 * Well, the target PEB was put meanwhile, schedule it for
+		 * erasure.
+		 */
+		dbg_wl("PEB %d was put meanwhile, erase", e2->pnum);
+		err = schedule_erase(ubi, e2, 0);
+		if (err)
+			goto out_error;
+	}
+
+	if (!protect) {
+		err = schedule_erase(ubi, e1, 0);
+		if (err)
+			goto out_error;
+	}
+
+	dbg_wl("done");
+	mutex_unlock(&ubi->move_mutex);
+	return 0;
+
+	/*
+	 * For some reason the LEB was not moved; it might be an error, it
+	 * might be something else. @e1 was not changed, so return it back.
+	 * @e2 might be changed, schedule it for erasure.
+	 */
+out_not_moved:
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	spin_lock(&ubi->wl_lock);
+	if (scrubbing)
+		wl_tree_add(e1, &ubi->scrub);
+	else
+		wl_tree_add(e1, &ubi->used);
+	ubi->move_from = ubi->move_to = NULL;
+	ubi->move_to_put = ubi->wl_scheduled = 0;
+	spin_unlock(&ubi->wl_lock);
+
+	err = schedule_erase(ubi, e2, 0);
+	if (err)
+		goto out_error;
+
+	mutex_unlock(&ubi->move_mutex);
+	return 0;
+
+out_error:
+	ubi_err("error %d while moving PEB %d to PEB %d",
+		err, e1->pnum, e2->pnum);
+
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	spin_lock(&ubi->wl_lock);
+	ubi->move_from = ubi->move_to = NULL;
+	ubi->move_to_put = ubi->wl_scheduled = 0;
+	spin_unlock(&ubi->wl_lock);
+
+	kmem_cache_free(ubi_wl_entry_slab, e1);
+	kmem_cache_free(ubi_wl_entry_slab, e2);
+	ubi_ro_mode(ubi);
+
+	mutex_unlock(&ubi->move_mutex);
+	return err;
+
+out_cancel:
+	ubi->wl_scheduled = 0;
+	spin_unlock(&ubi->wl_lock);
+	mutex_unlock(&ubi->move_mutex);
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	return 0;
+}
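The trigger test used in wear_leveling_worker() above, and again in ensure_wear_leveling() below, reduces to a single comparison; a standalone sketch, with an assumed example threshold of 4096:

```c
#include <stdio.h>

#define UBI_WL_THRESHOLD 4096	/* example config value */

/* Move data only when the least-worn used PEB and the most-worn
 * eligible free PEB differ by at least the threshold. */
static int wl_needed(int min_used_ec, int max_free_ec)
{
	return max_free_ec - min_used_ec >= UBI_WL_THRESHOLD;
}

int main(void)
{
	printf("%d\n", wl_needed(10, 100));	/* 0: no WL needed */
	printf("%d\n", wl_needed(10, 8000));	/* 1: schedule WL  */
	return 0;
}
```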
+
+/**
+ * ensure_wear_leveling - schedule wear-leveling if it is needed.
+ * @ubi: UBI device description object
+ *
+ * This function checks if it is time to start wear-leveling and schedules it
+ * if yes. This function returns zero in case of success and a negative error
+ * code in case of failure.
+ */
+static int ensure_wear_leveling(struct ubi_device *ubi)
+{
+	int err = 0;
+	struct ubi_wl_entry *e1;
+	struct ubi_wl_entry *e2;
+	struct ubi_work *wrk;
+
+	spin_lock(&ubi->wl_lock);
+	if (ubi->wl_scheduled)
+		/* Wear-leveling is already in the work queue */
+		goto out_unlock;
+
+	/*
+	 * If the ubi->scrub tree is not empty, scrubbing is needed, and the
+	 * WL worker has to be scheduled anyway.
+	 */
+	if (!ubi->scrub.rb_node) {
+		if (!ubi->used.rb_node || !ubi->free.rb_node)
+			/* No physical eraseblocks - no deal */
+			goto out_unlock;
+
+		/*
+		 * We schedule wear-leveling only if the difference between the
+		 * lowest erase counter of used physical eraseblocks and a high
+		 * erase counter of free physical eraseblocks is greater than
+		 * %UBI_WL_THRESHOLD.
+		 */
+		e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb);
+		e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
+
+		if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
+			goto out_unlock;
+		dbg_wl("schedule wear-leveling");
+	} else
+		dbg_wl("schedule scrubbing");
+
+	ubi->wl_scheduled = 1;
+	spin_unlock(&ubi->wl_lock);
+
+	wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
+	if (!wrk) {
+		err = -ENOMEM;
+		goto out_cancel;
+	}
+
+	wrk->func = &wear_leveling_worker;
+	schedule_ubi_work(ubi, wrk);
+	return err;
+
+out_cancel:
+	spin_lock(&ubi->wl_lock);
+	ubi->wl_scheduled = 0;
+out_unlock:
+	spin_unlock(&ubi->wl_lock);
+	return err;
+}
+
+/**
+ * erase_worker - physical eraseblock erase worker function.
+ * @ubi: UBI device description object
+ * @wl_wrk: the work object
+ * @cancel: non-zero if the worker has to free memory and exit
+ *
+ * This function erases a physical eraseblock and performs torture testing if
+ * needed. It also takes care of marking the physical eraseblock bad if
+ * needed. Returns zero in case of success and a negative error code in case
+ * of failure.
+ */
+static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
+			int cancel)
+{
+	struct ubi_wl_entry *e = wl_wrk->e;
+	int pnum = e->pnum, err, need;
+
+	if (cancel) {
+		dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec);
+		kfree(wl_wrk);
+		kmem_cache_free(ubi_wl_entry_slab, e);
+		return 0;
+	}
+
+	dbg_wl("erase PEB %d EC %d", pnum, e->ec);
+
+	err = sync_erase(ubi, e, wl_wrk->torture);
+	if (!err) {
+		/* Fine, we've erased it successfully */
+		kfree(wl_wrk);
+
+		spin_lock(&ubi->wl_lock);
+		ubi->abs_ec += 1;
+		wl_tree_add(e, &ubi->free);
+		spin_unlock(&ubi->wl_lock);
+
+		/*
+		 * One more erase operation has happened, take care of
+		 * protected physical eraseblocks.
+		 */
+		check_protection_over(ubi);
+
+		/* And take care of wear-leveling */
+		err = ensure_wear_leveling(ubi);
+		return err;
+	}
+
+	ubi_err("failed to erase PEB %d, error %d", pnum, err);
+	kfree(wl_wrk);
+	kmem_cache_free(ubi_wl_entry_slab, e);
+
+	if (err == -EINTR || err == -ENOMEM || err == -EAGAIN ||
+	    err == -EBUSY) {
+		int err1;
+
+		/* Re-schedule the LEB for erasure */
+		err1 = schedule_erase(ubi, e, 0);
+		if (err1) {
+			err = err1;
+			goto out_ro;
+		}
+		return err;
+	} else if (err != -EIO) {
+		/*
+		 * If this is not %-EIO, we have no idea what to do. Scheduling
+		 * this physical eraseblock for erasure again would cause
+		 * errors again and again. Well, let's switch to RO mode.
+		 */
+		goto out_ro;
+	}
+
+	/* It is %-EIO, the PEB went bad */
+
+	if (!ubi->bad_allowed) {
+		ubi_err("bad physical eraseblock %d detected", pnum);
+		goto out_ro;
+	}
+
+	spin_lock(&ubi->volumes_lock);
+	need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs + 1;
+	if (need > 0) {
+		need = ubi->avail_pebs >= need ?
need : ubi->avail_pebs; +		ubi->avail_pebs -= need; +		ubi->rsvd_pebs += need; +		ubi->beb_rsvd_pebs += need; +		if (need > 0) +			ubi_msg("reserve more %d PEBs", need); +	} + +	if (ubi->beb_rsvd_pebs == 0) { +		spin_unlock(&ubi->volumes_lock); +		ubi_err("no reserved physical eraseblocks"); +		goto out_ro; +	} + +	spin_unlock(&ubi->volumes_lock); +	ubi_msg("mark PEB %d as bad", pnum); + +	err = ubi_io_mark_bad(ubi, pnum); +	if (err) +		goto out_ro; + +	spin_lock(&ubi->volumes_lock); +	ubi->beb_rsvd_pebs -= 1; +	ubi->bad_peb_count += 1; +	ubi->good_peb_count -= 1; +	ubi_calculate_reserved(ubi); +	if (ubi->beb_rsvd_pebs == 0) +		ubi_warn("last PEB from the reserved pool was used"); +	spin_unlock(&ubi->volumes_lock); + +	return err; + +out_ro: +	ubi_ro_mode(ubi); +	return err; +} + +/** + * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit. + * @ubi: UBI device description object + * @pnum: physical eraseblock to return + * @torture: if this physical eraseblock has to be tortured + * + * This function is called to return physical eraseblock @pnum to the pool of + * free physical eraseblocks. The @torture flag has to be set if an I/O error + * occurred to this @pnum and it has to be tested. This function returns zero + * in case of success, and a negative error code in case of failure. + */ +int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) +{ +	int err; +	struct ubi_wl_entry *e; + +	dbg_wl("PEB %d", pnum); +	ubi_assert(pnum >= 0); +	ubi_assert(pnum < ubi->peb_count); + +retry: +	spin_lock(&ubi->wl_lock); +	e = ubi->lookuptbl[pnum]; +	if (e == ubi->move_from) { +		/* +		 * User is putting the physical eraseblock which was selected to +		 * be moved. It will be scheduled for erasure in the +		 * wear-leveling worker. +		 */ +		dbg_wl("PEB %d is being moved, wait", pnum); +		spin_unlock(&ubi->wl_lock); + +		/* Wait for the WL worker by taking the @ubi->move_mutex */ +		mutex_lock(&ubi->move_mutex); +		mutex_unlock(&ubi->move_mutex); +		goto retry; +	} else if (e == ubi->move_to) { +		/* +		 * User is putting the physical eraseblock which was selected +		 * as the target the data is moved to. It may happen if the EBA +		 * unit already re-mapped the LEB in 'ubi_eba_copy_leb()' but +		 * the WL unit has not put the PEB to the "used" tree yet, but +		 * it is about to do this. So we just set a flag which will +		 * tell the WL worker that the PEB is not needed anymore and +		 * should be scheduled for erasure. +		 */ +		dbg_wl("PEB %d is the target of data moving", pnum); +		ubi_assert(!ubi->move_to_put); +		ubi->move_to_put = 1; +		spin_unlock(&ubi->wl_lock); +		return 0; +	} else { +		if (in_wl_tree(e, &ubi->used)) { +			paranoid_check_in_wl_tree(e, &ubi->used); +			rb_erase(&e->rb, &ubi->used); +		} else if (in_wl_tree(e, &ubi->scrub)) { +			paranoid_check_in_wl_tree(e, &ubi->scrub); +			rb_erase(&e->rb, &ubi->scrub); +		} else { +			err = prot_tree_del(ubi, e->pnum); +			if (err) { +				ubi_err("PEB %d not found", pnum); +				ubi_ro_mode(ubi); +				spin_unlock(&ubi->wl_lock); +				return err; +			} +		} +	} +	spin_unlock(&ubi->wl_lock); + +	err = schedule_erase(ubi, e, torture); +	if (err) { +		spin_lock(&ubi->wl_lock); +		wl_tree_add(e, &ubi->used); +		spin_unlock(&ubi->wl_lock); +	} + +	return err; +} + +/** + * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing. 
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock to schedule
+ *
+ * If a bit-flip in a physical eraseblock is detected, this physical eraseblock
+ * needs scrubbing. This function schedules a physical eraseblock for
+ * scrubbing which is done in the background. This function returns zero in
+ * case of success and a negative error code in case of failure.
+ */
+int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
+{
+	struct ubi_wl_entry *e;
+
+	ubi_msg("schedule PEB %d for scrubbing", pnum);
+
+retry:
+	spin_lock(&ubi->wl_lock);
+	e = ubi->lookuptbl[pnum];
+	if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub)) {
+		spin_unlock(&ubi->wl_lock);
+		return 0;
+	}
+
+	if (e == ubi->move_to) {
+		/*
+		 * This physical eraseblock was used to move data to. The data
+		 * was moved but the PEB was not yet inserted into the proper
+		 * tree. We should just wait a little and let the WL worker
+		 * proceed.
+		 */
+		spin_unlock(&ubi->wl_lock);
+		dbg_wl("the PEB %d is not in proper tree, retry", pnum);
+		yield();
+		goto retry;
+	}
+
+	if (in_wl_tree(e, &ubi->used)) {
+		paranoid_check_in_wl_tree(e, &ubi->used);
+		rb_erase(&e->rb, &ubi->used);
+	} else {
+		int err;
+
+		err = prot_tree_del(ubi, e->pnum);
+		if (err) {
+			ubi_err("PEB %d not found", pnum);
+			ubi_ro_mode(ubi);
+			spin_unlock(&ubi->wl_lock);
+			return err;
+		}
+	}
+
+	wl_tree_add(e, &ubi->scrub);
+	spin_unlock(&ubi->wl_lock);
+
+	/*
+	 * Technically scrubbing is the same as wear-leveling, so it is done
+	 * by the WL worker.
+	 */
+	return ensure_wear_leveling(ubi);
+}
+
+/**
+ * ubi_wl_flush - flush all pending works.
+ * @ubi: UBI device description object
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+int ubi_wl_flush(struct ubi_device *ubi)
+{
+	int err;
+
+	/*
+	 * Erase while the pending works queue is not empty, but not more than
+	 * the number of currently pending works.
+	 */
+	dbg_wl("flush (%d pending works)", ubi->works_count);
+	while (ubi->works_count) {
+		err = do_work(ubi);
+		if (err)
+			return err;
+	}
+
+	/*
+	 * Make sure all the works which have been done in parallel are
+	 * finished.
+	 */
+	down_write(&ubi->work_sem);
+	up_write(&ubi->work_sem);
+
+	/*
+	 * And in case the last one was the WL worker which cancelled the LEB
+	 * movement, flush again.
+	 */
+	while (ubi->works_count) {
+		dbg_wl("flush more (%d pending works)", ubi->works_count);
+		err = do_work(ubi);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * tree_destroy - destroy an RB-tree.
+ * @root: the root of the tree to destroy
+ */
+static void tree_destroy(struct rb_root *root)
+{
+	struct rb_node *rb;
+	struct ubi_wl_entry *e;
+
+	rb = root->rb_node;
+	while (rb) {
+		if (rb->rb_left)
+			rb = rb->rb_left;
+		else if (rb->rb_right)
+			rb = rb->rb_right;
+		else {
+			e = rb_entry(rb, struct ubi_wl_entry, rb);
+
+			rb = rb_parent(rb);
+			if (rb) {
+				if (rb->rb_left == &e->rb)
+					rb->rb_left = NULL;
+				else
+					rb->rb_right = NULL;
+			}
+
+			kmem_cache_free(ubi_wl_entry_slab, e);
+		}
+	}
+}
+
+/**
+ * ubi_thread - UBI background thread.
+ * @u: the UBI device description object pointer + */ +int ubi_thread(void *u) +{ +	int failures = 0; +	struct ubi_device *ubi = u; + +	ubi_msg("background thread \"%s\" started, PID %d", +		ubi->bgt_name, task_pid_nr(current)); + +	set_freezable(); +	for (;;) { +		int err; + +		if (kthread_should_stop()) +			break; + +		if (try_to_freeze()) +			continue; + +		spin_lock(&ubi->wl_lock); +		if (list_empty(&ubi->works) || ubi->ro_mode || +			       !ubi->thread_enabled) { +			set_current_state(TASK_INTERRUPTIBLE); +			spin_unlock(&ubi->wl_lock); +			schedule(); +			continue; +		} +		spin_unlock(&ubi->wl_lock); + +		err = do_work(ubi); +		if (err) { +			ubi_err("%s: work failed with error code %d", +				ubi->bgt_name, err); +			if (failures++ > WL_MAX_FAILURES) { +				/* +				 * Too many failures, disable the thread and +				 * switch to read-only mode. +				 */ +				ubi_msg("%s: %d consecutive failures", +					ubi->bgt_name, WL_MAX_FAILURES); +				ubi_ro_mode(ubi); +				break; +			} +		} else +			failures = 0; + +		cond_resched(); +	} + +	dbg_wl("background thread \"%s\" is killed", ubi->bgt_name); +	return 0; +} + +/** + * cancel_pending - cancel all pending works. + * @ubi: UBI device description object + */ +static void cancel_pending(struct ubi_device *ubi) +{ +	while (!list_empty(&ubi->works)) { +		struct ubi_work *wrk; + +		wrk = list_entry(ubi->works.next, struct ubi_work, list); +		list_del(&wrk->list); +		wrk->func(ubi, wrk, 1); +		ubi->works_count -= 1; +		ubi_assert(ubi->works_count >= 0); +	} +} + +/** + * ubi_wl_init_scan - initialize the wear-leveling unit using scanning + * information. + * @ubi: UBI device description object + * @si: scanning information + * + * This function returns zero in case of success, and a negative error code in + * case of failure. 
+ */
+int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
+{
+	int err;
+	struct rb_node *rb1, *rb2;
+	struct ubi_scan_volume *sv;
+	struct ubi_scan_leb *seb, *tmp;
+	struct ubi_wl_entry *e;
+
+	ubi->used = ubi->free = ubi->scrub = RB_ROOT;
+	ubi->prot.pnum = ubi->prot.aec = RB_ROOT;
+	spin_lock_init(&ubi->wl_lock);
+	mutex_init(&ubi->move_mutex);
+	init_rwsem(&ubi->work_sem);
+	ubi->max_ec = si->max_ec;
+	INIT_LIST_HEAD(&ubi->works);
+
+	sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num);
+
+	err = -ENOMEM;
+	ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL);
+	if (!ubi->lookuptbl)
+		return err;
+
+	list_for_each_entry_safe(seb, tmp, &si->erase, u.list) {
+		cond_resched();
+
+		e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
+		if (!e)
+			goto out_free;
+
+		e->pnum = seb->pnum;
+		e->ec = seb->ec;
+		ubi->lookuptbl[e->pnum] = e;
+		if (schedule_erase(ubi, e, 0)) {
+			kmem_cache_free(ubi_wl_entry_slab, e);
+			goto out_free;
+		}
+	}
+
+	list_for_each_entry(seb, &si->free, u.list) {
+		cond_resched();
+
+		e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
+		if (!e)
+			goto out_free;
+
+		e->pnum = seb->pnum;
+		e->ec = seb->ec;
+		ubi_assert(e->ec >= 0);
+		wl_tree_add(e, &ubi->free);
+		ubi->lookuptbl[e->pnum] = e;
+	}
+
+	list_for_each_entry(seb, &si->corr, u.list) {
+		cond_resched();
+
+		e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
+		if (!e)
+			goto out_free;
+
+		e->pnum = seb->pnum;
+		e->ec = seb->ec;
+		ubi->lookuptbl[e->pnum] = e;
+		if (schedule_erase(ubi, e, 0)) {
+			kmem_cache_free(ubi_wl_entry_slab, e);
+			goto out_free;
+		}
+	}
+
+	ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {
+		ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) {
+			cond_resched();
+
+			e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
+			if (!e)
+				goto out_free;
+
+			e->pnum = seb->pnum;
+			e->ec = seb->ec;
+			ubi->lookuptbl[e->pnum] = e;
+			if (!seb->scrub) {
+				dbg_wl("add PEB %d EC %d to the used tree",
+				       e->pnum, e->ec);
+				wl_tree_add(e, &ubi->used);
+			} else {
+				dbg_wl("add PEB %d EC %d to the scrub tree",
+				       e->pnum, e->ec);
+				wl_tree_add(e, &ubi->scrub);
+			}
+		}
+	}
+
+	if (ubi->avail_pebs < WL_RESERVED_PEBS) {
+		ubi_err("not enough physical eraseblocks (%d, need %d)",
+			ubi->avail_pebs, WL_RESERVED_PEBS);
+		goto out_free;
+	}
+	ubi->avail_pebs -= WL_RESERVED_PEBS;
+	ubi->rsvd_pebs += WL_RESERVED_PEBS;
+
+	/* Schedule wear-leveling if needed */
+	err = ensure_wear_leveling(ubi);
+	if (err)
+		goto out_free;
+
+	return 0;
+
+out_free:
+	cancel_pending(ubi);
+	tree_destroy(&ubi->used);
+	tree_destroy(&ubi->free);
+	tree_destroy(&ubi->scrub);
+	kfree(ubi->lookuptbl);
+	return err;
+}
+
+/**
+ * protection_trees_destroy - destroy the protection RB-trees.
+ * @ubi: UBI device description object
+ */
+static void protection_trees_destroy(struct ubi_device *ubi)
+{
+	struct rb_node *rb;
+	struct ubi_wl_prot_entry *pe;
+
+	rb = ubi->prot.aec.rb_node;
+	while (rb) {
+		if (rb->rb_left)
+			rb = rb->rb_left;
+		else if (rb->rb_right)
+			rb = rb->rb_right;
+		else {
+			pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec);
+
+			rb = rb_parent(rb);
+			if (rb) {
+				if (rb->rb_left == &pe->rb_aec)
+					rb->rb_left = NULL;
+				else
+					rb->rb_right = NULL;
+			}
+
+			kmem_cache_free(ubi_wl_entry_slab, pe->e);
+			kfree(pe);
+		}
+	}
+}
+
+/**
+ * ubi_wl_close - close the wear-leveling unit.
+ * @ubi: UBI device description object
+ */
+void ubi_wl_close(struct ubi_device *ubi)
+{
+	dbg_wl("close the UBI wear-leveling unit");
+
+	cancel_pending(ubi);
+	protection_trees_destroy(ubi);
+	tree_destroy(&ubi->used);
+	tree_destroy(&ubi->free);
+	tree_destroy(&ubi->scrub);
+	kfree(ubi->lookuptbl);
+}
+
+#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
+
+/**
+ * paranoid_check_ec - make sure that the erase counter of a physical eraseblock
+ * is correct.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock number to check
+ * @ec: the erase counter to check
+ *
+ * This function returns zero if the erase counter of physical eraseblock @pnum
+ * is equal to @ec, %1 if not, and a negative error code if an error
+ * occurred.
+ */
+static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec)
+{
+	int err;
+	long long read_ec;
+	struct ubi_ec_hdr *ec_hdr;
+
+	ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
+	if (!ec_hdr)
+		return -ENOMEM;
+
+	err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0);
+	if (err && err != UBI_IO_BITFLIPS) {
+		/* The header does not have to exist */
+		err = 0;
+		goto out_free;
+	}
+
+	read_ec = be64_to_cpu(ec_hdr->ec);
+	if (ec != read_ec) {
+		ubi_err("paranoid check failed for PEB %d", pnum);
+		ubi_err("read EC is %lld, should be %d", read_ec, ec);
+		ubi_dbg_dump_stack();
+		err = 1;
+	} else
+		err = 0;
+
+out_free:
+	kfree(ec_hdr);
+	return err;
+}
+
+/**
+ * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present
+ * in a WL RB-tree.
+ * @e: the wear-leveling entry to check
+ * @root: the root of the tree
+ *
+ * This function returns zero if @e is in the @root RB-tree and %1 if it
+ * is not.
+ */
+static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
+				     struct rb_root *root)
+{
+	if (in_wl_tree(e, root))
+		return 0;
+
+	ubi_err("paranoid check failed for PEB %d, EC %d, RB-tree %p ",
+		e->pnum, e->ec, root);
+	ubi_dbg_dump_stack();
+	return 1;
+}
+
+#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
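When CONFIG_MTD_UBI_DEBUG_PARANOID is unset, the paranoid helpers above compile away entirely through the #else macro stubs near the top of the file, so they cost nothing at runtime. A standalone sketch of the same compile-out pattern (PARANOID is a hypothetical stand-in for the config option):

```c
#include <stdio.h>

/* Define PARANOID to enable the real check; otherwise the macro
 * version compiles away, like paranoid_check_ec() does in wl.c. */
#ifdef PARANOID
static int check_value(int v)
{
	if (v >= 0)
		return 0;
	fprintf(stderr, "paranoid check failed: %d\n", v);
	return 1;
}
#else
#define check_value(v) 0	/* no runtime cost at all */
#endif

int main(void)
{
	return check_value(42);
}
```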