diff options
| -rw-r--r-- | Makefile | 3 | ||||
| -rw-r--r-- | fs/Makefile | 1 | ||||
| -rw-r--r-- | fs/ubifs/Makefile | 52 | ||||
| -rw-r--r-- | fs/ubifs/budget.c | 113 | ||||
| -rw-r--r-- | fs/ubifs/crc16.c | 60 | ||||
| -rw-r--r-- | fs/ubifs/crc16.h | 30 | ||||
| -rw-r--r-- | fs/ubifs/debug.c | 156 | ||||
| -rw-r--r-- | fs/ubifs/debug.h | 392 | ||||
| -rw-r--r-- | fs/ubifs/io.c | 316 | ||||
| -rw-r--r-- | fs/ubifs/key.h | 557 | ||||
| -rw-r--r-- | fs/ubifs/log.c | 104 | ||||
| -rw-r--r-- | fs/ubifs/lprops.c | 842 | ||||
| -rw-r--r-- | fs/ubifs/lpt.c | 1105 | ||||
| -rw-r--r-- | fs/ubifs/lpt_commit.c | 171 | ||||
| -rw-r--r-- | fs/ubifs/master.c | 341 | ||||
| -rw-r--r-- | fs/ubifs/misc.h | 310 | ||||
| -rw-r--r-- | fs/ubifs/orphan.c | 316 | ||||
| -rw-r--r-- | fs/ubifs/recovery.c | 1249 | ||||
| -rw-r--r-- | fs/ubifs/replay.c | 1070 | ||||
| -rw-r--r-- | fs/ubifs/sb.c | 324 | ||||
| -rw-r--r-- | fs/ubifs/scan.c | 362 | ||||
| -rw-r--r-- | fs/ubifs/super.c | 1189 | ||||
| -rw-r--r-- | fs/ubifs/tnc.c | 2767 | ||||
| -rw-r--r-- | fs/ubifs/tnc_commit.c | 1102 | ||||
| -rw-r--r-- | fs/ubifs/tnc_misc.c | 435 | ||||
| -rw-r--r-- | fs/ubifs/ubifs-media.h | 751 | ||||
| -rw-r--r-- | fs/ubifs/ubifs.c | 684 | ||||
| -rw-r--r-- | fs/ubifs/ubifs.h | 2173 | ||||
| -rw-r--r-- | include/linux/math64.h | 85 | ||||
| -rw-r--r-- | include/ubi_uboot.h | 9 | 
30 files changed, 17067 insertions, 2 deletions
| @@ -222,7 +222,8 @@ LIBS += cpu/ixp/npe/libnpe.a  endif  LIBS += lib_$(ARCH)/lib$(ARCH).a  LIBS += fs/cramfs/libcramfs.a fs/fat/libfat.a fs/fdos/libfdos.a fs/jffs2/libjffs2.a \ -	fs/reiserfs/libreiserfs.a fs/ext2/libext2fs.a fs/yaffs2/libyaffs2.a +	fs/reiserfs/libreiserfs.a fs/ext2/libext2fs.a fs/yaffs2/libyaffs2.a \ +	fs/ubifs/libubifs.a  LIBS += net/libnet.a  LIBS += disk/libdisk.a  LIBS += drivers/bios_emulator/libatibiosemu.a diff --git a/fs/Makefile b/fs/Makefile index 8bbd56324..22aad126b 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -29,6 +29,7 @@ subdirs-$(CONFIG_CMD_FDOS) += fdos  subdirs-$(CONFIG_CMD_JFFS2) += jffs2  subdirs-$(CONFIG_CMD_REISER) += reiserfs  subdirs-$(CONFIG_YAFFS2) += yaffs2 +subdirs-$(CONFIG_CMD_UBIFS) += ubifs  SUBDIRS	:= $(subdirs-y) diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile new file mode 100644 index 000000000..8328843fe --- /dev/null +++ b/fs/ubifs/Makefile @@ -0,0 +1,52 @@ +# +# (C) Copyright 2006 +# Wolfgang Denk, DENX Software Engineering, wd@denx.de. +# +# (C) Copyright 2003 +# Pavel Bartusek, Sysgo Real-Time Solutions AG, pba@sysgo.de +# +# +# See file CREDITS for list of people who contributed to this +# project. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +# + +include $(TOPDIR)/config.mk + +LIB	= $(obj)libubifs.a + +COBJS-$(CONFIG_CMD_UBIFS) := ubifs.o io.o super.o sb.o master.o lpt.o +COBJS-$(CONFIG_CMD_UBIFS) += lpt_commit.o scan.o lprops.o +COBJS-$(CONFIG_CMD_UBIFS) += tnc.o tnc_misc.o debug.o crc16.o budget.o +COBJS-$(CONFIG_CMD_UBIFS) += log.o orphan.o recovery.o replay.o + +SRCS	:= $(AOBJS:.o=.S) $(COBJS-y:.o=.c) +OBJS	:= $(addprefix $(obj),$(AOBJS) $(COBJS-y)) + +all:	$(LIB) $(AOBJS) + +$(LIB):	$(obj).depend $(OBJS) +	$(AR) $(ARFLAGS) $@ $(OBJS) + +######################################################################### + +# defines $(obj).depend target +include $(SRCTREE)/rules.mk + +sinclude $(obj).depend + +######################################################################### diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c new file mode 100644 index 000000000..85377ea2a --- /dev/null +++ b/fs/ubifs/budget.c @@ -0,0 +1,113 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + *          Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements the budgeting sub-system which is responsible for UBIFS + * space management. + * + * Factors such as compression, wasted space at the ends of LEBs, space in other + * journal heads, the effect of updates on the index, and so on, make it + * impossible to accurately predict the amount of space needed. Consequently, + * approximations are used. + */ + +#include "ubifs.h" +#include <linux/math64.h> + +/** + * ubifs_calc_min_idx_lebs - calculate the number of eraseblocks for the index. + * @c: UBIFS file-system description object + * + * This function calculates and returns the number of eraseblocks which should + * be kept for index usage. The result is never less than %MIN_INDEX_LEBS. + */ +int ubifs_calc_min_idx_lebs(struct ubifs_info *c) +{ +	int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz; +	long long idx_size; + +	idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; + +	/* Reserve three times the index size (idx_size + 2 * idx_size) */ +	idx_size = idx_size + (idx_size << 1); + +	/* +	 * We do not maintain 'old_idx_sz' as an 'old_idx_lebs'/'old_idx_bytes' +	 * pair, nor similarly two variables for the new index size, so we have +	 * to do this costly 64-bit division on the fast path (rounding up). +	 */ +	idx_size += eff_leb_size - 1; +	idx_lebs = div_u64(idx_size, eff_leb_size); +	/* +	 * The index head is not available for the in-the-gaps method, so add an +	 * extra LEB to compensate. +	 */ +	idx_lebs += 1; +	if (idx_lebs < MIN_INDEX_LEBS) +		idx_lebs = MIN_INDEX_LEBS; +	return idx_lebs; +} + +/** + * ubifs_reported_space - calculate reported free space. 
+ * @c: the UBIFS file-system description object + * @free: amount of free space + * + * This function calculates amount of free space which will be reported to + * user-space. User-space application tend to expect that if the file-system + * (e.g., via the 'statfs()' call) reports that it has N bytes available, they + * are able to write a file of size N. UBIFS attaches node headers to each data + * node and it has to write indexing nodes as well. This introduces additional + * overhead, and UBIFS has to report slightly less free space to meet the above + * expectations. + * + * This function assumes free space is made up of uncompressed data nodes and + * full index nodes (one per data node, tripled because we always allow enough + * space to write the index thrice). + * + * Note, the calculation is pessimistic, which means that most of the time + * UBIFS reports less space than it actually has. + */ +long long ubifs_reported_space(const struct ubifs_info *c, long long free) +{ +	int divisor, factor, f; + +	/* +	 * Reported space size is @free * X, where X is the UBIFS block size +	 * divided by (UBIFS block size + all overhead one data block +	 * introduces). The overhead is the node header + indexing overhead. +	 * +	 * Indexing overhead calculations are based on the following formula: +	 * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number +	 * of data nodes, f - fanout. Because the effective UBIFS fanout is +	 * half the maximum fanout (and at least 2 here), each data node +	 * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. +	 * Note, the multiplier 3 is because UBIFS reserves three times as much +	 * space for the index. +	 */ +	f = c->fanout > 3 ? 
c->fanout >> 1 : 2; +	factor = UBIFS_BLOCK_SIZE; +	divisor = UBIFS_MAX_DATA_NODE_SZ; +	divisor += (c->max_idx_node_sz * 3) / (f - 1); +	free *= factor; +	return div_u64(free, divisor); +} diff --git a/fs/ubifs/crc16.c b/fs/ubifs/crc16.c new file mode 100644 index 000000000..443ccf855 --- /dev/null +++ b/fs/ubifs/crc16.c @@ -0,0 +1,60 @@ +/* + *      crc16.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/types.h> +#include "crc16.h" + +/** CRC table for the CRC-16. The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */ +u16 const crc16_table[256] = { +	0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, +	0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, +	0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, +	0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, +	0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, +	0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, +	0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, +	0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, +	0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, +	0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, +	0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, +	0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, +	0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, +	0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, +	0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, +	0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, +	0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, +	0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, +	0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, +	0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, +	0x7800, 0xB8C1, 0xB981, 
0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, +	0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, +	0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, +	0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, +	0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, +	0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, +	0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, +	0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, +	0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, +	0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, +	0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, +	0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 +}; + +/** + * crc16 - compute the CRC-16 for the data buffer + * @crc:	previous CRC value + * @buffer:	data pointer + * @len:	number of bytes in the buffer + * + * Returns the updated CRC value. + */ +u16 crc16(u16 crc, u8 const *buffer, size_t len) +{ +	while (len--) +		crc = crc16_byte(crc, *buffer++); +	return crc; +} diff --git a/fs/ubifs/crc16.h b/fs/ubifs/crc16.h new file mode 100644 index 000000000..9443c084f --- /dev/null +++ b/fs/ubifs/crc16.h @@ -0,0 +1,30 @@ +/* + *	crc16.h - CRC-16 routine + * + * Implements the standard CRC-16: + *   Width 16 + *   Poly  0x8005 (x^16 + x^15 + x^2 + 1) + *   Init  0 + * + * Copyright (c) 2005 Ben Gardner <bgardner@wabtec.com> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. 
+ */ + +#ifndef __CRC16_H +#define __CRC16_H + +#include <linux/types.h> + +extern u16 const crc16_table[256]; + +extern u16 crc16(u16 crc, const u8 *buffer, size_t len); + +static inline u16 crc16_byte(u16 crc, const u8 data) +{ +	return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff]; +} + +#endif /* __CRC16_H */ + diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c new file mode 100644 index 000000000..6afb8835a --- /dev/null +++ b/fs/ubifs/debug.c @@ -0,0 +1,156 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + *          Adrian Hunter + */ + +/* + * This file implements most of the debugging stuff which is compiled in only + * when it is enabled. But some debugging check functions are implemented in + * corresponding subsystem, just because they are closely related and utilize + * various local functions of those subsystems. 
+ */ + +#define UBIFS_DBG_PRESERVE_UBI + +#include "ubifs.h" + +#ifdef CONFIG_UBIFS_FS_DEBUG + +DEFINE_SPINLOCK(dbg_lock); + +static char dbg_key_buf0[128]; +static char dbg_key_buf1[128]; + +unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT; +unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT; +unsigned int ubifs_tst_flags; + +module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); +module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); +module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); + +MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); +MODULE_PARM_DESC(debug_chks, "Debug check flags"); +MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); + +static const char *get_key_type(int type) +{ +	switch (type) { +	case UBIFS_INO_KEY: +		return "inode"; +	case UBIFS_DENT_KEY: +		return "direntry"; +	case UBIFS_XENT_KEY: +		return "xentry"; +	case UBIFS_DATA_KEY: +		return "data"; +	case UBIFS_TRUN_KEY: +		return "truncate"; +	default: +		return "unknown/invalid key"; +	} +} + +static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, +			char *buffer) +{ +	char *p = buffer; +	int type = key_type(c, key); + +	if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { +		switch (type) { +		case UBIFS_INO_KEY: +			sprintf(p, "(%lu, %s)", (unsigned long)key_inum(c, key), +			       get_key_type(type)); +			break; +		case UBIFS_DENT_KEY: +		case UBIFS_XENT_KEY: +			sprintf(p, "(%lu, %s, %#08x)", +				(unsigned long)key_inum(c, key), +				get_key_type(type), key_hash(c, key)); +			break; +		case UBIFS_DATA_KEY: +			sprintf(p, "(%lu, %s, %u)", +				(unsigned long)key_inum(c, key), +				get_key_type(type), key_block(c, key)); +			break; +		case UBIFS_TRUN_KEY: +			sprintf(p, "(%lu, %s)", +				(unsigned long)key_inum(c, key), +				get_key_type(type)); +			break; +		default: +			sprintf(p, "(bad key type: %#08x, %#08x)", +				key->u32[0], key->u32[1]); +		} +	} else +		sprintf(p, "bad key 
format %d", c->key_fmt); +} + +const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key) +{ +	/* dbg_lock must be held: the result is in the static dbg_key_buf0 */ +	sprintf_key(c, key, dbg_key_buf0); +	return dbg_key_buf0; +} + +const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key) +{ +	/* dbg_lock must be held: the result is in the static dbg_key_buf1 */ +	sprintf_key(c, key, dbg_key_buf1); +	return dbg_key_buf1; +} + +/** + * ubifs_debugging_init - initialize UBIFS debugging. + * @c: UBIFS file-system description object + * + * This function allocates @c->dbg and its LEB-sized buffer @c->dbg->buf. + * Returns zero in case of success and %-ENOMEM if either allocation fails + * (in that case @c->dbg has been freed again). + */ +int ubifs_debugging_init(struct ubifs_info *c) +{ +	c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); +	if (!c->dbg) +		return -ENOMEM; + +	c->dbg->buf = vmalloc(c->leb_size); +	if (!c->dbg->buf) +		goto out; + +	return 0; + +out: +	kfree(c->dbg); +	return -ENOMEM; +} + +/** + * ubifs_debugging_exit - free @c->dbg->buf and @c->dbg. + * @c: UBIFS file-system description object (@c->dbg is dereferenced) + */ +void ubifs_debugging_exit(struct ubifs_info *c) +{ +	vfree(c->dbg->buf); +	kfree(c->dbg); +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h new file mode 100644 index 000000000..62617b692 --- /dev/null +++ b/fs/ubifs/debug.h @@ -0,0 +1,392 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + *          Adrian Hunter + */ + +#ifndef __UBIFS_DEBUG_H__ +#define __UBIFS_DEBUG_H__ + +#ifdef CONFIG_UBIFS_FS_DEBUG + +/** + * ubifs_debug_info - per-FS debugging information. + * @buf: a buffer of LEB size, used for various purposes + * @old_zroot: old index root - used by 'dbg_check_old_index()' + * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' + * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' + * @failure_mode: failure mode for recovery testing + * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls + * @fail_timeout: time in jiffies when delay of failure mode expires + * @fail_cnt: current number of calls to failure mode I/O functions + * @fail_cnt_max: number of calls by which to delay failure mode + * @chk_lpt_sz: used by LPT tree size checker + * @chk_lpt_sz2: used by LPT tree size checker + * @chk_lpt_wastage: used by LPT tree size checker + * @chk_lpt_lebs: used by LPT tree size checker + * @new_nhead_offs: used by LPT tree size checker + * @new_ihead_lnum: used by debugging to check @c->ihead_lnum + * @new_ihead_offs: used by debugging to check @c->ihead_offs + * + * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') + * @saved_free: saved free space (used by 'dbg_save_space_info()') + * + * dfs_dir_name: name of debugfs directory containing this file-system's files + * dfs_dir: direntry object of the file-system debugfs directory + * dfs_dump_lprops: "dump lprops" debugfs knob + * dfs_dump_budg: "dump budgeting information" debugfs knob + * dfs_dump_tnc: "dump TNC" debugfs knob + */ +struct ubifs_debug_info { +	void *buf; +	struct ubifs_zbranch old_zroot; +	int old_zroot_level; +	unsigned long 
long old_zroot_sqnum; +	int failure_mode; +	int fail_delay; +	unsigned long fail_timeout; +	unsigned int fail_cnt; +	unsigned int fail_cnt_max; +	long long chk_lpt_sz; +	long long chk_lpt_sz2; +	long long chk_lpt_wastage; +	int chk_lpt_lebs; +	int new_nhead_offs; +	int new_ihead_lnum; +	int new_ihead_offs; + +	struct ubifs_lp_stats saved_lst; +	long long saved_free; + +	char dfs_dir_name[100]; +	struct dentry *dfs_dir; +	struct dentry *dfs_dump_lprops; +	struct dentry *dfs_dump_budg; +	struct dentry *dfs_dump_tnc; +}; + +#define UBIFS_DBG(op) op + +#define ubifs_assert(expr) do {                                                \ +	if (unlikely(!(expr))) {                                               \ +		printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ +		       __func__, __LINE__, 0);                      \ +		dbg_dump_stack();                                              \ +	}                                                                      \ +} while (0) + +#define ubifs_assert_cmt_locked(c) do {                                        \ +	if (unlikely(down_write_trylock(&(c)->commit_sem))) {                  \ +		up_write(&(c)->commit_sem);                                    \ +		printk(KERN_CRIT "commit lock is not locked!\n");              \ +		ubifs_assert(0);                                               \ +	}                                                                      \ +} while (0) + +#define dbg_dump_stack() do {                                                  \ +	if (!dbg_failure_mode)                                                 \ +		dump_stack();                                                  \ +} while (0) + +/* Generic debugging messages */ +#define dbg_msg(fmt, ...) 
do {                                                 \ +	spin_lock(&dbg_lock);                                                  \ +	printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", 0,   \ +	       __func__, ##__VA_ARGS__);                                       \ +	spin_unlock(&dbg_lock);                                                \ +} while (0) + +#define dbg_do_msg(typ, fmt, ...) do {                                         \ +	if (ubifs_msg_flags & typ)                                             \ +		dbg_msg(fmt, ##__VA_ARGS__);                                   \ +} while (0) + +#define dbg_err(fmt, ...) do {                                                 \ +	spin_lock(&dbg_lock);                                                  \ +	ubifs_err(fmt, ##__VA_ARGS__);                                         \ +	spin_unlock(&dbg_lock);                                                \ +} while (0) + +const char *dbg_key_str0(const struct ubifs_info *c, +			 const union ubifs_key *key); +const char *dbg_key_str1(const struct ubifs_info *c, +			 const union ubifs_key *key); + +/* + * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message + * macros. + */ +#define DBGKEY(key)	dbg_key_str0(c, (key)) +#define DBGKEY1(key)	dbg_key_str1(c, (key)) + +/* General messages */ +#define dbg_gen(fmt, ...)   dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) + +/* Additional journal messages */ +#define dbg_jnl(fmt, ...)   dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) + +/* Additional TNC messages */ +#define dbg_tnc(fmt, ...)   dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) + +/* Additional lprops messages */ +#define dbg_lp(fmt, ...)    dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) + +/* Additional LEB find messages */ +#define dbg_find(fmt, ...)  dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) + +/* Additional mount messages */ +#define dbg_mnt(fmt, ...)   
dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) + +/* Additional I/O messages */ +#define dbg_io(fmt, ...)    dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) + +/* Additional commit messages */ +#define dbg_cmt(fmt, ...)   dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) + +/* Additional budgeting messages */ +#define dbg_budg(fmt, ...)  dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) + +/* Additional log messages */ +#define dbg_log(fmt, ...)   dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) + +/* Additional gc messages */ +#define dbg_gc(fmt, ...)    dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) + +/* Additional scan messages */ +#define dbg_scan(fmt, ...)  dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) + +/* Additional recovery messages */ +#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) + +/* + * Debugging message type flags (must match msg_type_names in debug.c). + * + * UBIFS_MSG_GEN: general messages + * UBIFS_MSG_JNL: journal messages + * UBIFS_MSG_MNT: mount messages + * UBIFS_MSG_CMT: commit messages + * UBIFS_MSG_FIND: LEB find messages + * UBIFS_MSG_BUDG: budgeting messages + * UBIFS_MSG_GC: garbage collection messages + * UBIFS_MSG_TNC: TNC messages + * UBIFS_MSG_LP: lprops messages + * UBIFS_MSG_IO: I/O messages + * UBIFS_MSG_LOG: log messages + * UBIFS_MSG_SCAN: scan messages + * UBIFS_MSG_RCVRY: recovery messages + */ +enum { +	UBIFS_MSG_GEN   = 0x1, +	UBIFS_MSG_JNL   = 0x2, +	UBIFS_MSG_MNT   = 0x4, +	UBIFS_MSG_CMT   = 0x8, +	UBIFS_MSG_FIND  = 0x10, +	UBIFS_MSG_BUDG  = 0x20, +	UBIFS_MSG_GC    = 0x40, +	UBIFS_MSG_TNC   = 0x80, +	UBIFS_MSG_LP    = 0x100, +	UBIFS_MSG_IO    = 0x200, +	UBIFS_MSG_LOG   = 0x400, +	UBIFS_MSG_SCAN  = 0x800, +	UBIFS_MSG_RCVRY = 0x1000, +}; + +/* Debugging message type flags for each default debug message level */ +#define UBIFS_MSG_LVL_0 0 +#define UBIFS_MSG_LVL_1 0x1 +#define UBIFS_MSG_LVL_2 0x7f +#define UBIFS_MSG_LVL_3 0xffff + +/* + * Debugging check flags (must match chk_names in debug.c). 
+ * + * UBIFS_CHK_GEN: general checks + * UBIFS_CHK_TNC: check TNC + * UBIFS_CHK_IDX_SZ: check index size + * UBIFS_CHK_ORPH: check orphans + * UBIFS_CHK_OLD_IDX: check the old index + * UBIFS_CHK_LPROPS: check lprops + * UBIFS_CHK_FS: check the file-system + */ +enum { +	UBIFS_CHK_GEN     = 0x1, +	UBIFS_CHK_TNC     = 0x2, +	UBIFS_CHK_IDX_SZ  = 0x4, +	UBIFS_CHK_ORPH    = 0x8, +	UBIFS_CHK_OLD_IDX = 0x10, +	UBIFS_CHK_LPROPS  = 0x20, +	UBIFS_CHK_FS      = 0x40, +}; + +/* + * Special testing flags (must match tst_names in debug.c). + * + * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method + * UBIFS_TST_RCVRY: failure mode for recovery testing + */ +enum { +	UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, +	UBIFS_TST_RCVRY             = 0x4, +}; + +#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1 +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1 +#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2 +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2 +#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3 +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3 +#else +#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0 +#endif + +#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS +#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff +#else +#define UBIFS_CHK_FLAGS_DEFAULT 0 +#endif + +#define dbg_ntype(type)                       "" +#define dbg_cstate(cmt_state)                 "" +#define dbg_get_key_dump(c, key)              ({}) +#define dbg_dump_inode(c, inode)              ({}) +#define dbg_dump_node(c, node)                ({}) +#define dbg_dump_budget_req(req)              ({}) +#define dbg_dump_lstats(lst)                  ({}) +#define dbg_dump_budg(c)                      ({}) +#define dbg_dump_lprop(c, lp)                 ({}) +#define dbg_dump_lprops(c)                    ({}) +#define dbg_dump_lpt_info(c)                  ({}) +#define dbg_dump_leb(c, lnum)                 ({}) +#define dbg_dump_znode(c, znode)              ({}) +#define dbg_dump_heap(c, heap, cat)           ({}) +#define dbg_dump_pnode(c, pnode, parent, iip) ({}) 
+#define dbg_dump_tnc(c)                       ({}) +#define dbg_dump_index(c)                     ({}) + +#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 +#define dbg_old_index_check_init(c, zroot)         0 +#define dbg_check_old_index(c, zroot)              0 +#define dbg_check_cats(c)                          0 +#define dbg_check_ltab(c)                          0 +#define dbg_chk_lpt_free_spc(c)                    0 +#define dbg_chk_lpt_sz(c, action, len)             0 +#define dbg_check_synced_i_size(inode)             0 +#define dbg_check_dir_size(c, dir)                 0 +#define dbg_check_tnc(c, x)                        0 +#define dbg_check_idx_size(c, idx_size)            0 +#define dbg_check_filesystem(c)                    0 +#define dbg_check_heap(c, heap, cat, add_pos)      ({}) +#define dbg_check_lprops(c)                        0 +#define dbg_check_lpt_nodes(c, cnode, row, col)    0 +#define dbg_force_in_the_gaps_enabled              0 +#define dbg_force_in_the_gaps()                    0 +#define dbg_failure_mode                           0 +#define dbg_failure_mode_registration(c)           ({}) +#define dbg_failure_mode_deregistration(c)         ({}) + +int ubifs_debugging_init(struct ubifs_info *c); +void ubifs_debugging_exit(struct ubifs_info *c); + +#else /* !CONFIG_UBIFS_FS_DEBUG */ + +#define UBIFS_DBG(op) + +/* Use "if (0)" to make compiler check arguments even if debugging is off */ +#define ubifs_assert(expr)  do {                                               \ +	if (0 && (expr))                                                       \ +		printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ +		       __func__, __LINE__, 0);                      \ +} while (0) + +#define dbg_err(fmt, ...)   
do {                                               \ +	if (0)                                                                 \ +		ubifs_err(fmt, ##__VA_ARGS__);                                 \ +} while (0) + +#define dbg_msg(fmt, ...) do {                                                 \ +	if (0)                                                                 \ +		printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n",         \ +		       0, __func__, ##__VA_ARGS__);                 \ +} while (0) + +#define dbg_dump_stack() +#define ubifs_assert_cmt_locked(c) + +#define dbg_gen(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_jnl(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_tnc(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_lp(fmt, ...)    dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_find(fmt, ...)  dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_mnt(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_io(fmt, ...)    dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_cmt(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_budg(fmt, ...)  dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_log(fmt, ...)   dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_gc(fmt, ...)    dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_scan(fmt, ...)  dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_rcvry(fmt, ...) 
dbg_msg(fmt, ##__VA_ARGS__) + +#define DBGKEY(key)  ((char *)(key)) +#define DBGKEY1(key) ((char *)(key)) + +#define ubifs_debugging_init(c)                0 +#define ubifs_debugging_exit(c)                ({}) + +#define dbg_ntype(type)                       "" +#define dbg_cstate(cmt_state)                 "" +#define dbg_get_key_dump(c, key)              ({}) +#define dbg_dump_inode(c, inode)              ({}) +#define dbg_dump_node(c, node)                ({}) +#define dbg_dump_budget_req(req)              ({}) +#define dbg_dump_lstats(lst)                  ({}) +#define dbg_dump_budg(c)                      ({}) +#define dbg_dump_lprop(c, lp)                 ({}) +#define dbg_dump_lprops(c)                    ({}) +#define dbg_dump_lpt_info(c)                  ({}) +#define dbg_dump_leb(c, lnum)                 ({}) +#define dbg_dump_znode(c, znode)              ({}) +#define dbg_dump_heap(c, heap, cat)           ({}) +#define dbg_dump_pnode(c, pnode, parent, iip) ({}) +#define dbg_dump_tnc(c)                       ({}) +#define dbg_dump_index(c)                     ({}) + +#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 +#define dbg_old_index_check_init(c, zroot)         0 +#define dbg_check_old_index(c, zroot)              0 +#define dbg_check_cats(c)                          0 +#define dbg_check_ltab(c)                          0 +#define dbg_chk_lpt_free_spc(c)                    0 +#define dbg_chk_lpt_sz(c, action, len)             0 +#define dbg_check_synced_i_size(inode)             0 +#define dbg_check_dir_size(c, dir)                 0 +#define dbg_check_tnc(c, x)                        0 +#define dbg_check_idx_size(c, idx_size)            0 +#define dbg_check_filesystem(c)                    0 +#define dbg_check_heap(c, heap, cat, add_pos)      ({}) +#define dbg_check_lprops(c)                        0 +#define dbg_check_lpt_nodes(c, cnode, row, col)    0 +#define dbg_force_in_the_gaps_enabled              0 +#define dbg_force_in_the_gaps()      
              0 +#define dbg_failure_mode                           0 +#define dbg_failure_mode_registration(c)           ({}) +#define dbg_failure_mode_deregistration(c)         ({}) + +#endif /* !CONFIG_UBIFS_FS_DEBUG */ + +#endif /* !__UBIFS_DEBUG_H__ */ diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c new file mode 100644 index 000000000..aae5c65ea --- /dev/null +++ b/fs/ubifs/io.c @@ -0,0 +1,316 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * Copyright (C) 2006, 2007 University of Szeged, Hungary + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + *          Adrian Hunter + *          Zoltan Sogor + */ + +/* + * This file implements UBIFS I/O subsystem which provides various I/O-related + * helper functions (reading/writing/checking/validating nodes) and implements + * write-buffering support. Write buffers help to save space which otherwise + * would have been wasted for padding to the nearest minimal I/O unit boundary. + * Instead, data first goes to the write-buffer and is flushed when the + * buffer is full or when it is not used for some time (by timer). This is + * similar to the mechanism is used by JFFS2. + * + * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by + * mutexes defined inside these objects. 
Since sometimes upper-level code + * has to lock the write-buffer (e.g. journal space reservation code), many + * functions related to write-buffers have "nolock" suffix which means that the + * caller has to lock the write-buffer before calling this function. + * + * UBIFS stores nodes at 64 bit-aligned addresses. If the node length is not + * aligned, UBIFS starts the next node from the aligned address, and the padded + * bytes may contain any rubbish. In other words, UBIFS does not put padding + * bytes in those small gaps. Common headers of nodes store real node lengths, + * not aligned lengths. Indexing nodes also store real lengths in branches. + * + * UBIFS uses padding when it pads to the next min. I/O unit. In this case it + * uses padding nodes or padding bytes, if the padding node does not fit. + * + * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes + * every time they are read from the flash media. + */ + +#include "ubifs.h" + +/** + * ubifs_ro_mode - switch UBIFS to read read-only mode. + * @c: UBIFS file-system description object + * @err: error code which is the reason of switching to R/O mode + */ +void ubifs_ro_mode(struct ubifs_info *c, int err) +{ +	if (!c->ro_media) { +		c->ro_media = 1; +		c->no_chk_data_crc = 0; +		ubifs_warn("switched to read-only mode, error %d", err); +		dbg_dump_stack(); +	} +} + +/** + * ubifs_check_node - check node. + * @c: UBIFS file-system description object + * @buf: node to check + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * @quiet: print no messages + * @must_chk_crc: indicates whether to always check the CRC + * + * This function checks node magic number and CRC checksum. This function also + * validates node length to prevent UBIFS from becoming crazy when an attacker + * feeds it a file-system image with incorrect nodes. 
For example, too large + * node length in the common header could cause UBIFS to read memory outside of + * allocated buffer when checking the CRC checksum. + * + * This function may skip data nodes CRC checking if @c->no_chk_data_crc is + * true, which is controlled by corresponding UBIFS mount option. However, if + * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is + * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is + * ignored and CRC is checked. + * + * This function returns zero in case of success and %-EUCLEAN in case of bad + * CRC or magic. + */ +int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, +		     int offs, int quiet, int must_chk_crc) +{ +	int err = -EINVAL, type, node_len; +	uint32_t crc, node_crc, magic; +	const struct ubifs_ch *ch = buf; + +	ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); +	ubifs_assert(!(offs & 7) && offs < c->leb_size); + +	magic = le32_to_cpu(ch->magic); +	if (magic != UBIFS_NODE_MAGIC) { +		if (!quiet) +			ubifs_err("bad magic %#08x, expected %#08x", +				  magic, UBIFS_NODE_MAGIC); +		err = -EUCLEAN; +		goto out; +	} + +	type = ch->node_type; +	if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) { +		if (!quiet) +			ubifs_err("bad node type %d", type); +		goto out; +	} + +	node_len = le32_to_cpu(ch->len); +	if (node_len + offs > c->leb_size) +		goto out_len; + +	if (c->ranges[type].max_len == 0) { +		if (node_len != c->ranges[type].len) +			goto out_len; +	} else if (node_len < c->ranges[type].min_len || +		   node_len > c->ranges[type].max_len) +		goto out_len; + +	if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && +	     c->no_chk_data_crc) +		return 0; + +	crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); +	node_crc = le32_to_cpu(ch->crc); +	if (crc != node_crc) { +		if (!quiet) +			ubifs_err("bad CRC: calculated %#08x, read %#08x", +				  crc, node_crc); +		err = -EUCLEAN; +		goto out; +	} + +	return 0; + +out_len: +	if 
(!quiet) +		ubifs_err("bad node length %d", node_len); +out: +	if (!quiet) { +		ubifs_err("bad node at LEB %d:%d", lnum, offs); +		dbg_dump_node(c, buf); +		dbg_dump_stack(); +	} +	return err; +} + +/** + * ubifs_pad - pad flash space. + * @c: UBIFS file-system description object + * @buf: buffer to put padding to + * @pad: how many bytes to pad + * + * The flash media obliges us to write only in chunks of %c->min_io_size and + * when we have to write less data we add padding node to the write-buffer and + * pad it to the next minimal I/O unit's boundary. Padding nodes help when the + * media is being scanned. If the amount of wasted space is not enough to fit a + * padding node which takes %UBIFS_PAD_NODE_SZ bytes, we write padding bytes + * pattern (%UBIFS_PADDING_BYTE). + * + * Padding nodes are also used to fill gaps when the "commit-in-gaps" method is + * used. + */ +void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) +{ +	uint32_t crc; + +	ubifs_assert(pad >= 0 && !(pad & 7)); + +	if (pad >= UBIFS_PAD_NODE_SZ) { +		struct ubifs_ch *ch = buf; +		struct ubifs_pad_node *pad_node = buf; + +		ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); +		ch->node_type = UBIFS_PAD_NODE; +		ch->group_type = UBIFS_NO_NODE_GROUP; +		ch->padding[0] = ch->padding[1] = 0; +		ch->sqnum = 0; +		ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ); +		pad -= UBIFS_PAD_NODE_SZ; +		pad_node->pad_len = cpu_to_le32(pad); +		crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8); +		ch->crc = cpu_to_le32(crc); +		memset(buf + UBIFS_PAD_NODE_SZ, 0, pad); +	} else if (pad > 0) +		/* Too little space, padding node won't fit */ +		memset(buf, UBIFS_PADDING_BYTE, pad); +} + +/** + * next_sqnum - get next sequence number. 
+ * @c: UBIFS file-system description object + */ +static unsigned long long next_sqnum(struct ubifs_info *c) +{ +	unsigned long long sqnum; + +	spin_lock(&c->cnt_lock); +	sqnum = ++c->max_sqnum; +	spin_unlock(&c->cnt_lock); + +	if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) { +		if (sqnum >= SQNUM_WATERMARK) { +			ubifs_err("sequence number overflow %llu, end of life", +				  sqnum); +			ubifs_ro_mode(c, -EINVAL); +		} +		ubifs_warn("running out of sequence numbers, end of life soon"); +	} + +	return sqnum; +} + +/** + * ubifs_prepare_node - prepare node to be written to flash. + * @c: UBIFS file-system description object + * @node: the node to pad + * @len: node length + * @pad: if the buffer has to be padded + * + * This function prepares node at @node to be written to the media - it + * calculates node CRC, fills the common header, and adds proper padding up to + * the next minimum I/O unit if @pad is not zero. + */ +void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad) +{ +	uint32_t crc; +	struct ubifs_ch *ch = node; +	unsigned long long sqnum = next_sqnum(c); + +	ubifs_assert(len >= UBIFS_CH_SZ); + +	ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC); +	ch->len = cpu_to_le32(len); +	ch->group_type = UBIFS_NO_NODE_GROUP; +	ch->sqnum = cpu_to_le64(sqnum); +	ch->padding[0] = ch->padding[1] = 0; +	crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8); +	ch->crc = cpu_to_le32(crc); + +	if (pad) { +		len = ALIGN(len, 8); +		pad = ALIGN(len, c->min_io_size) - len; +		ubifs_pad(c, node + len, pad); +	} +} + +/** + * ubifs_read_node - read node. + * @c: UBIFS file-system description object + * @buf: buffer to read to + * @type: node type + * @len: node length (not aligned) + * @lnum: logical eraseblock number + * @offs: offset within the logical eraseblock + * + * This function reads a node of known type and and length, checks it and + * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched + * and a negative error code in case of failure. 
+ */ +int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, +		    int lnum, int offs) +{ +	int err, l; +	struct ubifs_ch *ch = buf; + +	dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); +	ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); +	ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size); +	ubifs_assert(!(offs & 7) && offs < c->leb_size); +	ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); + +	err = ubi_read(c->ubi, lnum, buf, offs, len); +	if (err && err != -EBADMSG) { +		ubifs_err("cannot read node %d from LEB %d:%d, error %d", +			  type, lnum, offs, err); +		return err; +	} + +	if (type != ch->node_type) { +		ubifs_err("bad node type (%d but expected %d)", +			  ch->node_type, type); +		goto out; +	} + +	err = ubifs_check_node(c, buf, lnum, offs, 0, 0); +	if (err) { +		ubifs_err("expected node type %d", type); +		return err; +	} + +	l = le32_to_cpu(ch->len); +	if (l != len) { +		ubifs_err("bad node length %d, expected %d", l, len); +		goto out; +	} + +	return 0; + +out: +	ubifs_err("bad node at LEB %d:%d", lnum, offs); +	dbg_dump_node(c, buf); +	dbg_dump_stack(); +	return -EINVAL; +} diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h new file mode 100644 index 000000000..efb3430a2 --- /dev/null +++ b/fs/ubifs/key.h @@ -0,0 +1,557 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + *          Adrian Hunter + */ + +/* + * This header contains various key-related definitions and helper function. + * UBIFS allows several key schemes, so we access key fields only via these + * helpers. At the moment only one key scheme is supported. + * + * Simple key scheme + * ~~~~~~~~~~~~~~~~~ + * + * Keys are 64-bits long. First 32-bits are inode number (parent inode number + * in case of direntry key). Next 3 bits are node type. The last 29 bits are + * 4KiB offset in case of inode node, and direntry hash in case of a direntry + * node. We use "r5" hash borrowed from reiserfs. + */ + +#ifndef __UBIFS_KEY_H__ +#define __UBIFS_KEY_H__ + +/** + * key_mask_hash - mask a valid hash value. + * @val: value to be masked + * + * We use hash values as offset in directories, so values %0 and %1 are + * reserved for "." and "..". %2 is reserved for "end of readdir" marker. This + * function makes sure the reserved values are not used. + */ +static inline uint32_t key_mask_hash(uint32_t hash) +{ +	hash &= UBIFS_S_KEY_HASH_MASK; +	if (unlikely(hash <= 2)) +		hash += 3; +	return hash; +} + +/** + * key_r5_hash - R5 hash function (borrowed from reiserfs). + * @s: direntry name + * @len: name length + */ +static inline uint32_t key_r5_hash(const char *s, int len) +{ +	uint32_t a = 0; +	const signed char *str = (const signed char *)s; + +	while (*str) { +		a += *str << 4; +		a += *str >> 4; +		a *= 11; +		str++; +	} + +	return key_mask_hash(a); +} + +/** + * key_test_hash - testing hash function. 
+ * @str: direntry name + * @len: name length + */ +static inline uint32_t key_test_hash(const char *str, int len) +{ +	uint32_t a = 0; + +	len = min_t(uint32_t, len, 4); +	memcpy(&a, str, len); +	return key_mask_hash(a); +} + +/** + * ino_key_init - initialize inode key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + */ +static inline void ino_key_init(const struct ubifs_info *c, +				union ubifs_key *key, ino_t inum) +{ +	key->u32[0] = inum; +	key->u32[1] = UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * ino_key_init_flash - initialize on-flash inode key. + * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: inode number + */ +static inline void ino_key_init_flash(const struct ubifs_info *c, void *k, +				      ino_t inum) +{ +	union ubifs_key *key = k; + +	key->j32[0] = cpu_to_le32(inum); +	key->j32[1] = cpu_to_le32(UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS); +	memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * lowest_ino_key - get the lowest possible inode key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + */ +static inline void lowest_ino_key(const struct ubifs_info *c, +				union ubifs_key *key, ino_t inum) +{ +	key->u32[0] = inum; +	key->u32[1] = 0; +} + +/** + * highest_ino_key - get the highest possible inode key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + */ +static inline void highest_ino_key(const struct ubifs_info *c, +				union ubifs_key *key, ino_t inum) +{ +	key->u32[0] = inum; +	key->u32[1] = 0xffffffff; +} + +/** + * dent_key_init - initialize directory entry key. 
+ * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: parent inode number + * @nm: direntry name and length + */ +static inline void dent_key_init(const struct ubifs_info *c, +				 union ubifs_key *key, ino_t inum, +				 const struct qstr *nm) +{ +	uint32_t hash = c->key_hash(nm->name, nm->len); + +	ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); +	key->u32[0] = inum; +	key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * dent_key_init_hash - initialize directory entry key without re-calculating + *                      hash function. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: parent inode number + * @hash: direntry name hash + */ +static inline void dent_key_init_hash(const struct ubifs_info *c, +				      union ubifs_key *key, ino_t inum, +				      uint32_t hash) +{ +	ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); +	key->u32[0] = inum; +	key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * dent_key_init_flash - initialize on-flash directory entry key. + * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: parent inode number + * @nm: direntry name and length + */ +static inline void dent_key_init_flash(const struct ubifs_info *c, void *k, +				       ino_t inum, const struct qstr *nm) +{ +	union ubifs_key *key = k; +	uint32_t hash = c->key_hash(nm->name, nm->len); + +	ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); +	key->j32[0] = cpu_to_le32(inum); +	key->j32[1] = cpu_to_le32(hash | +				  (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS)); +	memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * lowest_dent_key - get the lowest possible directory entry key. 
+ * @c: UBIFS file-system description object + * @key: where to store the lowest key + * @inum: parent inode number + */ +static inline void lowest_dent_key(const struct ubifs_info *c, +				   union ubifs_key *key, ino_t inum) +{ +	key->u32[0] = inum; +	key->u32[1] = UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS; +} + +/** + * xent_key_init - initialize extended attribute entry key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: host inode number + * @nm: extended attribute entry name and length + */ +static inline void xent_key_init(const struct ubifs_info *c, +				 union ubifs_key *key, ino_t inum, +				 const struct qstr *nm) +{ +	uint32_t hash = c->key_hash(nm->name, nm->len); + +	ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); +	key->u32[0] = inum; +	key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * xent_key_init_hash - initialize extended attribute entry key without + *                      re-calculating hash function. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: host inode number + * @hash: extended attribute entry name hash + */ +static inline void xent_key_init_hash(const struct ubifs_info *c, +				      union ubifs_key *key, ino_t inum, +				      uint32_t hash) +{ +	ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); +	key->u32[0] = inum; +	key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); +} + +/** + * xent_key_init_flash - initialize on-flash extended attribute entry key. 
+ * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: host inode number + * @nm: extended attribute entry name and length + */ +static inline void xent_key_init_flash(const struct ubifs_info *c, void *k, +				       ino_t inum, const struct qstr *nm) +{ +	union ubifs_key *key = k; +	uint32_t hash = c->key_hash(nm->name, nm->len); + +	ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); +	key->j32[0] = cpu_to_le32(inum); +	key->j32[1] = cpu_to_le32(hash | +				  (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS)); +	memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * lowest_xent_key - get the lowest possible extended attribute entry key. + * @c: UBIFS file-system description object + * @key: where to store the lowest key + * @inum: host inode number + */ +static inline void lowest_xent_key(const struct ubifs_info *c, +				   union ubifs_key *key, ino_t inum) +{ +	key->u32[0] = inum; +	key->u32[1] = UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS; +} + +/** + * data_key_init - initialize data key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + * @block: block number + */ +static inline void data_key_init(const struct ubifs_info *c, +				 union ubifs_key *key, ino_t inum, +				 unsigned int block) +{ +	ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); +	key->u32[0] = inum; +	key->u32[1] = block | (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS); +} + +/** + * data_key_init_flash - initialize on-flash data key. 
+ * @c: UBIFS file-system description object + * @k: key to initialize + * @inum: inode number + * @block: block number + */ +static inline void data_key_init_flash(const struct ubifs_info *c, void *k, +				       ino_t inum, unsigned int block) +{ +	union ubifs_key *key = k; + +	ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); +	key->j32[0] = cpu_to_le32(inum); +	key->j32[1] = cpu_to_le32(block | +				  (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS)); +	memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * trun_key_init - initialize truncation node key. + * @c: UBIFS file-system description object + * @key: key to initialize + * @inum: inode number + * + * Note, UBIFS does not have truncation keys on the media and this function is + * only used for purposes of replay. + */ +static inline void trun_key_init(const struct ubifs_info *c, +				 union ubifs_key *key, ino_t inum) +{ +	key->u32[0] = inum; +	key->u32[1] = UBIFS_TRUN_KEY << UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * key_type - get key type. + * @c: UBIFS file-system description object + * @key: key to get type of + */ +static inline int key_type(const struct ubifs_info *c, +			   const union ubifs_key *key) +{ +	return key->u32[1] >> UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * key_type_flash - get type of a on-flash formatted key. + * @c: UBIFS file-system description object + * @k: key to get type of + */ +static inline int key_type_flash(const struct ubifs_info *c, const void *k) +{ +	const union ubifs_key *key = k; + +	return le32_to_cpu(key->j32[1]) >> UBIFS_S_KEY_BLOCK_BITS; +} + +/** + * key_inum - fetch inode number from key. + * @c: UBIFS file-system description object + * @k: key to fetch inode number from + */ +static inline ino_t key_inum(const struct ubifs_info *c, const void *k) +{ +	const union ubifs_key *key = k; + +	return key->u32[0]; +} + +/** + * key_inum_flash - fetch inode number from an on-flash formatted key. 
+ * @c: UBIFS file-system description object + * @k: key to fetch inode number from + */ +static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k) +{ +	const union ubifs_key *key = k; + +	return le32_to_cpu(key->j32[0]); +} + +/** + * key_hash - get directory entry hash. + * @c: UBIFS file-system description object + * @key: the key to get hash from + */ +static inline int key_hash(const struct ubifs_info *c, +			   const union ubifs_key *key) +{ +	return key->u32[1] & UBIFS_S_KEY_HASH_MASK; +} + +/** + * key_hash_flash - get directory entry hash from an on-flash formatted key. + * @c: UBIFS file-system description object + * @k: the key to get hash from + */ +static inline int key_hash_flash(const struct ubifs_info *c, const void *k) +{ +	const union ubifs_key *key = k; + +	return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_HASH_MASK; +} + +/** + * key_block - get data block number. + * @c: UBIFS file-system description object + * @key: the key to get the block number from + */ +static inline unsigned int key_block(const struct ubifs_info *c, +				     const union ubifs_key *key) +{ +	return key->u32[1] & UBIFS_S_KEY_BLOCK_MASK; +} + +/** + * key_block_flash - get data block number from an on-flash formatted key. + * @c: UBIFS file-system description object + * @k: the key to get the block number from + */ +static inline unsigned int key_block_flash(const struct ubifs_info *c, +					   const void *k) +{ +	const union ubifs_key *key = k; + +	return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_BLOCK_MASK; +} + +/** + * key_read - transform a key to in-memory format. 
+ * @c: UBIFS file-system description object + * @from: the key to transform + * @to: the key to store the result + */ +static inline void key_read(const struct ubifs_info *c, const void *from, +			    union ubifs_key *to) +{ +	const union ubifs_key *f = from; + +	to->u32[0] = le32_to_cpu(f->j32[0]); +	to->u32[1] = le32_to_cpu(f->j32[1]); +} + +/** + * key_write - transform a key from in-memory format. + * @c: UBIFS file-system description object + * @from: the key to transform + * @to: the key to store the result + */ +static inline void key_write(const struct ubifs_info *c, +			     const union ubifs_key *from, void *to) +{ +	union ubifs_key *t = to; + +	t->j32[0] = cpu_to_le32(from->u32[0]); +	t->j32[1] = cpu_to_le32(from->u32[1]); +	memset(to + 8, 0, UBIFS_MAX_KEY_LEN - 8); +} + +/** + * key_write_idx - transform a key from in-memory format for the index. + * @c: UBIFS file-system description object + * @from: the key to transform + * @to: the key to store the result + */ +static inline void key_write_idx(const struct ubifs_info *c, +				 const union ubifs_key *from, void *to) +{ +	union ubifs_key *t = to; + +	t->j32[0] = cpu_to_le32(from->u32[0]); +	t->j32[1] = cpu_to_le32(from->u32[1]); +} + +/** + * key_copy - copy a key. + * @c: UBIFS file-system description object + * @from: the key to copy from + * @to: the key to copy to + */ +static inline void key_copy(const struct ubifs_info *c, +			    const union ubifs_key *from, union ubifs_key *to) +{ +	to->u64[0] = from->u64[0]; +} + +/** + * keys_cmp - compare keys. + * @c: UBIFS file-system description object + * @key1: the first key to compare + * @key2: the second key to compare + * + * This function compares 2 keys and returns %-1 if @key1 is less than + * @key2, %0 if the keys are equivalent and %1 if @key1 is greater than @key2. 
+ */ +static inline int keys_cmp(const struct ubifs_info *c, +			   const union ubifs_key *key1, +			   const union ubifs_key *key2) +{ +	if (key1->u32[0] < key2->u32[0]) +		return -1; +	if (key1->u32[0] > key2->u32[0]) +		return 1; +	if (key1->u32[1] < key2->u32[1]) +		return -1; +	if (key1->u32[1] > key2->u32[1]) +		return 1; + +	return 0; +} + +/** + * keys_eq - determine if keys are equivalent. + * @c: UBIFS file-system description object + * @key1: the first key to compare + * @key2: the second key to compare + * + * This function compares 2 keys and returns %1 if @key1 is equal to @key2 and + * %0 if not. + */ +static inline int keys_eq(const struct ubifs_info *c, +			  const union ubifs_key *key1, +			  const union ubifs_key *key2) +{ +	if (key1->u32[0] != key2->u32[0]) +		return 0; +	if (key1->u32[1] != key2->u32[1]) +		return 0; +	return 1; +} + +/** + * is_hash_key - is a key vulnerable to hash collisions. + * @c: UBIFS file-system description object + * @key: key + * + * This function returns %1 if @key is a hashed key or %0 otherwise. + */ +static inline int is_hash_key(const struct ubifs_info *c, +			      const union ubifs_key *key) +{ +	int type = key_type(c, key); + +	return type == UBIFS_DENT_KEY || type == UBIFS_XENT_KEY; +} + +/** + * key_max_inode_size - get maximum file size allowed by current key format. + * @c: UBIFS file-system description object + */ +static inline unsigned long long key_max_inode_size(const struct ubifs_info *c) +{ +	switch (c->key_fmt) { +	case UBIFS_SIMPLE_KEY_FMT: +		return (1ULL << UBIFS_S_KEY_BLOCK_BITS) * UBIFS_BLOCK_SIZE; +	default: +		return 0; +	} +} +#endif /* !__UBIFS_KEY_H__ */ diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c new file mode 100644 index 000000000..68a9bd98f --- /dev/null +++ b/fs/ubifs/log.c @@ -0,0 +1,104 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + *          Adrian Hunter + */ + +/* + * This file is a part of UBIFS journal implementation and contains various + * functions which manipulate the log. The log is a fixed area on the flash + * which does not contain any data but refers to buds. The log is a part of the + * journal. + */ + +#include "ubifs.h" + +/** + * ubifs_search_bud - search bud LEB. + * @c: UBIFS file-system description object + * @lnum: logical eraseblock number to search + * + * This function searches bud LEB @lnum. Returns bud description object in case + * of success and %NULL if there is no bud with this LEB number. + */ +struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum) +{ +	struct rb_node *p; +	struct ubifs_bud *bud; + +	spin_lock(&c->buds_lock); +	p = c->buds.rb_node; +	while (p) { +		bud = rb_entry(p, struct ubifs_bud, rb); +		if (lnum < bud->lnum) +			p = p->rb_left; +		else if (lnum > bud->lnum) +			p = p->rb_right; +		else { +			spin_unlock(&c->buds_lock); +			return bud; +		} +	} +	spin_unlock(&c->buds_lock); +	return NULL; +} + +/** + * ubifs_add_bud - add bud LEB to the tree of buds and its journal head list. 
+ * @c: UBIFS file-system description object + * @bud: the bud to add + */ +void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) +{ +	struct rb_node **p, *parent = NULL; +	struct ubifs_bud *b; +	struct ubifs_jhead *jhead; + +	spin_lock(&c->buds_lock); +	p = &c->buds.rb_node; +	while (*p) { +		parent = *p; +		b = rb_entry(parent, struct ubifs_bud, rb); +		ubifs_assert(bud->lnum != b->lnum); +		if (bud->lnum < b->lnum) +			p = &(*p)->rb_left; +		else +			p = &(*p)->rb_right; +	} + +	rb_link_node(&bud->rb, parent, p); +	rb_insert_color(&bud->rb, &c->buds); +	if (c->jheads) { +		jhead = &c->jheads[bud->jhead]; +		list_add_tail(&bud->list, &jhead->buds_list); +	} else +		ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY)); + +	/* +	 * Note, although this is a new bud, we anyway account this space now, +	 * before any data has been written to it, because this is about to +	 * guarantee fixed mount time, and this bud will anyway be read and +	 * scanned. +	 */ +	c->bud_bytes += c->leb_size - bud->start; + +	dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum, +		bud->start, bud->jhead, c->bud_bytes); +	spin_unlock(&c->buds_lock); +} diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c new file mode 100644 index 000000000..8ce4949fc --- /dev/null +++ b/fs/ubifs/lprops.c @@ -0,0 +1,842 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + *          Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements the functions that access LEB properties and their + * categories. LEBs are categorized based on the needs of UBIFS, and the + * categories are stored as either heaps or lists to provide a fast way of + * finding a LEB in a particular category. For example, UBIFS may need to find + * an empty LEB for the journal, or a very dirty LEB for garbage collection. + */ + +#include "ubifs.h" + +/** + * get_heap_comp_val - get the LEB properties value for heap comparisons. + * @lprops: LEB properties + * @cat: LEB category + */ +static int get_heap_comp_val(struct ubifs_lprops *lprops, int cat) +{ +	switch (cat) { +	case LPROPS_FREE: +		return lprops->free; +	case LPROPS_DIRTY_IDX: +		return lprops->free + lprops->dirty; +	default: +		return lprops->dirty; +	} +} + +/** + * move_up_lpt_heap - move a new heap entry up as far as possible. + * @c: UBIFS file-system description object + * @heap: LEB category heap + * @lprops: LEB properties to move + * @cat: LEB category + * + * New entries to a heap are added at the bottom and then moved up until the + * parent's value is greater.  In the case of LPT's category heaps, the value + * is either the amount of free space or the amount of dirty space, depending + * on the category. 
+ */ +static void move_up_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, +			     struct ubifs_lprops *lprops, int cat) +{ +	int val1, val2, hpos; + +	hpos = lprops->hpos; +	if (!hpos) +		return; /* Already top of the heap */ +	val1 = get_heap_comp_val(lprops, cat); +	/* Compare to parent and, if greater, move up the heap */ +	do { +		int ppos = (hpos - 1) / 2; + +		val2 = get_heap_comp_val(heap->arr[ppos], cat); +		if (val2 >= val1) +			return; +		/* Greater than parent so move up */ +		heap->arr[ppos]->hpos = hpos; +		heap->arr[hpos] = heap->arr[ppos]; +		heap->arr[ppos] = lprops; +		lprops->hpos = ppos; +		hpos = ppos; +	} while (hpos); +} + +/** + * adjust_lpt_heap - move a changed heap entry up or down the heap. + * @c: UBIFS file-system description object + * @heap: LEB category heap + * @lprops: LEB properties to move + * @hpos: heap position of @lprops + * @cat: LEB category + * + * Changed entries in a heap are moved up or down until the parent's value is + * greater.  In the case of LPT's category heaps, the value is either the amount + * of free space or the amount of dirty space, depending on the category. 
+ */ +static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, +			    struct ubifs_lprops *lprops, int hpos, int cat) +{ +	int val1, val2, val3, cpos; + +	val1 = get_heap_comp_val(lprops, cat); +	/* Compare to parent and, if greater than parent, move up the heap */ +	if (hpos) { +		int ppos = (hpos - 1) / 2; + +		val2 = get_heap_comp_val(heap->arr[ppos], cat); +		if (val1 > val2) { +			/* Greater than parent so move up */ +			while (1) { +				heap->arr[ppos]->hpos = hpos; +				heap->arr[hpos] = heap->arr[ppos]; +				heap->arr[ppos] = lprops; +				lprops->hpos = ppos; +				hpos = ppos; +				if (!hpos) +					return; +				ppos = (hpos - 1) / 2; +				val2 = get_heap_comp_val(heap->arr[ppos], cat); +				if (val1 <= val2) +					return; +				/* Still greater than parent so keep going */ +			} +		} +	} + +	/* Not greater than parent, so compare to children */ +	while (1) { +		/* Compare to left child */ +		cpos = hpos * 2 + 1; +		if (cpos >= heap->cnt) +			return; +		val2 = get_heap_comp_val(heap->arr[cpos], cat); +		if (val1 < val2) { +			/* Less than left child, so promote biggest child */ +			if (cpos + 1 < heap->cnt) { +				val3 = get_heap_comp_val(heap->arr[cpos + 1], +							 cat); +				if (val3 > val2) +					cpos += 1; /* Right child is bigger */ +			} +			heap->arr[cpos]->hpos = hpos; +			heap->arr[hpos] = heap->arr[cpos]; +			heap->arr[cpos] = lprops; +			lprops->hpos = cpos; +			hpos = cpos; +			continue; +		} +		/* Compare to right child */ +		cpos += 1; +		if (cpos >= heap->cnt) +			return; +		val3 = get_heap_comp_val(heap->arr[cpos], cat); +		if (val1 < val3) { +			/* Less than right child, so promote right child */ +			heap->arr[cpos]->hpos = hpos; +			heap->arr[hpos] = heap->arr[cpos]; +			heap->arr[cpos] = lprops; +			lprops->hpos = cpos; +			hpos = cpos; +			continue; +		} +		return; +	} +} + +/** + * add_to_lpt_heap - add LEB properties to a LEB category heap. 
+ * @c: UBIFS file-system description object + * @lprops: LEB properties to add + * @cat: LEB category + * + * This function returns %1 if @lprops is added to the heap for LEB category + * @cat, otherwise %0 is returned because the heap is full. + */ +static int add_to_lpt_heap(struct ubifs_info *c, struct ubifs_lprops *lprops, +			   int cat) +{ +	struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1]; + +	if (heap->cnt >= heap->max_cnt) { +		const int b = LPT_HEAP_SZ / 2 - 1; +		int cpos, val1, val2; + +		/* Compare to some other LEB on the bottom of heap */ +		/* Pick a position kind of randomly */ +		cpos = (((size_t)lprops >> 4) & b) + b; +		ubifs_assert(cpos >= b); +		ubifs_assert(cpos < LPT_HEAP_SZ); +		ubifs_assert(cpos < heap->cnt); + +		val1 = get_heap_comp_val(lprops, cat); +		val2 = get_heap_comp_val(heap->arr[cpos], cat); +		if (val1 > val2) { +			struct ubifs_lprops *lp; + +			lp = heap->arr[cpos]; +			lp->flags &= ~LPROPS_CAT_MASK; +			lp->flags |= LPROPS_UNCAT; +			list_add(&lp->list, &c->uncat_list); +			lprops->hpos = cpos; +			heap->arr[cpos] = lprops; +			move_up_lpt_heap(c, heap, lprops, cat); +			dbg_check_heap(c, heap, cat, lprops->hpos); +			return 1; /* Added to heap */ +		} +		dbg_check_heap(c, heap, cat, -1); +		return 0; /* Not added to heap */ +	} else { +		lprops->hpos = heap->cnt++; +		heap->arr[lprops->hpos] = lprops; +		move_up_lpt_heap(c, heap, lprops, cat); +		dbg_check_heap(c, heap, cat, lprops->hpos); +		return 1; /* Added to heap */ +	} +} + +/** + * remove_from_lpt_heap - remove LEB properties from a LEB category heap. 
+ * @c: UBIFS file-system description object + * @lprops: LEB properties to remove + * @cat: LEB category + */ +static void remove_from_lpt_heap(struct ubifs_info *c, +				 struct ubifs_lprops *lprops, int cat) +{ +	struct ubifs_lpt_heap *heap; +	int hpos = lprops->hpos; + +	heap = &c->lpt_heap[cat - 1]; +	ubifs_assert(hpos >= 0 && hpos < heap->cnt); +	ubifs_assert(heap->arr[hpos] == lprops); +	heap->cnt -= 1; +	if (hpos < heap->cnt) { +		heap->arr[hpos] = heap->arr[heap->cnt]; +		heap->arr[hpos]->hpos = hpos; +		adjust_lpt_heap(c, heap, heap->arr[hpos], hpos, cat); +	} +	dbg_check_heap(c, heap, cat, -1); +} + +/** + * lpt_heap_replace - replace lprops in a category heap. + * @c: UBIFS file-system description object + * @old_lprops: LEB properties to replace + * @new_lprops: LEB properties with which to replace + * @cat: LEB category + * + * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode) + * and the lprops that the pnode contains.  When that happens, references in + * the category heaps to those lprops must be updated to point to the new + * lprops.  This function does that. + */ +static void lpt_heap_replace(struct ubifs_info *c, +			     struct ubifs_lprops *old_lprops, +			     struct ubifs_lprops *new_lprops, int cat) +{ +	struct ubifs_lpt_heap *heap; +	int hpos = new_lprops->hpos; + +	heap = &c->lpt_heap[cat - 1]; +	heap->arr[hpos] = new_lprops; +} + +/** + * ubifs_add_to_cat - add LEB properties to a category list or heap. + * @c: UBIFS file-system description object + * @lprops: LEB properties to add + * @cat: LEB category to which to add + * + * LEB properties are categorized to enable fast find operations. 
+ */ +void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, +		      int cat) +{ +	switch (cat) { +	case LPROPS_DIRTY: +	case LPROPS_DIRTY_IDX: +	case LPROPS_FREE: +		if (add_to_lpt_heap(c, lprops, cat)) +			break; +		/* No more room on heap so make it uncategorized */ +		cat = LPROPS_UNCAT; +		/* Fall through */ +	case LPROPS_UNCAT: +		list_add(&lprops->list, &c->uncat_list); +		break; +	case LPROPS_EMPTY: +		list_add(&lprops->list, &c->empty_list); +		break; +	case LPROPS_FREEABLE: +		list_add(&lprops->list, &c->freeable_list); +		c->freeable_cnt += 1; +		break; +	case LPROPS_FRDI_IDX: +		list_add(&lprops->list, &c->frdi_idx_list); +		break; +	default: +		ubifs_assert(0); +	} +	lprops->flags &= ~LPROPS_CAT_MASK; +	lprops->flags |= cat; +} + +/** + * ubifs_remove_from_cat - remove LEB properties from a category list or heap. + * @c: UBIFS file-system description object + * @lprops: LEB properties to remove + * @cat: LEB category from which to remove + * + * LEB properties are categorized to enable fast find operations. + */ +static void ubifs_remove_from_cat(struct ubifs_info *c, +				  struct ubifs_lprops *lprops, int cat) +{ +	switch (cat) { +	case LPROPS_DIRTY: +	case LPROPS_DIRTY_IDX: +	case LPROPS_FREE: +		remove_from_lpt_heap(c, lprops, cat); +		break; +	case LPROPS_FREEABLE: +		c->freeable_cnt -= 1; +		ubifs_assert(c->freeable_cnt >= 0); +		/* Fall through */ +	case LPROPS_UNCAT: +	case LPROPS_EMPTY: +	case LPROPS_FRDI_IDX: +		ubifs_assert(!list_empty(&lprops->list)); +		list_del(&lprops->list); +		break; +	default: +		ubifs_assert(0); +	} +} + +/** + * ubifs_replace_cat - replace lprops in a category list or heap. + * @c: UBIFS file-system description object + * @old_lprops: LEB properties to replace + * @new_lprops: LEB properties with which to replace + * + * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode) + * and the lprops that the pnode contains. 
When that happens, references in + * category lists and heaps must be replaced. This function does that. + */ +void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, +		       struct ubifs_lprops *new_lprops) +{ +	int cat; + +	cat = new_lprops->flags & LPROPS_CAT_MASK; +	switch (cat) { +	case LPROPS_DIRTY: +	case LPROPS_DIRTY_IDX: +	case LPROPS_FREE: +		lpt_heap_replace(c, old_lprops, new_lprops, cat); +		break; +	case LPROPS_UNCAT: +	case LPROPS_EMPTY: +	case LPROPS_FREEABLE: +	case LPROPS_FRDI_IDX: +		list_replace(&old_lprops->list, &new_lprops->list); +		break; +	default: +		ubifs_assert(0); +	} +} + +/** + * ubifs_ensure_cat - ensure LEB properties are categorized. + * @c: UBIFS file-system description object + * @lprops: LEB properties + * + * A LEB may have fallen off of the bottom of a heap, and ended up as + * uncategorized even though it has enough space for us now. If that is the case + * this function will put the LEB back onto a heap. + */ +void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops) +{ +	int cat = lprops->flags & LPROPS_CAT_MASK; + +	if (cat != LPROPS_UNCAT) +		return; +	cat = ubifs_categorize_lprops(c, lprops); +	if (cat == LPROPS_UNCAT) +		return; +	ubifs_remove_from_cat(c, lprops, LPROPS_UNCAT); +	ubifs_add_to_cat(c, lprops, cat); +} + +/** + * ubifs_categorize_lprops - categorize LEB properties. + * @c: UBIFS file-system description object + * @lprops: LEB properties to categorize + * + * LEB properties are categorized to enable fast find operations. This function + * returns the LEB category to which the LEB properties belong. Note however + * that if the LEB category is stored as a heap and the heap is full, the + * LEB properties may have their category changed to %LPROPS_UNCAT. 
+ */ +int ubifs_categorize_lprops(const struct ubifs_info *c, +			    const struct ubifs_lprops *lprops) +{ +	if (lprops->flags & LPROPS_TAKEN) +		return LPROPS_UNCAT; + +	if (lprops->free == c->leb_size) { +		ubifs_assert(!(lprops->flags & LPROPS_INDEX)); +		return LPROPS_EMPTY; +	} + +	if (lprops->free + lprops->dirty == c->leb_size) { +		if (lprops->flags & LPROPS_INDEX) +			return LPROPS_FRDI_IDX; +		else +			return LPROPS_FREEABLE; +	} + +	if (lprops->flags & LPROPS_INDEX) { +		if (lprops->dirty + lprops->free >= c->min_idx_node_sz) +			return LPROPS_DIRTY_IDX; +	} else { +		if (lprops->dirty >= c->dead_wm && +		    lprops->dirty > lprops->free) +			return LPROPS_DIRTY; +		if (lprops->free > 0) +			return LPROPS_FREE; +	} + +	return LPROPS_UNCAT; +} + +/** + * change_category - change LEB properties category. + * @c: UBIFS file-system description object + * @lprops: LEB properties to recategorize + * + * LEB properties are categorized to enable fast find operations. When the LEB + * properties change they must be recategorized. + */ +static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) +{ +	int old_cat = lprops->flags & LPROPS_CAT_MASK; +	int new_cat = ubifs_categorize_lprops(c, lprops); + +	if (old_cat == new_cat) { +		struct ubifs_lpt_heap *heap = &c->lpt_heap[new_cat - 1]; + +		/* lprops on a heap now must be moved up or down */ +		if (new_cat < 1 || new_cat > LPROPS_HEAP_CNT) +			return; /* Not on a heap */ +		heap = &c->lpt_heap[new_cat - 1]; +		adjust_lpt_heap(c, heap, lprops, lprops->hpos, new_cat); +	} else { +		ubifs_remove_from_cat(c, lprops, old_cat); +		ubifs_add_to_cat(c, lprops, new_cat); +	} +} + +/** + * calc_dark - calculate LEB dark space size. + * @c: the UBIFS file-system description object + * @spc: amount of free and dirty space in the LEB + * + * This function calculates amount of dark space in an LEB which has @spc bytes + * of free and dirty space. Returns the calculations result. 
+ * + * Dark space is the space which is not always usable - it depends on which + * nodes are written in which order. E.g., if an LEB has only 512 free bytes, + * it is dark space, because it cannot fit a large data node. So UBIFS cannot + * count on this LEB and treat these 512 bytes as usable because it is not true + * if, for example, only big chunks of uncompressible data will be written to + * the FS. + */ +static int calc_dark(struct ubifs_info *c, int spc) +{ +	ubifs_assert(!(spc & 7)); + +	if (spc < c->dark_wm) +		return spc; + +	/* +	 * If we have slightly more space then the dark space watermark, we can +	 * anyway safely assume it we'll be able to write a node of the +	 * smallest size there. +	 */ +	if (spc - c->dark_wm < MIN_WRITE_SZ) +		return spc - MIN_WRITE_SZ; + +	return c->dark_wm; +} + +/** + * is_lprops_dirty - determine if LEB properties are dirty. + * @c: the UBIFS file-system description object + * @lprops: LEB properties to test + */ +static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) +{ +	struct ubifs_pnode *pnode; +	int pos; + +	pos = (lprops->lnum - c->main_first) & (UBIFS_LPT_FANOUT - 1); +	pnode = (struct ubifs_pnode *)container_of(lprops - pos, +						   struct ubifs_pnode, +						   lprops[0]); +	return !test_bit(COW_ZNODE, &pnode->flags) && +	       test_bit(DIRTY_CNODE, &pnode->flags); +} + +/** + * ubifs_change_lp - change LEB properties. + * @c: the UBIFS file-system description object + * @lp: LEB properties to change + * @free: new free space amount + * @dirty: new dirty space amount + * @flags: new flags + * @idx_gc_cnt: change to the count of idx_gc list + * + * This function changes LEB properties (@free, @dirty or @flag). However, the + * property which has the %LPROPS_NC value is not changed. Returns a pointer to + * the updated LEB properties on success and a negative error code on failure. 
+ * + * Note, the LEB properties may have had to be copied (due to COW) and + * consequently the pointer returned may not be the same as the pointer + * passed. + */ +const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, +					   const struct ubifs_lprops *lp, +					   int free, int dirty, int flags, +					   int idx_gc_cnt) +{ +	/* +	 * This is the only function that is allowed to change lprops, so we +	 * discard the const qualifier. +	 */ +	struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp; + +	dbg_lp("LEB %d, free %d, dirty %d, flags %d", +	       lprops->lnum, free, dirty, flags); + +	ubifs_assert(mutex_is_locked(&c->lp_mutex)); +	ubifs_assert(c->lst.empty_lebs >= 0 && +		     c->lst.empty_lebs <= c->main_lebs); +	ubifs_assert(c->freeable_cnt >= 0); +	ubifs_assert(c->freeable_cnt <= c->main_lebs); +	ubifs_assert(c->lst.taken_empty_lebs >= 0); +	ubifs_assert(c->lst.taken_empty_lebs <= c->lst.empty_lebs); +	ubifs_assert(!(c->lst.total_free & 7) && !(c->lst.total_dirty & 7)); +	ubifs_assert(!(c->lst.total_dead & 7) && !(c->lst.total_dark & 7)); +	ubifs_assert(!(c->lst.total_used & 7)); +	ubifs_assert(free == LPROPS_NC || free >= 0); +	ubifs_assert(dirty == LPROPS_NC || dirty >= 0); + +	if (!is_lprops_dirty(c, lprops)) { +		lprops = ubifs_lpt_lookup_dirty(c, lprops->lnum); +		if (IS_ERR(lprops)) +			return lprops; +	} else +		ubifs_assert(lprops == ubifs_lpt_lookup_dirty(c, lprops->lnum)); + +	ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7)); + +	spin_lock(&c->space_lock); +	if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) +		c->lst.taken_empty_lebs -= 1; + +	if (!(lprops->flags & LPROPS_INDEX)) { +		int old_spc; + +		old_spc = lprops->free + lprops->dirty; +		if (old_spc < c->dead_wm) +			c->lst.total_dead -= old_spc; +		else +			c->lst.total_dark -= calc_dark(c, old_spc); + +		c->lst.total_used -= c->leb_size - old_spc; +	} + +	if (free != LPROPS_NC) { +		free = ALIGN(free, 8); +		c->lst.total_free += free - 
lprops->free; + +		/* Increase or decrease empty LEBs counter if needed */ +		if (free == c->leb_size) { +			if (lprops->free != c->leb_size) +				c->lst.empty_lebs += 1; +		} else if (lprops->free == c->leb_size) +			c->lst.empty_lebs -= 1; +		lprops->free = free; +	} + +	if (dirty != LPROPS_NC) { +		dirty = ALIGN(dirty, 8); +		c->lst.total_dirty += dirty - lprops->dirty; +		lprops->dirty = dirty; +	} + +	if (flags != LPROPS_NC) { +		/* Take care about indexing LEBs counter if needed */ +		if ((lprops->flags & LPROPS_INDEX)) { +			if (!(flags & LPROPS_INDEX)) +				c->lst.idx_lebs -= 1; +		} else if (flags & LPROPS_INDEX) +			c->lst.idx_lebs += 1; +		lprops->flags = flags; +	} + +	if (!(lprops->flags & LPROPS_INDEX)) { +		int new_spc; + +		new_spc = lprops->free + lprops->dirty; +		if (new_spc < c->dead_wm) +			c->lst.total_dead += new_spc; +		else +			c->lst.total_dark += calc_dark(c, new_spc); + +		c->lst.total_used += c->leb_size - new_spc; +	} + +	if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) +		c->lst.taken_empty_lebs += 1; + +	change_category(c, lprops); +	c->idx_gc_cnt += idx_gc_cnt; +	spin_unlock(&c->space_lock); +	return lprops; +} + +/** + * ubifs_get_lp_stats - get lprops statistics. + * @c: UBIFS file-system description object + * @st: return statistics + */ +void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst) +{ +	spin_lock(&c->space_lock); +	memcpy(lst, &c->lst, sizeof(struct ubifs_lp_stats)); +	spin_unlock(&c->space_lock); +} + +/** + * ubifs_change_one_lp - change LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to change properties for + * @free: amount of free space + * @dirty: amount of dirty space + * @flags_set: flags to set + * @flags_clean: flags to clean + * @idx_gc_cnt: change to the count of idx_gc list + * + * This function changes properties of LEB @lnum. It is a helper wrapper over + * 'ubifs_change_lp()' which hides lprops get/release. 
The arguments are the + * same as in case of 'ubifs_change_lp()'. Returns zero in case of success and + * a negative error code in case of failure. + */ +int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, +			int flags_set, int flags_clean, int idx_gc_cnt) +{ +	int err = 0, flags; +	const struct ubifs_lprops *lp; + +	ubifs_get_lprops(c); + +	lp = ubifs_lpt_lookup_dirty(c, lnum); +	if (IS_ERR(lp)) { +		err = PTR_ERR(lp); +		goto out; +	} + +	flags = (lp->flags | flags_set) & ~flags_clean; +	lp = ubifs_change_lp(c, lp, free, dirty, flags, idx_gc_cnt); +	if (IS_ERR(lp)) +		err = PTR_ERR(lp); + +out: +	ubifs_release_lprops(c); +	return err; +} + +/** + * ubifs_update_one_lp - update LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to change properties for + * @free: amount of free space + * @dirty: amount of dirty space to add + * @flags_set: flags to set + * @flags_clean: flags to clean + * + * This function is the same as 'ubifs_change_one_lp()' but @dirty is added to + * current dirty space, not substitutes it. + */ +int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, +			int flags_set, int flags_clean) +{ +	int err = 0, flags; +	const struct ubifs_lprops *lp; + +	ubifs_get_lprops(c); + +	lp = ubifs_lpt_lookup_dirty(c, lnum); +	if (IS_ERR(lp)) { +		err = PTR_ERR(lp); +		goto out; +	} + +	flags = (lp->flags | flags_set) & ~flags_clean; +	lp = ubifs_change_lp(c, lp, free, lp->dirty + dirty, flags, 0); +	if (IS_ERR(lp)) +		err = PTR_ERR(lp); + +out: +	ubifs_release_lprops(c); +	return err; +} + +/** + * ubifs_read_one_lp - read LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to read properties for + * @lp: where to store read properties + * + * This helper function reads properties of a LEB @lnum and stores them in @lp. + * Returns zero in case of success and a negative error code in case of + * failure. 
+ */ +int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp) +{ +	int err = 0; +	const struct ubifs_lprops *lpp; + +	ubifs_get_lprops(c); + +	lpp = ubifs_lpt_lookup(c, lnum); +	if (IS_ERR(lpp)) { +		err = PTR_ERR(lpp); +		goto out; +	} + +	memcpy(lp, lpp, sizeof(struct ubifs_lprops)); + +out: +	ubifs_release_lprops(c); +	return err; +} + +/** + * ubifs_fast_find_free - try to find a LEB with free space quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for a LEB with free space or %NULL if + * the function is unable to find a LEB quickly. + */ +const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c) +{ +	struct ubifs_lprops *lprops; +	struct ubifs_lpt_heap *heap; + +	ubifs_assert(mutex_is_locked(&c->lp_mutex)); + +	heap = &c->lpt_heap[LPROPS_FREE - 1]; +	if (heap->cnt == 0) +		return NULL; + +	lprops = heap->arr[0]; +	ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); +	ubifs_assert(!(lprops->flags & LPROPS_INDEX)); +	return lprops; +} + +/** + * ubifs_fast_find_empty - try to find an empty LEB quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for an empty LEB or %NULL if the + * function is unable to find an empty LEB quickly. + */ +const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c) +{ +	struct ubifs_lprops *lprops; + +	ubifs_assert(mutex_is_locked(&c->lp_mutex)); + +	if (list_empty(&c->empty_list)) +		return NULL; + +	lprops = list_entry(c->empty_list.next, struct ubifs_lprops, list); +	ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); +	ubifs_assert(!(lprops->flags & LPROPS_INDEX)); +	ubifs_assert(lprops->free == c->leb_size); +	return lprops; +} + +/** + * ubifs_fast_find_freeable - try to find a freeable LEB quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for a freeable LEB or %NULL if the + * function is unable to find a freeable LEB quickly. 
+ */ +const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c) +{ +	struct ubifs_lprops *lprops; + +	ubifs_assert(mutex_is_locked(&c->lp_mutex)); + +	if (list_empty(&c->freeable_list)) +		return NULL; + +	lprops = list_entry(c->freeable_list.next, struct ubifs_lprops, list); +	ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); +	ubifs_assert(!(lprops->flags & LPROPS_INDEX)); +	ubifs_assert(lprops->free + lprops->dirty == c->leb_size); +	ubifs_assert(c->freeable_cnt > 0); +	return lprops; +} + +/** + * ubifs_fast_find_frdi_idx - try to find a freeable index LEB quickly. + * @c: the UBIFS file-system description object + * + * This function returns LEB properties for a freeable index LEB or %NULL if the + * function is unable to find a freeable index LEB quickly. + */ +const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c) +{ +	struct ubifs_lprops *lprops; + +	ubifs_assert(mutex_is_locked(&c->lp_mutex)); + +	if (list_empty(&c->frdi_idx_list)) +		return NULL; + +	lprops = list_entry(c->frdi_idx_list.next, struct ubifs_lprops, list); +	ubifs_assert(!(lprops->flags & LPROPS_TAKEN)); +	ubifs_assert((lprops->flags & LPROPS_INDEX)); +	ubifs_assert(lprops->free + lprops->dirty == c->leb_size); +	return lprops; +} diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c new file mode 100644 index 000000000..1a50d4cc2 --- /dev/null +++ b/fs/ubifs/lpt.c @@ -0,0 +1,1105 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. 
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Authors: Adrian Hunter
 *          Artem Bityutskiy (Битюцкий Артём)
 */

/*
 * This file implements the LEB properties tree (LPT) area. The LPT area
 * contains the LEB properties tree, a table of LPT area eraseblocks (ltab), and
 * (for the "big" model) a table of saved LEB numbers (lsave). The LPT area sits
 * between the log and the orphan area.
 *
 * The LPT area is like a miniature self-contained file system. It is required
 * that it never runs out of space, is fast to access and update, and scales
 * logarithmically. The LEB properties tree is implemented as a wandering tree
 * much like the TNC, and the LPT area has its own garbage collection.
 *
 * The LPT has two slightly different forms called the "small model" and the
 * "big model". The small model is used when the entire LEB properties table
 * can be written into a single eraseblock. In that case, garbage collection
 * consists of just writing the whole table, which therefore makes all other
 * eraseblocks reusable. In the case of the big model, dirty eraseblocks are
 * selected for garbage collection, which consists of marking the clean nodes in
 * that LEB as dirty, and then only the dirty nodes are written out. Also, in
 * the case of the big model, a table of LEB numbers is saved so that the entire
 * LPT does not need to be scanned looking for empty eraseblocks when UBIFS is
 * first mounted.
 */

#include "ubifs.h"
#include "crc16.h"
#include <linux/math64.h>

/**
 * do_calc_lpt_geom - calculate sizes for the LPT area.
 * @c: the UBIFS file-system description object
 *
 * Calculate the sizes of LPT bit fields, nodes, and tree, based on the
 * properties of the flash and whether LPT is "big" (c->big_lpt).
+ */ +static void do_calc_lpt_geom(struct ubifs_info *c) +{ +	int i, n, bits, per_leb_wastage, max_pnode_cnt; +	long long sz, tot_wastage; + +	n = c->main_lebs + c->max_leb_cnt - c->leb_cnt; +	max_pnode_cnt = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT); + +	c->lpt_hght = 1; +	n = UBIFS_LPT_FANOUT; +	while (n < max_pnode_cnt) { +		c->lpt_hght += 1; +		n <<= UBIFS_LPT_FANOUT_SHIFT; +	} + +	c->pnode_cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT); + +	n = DIV_ROUND_UP(c->pnode_cnt, UBIFS_LPT_FANOUT); +	c->nnode_cnt = n; +	for (i = 1; i < c->lpt_hght; i++) { +		n = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT); +		c->nnode_cnt += n; +	} + +	c->space_bits = fls(c->leb_size) - 3; +	c->lpt_lnum_bits = fls(c->lpt_lebs); +	c->lpt_offs_bits = fls(c->leb_size - 1); +	c->lpt_spc_bits = fls(c->leb_size); + +	n = DIV_ROUND_UP(c->max_leb_cnt, UBIFS_LPT_FANOUT); +	c->pcnt_bits = fls(n - 1); + +	c->lnum_bits = fls(c->max_leb_cnt - 1); + +	bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + +	       (c->big_lpt ? c->pcnt_bits : 0) + +	       (c->space_bits * 2 + 1) * UBIFS_LPT_FANOUT; +	c->pnode_sz = (bits + 7) / 8; + +	bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + +	       (c->big_lpt ? 
c->pcnt_bits : 0) + +	       (c->lpt_lnum_bits + c->lpt_offs_bits) * UBIFS_LPT_FANOUT; +	c->nnode_sz = (bits + 7) / 8; + +	bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + +	       c->lpt_lebs * c->lpt_spc_bits * 2; +	c->ltab_sz = (bits + 7) / 8; + +	bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS + +	       c->lnum_bits * c->lsave_cnt; +	c->lsave_sz = (bits + 7) / 8; + +	/* Calculate the minimum LPT size */ +	c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; +	c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; +	c->lpt_sz += c->ltab_sz; +	if (c->big_lpt) +		c->lpt_sz += c->lsave_sz; + +	/* Add wastage */ +	sz = c->lpt_sz; +	per_leb_wastage = max_t(int, c->pnode_sz, c->nnode_sz); +	sz += per_leb_wastage; +	tot_wastage = per_leb_wastage; +	while (sz > c->leb_size) { +		sz += per_leb_wastage; +		sz -= c->leb_size; +		tot_wastage += per_leb_wastage; +	} +	tot_wastage += ALIGN(sz, c->min_io_size) - sz; +	c->lpt_sz += tot_wastage; +} + +/** + * ubifs_calc_lpt_geom - calculate and check sizes for the LPT area. + * @c: the UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_calc_lpt_geom(struct ubifs_info *c) +{ +	int lebs_needed; +	long long sz; + +	do_calc_lpt_geom(c); + +	/* Verify that lpt_lebs is big enough */ +	sz = c->lpt_sz * 2; /* Must have at least 2 times the size */ +	lebs_needed = div_u64(sz + c->leb_size - 1, c->leb_size); +	if (lebs_needed > c->lpt_lebs) { +		ubifs_err("too few LPT LEBs"); +		return -EINVAL; +	} + +	/* Verify that ltab fits in a single LEB (since ltab is a single node */ +	if (c->ltab_sz > c->leb_size) { +		ubifs_err("LPT ltab too big"); +		return -EINVAL; +	} + +	c->check_lpt_free = c->big_lpt; +	return 0; +} + +/** + * ubifs_unpack_bits - unpack bit fields. 
+ * @addr: address at which to unpack (passed and next address returned) + * @pos: bit position at which to unpack (passed and next position returned) + * @nrbits: number of bits of value to unpack (1-32) + * + * This functions returns the value unpacked. + */ +uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits) +{ +	const int k = 32 - nrbits; +	uint8_t *p = *addr; +	int b = *pos; +	uint32_t uninitialized_var(val); +	const int bytes = (nrbits + b + 7) >> 3; + +	ubifs_assert(nrbits > 0); +	ubifs_assert(nrbits <= 32); +	ubifs_assert(*pos >= 0); +	ubifs_assert(*pos < 8); +	if (b) { +		switch (bytes) { +		case 2: +			val = p[1]; +			break; +		case 3: +			val = p[1] | ((uint32_t)p[2] << 8); +			break; +		case 4: +			val = p[1] | ((uint32_t)p[2] << 8) | +				     ((uint32_t)p[3] << 16); +			break; +		case 5: +			val = p[1] | ((uint32_t)p[2] << 8) | +				     ((uint32_t)p[3] << 16) | +				     ((uint32_t)p[4] << 24); +		} +		val <<= (8 - b); +		val |= *p >> b; +		nrbits += b; +	} else { +		switch (bytes) { +		case 1: +			val = p[0]; +			break; +		case 2: +			val = p[0] | ((uint32_t)p[1] << 8); +			break; +		case 3: +			val = p[0] | ((uint32_t)p[1] << 8) | +				     ((uint32_t)p[2] << 16); +			break; +		case 4: +			val = p[0] | ((uint32_t)p[1] << 8) | +				     ((uint32_t)p[2] << 16) | +				     ((uint32_t)p[3] << 24); +			break; +		} +	} +	val <<= k; +	val >>= k; +	b = nrbits & 7; +	p += nrbits >> 3; +	*addr = p; +	*pos = b; +	ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32); +	return val; +} + +/** + * ubifs_add_lpt_dirt - add dirty space to LPT LEB properties. 
+ * @c: UBIFS file-system description object + * @lnum: LEB number to which to add dirty space + * @dirty: amount of dirty space to add + */ +void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty) +{ +	if (!dirty || !lnum) +		return; +	dbg_lp("LEB %d add %d to %d", +	       lnum, dirty, c->ltab[lnum - c->lpt_first].dirty); +	ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last); +	c->ltab[lnum - c->lpt_first].dirty += dirty; +} + +/** + * ubifs_add_nnode_dirt - add dirty space to LPT LEB properties. + * @c: UBIFS file-system description object + * @nnode: nnode for which to add dirt + */ +void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode) +{ +	struct ubifs_nnode *np = nnode->parent; + +	if (np) +		ubifs_add_lpt_dirt(c, np->nbranch[nnode->iip].lnum, +				   c->nnode_sz); +	else { +		ubifs_add_lpt_dirt(c, c->lpt_lnum, c->nnode_sz); +		if (!(c->lpt_drty_flgs & LTAB_DIRTY)) { +			c->lpt_drty_flgs |= LTAB_DIRTY; +			ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz); +		} +	} +} + +/** + * add_pnode_dirt - add dirty space to LPT LEB properties. + * @c: UBIFS file-system description object + * @pnode: pnode for which to add dirt + */ +static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ +	ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum, +			   c->pnode_sz); +} + +/** + * calc_nnode_num_from_parent - calculate nnode number. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * The nnode number is a number that uniquely identifies a nnode and can be used + * easily to traverse the tree from the root to that nnode. + * + * This function calculates and returns the nnode number based on the parent's + * nnode number and the index in parent. 
+ */ +static int calc_nnode_num_from_parent(const struct ubifs_info *c, +				      struct ubifs_nnode *parent, int iip) +{ +	int num, shft; + +	if (!parent) +		return 1; +	shft = (c->lpt_hght - parent->level) * UBIFS_LPT_FANOUT_SHIFT; +	num = parent->num ^ (1 << shft); +	num |= (UBIFS_LPT_FANOUT + iip) << shft; +	return num; +} + +/** + * calc_pnode_num_from_parent - calculate pnode number. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * The pnode number is a number that uniquely identifies a pnode and can be used + * easily to traverse the tree from the root to that pnode. + * + * This function calculates and returns the pnode number based on the parent's + * nnode number and the index in parent. + */ +static int calc_pnode_num_from_parent(const struct ubifs_info *c, +				      struct ubifs_nnode *parent, int iip) +{ +	int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0; + +	for (i = 0; i < n; i++) { +		num <<= UBIFS_LPT_FANOUT_SHIFT; +		num |= pnum & (UBIFS_LPT_FANOUT - 1); +		pnum >>= UBIFS_LPT_FANOUT_SHIFT; +	} +	num <<= UBIFS_LPT_FANOUT_SHIFT; +	num |= iip; +	return num; +} + +/** + * update_cats - add LEB properties of a pnode to LEB category lists and heaps. + * @c: UBIFS file-system description object + * @pnode: pnode + * + * When a pnode is loaded into memory, the LEB properties it contains are added, + * by this function, to the LEB category lists and heaps. + */ +static void update_cats(struct ubifs_info *c, struct ubifs_pnode *pnode) +{ +	int i; + +	for (i = 0; i < UBIFS_LPT_FANOUT; i++) { +		int cat = pnode->lprops[i].flags & LPROPS_CAT_MASK; +		int lnum = pnode->lprops[i].lnum; + +		if (!lnum) +			return; +		ubifs_add_to_cat(c, &pnode->lprops[i], cat); +	} +} + +/** + * replace_cats - add LEB properties of a pnode to LEB category lists and heaps. 
+ * @c: UBIFS file-system description object + * @old_pnode: pnode copied + * @new_pnode: pnode copy + * + * During commit it is sometimes necessary to copy a pnode + * (see dirty_cow_pnode).  When that happens, references in + * category lists and heaps must be replaced.  This function does that. + */ +static void replace_cats(struct ubifs_info *c, struct ubifs_pnode *old_pnode, +			 struct ubifs_pnode *new_pnode) +{ +	int i; + +	for (i = 0; i < UBIFS_LPT_FANOUT; i++) { +		if (!new_pnode->lprops[i].lnum) +			return; +		ubifs_replace_cat(c, &old_pnode->lprops[i], +				  &new_pnode->lprops[i]); +	} +} + +/** + * check_lpt_crc - check LPT node crc is correct. + * @c: UBIFS file-system description object + * @buf: buffer containing node + * @len: length of node + * + * This function returns %0 on success and a negative error code on failure. + */ +static int check_lpt_crc(void *buf, int len) +{ +	int pos = 0; +	uint8_t *addr = buf; +	uint16_t crc, calc_crc; + +	crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS); +	calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES, +			 len - UBIFS_LPT_CRC_BYTES); +	if (crc != calc_crc) { +		ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc, +			  calc_crc); +		dbg_dump_stack(); +		return -EINVAL; +	} +	return 0; +} + +/** + * check_lpt_type - check LPT node type is correct. + * @c: UBIFS file-system description object + * @addr: address of type bit field is passed and returned updated here + * @pos: position of type bit field is passed and returned updated here + * @type: expected type + * + * This function returns %0 on success and a negative error code on failure. + */ +static int check_lpt_type(uint8_t **addr, int *pos, int type) +{ +	int node_type; + +	node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS); +	if (node_type != type) { +		ubifs_err("invalid type (%d) in LPT node type %d", node_type, +			  type); +		dbg_dump_stack(); +		return -EINVAL; +	} +	return 0; +} + +/** + * unpack_pnode - unpack a pnode. 
+ * @c: UBIFS file-system description object + * @buf: buffer containing packed pnode to unpack + * @pnode: pnode structure to fill + * + * This function returns %0 on success and a negative error code on failure. + */ +static int unpack_pnode(const struct ubifs_info *c, void *buf, +			struct ubifs_pnode *pnode) +{ +	uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; +	int i, pos = 0, err; + +	err = check_lpt_type(&addr, &pos, UBIFS_LPT_PNODE); +	if (err) +		return err; +	if (c->big_lpt) +		pnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); +	for (i = 0; i < UBIFS_LPT_FANOUT; i++) { +		struct ubifs_lprops * const lprops = &pnode->lprops[i]; + +		lprops->free = ubifs_unpack_bits(&addr, &pos, c->space_bits); +		lprops->free <<= 3; +		lprops->dirty = ubifs_unpack_bits(&addr, &pos, c->space_bits); +		lprops->dirty <<= 3; + +		if (ubifs_unpack_bits(&addr, &pos, 1)) +			lprops->flags = LPROPS_INDEX; +		else +			lprops->flags = 0; +		lprops->flags |= ubifs_categorize_lprops(c, lprops); +	} +	err = check_lpt_crc(buf, c->pnode_sz); +	return err; +} + +/** + * ubifs_unpack_nnode - unpack a nnode. + * @c: UBIFS file-system description object + * @buf: buffer containing packed nnode to unpack + * @nnode: nnode structure to fill + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf, +		       struct ubifs_nnode *nnode) +{ +	uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; +	int i, pos = 0, err; + +	err = check_lpt_type(&addr, &pos, UBIFS_LPT_NNODE); +	if (err) +		return err; +	if (c->big_lpt) +		nnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits); +	for (i = 0; i < UBIFS_LPT_FANOUT; i++) { +		int lnum; + +		lnum = ubifs_unpack_bits(&addr, &pos, c->lpt_lnum_bits) + +		       c->lpt_first; +		if (lnum == c->lpt_last + 1) +			lnum = 0; +		nnode->nbranch[i].lnum = lnum; +		nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos, +						     c->lpt_offs_bits); +	} +	err = check_lpt_crc(buf, c->nnode_sz); +	return err; +} + +/** + * unpack_ltab - unpack the LPT's own lprops table. + * @c: UBIFS file-system description object + * @buf: buffer from which to unpack + * + * This function returns %0 on success and a negative error code on failure. + */ +static int unpack_ltab(const struct ubifs_info *c, void *buf) +{ +	uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES; +	int i, pos = 0, err; + +	err = check_lpt_type(&addr, &pos, UBIFS_LPT_LTAB); +	if (err) +		return err; +	for (i = 0; i < c->lpt_lebs; i++) { +		int free = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits); +		int dirty = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits); + +		if (free < 0 || free > c->leb_size || dirty < 0 || +		    dirty > c->leb_size || free + dirty > c->leb_size) +			return -EINVAL; + +		c->ltab[i].free = free; +		c->ltab[i].dirty = dirty; +		c->ltab[i].tgc = 0; +		c->ltab[i].cmt = 0; +	} +	err = check_lpt_crc(buf, c->ltab_sz); +	return err; +} + +/** + * validate_nnode - validate a nnode. + * @c: UBIFS file-system description object + * @nnode: nnode to validate + * @parent: parent nnode (or NULL for the root nnode) + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +static int validate_nnode(const struct ubifs_info *c, struct ubifs_nnode *nnode, +			  struct ubifs_nnode *parent, int iip) +{ +	int i, lvl, max_offs; + +	if (c->big_lpt) { +		int num = calc_nnode_num_from_parent(c, parent, iip); + +		if (nnode->num != num) +			return -EINVAL; +	} +	lvl = parent ? parent->level - 1 : c->lpt_hght; +	if (lvl < 1) +		return -EINVAL; +	if (lvl == 1) +		max_offs = c->leb_size - c->pnode_sz; +	else +		max_offs = c->leb_size - c->nnode_sz; +	for (i = 0; i < UBIFS_LPT_FANOUT; i++) { +		int lnum = nnode->nbranch[i].lnum; +		int offs = nnode->nbranch[i].offs; + +		if (lnum == 0) { +			if (offs != 0) +				return -EINVAL; +			continue; +		} +		if (lnum < c->lpt_first || lnum > c->lpt_last) +			return -EINVAL; +		if (offs < 0 || offs > max_offs) +			return -EINVAL; +	} +	return 0; +} + +/** + * validate_pnode - validate a pnode. + * @c: UBIFS file-system description object + * @pnode: pnode to validate + * @parent: parent nnode + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. + */ +static int validate_pnode(const struct ubifs_info *c, struct ubifs_pnode *pnode, +			  struct ubifs_nnode *parent, int iip) +{ +	int i; + +	if (c->big_lpt) { +		int num = calc_pnode_num_from_parent(c, parent, iip); + +		if (pnode->num != num) +			return -EINVAL; +	} +	for (i = 0; i < UBIFS_LPT_FANOUT; i++) { +		int free = pnode->lprops[i].free; +		int dirty = pnode->lprops[i].dirty; + +		if (free < 0 || free > c->leb_size || free % c->min_io_size || +		    (free & 7)) +			return -EINVAL; +		if (dirty < 0 || dirty > c->leb_size || (dirty & 7)) +			return -EINVAL; +		if (dirty + free > c->leb_size) +			return -EINVAL; +	} +	return 0; +} + +/** + * set_pnode_lnum - set LEB numbers on a pnode. + * @c: UBIFS file-system description object + * @pnode: pnode to update + * + * This function calculates the LEB numbers for the LEB properties it contains + * based on the pnode number. 
+ */ +static void set_pnode_lnum(const struct ubifs_info *c, +			   struct ubifs_pnode *pnode) +{ +	int i, lnum; + +	lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + c->main_first; +	for (i = 0; i < UBIFS_LPT_FANOUT; i++) { +		if (lnum >= c->leb_cnt) +			return; +		pnode->lprops[i].lnum = lnum++; +	} +} + +/** + * ubifs_read_nnode - read a nnode from flash and link it to the tree in memory. + * @c: UBIFS file-system description object + * @parent: parent nnode (or NULL for the root) + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) +{ +	struct ubifs_nbranch *branch = NULL; +	struct ubifs_nnode *nnode = NULL; +	void *buf = c->lpt_nod_buf; +	int err, lnum, offs; + +	if (parent) { +		branch = &parent->nbranch[iip]; +		lnum = branch->lnum; +		offs = branch->offs; +	} else { +		lnum = c->lpt_lnum; +		offs = c->lpt_offs; +	} +	nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_NOFS); +	if (!nnode) { +		err = -ENOMEM; +		goto out; +	} +	if (lnum == 0) { +		/* +		 * This nnode was not written which just means that the LEB +		 * properties in the subtree below it describe empty LEBs. We +		 * make the nnode as though we had read it, which in fact means +		 * doing almost nothing. 
+		 */ +		if (c->big_lpt) +			nnode->num = calc_nnode_num_from_parent(c, parent, iip); +	} else { +		err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); +		if (err) +			goto out; +		err = ubifs_unpack_nnode(c, buf, nnode); +		if (err) +			goto out; +	} +	err = validate_nnode(c, nnode, parent, iip); +	if (err) +		goto out; +	if (!c->big_lpt) +		nnode->num = calc_nnode_num_from_parent(c, parent, iip); +	if (parent) { +		branch->nnode = nnode; +		nnode->level = parent->level - 1; +	} else { +		c->nroot = nnode; +		nnode->level = c->lpt_hght; +	} +	nnode->parent = parent; +	nnode->iip = iip; +	return 0; + +out: +	ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); +	kfree(nnode); +	return err; +} + +/** + * read_pnode - read a pnode from flash and link it to the tree in memory. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * This function returns %0 on success and a negative error code on failure. + */ +static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) +{ +	struct ubifs_nbranch *branch; +	struct ubifs_pnode *pnode = NULL; +	void *buf = c->lpt_nod_buf; +	int err, lnum, offs; + +	branch = &parent->nbranch[iip]; +	lnum = branch->lnum; +	offs = branch->offs; +	pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS); +	if (!pnode) { +		err = -ENOMEM; +		goto out; +	} +	if (lnum == 0) { +		/* +		 * This pnode was not written which just means that the LEB +		 * properties in it describe empty LEBs. We make the pnode as +		 * though we had read it. 
+		 */ +		int i; + +		if (c->big_lpt) +			pnode->num = calc_pnode_num_from_parent(c, parent, iip); +		for (i = 0; i < UBIFS_LPT_FANOUT; i++) { +			struct ubifs_lprops * const lprops = &pnode->lprops[i]; + +			lprops->free = c->leb_size; +			lprops->flags = ubifs_categorize_lprops(c, lprops); +		} +	} else { +		err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); +		if (err) +			goto out; +		err = unpack_pnode(c, buf, pnode); +		if (err) +			goto out; +	} +	err = validate_pnode(c, pnode, parent, iip); +	if (err) +		goto out; +	if (!c->big_lpt) +		pnode->num = calc_pnode_num_from_parent(c, parent, iip); +	branch->pnode = pnode; +	pnode->parent = parent; +	pnode->iip = iip; +	set_pnode_lnum(c, pnode); +	c->pnodes_have += 1; +	return 0; + +out: +	ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); +	dbg_dump_pnode(c, pnode, parent, iip); +	dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); +	kfree(pnode); +	return err; +} + +/** + * read_ltab - read LPT's own lprops table. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int read_ltab(struct ubifs_info *c) +{ +	int err; +	void *buf; + +	buf = vmalloc(c->ltab_sz); +	if (!buf) +		return -ENOMEM; +	err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); +	if (err) +		goto out; +	err = unpack_ltab(c, buf); +out: +	vfree(buf); +	return err; +} + +/** + * ubifs_get_nnode - get a nnode. + * @c: UBIFS file-system description object + * @parent: parent nnode (or NULL for the root) + * @iip: index in parent + * + * This function returns a pointer to the nnode on success or a negative error + * code on failure. 
+ */ +struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c, +				    struct ubifs_nnode *parent, int iip) +{ +	struct ubifs_nbranch *branch; +	struct ubifs_nnode *nnode; +	int err; + +	branch = &parent->nbranch[iip]; +	nnode = branch->nnode; +	if (nnode) +		return nnode; +	err = ubifs_read_nnode(c, parent, iip); +	if (err) +		return ERR_PTR(err); +	return branch->nnode; +} + +/** + * ubifs_get_pnode - get a pnode. + * @c: UBIFS file-system description object + * @parent: parent nnode + * @iip: index in parent + * + * This function returns a pointer to the pnode on success or a negative error + * code on failure. + */ +struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c, +				    struct ubifs_nnode *parent, int iip) +{ +	struct ubifs_nbranch *branch; +	struct ubifs_pnode *pnode; +	int err; + +	branch = &parent->nbranch[iip]; +	pnode = branch->pnode; +	if (pnode) +		return pnode; +	err = read_pnode(c, parent, iip); +	if (err) +		return ERR_PTR(err); +	update_cats(c, branch->pnode); +	return branch->pnode; +} + +/** + * ubifs_lpt_lookup - lookup LEB properties in the LPT. + * @c: UBIFS file-system description object + * @lnum: LEB number to lookup + * + * This function returns a pointer to the LEB properties on success or a + * negative error code on failure. 
+ */ +struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum) +{ +	int err, i, h, iip, shft; +	struct ubifs_nnode *nnode; +	struct ubifs_pnode *pnode; + +	if (!c->nroot) { +		err = ubifs_read_nnode(c, NULL, 0); +		if (err) +			return ERR_PTR(err); +	} +	nnode = c->nroot; +	i = lnum - c->main_first; +	shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; +	for (h = 1; h < c->lpt_hght; h++) { +		iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); +		shft -= UBIFS_LPT_FANOUT_SHIFT; +		nnode = ubifs_get_nnode(c, nnode, iip); +		if (IS_ERR(nnode)) +			return ERR_PTR(PTR_ERR(nnode)); +	} +	iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); +	shft -= UBIFS_LPT_FANOUT_SHIFT; +	pnode = ubifs_get_pnode(c, nnode, iip); +	if (IS_ERR(pnode)) +		return ERR_PTR(PTR_ERR(pnode)); +	iip = (i & (UBIFS_LPT_FANOUT - 1)); +	dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, +	       pnode->lprops[iip].free, pnode->lprops[iip].dirty, +	       pnode->lprops[iip].flags); +	return &pnode->lprops[iip]; +} + +/** + * dirty_cow_nnode - ensure a nnode is not being committed. + * @c: UBIFS file-system description object + * @nnode: nnode to check + * + * Returns dirtied nnode on success or negative error code on failure. 
+ */ +static struct ubifs_nnode *dirty_cow_nnode(struct ubifs_info *c, +					   struct ubifs_nnode *nnode) +{ +	struct ubifs_nnode *n; +	int i; + +	if (!test_bit(COW_CNODE, &nnode->flags)) { +		/* nnode is not being committed */ +		if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) { +			c->dirty_nn_cnt += 1; +			ubifs_add_nnode_dirt(c, nnode); +		} +		return nnode; +	} + +	/* nnode is being committed, so copy it */ +	n = kmalloc(sizeof(struct ubifs_nnode), GFP_NOFS); +	if (unlikely(!n)) +		return ERR_PTR(-ENOMEM); + +	memcpy(n, nnode, sizeof(struct ubifs_nnode)); +	n->cnext = NULL; +	__set_bit(DIRTY_CNODE, &n->flags); +	__clear_bit(COW_CNODE, &n->flags); + +	/* The children now have new parent */ +	for (i = 0; i < UBIFS_LPT_FANOUT; i++) { +		struct ubifs_nbranch *branch = &n->nbranch[i]; + +		if (branch->cnode) +			branch->cnode->parent = n; +	} + +	ubifs_assert(!test_bit(OBSOLETE_CNODE, &nnode->flags)); +	__set_bit(OBSOLETE_CNODE, &nnode->flags); + +	c->dirty_nn_cnt += 1; +	ubifs_add_nnode_dirt(c, nnode); +	if (nnode->parent) +		nnode->parent->nbranch[n->iip].nnode = n; +	else +		c->nroot = n; +	return n; +} + +/** + * dirty_cow_pnode - ensure a pnode is not being committed. + * @c: UBIFS file-system description object + * @pnode: pnode to check + * + * Returns dirtied pnode on success or negative error code on failure. 
+ */ +static struct ubifs_pnode *dirty_cow_pnode(struct ubifs_info *c, +					   struct ubifs_pnode *pnode) +{ +	struct ubifs_pnode *p; + +	if (!test_bit(COW_CNODE, &pnode->flags)) { +		/* pnode is not being committed */ +		if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) { +			c->dirty_pn_cnt += 1; +			add_pnode_dirt(c, pnode); +		} +		return pnode; +	} + +	/* pnode is being committed, so copy it */ +	p = kmalloc(sizeof(struct ubifs_pnode), GFP_NOFS); +	if (unlikely(!p)) +		return ERR_PTR(-ENOMEM); + +	memcpy(p, pnode, sizeof(struct ubifs_pnode)); +	p->cnext = NULL; +	__set_bit(DIRTY_CNODE, &p->flags); +	__clear_bit(COW_CNODE, &p->flags); +	replace_cats(c, pnode, p); + +	ubifs_assert(!test_bit(OBSOLETE_CNODE, &pnode->flags)); +	__set_bit(OBSOLETE_CNODE, &pnode->flags); + +	c->dirty_pn_cnt += 1; +	add_pnode_dirt(c, pnode); +	pnode->parent->nbranch[p->iip].pnode = p; +	return p; +} + +/** + * ubifs_lpt_lookup_dirty - lookup LEB properties in the LPT. + * @c: UBIFS file-system description object + * @lnum: LEB number to lookup + * + * This function returns a pointer to the LEB properties on success or a + * negative error code on failure. 
+ */ +struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum) +{ +	int err, i, h, iip, shft; +	struct ubifs_nnode *nnode; +	struct ubifs_pnode *pnode; + +	if (!c->nroot) { +		err = ubifs_read_nnode(c, NULL, 0); +		if (err) +			return ERR_PTR(err); +	} +	nnode = c->nroot; +	nnode = dirty_cow_nnode(c, nnode); +	if (IS_ERR(nnode)) +		return ERR_PTR(PTR_ERR(nnode)); +	i = lnum - c->main_first; +	shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; +	for (h = 1; h < c->lpt_hght; h++) { +		iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); +		shft -= UBIFS_LPT_FANOUT_SHIFT; +		nnode = ubifs_get_nnode(c, nnode, iip); +		if (IS_ERR(nnode)) +			return ERR_PTR(PTR_ERR(nnode)); +		nnode = dirty_cow_nnode(c, nnode); +		if (IS_ERR(nnode)) +			return ERR_PTR(PTR_ERR(nnode)); +	} +	iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); +	shft -= UBIFS_LPT_FANOUT_SHIFT; +	pnode = ubifs_get_pnode(c, nnode, iip); +	if (IS_ERR(pnode)) +		return ERR_PTR(PTR_ERR(pnode)); +	pnode = dirty_cow_pnode(c, pnode); +	if (IS_ERR(pnode)) +		return ERR_PTR(PTR_ERR(pnode)); +	iip = (i & (UBIFS_LPT_FANOUT - 1)); +	dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, +	       pnode->lprops[iip].free, pnode->lprops[iip].dirty, +	       pnode->lprops[iip].flags); +	ubifs_assert(test_bit(DIRTY_CNODE, &pnode->flags)); +	return &pnode->lprops[iip]; +} + +/** + * lpt_init_rd - initialize the LPT for reading. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. 
 */
static int lpt_init_rd(struct ubifs_info *c)
{
	int err, i;

	/*
	 * NOTE(review): nothing allocated below is released on the error
	 * returns of this function; presumably the caller cleans up through
	 * ubifs_lpt_free(), which frees these same fields - confirm.
	 */

	/* One table entry per LPT LEB; filled in by read_ltab() below */
	c->ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
	if (!c->ltab)
		return -ENOMEM;

	/* A single node buffer, sized for the larger of nnode and pnode */
	i = max_t(int, c->nnode_sz, c->pnode_sz);
	c->lpt_nod_buf = kmalloc(i, GFP_KERNEL);
	if (!c->lpt_nod_buf)
		return -ENOMEM;

	/* Every heap starts empty with room for LPT_HEAP_SZ pointers */
	for (i = 0; i < LPROPS_HEAP_CNT; i++) {
		c->lpt_heap[i].arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ,
					     GFP_KERNEL);
		if (!c->lpt_heap[i].arr)
			return -ENOMEM;
		c->lpt_heap[i].cnt = 0;
		c->lpt_heap[i].max_cnt = LPT_HEAP_SZ;
	}

	/* The dirty index array is set up the same way as the heaps */
	c->dirty_idx.arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ, GFP_KERNEL);
	if (!c->dirty_idx.arr)
		return -ENOMEM;
	c->dirty_idx.cnt = 0;
	c->dirty_idx.max_cnt = LPT_HEAP_SZ;

	err = read_ltab(c);
	if (err)
		return err;

	/* Debug-only dump of the LPT geometry in use */
	dbg_lp("space_bits %d", c->space_bits);
	dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits);
	dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits);
	dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits);
	dbg_lp("pcnt_bits %d", c->pcnt_bits);
	dbg_lp("lnum_bits %d", c->lnum_bits);
	dbg_lp("pnode_sz %d", c->pnode_sz);
	dbg_lp("nnode_sz %d", c->nnode_sz);
	dbg_lp("ltab_sz %d", c->ltab_sz);
	dbg_lp("lsave_sz %d", c->lsave_sz);
	dbg_lp("lsave_cnt %d", c->lsave_cnt);
	dbg_lp("lpt_hght %d", c->lpt_hght);
	dbg_lp("big_lpt %d", c->big_lpt);
	dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
	dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
	dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
	if (c->big_lpt)
		dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);

	return 0;
}

/**
 * ubifs_lpt_init - initialize the LPT.
 * @c: UBIFS file-system description object
 * @rd: whether to initialize lpt for reading
 * @wr: whether to initialize lpt for writing
 *
 * For mounting 'rw', @rd and @wr are both true. For mounting 'ro', @rd is true
 * and @wr is false. For mounting from 'ro' to 'rw', @rd is false and @wr is
 * true.
 *
 * Note, this implementation only acts on @rd; @wr is accepted but not used.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr)
{
	int err;

	if (rd) {
		err = lpt_init_rd(c);
		if (err)
			return err;
	}

	return 0;
}
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
new file mode 100644
index 000000000..c0af8187a
--- /dev/null
+++ b/fs/ubifs/lpt_commit.c
@@ -0,0 +1,171 @@
/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Authors: Adrian Hunter
 *          Artem Bityutskiy (Битюцкий Артём)
 */

/*
 * This file implements commit-related functionality of the LEB properties
 * subsystem.
 */

#include "crc16.h"
#include "ubifs.h"

/**
 * free_obsolete_cnodes - free obsolete cnodes for commit end.
 * @c: UBIFS file-system description object
 *
 * Walks the list that starts at @c->lpt_cnext until it comes back round to
 * its first element.  Cnodes flagged %OBSOLETE_CNODE are freed, the others
 * have their 'cnext' link cleared.  @c->lpt_cnext is %NULL afterwards.
 */
static void free_obsolete_cnodes(struct ubifs_info *c)
{
	struct ubifs_cnode *cnode, *cnext;

	cnext = c->lpt_cnext;
	if (!cnext)
		return;
	do {
		cnode = cnext;
		/* Fetch the successor before 'cnode' may be freed */
		cnext = cnode->cnext;
		if (test_bit(OBSOLETE_CNODE, &cnode->flags))
			kfree(cnode);
		else
			cnode->cnext = NULL;
	} while (cnext != c->lpt_cnext);
	c->lpt_cnext = NULL;
}

/**
 * first_nnode - find the first nnode in memory.
 * @c: UBIFS file-system description object
 * @hght: height of tree where nnode found is returned here
 *
 * Starting at the root, the walk keeps descending into the lowest-indexed
 * child nnode that is in memory and stops at the first nnode that has none,
 * or at the deepest nnode level.
 *
 * This function returns a pointer to the nnode found or %NULL if no nnode is
 * found. This function is a helper to 'ubifs_lpt_free()'.
 */
static struct ubifs_nnode *first_nnode(struct ubifs_info *c, int *hght)
{
	struct ubifs_nnode *nnode;
	int h, i, found;

	nnode = c->nroot;
	*hght = 0;
	if (!nnode)
		return NULL;
	for (h = 1; h < c->lpt_hght; h++) {
		found = 0;
		for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
			if (nnode->nbranch[i].nnode) {
				found = 1;
				nnode = nnode->nbranch[i].nnode;
				*hght = h;
				break;
			}
		}
		/* No child in memory: this nnode is the one to return */
		if (!found)
			break;
	}
	return nnode;
}

/**
 * next_nnode - find the next nnode in memory.
 * @c: UBIFS file-system description object
 * @nnode: nnode from which to start.
 * @hght: height of tree where nnode is, is passed and returned here
 *
 * The parent is returned once no later sibling of @nnode is in memory;
 * otherwise the walk moves to the next sibling in memory and descends from it
 * in the same way as 'first_nnode()'.
 *
 * This function returns a pointer to the nnode found or %NULL if no nnode is
 * found. This function is a helper to 'ubifs_lpt_free()'.
 */
static struct ubifs_nnode *next_nnode(struct ubifs_info *c,
				      struct ubifs_nnode *nnode, int *hght)
{
	struct ubifs_nnode *parent;
	int iip, h, i, found;

	parent = nnode->parent;
	/* The root has no successor */
	if (!parent)
		return NULL;
	/* Last branch of the parent: go back up */
	if (nnode->iip == UBIFS_LPT_FANOUT - 1) {
		*hght -= 1;
		return parent;
	}
	/* Look for a later sibling that is in memory */
	for (iip = nnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) {
		nnode = parent->nbranch[iip].nnode;
		if (nnode)
			break;
	}
	if (!nnode) {
		*hght -= 1;
		return parent;
	}
	/* Descend from that sibling as far as in-memory children allow */
	for (h = *hght + 1; h < c->lpt_hght; h++) {
		found = 0;
		for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
			if (nnode->nbranch[i].nnode) {
				found = 1;
				nnode = nnode->nbranch[i].nnode;
				*hght = h;
				break;
			}
		}
		if (!found)
			break;
	}
	return nnode;
}

/**
 * ubifs_lpt_free - free resources owned by the LPT.
 * @c: UBIFS file-system description object
 * @wr_only: free only resources used for writing
 *
 * The write-side buffers are always released and their pointers cleared.
 * Unless @wr_only is set, the in-memory tree, the heaps, the dirty index
 * array, the ltab and the node buffer are released as well.
 */
void ubifs_lpt_free(struct ubifs_info *c, int wr_only)
{
	struct ubifs_nnode *nnode;
	int i, hght;

	/* Free write-only things first */

	free_obsolete_cnodes(c); /* Leftover from a failed commit */

	vfree(c->ltab_cmt);
	c->ltab_cmt = NULL;
	vfree(c->lpt_buf);
	c->lpt_buf = NULL;
	kfree(c->lsave);
	c->lsave = NULL;

	if (wr_only)
		return;

	/* Now free the rest */

	/* Free the children of each nnode visited; the root goes last, below */
	nnode = first_nnode(c, &hght);
	while (nnode) {
		for (i = 0; i < UBIFS_LPT_FANOUT; i++)
			kfree(nnode->nbranch[i].nnode);
		nnode = next_nnode(c, nnode, &hght);
	}
	for (i = 0; i < LPROPS_HEAP_CNT; i++)
		kfree(c->lpt_heap[i].arr);
	kfree(c->dirty_idx.arr);
	kfree(c->nroot);
	vfree(c->ltab);
	kfree(c->lpt_nod_buf);
}
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
new file mode 100644
index 000000000..3f2926e87
--- /dev/null
+++ b/fs/ubifs/master.c
@@ -0,0 +1,341 @@
/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Authors: Artem Bityutskiy (Битюцкий Артём)
 *          Adrian Hunter
 */

/* This file implements reading and writing the master node */

#include "ubifs.h"

/**
 * scan_for_master - search for the valid master node.
+ * @c: UBIFS file-system description object + * + * This function scans the master node LEBs and search for the latest master + * node. Returns zero in case of success and a negative error code in case of + * failure. + */ +static int scan_for_master(struct ubifs_info *c) +{ +	struct ubifs_scan_leb *sleb; +	struct ubifs_scan_node *snod; +	int lnum, offs = 0, nodes_cnt; + +	lnum = UBIFS_MST_LNUM; + +	sleb = ubifs_scan(c, lnum, 0, c->sbuf); +	if (IS_ERR(sleb)) +		return PTR_ERR(sleb); +	nodes_cnt = sleb->nodes_cnt; +	if (nodes_cnt > 0) { +		snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, +				  list); +		if (snod->type != UBIFS_MST_NODE) +			goto out; +		memcpy(c->mst_node, snod->node, snod->len); +		offs = snod->offs; +	} +	ubifs_scan_destroy(sleb); + +	lnum += 1; + +	sleb = ubifs_scan(c, lnum, 0, c->sbuf); +	if (IS_ERR(sleb)) +		return PTR_ERR(sleb); +	if (sleb->nodes_cnt != nodes_cnt) +		goto out; +	if (!sleb->nodes_cnt) +		goto out; +	snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); +	if (snod->type != UBIFS_MST_NODE) +		goto out; +	if (snod->offs != offs) +		goto out; +	if (memcmp((void *)c->mst_node + UBIFS_CH_SZ, +		   (void *)snod->node + UBIFS_CH_SZ, +		   UBIFS_MST_NODE_SZ - UBIFS_CH_SZ)) +		goto out; +	c->mst_offs = offs; +	ubifs_scan_destroy(sleb); +	return 0; + +out: +	ubifs_scan_destroy(sleb); +	return -EINVAL; +} + +/** + * validate_master - validate master node. + * @c: UBIFS file-system description object + * + * This function validates data which was read from master node. Returns zero + * if the data is all right and %-EINVAL if not. 
+ */ +static int validate_master(const struct ubifs_info *c) +{ +	long long main_sz; +	int err; + +	if (c->max_sqnum >= SQNUM_WATERMARK) { +		err = 1; +		goto out; +	} + +	if (c->cmt_no >= c->max_sqnum) { +		err = 2; +		goto out; +	} + +	if (c->highest_inum >= INUM_WATERMARK) { +		err = 3; +		goto out; +	} + +	if (c->lhead_lnum < UBIFS_LOG_LNUM || +	    c->lhead_lnum >= UBIFS_LOG_LNUM + c->log_lebs || +	    c->lhead_offs < 0 || c->lhead_offs >= c->leb_size || +	    c->lhead_offs & (c->min_io_size - 1)) { +		err = 4; +		goto out; +	} + +	if (c->zroot.lnum >= c->leb_cnt || c->zroot.lnum < c->main_first || +	    c->zroot.offs >= c->leb_size || c->zroot.offs & 7) { +		err = 5; +		goto out; +	} + +	if (c->zroot.len < c->ranges[UBIFS_IDX_NODE].min_len || +	    c->zroot.len > c->ranges[UBIFS_IDX_NODE].max_len) { +		err = 6; +		goto out; +	} + +	if (c->gc_lnum >= c->leb_cnt || c->gc_lnum < c->main_first) { +		err = 7; +		goto out; +	} + +	if (c->ihead_lnum >= c->leb_cnt || c->ihead_lnum < c->main_first || +	    c->ihead_offs % c->min_io_size || c->ihead_offs < 0 || +	    c->ihead_offs > c->leb_size || c->ihead_offs & 7) { +		err = 8; +		goto out; +	} + +	main_sz = (long long)c->main_lebs * c->leb_size; +	if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { +		err = 9; +		goto out; +	} + +	if (c->lpt_lnum < c->lpt_first || c->lpt_lnum > c->lpt_last || +	    c->lpt_offs < 0 || c->lpt_offs + c->nnode_sz > c->leb_size) { +		err = 10; +		goto out; +	} + +	if (c->nhead_lnum < c->lpt_first || c->nhead_lnum > c->lpt_last || +	    c->nhead_offs < 0 || c->nhead_offs % c->min_io_size || +	    c->nhead_offs > c->leb_size) { +		err = 11; +		goto out; +	} + +	if (c->ltab_lnum < c->lpt_first || c->ltab_lnum > c->lpt_last || +	    c->ltab_offs < 0 || +	    c->ltab_offs + c->ltab_sz > c->leb_size) { +		err = 12; +		goto out; +	} + +	if (c->big_lpt && (c->lsave_lnum < c->lpt_first || +	    c->lsave_lnum > c->lpt_last || c->lsave_offs < 0 || +	    c->lsave_offs + c->lsave_sz > c->leb_size)) 
{ +		err = 13; +		goto out; +	} + +	if (c->lscan_lnum < c->main_first || c->lscan_lnum >= c->leb_cnt) { +		err = 14; +		goto out; +	} + +	if (c->lst.empty_lebs < 0 || c->lst.empty_lebs > c->main_lebs - 2) { +		err = 15; +		goto out; +	} + +	if (c->lst.idx_lebs < 0 || c->lst.idx_lebs > c->main_lebs - 1) { +		err = 16; +		goto out; +	} + +	if (c->lst.total_free < 0 || c->lst.total_free > main_sz || +	    c->lst.total_free & 7) { +		err = 17; +		goto out; +	} + +	if (c->lst.total_dirty < 0 || (c->lst.total_dirty & 7)) { +		err = 18; +		goto out; +	} + +	if (c->lst.total_used < 0 || (c->lst.total_used & 7)) { +		err = 19; +		goto out; +	} + +	if (c->lst.total_free + c->lst.total_dirty + +	    c->lst.total_used > main_sz) { +		err = 20; +		goto out; +	} + +	if (c->lst.total_dead + c->lst.total_dark + +	    c->lst.total_used + c->old_idx_sz > main_sz) { +		err = 21; +		goto out; +	} + +	if (c->lst.total_dead < 0 || +	    c->lst.total_dead > c->lst.total_free + c->lst.total_dirty || +	    c->lst.total_dead & 7) { +		err = 22; +		goto out; +	} + +	if (c->lst.total_dark < 0 || +	    c->lst.total_dark > c->lst.total_free + c->lst.total_dirty || +	    c->lst.total_dark & 7) { +		err = 23; +		goto out; +	} + +	return 0; + +out: +	ubifs_err("bad master node at offset %d error %d", c->mst_offs, err); +	dbg_dump_node(c, c->mst_node); +	return -EINVAL; +} + +/** + * ubifs_read_master - read master node. + * @c: UBIFS file-system description object + * + * This function finds and reads the master node during file-system mount. If + * the flash is empty, it creates default master node as well. Returns zero in + * case of success and a negative error code in case of failure. 
+ */ +int ubifs_read_master(struct ubifs_info *c) +{ +	int err, old_leb_cnt; + +	c->mst_node = kzalloc(c->mst_node_alsz, GFP_KERNEL); +	if (!c->mst_node) +		return -ENOMEM; + +	err = scan_for_master(c); +	if (err) { +		err = ubifs_recover_master_node(c); +		if (err) +			/* +			 * Note, we do not free 'c->mst_node' here because the +			 * unmount routine will take care of this. +			 */ +			return err; +	} + +	/* Make sure that the recovery flag is clear */ +	c->mst_node->flags &= cpu_to_le32(~UBIFS_MST_RCVRY); + +	c->max_sqnum       = le64_to_cpu(c->mst_node->ch.sqnum); +	c->highest_inum    = le64_to_cpu(c->mst_node->highest_inum); +	c->cmt_no          = le64_to_cpu(c->mst_node->cmt_no); +	c->zroot.lnum      = le32_to_cpu(c->mst_node->root_lnum); +	c->zroot.offs      = le32_to_cpu(c->mst_node->root_offs); +	c->zroot.len       = le32_to_cpu(c->mst_node->root_len); +	c->lhead_lnum      = le32_to_cpu(c->mst_node->log_lnum); +	c->gc_lnum         = le32_to_cpu(c->mst_node->gc_lnum); +	c->ihead_lnum      = le32_to_cpu(c->mst_node->ihead_lnum); +	c->ihead_offs      = le32_to_cpu(c->mst_node->ihead_offs); +	c->old_idx_sz      = le64_to_cpu(c->mst_node->index_size); +	c->lpt_lnum        = le32_to_cpu(c->mst_node->lpt_lnum); +	c->lpt_offs        = le32_to_cpu(c->mst_node->lpt_offs); +	c->nhead_lnum      = le32_to_cpu(c->mst_node->nhead_lnum); +	c->nhead_offs      = le32_to_cpu(c->mst_node->nhead_offs); +	c->ltab_lnum       = le32_to_cpu(c->mst_node->ltab_lnum); +	c->ltab_offs       = le32_to_cpu(c->mst_node->ltab_offs); +	c->lsave_lnum      = le32_to_cpu(c->mst_node->lsave_lnum); +	c->lsave_offs      = le32_to_cpu(c->mst_node->lsave_offs); +	c->lscan_lnum      = le32_to_cpu(c->mst_node->lscan_lnum); +	c->lst.empty_lebs  = le32_to_cpu(c->mst_node->empty_lebs); +	c->lst.idx_lebs    = le32_to_cpu(c->mst_node->idx_lebs); +	old_leb_cnt        = le32_to_cpu(c->mst_node->leb_cnt); +	c->lst.total_free  = le64_to_cpu(c->mst_node->total_free); +	c->lst.total_dirty = 
le64_to_cpu(c->mst_node->total_dirty); +	c->lst.total_used  = le64_to_cpu(c->mst_node->total_used); +	c->lst.total_dead  = le64_to_cpu(c->mst_node->total_dead); +	c->lst.total_dark  = le64_to_cpu(c->mst_node->total_dark); + +	c->calc_idx_sz = c->old_idx_sz; + +	if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) +		c->no_orphs = 1; + +	if (old_leb_cnt != c->leb_cnt) { +		/* The file system has been resized */ +		int growth = c->leb_cnt - old_leb_cnt; + +		if (c->leb_cnt < old_leb_cnt || +		    c->leb_cnt < UBIFS_MIN_LEB_CNT) { +			ubifs_err("bad leb_cnt on master node"); +			dbg_dump_node(c, c->mst_node); +			return -EINVAL; +		} + +		dbg_mnt("Auto resizing (master) from %d LEBs to %d LEBs", +			old_leb_cnt, c->leb_cnt); +		c->lst.empty_lebs += growth; +		c->lst.total_free += growth * (long long)c->leb_size; +		c->lst.total_dark += growth * (long long)c->dark_wm; + +		/* +		 * Reflect changes back onto the master node. N.B. the master +		 * node gets written immediately whenever mounting (or +		 * remounting) in read-write mode, so we do not need to write it +		 * here. +		 */ +		c->mst_node->leb_cnt = cpu_to_le32(c->leb_cnt); +		c->mst_node->empty_lebs = cpu_to_le32(c->lst.empty_lebs); +		c->mst_node->total_free = cpu_to_le64(c->lst.total_free); +		c->mst_node->total_dark = cpu_to_le64(c->lst.total_dark); +	} + +	err = validate_master(c); +	if (err) +		return err; + +	err = dbg_old_index_check_init(c, &c->zroot); + +	return err; +} diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h new file mode 100644 index 000000000..b745d8678 --- /dev/null +++ b/fs/ubifs/misc.h @@ -0,0 +1,310 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + *          Adrian Hunter + */ + +/* + * This file contains miscellaneous helper functions. + */ + +#ifndef __UBIFS_MISC_H__ +#define __UBIFS_MISC_H__ + +/** + * ubifs_zn_dirty - check if znode is dirty. + * @znode: znode to check + * + * This helper function returns %1 if @znode is dirty and %0 otherwise. + */ +static inline int ubifs_zn_dirty(const struct ubifs_znode *znode) +{ +	return !!test_bit(DIRTY_ZNODE, &znode->flags); +} + +/** + * ubifs_wake_up_bgt - wake up background thread. + * @c: UBIFS file-system description object + */ +static inline void ubifs_wake_up_bgt(struct ubifs_info *c) +{ +	if (c->bgt && !c->need_bgt) { +		c->need_bgt = 1; +		wake_up_process(c->bgt); +	} +} + +/** + * ubifs_tnc_find_child - find next child in znode. + * @znode: znode to search at + * @start: the zbranch index to start at + * + * This helper function looks for znode child starting at index @start. Returns + * the child or %NULL if no children were found. + */ +static inline struct ubifs_znode * +ubifs_tnc_find_child(struct ubifs_znode *znode, int start) +{ +	while (start < znode->child_cnt) { +		if (znode->zbranch[start].znode) +			return znode->zbranch[start].znode; +		start += 1; +	} + +	return NULL; +} + +/** + * ubifs_inode - get UBIFS inode information by VFS 'struct inode' object. 
+ * @inode: the VFS 'struct inode' pointer + */ +static inline struct ubifs_inode *ubifs_inode(const struct inode *inode) +{ +	return container_of(inode, struct ubifs_inode, vfs_inode); +} + +/** + * ubifs_compr_present - check if compressor was compiled in. + * @compr_type: compressor type to check + * + * This function returns %1 of compressor of type @compr_type is present, and + * %0 if not. + */ +static inline int ubifs_compr_present(int compr_type) +{ +	ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT); +	return !!ubifs_compressors[compr_type]->capi_name; +} + +/** + * ubifs_compr_name - get compressor name string by its type. + * @compr_type: compressor type + * + * This function returns compressor type string. + */ +static inline const char *ubifs_compr_name(int compr_type) +{ +	ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT); +	return ubifs_compressors[compr_type]->name; +} + +/** + * ubifs_wbuf_sync - synchronize write-buffer. + * @wbuf: write-buffer to synchronize + * + * This is the same as as 'ubifs_wbuf_sync_nolock()' but it does not assume + * that the write-buffer is already locked. + */ +static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) +{ +	int err; + +	mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); +	err = ubifs_wbuf_sync_nolock(wbuf); +	mutex_unlock(&wbuf->io_mutex); +	return err; +} + +/** + * ubifs_leb_unmap - unmap an LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number to unmap + * + * This function returns %0 on success and a negative error code on failure. + */ +static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) +{ +	int err; + +	if (c->ro_media) +		return -EROFS; +	err = ubi_leb_unmap(c->ubi, lnum); +	if (err) { +		ubifs_err("unmap LEB %d failed, error %d", lnum, err); +		return err; +	} + +	return 0; +} + +/** + * ubifs_leb_write - write to a LEB. 
+ * @c: UBIFS file-system description object + * @lnum: LEB number to write + * @buf: buffer to write from + * @offs: offset within LEB to write to + * @len: length to write + * @dtype: data type + * + * This function returns %0 on success and a negative error code on failure. + */ +static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, +				  const void *buf, int offs, int len, int dtype) +{ +	int err; + +	if (c->ro_media) +		return -EROFS; +	err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); +	if (err) { +		ubifs_err("writing %d bytes at %d:%d, error %d", +			  len, lnum, offs, err); +		return err; +	} + +	return 0; +} + +/** + * ubifs_leb_change - atomic LEB change. + * @c: UBIFS file-system description object + * @lnum: LEB number to write + * @buf: buffer to write from + * @len: length to write + * @dtype: data type + * + * This function returns %0 on success and a negative error code on failure. + */ +static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, +				   const void *buf, int len, int dtype) +{ +	int err; + +	if (c->ro_media) +		return -EROFS; +	err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); +	if (err) { +		ubifs_err("changing %d bytes in LEB %d, error %d", +			  len, lnum, err); +		return err; +	} + +	return 0; +} + +/** + * ubifs_add_dirt - add dirty space to LEB properties. + * @c: the UBIFS file-system description object + * @lnum: LEB to add dirty space for + * @dirty: dirty space to add + * + * This is a helper function which increased amount of dirty LEB space. Returns + * zero in case of success and a negative error code in case of failure. + */ +static inline int ubifs_add_dirt(struct ubifs_info *c, int lnum, int dirty) +{ +	return ubifs_update_one_lp(c, lnum, LPROPS_NC, dirty, 0, 0); +} + +/** + * ubifs_return_leb - return LEB to lprops. 
+ * @c: the UBIFS file-system description object + * @lnum: LEB to return + * + * This helper function cleans the "taken" flag of a logical eraseblock in the + * lprops. Returns zero in case of success and a negative error code in case of + * failure. + */ +static inline int ubifs_return_leb(struct ubifs_info *c, int lnum) +{ +	return ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, +				   LPROPS_TAKEN, 0); +} + +/** + * ubifs_idx_node_sz - return index node size. + * @c: the UBIFS file-system description object + * @child_cnt: number of children of this index node + */ +static inline int ubifs_idx_node_sz(const struct ubifs_info *c, int child_cnt) +{ +	return UBIFS_IDX_NODE_SZ + (UBIFS_BRANCH_SZ + c->key_len) * child_cnt; +} + +/** + * ubifs_idx_branch - return pointer to an index branch. + * @c: the UBIFS file-system description object + * @idx: index node + * @bnum: branch number + */ +static inline +struct ubifs_branch *ubifs_idx_branch(const struct ubifs_info *c, +				      const struct ubifs_idx_node *idx, +				      int bnum) +{ +	return (struct ubifs_branch *)((void *)idx->branches + +				       (UBIFS_BRANCH_SZ + c->key_len) * bnum); +} + +/** + * ubifs_idx_key - return pointer to an index key. + * @c: the UBIFS file-system description object + * @idx: index node + */ +static inline void *ubifs_idx_key(const struct ubifs_info *c, +				  const struct ubifs_idx_node *idx) +{ +	return (void *)((struct ubifs_branch *)idx->branches)->key; +} + +/** + * ubifs_tnc_lookup - look up a file-system node. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * + * This function look up and reads node with key @key. The caller has to make + * sure the @node buffer is large enough to fit the node. Returns zero in case + * of success, %-ENOENT if the node was not found, and a negative error code in + * case of failure. 
+ */ +static inline int ubifs_tnc_lookup(struct ubifs_info *c, +				   const union ubifs_key *key, void *node) +{ +	return ubifs_tnc_locate(c, key, node, NULL, NULL); +} + +/** + * ubifs_get_lprops - get reference to LEB properties. + * @c: the UBIFS file-system description object + * + * This function locks lprops. Lprops have to be unlocked by + * 'ubifs_release_lprops()'. + */ +static inline void ubifs_get_lprops(struct ubifs_info *c) +{ +	mutex_lock(&c->lp_mutex); +} + +/** + * ubifs_release_lprops - release lprops lock. + * @c: the UBIFS file-system description object + * + * This function has to be called after each 'ubifs_get_lprops()' call to + * unlock lprops. + */ +static inline void ubifs_release_lprops(struct ubifs_info *c) +{ +	ubifs_assert(mutex_is_locked(&c->lp_mutex)); +	ubifs_assert(c->lst.empty_lebs >= 0 && +		     c->lst.empty_lebs <= c->main_lebs); +	mutex_unlock(&c->lp_mutex); +} + +#endif /* __UBIFS_MISC_H__ */ diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c new file mode 100644 index 000000000..d091031b8 --- /dev/null +++ b/fs/ubifs/orphan.c @@ -0,0 +1,316 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Author: Adrian Hunter + */ + +#include "ubifs.h" + +/* + * An orphan is an inode number whose inode node has been committed to the index + * with a link count of zero. That happens when an open file is deleted + * (unlinked) and then a commit is run. In the normal course of events the inode + * would be deleted when the file is closed. However in the case of an unclean + * unmount, orphans need to be accounted for. After an unclean unmount, the + * orphans' inodes must be deleted which means either scanning the entire index + * looking for them, or keeping a list on flash somewhere. This unit implements + * the latter approach. + * + * The orphan area is a fixed number of LEBs situated between the LPT area and + * the main area. The number of orphan area LEBs is specified when the file + * system is created. The minimum number is 1. The size of the orphan area + * should be so that it can hold the maximum number of orphans that are expected + * to ever exist at one time. + * + * The number of orphans that can fit in a LEB is: + * + *         (c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64) + * + * For example: a 15872 byte LEB can fit 1980 orphans so 1 LEB may be enough. + * + * Orphans are accumulated in a rb-tree. When an inode's link count drops to + * zero, the inode number is added to the rb-tree. It is removed from the tree + * when the inode is deleted.  Any new orphans that are in the orphan tree when + * the commit is run, are written to the orphan area in 1 or more orphan nodes. + * If the orphan area is full, it is consolidated to make space.  There is + * always enough space because validation prevents the user from creating more + * than the maximum number of orphans allowed. 
+ */
+
+/**
+ * tot_avail_orphs - calculate total space.
+ * @c: UBIFS file-system description object
+ *
+ * This function returns half of the number of orphans (one __le64 inode number
+ * each) that fit into @c->orph_lebs LEBs once an orphan node header is taken
+ * off each LEB. That leaves half the space for adding new orphans.
+ */
+static int tot_avail_orphs(struct ubifs_info *c)
+{
+	const int lebs = c->orph_lebs;
+	int total;
+
+	total = lebs * ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64));
+	return total / 2;
+}
+
+/**
+ * ubifs_clear_orphans - erase all LEBs used for orphans.
+ * @c: UBIFS file-system description object
+ *
+ * If recovery is not required, then the orphans from the previous session
+ * are not needed. This function un-maps every LEB from @c->orph_first to
+ * @c->orph_last and resets the orphan head to the start of the first one.
+ * Returns zero on success; the first un-map error is returned immediately and
+ * the orphan head is left untouched in that case.
+ */
+int ubifs_clear_orphans(struct ubifs_info *c)
+{
+	int lnum = c->orph_first;
+
+	while (lnum <= c->orph_last) {
+		int err = ubifs_leb_unmap(c, lnum);
+
+		if (err)
+			return err;
+		lnum += 1;
+	}
+
+	c->ohead_lnum = c->orph_first;
+	c->ohead_offs = 0;
+	return 0;
+}
+
+/**
+ * insert_dead_orphan - insert an orphan.
+ * @c: UBIFS file-system description object
+ * @inum: orphan inode number
+ *
+ * This function is a helper to the 'do_kill_orphans()' function. The orphan
+ * must be kept until the next commit, so it is added to the rb-tree and the
+ * deletion list.
+ */
+static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
+{
+	struct rb_node **link = &c->orph_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct ubifs_orphan *orphan;
+
+	/* Allocate first, so an allocation failure is reported in all cases */
+	orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_KERNEL);
+	if (!orphan)
+		return -ENOMEM;
+	orphan->inum = inum;
+
+	/* Descend the tree, which is ordered by inode number */
+	while (*link) {
+		struct ubifs_orphan *cur;
+
+		parent = *link;
+		cur = rb_entry(parent, struct ubifs_orphan, rb);
+		if (inum == cur->inum) {
+			/* Already added - no problem */
+			kfree(orphan);
+			return 0;
+		}
+		if (inum < cur->inum)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+
+	c->tot_orphans += 1;
+	rb_link_node(&orphan->rb, parent, link);
+	rb_insert_color(&orphan->rb, &c->orph_tree);
+	list_add_tail(&orphan->list, &c->orph_list);
+
+	/* Push onto the head of the singly-linked 'dnext' chain */
+	orphan->dnext = c->orph_dnext;
+	c->orph_dnext = orphan;
+
+	dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum,
+		c->new_orphans, c->tot_orphans);
+	return 0;
+}
+
+/**
+ * do_kill_orphans - remove orphan inodes from the index.
+ * @c: UBIFS file-system description object
+ * @sleb: scanned LEB
+ * @last_cmt_no: cmt_no of last orphan node read is passed and returned here
+ * @outofdate: whether the LEB is out of date is returned here
+ * @last_flagged: whether the end orphan node is encountered
+ *
+ * This function is a helper to the 'kill_orphans()' function. It goes through
+ * every orphan node in a LEB and for every inode number recorded, removes
+ * all keys for that inode from the TNC.
+ */ +static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, +			   unsigned long long *last_cmt_no, int *outofdate, +			   int *last_flagged) +{ +	struct ubifs_scan_node *snod; +	struct ubifs_orph_node *orph; +	unsigned long long cmt_no; +	ino_t inum; +	int i, n, err, first = 1; + +	list_for_each_entry(snod, &sleb->nodes, list) { +		if (snod->type != UBIFS_ORPH_NODE) { +			ubifs_err("invalid node type %d in orphan area at " +				  "%d:%d", snod->type, sleb->lnum, snod->offs); +			dbg_dump_node(c, snod->node); +			return -EINVAL; +		} + +		orph = snod->node; + +		/* Check commit number */ +		cmt_no = le64_to_cpu(orph->cmt_no) & LLONG_MAX; +		/* +		 * The commit number on the master node may be less, because +		 * of a failed commit. If there are several failed commits in a +		 * row, the commit number written on orphan nodes will continue +		 * to increase (because the commit number is adjusted here) even +		 * though the commit number on the master node stays the same +		 * because the master node has not been re-written. +		 */ +		if (cmt_no > c->cmt_no) +			c->cmt_no = cmt_no; +		if (cmt_no < *last_cmt_no && *last_flagged) { +			/* +			 * The last orphan node had a higher commit number and +			 * was flagged as the last written for that commit +			 * number. That makes this orphan node, out of date. 
+			 */ +			if (!first) { +				ubifs_err("out of order commit number %llu in " +					  "orphan node at %d:%d", +					  cmt_no, sleb->lnum, snod->offs); +				dbg_dump_node(c, snod->node); +				return -EINVAL; +			} +			dbg_rcvry("out of date LEB %d", sleb->lnum); +			*outofdate = 1; +			return 0; +		} + +		if (first) +			first = 0; + +		n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; +		for (i = 0; i < n; i++) { +			inum = le64_to_cpu(orph->inos[i]); +			dbg_rcvry("deleting orphaned inode %lu", +				  (unsigned long)inum); +			err = ubifs_tnc_remove_ino(c, inum); +			if (err) +				return err; +			err = insert_dead_orphan(c, inum); +			if (err) +				return err; +		} + +		*last_cmt_no = cmt_no; +		if (le64_to_cpu(orph->cmt_no) & (1ULL << 63)) { +			dbg_rcvry("last orph node for commit %llu at %d:%d", +				  cmt_no, sleb->lnum, snod->offs); +			*last_flagged = 1; +		} else +			*last_flagged = 0; +	} + +	return 0; +} + +/** + * kill_orphans - remove all orphan inodes from the index. + * @c: UBIFS file-system description object + * + * If recovery is required, then orphan inodes recorded during the previous + * session (which ended with an unclean unmount) must be deleted from the index. + * This is done by updating the TNC, but since the index is not updated until + * the next commit, the LEBs where the orphan information is recorded are not + * erased until the next commit. + */ +static int kill_orphans(struct ubifs_info *c) +{ +	unsigned long long last_cmt_no = 0; +	int lnum, err = 0, outofdate = 0, last_flagged = 0; + +	c->ohead_lnum = c->orph_first; +	c->ohead_offs = 0; +	/* Check no-orphans flag and skip this if no orphans */ +	if (c->no_orphs) { +		dbg_rcvry("no orphans"); +		return 0; +	} +	/* +	 * Orph nodes always start at c->orph_first and are written to each +	 * successive LEB in turn. Generally unused LEBs will have been unmapped +	 * but may contain out of date orphan nodes if the unmap didn't go +	 * through. 
In addition, the last orphan node written for each commit is +	 * marked (top bit of orph->cmt_no is set to 1). It is possible that +	 * there are orphan nodes from the next commit (i.e. the commit did not +	 * complete successfully). In that case, no orphans will have been lost +	 * due to the way that orphans are written, and any orphans added will +	 * be valid orphans anyway and so can be deleted. +	 */ +	for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { +		struct ubifs_scan_leb *sleb; + +		dbg_rcvry("LEB %d", lnum); +		sleb = ubifs_scan(c, lnum, 0, c->sbuf); +		if (IS_ERR(sleb)) { +			sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); +			if (IS_ERR(sleb)) { +				err = PTR_ERR(sleb); +				break; +			} +		} +		err = do_kill_orphans(c, sleb, &last_cmt_no, &outofdate, +				      &last_flagged); +		if (err || outofdate) { +			ubifs_scan_destroy(sleb); +			break; +		} +		if (sleb->endpt) { +			c->ohead_lnum = lnum; +			c->ohead_offs = sleb->endpt; +		} +		ubifs_scan_destroy(sleb); +	} +	return err; +} + +/** + * ubifs_mount_orphans - delete orphan inodes and erase LEBs that recorded them. + * @c: UBIFS file-system description object + * @unclean: indicates recovery from unclean unmount + * @read_only: indicates read only mount + * + * This function is called when mounting to erase orphans from the previous + * session. If UBIFS was not unmounted cleanly, then the inodes recorded as + * orphans are deleted. + */ +int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only) +{ +	int err = 0; + +	c->max_orphans = tot_avail_orphs(c); + +	if (!read_only) { +		c->orph_buf = vmalloc(c->leb_size); +		if (!c->orph_buf) +			return -ENOMEM; +	} + +	if (unclean) +		err = kill_orphans(c); +	else if (!read_only) +		err = ubifs_clear_orphans(c); + +	return err; +} diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c new file mode 100644 index 000000000..fe3b36443 --- /dev/null +++ b/fs/ubifs/recovery.c @@ -0,0 +1,1249 @@ +/* + * This file is part of UBIFS. 
+ * + * Copyright (C) 2006-2008 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + *          Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements functions needed to recover from unclean un-mounts. + * When UBIFS is mounted, it checks a flag on the master node to determine if + * an un-mount was completed sucessfully. If not, the process of mounting + * incorparates additional checking and fixing of on-flash data structures. + * UBIFS always cleans away all remnants of an unclean un-mount, so that + * errors do not accumulate. However UBIFS defers recovery if it is mounted + * read-only, and the flash is not modified in that case. + */ + +#include "ubifs.h" + +/** + * is_empty - determine whether a buffer is empty (contains all 0xff). + * @buf: buffer to clean + * @len: length of buffer + * + * This function returns %1 if the buffer is empty (contains all 0xff) otherwise + * %0 is returned. + */ +static int is_empty(void *buf, int len) +{ +	uint8_t *p = buf; +	int i; + +	for (i = 0; i < len; i++) +		if (*p++ != 0xff) +			return 0; +	return 1; +} + +/** + * get_master_node - get the last valid master node allowing for corruption. 
+ * @c: UBIFS file-system description object + * @lnum: LEB number + * @pbuf: buffer containing the LEB read, is returned here + * @mst: master node, if found, is returned here + * @cor: corruption, if found, is returned here + * + * This function allocates a buffer, reads the LEB into it, and finds and + * returns the last valid master node allowing for one area of corruption. + * The corrupt area, if there is one, must be consistent with the assumption + * that it is the result of an unclean unmount while the master node was being + * written. Under those circumstances, it is valid to use the previously written + * master node. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf, +			   struct ubifs_mst_node **mst, void **cor) +{ +	const int sz = c->mst_node_alsz; +	int err, offs, len; +	void *sbuf, *buf; + +	sbuf = vmalloc(c->leb_size); +	if (!sbuf) +		return -ENOMEM; + +	err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); +	if (err && err != -EBADMSG) +		goto out_free; + +	/* Find the first position that is definitely not a node */ +	offs = 0; +	buf = sbuf; +	len = c->leb_size; +	while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) { +		struct ubifs_ch *ch = buf; + +		if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) +			break; +		offs += sz; +		buf  += sz; +		len  -= sz; +	} +	/* See if there was a valid master node before that */ +	if (offs) { +		int ret; + +		offs -= sz; +		buf  -= sz; +		len  += sz; +		ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); +		if (ret != SCANNED_A_NODE && offs) { +			/* Could have been corruption so check one place back */ +			offs -= sz; +			buf  -= sz; +			len  += sz; +			ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); +			if (ret != SCANNED_A_NODE) +				/* +				 * We accept only one area of corruption because +				 * we are assuming that it was caused while +				 * trying to write a master node. 
+				 */ +				goto out_err; +		} +		if (ret == SCANNED_A_NODE) { +			struct ubifs_ch *ch = buf; + +			if (ch->node_type != UBIFS_MST_NODE) +				goto out_err; +			dbg_rcvry("found a master node at %d:%d", lnum, offs); +			*mst = buf; +			offs += sz; +			buf  += sz; +			len  -= sz; +		} +	} +	/* Check for corruption */ +	if (offs < c->leb_size) { +		if (!is_empty(buf, min_t(int, len, sz))) { +			*cor = buf; +			dbg_rcvry("found corruption at %d:%d", lnum, offs); +		} +		offs += sz; +		buf  += sz; +		len  -= sz; +	} +	/* Check remaining empty space */ +	if (offs < c->leb_size) +		if (!is_empty(buf, len)) +			goto out_err; +	*pbuf = sbuf; +	return 0; + +out_err: +	err = -EINVAL; +out_free: +	vfree(sbuf); +	*mst = NULL; +	*cor = NULL; +	return err; +} + +/** + * write_rcvrd_mst_node - write recovered master node. + * @c: UBIFS file-system description object + * @mst: master node + * + * This function returns %0 on success and a negative error code on failure. + */ +static int write_rcvrd_mst_node(struct ubifs_info *c, +				struct ubifs_mst_node *mst) +{ +	int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz; +	__le32 save_flags; + +	dbg_rcvry("recovery"); + +	save_flags = mst->flags; +	mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY); + +	ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); +	err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); +	if (err) +		goto out; +	err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); +	if (err) +		goto out; +out: +	mst->flags = save_flags; +	return err; +} + +/** + * ubifs_recover_master_node - recover the master node. + * @c: UBIFS file-system description object + * + * This function recovers the master node from corruption that may occur due to + * an unclean unmount. + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +int ubifs_recover_master_node(struct ubifs_info *c) +{ +	void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL; +	struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst; +	const int sz = c->mst_node_alsz; +	int err, offs1, offs2; + +	dbg_rcvry("recovery"); + +	err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1); +	if (err) +		goto out_free; + +	err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2); +	if (err) +		goto out_free; + +	if (mst1) { +		offs1 = (void *)mst1 - buf1; +		if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) && +		    (offs1 == 0 && !cor1)) { +			/* +			 * mst1 was written by recovery at offset 0 with no +			 * corruption. +			 */ +			dbg_rcvry("recovery recovery"); +			mst = mst1; +		} else if (mst2) { +			offs2 = (void *)mst2 - buf2; +			if (offs1 == offs2) { +				/* Same offset, so must be the same */ +				if (memcmp((void *)mst1 + UBIFS_CH_SZ, +					   (void *)mst2 + UBIFS_CH_SZ, +					   UBIFS_MST_NODE_SZ - UBIFS_CH_SZ)) +					goto out_err; +				mst = mst1; +			} else if (offs2 + sz == offs1) { +				/* 1st LEB was written, 2nd was not */ +				if (cor1) +					goto out_err; +				mst = mst1; +			} else if (offs1 == 0 && offs2 + sz >= c->leb_size) { +				/* 1st LEB was unmapped and written, 2nd not */ +				if (cor1) +					goto out_err; +				mst = mst1; +			} else +				goto out_err; +		} else { +			/* +			 * 2nd LEB was unmapped and about to be written, so +			 * there must be only one master node in the first LEB +			 * and no corruption. +			 */ +			if (offs1 != 0 || cor1) +				goto out_err; +			mst = mst1; +		} +	} else { +		if (!mst2) +			goto out_err; +		/* +		 * 1st LEB was unmapped and about to be written, so there must +		 * be no room left in 2nd LEB. +		 */ +		offs2 = (void *)mst2 - buf2; +		if (offs2 + sz + sz <= c->leb_size) +			goto out_err; +		mst = mst2; +	} + +	dbg_rcvry("recovered master node from LEB %d", +		  (mst == mst1 ? 
UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1)); + +	memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); + +	if ((c->vfs_sb->s_flags & MS_RDONLY)) { +		/* Read-only mode. Keep a copy for switching to rw mode */ +		c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL); +		if (!c->rcvrd_mst_node) { +			err = -ENOMEM; +			goto out_free; +		} +		memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ); +	} + +	vfree(buf2); +	vfree(buf1); + +	return 0; + +out_err: +	err = -EINVAL; +out_free: +	ubifs_err("failed to recover master node"); +	if (mst1) { +		dbg_err("dumping first master node"); +		dbg_dump_node(c, mst1); +	} +	if (mst2) { +		dbg_err("dumping second master node"); +		dbg_dump_node(c, mst2); +	} +	vfree(buf2); +	vfree(buf1); +	return err; +} + +/** + * ubifs_write_rcvrd_mst_node - write the recovered master node. + * @c: UBIFS file-system description object + * + * This function writes the master node that was recovered during mounting in + * read-only mode and must now be written because we are remounting rw. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_write_rcvrd_mst_node(struct ubifs_info *c) +{ +	int err; + +	if (!c->rcvrd_mst_node) +		return 0; +	c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); +	c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); +	err = write_rcvrd_mst_node(c, c->rcvrd_mst_node); +	if (err) +		return err; +	kfree(c->rcvrd_mst_node); +	c->rcvrd_mst_node = NULL; +	return 0; +} + +/** + * is_last_write - determine if an offset was in the last write to a LEB. + * @c: UBIFS file-system description object + * @buf: buffer to check + * @offs: offset to check + * + * This function returns %1 if @offs was in the last write to the LEB whose data + * is in @buf, otherwise %0 is returned.  The determination is made by checking + * for subsequent empty space starting from the next min_io_size boundary (or a + * bit less than the common header size if min_io_size is one). 
+ */ +static int is_last_write(const struct ubifs_info *c, void *buf, int offs) +{ +	int empty_offs; +	int check_len; +	uint8_t *p; + +	if (c->min_io_size == 1) { +		check_len = c->leb_size - offs; +		p = buf + check_len; +		for (; check_len > 0; check_len--) +			if (*--p != 0xff) +				break; +		/* +		 * 'check_len' is the size of the corruption which cannot be +		 * more than the size of 1 node if it was caused by an unclean +		 * unmount. +		 */ +		if (check_len > UBIFS_MAX_NODE_SZ) +			return 0; +		return 1; +	} + +	/* +	 * Round up to the next c->min_io_size boundary i.e. 'offs' is in the +	 * last wbuf written. After that should be empty space. +	 */ +	empty_offs = ALIGN(offs + 1, c->min_io_size); +	check_len = c->leb_size - empty_offs; +	p = buf + empty_offs - offs; + +	for (; check_len > 0; check_len--) +		if (*p++ != 0xff) +			return 0; +	return 1; +} + +/** + * clean_buf - clean the data from an LEB sitting in a buffer. + * @c: UBIFS file-system description object + * @buf: buffer to clean + * @lnum: LEB number to clean + * @offs: offset from which to clean + * @len: length of buffer + * + * This function pads up to the next min_io_size boundary (if there is one) and + * sets empty space to all 0xff. @buf, @offs and @len are updated to the next + * min_io_size boundary (if there is one). + */ +static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, +		      int *offs, int *len) +{ +	int empty_offs, pad_len; + +	lnum = lnum; +	dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs); + +	if (c->min_io_size == 1) { +		memset(*buf, 0xff, c->leb_size - *offs); +		return; +	} + +	ubifs_assert(!(*offs & 7)); +	empty_offs = ALIGN(*offs, c->min_io_size); +	pad_len = empty_offs - *offs; +	ubifs_pad(c, *buf, pad_len); +	*offs += pad_len; +	*buf += pad_len; +	*len -= pad_len; +	memset(*buf, 0xff, c->leb_size - empty_offs); +} + +/** + * no_more_nodes - determine if there are no more nodes in a buffer. 
+ * @c: UBIFS file-system description object + * @buf: buffer to check + * @len: length of buffer + * @lnum: LEB number of the LEB from which @buf was read + * @offs: offset from which @buf was read + * + * This function scans @buf for more nodes and returns %0 is a node is found and + * %1 if no more nodes are found. + */ +static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, +			int lnum, int offs) +{ +	int skip, next_offs = 0; + +	if (len > UBIFS_DATA_NODE_SZ) { +		struct ubifs_ch *ch = buf; +		int dlen = le32_to_cpu(ch->len); + +		if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ && +		    dlen <= UBIFS_MAX_DATA_NODE_SZ) +			/* The corrupt node looks like a data node */ +			next_offs = ALIGN(offs + dlen, 8); +	} + +	if (c->min_io_size == 1) +		skip = 8; +	else +		skip = ALIGN(offs + 1, c->min_io_size) - offs; + +	offs += skip; +	buf += skip; +	len -= skip; +	while (len > 8) { +		struct ubifs_ch *ch = buf; +		uint32_t magic = le32_to_cpu(ch->magic); +		int ret; + +		if (magic == UBIFS_NODE_MAGIC) { +			ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); +			if (ret == SCANNED_A_NODE || ret > 0) { +				/* +				 * There is a small chance this is just data in +				 * a data node, so check that possibility. e.g. +				 * this is part of a file that itself contains +				 * a UBIFS image. +				 */ +				if (next_offs && offs + le32_to_cpu(ch->len) <= +				    next_offs) +					continue; +				dbg_rcvry("unexpected node at %d:%d", lnum, +					  offs); +				return 0; +			} +		} +		offs += 8; +		buf += 8; +		len -= 8; +	} +	return 1; +} + +/** + * fix_unclean_leb - fix an unclean LEB. 
+ * @c: UBIFS file-system description object + * @sleb: scanned LEB information + * @start: offset where scan started + */ +static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, +			   int start) +{ +	int lnum = sleb->lnum, endpt = start; + +	/* Get the end offset of the last node we are keeping */ +	if (!list_empty(&sleb->nodes)) { +		struct ubifs_scan_node *snod; + +		snod = list_entry(sleb->nodes.prev, +				  struct ubifs_scan_node, list); +		endpt = snod->offs + snod->len; +	} + +	if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) { +		/* Add to recovery list */ +		struct ubifs_unclean_leb *ucleb; + +		dbg_rcvry("need to fix LEB %d start %d endpt %d", +			  lnum, start, sleb->endpt); +		ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS); +		if (!ucleb) +			return -ENOMEM; +		ucleb->lnum = lnum; +		ucleb->endpt = endpt; +		list_add_tail(&ucleb->list, &c->unclean_leb_list); +	} +	return 0; +} + +/** + * drop_incomplete_group - drop nodes from an incomplete group. + * @sleb: scanned LEB information + * @offs: offset of dropped nodes is returned here + * + * This function returns %1 if nodes are dropped and %0 otherwise. + */ +static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) +{ +	int dropped = 0; + +	while (!list_empty(&sleb->nodes)) { +		struct ubifs_scan_node *snod; +		struct ubifs_ch *ch; + +		snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, +				  list); +		ch = snod->node; +		if (ch->group_type != UBIFS_IN_NODE_GROUP) +			return dropped; +		dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); +		*offs = snod->offs; +		list_del(&snod->list); +		kfree(snod); +		sleb->nodes_cnt -= 1; +		dropped = 1; +	} +	return dropped; +} + +/** + * ubifs_recover_leb - scan and recover a LEB. 
+ * @c: UBIFS file-system description object + * @lnum: LEB number + * @offs: offset + * @sbuf: LEB-sized buffer to use + * @grouped: nodes may be grouped for recovery + * + * This function does a scan of a LEB, but caters for errors that might have + * been caused by the unclean unmount from which we are attempting to recover. + * + * This function returns %0 on success and a negative error code on failure. + */ +struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, +					 int offs, void *sbuf, int grouped) +{ +	int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; +	int empty_chkd = 0, start = offs; +	struct ubifs_scan_leb *sleb; +	void *buf = sbuf + offs; + +	dbg_rcvry("%d:%d", lnum, offs); + +	sleb = ubifs_start_scan(c, lnum, offs, sbuf); +	if (IS_ERR(sleb)) +		return sleb; + +	if (sleb->ecc) +		need_clean = 1; + +	while (len >= 8) { +		int ret; + +		dbg_scan("look at LEB %d:%d (%d bytes left)", +			 lnum, offs, len); + +		cond_resched(); + +		/* +		 * Scan quietly until there is an error from which we cannot +		 * recover +		 */ +		ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); + +		if (ret == SCANNED_A_NODE) { +			/* A valid node, and not a padding node */ +			struct ubifs_ch *ch = buf; +			int node_len; + +			err = ubifs_add_snod(c, sleb, buf, offs); +			if (err) +				goto error; +			node_len = ALIGN(le32_to_cpu(ch->len), 8); +			offs += node_len; +			buf += node_len; +			len -= node_len; +			continue; +		} + +		if (ret > 0) { +			/* Padding bytes or a valid padding node */ +			offs += ret; +			buf += ret; +			len -= ret; +			continue; +		} + +		if (ret == SCANNED_EMPTY_SPACE) { +			if (!is_empty(buf, len)) { +				if (!is_last_write(c, buf, offs)) +					break; +				clean_buf(c, &buf, lnum, &offs, &len); +				need_clean = 1; +			} +			empty_chkd = 1; +			break; +		} + +		if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) +			if (is_last_write(c, buf, offs)) { +				clean_buf(c, &buf, lnum, &offs, &len); +				
need_clean = 1; +				empty_chkd = 1; +				break; +			} + +		if (ret == SCANNED_A_CORRUPT_NODE) +			if (no_more_nodes(c, buf, len, lnum, offs)) { +				clean_buf(c, &buf, lnum, &offs, &len); +				need_clean = 1; +				empty_chkd = 1; +				break; +			} + +		if (quiet) { +			/* Redo the last scan but noisily */ +			quiet = 0; +			continue; +		} + +		switch (ret) { +		case SCANNED_GARBAGE: +			dbg_err("garbage"); +			goto corrupted; +		case SCANNED_A_CORRUPT_NODE: +		case SCANNED_A_BAD_PAD_NODE: +			dbg_err("bad node"); +			goto corrupted; +		default: +			dbg_err("unknown"); +			goto corrupted; +		} +	} + +	if (!empty_chkd && !is_empty(buf, len)) { +		if (is_last_write(c, buf, offs)) { +			clean_buf(c, &buf, lnum, &offs, &len); +			need_clean = 1; +		} else { +			ubifs_err("corrupt empty space at LEB %d:%d", +				  lnum, offs); +			goto corrupted; +		} +	} + +	/* Drop nodes from incomplete group */ +	if (grouped && drop_incomplete_group(sleb, &offs)) { +		buf = sbuf + offs; +		len = c->leb_size - offs; +		clean_buf(c, &buf, lnum, &offs, &len); +		need_clean = 1; +	} + +	if (offs % c->min_io_size) { +		clean_buf(c, &buf, lnum, &offs, &len); +		need_clean = 1; +	} + +	ubifs_end_scan(c, sleb, lnum, offs); + +	if (need_clean) { +		err = fix_unclean_leb(c, sleb, start); +		if (err) +			goto error; +	} + +	return sleb; + +corrupted: +	ubifs_scanned_corruption(c, lnum, offs, buf); +	err = -EUCLEAN; +error: +	ubifs_err("LEB %d scanning failed", lnum); +	ubifs_scan_destroy(sleb); +	return ERR_PTR(err); +} + +/** + * get_cs_sqnum - get commit start sequence number. + * @c: UBIFS file-system description object + * @lnum: LEB number of commit start node + * @offs: offset of commit start node + * @cs_sqnum: commit start sequence number is returned here + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, +			unsigned long long *cs_sqnum) +{ +	struct ubifs_cs_node *cs_node = NULL; +	int err, ret; + +	dbg_rcvry("at %d:%d", lnum, offs); +	cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL); +	if (!cs_node) +		return -ENOMEM; +	if (c->leb_size - offs < UBIFS_CS_NODE_SZ) +		goto out_err; +	err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); +	if (err && err != -EBADMSG) +		goto out_free; +	ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); +	if (ret != SCANNED_A_NODE) { +		dbg_err("Not a valid node"); +		goto out_err; +	} +	if (cs_node->ch.node_type != UBIFS_CS_NODE) { +		dbg_err("Node a CS node, type is %d", cs_node->ch.node_type); +		goto out_err; +	} +	if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) { +		dbg_err("CS node cmt_no %llu != current cmt_no %llu", +			(unsigned long long)le64_to_cpu(cs_node->cmt_no), +			c->cmt_no); +		goto out_err; +	} +	*cs_sqnum = le64_to_cpu(cs_node->ch.sqnum); +	dbg_rcvry("commit start sqnum %llu", *cs_sqnum); +	kfree(cs_node); +	return 0; + +out_err: +	err = -EINVAL; +out_free: +	ubifs_err("failed to get CS sqnum"); +	kfree(cs_node); +	return err; +} + +/** + * ubifs_recover_log_leb - scan and recover a log LEB. + * @c: UBIFS file-system description object + * @lnum: LEB number + * @offs: offset + * @sbuf: LEB-sized buffer to use + * + * This function does a scan of a LEB, but caters for errors that might have + * been caused by the unclean unmount from which we are attempting to recover. + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, +					     int offs, void *sbuf) +{ +	struct ubifs_scan_leb *sleb; +	int next_lnum; + +	dbg_rcvry("LEB %d", lnum); +	next_lnum = lnum + 1; +	if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs) +		next_lnum = UBIFS_LOG_LNUM; +	if (next_lnum != c->ltail_lnum) { +		/* +		 * We can only recover at the end of the log, so check that the +		 * next log LEB is empty or out of date. +		 */ +		sleb = ubifs_scan(c, next_lnum, 0, sbuf); +		if (IS_ERR(sleb)) +			return sleb; +		if (sleb->nodes_cnt) { +			struct ubifs_scan_node *snod; +			unsigned long long cs_sqnum = c->cs_sqnum; + +			snod = list_entry(sleb->nodes.next, +					  struct ubifs_scan_node, list); +			if (cs_sqnum == 0) { +				int err; + +				err = get_cs_sqnum(c, lnum, offs, &cs_sqnum); +				if (err) { +					ubifs_scan_destroy(sleb); +					return ERR_PTR(err); +				} +			} +			if (snod->sqnum > cs_sqnum) { +				ubifs_err("unrecoverable log corruption " +					  "in LEB %d", lnum); +				ubifs_scan_destroy(sleb); +				return ERR_PTR(-EUCLEAN); +			} +		} +		ubifs_scan_destroy(sleb); +	} +	return ubifs_recover_leb(c, lnum, offs, sbuf, 0); +} + +/** + * recover_head - recover a head. + * @c: UBIFS file-system description object + * @lnum: LEB number of head to recover + * @offs: offset of head to recover + * @sbuf: LEB-sized buffer to use + * + * This function ensures that there is no data on the flash at a head location. + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +static int recover_head(const struct ubifs_info *c, int lnum, int offs, +			void *sbuf) +{ +	int len, err, need_clean = 0; + +	if (c->min_io_size > 1) +		len = c->min_io_size; +	else +		len = 512; +	if (offs + len > c->leb_size) +		len = c->leb_size - offs; + +	if (!len) +		return 0; + +	/* Read at the head location and check it is empty flash */ +	err = ubi_read(c->ubi, lnum, sbuf, offs, len); +	if (err) +		need_clean = 1; +	else { +		uint8_t *p = sbuf; + +		while (len--) +			if (*p++ != 0xff) { +				need_clean = 1; +				break; +			} +	} + +	if (need_clean) { +		dbg_rcvry("cleaning head at %d:%d", lnum, offs); +		if (offs == 0) +			return ubifs_leb_unmap(c, lnum); +		err = ubi_read(c->ubi, lnum, sbuf, 0, offs); +		if (err) +			return err; +		return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); +	} + +	return 0; +} + +/** + * ubifs_recover_inl_heads - recover index and LPT heads. + * @c: UBIFS file-system description object + * @sbuf: LEB-sized buffer to use + * + * This function ensures that there is no data on the flash at the index and + * LPT head locations. + * + * This deals with the recovery of a half-completed journal commit. UBIFS is + * careful never to overwrite the last version of the index or the LPT. Because + * the index and LPT are wandering trees, data from a half-completed commit will + * not be referenced anywhere in UBIFS. The data will be either in LEBs that are + * assumed to be empty and will be unmapped anyway before use, or in the index + * and LPT heads. + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) +{ +	int err; + +	ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw); + +	dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs); +	err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf); +	if (err) +		return err; + +	dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs); +	err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf); +	if (err) +		return err; + +	return 0; +} + +/** + *  clean_an_unclean_leb - read and write a LEB to remove corruption. + * @c: UBIFS file-system description object + * @ucleb: unclean LEB information + * @sbuf: LEB-sized buffer to use + * + * This function reads a LEB up to a point pre-determined by the mount recovery, + * checks the nodes, and writes the result back to the flash, thereby cleaning + * off any following corruption, or non-fatal ECC errors. + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +static int clean_an_unclean_leb(const struct ubifs_info *c, +				struct ubifs_unclean_leb *ucleb, void *sbuf) +{ +	int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; +	void *buf = sbuf; + +	dbg_rcvry("LEB %d len %d", lnum, len); + +	if (len == 0) { +		/* Nothing to read, just unmap it */ +		err = ubifs_leb_unmap(c, lnum); +		if (err) +			return err; +		return 0; +	} + +	err = ubi_read(c->ubi, lnum, buf, offs, len); +	if (err && err != -EBADMSG) +		return err; + +	while (len >= 8) { +		int ret; + +		cond_resched(); + +		/* Scan quietly until there is an error */ +		ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); + +		if (ret == SCANNED_A_NODE) { +			/* A valid node, and not a padding node */ +			struct ubifs_ch *ch = buf; +			int node_len; + +			node_len = ALIGN(le32_to_cpu(ch->len), 8); +			offs += node_len; +			buf += node_len; +			len -= node_len; +			continue; +		} + +		if (ret > 0) { +			/* Padding bytes or a valid padding node */ +			offs += ret; +			buf += ret; +			len -= ret; +			continue; +		} + +		if (ret == SCANNED_EMPTY_SPACE) { +			ubifs_err("unexpected empty space at %d:%d", +				  lnum, offs); +			return -EUCLEAN; +		} + +		if (quiet) { +			/* Redo the last scan but noisily */ +			quiet = 0; +			continue; +		} + +		ubifs_scanned_corruption(c, lnum, offs, buf); +		return -EUCLEAN; +	} + +	/* Pad to min_io_size */ +	len = ALIGN(ucleb->endpt, c->min_io_size); +	if (len > ucleb->endpt) { +		int pad_len = len - ALIGN(ucleb->endpt, 8); + +		if (pad_len > 0) { +			buf = c->sbuf + len - pad_len; +			ubifs_pad(c, buf, pad_len); +		} +	} + +	/* Write back the LEB atomically */ +	err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN); +	if (err) +		return err; + +	dbg_rcvry("cleaned LEB %d", lnum); + +	return 0; +} + +/** + * ubifs_clean_lebs - clean LEBs recovered during read-only mount. 
+ * @c: UBIFS file-system description object + * @sbuf: LEB-sized buffer to use + * + * This function cleans a LEB identified during recovery that needs to be + * written but was not because UBIFS was mounted read-only. This happens when + * remounting to read-write mode. + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) +{ +	dbg_rcvry("recovery"); +	while (!list_empty(&c->unclean_leb_list)) { +		struct ubifs_unclean_leb *ucleb; +		int err; + +		ucleb = list_entry(c->unclean_leb_list.next, +				   struct ubifs_unclean_leb, list); +		err = clean_an_unclean_leb(c, ucleb, sbuf); +		if (err) +			return err; +		list_del(&ucleb->list); +		kfree(ucleb); +	} +	return 0; +} + +/** + * struct size_entry - inode size information for recovery. + * @rb: link in the RB-tree of sizes + * @inum: inode number + * @i_size: size on inode + * @d_size: maximum size based on data nodes + * @exists: indicates whether the inode exists + * @inode: inode if pinned in memory awaiting rw mode to fix it + */ +struct size_entry { +	struct rb_node rb; +	ino_t inum; +	loff_t i_size; +	loff_t d_size; +	int exists; +	struct inode *inode; +}; + +/** + * add_ino - add an entry to the size tree. 
+ * @c: UBIFS file-system description object + * @inum: inode number + * @i_size: size on inode + * @d_size: maximum size based on data nodes + * @exists: indicates whether the inode exists + */ +static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size, +		   loff_t d_size, int exists) +{ +	struct rb_node **p = &c->size_tree.rb_node, *parent = NULL; +	struct size_entry *e; + +	while (*p) { +		parent = *p; +		e = rb_entry(parent, struct size_entry, rb); +		if (inum < e->inum) +			p = &(*p)->rb_left; +		else +			p = &(*p)->rb_right; +	} + +	e = kzalloc(sizeof(struct size_entry), GFP_KERNEL); +	if (!e) +		return -ENOMEM; + +	e->inum = inum; +	e->i_size = i_size; +	e->d_size = d_size; +	e->exists = exists; + +	rb_link_node(&e->rb, parent, p); +	rb_insert_color(&e->rb, &c->size_tree); + +	return 0; +} + +/** + * find_ino - find an entry on the size tree. + * @c: UBIFS file-system description object + * @inum: inode number + */ +static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum) +{ +	struct rb_node *p = c->size_tree.rb_node; +	struct size_entry *e; + +	while (p) { +		e = rb_entry(p, struct size_entry, rb); +		if (inum < e->inum) +			p = p->rb_left; +		else if (inum > e->inum) +			p = p->rb_right; +		else +			return e; +	} +	return NULL; +} + +/** + * remove_ino - remove an entry from the size tree. + * @c: UBIFS file-system description object + * @inum: inode number + */ +static void remove_ino(struct ubifs_info *c, ino_t inum) +{ +	struct size_entry *e = find_ino(c, inum); + +	if (!e) +		return; +	rb_erase(&e->rb, &c->size_tree); +	kfree(e); +} + +/** + * ubifs_recover_size_accum - accumulate inode sizes for recovery. 
+ * @c: UBIFS file-system description object + * @key: node key + * @deletion: node is for a deletion + * @new_size: inode size + * + * This function has two purposes: + *     1) to ensure there are no data nodes that fall outside the inode size + *     2) to ensure there are no data nodes for inodes that do not exist + * To accomplish those purposes, an rb-tree is constructed containing an entry + * for each inode number in the journal that has not been deleted, and recording + * the size from the inode node, the maximum size of any data node (also altered + * by truncations) and a flag indicating an inode number for which no inode node + * was present in the journal. + * + * Note that there is still the possibility that there are data nodes that have + * been committed that are beyond the inode size, however the only way to find + * them would be to scan the entire index. Alternatively, some provision could + * be made to record the size of inodes at the start of commit, which would seem + * very cumbersome for a scenario that is quite unlikely and the only negative + * consequence of which is wasted space. + * + * This function returns %0 on success and a negative error code on failure. 
+ */ +int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, +			     int deletion, loff_t new_size) +{ +	ino_t inum = key_inum(c, key); +	struct size_entry *e; +	int err; + +	switch (key_type(c, key)) { +	case UBIFS_INO_KEY: +		if (deletion) +			remove_ino(c, inum); +		else { +			e = find_ino(c, inum); +			if (e) { +				e->i_size = new_size; +				e->exists = 1; +			} else { +				err = add_ino(c, inum, new_size, 0, 1); +				if (err) +					return err; +			} +		} +		break; +	case UBIFS_DATA_KEY: +		e = find_ino(c, inum); +		if (e) { +			if (new_size > e->d_size) +				e->d_size = new_size; +		} else { +			err = add_ino(c, inum, 0, new_size, 0); +			if (err) +				return err; +		} +		break; +	case UBIFS_TRUN_KEY: +		e = find_ino(c, inum); +		if (e) +			e->d_size = new_size; +		break; +	} +	return 0; +} + +/** + * ubifs_recover_size - recover inode size. + * @c: UBIFS file-system description object + * + * This function attempts to fix inode size discrepancies identified by the + * 'ubifs_recover_size_accum()' function. + * + * This functions returns %0 on success and a negative error code on failure. 
+ */ +int ubifs_recover_size(struct ubifs_info *c) +{ +	struct rb_node *this = rb_first(&c->size_tree); + +	while (this) { +		struct size_entry *e; +		int err; + +		e = rb_entry(this, struct size_entry, rb); +		if (!e->exists) { +			union ubifs_key key; + +			ino_key_init(c, &key, e->inum); +			err = ubifs_tnc_lookup(c, &key, c->sbuf); +			if (err && err != -ENOENT) +				return err; +			if (err == -ENOENT) { +				/* Remove data nodes that have no inode */ +				dbg_rcvry("removing ino %lu", +					  (unsigned long)e->inum); +				err = ubifs_tnc_remove_ino(c, e->inum); +				if (err) +					return err; +			} else { +				struct ubifs_ino_node *ino = c->sbuf; + +				e->exists = 1; +				e->i_size = le64_to_cpu(ino->size); +			} +		} +		if (e->exists && e->i_size < e->d_size) { +			if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) { +				/* Fix the inode size and pin it in memory */ +				struct inode *inode; + +				inode = ubifs_iget(c->vfs_sb, e->inum); +				if (IS_ERR(inode)) +					return PTR_ERR(inode); +				if (inode->i_size < e->d_size) { +					dbg_rcvry("ino %lu size %lld -> %lld", +						  (unsigned long)e->inum, +						  e->d_size, inode->i_size); +					inode->i_size = e->d_size; +					ubifs_inode(inode)->ui_size = e->d_size; +					e->inode = inode; +					this = rb_next(this); +					continue; +				} +				iput(inode); +			} +		} +		this = rb_next(this); +		rb_erase(&e->rb, &c->size_tree); +		kfree(e); +	} +	return 0; +} diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c new file mode 100644 index 000000000..da33a14ab --- /dev/null +++ b/fs/ubifs/replay.c @@ -0,0 +1,1070 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + *          Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file contains journal replay code. It runs when the file-system is being + * mounted and requires no locking. + * + * The larger the journal is, the longer it takes to scan it, so the longer it + * takes to mount UBIFS. This is why the journal has limited size which may be + * changed depending on the system requirements. But a larger journal gives + * faster I/O speed because it writes the index less frequently. So this is a + * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the + * larger the journal is, the more memory its index may consume. + */ + +#include "ubifs.h" + +/* + * Replay flags. + * + * REPLAY_DELETION: node was deleted + * REPLAY_REF: node is a reference node + */ +enum { +	REPLAY_DELETION = 1, +	REPLAY_REF = 2, +}; + +/** + * struct replay_entry - replay tree entry. + * @lnum: logical eraseblock number of the node + * @offs: node offset + * @len: node length + * @sqnum: node sequence number + * @flags: replay flags + * @rb: links the replay tree + * @key: node key + * @nm: directory entry name + * @old_size: truncation old size + * @new_size: truncation new size + * @free: amount of free space in a bud + * @dirty: amount of dirty space in a bud from padding and deletion nodes + * + * UBIFS journal replay must compare node sequence numbers, which means it must + * build a tree of node information to insert into the TNC. 
+ */ +struct replay_entry { +	int lnum; +	int offs; +	int len; +	unsigned long long sqnum; +	int flags; +	struct rb_node rb; +	union ubifs_key key; +	union { +		struct qstr nm; +		struct { +			loff_t old_size; +			loff_t new_size; +		}; +		struct { +			int free; +			int dirty; +		}; +	}; +}; + +/** + * struct bud_entry - entry in the list of buds to replay. + * @list: next bud in the list + * @bud: bud description object + * @free: free bytes in the bud + * @sqnum: reference node sequence number + */ +struct bud_entry { +	struct list_head list; +	struct ubifs_bud *bud; +	int free; +	unsigned long long sqnum; +}; + +/** + * set_bud_lprops - set free and dirty space used by a bud. + * @c: UBIFS file-system description object + * @r: replay entry of bud + */ +static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) +{ +	const struct ubifs_lprops *lp; +	int err = 0, dirty; + +	ubifs_get_lprops(c); + +	lp = ubifs_lpt_lookup_dirty(c, r->lnum); +	if (IS_ERR(lp)) { +		err = PTR_ERR(lp); +		goto out; +	} + +	dirty = lp->dirty; +	if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { +		/* +		 * The LEB was added to the journal with a starting offset of +		 * zero which means the LEB must have been empty. The LEB +		 * property values should be lp->free == c->leb_size and +		 * lp->dirty == 0, but that is not the case. The reason is that +		 * the LEB was garbage collected. The garbage collector resets +		 * the free and dirty space without recording it anywhere except +		 * lprops, so if there is not a commit then lprops does not have +		 * that information next time the file system is mounted. +		 * +		 * We do not need to adjust free space because the scan has told +		 * us the exact value which is recorded in the replay entry as +		 * r->free. +		 * +		 * However we do need to subtract from the dirty space the +		 * amount of space that the garbage collector reclaimed, which +		 * is the whole LEB minus the amount of space that was free. 
+		 */ +		dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, +			lp->free, lp->dirty); +		dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, +			lp->free, lp->dirty); +		dirty -= c->leb_size - lp->free; +		/* +		 * If the replay order was perfect the dirty space would now be +		 * zero. The order is not perfect because the the journal heads +		 * race with each other. This is not a problem but is does mean +		 * that the dirty space may temporarily exceed c->leb_size +		 * during the replay. +		 */ +		if (dirty != 0) +			dbg_msg("LEB %d lp: %d free %d dirty " +				"replay: %d free %d dirty", r->lnum, lp->free, +				lp->dirty, r->free, r->dirty); +	} +	lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, +			     lp->flags | LPROPS_TAKEN, 0); +	if (IS_ERR(lp)) { +		err = PTR_ERR(lp); +		goto out; +	} +out: +	ubifs_release_lprops(c); +	return err; +} + +/** + * trun_remove_range - apply a replay entry for a truncation to the TNC. + * @c: UBIFS file-system description object + * @r: replay entry of truncation + */ +static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) +{ +	unsigned min_blk, max_blk; +	union ubifs_key min_key, max_key; +	ino_t ino; + +	min_blk = r->new_size / UBIFS_BLOCK_SIZE; +	if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) +		min_blk += 1; + +	max_blk = r->old_size / UBIFS_BLOCK_SIZE; +	if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) +		max_blk -= 1; + +	ino = key_inum(c, &r->key); + +	data_key_init(c, &min_key, ino, min_blk); +	data_key_init(c, &max_key, ino, max_blk); + +	return ubifs_tnc_remove_range(c, &min_key, &max_key); +} + +/** + * apply_replay_entry - apply a replay entry to the TNC. + * @c: UBIFS file-system description object + * @r: replay entry to apply + * + * Apply a replay entry to the TNC. 
+ */ +static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) +{ +	int err, deletion = ((r->flags & REPLAY_DELETION) != 0); + +	dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, +		r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); + +	/* Set c->replay_sqnum to help deal with dangling branches. */ +	c->replay_sqnum = r->sqnum; + +	if (r->flags & REPLAY_REF) +		err = set_bud_lprops(c, r); +	else if (is_hash_key(c, &r->key)) { +		if (deletion) +			err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); +		else +			err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, +					       r->len, &r->nm); +	} else { +		if (deletion) +			switch (key_type(c, &r->key)) { +			case UBIFS_INO_KEY: +			{ +				ino_t inum = key_inum(c, &r->key); + +				err = ubifs_tnc_remove_ino(c, inum); +				break; +			} +			case UBIFS_TRUN_KEY: +				err = trun_remove_range(c, r); +				break; +			default: +				err = ubifs_tnc_remove(c, &r->key); +				break; +			} +		else +			err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, +					    r->len); +		if (err) +			return err; + +		if (c->need_recovery) +			err = ubifs_recover_size_accum(c, &r->key, deletion, +						       r->new_size); +	} + +	return err; +} + +/** + * destroy_replay_tree - destroy the replay. + * @c: UBIFS file-system description object + * + * Destroy the replay tree. 
+ */ +static void destroy_replay_tree(struct ubifs_info *c) +{ +	struct rb_node *this = c->replay_tree.rb_node; +	struct replay_entry *r; + +	while (this) { +		if (this->rb_left) { +			this = this->rb_left; +			continue; +		} else if (this->rb_right) { +			this = this->rb_right; +			continue; +		} +		r = rb_entry(this, struct replay_entry, rb); +		this = rb_parent(this); +		if (this) { +			if (this->rb_left == &r->rb) +				this->rb_left = NULL; +			else +				this->rb_right = NULL; +		} +		if (is_hash_key(c, &r->key)) +			kfree((void *)r->nm.name); +		kfree(r); +	} +	c->replay_tree = RB_ROOT; +} + +/** + * apply_replay_tree - apply the replay tree to the TNC. + * @c: UBIFS file-system description object + * + * Apply the replay tree. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +static int apply_replay_tree(struct ubifs_info *c) +{ +	struct rb_node *this = rb_first(&c->replay_tree); + +	while (this) { +		struct replay_entry *r; +		int err; + +		cond_resched(); + +		r = rb_entry(this, struct replay_entry, rb); +		err = apply_replay_entry(c, r); +		if (err) +			return err; +		this = rb_next(this); +	} +	return 0; +} + +/** + * insert_node - insert a node to the replay tree. + * @c: UBIFS file-system description object + * @lnum: node logical eraseblock number + * @offs: node offset + * @len: node length + * @key: node key + * @sqnum: sequence number + * @deletion: non-zero if this is a deletion + * @used: number of bytes in use in a LEB + * @old_size: truncation old size + * @new_size: truncation new size + * + * This function inserts a scanned non-direntry node to the replay tree. The + * replay tree is an RB-tree containing @struct replay_entry elements which are + * indexed by the sequence number. The replay tree is applied at the very end + * of the replay process. Since the tree is sorted in sequence number order, + * the older modifications are applied first. 
This function returns zero in
+ * case of success and a negative error code in case of failure.
+ */
+static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
+		       union ubifs_key *key, unsigned long long sqnum,
+		       int deletion, int *used, loff_t old_size,
+		       loff_t new_size)
+{
+	struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
+	struct replay_entry *r;
+
+	if (key_inum(c, key) >= c->highest_inum) /* track the highest inode number seen */
+		c->highest_inum = key_inum(c, key);
+
+	dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
+	while (*p) { /* find the insertion slot; the tree is ordered by sqnum */
+		parent = *p;
+		r = rb_entry(parent, struct replay_entry, rb);
+		if (sqnum < r->sqnum) {
+			p = &(*p)->rb_left;
+			continue;
+		} else if (sqnum > r->sqnum) {
+			p = &(*p)->rb_right;
+			continue;
+		}
+		ubifs_err("duplicate sqnum in replay"); /* equal sqnum is rejected */
+		return -EINVAL;
+	}
+
+	r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
+	if (!r)
+		return -ENOMEM;
+
+	if (!deletion) /* deletions do not add to the used-space count */
+		*used += ALIGN(len, 8);
+	r->lnum = lnum;
+	r->offs = offs;
+	r->len = len;
+	r->sqnum = sqnum;
+	r->flags = (deletion ? REPLAY_DELETION : 0);
+	r->old_size = old_size;
+	r->new_size = new_size;
+	key_copy(c, key, &r->key);
+
+	rb_link_node(&r->rb, parent, p);
+	rb_insert_color(&r->rb, &c->replay_tree);
+	return 0;
+}
+
+/**
+ * insert_dent - insert a directory entry node into the replay tree.
+ * @c: UBIFS file-system description object
+ * @lnum: node logical eraseblock number
+ * @offs: node offset
+ * @len: node length
+ * @key: node key
+ * @name: directory entry name
+ * @nlen: directory entry name length
+ * @sqnum: sequence number
+ * @deletion: non-zero if this is a deletion
+ * @used: number of bytes in use in a LEB
+ *
+ * This function inserts a scanned directory entry node to the replay tree.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
+ *
+ * This function is also used for extended attribute entries because they are
+ * implemented as directory entry nodes.
+ */
+static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
+		       union ubifs_key *key, const char *name, int nlen,
+		       unsigned long long sqnum, int deletion, int *used)
+{
+	struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
+	struct replay_entry *r;
+	char *nbuf;
+
+	if (key_inum(c, key) >= c->highest_inum) /* track the highest inode number seen */
+		c->highest_inum = key_inum(c, key);
+
+	dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
+	while (*p) { /* find the insertion slot; the tree is ordered by sqnum */
+		parent = *p;
+		r = rb_entry(parent, struct replay_entry, rb);
+		if (sqnum < r->sqnum) {
+			p = &(*p)->rb_left;
+			continue;
+		}
+		if (sqnum > r->sqnum) {
+			p = &(*p)->rb_right;
+			continue;
+		}
+		ubifs_err("duplicate sqnum in replay"); /* equal sqnum is rejected */
+		return -EINVAL;
+	}
+
+	r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
+	if (!r)
+		return -ENOMEM;
+	nbuf = kmalloc(nlen + 1, GFP_KERNEL); /* private copy of the name plus NUL */
+	if (!nbuf) {
+		kfree(r);
+		return -ENOMEM;
+	}
+
+	if (!deletion) /* deletions do not add to the used-space count */
+		*used += ALIGN(len, 8);
+	r->lnum = lnum;
+	r->offs = offs;
+	r->len = len;
+	r->sqnum = sqnum;
+	r->nm.len = nlen;
+	memcpy(nbuf, name, nlen);
+	nbuf[nlen] = '\0';
+	r->nm.name = nbuf; /* released in destroy_replay_tree() */
+	r->flags = (deletion ? REPLAY_DELETION : 0);
+	key_copy(c, key, &r->key);
+
+	ubifs_assert(!*p); /* the search loop above ended on an empty slot */
+	rb_link_node(&r->rb, parent, p);
+	rb_insert_color(&r->rb, &c->replay_tree);
+	return 0;
+}
+
+/**
+ * ubifs_validate_entry - validate directory or extended attribute entry node.
+ * @c: UBIFS file-system description object
+ * @dent: the node to validate
+ *
+ * This function validates directory or extended attribute entry node @dent.
+ * Returns zero if the node is all right and a %-EINVAL if not.
+ */
+int ubifs_validate_entry(struct ubifs_info *c,
+			 const struct ubifs_dent_node *dent)
+{
+	int key_type = key_type_flash(c, dent->key);
+	int nlen = le16_to_cpu(dent->nlen);
+
+	/*
+	 * The node length must match the name length, the entry type and the
+	 * target inode number must be in range, and the name must be exactly
+	 * @nlen bytes long and NUL-terminated.
+	 */
+	if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 ||
+	    dent->type >= UBIFS_ITYPES_CNT ||
+	    nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
+	    strnlen((char *)dent->name, nlen) != nlen ||
+	    le64_to_cpu(dent->inum) > MAX_INUM) {
+		ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ?
+			  "directory entry" : "extended attribute entry");
+		return -EINVAL;
+	}
+
+	if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
+		ubifs_err("bad key type %d", key_type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * replay_bud - replay a bud logical eraseblock.
+ * @c: UBIFS file-system description object
+ * @lnum: bud logical eraseblock number to replay
+ * @offs: bud start offset
+ * @jhead: journal head to which this bud belongs
+ * @free: amount of free space in the bud is returned here
+ * @dirty: amount of dirty space from padding and deletion nodes is returned
+ * here
+ *
+ * The bud is scanned and every node found in it is added to the replay tree
+ * (insert_node() / insert_dent()). @free and @dirty are written only when the
+ * whole bud was processed successfully.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
+		      int *free, int *dirty)
+{
+	int err = 0, used = 0;
+	struct ubifs_scan_leb *sleb;
+	struct ubifs_scan_node *snod;
+	struct ubifs_bud *bud;
+
+	dbg_mnt("replay bud LEB %d, head %d", lnum, jhead);
+	if (c->need_recovery)
+		sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
+	else
+		sleb = ubifs_scan(c, lnum, offs, c->sbuf);
+	if (IS_ERR(sleb))
+		return PTR_ERR(sleb);
+
+	/*
+	 * The bud does not have to start from offset zero - the beginning of
+	 * the 'lnum' LEB may contain previously committed data. One of the
+	 * things we have to do in replay is to correctly update lprops with
+	 * newer information about this LEB.
+	 *
+	 * At this point lprops thinks that this LEB has 'c->leb_size - offs'
+	 * bytes of free space because it only contains information about
+	 * committed data.
+	 *
+	 * But we know that real amount of free space is 'c->leb_size -
+	 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
+	 * 'sleb->endpt' is used by bud data. We have to correctly calculate
+	 * how much of these data are dirty and update lprops with this
+	 * information.
+	 *
+	 * The dirt in that LEB region is comprised of padding nodes, deletion
+	 * nodes, truncation nodes and nodes which are obsoleted by subsequent
+	 * nodes in this LEB. So instead of calculating clean space, we
+	 * calculate used space ('used' variable).
+	 */
+
+	list_for_each_entry(snod, &sleb->nodes, list) {
+		int deletion = 0;
+
+		cond_resched();
+
+		if (snod->sqnum >= SQNUM_WATERMARK) {
+			ubifs_err("file system's life ended");
+			goto out_dump;
+		}
+
+		if (snod->sqnum > c->max_sqnum)
+			c->max_sqnum = snod->sqnum;
+
+		switch (snod->type) {
+		case UBIFS_INO_NODE:
+		{
+			struct ubifs_ino_node *ino = snod->node;
+			loff_t new_size = le64_to_cpu(ino->size);
+
+			/* An inode with no links left is replayed as a deletion */
+			if (le32_to_cpu(ino->nlink) == 0)
+				deletion = 1;
+			err = insert_node(c, lnum, snod->offs, snod->len,
+					  &snod->key, snod->sqnum, deletion,
+					  &used, 0, new_size);
+			break;
+		}
+		case UBIFS_DATA_NODE:
+		{
+			struct ubifs_data_node *dn = snod->node;
+			/*
+			 * Widen the block number before multiplying, so that
+			 * the byte offset is computed in loff_t and cannot
+			 * wrap for blocks at or beyond 4GiB if key_block()
+			 * yields a 32-bit value.
+			 */
+			loff_t new_size = le32_to_cpu(dn->size) +
+					  (loff_t)key_block(c, &snod->key) *
+					  UBIFS_BLOCK_SIZE;
+
+			err = insert_node(c, lnum, snod->offs, snod->len,
+					  &snod->key, snod->sqnum, deletion,
+					  &used, 0, new_size);
+			break;
+		}
+		case UBIFS_DENT_NODE:
+		case UBIFS_XENT_NODE:
+		{
+			struct ubifs_dent_node *dent = snod->node;
+
+			err = ubifs_validate_entry(c, dent);
+			if (err)
+				goto out_dump;
+
+			/* An entry whose inode number is zero is a deletion */
+			err = insert_dent(c, lnum, snod->offs, snod->len,
+					  &snod->key, (char *)dent->name,
+					  le16_to_cpu(dent->nlen), snod->sqnum,
+					  !le64_to_cpu(dent->inum), &used);
+			break;
+		}
+		case UBIFS_TRUN_NODE:
+		{
+			struct ubifs_trun_node *trun = snod->node;
+			loff_t old_size = le64_to_cpu(trun->old_size);
+			loff_t new_size = le64_to_cpu(trun->new_size);
+			union ubifs_key key;
+
+			/* Validate truncation node */
+			if (old_size < 0 || old_size > c->max_inode_sz ||
+			    new_size < 0 || new_size > c->max_inode_sz ||
+			    old_size <= new_size) {
+				ubifs_err("bad truncation node");
+				goto out_dump;
+			}
+
+			/*
+			 * Create a fake truncation key just to use the same
+			 * functions which expect nodes to have keys.
+			 */
+			trun_key_init(c, &key, le32_to_cpu(trun->inum));
+			err = insert_node(c, lnum, snod->offs, snod->len,
+					  &key, snod->sqnum, 1, &used,
+					  old_size, new_size);
+			break;
+		}
+		default:
+			ubifs_err("unexpected node type %d in bud LEB %d:%d",
+				  snod->type, lnum, snod->offs);
+			err = -EINVAL;
+			goto out_dump;
+		}
+		if (err)
+			goto out;
+	}
+
+	/* The bud must already be known, see add_replay_bud() */
+	bud = ubifs_search_bud(c, lnum);
+	if (!bud)
+		BUG();
+
+	ubifs_assert(sleb->endpt - offs >= used);
+	ubifs_assert(sleb->endpt % c->min_io_size == 0);
+
+	*dirty = sleb->endpt - offs - used;
+	*free = c->leb_size - sleb->endpt;
+
+out:
+	ubifs_scan_destroy(sleb);
+	return err;
+
+out_dump:
+	ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
+	dbg_dump_node(c, snod->node);
+	ubifs_scan_destroy(sleb);
+	return -EINVAL;
+}
+
+/**
+ * insert_ref_node - insert a reference node to the replay tree.
+ * @c: UBIFS file-system description object
+ * @lnum: node logical eraseblock number
+ * @offs: node offset
+ * @sqnum: sequence number
+ * @free: amount of free space in bud
+ * @dirty: amount of dirty space from padding and deletion nodes
+ *
+ * This function inserts a reference node to the replay tree and returns zero
+ * in case of success or a negative error code in case of failure.
+ */
+static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
+			   unsigned long long sqnum, int free, int dirty)
+{
+	struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
+	struct replay_entry *r;
+
+	dbg_mnt("add ref LEB %d:%d", lnum, offs);
+	while (*p) { /* find the insertion slot; the tree is ordered by sqnum */
+		parent = *p;
+		r = rb_entry(parent, struct replay_entry, rb);
+		if (sqnum < r->sqnum) {
+			p = &(*p)->rb_left;
+			continue;
+		} else if (sqnum > r->sqnum) {
+			p = &(*p)->rb_right;
+			continue;
+		}
+		ubifs_err("duplicate sqnum in replay tree"); /* equal sqnum is rejected */
+		return -EINVAL;
+	}
+
+	r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
+	if (!r)
+		return -ENOMEM;
+
+	r->lnum = lnum;
+	r->offs = offs;
+	r->sqnum = sqnum;
+	r->flags = REPLAY_REF; /* makes apply_replay_entry() call set_bud_lprops() */
+	r->free = free;
+	r->dirty = dirty;
+
+	rb_link_node(&r->rb, parent, p);
+	rb_insert_color(&r->rb, &c->replay_tree);
+	return 0;
+}
+
+/**
+ * replay_buds - replay all buds.
+ * @c: UBIFS file-system description object
+ *
+ * For every bud on the @c->replay_buds list this function scans the bud with
+ * replay_bud() and then adds a reference entry carrying the free and dirty
+ * space of that bud with insert_ref_node(). It stops at the first failure.
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int replay_buds(struct ubifs_info *c)
+{
+	struct bud_entry *b;
+	int err, uninitialized_var(free), uninitialized_var(dirty);
+
+	list_for_each_entry(b, &c->replay_buds, list) {
+		err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead,
+				 &free, &dirty);
+		if (err)
+			return err;
+		err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
+				      free, dirty);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * destroy_bud_list - destroy the list of buds to replay.
+ * @c: UBIFS file-system description object
+ *
+ * Unlink and free every &struct bud_entry on the @c->replay_buds list. The
+ * bud objects the entries point to (b->bud) are not freed here.
+ */
+static void destroy_bud_list(struct ubifs_info *c)
+{
+	struct bud_entry *b;
+
+	while (!list_empty(&c->replay_buds)) {
+		b = list_entry(c->replay_buds.next, struct bud_entry, list);
+		list_del(&b->list);
+		kfree(b);
+	}
+}
+
+/**
+ * add_replay_bud - add a bud to the list of buds to replay.
+ * @c: UBIFS file-system description object
+ * @lnum: bud logical eraseblock number to replay
+ * @offs: bud start offset
+ * @jhead: journal head to which this bud belongs
+ * @sqnum: reference node sequence number
+ *
+ * This function returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
+			  unsigned long long sqnum)
+{
+	struct ubifs_bud *bud;
+	struct bud_entry *b;
+
+	dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
+
+	bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL);
+	if (!bud)
+		return -ENOMEM;
+
+	b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
+	if (!b) {
+		kfree(bud);
+		return -ENOMEM;
+	}
+
+	bud->lnum = lnum;
+	bud->start = offs;
+	bud->jhead = jhead;
+	ubifs_add_bud(c, bud);
+
+	/* The list entry only refers to the bud, see destroy_bud_list() */
+	b->bud = bud;
+	b->sqnum = sqnum;
+	list_add_tail(&b->list, &c->replay_buds);
+
+	return 0;
+}
+
+/**
+ * validate_ref - validate a reference node.
+ * @c: UBIFS file-system description object
+ * @ref: the reference node to validate
+ *
+ * This function returns %1 if a bud reference already exists for the LEB. %0 is
+ * returned if the reference node is new, otherwise %-EINVAL is returned if
+ * validation failed.
+ */
+static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
+{
+	struct ubifs_bud *bud;
+	int lnum = le32_to_cpu(ref->lnum);
+	unsigned int offs = le32_to_cpu(ref->offs);
+	unsigned int jhead = le32_to_cpu(ref->jhead);
+
+	/*
+	 * ref->offs may point to the end of LEB when the journal head points
+	 * to the end of LEB and we write reference node for it during commit.
+	 * So this is why 'offs == c->leb_size' is accepted and only
+	 * 'offs > c->leb_size' is rejected.
+	 */
+	if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt ||
+	    lnum < c->main_first || offs > c->leb_size ||
+	    offs & (c->min_io_size - 1))
+		return -EINVAL;
+
+	/* Make sure we have not already looked at this bud */
+	bud = ubifs_search_bud(c, lnum);
+	if (bud) {
+		if (bud->jhead == jhead && bud->start <= offs)
+			return 1;
+		ubifs_err("bud at LEB %d:%d was already referred", lnum, offs);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * replay_log_leb - replay a log logical eraseblock.
+ * @c: UBIFS file-system description object
+ * @lnum: log logical eraseblock to replay
+ * @offs: offset to start replaying from
+ * @sbuf: scan buffer
+ *
+ * This function replays a log LEB and returns zero in case of success, %1 if
+ * this is the last LEB in the log, and a negative error code in case of
+ * failure.
+ */
+static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
+{
+	int err;
+	struct ubifs_scan_leb *sleb;
+	struct ubifs_scan_node *snod;
+	const struct ubifs_cs_node *node;
+
+	dbg_mnt("replay log LEB %d:%d", lnum, offs);
+	sleb = ubifs_scan(c, lnum, offs, sbuf);
+	if (IS_ERR(sleb)) {
+		/* A failed scan is retried through recovery when it is enabled */
+		if (c->need_recovery)
+			sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
+		if (IS_ERR(sleb))
+			return PTR_ERR(sleb);
+	}
+
+	if (sleb->nodes_cnt == 0) {
+		err = 1;
+		goto out;
+	}
+
+	node = sleb->buf;
+
+	snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
+	if (c->cs_sqnum == 0) {
+		/*
+		 * This is the first log LEB we are looking at, make sure that
+		 * the first node is a commit start node. Also record its
+		 * sequence number so that UBIFS can determine where the log
+		 * ends, because all nodes which belong to this log have
+		 * higher sequence numbers.
+		 */
+		if (snod->type != UBIFS_CS_NODE) {
+			dbg_err("first log node at LEB %d:%d is not CS node",
+				lnum, offs);
+			goto out_dump;
+		}
+		if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
+			dbg_err("first CS node at LEB %d:%d has wrong "
+				"commit number %llu expected %llu",
+				lnum, offs,
+				(unsigned long long)le64_to_cpu(node->cmt_no),
+				c->cmt_no);
+			goto out_dump;
+		}
+
+		c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
+		dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
+	}
+
+	if (snod->sqnum < c->cs_sqnum) {
+		/*
+		 * This means that we reached end of log and now
+		 * look to the older log data, which was already
+		 * committed but the eraseblock was not erased (UBIFS
+		 * only un-maps it). So this basically means we have to
+		 * exit with "end of log" code.
+		 */
+		err = 1;
+		goto out;
+	}
+
+	/* Make sure the first node sits at offset zero of the LEB */
+	if (snod->offs != 0) {
+		dbg_err("first node is not at zero offset");
+		goto out_dump;
+	}
+
+	list_for_each_entry(snod, &sleb->nodes, list) {
+
+		cond_resched();
+
+		if (snod->sqnum >= SQNUM_WATERMARK) {
+			ubifs_err("file system's life ended");
+			goto out_dump;
+		}
+
+		if (snod->sqnum < c->cs_sqnum) {
+			dbg_err("bad sqnum %llu, commit sqnum %llu",
+				snod->sqnum, c->cs_sqnum);
+			goto out_dump;
+		}
+
+		if (snod->sqnum > c->max_sqnum)
+			c->max_sqnum = snod->sqnum;
+
+		switch (snod->type) {
+		case UBIFS_REF_NODE: {
+			const struct ubifs_ref_node *ref = snod->node;
+
+			err = validate_ref(c, ref);
+			if (err == 1)
+				break; /* Already have this bud */
+			if (err)
+				goto out_dump;
+
+			err = add_replay_bud(c, le32_to_cpu(ref->lnum),
+					     le32_to_cpu(ref->offs),
+					     le32_to_cpu(ref->jhead),
+					     snod->sqnum);
+			if (err)
+				goto out;
+
+			break;
+		}
+		case UBIFS_CS_NODE:
+			/* Make sure it sits at the beginning of LEB */
+			if (snod->offs != 0) {
+				ubifs_err("unexpected node in log");
+				goto out_dump;
+			}
+			break;
+		default:
+			ubifs_err("unexpected node in log");
+			goto out_dump;
+		}
+	}
+
+	if (sleb->endpt || c->lhead_offs >= c->leb_size) {
+		c->lhead_lnum = lnum;
+		c->lhead_offs = sleb->endpt;
+	}
+
+	/* A scan which ended at offset zero is reported as "end of log" */
+	err = !sleb->endpt;
+out:
+	ubifs_scan_destroy(sleb);
+	return err;
+
+out_dump:
+	ubifs_err("log error detected while replaying the log at LEB %d:%d",
+		  lnum, offs + snod->offs);
+	dbg_dump_node(c, snod->node);
+	ubifs_scan_destroy(sleb);
+	return -EINVAL;
+}
+
+/**
+ * take_ihead - update the status of the index head in lprops to 'taken'.
+ * @c: UBIFS file-system description object
+ *
+ * This function returns the amount of free space in the index head LEB or a
+ * negative error code.
+ */
+static int take_ihead(struct ubifs_info *c)
+{
+	const struct ubifs_lprops *lp;
+	int err, free;
+
+	ubifs_get_lprops(c);
+
+	lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum);
+	if (IS_ERR(lp)) {
+		err = PTR_ERR(lp);
+		goto out;
+	}
+
+	/* Remember the free space before the properties are changed */
+	free = lp->free;
+
+	lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
+			     lp->flags | LPROPS_TAKEN, 0);
+	if (IS_ERR(lp)) {
+		err = PTR_ERR(lp);
+		goto out;
+	}
+
+	err = free;
+out:
+	ubifs_release_lprops(c);
+	return err;
+}
+
+/**
+ * ubifs_replay_journal - replay journal.
+ * @c: UBIFS file-system description object
+ *
+ * This function scans the journal, replays and cleans it up. It makes sure all
+ * memory data structures related to uncommitted journal are built (dirty TNC
+ * tree, tree of buds, modified lprops, etc).
+ */
+int ubifs_replay_journal(struct ubifs_info *c)
+{
+	int err, i, lnum, offs, _free;
+	void *sbuf = NULL;
+
+	BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
+
+	/* Update the status of the index head in lprops to 'taken' */
+	_free = take_ihead(c);
+	if (_free < 0)
+		return _free; /* Error code */
+
+	if (c->ihead_offs != c->leb_size - _free) { /* index head offset must match lprops */
+		ubifs_err("bad index head LEB %d:%d", c->ihead_lnum,
+			  c->ihead_offs);
+		return -EINVAL;
+	}
+
+	sbuf = vmalloc(c->leb_size); /* one LEB-sized buffer reused for every log LEB */
+	if (!sbuf)
+		return -ENOMEM;
+
+	dbg_mnt("start replaying the journal");
+
+	c->replaying = 1;
+
+	lnum = c->ltail_lnum = c->lhead_lnum;
+	offs = c->lhead_offs;
+
+	for (i = 0; i < c->log_lebs; i++, lnum++) { /* visit each log LEB at most once */
+		if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) {
+			/*
+			 * The log is logically circular, we reached the last
+			 * LEB, switch to the first one.
+			 */
+			lnum = UBIFS_LOG_LNUM;
+			offs = 0;
+		}
+		err = replay_log_leb(c, lnum, offs, sbuf);
+		if (err == 1)
+			/* We hit the end of the log; this is not an error */
+			break;
+		if (err)
+			goto out;
+		offs = 0; /* only the first LEB is replayed from a non-zero offset */
+	}
+
+	err = replay_buds(c);
+	if (err)
+		goto out;
+
+	err = apply_replay_tree(c);
+	if (err)
+		goto out;
+
+	ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
+	dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
+		"highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
+		(unsigned long)c->highest_inum);
+out: /* also reached on success: the replay tree and bud list are temporary */
+	destroy_replay_tree(c);
+	destroy_bud_list(c);
+	vfree(sbuf);
+	c->replaying = 0;
+	return err;
+}
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
new file mode 100644
index 000000000..9708fda95
--- /dev/null
+++ b/fs/ubifs/sb.c
@@ -0,0 +1,324 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Artem Bityutskiy (Битюцкий Артём)
+ *          Adrian Hunter
+ */
+
+/*
+ * This file implements UBIFS superblock. The superblock is stored at the first
+ * LEB of the volume and is never changed by UBIFS. Only user-space tools may
+ * change it. The superblock node mostly contains geometry information.
+ *
+ * The code in this file only reads and validates an existing superblock.
+ */
+
+#include "ubifs.h"
+
+/*
+ * Default journal size in logical eraseblocks as a percent of total
+ * flash size.
+ *
+ * NOTE(review): of the DEFAULT_* constants below only DEFAULT_LSAVE_CNT is
+ * referenced by the code in this file (in validate_sb()).
+ */
+#define DEFAULT_JNL_PERCENT 5
+
+/* Default maximum journal size in bytes */
+#define DEFAULT_MAX_JNL (32*1024*1024)
+
+/* Default indexing tree fanout */
+#define DEFAULT_FANOUT 8
+
+/* Default number of data journal heads */
+#define DEFAULT_JHEADS_CNT 1
+
+/* Default positions of different LEBs in the main area */
+#define DEFAULT_IDX_LEB  0
+#define DEFAULT_DATA_LEB 1
+#define DEFAULT_GC_LEB   2
+
+/* Default number of LEB numbers in LPT's save table */
+#define DEFAULT_LSAVE_CNT 256
+
+/* Default reserved pool size as a percent of maximum free space */
+#define DEFAULT_RP_PERCENT 5
+
+/* The default maximum size of reserved pool in bytes */
+#define DEFAULT_MAX_RP_SIZE (5*1024*1024)
+
+/* Default time granularity in nanoseconds (1000000000 ns, i.e. one second) */
+#define DEFAULT_TIME_GRAN 1000000000
+
+/**
+ * validate_sb - validate superblock node.
+ * @c: UBIFS file-system description object
+ * @sup: superblock node
+ *
+ * This function validates superblock node @sup. Since most of data was read
+ * from the superblock and stored in @c, the function validates fields in @c
+ * instead.
Returns zero in case of success and %-EINVAL in case of validation
+ * failure.
+ */
+static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
+{
+	long long max_bytes;
+	int err = 1, min_leb_cnt; /* err is only the check number printed on failure */
+
+	if (!c->key_hash) {
+		err = 2;
+		goto failed;
+	}
+
+	if (sup->key_fmt != UBIFS_SIMPLE_KEY_FMT) {
+		err = 3;
+		goto failed;
+	}
+
+	if (le32_to_cpu(sup->min_io_size) != c->min_io_size) {
+		ubifs_err("min. I/O unit mismatch: %d in superblock, %d real",
+			  le32_to_cpu(sup->min_io_size), c->min_io_size);
+		goto failed; /* err is still 1 here */
+	}
+
+	if (le32_to_cpu(sup->leb_size) != c->leb_size) {
+		ubifs_err("LEB size mismatch: %d in superblock, %d real",
+			  le32_to_cpu(sup->leb_size), c->leb_size);
+		goto failed;
+	}
+
+	if (c->log_lebs < UBIFS_MIN_LOG_LEBS ||
+	    c->lpt_lebs < UBIFS_MIN_LPT_LEBS ||
+	    c->orph_lebs < UBIFS_MIN_ORPH_LEBS ||
+	    c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
+		err = 4;
+		goto failed;
+	}
+
+	/*
+	 * Calculate the minimum allowed total number of LEBs (it is compared
+	 * with c->leb_cnt below). This is very similar to %UBIFS_MIN_LEB_CNT,
+	 * but we take into account the real values which we have just read
+	 * from the superblock.
+	 */
+	min_leb_cnt = UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs;
+	min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6;
+
+	if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) {
+		ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, "
+			  "%d minimum required", c->leb_cnt, c->vi.size,
+			  min_leb_cnt);
+		goto failed;
+	}
+
+	if (c->max_leb_cnt < c->leb_cnt) {
+		ubifs_err("max. LEB count %d less than LEB count %d",
+			  c->max_leb_cnt, c->leb_cnt);
+		goto failed;
+	}
+
+	if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { /* NOTE(review): same condition already fails with error 4 above */
+		err = 7;
+		goto failed;
+	}
+
+	if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS ||
+	    c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) {
+		err = 8;
+		goto failed;
+	}
+
+	if (c->jhead_cnt < NONDATA_JHEADS_CNT + 1 ||
+	    c->jhead_cnt > NONDATA_JHEADS_CNT + UBIFS_MAX_JHEADS) {
+		err = 9;
+		goto failed;
+	}
+
+	if (c->fanout < UBIFS_MIN_FANOUT ||
+	    ubifs_idx_node_sz(c, c->fanout) > c->leb_size) {
+		err = 10;
+		goto failed;
+	}
+
+	if (c->lsave_cnt < 0 || (c->lsave_cnt > DEFAULT_LSAVE_CNT &&
+	    c->lsave_cnt > c->max_leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS -
+	    c->log_lebs - c->lpt_lebs - c->orph_lebs)) {
+		err = 11;
+		goto failed;
+	}
+
+	if (UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs + c->lpt_lebs +
+	    c->orph_lebs + c->main_lebs != c->leb_cnt) { /* the areas must add up to the LEB count */
+		err = 12;
+		goto failed;
+	}
+
+	if (c->default_compr < 0 || c->default_compr >= UBIFS_COMPR_TYPES_CNT) {
+		err = 13;
+		goto failed;
+	}
+
+	max_bytes = c->main_lebs * (long long)c->leb_size;
+	if (c->rp_size < 0 || max_bytes < c->rp_size) {
+		err = 14;
+		goto failed;
+	}
+
+	if (le32_to_cpu(sup->time_gran) > 1000000000 ||
+	    le32_to_cpu(sup->time_gran) < 1) {
+		err = 15;
+		goto failed;
+	}
+
+	return 0;
+
+failed:
+	ubifs_err("bad superblock, error %d", err);
+	dbg_dump_node(c, sup);
+	return -EINVAL;
+}
+
+/**
+ * ubifs_read_sb_node - read superblock node.
+ * @c: UBIFS file-system description object
+ *
+ * This function returns a pointer to the superblock node or a negative error
+ * code.
+ */
+struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
+{
+	struct ubifs_sb_node *sup;
+	int err;
+
+	sup = kmalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_NOFS); /* size rounded up to the min. I/O unit */
+	if (!sup)
+		return ERR_PTR(-ENOMEM);
+
+	err = ubifs_read_node(c, sup, UBIFS_SB_NODE, UBIFS_SB_NODE_SZ,
+			      UBIFS_SB_LNUM, 0);
+	if (err) {
+		kfree(sup);
+		return ERR_PTR(err);
+	}
+
+	return sup; /* the caller frees it, as ubifs_read_superblock() does */
+}
+
+/**
+ * ubifs_read_superblock - read superblock.
+ * @c: UBIFS file-system description object
+ *
+ * This function reads and checks the superblock, copies its fields to @c and
+ * derives the positions of the file-system areas from them. If an empty UBI
+ * volume is being mounted (@c->empty is set), no default superblock is
+ * created: a message is printed and -1 is returned. Returns zero in case of
+ * success, and a negative error code in case of failure.
+ */
+int ubifs_read_superblock(struct ubifs_info *c)
+{
+	int err, sup_flags;
+	struct ubifs_sb_node *sup;
+
+	if (c->empty) {
+		printf("No UBIFS filesystem found!\n");
+		return -1;
+	}
+
+	sup = ubifs_read_sb_node(c);
+	if (IS_ERR(sup))
+		return PTR_ERR(sup);
+
+	/*
+	 * The software does not support future versions, due to the
+	 * unavailability of time-travelling equipment. Versions older than 3
+	 * are rejected below as well.
+	 */
+	c->fmt_version = le32_to_cpu(sup->fmt_version);
+	if (c->fmt_version > UBIFS_FORMAT_VERSION) {
+		ubifs_err("on-flash format version is %d, but software only "
+			  "supports up to version %d", c->fmt_version,
+			  UBIFS_FORMAT_VERSION);
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (c->fmt_version < 3) {
+		ubifs_err("on-flash format version %d is not supported",
+			  c->fmt_version);
+		err = -EINVAL;
+		goto out;
+	}
+
+	switch (sup->key_hash) { /* no default case: other values leave c->key_hash untouched */
+	case UBIFS_KEY_HASH_R5:
+		c->key_hash = key_r5_hash;
+		c->key_hash_type = UBIFS_KEY_HASH_R5;
+		break;
+
+	case UBIFS_KEY_HASH_TEST:
+		c->key_hash = key_test_hash;
+		c->key_hash_type = UBIFS_KEY_HASH_TEST;
+		break;
+	}; /* NOTE(review): the ';' after the brace is an empty statement */
+
+	c->key_fmt = sup->key_fmt;
+
+	switch (c->key_fmt) {
+	case UBIFS_SIMPLE_KEY_FMT:
+		c->key_len = UBIFS_SK_LEN;
+		break;
+	default:
+		ubifs_err("unsupported key format");
+		err = -EINVAL;
+		goto out;
+	}
+
+	c->leb_cnt       = le32_to_cpu(sup->leb_cnt);
+	c->max_leb_cnt   = le32_to_cpu(sup->max_leb_cnt);
+	c->max_bud_bytes = le64_to_cpu(sup->max_bud_bytes);
+	c->log_lebs      = le32_to_cpu(sup->log_lebs);
+	c->lpt_lebs      = le32_to_cpu(sup->lpt_lebs);
+	c->orph_lebs     = le32_to_cpu(sup->orph_lebs);
+	c->jhead_cnt     = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT;
+	c->fanout        = le32_to_cpu(sup->fanout);
+	c->lsave_cnt     = le32_to_cpu(sup->lsave_cnt);
+	c->default_compr = le16_to_cpu(sup->default_compr);
+	c->rp_size       = le64_to_cpu(sup->rp_size);
+	c->rp_uid        = le32_to_cpu(sup->rp_uid);
+	c->rp_gid        = le32_to_cpu(sup->rp_gid);
+	sup_flags        = le32_to_cpu(sup->flags);
+
+	c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
+	memcpy(&c->uuid, &sup->uuid, 16);
+	c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
+
+	/*
+	 * Automatically increase file system size to the maximum size. Only
+	 * the in-memory LEB count is changed here; the value read from the
+	 * superblock is kept in c->old_leb_cnt.
+	 */
+	c->old_leb_cnt = c->leb_cnt;
+	if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) {
+		c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size);
+		dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs",
+			c->old_leb_cnt,	c->leb_cnt);
+	}
+
+	c->log_bytes = (long long)c->log_lebs * c->leb_size;
+	c->log_last = UBIFS_LOG_LNUM + c->log_lebs - 1;
+	c->lpt_first = UBIFS_LOG_LNUM + c->log_lebs; /* areas follow each other: log, LPT, orphans, main */
+	c->lpt_last = c->lpt_first + c->lpt_lebs - 1;
+	c->orph_first = c->lpt_last + 1;
+	c->orph_last = c->orph_first + c->orph_lebs - 1;
+	c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS;
+	c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs;
+	c->main_first = c->leb_cnt - c->main_lebs;
+	c->report_rp_size = ubifs_reported_space(c, c->rp_size);
+
+	err = validate_sb(c, sup);
+out: /* the superblock node is freed on success as well */
+	kfree(sup);
+	return err;
+}
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
new file mode 100644
index 000000000..0ed82479b
--- /dev/null
+++ b/fs/ubifs/scan.c
@@ -0,0 +1,362 @@
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2006-2008 Nokia Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Authors: Adrian Hunter
+ *          Artem Bityutskiy (Битюцкий Артём)
+ */
+
+/*
+ * This file implements the scan which is a general-purpose function for
+ * determining what nodes are in an eraseblock. The scan is used to replay the
+ * journal, to do garbage collection, for the TNC in-the-gaps method, and by
+ * debugging functions.
+ */
+
+#include "ubifs.h"
+
+/**
+ * scan_padding_bytes - scan for padding bytes.
+ * @buf: buffer to scan
+ * @len: length of buffer
+ *
+ * This function returns the number of padding bytes on success and
+ * %SCANNED_GARBAGE on failure.
+ */
+static int scan_padding_bytes(void *buf, int len)
+{
+	int pad_len = 0, max_pad_len = min_t(int, UBIFS_PAD_NODE_SZ, len);
+	uint8_t *p = buf;
+
+	dbg_scan("not a node");
+
+	while (pad_len < max_pad_len && *p++ == UBIFS_PADDING_BYTE) /* count leading padding bytes */
+		pad_len += 1;
+
+	if (!pad_len || (pad_len & 7)) /* none found, or not a multiple of 8 */
+		return SCANNED_GARBAGE;
+
+	dbg_scan("%d padding bytes", pad_len);
+
+	return pad_len;
+}
+
+/**
+ * ubifs_scan_a_node - scan for a node or padding.
+ * @c: UBIFS file-system description object
+ * @buf: buffer to scan
+ * @len: length of buffer
+ * @lnum: logical eraseblock number
+ * @offs: offset within the logical eraseblock
+ * @quiet: print no messages
+ *
+ * This function returns a scanning code to indicate what was scanned. When
+ * padding was scanned (padding bytes or a valid padding node) the returned
+ * value is the number of bytes it occupies instead.
+ */
+int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
+		      int offs, int quiet)
+{
+	struct ubifs_ch *ch = buf;
+	uint32_t magic;
+
+	magic = le32_to_cpu(ch->magic);
+
+	if (magic == 0xFFFFFFFF) {
+		dbg_scan("hit empty space");
+		return SCANNED_EMPTY_SPACE;
+	}
+
+	if (magic != UBIFS_NODE_MAGIC)
+		return scan_padding_bytes(buf, len);
+
+	if (len < UBIFS_CH_SZ) /* too short to hold a common header */
+		return SCANNED_GARBAGE;
+
+	dbg_scan("scanning %s", dbg_ntype(ch->node_type));
+
+	if (ubifs_check_node(c, buf, lnum, offs, quiet, 1))
+		return SCANNED_A_CORRUPT_NODE;
+
+	if (ch->node_type == UBIFS_PAD_NODE) {
+		struct ubifs_pad_node *pad = buf;
+		int pad_len = le32_to_cpu(pad->pad_len);
+		int node_len = le32_to_cpu(ch->len);
+
+		/* Validate the padding node: it must not run past the LEB end */
+		if (pad_len < 0 ||
+		    offs + node_len + pad_len > c->leb_size) {
+			if (!quiet) {
+				ubifs_err("bad pad node at LEB %d:%d",
+					  lnum, offs);
+				dbg_dump_node(c, pad);
+			}
+			return SCANNED_A_BAD_PAD_NODE;
+		}
+
+		/* Make sure the node plus padding is a multiple of 8 bytes */
+		if ((node_len + pad_len) & 7) {
+			if (!quiet) {
+				dbg_err("bad padding length %d - %d",
+					offs, offs + node_len + pad_len);
+			}
+			return SCANNED_A_BAD_PAD_NODE;
+		}
+
+		dbg_scan("%d bytes padded, offset now %d",
+			 pad_len, ALIGN(offs + node_len + pad_len, 8));
+
+		return node_len + pad_len;
+	}
+
+	return SCANNED_A_NODE;
+}
+
+/**
+ * ubifs_start_scan - create LEB scanning information at start of scan.
+ * @c: UBIFS file-system description object
+ * @lnum: logical eraseblock number
+ * @offs: offset to start at (usually zero)
+ * @sbuf: scan buffer (must be c->leb_size)
+ *
+ * The LEB contents from @offs to the end of the LEB are read into @sbuf at
+ * the same offset. A read that fails with %-EBADMSG is not treated as fatal:
+ * the 'ecc' flag of the returned object is set instead.
+ *
+ * This function returns the newly allocated scanning information on success
+ * and an error code encoded with ERR_PTR() on failure.
+ */
+struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
+					int offs, void *sbuf)
+{
+	struct ubifs_scan_leb *sleb;
+	int err;
+
+	dbg_scan("scan LEB %d:%d", lnum, offs);
+
+	sleb = kzalloc(sizeof(struct ubifs_scan_leb), GFP_NOFS);
+	if (!sleb)
+		return ERR_PTR(-ENOMEM);
+
+	sleb->lnum = lnum;
+	INIT_LIST_HEAD(&sleb->nodes);
+	sleb->buf = sbuf; /* the buffer is referenced, not copied */
+
+	err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs);
+	if (err && err != -EBADMSG) {
+		ubifs_err("cannot read %d bytes from LEB %d:%d,"
+			  " error %d", c->leb_size - offs, lnum, offs, err);
+		kfree(sleb);
+		return ERR_PTR(err);
+	}
+
+	if (err == -EBADMSG)
+		sleb->ecc = 1;
+
+	return sleb;
+}
+
+/**
+ * ubifs_end_scan - update LEB scanning information at end of scan.
+ * @c: UBIFS file-system description object
+ * @sleb: scanning information
+ * @lnum: logical eraseblock number
+ * @offs: offset at which the scan stopped
+ *
+ * This function records @offs, aligned up to the min. I/O unit size, as the
+ * end point of the scan in @sleb->endpt. It does not return a value.
+ */
+void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+		    int lnum, int offs)
+{
+	lnum = lnum; /* self-assignment; presumably keeps lnum "used" when dbg_scan() compiles out */
+	dbg_scan("stop scanning LEB %d at offset %d", lnum, offs);
+	ubifs_assert(offs % c->min_io_size == 0);
+
+	sleb->endpt = ALIGN(offs, c->min_io_size);
+}
+
+/**
+ * ubifs_add_snod - add a scanned node to LEB scanning information.
+ * @c: UBIFS file-system description object + * @sleb: scanning information + * @buf: buffer containing node + * @offs: offset of node on flash + * + * This function returns %0 on success and a negative error code on failure. + */ +int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, +		   void *buf, int offs) +{ +	struct ubifs_ch *ch = buf; +	struct ubifs_ino_node *ino = buf; +	struct ubifs_scan_node *snod; + +	snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); +	if (!snod) +		return -ENOMEM; + +	snod->sqnum = le64_to_cpu(ch->sqnum); +	snod->type = ch->node_type; +	snod->offs = offs; +	snod->len = le32_to_cpu(ch->len); +	snod->node = buf; + +	switch (ch->node_type) { +	case UBIFS_INO_NODE: +	case UBIFS_DENT_NODE: +	case UBIFS_XENT_NODE: +	case UBIFS_DATA_NODE: +	case UBIFS_TRUN_NODE: +		/* +		 * The key is in the same place in all keyed +		 * nodes. +		 */ +		key_read(c, &ino->key, &snod->key); +		break; +	} +	list_add_tail(&snod->list, &sleb->nodes); +	sleb->nodes_cnt += 1; +	return 0; +} + +/** + * ubifs_scanned_corruption - print information after UBIFS scanned corruption. + * @c: UBIFS file-system description object + * @lnum: LEB number of corruption + * @offs: offset of corruption + * @buf: buffer containing corruption + */ +void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, +			      void *buf) +{ +	int len; + +	ubifs_err("corrupted data at LEB %d:%d", lnum, offs); +	if (dbg_failure_mode) +		return; +	len = c->leb_size - offs; +	if (len > 4096) +		len = 4096; +	dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); +	print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); +} + +/** + * ubifs_scan - scan a logical eraseblock. 
+ * @c: UBIFS file-system description object + * @lnum: logical eraseblock number + * @offs: offset to start at (usually zero) + * @sbuf: scan buffer (must be c->leb_size) + * + * This function scans LEB number @lnum and returns complete information about + * its contents. Returns an error code in case of failure. + */ +struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, +				  int offs, void *sbuf) +{ +	void *buf = sbuf + offs; +	int err, len = c->leb_size - offs; +	struct ubifs_scan_leb *sleb; + +	sleb = ubifs_start_scan(c, lnum, offs, sbuf); +	if (IS_ERR(sleb)) +		return sleb; + +	while (len >= 8) { +		struct ubifs_ch *ch = buf; +		int node_len, ret; + +		dbg_scan("look at LEB %d:%d (%d bytes left)", +			 lnum, offs, len); + +		cond_resched(); + +		ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); + +		if (ret > 0) { +			/* Padding bytes or a valid padding node */ +			offs += ret; +			buf += ret; +			len -= ret; +			continue; +		} + +		if (ret == SCANNED_EMPTY_SPACE) +			/* Empty space is checked later */ +			break; + +		switch (ret) { +		case SCANNED_GARBAGE: +			dbg_err("garbage"); +			goto corrupted; +		case SCANNED_A_NODE: +			break; +		case SCANNED_A_CORRUPT_NODE: +		case SCANNED_A_BAD_PAD_NODE: +			dbg_err("bad node"); +			goto corrupted; +		default: +			dbg_err("unknown"); +			goto corrupted; +		} + +		err = ubifs_add_snod(c, sleb, buf, offs); +		if (err) +			goto error; + +		node_len = ALIGN(le32_to_cpu(ch->len), 8); +		offs += node_len; +		buf += node_len; +		len -= node_len; +	} + +	if (offs % c->min_io_size) +		goto corrupted; + +	ubifs_end_scan(c, sleb, lnum, offs); + +	for (; len > 4; offs += 4, buf = buf + 4, len -= 4) +		if (*(uint32_t *)buf != 0xffffffff) +			break; +	for (; len; offs++, buf++, len--) +		if (*(uint8_t *)buf != 0xff) { +			ubifs_err("corrupt empty space at LEB %d:%d", +				  lnum, offs); +			goto corrupted; +		} + +	return sleb; + +corrupted: +	ubifs_scanned_corruption(c, lnum, offs, buf); +	err = -EUCLEAN; 
+error: +	ubifs_err("LEB %d scanning failed", lnum); +	ubifs_scan_destroy(sleb); +	return ERR_PTR(err); +} + +/** + * ubifs_scan_destroy - destroy LEB scanning information. + * @sleb: scanning information to free + */ +void ubifs_scan_destroy(struct ubifs_scan_leb *sleb) +{ +	struct ubifs_scan_node *node; +	struct list_head *head; + +	head = &sleb->nodes; +	while (!list_empty(head)) { +		node = list_entry(head->next, struct ubifs_scan_node, list); +		list_del(&node->list); +		kfree(node); +	} +	kfree(sleb); +} diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c new file mode 100644 index 000000000..95f2a4125 --- /dev/null +++ b/fs/ubifs/super.c @@ -0,0 +1,1189 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + *          Adrian Hunter + */ + +/* + * This file implements UBIFS initialization and VFS superblock operations. Some + * initialization stuff which is rather large and complex is placed at + * corresponding subsystems, but most of it is here. 
+ */ + +#include "ubifs.h" +#include <linux/math64.h> + +#define INODE_LOCKED_MAX	64 + +struct super_block *ubifs_sb; +static struct inode *inodes_locked_down[INODE_LOCKED_MAX]; + +/* shrinker.c */ + +/* List of all UBIFS file-system instances */ +struct list_head ubifs_infos; + +/* linux/fs/super.c */ + +static int sb_set(struct super_block *sb, void *data) +{ +	dev_t *dev = data; + +	sb->s_dev = *dev; +	return 0; +} + +/** + *	sget	-	find or create a superblock + *	@type:	filesystem type superblock should belong to + *	@test:	comparison callback + *	@set:	setup callback + *	@data:	argument to each of them + */ +struct super_block *sget(struct file_system_type *type, +			int (*test)(struct super_block *,void *), +			int (*set)(struct super_block *,void *), +			void *data) +{ +	struct super_block *s = NULL; +	int err; + +	s = kzalloc(sizeof(struct super_block),  GFP_USER); +	if (!s) { +		err = -ENOMEM; +		return ERR_PTR(err); +	} + +	INIT_LIST_HEAD(&s->s_instances); +	INIT_LIST_HEAD(&s->s_inodes); +	s->s_time_gran = 1000000000; + +	err = set(s, data); +	if (err) { +		return ERR_PTR(err); +	} +	s->s_type = type; +	strncpy(s->s_id, type->name, sizeof(s->s_id)); +	list_add(&s->s_instances, &type->fs_supers); +	return s; +} + +/** + * validate_inode - validate inode. + * @c: UBIFS file-system description object + * @inode: the inode to validate + * + * This is a helper function for 'ubifs_iget()' which validates various fields + * of a newly built inode to make sure they contain sane values and prevent + * possible vulnerabilities. Returns zero if the inode is all right and + * a non-zero error code if not. 
+ */ +static int validate_inode(struct ubifs_info *c, const struct inode *inode) +{ +	int err; +	const struct ubifs_inode *ui = ubifs_inode(inode); + +	if (inode->i_size > c->max_inode_sz) { +		ubifs_err("inode is too large (%lld)", +			  (long long)inode->i_size); +		return 1; +	} + +	if (ui->compr_type < 0 || ui->compr_type >= UBIFS_COMPR_TYPES_CNT) { +		ubifs_err("unknown compression type %d", ui->compr_type); +		return 2; +	} + +	if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) +		return 4; + +	if (!ubifs_compr_present(ui->compr_type)) { +		ubifs_warn("inode %lu uses '%s' compression, but it was not " +			   "compiled in", inode->i_ino, +			   ubifs_compr_name(ui->compr_type)); +	} + +	err = dbg_check_dir_size(c, inode); +	return err; +} + +struct inode *iget_locked(struct super_block *sb, unsigned long ino) +{ +	struct inode *inode; + +	inode = (struct inode *)malloc(sizeof(struct ubifs_inode)); +	if (inode) { +		inode->i_ino = ino; +		inode->i_sb = sb; +		list_add(&inode->i_sb_list, &sb->s_inodes); +		inode->i_state = I_LOCK | I_NEW; +	} + +	return inode; +} + +int ubifs_iput(struct inode *inode) +{ +	list_del_init(&inode->i_sb_list); + +	free(inode); +	return 0; +} + +/* + * Lock (save) inode in inode array for readback after recovery + */ +void iput(struct inode *inode) +{ +	int i; +	struct inode *ino; + +	/* +	 * Search end of list +	 */ +	for (i = 0; i < INODE_LOCKED_MAX; i++) { +		if (inodes_locked_down[i] == NULL) +			break; +	} + +	if (i >= INODE_LOCKED_MAX) { +		ubifs_err("Error, can't lock (save) more inodes while recovery!!!"); +		return; +	} + +	/* +	 * Allocate and use new inode +	 */ +	ino = (struct inode *)malloc(sizeof(struct ubifs_inode)); +	memcpy(ino, inode, sizeof(struct ubifs_inode)); + +	/* +	 * Finally save inode in array +	 */ +	inodes_locked_down[i] = ino; +} + +struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) +{ +	int err; +	union ubifs_key key; +	struct ubifs_ino_node *ino; +	struct ubifs_info *c = 
sb->s_fs_info; +	struct inode *inode; +	struct ubifs_inode *ui; +	int i; + +	dbg_gen("inode %lu", inum); + +	/* +	 * U-Boot special handling of locked down inodes via recovery +	 * e.g. ubifs_recover_size() +	 */ +	for (i = 0; i < INODE_LOCKED_MAX; i++) { +		/* +		 * Exit on last entry (NULL), inode not found in list +		 */ +		if (inodes_locked_down[i] == NULL) +			break; + +		if (inodes_locked_down[i]->i_ino == inum) { +			/* +			 * We found the locked down inode in our array, +			 * so just return this pointer instead of creating +			 * a new one. +			 */ +			return inodes_locked_down[i]; +		} +	} + +	inode = iget_locked(sb, inum); +	if (!inode) +		return ERR_PTR(-ENOMEM); +	if (!(inode->i_state & I_NEW)) +		return inode; +	ui = ubifs_inode(inode); + +	ino = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS); +	if (!ino) { +		err = -ENOMEM; +		goto out; +	} + +	ino_key_init(c, &key, inode->i_ino); + +	err = ubifs_tnc_lookup(c, &key, ino); +	if (err) +		goto out_ino; + +	inode->i_flags |= (S_NOCMTIME | S_NOATIME); +	inode->i_nlink = le32_to_cpu(ino->nlink); +	inode->i_uid   = le32_to_cpu(ino->uid); +	inode->i_gid   = le32_to_cpu(ino->gid); +	inode->i_atime.tv_sec  = (int64_t)le64_to_cpu(ino->atime_sec); +	inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec); +	inode->i_mtime.tv_sec  = (int64_t)le64_to_cpu(ino->mtime_sec); +	inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec); +	inode->i_ctime.tv_sec  = (int64_t)le64_to_cpu(ino->ctime_sec); +	inode->i_ctime.tv_nsec = le32_to_cpu(ino->ctime_nsec); +	inode->i_mode = le32_to_cpu(ino->mode); +	inode->i_size = le64_to_cpu(ino->size); + +	ui->data_len    = le32_to_cpu(ino->data_len); +	ui->flags       = le32_to_cpu(ino->flags); +	ui->compr_type  = le16_to_cpu(ino->compr_type); +	ui->creat_sqnum = le64_to_cpu(ino->creat_sqnum); +	ui->synced_i_size = ui->ui_size = inode->i_size; + +	err = validate_inode(c, inode); +	if (err) +		goto out_invalid; + +	if ((inode->i_mode & S_IFMT) == S_IFLNK) { +		if (ui->data_len <= 0 || 
ui->data_len > UBIFS_MAX_INO_DATA) { +			err = 12; +			goto out_invalid; +		} +		ui->data = kmalloc(ui->data_len + 1, GFP_NOFS); +		if (!ui->data) { +			err = -ENOMEM; +			goto out_ino; +		} +		memcpy(ui->data, ino->data, ui->data_len); +		((char *)ui->data)[ui->data_len] = '\0'; +	} + +	kfree(ino); +	inode->i_state &= ~(I_LOCK | I_NEW); +	return inode; + +out_invalid: +	ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err); +	dbg_dump_node(c, ino); +	dbg_dump_inode(c, inode); +	err = -EINVAL; +out_ino: +	kfree(ino); +out: +	ubifs_err("failed to read inode %lu, error %d", inode->i_ino, err); +	return ERR_PTR(err); +} + +/** + * init_constants_early - initialize UBIFS constants. + * @c: UBIFS file-system description object + * + * This function initialize UBIFS constants which do not need the superblock to + * be read. It also checks that the UBI volume satisfies basic UBIFS + * requirements. Returns zero in case of success and a negative error code in + * case of failure. + */ +static int init_constants_early(struct ubifs_info *c) +{ +	if (c->vi.corrupted) { +		ubifs_warn("UBI volume is corrupted - read-only mode"); +		c->ro_media = 1; +	} + +	if (c->di.ro_mode) { +		ubifs_msg("read-only UBI device"); +		c->ro_media = 1; +	} + +	if (c->vi.vol_type == UBI_STATIC_VOLUME) { +		ubifs_msg("static UBI volume - read-only mode"); +		c->ro_media = 1; +	} + +	c->leb_cnt = c->vi.size; +	c->leb_size = c->vi.usable_leb_size; +	c->half_leb_size = c->leb_size / 2; +	c->min_io_size = c->di.min_io_size; +	c->min_io_shift = fls(c->min_io_size) - 1; + +	if (c->leb_size < UBIFS_MIN_LEB_SZ) { +		ubifs_err("too small LEBs (%d bytes), min. is %d bytes", +			  c->leb_size, UBIFS_MIN_LEB_SZ); +		return -EINVAL; +	} + +	if (c->leb_cnt < UBIFS_MIN_LEB_CNT) { +		ubifs_err("too few LEBs (%d), min. is %d", +			  c->leb_cnt, UBIFS_MIN_LEB_CNT); +		return -EINVAL; +	} + +	if (!is_power_of_2(c->min_io_size)) { +		ubifs_err("bad min. 
I/O size %d", c->min_io_size); +		return -EINVAL; +	} + +	/* +	 * UBIFS aligns all node to 8-byte boundary, so to make function in +	 * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is +	 * less than 8. +	 */ +	if (c->min_io_size < 8) { +		c->min_io_size = 8; +		c->min_io_shift = 3; +	} + +	c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size); +	c->mst_node_alsz = ALIGN(UBIFS_MST_NODE_SZ, c->min_io_size); + +	/* +	 * Initialize node length ranges which are mostly needed for node +	 * length validation. +	 */ +	c->ranges[UBIFS_PAD_NODE].len  = UBIFS_PAD_NODE_SZ; +	c->ranges[UBIFS_SB_NODE].len   = UBIFS_SB_NODE_SZ; +	c->ranges[UBIFS_MST_NODE].len  = UBIFS_MST_NODE_SZ; +	c->ranges[UBIFS_REF_NODE].len  = UBIFS_REF_NODE_SZ; +	c->ranges[UBIFS_TRUN_NODE].len = UBIFS_TRUN_NODE_SZ; +	c->ranges[UBIFS_CS_NODE].len   = UBIFS_CS_NODE_SZ; + +	c->ranges[UBIFS_INO_NODE].min_len  = UBIFS_INO_NODE_SZ; +	c->ranges[UBIFS_INO_NODE].max_len  = UBIFS_MAX_INO_NODE_SZ; +	c->ranges[UBIFS_ORPH_NODE].min_len = +				UBIFS_ORPH_NODE_SZ + sizeof(__le64); +	c->ranges[UBIFS_ORPH_NODE].max_len = c->leb_size; +	c->ranges[UBIFS_DENT_NODE].min_len = UBIFS_DENT_NODE_SZ; +	c->ranges[UBIFS_DENT_NODE].max_len = UBIFS_MAX_DENT_NODE_SZ; +	c->ranges[UBIFS_XENT_NODE].min_len = UBIFS_XENT_NODE_SZ; +	c->ranges[UBIFS_XENT_NODE].max_len = UBIFS_MAX_XENT_NODE_SZ; +	c->ranges[UBIFS_DATA_NODE].min_len = UBIFS_DATA_NODE_SZ; +	c->ranges[UBIFS_DATA_NODE].max_len = UBIFS_MAX_DATA_NODE_SZ; +	/* +	 * Minimum indexing node size is amended later when superblock is +	 * read and the key length is known. +	 */ +	c->ranges[UBIFS_IDX_NODE].min_len = UBIFS_IDX_NODE_SZ + UBIFS_BRANCH_SZ; +	/* +	 * Maximum indexing node size is amended later when superblock is +	 * read and the fanout is known. +	 */ +	c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX; + +	/* +	 * Initialize dead and dark LEB space watermarks. See gc.c for comments +	 * about these values. 
+	 */ +	c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); +	c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); + +	/* +	 * Calculate how many bytes would be wasted at the end of LEB if it was +	 * fully filled with data nodes of maximum size. This is used in +	 * calculations when reporting free space. +	 */ +	c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; + +	return 0; +} + +/* + * init_constants_sb - initialize UBIFS constants. + * @c: UBIFS file-system description object + * + * This is a helper function which initializes various UBIFS constants after + * the superblock has been read. It also checks various UBIFS parameters and + * makes sure they are all right. Returns zero in case of success and a + * negative error code in case of failure. + */ +static int init_constants_sb(struct ubifs_info *c) +{ +	int tmp, err; +	long long tmp64; + +	c->main_bytes = (long long)c->main_lebs * c->leb_size; +	c->max_znode_sz = sizeof(struct ubifs_znode) + +				c->fanout * sizeof(struct ubifs_zbranch); + +	tmp = ubifs_idx_node_sz(c, 1); +	c->ranges[UBIFS_IDX_NODE].min_len = tmp; +	c->min_idx_node_sz = ALIGN(tmp, 8); + +	tmp = ubifs_idx_node_sz(c, c->fanout); +	c->ranges[UBIFS_IDX_NODE].max_len = tmp; +	c->max_idx_node_sz = ALIGN(tmp, 8); + +	/* Make sure LEB size is large enough to fit full commit */ +	tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt; +	tmp = ALIGN(tmp, c->min_io_size); +	if (tmp > c->leb_size) { +		dbg_err("too small LEB size %d, at least %d needed", +			c->leb_size, tmp); +		return -EINVAL; +	} + +	/* +	 * Make sure that the log is large enough to fit reference nodes for +	 * all buds plus one reserved LEB. +	 */ +	tmp64 = c->max_bud_bytes + c->leb_size - 1; +	c->max_bud_cnt = div_u64(tmp64, c->leb_size); +	tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1); +	tmp /= c->leb_size; +	tmp += 1; +	if (c->log_lebs < tmp) { +		dbg_err("too small log %d LEBs, required min. 
%d LEBs", +			c->log_lebs, tmp); +		return -EINVAL; +	} + +	/* +	 * When budgeting we assume worst-case scenarios when the pages are not +	 * be compressed and direntries are of the maximum size. +	 * +	 * Note, data, which may be stored in inodes is budgeted separately, so +	 * it is not included into 'c->inode_budget'. +	 */ +	c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; +	c->inode_budget = UBIFS_INO_NODE_SZ; +	c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; + +	/* +	 * When the amount of flash space used by buds becomes +	 * 'c->max_bud_bytes', UBIFS just blocks all writers and starts commit. +	 * The writers are unblocked when the commit is finished. To avoid +	 * writers to be blocked UBIFS initiates background commit in advance, +	 * when number of bud bytes becomes above the limit defined below. +	 */ +	c->bg_bud_bytes = (c->max_bud_bytes * 13) >> 4; + +	/* +	 * Ensure minimum journal size. All the bytes in the journal heads are +	 * considered to be used, when calculating the current journal usage. +	 * Consequently, if the journal is too small, UBIFS will treat it as +	 * always full. +	 */ +	tmp64 = (long long)(c->jhead_cnt + 1) * c->leb_size + 1; +	if (c->bg_bud_bytes < tmp64) +		c->bg_bud_bytes = tmp64; +	if (c->max_bud_bytes < tmp64 + c->leb_size) +		c->max_bud_bytes = tmp64 + c->leb_size; + +	err = ubifs_calc_lpt_geom(c); +	if (err) +		return err; + +	return 0; +} + +/* + * init_constants_master - initialize UBIFS constants. + * @c: UBIFS file-system description object + * + * This is a helper function which initializes various UBIFS constants after + * the master node has been read. It also checks various UBIFS parameters and + * makes sure they are all right. + */ +static void init_constants_master(struct ubifs_info *c) +{ +	long long tmp64; + +	c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + +	/* +	 * Calculate total amount of FS blocks. 
This number is not used +	 * internally because it does not make much sense for UBIFS, but it is +	 * necessary to report something for the 'statfs()' call. +	 * +	 * Subtract the LEB reserved for GC, the LEB which is reserved for +	 * deletions, minimum LEBs for the index, and assume only one journal +	 * head is available. +	 */ +	tmp64 = c->main_lebs - 1 - 1 - MIN_INDEX_LEBS - c->jhead_cnt + 1; +	tmp64 *= (long long)c->leb_size - c->leb_overhead; +	tmp64 = ubifs_reported_space(c, tmp64); +	c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; +} + +/** + * free_orphans - free orphans. + * @c: UBIFS file-system description object + */ +static void free_orphans(struct ubifs_info *c) +{ +	struct ubifs_orphan *orph; + +	while (c->orph_dnext) { +		orph = c->orph_dnext; +		c->orph_dnext = orph->dnext; +		list_del(&orph->list); +		kfree(orph); +	} + +	while (!list_empty(&c->orph_list)) { +		orph = list_entry(c->orph_list.next, struct ubifs_orphan, list); +		list_del(&orph->list); +		kfree(orph); +		dbg_err("orphan list not empty at unmount"); +	} + +	vfree(c->orph_buf); +	c->orph_buf = NULL; +} + +/** + * check_volume_empty - check if the UBI volume is empty. + * @c: UBIFS file-system description object + * + * This function checks if the UBIFS volume is empty by looking if its LEBs are + * mapped or not. The result of checking is stored in the @c->empty variable. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +static int check_volume_empty(struct ubifs_info *c) +{ +	int lnum, err; + +	c->empty = 1; +	for (lnum = 0; lnum < c->leb_cnt; lnum++) { +		err = ubi_is_mapped(c->ubi, lnum); +		if (unlikely(err < 0)) +			return err; +		if (err == 1) { +			c->empty = 0; +			break; +		} + +		cond_resched(); +	} + +	return 0; +} + +/** + * mount_ubifs - mount UBIFS file-system. + * @c: UBIFS file-system description object + * + * This function mounts UBIFS file system. Returns zero in case of success and + * a negative error code in case of failure. 
+ * + * Note, the function does not de-allocate resources it it fails half way + * through, and the caller has to do this instead. + */ +static int mount_ubifs(struct ubifs_info *c) +{ +	struct super_block *sb = c->vfs_sb; +	int err, mounted_read_only = (sb->s_flags & MS_RDONLY); +	long long x; +	size_t sz; + +	err = init_constants_early(c); +	if (err) +		return err; + +	err = ubifs_debugging_init(c); +	if (err) +		return err; + +	err = check_volume_empty(c); +	if (err) +		goto out_free; + +	if (c->empty && (mounted_read_only || c->ro_media)) { +		/* +		 * This UBI volume is empty, and read-only, or the file system +		 * is mounted read-only - we cannot format it. +		 */ +		ubifs_err("can't format empty UBI volume: read-only %s", +			  c->ro_media ? "UBI volume" : "mount"); +		err = -EROFS; +		goto out_free; +	} + +	if (c->ro_media && !mounted_read_only) { +		ubifs_err("cannot mount read-write - read-only media"); +		err = -EROFS; +		goto out_free; +	} + +	/* +	 * The requirement for the buffer is that it should fit indexing B-tree +	 * height amount of integers. We assume the height if the TNC tree will +	 * never exceed 64. +	 */ +	err = -ENOMEM; +	c->bottom_up_buf = kmalloc(BOTTOM_UP_HEIGHT * sizeof(int), GFP_KERNEL); +	if (!c->bottom_up_buf) +		goto out_free; + +	c->sbuf = vmalloc(c->leb_size); +	if (!c->sbuf) +		goto out_free; + +	/* +	 * We have to check all CRCs, even for data nodes, when we mount the FS +	 * (specifically, when we are replaying). +	 */ +	c->always_chk_crc = 1; + +	err = ubifs_read_superblock(c); +	if (err) +		goto out_free; + +	/* +	 * Make sure the compressor which is set as default in the superblock +	 * or overridden by mount options is actually compiled in. 
+	 */ +	if (!ubifs_compr_present(c->default_compr)) { +		ubifs_err("'compressor \"%s\" is not compiled in", +			  ubifs_compr_name(c->default_compr)); +		goto out_free; +	} + +	dbg_failure_mode_registration(c); + +	err = init_constants_sb(c); +	if (err) +		goto out_free; + +	sz = ALIGN(c->max_idx_node_sz, c->min_io_size); +	sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size); +	c->cbuf = kmalloc(sz, GFP_NOFS); +	if (!c->cbuf) { +		err = -ENOMEM; +		goto out_free; +	} + +	sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); + +	err = ubifs_read_master(c); +	if (err) +		goto out_master; + +	init_constants_master(c); + +	if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { +		ubifs_msg("recovery needed"); +		c->need_recovery = 1; +	} + +	err = ubifs_lpt_init(c, 1, !mounted_read_only); +	if (err) +		goto out_lpt; + +	err = dbg_check_idx_size(c, c->old_idx_sz); +	if (err) +		goto out_lpt; + +	err = ubifs_replay_journal(c); +	if (err) +		goto out_journal; + +	err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only); +	if (err) +		goto out_orphans; + +	if (c->need_recovery) { +		err = ubifs_recover_size(c); +		if (err) +			goto out_orphans; +	} + +	spin_lock(&ubifs_infos_lock); +	list_add_tail(&c->infos_list, &ubifs_infos); +	spin_unlock(&ubifs_infos_lock); + +	if (c->need_recovery) { +		if (mounted_read_only) +			ubifs_msg("recovery deferred"); +		else { +			c->need_recovery = 0; +			ubifs_msg("recovery completed"); +		} +	} + +	err = dbg_check_filesystem(c); +	if (err) +		goto out_infos; + +	c->always_chk_crc = 0; + +	ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", +		  c->vi.ubi_num, c->vi.vol_id, c->vi.name); +	if (mounted_read_only) +		ubifs_msg("mounted read-only"); +	x = (long long)c->main_lebs * c->leb_size; +	ubifs_msg("file system size:   %lld bytes (%lld KiB, %lld MiB, %d " +		  "LEBs)", x, x >> 10, x >> 20, c->main_lebs); +	x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; +	ubifs_msg("journal size:     
  %lld bytes (%lld KiB, %lld MiB, %d " +		  "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); +	ubifs_msg("media format:       %d (latest is %d)", +		  c->fmt_version, UBIFS_FORMAT_VERSION); +	ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); +	ubifs_msg("reserved for root:  %llu bytes (%llu KiB)", +		c->report_rp_size, c->report_rp_size >> 10); + +	dbg_msg("compiled on:         " __DATE__ " at " __TIME__); +	dbg_msg("min. I/O unit size:  %d bytes", c->min_io_size); +	dbg_msg("LEB size:            %d bytes (%d KiB)", +		c->leb_size, c->leb_size >> 10); +	dbg_msg("data journal heads:  %d", +		c->jhead_cnt - NONDATA_JHEADS_CNT); +	dbg_msg("UUID:                %02X%02X%02X%02X-%02X%02X" +	       "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X", +	       c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3], +	       c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7], +	       c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11], +	       c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]); +	dbg_msg("big_lpt              %d", c->big_lpt); +	dbg_msg("log LEBs:            %d (%d - %d)", +		c->log_lebs, UBIFS_LOG_LNUM, c->log_last); +	dbg_msg("LPT area LEBs:       %d (%d - %d)", +		c->lpt_lebs, c->lpt_first, c->lpt_last); +	dbg_msg("orphan area LEBs:    %d (%d - %d)", +		c->orph_lebs, c->orph_first, c->orph_last); +	dbg_msg("main area LEBs:      %d (%d - %d)", +		c->main_lebs, c->main_first, c->leb_cnt - 1); +	dbg_msg("index LEBs:          %d", c->lst.idx_lebs); +	dbg_msg("total index bytes:   %lld (%lld KiB, %lld MiB)", +		c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); +	dbg_msg("key hash type:       %d", c->key_hash_type); +	dbg_msg("tree fanout:         %d", c->fanout); +	dbg_msg("reserved GC LEB:     %d", c->gc_lnum); +	dbg_msg("first main LEB:      %d", c->main_first); +	dbg_msg("max. znode size      %d", c->max_znode_sz); +	dbg_msg("max. 
index node size %d", c->max_idx_node_sz); +	dbg_msg("node sizes:          data %zu, inode %zu, dentry %zu", +		UBIFS_DATA_NODE_SZ, UBIFS_INO_NODE_SZ, UBIFS_DENT_NODE_SZ); +	dbg_msg("node sizes:          trun %zu, sb %zu, master %zu", +		UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); +	dbg_msg("node sizes:          ref %zu, cmt. start %zu, orph %zu", +		UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); +	dbg_msg("max. node sizes:     data %zu, inode %zu dentry %zu", +	        UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, +		UBIFS_MAX_DENT_NODE_SZ); +	dbg_msg("dead watermark:      %d", c->dead_wm); +	dbg_msg("dark watermark:      %d", c->dark_wm); +	dbg_msg("LEB overhead:        %d", c->leb_overhead); +	x = (long long)c->main_lebs * c->dark_wm; +	dbg_msg("max. dark space:     %lld (%lld KiB, %lld MiB)", +		x, x >> 10, x >> 20); +	dbg_msg("maximum bud bytes:   %lld (%lld KiB, %lld MiB)", +		c->max_bud_bytes, c->max_bud_bytes >> 10, +		c->max_bud_bytes >> 20); +	dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)", +		c->bg_bud_bytes, c->bg_bud_bytes >> 10, +		c->bg_bud_bytes >> 20); +	dbg_msg("current bud bytes    %lld (%lld KiB, %lld MiB)", +		c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20); +	dbg_msg("max. seq. number:    %llu", c->max_sqnum); +	dbg_msg("commit number:       %llu", c->cmt_no); + +	return 0; + +out_infos: +	spin_lock(&ubifs_infos_lock); +	list_del(&c->infos_list); +	spin_unlock(&ubifs_infos_lock); +out_orphans: +	free_orphans(c); +out_journal: +out_lpt: +	ubifs_lpt_free(c, 0); +out_master: +	kfree(c->mst_node); +	kfree(c->rcvrd_mst_node); +	if (c->bgt) +		kthread_stop(c->bgt); +	kfree(c->cbuf); +out_free: +	vfree(c->ileb_buf); +	vfree(c->sbuf); +	kfree(c->bottom_up_buf); +	ubifs_debugging_exit(c); +	return err; +} + +/** + * ubifs_umount - un-mount UBIFS file-system. 
+ * @c: UBIFS file-system description object + * + * Note, this function is called to free allocated resourced when un-mounting, + * as well as free resources when an error occurred while we were half way + * through mounting (error path cleanup function). So it has to make sure the + * resource was actually allocated before freeing it. + */ +static void ubifs_umount(struct ubifs_info *c) +{ +	dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num, +		c->vi.vol_id); + +	spin_lock(&ubifs_infos_lock); +	list_del(&c->infos_list); +	spin_unlock(&ubifs_infos_lock); + +	if (c->bgt) +		kthread_stop(c->bgt); + +	free_orphans(c); +	ubifs_lpt_free(c, 0); + +	kfree(c->cbuf); +	kfree(c->rcvrd_mst_node); +	kfree(c->mst_node); +	vfree(c->ileb_buf); +	vfree(c->sbuf); +	kfree(c->bottom_up_buf); +	ubifs_debugging_exit(c); + +	/* Finally free U-Boot's global copy of superblock */ +	free(ubifs_sb->s_fs_info); +	free(ubifs_sb); +} + +/** + * open_ubi - parse UBI device name string and open the UBI device. + * @name: UBI volume name + * @mode: UBI volume open mode + * + * There are several ways to specify UBI volumes when mounting UBIFS: + * o ubiX_Y    - UBI device number X, volume Y; + * o ubiY      - UBI device number 0, volume Y; + * o ubiX:NAME - mount UBI device X, volume with name NAME; + * o ubi:NAME  - mount UBI device 0, volume with name NAME. + * + * Alternative '!' separator may be used instead of ':' (because some shells + * like busybox may interpret ':' as an NFS host name separator). This function + * returns ubi volume object in case of success and a negative error code in + * case of failure. 
+ */ +static struct ubi_volume_desc *open_ubi(const char *name, int mode) +{ +	int dev, vol; +	char *endptr; + +	if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i') +		return ERR_PTR(-EINVAL); + +	/* ubi:NAME method */ +	if ((name[3] == ':' || name[3] == '!') && name[4] != '\0') +		return ubi_open_volume_nm(0, name + 4, mode); + +	if (!isdigit(name[3])) +		return ERR_PTR(-EINVAL); + +	dev = simple_strtoul(name + 3, &endptr, 0); + +	/* ubiY method: the number just parsed is the volume ID on device 0 */ +	if (*endptr == '\0') +		return ubi_open_volume(0, dev, mode); + +	/* ubiX_Y method */ +	if (*endptr == '_' && isdigit(endptr[1])) { +		vol = simple_strtoul(endptr + 1, &endptr, 0); +		if (*endptr != '\0') +			return ERR_PTR(-EINVAL); +		return ubi_open_volume(dev, vol, mode); +	} + +	/* ubiX:NAME method */ +	if ((*endptr == ':' || *endptr == '!') && endptr[1] != '\0') +		return ubi_open_volume_nm(dev, ++endptr, mode); + +	return ERR_PTR(-EINVAL); +} + +/** + * ubifs_fill_super - set up the UBIFS description object for a superblock. + * @sb: superblock; on entry @sb->s_fs_info holds the UBI volume descriptor + * @data: not used by this function + * @silent: not used by this function + * + * This function allocates 'struct ubifs_info', re-opens the UBI volume, mounts + * the file-system and reads the root inode. Returns %0 on success and a + * negative error code on failure. + */ +static int ubifs_fill_super(struct super_block *sb, void *data, int silent) +{ +	struct ubi_volume_desc *ubi = sb->s_fs_info; +	struct ubifs_info *c; +	struct inode *root; +	int err; + +	c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL); +	if (!c) +		return -ENOMEM; + +	spin_lock_init(&c->cnt_lock); +	spin_lock_init(&c->cs_lock); +	spin_lock_init(&c->buds_lock); +	spin_lock_init(&c->space_lock); +	spin_lock_init(&c->orphan_lock); +	init_rwsem(&c->commit_sem); +	mutex_init(&c->lp_mutex); +	mutex_init(&c->tnc_mutex); +	mutex_init(&c->log_mutex); +	mutex_init(&c->mst_mutex); +	mutex_init(&c->umount_mutex); +	init_waitqueue_head(&c->cmt_wq); +	c->buds = RB_ROOT; +	c->old_idx = RB_ROOT; +	c->size_tree = RB_ROOT; +	c->orph_tree = RB_ROOT; +	INIT_LIST_HEAD(&c->infos_list); +	INIT_LIST_HEAD(&c->idx_gc); +	INIT_LIST_HEAD(&c->replay_list); +	INIT_LIST_HEAD(&c->replay_buds); +	INIT_LIST_HEAD(&c->uncat_list); +	INIT_LIST_HEAD(&c->empty_list); +	INIT_LIST_HEAD(&c->freeable_list); +	INIT_LIST_HEAD(&c->frdi_idx_list); +	INIT_LIST_HEAD(&c->unclean_leb_list); +	
INIT_LIST_HEAD(&c->old_buds); +	INIT_LIST_HEAD(&c->orph_list); +	INIT_LIST_HEAD(&c->orph_new); + +	c->highest_inum = UBIFS_FIRST_INO; +	c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; + +	ubi_get_volume_info(ubi, &c->vi); +	ubi_get_device_info(c->vi.ubi_num, &c->di); + +	/* Re-open the UBI device; note that it is opened read-only here */ +	c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READONLY); +	if (IS_ERR(c->ubi)) { +		err = PTR_ERR(c->ubi); +		goto out_free; +	} + +	c->vfs_sb = sb; + +	sb->s_fs_info = c; +	sb->s_magic = UBIFS_SUPER_MAGIC; +	sb->s_blocksize = UBIFS_BLOCK_SIZE; +	sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT; +	sb->s_dev = c->vi.cdev; +	sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c); +	if (c->max_inode_sz > MAX_LFS_FILESIZE) +		sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; + +	mutex_lock(&c->umount_mutex); +	err = mount_ubifs(c); +	if (err) { +		ubifs_assert(err < 0); +		goto out_unlock; +	} + +	/* Read the root inode */ +	root = ubifs_iget(sb, UBIFS_ROOT_INO); +	if (IS_ERR(root)) { +		err = PTR_ERR(root); +		goto out_umount; +	} + +	sb->s_root = NULL; + +	mutex_unlock(&c->umount_mutex); +	return 0; + +out_umount: +	ubifs_umount(c); +out_unlock: +	mutex_unlock(&c->umount_mutex); +	ubi_close_volume(c->ubi); +out_free: +	kfree(c); +	return err; +} + +/* Returns non-zero if @sb has the device number which @data points to */ +static int sb_test(struct super_block *sb, void *data) +{ +	dev_t *dev = data; + +	return sb->s_dev == *dev; +} + +static int ubifs_get_sb(struct file_system_type *fs_type, int flags, +			const char *name, void *data, struct vfsmount *mnt) +{ +	struct ubi_volume_desc *ubi; +	struct ubi_volume_info vi; +	struct super_block *sb; +	int err; + +	dbg_gen("name %s, flags %#x", name, flags); + +	/* +	 * Get UBI device number and volume ID. Mount it read-only so far +	 * because this might be a new mount point, and UBI allows only one +	 * read-write user at a time. 
+	 */ +	ubi = open_ubi(name, UBI_READONLY); +	if (IS_ERR(ubi)) { +		ubifs_err("cannot open \"%s\", error %d", +			  name, (int)PTR_ERR(ubi)); +		return PTR_ERR(ubi); +	} +	ubi_get_volume_info(ubi, &vi); + +	dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); + +	sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev); +	if (IS_ERR(sb)) { +		err = PTR_ERR(sb); +		goto out_close; +	} + +	if (sb->s_root) { +		/* A new mount point for already mounted UBIFS */ +		dbg_gen("this ubi volume is already mounted"); +		if ((flags ^ sb->s_flags) & MS_RDONLY) { +			err = -EBUSY; +			goto out_deact; +		} +	} else { +		sb->s_flags = flags; +		/* +		 * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is +		 * replaced by 'c'. +		 */ +		sb->s_fs_info = ubi; +		err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); +		if (err) +			goto out_deact; +		/* We do not support atime */ +		sb->s_flags |= MS_ACTIVE | MS_NOATIME; +	} + +	/* 'fill_super()' opens ubi again so we must close it here */ +	ubi_close_volume(ubi); + +	/* Publish the superblock in U-Boot's global pointer only on success */ +	ubifs_sb = sb; +	return 0; + +out_deact: +	up_write(&sb->s_umount); +out_close: +	ubi_close_volume(ubi); +	return err; +} + +/* + * Checks the on-flash structure sizes at build time and initializes the + * compressors. Returns %0 on success and a negative error code on failure. + */ +int __init ubifs_init(void) +{ +	int err; + +	BUILD_BUG_ON(sizeof(struct ubifs_ch) != 24); + +	/* Make sure node sizes are 8-byte aligned */ +	BUILD_BUG_ON(UBIFS_CH_SZ        & 7); +	BUILD_BUG_ON(UBIFS_INO_NODE_SZ  & 7); +	BUILD_BUG_ON(UBIFS_DENT_NODE_SZ & 7); +	BUILD_BUG_ON(UBIFS_XENT_NODE_SZ & 7); +	BUILD_BUG_ON(UBIFS_DATA_NODE_SZ & 7); +	BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ & 7); +	BUILD_BUG_ON(UBIFS_SB_NODE_SZ   & 7); +	BUILD_BUG_ON(UBIFS_MST_NODE_SZ  & 7); +	BUILD_BUG_ON(UBIFS_REF_NODE_SZ  & 7); +	BUILD_BUG_ON(UBIFS_CS_NODE_SZ   & 7); +	BUILD_BUG_ON(UBIFS_ORPH_NODE_SZ & 7); + +	BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ & 7); +	BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ & 7); +	BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ & 7); +	BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ  & 7); +	BUILD_BUG_ON(UBIFS_MAX_NODE_SZ      & 7); +	BUILD_BUG_ON(MIN_WRITE_SZ           & 7); + +	/* 
Check min. node size */ +	BUILD_BUG_ON(UBIFS_INO_NODE_SZ  < MIN_WRITE_SZ); +	BUILD_BUG_ON(UBIFS_DENT_NODE_SZ < MIN_WRITE_SZ); +	BUILD_BUG_ON(UBIFS_XENT_NODE_SZ < MIN_WRITE_SZ); +	BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ < MIN_WRITE_SZ); + +	BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ > UBIFS_MAX_NODE_SZ); +	BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ > UBIFS_MAX_NODE_SZ); +	BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ > UBIFS_MAX_NODE_SZ); +	BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ  > UBIFS_MAX_NODE_SZ); + +	/* Defined node sizes */ +	BUILD_BUG_ON(UBIFS_SB_NODE_SZ  != 4096); +	BUILD_BUG_ON(UBIFS_MST_NODE_SZ != 512); +	BUILD_BUG_ON(UBIFS_INO_NODE_SZ != 160); +	BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64); + +	/* +	 * We use 2 bit wide bit-fields to store compression type, which should +	 * be amended if more compressors are added. The bit-fields are: +	 * @compr_type in 'struct ubifs_inode', @default_compr in +	 * 'struct ubifs_info' and @compr_type in 'struct ubifs_mount_opts'. +	 */ +	BUILD_BUG_ON(UBIFS_COMPR_TYPES_CNT > 4); + +	/* +	 * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to +	 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2. +	 */ +	if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) { +		ubifs_err("VFS page cache size is %u bytes, but UBIFS requires" +			  " at least 4096 bytes", +			  (unsigned int)PAGE_CACHE_SIZE); +		return -EINVAL; +	} + +	/* NOTE(review): this value is overwritten by the very next statement */ +	err = -ENOMEM; + +	err = ubifs_compressors_init(); +	if (err) +		goto out_shrinker; + +	return 0; + +out_shrinker: +	return err; +} + +/* + * ubifsmount... 
+ */ + +static struct file_system_type ubifs_fs_type = { +	.name    = "ubifs", +	.owner   = THIS_MODULE, +	.get_sb  = ubifs_get_sb, +}; + +int ubifs_mount(char *vol_name) +{ +	int flags; +	char name[80] = "ubi:"; +	void *data; +	struct vfsmount *mnt; +	int ret; +	struct ubifs_info *c; + +	/* +	 * First unmount if allready mounted +	 */ +	if (ubifs_sb) +		ubifs_umount(ubifs_sb->s_fs_info); + +	INIT_LIST_HEAD(&ubifs_infos); + +	/* +	 * Mount in read-only mode +	 */ +	flags = MS_RDONLY; +	strcat(name, vol_name); +	data = NULL; +	mnt = NULL; +	ret = ubifs_get_sb(&ubifs_fs_type, flags, name, data, mnt); +	if (ret) { +		printf("Error reading superblock on volume '%s'!\n", name); +		return -1; +	} + +	c = ubifs_sb->s_fs_info; +	ubi_close_volume(c->ubi); + +	return 0; +} diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c new file mode 100644 index 000000000..ccda9387b --- /dev/null +++ b/fs/ubifs/tnc.c @@ -0,0 +1,2767 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + *          Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file implements TNC (Tree Node Cache) which caches indexing nodes of + * the UBIFS B-tree. + * + * At the moment the locking rules of the TNC tree are quite simple and + * straightforward. 
We just have a mutex and lock it when we traverse the + * tree. If a znode is not in memory, we read it from flash while still having + * the mutex locked. + */ + +#include "ubifs.h" + +/* + * Return codes of 'matches_name()' and 'fallible_matches_name()' functions. + * @NAME_LESS: name of the entry referred by the zbranch is less than the name + *             looked for + * @NAME_MATCHES: names match + * @NAME_GREATER: name of the entry referred by the zbranch is greater than the + *                name looked for + * @NOT_ON_MEDIA: node referred by zbranch does not exist on the media + * + * These constants were introduced to improve readability. + */ +enum { +	NAME_LESS    = 0, +	NAME_MATCHES = 1, +	NAME_GREATER = 2, +	NOT_ON_MEDIA = 3, +}; + +/** + * insert_old_idx - record an index node obsoleted since the last commit start. + * @c: UBIFS file-system description object + * @lnum: LEB number of obsoleted index node + * @offs: offset of obsoleted index node + * + * Returns %0 on success, and a negative error code on failure. + * + * For recovery, there must always be a complete intact version of the index on + * flash at all times. That is called the "old index". It is the index as at the + * time of the last successful commit. Many of the index nodes in the old index + * may be dirty, but they must not be erased until the next successful commit + * (at which point that index becomes the old index). + * + * That means that the garbage collection and the in-the-gaps method of + * committing must be able to determine if an index node is in the old index. + * Most of the old index nodes can be found by looking up the TNC using the + * 'lookup_znode()' function. However, some of the old index nodes may have + * been deleted from the current index or may have been changed so much that + * they cannot be easily found. In those cases, an entry is added to an RB-tree. + * That is what this function does. The RB-tree is ordered by LEB number and + * offset because they uniquely identify the old index node. 
+ */ +static int insert_old_idx(struct ubifs_info *c, int lnum, int offs) +{ +	struct ubifs_old_idx *old_idx, *o; +	struct rb_node **p, *parent = NULL; + +	old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS); +	if (unlikely(!old_idx)) +		return -ENOMEM; +	old_idx->lnum = lnum; +	old_idx->offs = offs; + +	/* Find the insertion point, comparing LEB numbers first, then offsets */ +	p = &c->old_idx.rb_node; +	while (*p) { +		parent = *p; +		o = rb_entry(parent, struct ubifs_old_idx, rb); +		if (lnum < o->lnum) +			p = &(*p)->rb_left; +		else if (lnum > o->lnum) +			p = &(*p)->rb_right; +		else if (offs < o->offs) +			p = &(*p)->rb_left; +		else if (offs > o->offs) +			p = &(*p)->rb_right; +		else { +			/* +			 * The position is already in the tree: complain, drop +			 * the new entry, but still report success. +			 */ +			ubifs_err("old idx added twice!"); +			kfree(old_idx); +			return 0; +		} +	} +	rb_link_node(&old_idx->rb, parent, p); +	rb_insert_color(&old_idx->rb, &c->old_idx); +	return 0; +} + +/** + * insert_old_idx_znode - record a znode obsoleted since last commit start. + * @c: UBIFS file-system description object + * @znode: znode of obsoleted index node + * + * The position is taken from the parent's zbranch, or from @c->zroot if @znode + * has no parent. Nothing is recorded if the length stored there is zero. + * Returns %0 on success, and a negative error code on failure. + */ +int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode) +{ +	if (znode->parent) { +		struct ubifs_zbranch *zbr; + +		zbr = &znode->parent->zbranch[znode->iip]; +		if (zbr->len) +			return insert_old_idx(c, zbr->lnum, zbr->offs); +	} else +		if (c->zroot.len) +			return insert_old_idx(c, c->zroot.lnum, +					      c->zroot.offs); +	return 0; +} + +/** + * ins_clr_old_idx_znode - record a znode obsoleted since last commit start and + *                         clear its position. + * @c: UBIFS file-system description object + * @znode: znode of obsoleted index node + * + * Once the position is recorded, the LEB number, offset and length stored in + * the parent's zbranch (or in @c->zroot) are set to zero. + * Returns %0 on success, and a negative error code on failure. 
+ */ +static int ins_clr_old_idx_znode(struct ubifs_info *c, +				 struct ubifs_znode *znode) +{ +	int err; + +	if (znode->parent) { +		struct ubifs_zbranch *zbr; + +		zbr = &znode->parent->zbranch[znode->iip]; +		if (zbr->len) { +			err = insert_old_idx(c, zbr->lnum, zbr->offs); +			if (err) +				return err; +			zbr->lnum = 0; +			zbr->offs = 0; +			zbr->len = 0; +		} +	} else +		if (c->zroot.len) { +			err = insert_old_idx(c, c->zroot.lnum, c->zroot.offs); +			if (err) +				return err; +			c->zroot.lnum = 0; +			c->zroot.offs = 0; +			c->zroot.len = 0; +		} +	return 0; +} + +/** + * destroy_old_idx - destroy the old_idx RB-tree. + * @c: UBIFS file-system description object + * + * During start commit, the old_idx RB-tree is used to avoid overwriting index + * nodes that were in the index last commit but have since been deleted.  This + * is necessary for recovery i.e. the old index must be kept intact until the + * new index is successfully written.  The old-idx RB-tree is used for the + * in-the-gaps method of writing index nodes and is destroyed every commit. + */ +void destroy_old_idx(struct ubifs_info *c) +{ +	struct rb_node *this = c->old_idx.rb_node; +	struct ubifs_old_idx *old_idx; + +	/* +	 * Free the tree bottom-up without recursion: walk down to a node which +	 * has no children, detach it from its parent, free it and carry on +	 * from the parent. +	 */ +	while (this) { +		if (this->rb_left) { +			this = this->rb_left; +			continue; +		} else if (this->rb_right) { +			this = this->rb_right; +			continue; +		} +		old_idx = rb_entry(this, struct ubifs_old_idx, rb); +		this = rb_parent(this); +		if (this) { +			if (this->rb_left == &old_idx->rb) +				this->rb_left = NULL; +			else +				this->rb_right = NULL; +		} +		kfree(old_idx); +	} +	c->old_idx = RB_ROOT; +} + +/** + * copy_znode - copy a dirty znode. + * @c: UBIFS file-system description object + * @znode: znode to copy + * + * A dirty znode being committed may not be changed, so it is copied. 
+ */ +static struct ubifs_znode *copy_znode(struct ubifs_info *c, +				      struct ubifs_znode *znode) +{ +	struct ubifs_znode *zn; + +	zn = kmalloc(c->max_znode_sz, GFP_NOFS); +	if (unlikely(!zn)) +		return ERR_PTR(-ENOMEM); + +	/* The copy starts dirty, without the COW flag and with no 'cnext' link */ +	memcpy(zn, znode, c->max_znode_sz); +	zn->cnext = NULL; +	__set_bit(DIRTY_ZNODE, &zn->flags); +	__clear_bit(COW_ZNODE, &zn->flags); + +	/* The original is superseded by the copy, so mark it obsolete (once) */ +	ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); +	__set_bit(OBSOLETE_ZNODE, &znode->flags); + +	if (znode->level != 0) { +		int i; +		const int n = zn->child_cnt; + +		/* The children now have new parent */ +		for (i = 0; i < n; i++) { +			struct ubifs_zbranch *zbr = &zn->zbranch[i]; + +			if (zbr->znode) +				zbr->znode->parent = zn; +		} +	} + +	atomic_long_inc(&c->dirty_zn_cnt); +	return zn; +} + +/** + * add_idx_dirt - add dirt due to a dirty znode. + * @c: UBIFS file-system description object + * @lnum: LEB number of index node + * @dirt: size of index node + * + * This function updates lprops dirty space and the new size of the index. + * The index size is reduced by @dirt rounded up to a multiple of 8. Returns + * whatever 'ubifs_add_dirt()' returns. + */ +static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt) +{ +	c->calc_idx_sz -= ALIGN(dirt, 8); +	return ubifs_add_dirt(c, lnum, dirt); +} + +/** + * dirty_cow_znode - ensure a znode is not being committed. + * @c: UBIFS file-system description object + * @zbr: branch of znode to check + * + * Returns dirtied znode on success or negative error code on failure. 
+ */ +static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, +					   struct ubifs_zbranch *zbr) +{ +	struct ubifs_znode *znode = zbr->znode; +	struct ubifs_znode *zn; +	int err; + +	if (!test_bit(COW_ZNODE, &znode->flags)) { +		/* znode is not being committed */ +		if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { +			atomic_long_inc(&c->dirty_zn_cnt); +			atomic_long_dec(&c->clean_zn_cnt); +			atomic_long_dec(&ubifs_clean_zn_cnt); +			err = add_idx_dirt(c, zbr->lnum, zbr->len); +			if (unlikely(err)) +				return ERR_PTR(err); +		} +		return znode; +	} + +	/* znode is being committed - work on a copy of it instead */ +	zn = copy_znode(c, znode); +	if (IS_ERR(zn)) +		return zn; + +	/* +	 * NOTE(review): if 'insert_old_idx()' fails below, the function returns +	 * without freeing the copy 'zn' made above - confirm this is intended. +	 */ +	if (zbr->len) { +		err = insert_old_idx(c, zbr->lnum, zbr->offs); +		if (unlikely(err)) +			return ERR_PTR(err); +		err = add_idx_dirt(c, zbr->lnum, zbr->len); +	} else +		err = 0; + +	/* The branch now refers to the copy, which has no position yet */ +	zbr->znode = zn; +	zbr->lnum = 0; +	zbr->offs = 0; +	zbr->len = 0; + +	if (unlikely(err)) +		return ERR_PTR(err); +	return zn; +} + +/** + * lnc_add - add a leaf node to the leaf node cache. + * @c: UBIFS file-system description object + * @zbr: zbranch of leaf node + * @node: leaf node + * + * Leaf nodes are non-index nodes: directory entry nodes or data nodes. The + * purpose of the leaf node cache is to save re-reading the same leaf node over + * and over again. Most things are cached by VFS, however the file system must + * cache directory entries for readdir and for resolving hash collisions. The + * present implementation of the leaf node cache is extremely simple, and + * allows for error returns that are not used but that may be needed if a more + * complex implementation is created. + * + * Note, this function does not add the @node object to LNC directly, but + * allocates a copy of the object and adds the copy to LNC. The reason for this + * is that @node has been allocated outside of the TNC subsystem and will be + * used with @c->tnc_mutex unlock upon return from the TNC subsystem. But LNC + * may be changed at any time, e.g. 
freed by the shrinker. + */ +static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, +		   const void *node) +{ +	int err; +	void *lnc_node; +	const struct ubifs_dent_node *dent = node; + +	ubifs_assert(!zbr->leaf); +	ubifs_assert(zbr->len != 0); +	ubifs_assert(is_hash_key(c, &zbr->key)); + +	err = ubifs_validate_entry(c, dent); +	if (err) { +		dbg_dump_stack(); +		dbg_dump_node(c, dent); +		return err; +	} + +	lnc_node = kmalloc(zbr->len, GFP_NOFS); +	if (!lnc_node) +		/* We don't have to have the cache, so no error */ +		return 0; + +	memcpy(lnc_node, node, zbr->len); +	zbr->leaf = lnc_node; +	return 0; +} + + /** + * lnc_add_directly - add a leaf node to the leaf-node-cache. + * @c: UBIFS file-system description object + * @zbr: zbranch of leaf node + * @node: leaf node + * + * This function is similar to 'lnc_add()', but it does not create a copy of + * @node but inserts @node to TNC directly. On success @zbr->leaf points to + * @node, so the caller must not free it afterwards. Returns %0 on success and + * the error code of 'ubifs_validate_entry()' on failure. + */ +static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr, +			    void *node) +{ +	int err; + +	ubifs_assert(!zbr->leaf); +	ubifs_assert(zbr->len != 0); + +	err = ubifs_validate_entry(c, node); +	if (err) { +		dbg_dump_stack(); +		dbg_dump_node(c, node); +		return err; +	} + +	zbr->leaf = node; +	return 0; +} + +/** + * lnc_free - remove a leaf node from the leaf node cache. + * @zbr: zbranch of leaf node + * + * Frees the cached leaf node of @zbr, if there is one, and resets @zbr->leaf. + */ +static void lnc_free(struct ubifs_zbranch *zbr) +{ +	if (!zbr->leaf) +		return; +	kfree(zbr->leaf); +	zbr->leaf = NULL; +} + +/** + * tnc_read_node_nm - read a "hashed" leaf node. + * @c: UBIFS file-system description object + * @zbr: key and position of the node + * @node: node is returned here + * + * This function reads a "hashed" node defined by @zbr from the leaf node cache + * (if it is there) or from the flash media, in which case the node is also + * added to LNC. Returns zero in case of success or a negative error + * code in case of failure. 
+ */ +static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, +			    void *node) +{ +	int err; + +	ubifs_assert(is_hash_key(c, &zbr->key)); + +	if (zbr->leaf) { +		/* Read from the leaf node cache */ +		ubifs_assert(zbr->len != 0); +		memcpy(node, zbr->leaf, zbr->len); +		return 0; +	} + +	/* Not cached - read the node using 'ubifs_tnc_read_node()' */ +	err = ubifs_tnc_read_node(c, zbr, node); +	if (err) +		return err; + +	/* Add the node to the leaf node cache */ +	err = lnc_add(c, zbr, node); +	return err; +} + +/** + * try_read_node - read a node if it is a node. + * @c: UBIFS file-system description object + * @buf: buffer to read to + * @type: node type + * @len: node length (not aligned) + * @lnum: LEB number of node to read + * @offs: offset of node to read + * + * This function tries to read a node of known type and length, checks it and + * stores it in @buf. This function returns %1 if a node is present and %0 if + * a node is not present. A negative error code is returned for I/O errors. + * This function performs the same function as ubifs_read_node except that + * it does not require that there is actually a node present and instead + * the return code indicates if a node was read. + * + * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc + * is true (it is controlled by corresponding mount option). However, if + * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always + * checked. 
+ */ +static int try_read_node(const struct ubifs_info *c, void *buf, int type, +			 int len, int lnum, int offs) +{ +	int err, node_len; +	struct ubifs_ch *ch = buf; +	uint32_t crc, node_crc; + +	dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); + +	err = ubi_read(c->ubi, lnum, buf, offs, len); +	if (err) { +		ubifs_err("cannot read node type %d from LEB %d:%d, error %d", +			  type, lnum, offs, err); +		return err; +	} + +	/* Wrong magic, type or length just means there is no such node here */ +	if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) +		return 0; + +	if (ch->node_type != type) +		return 0; + +	node_len = le32_to_cpu(ch->len); +	if (node_len != len) +		return 0; + +	if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) +		return 1; + +	/* The CRC is calculated over the node without its first 8 bytes */ +	crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); +	node_crc = le32_to_cpu(ch->crc); +	if (crc != node_crc) +		return 0; + +	return 1; +} + +/** + * fallible_read_node - try to read a leaf node. + * @c: UBIFS file-system description object + * @key:  key of node to read + * @zbr:  position of node + * @node: node returned + * + * This function tries to read a node and returns %1 if the node is read, %0 + * if the node is not present, and a negative error code in the case of error. + * A node whose key differs from @key is treated as not present. + */ +static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, +			      struct ubifs_zbranch *zbr, void *node) +{ +	int ret; + +	dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key)); + +	ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum, +			    zbr->offs); +	if (ret == 1) { +		union ubifs_key node_key; +		struct ubifs_dent_node *dent = node; + +		/* All nodes have key in the same place */ +		key_read(c, &dent->key, &node_key); +		if (keys_cmp(c, key, &node_key) != 0) +			ret = 0; +	} +	if (ret == 0 && c->replaying) +		dbg_mnt("dangling branch LEB %d:%d len %d, key %s", +			zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); +	return ret; +} + +/** + * matches_name - determine if a direntry or xattr entry matches a given name. 
+ * @c: UBIFS file-system description object + * @zbr: zbranch of dent + * @nm: name to match + * + * This function checks if xentry/direntry referred by zbranch @zbr matches name + * @nm. Returns %NAME_MATCHES if it does, %NAME_LESS if the name referred by + * @zbr is less than @nm, and %NAME_GREATER if it is greater than @nm. In case + * of failure, a negative error code is returned. + */ +static int matches_name(struct ubifs_info *c, struct ubifs_zbranch *zbr, +			const struct qstr *nm) +{ +	struct ubifs_dent_node *dent; +	int nlen, err; + +	/* If possible, match against the dent in the leaf node cache */ +	if (!zbr->leaf) { +		dent = kmalloc(zbr->len, GFP_NOFS); +		if (!dent) +			return -ENOMEM; + +		err = ubifs_tnc_read_node(c, zbr, dent); +		if (err) +			goto out_free; + +		/* Add the node to the leaf node cache */ +		err = lnc_add_directly(c, zbr, dent); +		if (err) +			goto out_free; +		/* 'dent' is referred by @zbr->leaf now, so it is not freed below */ +	} else +		dent = zbr->leaf; + +	/* Compare the common prefix first, the lengths decide in case of a tie */ +	nlen = le16_to_cpu(dent->nlen); +	err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len)); +	if (err == 0) { +		if (nlen == nm->len) +			return NAME_MATCHES; +		else if (nlen < nm->len) +			return NAME_LESS; +		else +			return NAME_GREATER; +	} else if (err < 0) +		return NAME_LESS; +	else +		return NAME_GREATER; + +out_free: +	kfree(dent); +	return err; +} + +/** + * get_znode - get a TNC znode that may not be loaded yet. + * @c: UBIFS file-system description object + * @znode: parent znode + * @n: znode branch slot number + * + * This function returns the znode or a negative error code. + */ +static struct ubifs_znode *get_znode(struct ubifs_info *c, +				     struct ubifs_znode *znode, int n) +{ +	struct ubifs_zbranch *zbr; + +	/* Use the in-memory child if there is one, otherwise load it */ +	zbr = &znode->zbranch[n]; +	if (zbr->znode) +		znode = zbr->znode; +	else +		znode = ubifs_load_znode(c, zbr, znode, n); +	return znode; +} + +/** + * tnc_next - find next TNC entry. 
+ * @c: UBIFS file-system description object + * @zn: znode is passed and returned here + * @n: znode branch slot number is passed and returned here + * + * This function returns %0 if the next TNC entry is found, %-ENOENT if there is + * no next entry, or a negative error code otherwise. + */ +static int tnc_next(struct ubifs_info *c, struct ubifs_znode **zn, int *n) +{ +	struct ubifs_znode *znode = *zn; +	int nn = *n; + +	/* The simple case: the next entry is in the same znode */ +	nn += 1; +	if (nn < znode->child_cnt) { +		*n = nn; +		return 0; +	} +	/* Otherwise go up until there is a branch to the right of ours */ +	while (1) { +		struct ubifs_znode *zp; + +		zp = znode->parent; +		if (!zp) +			return -ENOENT; +		nn = znode->iip + 1; +		znode = zp; +		if (nn < znode->child_cnt) { +			znode = get_znode(c, znode, nn); +			if (IS_ERR(znode)) +				return PTR_ERR(znode); +			/* Go down to the leftmost level 0 znode of that sub-tree */ +			while (znode->level != 0) { +				znode = get_znode(c, znode, 0); +				if (IS_ERR(znode)) +					return PTR_ERR(znode); +			} +			nn = 0; +			break; +		} +	} +	*zn = znode; +	*n = nn; +	return 0; +} + +/** + * tnc_prev - find previous TNC entry. + * @c: UBIFS file-system description object + * @zn: znode is passed and returned here + * @n: znode branch slot number is passed and returned here + * + * This function returns %0 if the previous TNC entry is found, %-ENOENT if + * there is no previous entry, or a negative error code otherwise. 
+ */ +static int tnc_prev(struct ubifs_info *c, struct ubifs_znode **zn, int *n) +{ +	struct ubifs_znode *znode = *zn; +	int nn = *n; + +	/* The simple case: the previous entry is in the same znode */ +	if (nn > 0) { +		*n = nn - 1; +		return 0; +	} +	/* Otherwise go up until there is a branch to the left of ours */ +	while (1) { +		struct ubifs_znode *zp; + +		zp = znode->parent; +		if (!zp) +			return -ENOENT; +		nn = znode->iip - 1; +		znode = zp; +		if (nn >= 0) { +			znode = get_znode(c, znode, nn); +			if (IS_ERR(znode)) +				return PTR_ERR(znode); +			/* Go down to the rightmost level 0 znode of that sub-tree */ +			while (znode->level != 0) { +				nn = znode->child_cnt - 1; +				znode = get_znode(c, znode, nn); +				if (IS_ERR(znode)) +					return PTR_ERR(znode); +			} +			nn = znode->child_cnt - 1; +			break; +		} +	} +	*zn = znode; +	*n = nn; +	return 0; +} + +/** + * resolve_collision - resolve a collision. + * @c: UBIFS file-system description object + * @key: key of a directory or extended attribute entry + * @zn: znode is returned here + * @n: zbranch number is passed and returned here + * @nm: name of the entry + * + * This function is called for "hashed" keys to make sure that the found key + * really corresponds to the looked up node (directory or extended attribute + * entry). It returns %1 and sets @zn and @n if the collision is resolved. + * %0 is returned if @nm is not found and @zn and @n are set to the previous + * entry, i.e. to the entry after which @nm could follow if it were in TNC. + * This means that @n may be set to %-1 if the leftmost key in @zn is the + * previous one. A negative error code is returned on failures. 
+ */ +static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key, +			     struct ubifs_znode **zn, int *n, +			     const struct qstr *nm) +{ +	int err; + +	err = matches_name(c, &(*zn)->zbranch[*n], nm); +	if (unlikely(err < 0)) +		return err; +	if (err == NAME_MATCHES) +		return 1; + +	if (err == NAME_GREATER) { +		/* Look left */ +		while (1) { +			err = tnc_prev(c, zn, n); +			if (err == -ENOENT) { +				ubifs_assert(*n == 0); +				*n = -1; +				return 0; +			} +			if (err < 0) +				return err; +			if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) { +				/* +				 * We have found the branch after which we would +				 * like to insert, but inserting in this znode +				 * may still be wrong. Consider the following 3 +				 * znodes, in the case where we are resolving a +				 * collision with Key2. +				 * +				 *                  znode zp +				 *            ---------------------- +				 * level 1     |  Key0  |  Key1  | +				 *            ----------------------- +				 *                 |            | +				 *       znode za  |            |  znode zb +				 *          ------------      ------------ +				 * level 0  |  Key0  |        |  Key2  | +				 *          ------------      ------------ +				 * +				 * The lookup finds Key2 in znode zb. Let's say +				 * there is no match and the name is greater so +				 * we look left. When we find Key0, we end up +				 * here. If we return now, we will insert into +				 * znode za at slot n = 1.  But that is invalid +				 * according to the parent's keys.  Key2 must +				 * be inserted into znode zb. +				 * +				 * Note, this problem is not relevant for the +				 * case when we go right, because +				 * 'tnc_insert()' would correct the parent key. 
+				 */ +				if (*n == (*zn)->child_cnt - 1) { +					err = tnc_next(c, zn, n); +					if (err) { +						/* Should be impossible */ +						ubifs_assert(0); +						if (err == -ENOENT) +							err = -EINVAL; +						return err; +					} +					ubifs_assert(*n == 0); +					*n = -1; +				} +				return 0; +			} +			err = matches_name(c, &(*zn)->zbranch[*n], nm); +			if (err < 0) +				return err; +			if (err == NAME_LESS) +				return 0; +			if (err == NAME_MATCHES) +				return 1; +			ubifs_assert(err == NAME_GREATER); +		} +	} else { +		int nn = *n; +		struct ubifs_znode *znode = *zn; + +		/* Look right */ +		while (1) { +			err = tnc_next(c, &znode, &nn); +			if (err == -ENOENT) +				return 0; +			if (err < 0) +				return err; +			if (keys_cmp(c, &znode->zbranch[nn].key, key)) +				return 0; +			err = matches_name(c, &znode->zbranch[nn], nm); +			if (err < 0) +				return err; +			if (err == NAME_GREATER) +				return 0; +			/* @zn and @n only advance past entries which are not greater */ +			*zn = znode; +			*n = nn; +			if (err == NAME_MATCHES) +				return 1; +			ubifs_assert(err == NAME_LESS); +		} +	} +} + +/** + * fallible_matches_name - determine if a dent matches a given name. + * @c: UBIFS file-system description object + * @zbr: zbranch of dent + * @nm: name to match + * + * This is a "fallible" version of 'matches_name()' function which does not + * panic if the direntry/xentry referred by @zbr does not exist on the media. + * + * This function checks if xentry/direntry referred by zbranch @zbr matches name + * @nm. Returns %NAME_MATCHES if it does, %NAME_LESS if the name referred by @zbr + * is less than @nm, %NAME_GREATER if it is greater than @nm, and %NOT_ON_MEDIA + * if xentry/direntry referred by @zbr does not exist on the media. A negative + * error code is returned in case of failure. 
+ */ +static int fallible_matches_name(struct ubifs_info *c, +				 struct ubifs_zbranch *zbr, +				 const struct qstr *nm) +{ +	struct ubifs_dent_node *dent; +	int nlen, err; + +	/* If possible, match against the dent in the leaf node cache */ +	if (!zbr->leaf) { +		dent = kmalloc(zbr->len, GFP_NOFS); +		if (!dent) +			return -ENOMEM; + +		err = fallible_read_node(c, &zbr->key, zbr, dent); +		if (err < 0) +			goto out_free; +		if (err == 0) { +			/* The node was not present */ +			err = NOT_ON_MEDIA; +			goto out_free; +		} +		ubifs_assert(err == 1); + +		err = lnc_add_directly(c, zbr, dent); +		if (err) +			goto out_free; +		/* 'dent' is referred by @zbr->leaf now, so it is not freed below */ +	} else +		dent = zbr->leaf; + +	/* Compare the common prefix first, the lengths decide in case of a tie */ +	nlen = le16_to_cpu(dent->nlen); +	err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len)); +	if (err == 0) { +		if (nlen == nm->len) +			return NAME_MATCHES; +		else if (nlen < nm->len) +			return NAME_LESS; +		else +			return NAME_GREATER; +	} else if (err < 0) +		return NAME_LESS; +	else +		return NAME_GREATER; + +out_free: +	kfree(dent); +	return err; +} + +/** + * fallible_resolve_collision - resolve a collision even if nodes are missing. + * @c: UBIFS file-system description object + * @key: key + * @zn: znode is returned here + * @n: branch number is passed and returned here + * @nm: name of directory entry + * @adding: indicates caller is adding a key to the TNC + * + * This is a "fallible" version of the 'resolve_collision()' function which + * does not panic if one of the nodes referred to by TNC does not exist on the + * media. This may happen when replaying the journal if a deleted node was + * Garbage-collected and the commit was not done. A branch that refers to a node + * that is not present is called a dangling branch. 
The following are the return + * codes for this function: + *  o if @nm was found, %1 is returned and @zn and @n are set to the found + *    branch; + *  o if we are @adding and @nm was not found, %0 is returned; + *  o if we are not @adding and @nm was not found, but a dangling branch was + *    found, then %1 is returned and @zn and @n are set to the dangling branch; + *  o a negative error code is returned in case of failure. + */ +static int fallible_resolve_collision(struct ubifs_info *c, +				      const union ubifs_key *key, +				      struct ubifs_znode **zn, int *n, +				      const struct qstr *nm, int adding) +{ +	struct ubifs_znode *o_znode = NULL, *znode = *zn; +	int uninitialized_var(o_n), err, cmp, unsure = 0, nn = *n; + +	cmp = fallible_matches_name(c, &znode->zbranch[nn], nm); +	if (unlikely(cmp < 0)) +		return cmp; +	if (cmp == NAME_MATCHES) +		return 1; +	if (cmp == NOT_ON_MEDIA) { +		o_znode = znode; +		o_n = nn; +		/* +		 * We are unlucky and hit a dangling branch straight away. +		 * Now we do not really know where to go to find the needed +		 * branch - to the left or to the right. Well, let's try left. 
+		 */ +		unsure = 1; +	} else if (!adding) +		unsure = 1; /* Remove a dangling branch wherever it is */ + +	if (cmp == NAME_GREATER || unsure) { +		/* Look left */ +		while (1) { +			err = tnc_prev(c, zn, n); +			if (err == -ENOENT) { +				ubifs_assert(*n == 0); +				*n = -1; +				break; +			} +			if (err < 0) +				return err; +			if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) { +				/* See comments in 'resolve_collision()' */ +				if (*n == (*zn)->child_cnt - 1) { +					err = tnc_next(c, zn, n); +					if (err) { +						/* Should be impossible */ +						ubifs_assert(0); +						if (err == -ENOENT) +							err = -EINVAL; +						return err; +					} +					ubifs_assert(*n == 0); +					*n = -1; +				} +				break; +			} +			err = fallible_matches_name(c, &(*zn)->zbranch[*n], nm); +			if (err < 0) +				return err; +			if (err == NAME_MATCHES) +				return 1; +			if (err == NOT_ON_MEDIA) { +				/* Remember the dangling branch and keep looking */ +				o_znode = *zn; +				o_n = *n; +				continue; +			} +			if (!adding) +				continue; +			if (err == NAME_LESS) +				break; +			else +				unsure = 0; +		} +	} + +	if (cmp == NAME_LESS || unsure) { +		/* +		 * Look right, starting again from the branch the caller passed +		 * in ('znode' and 'nn' were not changed by the loop above). +		 */ +		*zn = znode; +		*n = nn; +		while (1) { +			err = tnc_next(c, &znode, &nn); +			if (err == -ENOENT) +				break; +			if (err < 0) +				return err; +			if (keys_cmp(c, &znode->zbranch[nn].key, key)) +				break; +			err = fallible_matches_name(c, &znode->zbranch[nn], nm); +			if (err < 0) +				return err; +			if (err == NAME_GREATER) +				break; +			*zn = znode; +			*n = nn; +			if (err == NAME_MATCHES) +				return 1; +			if (err == NOT_ON_MEDIA) { +				o_znode = znode; +				o_n = nn; +			} +		} +	} + +	/* Never match a dangling branch when adding */ +	if (adding || !o_znode) +		return 0; + +	dbg_mnt("dangling match LEB %d:%d len %d %s", +		o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs, +		o_znode->zbranch[o_n].len, DBGKEY(key)); +	*zn = o_znode; +	*n = o_n; +	return 1; +} + +/** + * matches_position - determine if a zbranch matches a given position. 
+ * @zbr: zbranch of dent + * @lnum: LEB number of dent to match + * @offs: offset of dent to match + * + * This function returns %1 if @lnum:@offs matches, and %0 otherwise. + */ +static int matches_position(struct ubifs_zbranch *zbr, int lnum, int offs) +{ +	if (zbr->lnum == lnum && zbr->offs == offs) +		return 1; +	else +		return 0; +} + +/** + * resolve_collision_directly - resolve a collision directly. + * @c: UBIFS file-system description object + * @key: key of directory entry + * @zn: znode is passed and returned here + * @n: zbranch number is passed and returned here + * @lnum: LEB number of dent node to match + * @offs: offset of dent node to match + * + * This function is used for "hashed" keys to make sure the found directory or + * extended attribute entry node is what was looked for. It is used when the + * flash address of the right node is known (@lnum:@offs) which makes it much + * easier to resolve collisions (no need to read entries and match full + * names). This function returns %1 and sets @zn and @n if the collision is + * resolved. If @lnum:@offs is not found, %0 is returned with @zn and @n set to + * the last entry with key @key. Otherwise a negative error code is returned. 
+ */ +static int resolve_collision_directly(struct ubifs_info *c, +				      const union ubifs_key *key, +				      struct ubifs_znode **zn, int *n, +				      int lnum, int offs) +{ +	struct ubifs_znode *znode; +	int nn, err; + +	znode = *zn; +	nn = *n; +	if (matches_position(&znode->zbranch[nn], lnum, offs)) +		return 1; + +	/* Look left */ +	while (1) { +		err = tnc_prev(c, &znode, &nn); +		if (err == -ENOENT) +			break; +		if (err < 0) +			return err; +		if (keys_cmp(c, &znode->zbranch[nn].key, key)) +			break; +		if (matches_position(&znode->zbranch[nn], lnum, offs)) { +			*zn = znode; +			*n = nn; +			return 1; +		} +	} + +	/* Look right */ +	znode = *zn; +	nn = *n; +	while (1) { +		err = tnc_next(c, &znode, &nn); +		if (err == -ENOENT) +			return 0; +		if (err < 0) +			return err; +		if (keys_cmp(c, &znode->zbranch[nn].key, key)) +			return 0; +		*zn = znode; +		*n = nn; +		if (matches_position(&znode->zbranch[nn], lnum, offs)) +			return 1; +	} +} + +/** + * dirty_cow_bottom_up - dirty a znode and its ancestors. + * @c: UBIFS file-system description object + * @znode: znode to dirty + * + * If we do not have a unique key that resides in a znode, then we cannot + * dirty that znode from the top down (i.e. by using lookup_level0_dirty) + * This function records the path back to the last dirty ancestor, and then + * dirties the znodes on that path. 
+ */ +static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c, +					       struct ubifs_znode *znode) +{ +	struct ubifs_znode *zp; +	int *path = c->bottom_up_buf, p = 0; + +	ubifs_assert(c->zroot.znode); +	ubifs_assert(znode); +	if (c->zroot.znode->level > BOTTOM_UP_HEIGHT) { +		kfree(c->bottom_up_buf); +		c->bottom_up_buf = kmalloc(c->zroot.znode->level * sizeof(int), +					   GFP_NOFS); +		if (!c->bottom_up_buf) +			return ERR_PTR(-ENOMEM); +		path = c->bottom_up_buf; +	} +	if (c->zroot.znode->level) { +		/* Go up until parent is dirty */ +		while (1) { +			int n; + +			zp = znode->parent; +			if (!zp) +				break; +			n = znode->iip; +			ubifs_assert(p < c->zroot.znode->level); +			path[p++] = n; +			if (!zp->cnext && ubifs_zn_dirty(znode)) +				break; +			znode = zp; +		} +	} + +	/* Come back down, dirtying as we go */ +	while (1) { +		struct ubifs_zbranch *zbr; + +		zp = znode->parent; +		if (zp) { +			ubifs_assert(path[p - 1] >= 0); +			ubifs_assert(path[p - 1] < zp->child_cnt); +			zbr = &zp->zbranch[path[--p]]; +			znode = dirty_cow_znode(c, zbr); +		} else { +			ubifs_assert(znode == c->zroot.znode); +			znode = dirty_cow_znode(c, &c->zroot); +		} +		if (IS_ERR(znode) || !p) +			break; +		ubifs_assert(path[p - 1] >= 0); +		ubifs_assert(path[p - 1] < znode->child_cnt); +		znode = znode->zbranch[path[p - 1]].znode; +	} + +	return znode; +} + +/** + * ubifs_lookup_level0 - search for zero-level znode. + * @c: UBIFS file-system description object + * @key:  key to lookup + * @zn: znode is returned here + * @n: znode branch slot number is returned here + * + * This function looks up the TNC tree and searches for zero-level znode which + * refers to key @key. The found zero-level znode is returned in @zn. There are + * 3 cases: + *   o exact match, i.e. 
the found zero-level znode contains key @key, then %1 + *     is returned and slot number of the matched branch is stored in @n; + *   o not exact match, which means that zero-level znode does not contain + *     @key, then %0 is returned and slot number of the closest branch is stored + *     in @n; + *   o @key is so small that it is even less than the lowest key of the + *     leftmost zero-level node, then %0 is returned and %-1 is stored in @n. + * + * Note, when the TNC tree is traversed, some znodes may be absent, then this + * function reads corresponding indexing nodes and inserts them to TNC. In + * case of failure, a negative error code is returned. + */ +int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, +			struct ubifs_znode **zn, int *n) +{ +	int err, exact; +	struct ubifs_znode *znode; +	unsigned long time = get_seconds(); + +	dbg_tnc("search key %s", DBGKEY(key)); + +	znode = c->zroot.znode; +	if (unlikely(!znode)) { +		znode = ubifs_load_znode(c, &c->zroot, NULL, 0); +		if (IS_ERR(znode)) +			return PTR_ERR(znode); +	} + +	znode->time = time; + +	while (1) { +		struct ubifs_zbranch *zbr; + +		exact = ubifs_search_zbranch(c, znode, key, n); + +		if (znode->level == 0) +			break; + +		if (*n < 0) +			*n = 0; +		zbr = &znode->zbranch[*n]; + +		if (zbr->znode) { +			znode->time = time; +			znode = zbr->znode; +			continue; +		} + +		/* znode is not in TNC cache, load it from the media */ +		znode = ubifs_load_znode(c, zbr, znode, *n); +		if (IS_ERR(znode)) +			return PTR_ERR(znode); +	} + +	*zn = znode; +	if (exact || !is_hash_key(c, key) || *n != -1) { +		dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n); +		return exact; +	} + +	/* +	 * Here is a tricky place. We have not found the key and this is a +	 * "hashed" key, which may collide. 
The rest of the code deals with +	 * situations like this: +	 * +	 *                  | 3 | 5 | +	 *                  /       \ +	 *          | 3 | 5 |      | 6 | 7 | (x) +	 * +	 * Or a more complex example: +	 * +	 *                | 1 | 5 | +	 *                /       \ +	 *       | 1 | 3 |         | 5 | 8 | +	 *              \           / +	 *          | 5 | 5 |   | 6 | 7 | (x) +	 * +	 * In the examples, if we are looking for key "5", we may reach nodes +	 * marked with "(x)". In this case what we have to do is to look at the +	 * left and see if there is "5" key there. If there is, we have to +	 * return it. +	 * +	 * Note, this whole situation is possible because we allow to have +	 * elements which are equivalent to the next key in the parent in the +	 * children of current znode. For example, this happens if we split a +	 * znode like this: | 3 | 5 | 5 | 6 | 7 |, which results in something +	 * like this: +	 *                      | 3 | 5 | +	 *                       /     \ +	 *                | 3 | 5 |   | 5 | 6 | 7 | +	 *                              ^ +	 * And this becomes what is at the first "picture" after key "5" marked +	 * with "^" is removed. What could be done is we could prohibit +	 * splitting in the middle of the colliding sequence. Also, when +	 * removing the leftmost key, we would have to correct the key of the +	 * parent node, which would introduce additional complications. Namely, +	 * if we changed the leftmost key of the parent znode, the garbage +	 * collector would be unable to find it (GC is doing this when GC'ing +	 * indexing LEBs). Although we already have an additional RB-tree where +	 * we save such changed znodes (see 'ins_clr_old_idx_znode()') until +	 * after the commit. But anyway, this does not look easy to implement +	 * so we did not try this. 
+	 */ +	err = tnc_prev(c, &znode, n); +	if (err == -ENOENT) { +		dbg_tnc("found 0, lvl %d, n -1", znode->level); +		*n = -1; +		return 0; +	} +	if (unlikely(err < 0)) +		return err; +	if (keys_cmp(c, key, &znode->zbranch[*n].key)) { +		dbg_tnc("found 0, lvl %d, n -1", znode->level); +		*n = -1; +		return 0; +	} + +	dbg_tnc("found 1, lvl %d, n %d", znode->level, *n); +	*zn = znode; +	return 1; +} + +/** + * lookup_level0_dirty - search for zero-level znode dirtying. + * @c: UBIFS file-system description object + * @key:  key to lookup + * @zn: znode is returned here + * @n: znode branch slot number is returned here + * + * This function looks up the TNC tree and search for zero-level znode which + * refers key @key. The found zero-level znode is returned in @zn. There are 3 + * cases: + *   o exact match, i.e. the found zero-level znode contains key @key, then %1 + *     is returned and slot number of the matched branch is stored in @n; + *   o not exact match, which means that zero-level znode does not contain @key + *     then %0 is returned and slot number of the closed branch is stored in + *     @n; + *   o @key is so small that it is even less than the lowest key of the + *     leftmost zero-level node, then %0 is returned and %-1 is stored in @n. + * + * Additionally all znodes in the path from the root to the located zero-level + * znode are marked as dirty. + * + * Note, when the TNC tree is traversed, some znodes may be absent, then this + * function reads corresponding indexing nodes and inserts them to TNC. In + * case of failure, a negative error code is returned. 
+ */ +static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, +			       struct ubifs_znode **zn, int *n) +{ +	int err, exact; +	struct ubifs_znode *znode; +	unsigned long time = get_seconds(); + +	dbg_tnc("search and dirty key %s", DBGKEY(key)); + +	znode = c->zroot.znode; +	if (unlikely(!znode)) { +		znode = ubifs_load_znode(c, &c->zroot, NULL, 0); +		if (IS_ERR(znode)) +			return PTR_ERR(znode); +	} + +	znode = dirty_cow_znode(c, &c->zroot); +	if (IS_ERR(znode)) +		return PTR_ERR(znode); + +	znode->time = time; + +	while (1) { +		struct ubifs_zbranch *zbr; + +		exact = ubifs_search_zbranch(c, znode, key, n); + +		if (znode->level == 0) +			break; + +		if (*n < 0) +			*n = 0; +		zbr = &znode->zbranch[*n]; + +		if (zbr->znode) { +			znode->time = time; +			znode = dirty_cow_znode(c, zbr); +			if (IS_ERR(znode)) +				return PTR_ERR(znode); +			continue; +		} + +		/* znode is not in TNC cache, load it from the media */ +		znode = ubifs_load_znode(c, zbr, znode, *n); +		if (IS_ERR(znode)) +			return PTR_ERR(znode); +		znode = dirty_cow_znode(c, zbr); +		if (IS_ERR(znode)) +			return PTR_ERR(znode); +	} + +	*zn = znode; +	if (exact || !is_hash_key(c, key) || *n != -1) { +		dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n); +		return exact; +	} + +	/* +	 * See the huge comment in 'ubifs_lookup_level0()' for what the rest of +	 * the code does. 
+	 */ +	err = tnc_prev(c, &znode, n); +	if (err == -ENOENT) { +		*n = -1; +		dbg_tnc("found 0, lvl %d, n -1", znode->level); +		return 0; +	} +	if (unlikely(err < 0)) +		return err; +	if (keys_cmp(c, key, &znode->zbranch[*n].key)) { +		*n = -1; +		dbg_tnc("found 0, lvl %d, n -1", znode->level); +		return 0; +	} + +	if (znode->cnext || !ubifs_zn_dirty(znode)) { +		znode = dirty_cow_bottom_up(c, znode); +		if (IS_ERR(znode)) +			return PTR_ERR(znode); +	} + +	dbg_tnc("found 1, lvl %d, n %d", znode->level, *n); +	*zn = znode; +	return 1; +} + +/** + * maybe_leb_gced - determine if a LEB may have been garbage collected. + * @c: UBIFS file-system description object + * @lnum: LEB number + * @gc_seq1: garbage collection sequence number + * + * This function determines if @lnum may have been garbage collected since + * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise + * %0 is returned. + */ +static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) +{ +	/* +	 * No garbage collection in the read-only U-Boot implementation +	 */ +	return 0; +} + +/** + * ubifs_tnc_locate - look up a file-system node and return it and its location. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * @lnum: LEB number is returned here + * @offs: offset is returned here + * + * This function look up and reads node with key @key. The caller has to make + * sure the @node buffer is large enough to fit the node. Returns zero in case + * of success, %-ENOENT if the node was not found, and a negative error code in + * case of failure. The node location can be returned in @lnum and @offs. 
+ */ +int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, +		     void *node, int *lnum, int *offs) +{ +	int found, n, err, safely = 0, gc_seq1; +	struct ubifs_znode *znode; +	struct ubifs_zbranch zbr, *zt; + +again: +	mutex_lock(&c->tnc_mutex); +	found = ubifs_lookup_level0(c, key, &znode, &n); +	if (!found) { +		err = -ENOENT; +		goto out; +	} else if (found < 0) { +		err = found; +		goto out; +	} +	zt = &znode->zbranch[n]; +	if (lnum) { +		*lnum = zt->lnum; +		*offs = zt->offs; +	} +	if (is_hash_key(c, key)) { +		/* +		 * In this case the leaf node cache gets used, so we pass the +		 * address of the zbranch and keep the mutex locked +		 */ +		err = tnc_read_node_nm(c, zt, node); +		goto out; +	} +	if (safely) { +		err = ubifs_tnc_read_node(c, zt, node); +		goto out; +	} +	/* Drop the TNC mutex prematurely and race with garbage collection */ +	zbr = znode->zbranch[n]; +	gc_seq1 = c->gc_seq; +	mutex_unlock(&c->tnc_mutex); + +	err = fallible_read_node(c, key, &zbr, node); +	if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) { +		/* +		 * The node may have been GC'ed out from under us so try again +		 * while keeping the TNC mutex locked. +		 */ +		safely = 1; +		goto again; +	} +	return 0; + +out: +	mutex_unlock(&c->tnc_mutex); +	return err; +} + +/** + * ubifs_tnc_get_bu_keys - lookup keys for bulk-read. + * @c: UBIFS file-system description object + * @bu: bulk-read parameters and results + * + * Look up consecutive data node keys for the same inode that reside + * consecutively in the same LEB. This function returns zero in case of success + * and a negative error code in case of failure. + * + * Note, if the bulk-read buffer length (@bu->buf_len) is known, this function + * makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares + * maximum possible amount of nodes for bulk-read. 
+ */ +int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu) +{ +	int n, err = 0, lnum = -1, uninitialized_var(offs); +	int uninitialized_var(len); +	unsigned int block = key_block(c, &bu->key); +	struct ubifs_znode *znode; + +	bu->cnt = 0; +	bu->blk_cnt = 0; +	bu->eof = 0; + +	mutex_lock(&c->tnc_mutex); +	/* Find first key */ +	err = ubifs_lookup_level0(c, &bu->key, &znode, &n); +	if (err < 0) +		goto out; +	if (err) { +		/* Key found */ +		len = znode->zbranch[n].len; +		/* The buffer must be big enough for at least 1 node */ +		if (len > bu->buf_len) { +			err = -EINVAL; +			goto out; +		} +		/* Add this key */ +		bu->zbranch[bu->cnt++] = znode->zbranch[n]; +		bu->blk_cnt += 1; +		lnum = znode->zbranch[n].lnum; +		offs = ALIGN(znode->zbranch[n].offs + len, 8); +	} +	while (1) { +		struct ubifs_zbranch *zbr; +		union ubifs_key *key; +		unsigned int next_block; + +		/* Find next key */ +		err = tnc_next(c, &znode, &n); +		if (err) +			goto out; +		zbr = &znode->zbranch[n]; +		key = &zbr->key; +		/* See if there is another data key for this file */ +		if (key_inum(c, key) != key_inum(c, &bu->key) || +		    key_type(c, key) != UBIFS_DATA_KEY) { +			err = -ENOENT; +			goto out; +		} +		if (lnum < 0) { +			/* First key found */ +			lnum = zbr->lnum; +			offs = ALIGN(zbr->offs + zbr->len, 8); +			len = zbr->len; +			if (len > bu->buf_len) { +				err = -EINVAL; +				goto out; +			} +		} else { +			/* +			 * The data nodes must be in consecutive positions in +			 * the same LEB. 
+			 */ +			if (zbr->lnum != lnum || zbr->offs != offs) +				goto out; +			offs += ALIGN(zbr->len, 8); +			len = ALIGN(len, 8) + zbr->len; +			/* Must not exceed buffer length */ +			if (len > bu->buf_len) +				goto out; +		} +		/* Allow for holes */ +		next_block = key_block(c, key); +		bu->blk_cnt += (next_block - block - 1); +		if (bu->blk_cnt >= UBIFS_MAX_BULK_READ) +			goto out; +		block = next_block; +		/* Add this key */ +		bu->zbranch[bu->cnt++] = *zbr; +		bu->blk_cnt += 1; +		/* See if we have room for more */ +		if (bu->cnt >= UBIFS_MAX_BULK_READ) +			goto out; +		if (bu->blk_cnt >= UBIFS_MAX_BULK_READ) +			goto out; +	} +out: +	if (err == -ENOENT) { +		bu->eof = 1; +		err = 0; +	} +	bu->gc_seq = c->gc_seq; +	mutex_unlock(&c->tnc_mutex); +	if (err) +		return err; +	/* +	 * An enormous hole could cause bulk-read to encompass too many +	 * page cache pages, so limit the number here. +	 */ +	if (bu->blk_cnt > UBIFS_MAX_BULK_READ) +		bu->blk_cnt = UBIFS_MAX_BULK_READ; +	/* +	 * Ensure that bulk-read covers a whole number of page cache +	 * pages. +	 */ +	if (UBIFS_BLOCKS_PER_PAGE == 1 || +	    !(bu->blk_cnt & (UBIFS_BLOCKS_PER_PAGE - 1))) +		return 0; +	if (bu->eof) { +		/* At the end of file we can round up */ +		bu->blk_cnt += UBIFS_BLOCKS_PER_PAGE - 1; +		return 0; +	} +	/* Exclude data nodes that do not make up a whole page cache page */ +	block = key_block(c, &bu->key) + bu->blk_cnt; +	block &= ~(UBIFS_BLOCKS_PER_PAGE - 1); +	while (bu->cnt) { +		if (key_block(c, &bu->zbranch[bu->cnt - 1].key) < block) +			break; +		bu->cnt -= 1; +	} +	return 0; +} + +/** + * validate_data_node - validate data nodes for bulk-read. + * @c: UBIFS file-system description object + * @buf: buffer containing data node to validate + * @zbr: zbranch of data node to validate + * + * This functions returns %0 on success or a negative error code on failure. 
+ */ +static int validate_data_node(struct ubifs_info *c, void *buf, +			      struct ubifs_zbranch *zbr) +{ +	union ubifs_key key1; +	struct ubifs_ch *ch = buf; +	int err, len; + +	if (ch->node_type != UBIFS_DATA_NODE) { +		ubifs_err("bad node type (%d but expected %d)", +			  ch->node_type, UBIFS_DATA_NODE); +		goto out_err; +	} + +	err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0); +	if (err) { +		ubifs_err("expected node type %d", UBIFS_DATA_NODE); +		goto out; +	} + +	len = le32_to_cpu(ch->len); +	if (len != zbr->len) { +		ubifs_err("bad node length %d, expected %d", len, zbr->len); +		goto out_err; +	} + +	/* Make sure the key of the read node is correct */ +	key_read(c, buf + UBIFS_KEY_OFFSET, &key1); +	if (!keys_eq(c, &zbr->key, &key1)) { +		ubifs_err("bad key in node at LEB %d:%d", +			  zbr->lnum, zbr->offs); +		dbg_tnc("looked for key %s found node's key %s", +			DBGKEY(&zbr->key), DBGKEY1(&key1)); +		goto out_err; +	} + +	return 0; + +out_err: +	err = -EINVAL; +out: +	ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs); +	dbg_dump_node(c, buf); +	dbg_dump_stack(); +	return err; +} + +/** + * ubifs_tnc_bulk_read - read a number of data nodes in one go. + * @c: UBIFS file-system description object + * @bu: bulk-read parameters and results + * + * This function reads and validates the data nodes that were identified by the + * 'ubifs_tnc_get_bu_keys()' function. This function returns %0 on success, + * -EAGAIN to indicate a race with GC, or another negative error code on + * failure. 
+ */ +int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) +{ +	int lnum = bu->zbranch[0].lnum, offs = bu->zbranch[0].offs, len, err, i; +	void *buf; + +	len = bu->zbranch[bu->cnt - 1].offs; +	len += bu->zbranch[bu->cnt - 1].len - offs; +	if (len > bu->buf_len) { +		ubifs_err("buffer too small %d vs %d", bu->buf_len, len); +		return -EINVAL; +	} + +	/* Do the read */ +	err = ubi_read(c->ubi, lnum, bu->buf, offs, len); + +	/* Check for a race with GC */ +	if (maybe_leb_gced(c, lnum, bu->gc_seq)) +		return -EAGAIN; + +	if (err && err != -EBADMSG) { +		ubifs_err("failed to read from LEB %d:%d, error %d", +			  lnum, offs, err); +		dbg_dump_stack(); +		dbg_tnc("key %s", DBGKEY(&bu->key)); +		return err; +	} + +	/* Validate the nodes read */ +	buf = bu->buf; +	for (i = 0; i < bu->cnt; i++) { +		err = validate_data_node(c, buf, &bu->zbranch[i]); +		if (err) +			return err; +		buf = buf + ALIGN(bu->zbranch[i].len, 8); +	} + +	return 0; +} + +/** + * do_lookup_nm- look up a "hashed" node. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * @nm: node name + * + * This function look up and reads a node which contains name hash in the key. + * Since the hash may have collisions, there may be many nodes with the same + * key, so we have to sequentially look to all of them until the needed one is + * found. This function returns zero in case of success, %-ENOENT if the node + * was not found, and a negative error code in case of failure. 
+ */ +static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, +			void *node, const struct qstr *nm) +{ +	int found, n, err; +	struct ubifs_znode *znode; + +	dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); +	mutex_lock(&c->tnc_mutex); +	found = ubifs_lookup_level0(c, key, &znode, &n); +	if (!found) { +		err = -ENOENT; +		goto out_unlock; +	} else if (found < 0) { +		err = found; +		goto out_unlock; +	} + +	ubifs_assert(n >= 0); + +	err = resolve_collision(c, key, &znode, &n, nm); +	dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); +	if (unlikely(err < 0)) +		goto out_unlock; +	if (err == 0) { +		err = -ENOENT; +		goto out_unlock; +	} + +	err = tnc_read_node_nm(c, &znode->zbranch[n], node); + +out_unlock: +	mutex_unlock(&c->tnc_mutex); +	return err; +} + +/** + * ubifs_tnc_lookup_nm - look up a "hashed" node. + * @c: UBIFS file-system description object + * @key: node key to lookup + * @node: the node is returned here + * @nm: node name + * + * This function looks up and reads a node which contains name hash in the key. + * Since the hash may have collisions, there may be many nodes with the same + * key, so we have to sequentially look at all of them until the needed one is + * found. This function returns zero in case of success, %-ENOENT if the node + * was not found, and a negative error code in case of failure. + */ +int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, +			void *node, const struct qstr *nm) +{ +	int err, len; +	const struct ubifs_dent_node *dent = node; + +	/* +	 * We assume that in most of the cases there are no name collisions and +	 * 'ubifs_tnc_lookup()' returns us the right direntry. 
+	 */ +	err = ubifs_tnc_lookup(c, key, node); +	if (err) +		return err; + +	len = le16_to_cpu(dent->nlen); +	if (nm->len == len && !memcmp(dent->name, nm->name, len)) +		return 0; + +	/* +	 * Unluckily, there are hash collisions and we have to iterate over +	 * them, looking at each direntry with colliding name hash sequentially. +	 */ +	return do_lookup_nm(c, key, node, nm); +} + +/** + * correct_parent_keys - correct parent znodes' keys. + * @c: UBIFS file-system description object + * @znode: znode to correct parent znodes for + * + * This is a helper function for 'tnc_insert()'. When the key of the leftmost + * zbranch changes, keys of parent znodes have to be corrected. This helper + * function is called in such situations and corrects the keys if needed. + */ +static void correct_parent_keys(const struct ubifs_info *c, +				struct ubifs_znode *znode) +{ +	union ubifs_key *key, *key1; + +	ubifs_assert(znode->parent); +	ubifs_assert(znode->iip == 0); + +	key = &znode->zbranch[0].key; +	key1 = &znode->parent->zbranch[0].key; + +	while (keys_cmp(c, key, key1) < 0) { +		key_copy(c, key, key1); +		znode = znode->parent; +		znode->alt = 1; +		if (!znode->parent || znode->iip) +			break; +		key1 = &znode->parent->zbranch[0].key; +	} +} + +/** + * insert_zbranch - insert a zbranch into a znode. + * @znode: znode into which to insert + * @zbr: zbranch to insert + * @n: slot number to insert to + * + * This is a helper function for 'tnc_insert()'. UBIFS does not allow "gaps" in + * znode's array of zbranches and keeps zbranches consolidated, so when a new + * zbranch has to be inserted to the @znode->zbranches[]' array at the @n-th + * slot, zbranches starting from @n have to be moved right. 
+ */ +static void insert_zbranch(struct ubifs_znode *znode, +			   const struct ubifs_zbranch *zbr, int n) +{ +	int i; + +	ubifs_assert(ubifs_zn_dirty(znode)); + +	if (znode->level) { +		for (i = znode->child_cnt; i > n; i--) { +			znode->zbranch[i] = znode->zbranch[i - 1]; +			if (znode->zbranch[i].znode) +				znode->zbranch[i].znode->iip = i; +		} +		if (zbr->znode) +			zbr->znode->iip = n; +	} else +		for (i = znode->child_cnt; i > n; i--) +			znode->zbranch[i] = znode->zbranch[i - 1]; + +	znode->zbranch[n] = *zbr; +	znode->child_cnt += 1; + +	/* +	 * After inserting at slot zero, the lower bound of the key range of +	 * this znode may have changed. If this znode is subsequently split +	 * then the upper bound of the key range may change, and furthermore +	 * it could change to be lower than the original lower bound. If that +	 * happens, then it will no longer be possible to find this znode in the +	 * TNC using the key from the index node on flash. That is bad because +	 * if it is not found, we will assume it is obsolete and may overwrite +	 * it. Then if there is an unclean unmount, we will start using the +	 * old index which will be broken. +	 * +	 * So we first mark znodes that have insertions at slot zero, and then +	 * if they are split we add their lnum/offs to the old_idx tree. +	 */ +	if (n == 0) +		znode->alt = 1; +} + +/** + * tnc_insert - insert a node into TNC. + * @c: UBIFS file-system description object + * @znode: znode to insert into + * @zbr: branch to insert + * @n: slot number to insert new zbranch to + * + * This function inserts a new node described by @zbr into znode @znode. If + * znode does not have a free slot for new zbranch, it is split. Parent znodes + * are split as well if needed. Returns zero in case of success or a negative + * error code in case of failure. 
+ */ +static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode, +		      struct ubifs_zbranch *zbr, int n) +{ +	struct ubifs_znode *zn, *zi, *zp; +	int i, keep, move, appending = 0; +	union ubifs_key *key = &zbr->key, *key1; + +	ubifs_assert(n >= 0 && n <= c->fanout); + +	/* Implement naive insert for now */ +again: +	zp = znode->parent; +	if (znode->child_cnt < c->fanout) { +		ubifs_assert(n != c->fanout); +		dbg_tnc("inserted at %d level %d, key %s", n, znode->level, +			DBGKEY(key)); + +		insert_zbranch(znode, zbr, n); + +		/* Ensure parent's key is correct */ +		if (n == 0 && zp && znode->iip == 0) +			correct_parent_keys(c, znode); + +		return 0; +	} + +	/* +	 * Unfortunately, @znode does not have more empty slots and we have to +	 * split it. +	 */ +	dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key)); + +	if (znode->alt) +		/* +		 * We can no longer be sure of finding this znode by key, so we +		 * record it in the old_idx tree. +		 */ +		ins_clr_old_idx_znode(c, znode); + +	zn = kzalloc(c->max_znode_sz, GFP_NOFS); +	if (!zn) +		return -ENOMEM; +	zn->parent = zp; +	zn->level = znode->level; + +	/* Decide where to split */ +	if (znode->level == 0 && key_type(c, key) == UBIFS_DATA_KEY) { +		/* Try not to split consecutive data keys */ +		if (n == c->fanout) { +			key1 = &znode->zbranch[n - 1].key; +			if (key_inum(c, key1) == key_inum(c, key) && +			    key_type(c, key1) == UBIFS_DATA_KEY) +				appending = 1; +		} else +			goto check_split; +	} else if (appending && n != c->fanout) { +		/* Try not to split consecutive data keys */ +		appending = 0; +check_split: +		if (n >= (c->fanout + 1) / 2) { +			key1 = &znode->zbranch[0].key; +			if (key_inum(c, key1) == key_inum(c, key) && +			    key_type(c, key1) == UBIFS_DATA_KEY) { +				key1 = &znode->zbranch[n].key; +				if (key_inum(c, key1) != key_inum(c, key) || +				    key_type(c, key1) != UBIFS_DATA_KEY) { +					keep = n; +					move = c->fanout - keep; +					zi = znode; +					goto 
do_split; +				} +			} +		} +	} + +	if (appending) { +		keep = c->fanout; +		move = 0; +	} else { +		keep = (c->fanout + 1) / 2; +		move = c->fanout - keep; +	} + +	/* +	 * Although we don't at present, we could look at the neighbors and see +	 * if we can move some zbranches there. +	 */ + +	if (n < keep) { +		/* Insert into existing znode */ +		zi = znode; +		move += 1; +		keep -= 1; +	} else { +		/* Insert into new znode */ +		zi = zn; +		n -= keep; +		/* Re-parent */ +		if (zn->level != 0) +			zbr->znode->parent = zn; +	} + +do_split: + +	__set_bit(DIRTY_ZNODE, &zn->flags); +	atomic_long_inc(&c->dirty_zn_cnt); + +	zn->child_cnt = move; +	znode->child_cnt = keep; + +	dbg_tnc("moving %d, keeping %d", move, keep); + +	/* Move zbranch */ +	for (i = 0; i < move; i++) { +		zn->zbranch[i] = znode->zbranch[keep + i]; +		/* Re-parent */ +		if (zn->level != 0) +			if (zn->zbranch[i].znode) { +				zn->zbranch[i].znode->parent = zn; +				zn->zbranch[i].znode->iip = i; +			} +	} + +	/* Insert new key and branch */ +	dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key)); + +	insert_zbranch(zi, zbr, n); + +	/* Insert new znode (produced by splitting) into the parent */ +	if (zp) { +		if (n == 0 && zi == znode && znode->iip == 0) +			correct_parent_keys(c, znode); + +		/* Locate insertion point */ +		n = znode->iip + 1; + +		/* Tail recursion */ +		zbr->key = zn->zbranch[0].key; +		zbr->znode = zn; +		zbr->lnum = 0; +		zbr->offs = 0; +		zbr->len = 0; +		znode = zp; + +		goto again; +	} + +	/* We have to split root znode */ +	dbg_tnc("creating new zroot at level %d", znode->level + 1); + +	zi = kzalloc(c->max_znode_sz, GFP_NOFS); +	if (!zi) +		return -ENOMEM; + +	zi->child_cnt = 2; +	zi->level = znode->level + 1; + +	__set_bit(DIRTY_ZNODE, &zi->flags); +	atomic_long_inc(&c->dirty_zn_cnt); + +	zi->zbranch[0].key = znode->zbranch[0].key; +	zi->zbranch[0].znode = znode; +	zi->zbranch[0].lnum = c->zroot.lnum; +	zi->zbranch[0].offs = c->zroot.offs; +	
zi->zbranch[0].len = c->zroot.len; +	zi->zbranch[1].key = zn->zbranch[0].key; +	zi->zbranch[1].znode = zn; + +	c->zroot.lnum = 0; +	c->zroot.offs = 0; +	c->zroot.len = 0; +	c->zroot.znode = zi; + +	zn->parent = zi; +	zn->iip = 1; +	znode->parent = zi; +	znode->iip = 0; + +	return 0; +} + +/** + * ubifs_tnc_add - add a node to TNC. + * @c: UBIFS file-system description object + * @key: key to add + * @lnum: LEB number of node + * @offs: node offset + * @len: node length + * + * This function adds a node with key @key to TNC. The node may be new or it may + * obsolete some existing one. Returns %0 on success or negative error code on + * failure. + */ +int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, +		  int offs, int len) +{ +	int found, n, err = 0; +	struct ubifs_znode *znode; + +	mutex_lock(&c->tnc_mutex); +	dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key)); +	found = lookup_level0_dirty(c, key, &znode, &n); +	if (!found) { +		struct ubifs_zbranch zbr; + +		zbr.znode = NULL; +		zbr.lnum = lnum; +		zbr.offs = offs; +		zbr.len = len; +		key_copy(c, key, &zbr.key); +		err = tnc_insert(c, znode, &zbr, n + 1); +	} else if (found == 1) { +		struct ubifs_zbranch *zbr = &znode->zbranch[n]; + +		lnc_free(zbr); +		err = ubifs_add_dirt(c, zbr->lnum, zbr->len); +		zbr->lnum = lnum; +		zbr->offs = offs; +		zbr->len = len; +	} else +		err = found; +	if (!err) +		err = dbg_check_tnc(c, 0); +	mutex_unlock(&c->tnc_mutex); + +	return err; +} + +/** + * ubifs_tnc_replace - replace a node in the TNC only if the old node is found. + * @c: UBIFS file-system description object + * @key: key to add + * @old_lnum: LEB number of old node + * @old_offs: old node offset + * @lnum: LEB number of node + * @offs: node offset + * @len: node length + * + * This function replaces a node with key @key in the TNC only if the old node + * is found.  This function is called by garbage collection when nodes are moved. 
+ * Returns %0 on success or negative error code on failure. + */ +int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, +		      int old_lnum, int old_offs, int lnum, int offs, int len) +{ +	int found, n, err = 0; +	struct ubifs_znode *znode; + +	mutex_lock(&c->tnc_mutex); +	dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum, +		old_offs, lnum, offs, len, DBGKEY(key)); +	found = lookup_level0_dirty(c, key, &znode, &n); +	if (found < 0) { +		err = found; +		goto out_unlock; +	} + +	if (found == 1) { +		struct ubifs_zbranch *zbr = &znode->zbranch[n]; + +		found = 0; +		if (zbr->lnum == old_lnum && zbr->offs == old_offs) { +			lnc_free(zbr); +			err = ubifs_add_dirt(c, zbr->lnum, zbr->len); +			if (err) +				goto out_unlock; +			zbr->lnum = lnum; +			zbr->offs = offs; +			zbr->len = len; +			found = 1; +		} else if (is_hash_key(c, key)) { +			found = resolve_collision_directly(c, key, &znode, &n, +							   old_lnum, old_offs); +			dbg_tnc("rc returned %d, znode %p, n %d, LEB %d:%d", +				found, znode, n, old_lnum, old_offs); +			if (found < 0) { +				err = found; +				goto out_unlock; +			} + +			if (found) { +				/* Ensure the znode is dirtied */ +				if (znode->cnext || !ubifs_zn_dirty(znode)) { +					znode = dirty_cow_bottom_up(c, znode); +					if (IS_ERR(znode)) { +						err = PTR_ERR(znode); +						goto out_unlock; +					} +				} +				zbr = &znode->zbranch[n]; +				lnc_free(zbr); +				err = ubifs_add_dirt(c, zbr->lnum, +						     zbr->len); +				if (err) +					goto out_unlock; +				zbr->lnum = lnum; +				zbr->offs = offs; +				zbr->len = len; +			} +		} +	} + +	if (!found) +		err = ubifs_add_dirt(c, lnum, len); + +	if (!err) +		err = dbg_check_tnc(c, 0); + +out_unlock: +	mutex_unlock(&c->tnc_mutex); +	return err; +} + +/** + * ubifs_tnc_add_nm - add a "hashed" node to TNC. 
/**
 * ubifs_tnc_add_nm - add a "hashed" node to TNC.
 * @c: UBIFS file-system description object
 * @key: key to add
 * @lnum: LEB number of node
 * @offs: node offset
 * @len: node length
 * @nm: node name
 *
 * This is the same as 'ubifs_tnc_add()' but it should be used with keys which
 * may have collisions, like directory entry keys. Returns %0 on success or
 * negative error code on failure.
 */
int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
		     int lnum, int offs, int len, const struct qstr *nm)
{
	int found, n, err = 0;
	struct ubifs_znode *znode;

	mutex_lock(&c->tnc_mutex);
	dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name,
		DBGKEY(key));
	found = lookup_level0_dirty(c, key, &znode, &n);
	if (found < 0) {
		err = found;
		goto out_unlock;
	}

	if (found == 1) {
		/* The key matched; find the entry whose name matches @nm */
		if (c->replaying)
			found = fallible_resolve_collision(c, key, &znode, &n,
							   nm, 1);
		else
			found = resolve_collision(c, key, &znode, &n, nm);
		dbg_tnc("rc returned %d, znode %p, n %d", found, znode, n);
		if (found < 0) {
			err = found;
			goto out_unlock;
		}

		/* Ensure the znode is dirtied */
		if (znode->cnext || !ubifs_zn_dirty(znode)) {
			znode = dirty_cow_bottom_up(c, znode);
			if (IS_ERR(znode)) {
				err = PTR_ERR(znode);
				goto out_unlock;
			}
		}

		if (found == 1) {
			/* Same name: old node becomes dirt, repoint branch */
			struct ubifs_zbranch *zbr = &znode->zbranch[n];

			lnc_free(zbr);
			err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
			/*
			 * NOTE(review): as in 'ubifs_tnc_add()', the branch is
			 * repointed even when 'ubifs_add_dirt()' failed.
			 */
			zbr->lnum = lnum;
			zbr->offs = offs;
			zbr->len = len;
			goto out_unlock;
		}
	}

	if (!found) {
		/* No entry with this name: insert a new branch after slot @n */
		struct ubifs_zbranch zbr;

		zbr.znode = NULL;
		zbr.lnum = lnum;
		zbr.offs = offs;
		zbr.len = len;
		key_copy(c, key, &zbr.key);
		err = tnc_insert(c, znode, &zbr, n + 1);
		if (err)
			goto out_unlock;
		if (c->replaying) {
			/*
			 * We did not find it in the index so there may be a
			 * dangling branch still in the index. So we remove it
			 * by passing 'ubifs_tnc_remove_nm()' the same key but
			 * an unmatchable name.
			 */
			struct qstr noname = { .len = 0, .name = "" };

			err = dbg_check_tnc(c, 0);
			/* 'ubifs_tnc_remove_nm()' takes @c->tnc_mutex itself */
			mutex_unlock(&c->tnc_mutex);
			if (err)
				return err;
			return ubifs_tnc_remove_nm(c, key, &noname);
		}
	}

out_unlock:
	if (!err)
		err = dbg_check_tnc(c, 0);
	mutex_unlock(&c->tnc_mutex);
	return err;
}

/**
 * tnc_delete - delete a znode from TNC.
 * @c: UBIFS file-system description object
 * @znode: znode to delete from
 * @n: zbranch slot number to delete
 *
 * This function deletes a leaf node from @n-th slot of @znode. Returns zero in
 * case of success and a negative error code in case of failure.
 */
static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
{
	struct ubifs_zbranch *zbr;
	struct ubifs_znode *zp;
	int i, err;

	/* Delete without merge for now */
	ubifs_assert(znode->level == 0);
	ubifs_assert(n >= 0 && n < c->fanout);
	dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key));

	zbr = &znode->zbranch[n];
	lnc_free(zbr);

	err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
	if (err) {
		dbg_dump_znode(c, znode);
		return err;
	}

	/* We do not "gap" zbranch slots */
	for (i = n; i < znode->child_cnt - 1; i++)
		znode->zbranch[i] = znode->zbranch[i + 1];
	znode->child_cnt -= 1;

	if (znode->child_cnt > 0)
		return 0;

	/*
	 * This was the last zbranch, we have to delete this znode from the
	 * parent.
	 */

	/*
	 * Walk up, disposing of every znode that would be left without
	 * children. On exit @znode is the first ancestor that keeps other
	 * children and @n is the slot in it to remove.
	 *
	 * NOTE(review): @zp is dereferenced by the loop condition without a
	 * NULL check; presumably the root can never lose its last child this
	 * way - confirm.
	 */
	do {
		ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags));
		ubifs_assert(ubifs_zn_dirty(znode));

		zp = znode->parent;
		n = znode->iip;

		atomic_long_dec(&c->dirty_zn_cnt);

		err = insert_old_idx_znode(c, znode);
		if (err)
			return err;

		/* Znodes still linked through @cnext are only marked obsolete */
		if (znode->cnext) {
			__set_bit(OBSOLETE_ZNODE, &znode->flags);
			atomic_long_inc(&c->clean_zn_cnt);
			atomic_long_inc(&ubifs_clean_zn_cnt);
		} else
			kfree(znode);
		znode = zp;
	} while (znode->child_cnt == 1); /* while removing last child */

	/* Remove from znode, entry n - 1 */
	znode->child_cnt -= 1;
	ubifs_assert(znode->level != 0);
	for (i = n; i < znode->child_cnt; i++) {
		znode->zbranch[i] = znode->zbranch[i + 1];
		/* Children that moved down one slot get a new index-in-parent */
		if (znode->zbranch[i].znode)
			znode->zbranch[i].znode->iip = i;
	}

	/*
	 * If this is the root and it has only 1 child then
	 * collapse the tree.
	 */
	if (!znode->parent) {
		while (znode->child_cnt == 1 && znode->level != 0) {
			zp = znode;
			zbr = &znode->zbranch[0];
			znode = get_znode(c, znode, 0);
			if (IS_ERR(znode))
				return PTR_ERR(znode);
			znode = dirty_cow_znode(c, zbr);
			if (IS_ERR(znode))
				return PTR_ERR(znode);
			/* The only child becomes the new root */
			znode->parent = NULL;
			znode->iip = 0;
			if (c->zroot.len) {
				err = insert_old_idx(c, c->zroot.lnum,
						     c->zroot.offs);
				if (err)
					return err;
			}
			c->zroot.lnum = zbr->lnum;
			c->zroot.offs = zbr->offs;
			c->zroot.len = zbr->len;
			c->zroot.znode = znode;
			ubifs_assert(!test_bit(OBSOLETE_ZNODE,
				     &zp->flags));
			ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags));
			atomic_long_dec(&c->dirty_zn_cnt);

			/* Dispose of the old root the same way as above */
			if (zp->cnext) {
				__set_bit(OBSOLETE_ZNODE, &zp->flags);
				atomic_long_inc(&c->clean_zn_cnt);
				atomic_long_inc(&ubifs_clean_zn_cnt);
			} else
				kfree(zp);
		}
	}

	return 0;
}
+ * @c: UBIFS file-system description object + * @key: key of node + * + * Returns %0 on success or negative error code on failure. + */ +int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key) +{ +	int found, n, err = 0; +	struct ubifs_znode *znode; + +	mutex_lock(&c->tnc_mutex); +	dbg_tnc("key %s", DBGKEY(key)); +	found = lookup_level0_dirty(c, key, &znode, &n); +	if (found < 0) { +		err = found; +		goto out_unlock; +	} +	if (found == 1) +		err = tnc_delete(c, znode, n); +	if (!err) +		err = dbg_check_tnc(c, 0); + +out_unlock: +	mutex_unlock(&c->tnc_mutex); +	return err; +} + +/** + * ubifs_tnc_remove_nm - remove an index entry for a "hashed" node. + * @c: UBIFS file-system description object + * @key: key of node + * @nm: directory entry name + * + * Returns %0 on success or negative error code on failure. + */ +int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, +			const struct qstr *nm) +{ +	int n, err; +	struct ubifs_znode *znode; + +	mutex_lock(&c->tnc_mutex); +	dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key)); +	err = lookup_level0_dirty(c, key, &znode, &n); +	if (err < 0) +		goto out_unlock; + +	if (err) { +		if (c->replaying) +			err = fallible_resolve_collision(c, key, &znode, &n, +							 nm, 0); +		else +			err = resolve_collision(c, key, &znode, &n, nm); +		dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n); +		if (err < 0) +			goto out_unlock; +		if (err) { +			/* Ensure the znode is dirtied */ +			if (znode->cnext || !ubifs_zn_dirty(znode)) { +				    znode = dirty_cow_bottom_up(c, znode); +				    if (IS_ERR(znode)) { +					    err = PTR_ERR(znode); +					    goto out_unlock; +				    } +			} +			err = tnc_delete(c, znode, n); +		} +	} + +out_unlock: +	if (!err) +		err = dbg_check_tnc(c, 0); +	mutex_unlock(&c->tnc_mutex); +	return err; +} + +/** + * key_in_range - determine if a key falls within a range of keys. 
+ * @c: UBIFS file-system description object + * @key: key to check + * @from_key: lowest key in range + * @to_key: highest key in range + * + * This function returns %1 if the key is in range and %0 otherwise. + */ +static int key_in_range(struct ubifs_info *c, union ubifs_key *key, +			union ubifs_key *from_key, union ubifs_key *to_key) +{ +	if (keys_cmp(c, key, from_key) < 0) +		return 0; +	if (keys_cmp(c, key, to_key) > 0) +		return 0; +	return 1; +} + +/** + * ubifs_tnc_remove_range - remove index entries in range. + * @c: UBIFS file-system description object + * @from_key: lowest key to remove + * @to_key: highest key to remove + * + * This function removes index entries starting at @from_key and ending at + * @to_key.  This function returns zero in case of success and a negative error + * code in case of failure. + */ +int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, +			   union ubifs_key *to_key) +{ +	int i, n, k, err = 0; +	struct ubifs_znode *znode; +	union ubifs_key *key; + +	mutex_lock(&c->tnc_mutex); +	while (1) { +		/* Find first level 0 znode that contains keys to remove */ +		err = ubifs_lookup_level0(c, from_key, &znode, &n); +		if (err < 0) +			goto out_unlock; + +		if (err) +			key = from_key; +		else { +			err = tnc_next(c, &znode, &n); +			if (err == -ENOENT) { +				err = 0; +				goto out_unlock; +			} +			if (err < 0) +				goto out_unlock; +			key = &znode->zbranch[n].key; +			if (!key_in_range(c, key, from_key, to_key)) { +				err = 0; +				goto out_unlock; +			} +		} + +		/* Ensure the znode is dirtied */ +		if (znode->cnext || !ubifs_zn_dirty(znode)) { +			znode = dirty_cow_bottom_up(c, znode); +			if (IS_ERR(znode)) { +				err = PTR_ERR(znode); +				goto out_unlock; +			} +		} + +		/* Remove all keys in range except the first */ +		for (i = n + 1, k = 0; i < znode->child_cnt; i++, k++) { +			key = &znode->zbranch[i].key; +			if (!key_in_range(c, key, from_key, to_key)) +				break; +			
lnc_free(&znode->zbranch[i]); +			err = ubifs_add_dirt(c, znode->zbranch[i].lnum, +					     znode->zbranch[i].len); +			if (err) { +				dbg_dump_znode(c, znode); +				goto out_unlock; +			} +			dbg_tnc("removing %s", DBGKEY(key)); +		} +		if (k) { +			for (i = n + 1 + k; i < znode->child_cnt; i++) +				znode->zbranch[i - k] = znode->zbranch[i]; +			znode->child_cnt -= k; +		} + +		/* Now delete the first */ +		err = tnc_delete(c, znode, n); +		if (err) +			goto out_unlock; +	} + +out_unlock: +	if (!err) +		err = dbg_check_tnc(c, 0); +	mutex_unlock(&c->tnc_mutex); +	return err; +} + +/** + * ubifs_tnc_remove_ino - remove an inode from TNC. + * @c: UBIFS file-system description object + * @inum: inode number to remove + * + * This function remove inode @inum and all the extended attributes associated + * with the anode from TNC and returns zero in case of success or a negative + * error code in case of failure. + */ +int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum) +{ +	union ubifs_key key1, key2; +	struct ubifs_dent_node *xent, *pxent = NULL; +	struct qstr nm = { .name = NULL }; + +	dbg_tnc("ino %lu", (unsigned long)inum); + +	/* +	 * Walk all extended attribute entries and remove them together with +	 * corresponding extended attribute inodes. 
+	 */ +	lowest_xent_key(c, &key1, inum); +	while (1) { +		ino_t xattr_inum; +		int err; + +		xent = ubifs_tnc_next_ent(c, &key1, &nm); +		if (IS_ERR(xent)) { +			err = PTR_ERR(xent); +			if (err == -ENOENT) +				break; +			return err; +		} + +		xattr_inum = le64_to_cpu(xent->inum); +		dbg_tnc("xent '%s', ino %lu", xent->name, +			(unsigned long)xattr_inum); + +		nm.name = (char *)xent->name; +		nm.len = le16_to_cpu(xent->nlen); +		err = ubifs_tnc_remove_nm(c, &key1, &nm); +		if (err) { +			kfree(xent); +			return err; +		} + +		lowest_ino_key(c, &key1, xattr_inum); +		highest_ino_key(c, &key2, xattr_inum); +		err = ubifs_tnc_remove_range(c, &key1, &key2); +		if (err) { +			kfree(xent); +			return err; +		} + +		kfree(pxent); +		pxent = xent; +		key_read(c, &xent->key, &key1); +	} + +	kfree(pxent); +	lowest_ino_key(c, &key1, inum); +	highest_ino_key(c, &key2, inum); + +	return ubifs_tnc_remove_range(c, &key1, &key2); +} + +/** + * ubifs_tnc_next_ent - walk directory or extended attribute entries. + * @c: UBIFS file-system description object + * @key: key of last entry + * @nm: name of last entry found or %NULL + * + * This function finds and reads the next directory or extended attribute entry + * after the given key (@key) if there is one. @nm is used to resolve + * collisions. + * + * If the name of the current entry is not known and only the key is known, + * @nm->name has to be %NULL. In this case the semantics of this function is a + * little bit different and it returns the entry corresponding to this key, not + * the next one. If the key was not found, the closest "right" entry is + * returned. + * + * If the fist entry has to be found, @key has to contain the lowest possible + * key value for this inode and @name has to be %NULL. + * + * This function returns the found directory or extended attribute entry node + * in case of success, %-ENOENT is returned if no entry was found, and a + * negative error code is returned in case of failure. 
/**
 * ubifs_tnc_next_ent - walk directory or extended attribute entries.
 * @c: UBIFS file-system description object
 * @key: key of last entry
 * @nm: name of last entry found or %NULL
 *
 * This function finds and reads the next directory or extended attribute entry
 * after the given key (@key) if there is one. @nm is used to resolve
 * collisions.
 *
 * If the name of the current entry is not known and only the key is known,
 * @nm->name has to be %NULL. In this case the semantics of this function is a
 * little bit different and it returns the entry corresponding to this key, not
 * the next one. If the key was not found, the closest "right" entry is
 * returned.
 *
 * If the first entry has to be found, @key has to contain the lowest possible
 * key value for this inode and @nm->name has to be %NULL.
 *
 * This function returns the found directory or extended attribute entry node
 * in case of success, %-ENOENT is returned if no entry was found, and a
 * negative error code is returned in case of failure. The returned node is
 * allocated with 'kmalloc()' and has to be freed by the caller.
 */
struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
					   union ubifs_key *key,
					   const struct qstr *nm)
{
	int n, err, type = key_type(c, key);
	struct ubifs_znode *znode;
	struct ubifs_dent_node *dent;
	struct ubifs_zbranch *zbr;
	union ubifs_key *dkey;

	dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key));
	ubifs_assert(is_hash_key(c, key));

	mutex_lock(&c->tnc_mutex);
	err = ubifs_lookup_level0(c, key, &znode, &n);
	if (unlikely(err < 0))
		goto out_unlock;

	if (nm->name) {
		if (err) {
			/* Handle collisions */
			err = resolve_collision(c, key, &znode, &n, nm);
			dbg_tnc("rc returned %d, znode %p, n %d",
				err, znode, n);
			if (unlikely(err < 0))
				goto out_unlock;
		}

		/* Now find next entry */
		err = tnc_next(c, &znode, &n);
		if (unlikely(err))
			goto out_unlock;
	} else {
		/*
		 * The full name of the entry was not given, in which case the
		 * behavior of this function is a little different and it
		 * returns current entry, not the next one.
		 */
		if (!err) {
			/*
			 * However, the given key does not exist in the TNC
			 * tree and @znode/@n variables contain the closest
			 * "preceding" element. Switch to the next one.
			 */
			err = tnc_next(c, &znode, &n);
			if (err)
				goto out_unlock;
		}
	}

	/* The buffer is sized from the branch's recorded node length */
	zbr = &znode->zbranch[n];
	dent = kmalloc(zbr->len, GFP_NOFS);
	if (unlikely(!dent)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	/*
	 * The above 'tnc_next()' call could lead us to the next inode, check
	 * this.
	 */
	dkey = &zbr->key;
	if (key_inum(c, dkey) != key_inum(c, key) ||
	    key_type(c, dkey) != type) {
		err = -ENOENT;
		goto out_free;
	}

	err = tnc_read_node_nm(c, zbr, dent);
	if (unlikely(err))
		goto out_free;

	mutex_unlock(&c->tnc_mutex);
	return dent;

out_free:
	kfree(dent);
out_unlock:
	mutex_unlock(&c->tnc_mutex);
	return ERR_PTR(err);
}
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
new file mode 100644
index 000000000..8ac76b1c2
--- /dev/null
+++ b/fs/ubifs/tnc_commit.c
@@ -0,0 +1,1102 @@
/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Authors: Adrian Hunter
 *          Artem Bityutskiy (Битюцкий Артём)
 */

/* This file implements TNC functions for committing */

#include "ubifs.h"
+ * @c: UBIFS file-system description object + * @idx: buffer in which to place new index node + * @znode: znode from which to make new index node + * @lnum: LEB number where new index node will be written + * @offs: offset where new index node will be written + * @len: length of new index node + */ +static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx, +			 struct ubifs_znode *znode, int lnum, int offs, int len) +{ +	struct ubifs_znode *zp; +	int i, err; + +	/* Make index node */ +	idx->ch.node_type = UBIFS_IDX_NODE; +	idx->child_cnt = cpu_to_le16(znode->child_cnt); +	idx->level = cpu_to_le16(znode->level); +	for (i = 0; i < znode->child_cnt; i++) { +		struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); +		struct ubifs_zbranch *zbr = &znode->zbranch[i]; + +		key_write_idx(c, &zbr->key, &br->key); +		br->lnum = cpu_to_le32(zbr->lnum); +		br->offs = cpu_to_le32(zbr->offs); +		br->len = cpu_to_le32(zbr->len); +		if (!zbr->lnum || !zbr->len) { +			ubifs_err("bad ref in znode"); +			dbg_dump_znode(c, znode); +			if (zbr->znode) +				dbg_dump_znode(c, zbr->znode); +		} +	} +	ubifs_prepare_node(c, idx, len, 0); + +#ifdef CONFIG_UBIFS_FS_DEBUG +	znode->lnum = lnum; +	znode->offs = offs; +	znode->len = len; +#endif + +	err = insert_old_idx_znode(c, znode); + +	/* Update the parent */ +	zp = znode->parent; +	if (zp) { +		struct ubifs_zbranch *zbr; + +		zbr = &zp->zbranch[znode->iip]; +		zbr->lnum = lnum; +		zbr->offs = offs; +		zbr->len = len; +	} else { +		c->zroot.lnum = lnum; +		c->zroot.offs = offs; +		c->zroot.len = len; +	} +	c->calc_idx_sz += ALIGN(len, 8); + +	atomic_long_dec(&c->dirty_zn_cnt); + +	ubifs_assert(ubifs_zn_dirty(znode)); +	ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); + +	__clear_bit(DIRTY_ZNODE, &znode->flags); +	__clear_bit(COW_ZNODE, &znode->flags); + +	return err; +} + +/** + * fill_gap - make index nodes in gaps in dirty index LEBs. 
+ * @c: UBIFS file-system description object + * @lnum: LEB number that gap appears in + * @gap_start: offset of start of gap + * @gap_end: offset of end of gap + * @dirt: adds dirty space to this + * + * This function returns the number of index nodes written into the gap. + */ +static int fill_gap(struct ubifs_info *c, int lnum, int gap_start, int gap_end, +		    int *dirt) +{ +	int len, gap_remains, gap_pos, written, pad_len; + +	ubifs_assert((gap_start & 7) == 0); +	ubifs_assert((gap_end & 7) == 0); +	ubifs_assert(gap_end >= gap_start); + +	gap_remains = gap_end - gap_start; +	if (!gap_remains) +		return 0; +	gap_pos = gap_start; +	written = 0; +	while (c->enext) { +		len = ubifs_idx_node_sz(c, c->enext->child_cnt); +		if (len < gap_remains) { +			struct ubifs_znode *znode = c->enext; +			const int alen = ALIGN(len, 8); +			int err; + +			ubifs_assert(alen <= gap_remains); +			err = make_idx_node(c, c->ileb_buf + gap_pos, znode, +					    lnum, gap_pos, len); +			if (err) +				return err; +			gap_remains -= alen; +			gap_pos += alen; +			c->enext = znode->cnext; +			if (c->enext == c->cnext) +				c->enext = NULL; +			written += 1; +		} else +			break; +	} +	if (gap_end == c->leb_size) { +		c->ileb_len = ALIGN(gap_pos, c->min_io_size); +		/* Pad to end of min_io_size */ +		pad_len = c->ileb_len - gap_pos; +	} else +		/* Pad to end of gap */ +		pad_len = gap_remains; +	dbg_gc("LEB %d:%d to %d len %d nodes written %d wasted bytes %d", +	       lnum, gap_start, gap_end, gap_end - gap_start, written, pad_len); +	ubifs_pad(c, c->ileb_buf + gap_pos, pad_len); +	*dirt += pad_len; +	return written; +} + +/** + * find_old_idx - find an index node obsoleted since the last commit start. + * @c: UBIFS file-system description object + * @lnum: LEB number of obsoleted index node + * @offs: offset of obsoleted index node + * + * Returns %1 if found and %0 otherwise. 
+ */ +static int find_old_idx(struct ubifs_info *c, int lnum, int offs) +{ +	struct ubifs_old_idx *o; +	struct rb_node *p; + +	p = c->old_idx.rb_node; +	while (p) { +		o = rb_entry(p, struct ubifs_old_idx, rb); +		if (lnum < o->lnum) +			p = p->rb_left; +		else if (lnum > o->lnum) +			p = p->rb_right; +		else if (offs < o->offs) +			p = p->rb_left; +		else if (offs > o->offs) +			p = p->rb_right; +		else +			return 1; +	} +	return 0; +} + +/** + * is_idx_node_in_use - determine if an index node can be overwritten. + * @c: UBIFS file-system description object + * @key: key of index node + * @level: index node level + * @lnum: LEB number of index node + * @offs: offset of index node + * + * If @key / @lnum / @offs identify an index node that was not part of the old + * index, then this function returns %0 (obsolete).  Else if the index node was + * part of the old index but is now dirty %1 is returned, else if it is clean %2 + * is returned. A negative error code is returned on failure. + */ +static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key, +			      int level, int lnum, int offs) +{ +	int ret; + +	ret = is_idx_node_in_tnc(c, key, level, lnum, offs); +	if (ret < 0) +		return ret; /* Error code */ +	if (ret == 0) +		if (find_old_idx(c, lnum, offs)) +			return 1; +	return ret; +} + +/** + * layout_leb_in_gaps - layout index nodes using in-the-gaps method. + * @c: UBIFS file-system description object + * @p: return LEB number here + * + * This function lays out new index nodes for dirty znodes using in-the-gaps + * method of TNC commit. + * This function merely puts the next znode into the next gap, making no attempt + * to try to maximise the number of znodes that fit. + * This function returns the number of index nodes written into the gaps, or a + * negative error code on failure. 
+ */ +static int layout_leb_in_gaps(struct ubifs_info *c, int *p) +{ +	struct ubifs_scan_leb *sleb; +	struct ubifs_scan_node *snod; +	int lnum, dirt = 0, gap_start, gap_end, err, written, tot_written; + +	tot_written = 0; +	/* Get an index LEB with lots of obsolete index nodes */ +	lnum = ubifs_find_dirty_idx_leb(c); +	if (lnum < 0) +		/* +		 * There also may be dirt in the index head that could be +		 * filled, however we do not check there at present. +		 */ +		return lnum; /* Error code */ +	*p = lnum; +	dbg_gc("LEB %d", lnum); +	/* +	 * Scan the index LEB.  We use the generic scan for this even though +	 * it is more comprehensive and less efficient than is needed for this +	 * purpose. +	 */ +	sleb = ubifs_scan(c, lnum, 0, c->ileb_buf); +	c->ileb_len = 0; +	if (IS_ERR(sleb)) +		return PTR_ERR(sleb); +	gap_start = 0; +	list_for_each_entry(snod, &sleb->nodes, list) { +		struct ubifs_idx_node *idx; +		int in_use, level; + +		ubifs_assert(snod->type == UBIFS_IDX_NODE); +		idx = snod->node; +		key_read(c, ubifs_idx_key(c, idx), &snod->key); +		level = le16_to_cpu(idx->level); +		/* Determine if the index node is in use (not obsolete) */ +		in_use = is_idx_node_in_use(c, &snod->key, level, lnum, +					    snod->offs); +		if (in_use < 0) { +			ubifs_scan_destroy(sleb); +			return in_use; /* Error code */ +		} +		if (in_use) { +			if (in_use == 1) +				dirt += ALIGN(snod->len, 8); +			/* +			 * The obsolete index nodes form gaps that can be +			 * overwritten.  This gap has ended because we have +			 * found an index node that is still in use +			 * i.e. 
not obsolete +			 */ +			gap_end = snod->offs; +			/* Try to fill gap */ +			written = fill_gap(c, lnum, gap_start, gap_end, &dirt); +			if (written < 0) { +				ubifs_scan_destroy(sleb); +				return written; /* Error code */ +			} +			tot_written += written; +			gap_start = ALIGN(snod->offs + snod->len, 8); +		} +	} +	ubifs_scan_destroy(sleb); +	c->ileb_len = c->leb_size; +	gap_end = c->leb_size; +	/* Try to fill gap */ +	written = fill_gap(c, lnum, gap_start, gap_end, &dirt); +	if (written < 0) +		return written; /* Error code */ +	tot_written += written; +	if (tot_written == 0) { +		struct ubifs_lprops lp; + +		dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written); +		err = ubifs_read_one_lp(c, lnum, &lp); +		if (err) +			return err; +		if (lp.free == c->leb_size) { +			/* +			 * We must have snatched this LEB from the idx_gc list +			 * so we need to correct the free and dirty space. +			 */ +			err = ubifs_change_one_lp(c, lnum, +						  c->leb_size - c->ileb_len, +						  dirt, 0, 0, 0); +			if (err) +				return err; +		} +		return 0; +	} +	err = ubifs_change_one_lp(c, lnum, c->leb_size - c->ileb_len, dirt, +				  0, 0, 0); +	if (err) +		return err; +	err = ubifs_leb_change(c, lnum, c->ileb_buf, c->ileb_len, +			       UBI_SHORTTERM); +	if (err) +		return err; +	dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written); +	return tot_written; +} + +/** + * get_leb_cnt - calculate the number of empty LEBs needed to commit. + * @c: UBIFS file-system description object + * @cnt: number of znodes to commit + * + * This function returns the number of empty LEBs needed to commit @cnt znodes + * to the current index head.  The number is not exact and may be more than + * needed. + */ +static int get_leb_cnt(struct ubifs_info *c, int cnt) +{ +	int d; + +	/* Assume maximum index node size (i.e. 
overestimate space needed) */ +	cnt -= (c->leb_size - c->ihead_offs) / c->max_idx_node_sz; +	if (cnt < 0) +		cnt = 0; +	d = c->leb_size / c->max_idx_node_sz; +	return DIV_ROUND_UP(cnt, d); +} + +/** + * layout_in_gaps - in-the-gaps method of committing TNC. + * @c: UBIFS file-system description object + * @cnt: number of dirty znodes to commit. + * + * This function lays out new index nodes for dirty znodes using in-the-gaps + * method of TNC commit. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int layout_in_gaps(struct ubifs_info *c, int cnt) +{ +	int err, leb_needed_cnt, written, *p; + +	dbg_gc("%d znodes to write", cnt); + +	c->gap_lebs = kmalloc(sizeof(int) * (c->lst.idx_lebs + 1), GFP_NOFS); +	if (!c->gap_lebs) +		return -ENOMEM; + +	p = c->gap_lebs; +	do { +		ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs); +		written = layout_leb_in_gaps(c, p); +		if (written < 0) { +			err = written; +			if (err != -ENOSPC) { +				kfree(c->gap_lebs); +				c->gap_lebs = NULL; +				return err; +			} +			if (!dbg_force_in_the_gaps_enabled) { +				/* +				 * Do not print scary warnings if the debugging +				 * option which forces in-the-gaps is enabled. +				 */ +				ubifs_err("out of space"); +				spin_lock(&c->space_lock); +				dbg_dump_budg(c); +				spin_unlock(&c->space_lock); +				dbg_dump_lprops(c); +			} +			/* Try to commit anyway */ +			err = 0; +			break; +		} +		p++; +		cnt -= written; +		leb_needed_cnt = get_leb_cnt(c, cnt); +		dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt, +		       leb_needed_cnt, c->ileb_cnt); +	} while (leb_needed_cnt > c->ileb_cnt); + +	*p = -1; +	return 0; +} + +/** + * layout_in_empty_space - layout index nodes in empty space. + * @c: UBIFS file-system description object + * + * This function lays out new index nodes for dirty znodes using empty LEBs. + * + * This function returns %0 on success and a negative error code on failure. 
/**
 * layout_in_empty_space - layout index nodes in empty space.
 * @c: UBIFS file-system description object
 *
 * This function lays out new index nodes for dirty znodes using empty LEBs.
 * It walks the znodes linked from @c->enext through their @cnext pointers,
 * starting at the index head (@c->ihead_lnum:@c->ihead_offs) and taking
 * further LEBs from @c->ilebs when the current one is full. Only positions
 * are assigned here; no index node is built or written by this function.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int layout_in_empty_space(struct ubifs_info *c)
{
	struct ubifs_znode *znode, *cnext, *zp;
	int lnum, offs, len, next_len, buf_len, buf_offs, used, avail;
	int wlen, blen, err;

	cnext = c->enext;
	if (!cnext)
		return 0;

	lnum = c->ihead_lnum;
	buf_offs = c->ihead_offs;

	/* @buf_len: largest possible index node, rounded up to min. I/O size */
	buf_len = ubifs_idx_node_sz(c, c->fanout);
	buf_len = ALIGN(buf_len, c->min_io_size);
	used = 0;
	avail = buf_len;

	/* Ensure there is enough room for first write */
	next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
	if (buf_offs + next_len > c->leb_size)
		lnum = -1;

	while (1) {
		znode = cnext;

		len = ubifs_idx_node_sz(c, znode->child_cnt);

		/* Determine the index node position */
		if (lnum == -1) {
			/* -1 means "no current LEB": take the next empty one */
			if (c->ileb_nxt >= c->ileb_cnt) {
				ubifs_err("out of space");
				return -ENOSPC;
			}
			lnum = c->ilebs[c->ileb_nxt++];
			buf_offs = 0;
			used = 0;
			avail = buf_len;
		}

		offs = buf_offs + used;

#ifdef CONFIG_UBIFS_FS_DEBUG
		znode->lnum = lnum;
		znode->offs = offs;
		znode->len = len;
#endif

		/* Update the parent */
		zp = znode->parent;
		if (zp) {
			struct ubifs_zbranch *zbr;
			int i;

			i = znode->iip;
			zbr = &zp->zbranch[i];
			zbr->lnum = lnum;
			zbr->offs = offs;
			zbr->len = len;
		} else {
			c->zroot.lnum = lnum;
			c->zroot.offs = offs;
			c->zroot.len = len;
		}
		c->calc_idx_sz += ALIGN(len, 8);

		/*
		 * Once lprops is updated, we can decrease the dirty znode count
		 * but it is easier to just do it here.
		 */
		atomic_long_dec(&c->dirty_zn_cnt);

		/*
		 * Calculate the next index node length to see if there is
		 * enough room for it
		 */
		cnext = znode->cnext;
		if (cnext == c->cnext)
			/* Back at the start of the list: this was the last */
			next_len = 0;
		else
			next_len = ubifs_idx_node_sz(c, cnext->child_cnt);

		if (c->min_io_size == 1) {
			/*
			 * With a minimal I/O size of 1 the @used/@avail
			 * accounting below is skipped and nodes are simply
			 * placed at 8-byte aligned offsets.
			 */
			buf_offs += ALIGN(len, 8);
			if (next_len) {
				if (buf_offs + next_len <= c->leb_size)
					continue;
				/* Next node does not fit: finish this LEB */
				err = ubifs_update_one_lp(c, lnum, 0,
						c->leb_size - buf_offs, 0, 0);
				if (err)
					return err;
				lnum = -1;
				continue;
			}
			err = ubifs_update_one_lp(c, lnum,
					c->leb_size - buf_offs, 0, 0, 0);
			if (err)
				return err;
			break;
		}

		/* Update buffer positions */
		wlen = used + len;
		used += ALIGN(len, 8);
		avail -= ALIGN(len, 8);

		/* Keep filling while the next node fits and room remains */
		if (next_len != 0 &&
		    buf_offs + used + next_len <= c->leb_size &&
		    avail > 0)
			continue;

		if (avail <= 0 && next_len &&
		    buf_offs + used + next_len <= c->leb_size)
			blen = buf_len;
		else
			blen = ALIGN(wlen, c->min_io_size);

		/* The buffer is full or there are no more znodes to do */
		buf_offs += blen;
		if (next_len) {
			if (buf_offs + next_len > c->leb_size) {
				err = ubifs_update_one_lp(c, lnum,
					c->leb_size - buf_offs, blen - used,
					0, 0);
				if (err)
					return err;
				lnum = -1;
			}
			/* Whatever exceeded @blen carries over as @used */
			used -= blen;
			if (used < 0)
				used = 0;
			avail = buf_len - used;
			continue;
		}
		err = ubifs_update_one_lp(c, lnum, c->leb_size - buf_offs,
					  blen - used, 0, 0);
		if (err)
			return err;
		break;
	}

#ifdef CONFIG_UBIFS_FS_DEBUG
	c->new_ihead_lnum = lnum;
	c->new_ihead_offs = buf_offs;
#endif

	return 0;
}
+ * @c: UBIFS file-system description object + * @no_space: indicates that insufficient empty LEBs were allocated + * @cnt: number of znodes to commit + * + * Calculate and update the positions of index nodes to commit.  If there were + * an insufficient number of empty LEBs allocated, then index nodes are placed + * into the gaps created by obsolete index nodes in non-empty index LEBs.  For + * this purpose, an obsolete index node is one that was not in the index as at + * the end of the last commit.  To write "in-the-gaps" requires that those index + * LEBs are updated atomically in-place. + */ +static int layout_commit(struct ubifs_info *c, int no_space, int cnt) +{ +	int err; + +	if (no_space) { +		err = layout_in_gaps(c, cnt); +		if (err) +			return err; +	} +	err = layout_in_empty_space(c); +	return err; +} + +/** + * find_first_dirty - find first dirty znode. + * @znode: znode to begin searching from + */ +static struct ubifs_znode *find_first_dirty(struct ubifs_znode *znode) +{ +	int i, cont; + +	if (!znode) +		return NULL; + +	while (1) { +		if (znode->level == 0) { +			if (ubifs_zn_dirty(znode)) +				return znode; +			return NULL; +		} +		cont = 0; +		for (i = 0; i < znode->child_cnt; i++) { +			struct ubifs_zbranch *zbr = &znode->zbranch[i]; + +			if (zbr->znode && ubifs_zn_dirty(zbr->znode)) { +				znode = zbr->znode; +				cont = 1; +				break; +			} +		} +		if (!cont) { +			if (ubifs_zn_dirty(znode)) +				return znode; +			return NULL; +		} +	} +} + +/** + * find_next_dirty - find next dirty znode. 
+ * @znode: znode to begin searching from + */ +static struct ubifs_znode *find_next_dirty(struct ubifs_znode *znode) +{ +	int n = znode->iip + 1; + +	znode = znode->parent; +	if (!znode) +		return NULL; +	for (; n < znode->child_cnt; n++) { +		struct ubifs_zbranch *zbr = &znode->zbranch[n]; + +		if (zbr->znode && ubifs_zn_dirty(zbr->znode)) +			return find_first_dirty(zbr->znode); +	} +	return znode; +} + +/** + * get_znodes_to_commit - create list of dirty znodes to commit. + * @c: UBIFS file-system description object + * + * This function returns the number of znodes to commit. + */ +static int get_znodes_to_commit(struct ubifs_info *c) +{ +	struct ubifs_znode *znode, *cnext; +	int cnt = 0; + +	c->cnext = find_first_dirty(c->zroot.znode); +	znode = c->enext = c->cnext; +	if (!znode) { +		dbg_cmt("no znodes to commit"); +		return 0; +	} +	cnt += 1; +	while (1) { +		ubifs_assert(!test_bit(COW_ZNODE, &znode->flags)); +		__set_bit(COW_ZNODE, &znode->flags); +		znode->alt = 0; +		cnext = find_next_dirty(znode); +		if (!cnext) { +			znode->cnext = c->cnext; +			break; +		} +		znode->cnext = cnext; +		znode = cnext; +		cnt += 1; +	} +	dbg_cmt("committing %d znodes", cnt); +	ubifs_assert(cnt == atomic_long_read(&c->dirty_zn_cnt)); +	return cnt; +} + +/** + * alloc_idx_lebs - allocate empty LEBs to be used to commit. + * @c: UBIFS file-system description object + * @cnt: number of znodes to commit + * + * This function returns %-ENOSPC if it cannot allocate a sufficient number of + * empty LEBs.  %0 is returned on success, otherwise a negative error code + * is returned. 
+ */ +static int alloc_idx_lebs(struct ubifs_info *c, int cnt) +{ +	int i, leb_cnt, lnum; + +	c->ileb_cnt = 0; +	c->ileb_nxt = 0; +	leb_cnt = get_leb_cnt(c, cnt); +	dbg_cmt("need about %d empty LEBS for TNC commit", leb_cnt); +	if (!leb_cnt) +		return 0; +	c->ilebs = kmalloc(leb_cnt * sizeof(int), GFP_NOFS); +	if (!c->ilebs) +		return -ENOMEM; +	for (i = 0; i < leb_cnt; i++) { +		lnum = ubifs_find_free_leb_for_idx(c); +		if (lnum < 0) +			return lnum; +		c->ilebs[c->ileb_cnt++] = lnum; +		dbg_cmt("LEB %d", lnum); +	} +	if (dbg_force_in_the_gaps()) +		return -ENOSPC; +	return 0; +} + +/** + * free_unused_idx_lebs - free unused LEBs that were allocated for the commit. + * @c: UBIFS file-system description object + * + * It is possible that we allocate more empty LEBs for the commit than we need. + * This functions frees the surplus. + * + * This function returns %0 on success and a negative error code on failure. + */ +static int free_unused_idx_lebs(struct ubifs_info *c) +{ +	int i, err = 0, lnum, er; + +	for (i = c->ileb_nxt; i < c->ileb_cnt; i++) { +		lnum = c->ilebs[i]; +		dbg_cmt("LEB %d", lnum); +		er = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, +					 LPROPS_INDEX | LPROPS_TAKEN, 0); +		if (!err) +			err = er; +	} +	return err; +} + +/** + * free_idx_lebs - free unused LEBs after commit end. + * @c: UBIFS file-system description object + * + * This function returns %0 on success and a negative error code on failure. + */ +static int free_idx_lebs(struct ubifs_info *c) +{ +	int err; + +	err = free_unused_idx_lebs(c); +	kfree(c->ilebs); +	c->ilebs = NULL; +	return err; +} + +/** + * ubifs_tnc_start_commit - start TNC commit. + * @c: UBIFS file-system description object + * @zroot: new index root position is returned here + * + * This function prepares the list of indexing nodes to commit and lays out + * their positions on flash. If there is not enough free space it uses the + * in-gap commit method. 
Returns zero in case of success and a negative error + * code in case of failure. + */ +int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot) +{ +	int err = 0, cnt; + +	mutex_lock(&c->tnc_mutex); +	err = dbg_check_tnc(c, 1); +	if (err) +		goto out; +	cnt = get_znodes_to_commit(c); +	if (cnt != 0) { +		int no_space = 0; + +		err = alloc_idx_lebs(c, cnt); +		if (err == -ENOSPC) +			no_space = 1; +		else if (err) +			goto out_free; +		err = layout_commit(c, no_space, cnt); +		if (err) +			goto out_free; +		ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0); +		err = free_unused_idx_lebs(c); +		if (err) +			goto out; +	} +	destroy_old_idx(c); +	memcpy(zroot, &c->zroot, sizeof(struct ubifs_zbranch)); + +	err = ubifs_save_dirty_idx_lnums(c); +	if (err) +		goto out; + +	spin_lock(&c->space_lock); +	/* +	 * Although we have not finished committing yet, update size of the +	 * committed index ('c->old_idx_sz') and zero out the index growth +	 * budget. It is OK to do this now, because we've reserved all the +	 * space which is needed to commit the index, and it is save for the +	 * budgeting subsystem to assume the index is already committed, +	 * even though it is not. +	 */ +	c->old_idx_sz = c->calc_idx_sz; +	c->budg_uncommitted_idx = 0; +	spin_unlock(&c->space_lock); +	mutex_unlock(&c->tnc_mutex); + +	dbg_cmt("number of index LEBs %d", c->lst.idx_lebs); +	dbg_cmt("size of index %llu", c->calc_idx_sz); +	return err; + +out_free: +	free_idx_lebs(c); +out: +	mutex_unlock(&c->tnc_mutex); +	return err; +} + +/** + * write_index - write index nodes. + * @c: UBIFS file-system description object + * + * This function writes the index nodes whose positions were laid out in the + * layout_in_empty_space function. 
+ */ +static int write_index(struct ubifs_info *c) +{ +	struct ubifs_idx_node *idx; +	struct ubifs_znode *znode, *cnext; +	int i, lnum, offs, len, next_len, buf_len, buf_offs, used; +	int avail, wlen, err, lnum_pos = 0; + +	cnext = c->enext; +	if (!cnext) +		return 0; + +	/* +	 * Always write index nodes to the index head so that index nodes and +	 * other types of nodes are never mixed in the same erase block. +	 */ +	lnum = c->ihead_lnum; +	buf_offs = c->ihead_offs; + +	/* Allocate commit buffer */ +	buf_len = ALIGN(c->max_idx_node_sz, c->min_io_size); +	used = 0; +	avail = buf_len; + +	/* Ensure there is enough room for first write */ +	next_len = ubifs_idx_node_sz(c, cnext->child_cnt); +	if (buf_offs + next_len > c->leb_size) { +		err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, 0, +					  LPROPS_TAKEN); +		if (err) +			return err; +		lnum = -1; +	} + +	while (1) { +		cond_resched(); + +		znode = cnext; +		idx = c->cbuf + used; + +		/* Make index node */ +		idx->ch.node_type = UBIFS_IDX_NODE; +		idx->child_cnt = cpu_to_le16(znode->child_cnt); +		idx->level = cpu_to_le16(znode->level); +		for (i = 0; i < znode->child_cnt; i++) { +			struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); +			struct ubifs_zbranch *zbr = &znode->zbranch[i]; + +			key_write_idx(c, &zbr->key, &br->key); +			br->lnum = cpu_to_le32(zbr->lnum); +			br->offs = cpu_to_le32(zbr->offs); +			br->len = cpu_to_le32(zbr->len); +			if (!zbr->lnum || !zbr->len) { +				ubifs_err("bad ref in znode"); +				dbg_dump_znode(c, znode); +				if (zbr->znode) +					dbg_dump_znode(c, zbr->znode); +			} +		} +		len = ubifs_idx_node_sz(c, znode->child_cnt); +		ubifs_prepare_node(c, idx, len, 0); + +		/* Determine the index node position */ +		if (lnum == -1) { +			lnum = c->ilebs[lnum_pos++]; +			buf_offs = 0; +			used = 0; +			avail = buf_len; +		} +		offs = buf_offs + used; + +#ifdef CONFIG_UBIFS_FS_DEBUG +		if (lnum != znode->lnum || offs != znode->offs || +		    len != znode->len) { +			
ubifs_err("inconsistent znode posn"); +			return -EINVAL; +		} +#endif + +		/* Grab some stuff from znode while we still can */ +		cnext = znode->cnext; + +		ubifs_assert(ubifs_zn_dirty(znode)); +		ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); + +		/* +		 * It is important that other threads should see %DIRTY_ZNODE +		 * flag cleared before %COW_ZNODE. Specifically, it matters in +		 * the 'dirty_cow_znode()' function. This is the reason for the +		 * first barrier. Also, we want the bit changes to be seen to +		 * other threads ASAP, to avoid unnecesarry copying, which is +		 * the reason for the second barrier. +		 */ +		clear_bit(DIRTY_ZNODE, &znode->flags); +		smp_mb__before_clear_bit(); +		clear_bit(COW_ZNODE, &znode->flags); +		smp_mb__after_clear_bit(); + +		/* Do not access znode from this point on */ + +		/* Update buffer positions */ +		wlen = used + len; +		used += ALIGN(len, 8); +		avail -= ALIGN(len, 8); + +		/* +		 * Calculate the next index node length to see if there is +		 * enough room for it +		 */ +		if (cnext == c->cnext) +			next_len = 0; +		else +			next_len = ubifs_idx_node_sz(c, cnext->child_cnt); + +		if (c->min_io_size == 1) { +			/* +			 * Write the prepared index node immediately if there is +			 * no minimum IO size +			 */ +			err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, +					      wlen, UBI_SHORTTERM); +			if (err) +				return err; +			buf_offs += ALIGN(wlen, 8); +			if (next_len) { +				used = 0; +				avail = buf_len; +				if (buf_offs + next_len > c->leb_size) { +					err = ubifs_update_one_lp(c, lnum, +						LPROPS_NC, 0, 0, LPROPS_TAKEN); +					if (err) +						return err; +					lnum = -1; +				} +				continue; +			} +		} else { +			int blen, nxt_offs = buf_offs + used + next_len; + +			if (next_len && nxt_offs <= c->leb_size) { +				if (avail > 0) +					continue; +				else +					blen = buf_len; +			} else { +				wlen = ALIGN(wlen, 8); +				blen = ALIGN(wlen, c->min_io_size); +				ubifs_pad(c, c->cbuf + wlen, blen - 
wlen); +			} +			/* +			 * The buffer is full or there are no more znodes +			 * to do +			 */ +			err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, +					      blen, UBI_SHORTTERM); +			if (err) +				return err; +			buf_offs += blen; +			if (next_len) { +				if (nxt_offs > c->leb_size) { +					err = ubifs_update_one_lp(c, lnum, +						LPROPS_NC, 0, 0, LPROPS_TAKEN); +					if (err) +						return err; +					lnum = -1; +				} +				used -= blen; +				if (used < 0) +					used = 0; +				avail = buf_len - used; +				memmove(c->cbuf, c->cbuf + blen, used); +				continue; +			} +		} +		break; +	} + +#ifdef CONFIG_UBIFS_FS_DEBUG +	if (lnum != c->new_ihead_lnum || buf_offs != c->new_ihead_offs) { +		ubifs_err("inconsistent ihead"); +		return -EINVAL; +	} +#endif + +	c->ihead_lnum = lnum; +	c->ihead_offs = buf_offs; + +	return 0; +} + +/** + * free_obsolete_znodes - free obsolete znodes. + * @c: UBIFS file-system description object + * + * At the end of commit end, obsolete znodes are freed. + */ +static void free_obsolete_znodes(struct ubifs_info *c) +{ +	struct ubifs_znode *znode, *cnext; + +	cnext = c->cnext; +	do { +		znode = cnext; +		cnext = znode->cnext; +		if (test_bit(OBSOLETE_ZNODE, &znode->flags)) +			kfree(znode); +		else { +			znode->cnext = NULL; +			atomic_long_inc(&c->clean_zn_cnt); +			atomic_long_inc(&ubifs_clean_zn_cnt); +		} +	} while (cnext != c->cnext); +} + +/** + * return_gap_lebs - return LEBs used by the in-gap commit method. + * @c: UBIFS file-system description object + * + * This function clears the "taken" flag for the LEBs which were used by the + * "commit in-the-gaps" method. 
+ */ +static int return_gap_lebs(struct ubifs_info *c) +{ +	int *p, err; + +	if (!c->gap_lebs) +		return 0; + +	dbg_cmt(""); +	for (p = c->gap_lebs; *p != -1; p++) { +		err = ubifs_change_one_lp(c, *p, LPROPS_NC, LPROPS_NC, 0, +					  LPROPS_TAKEN, 0); +		if (err) +			return err; +	} + +	kfree(c->gap_lebs); +	c->gap_lebs = NULL; +	return 0; +} + +/** + * ubifs_tnc_end_commit - update the TNC for commit end. + * @c: UBIFS file-system description object + * + * Write the dirty znodes. + */ +int ubifs_tnc_end_commit(struct ubifs_info *c) +{ +	int err; + +	if (!c->cnext) +		return 0; + +	err = return_gap_lebs(c); +	if (err) +		return err; + +	err = write_index(c); +	if (err) +		return err; + +	mutex_lock(&c->tnc_mutex); + +	dbg_cmt("TNC height is %d", c->zroot.znode->level + 1); + +	free_obsolete_znodes(c); + +	c->cnext = NULL; +	kfree(c->ilebs); +	c->ilebs = NULL; + +	mutex_unlock(&c->tnc_mutex); + +	return 0; +} diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c new file mode 100644 index 000000000..955219fa0 --- /dev/null +++ b/fs/ubifs/tnc_misc.c @@ -0,0 +1,435 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Adrian Hunter + *          Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file contains miscellaneous TNC-related functions shared between + * different files. This file does not form any logically separate TNC + * sub-system. The file was created because there is a lot of TNC code and + * putting it all in one file would make that file too big and unreadable. + */ + +#include "ubifs.h" + +/** + * ubifs_tnc_levelorder_next - next TNC tree element in levelorder traversal. + * @zr: root of the subtree to traverse + * @znode: previous znode + * + * This function implements levelorder TNC traversal. The LNC is ignored. + * Returns the next element or %NULL if @znode is already the last one. + */ +struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, +					      struct ubifs_znode *znode) +{ +	int level, iip, level_search = 0; +	struct ubifs_znode *zn; + +	ubifs_assert(zr); + +	if (unlikely(!znode)) +		return zr; + +	if (unlikely(znode == zr)) { +		if (znode->level == 0) +			return NULL; +		return ubifs_tnc_find_child(zr, 0); +	} + +	level = znode->level; + +	iip = znode->iip; +	while (1) { +		ubifs_assert(znode->level <= zr->level); + +		/* +		 * First walk up until there is a znode with next branch to +		 * look at. +		 */ +		while (znode->parent != zr && iip >= znode->parent->child_cnt) { +			znode = znode->parent; +			iip = znode->iip; +		} + +		if (unlikely(znode->parent == zr && +			     iip >= znode->parent->child_cnt)) { +			/* This level is done, switch to the lower one */ +			level -= 1; +			if (level_search || level < 0) +				/* +				 * We were already looking for znode at lower +				 * level ('level_search'). As we are here +				 * again, it just does not exist.
Or all levels +				 * were finished ('level < 0'). +				 */ +				return NULL; + +			level_search = 1; +			iip = -1; +			znode = ubifs_tnc_find_child(zr, 0); +			ubifs_assert(znode); +		} + +		/* Switch to the next index */ +		zn = ubifs_tnc_find_child(znode->parent, iip + 1); +		if (!zn) { +			/* No more children to look at, we have walk up */ +			iip = znode->parent->child_cnt; +			continue; +		} + +		/* Walk back down to the level we came from ('level') */ +		while (zn->level != level) { +			znode = zn; +			zn = ubifs_tnc_find_child(zn, 0); +			if (!zn) { +				/* +				 * This path is not too deep so it does not +				 * reach 'level'. Try next path. +				 */ +				iip = znode->iip; +				break; +			} +		} + +		if (zn) { +			ubifs_assert(zn->level >= 0); +			return zn; +		} +	} +} + +/** + * ubifs_search_zbranch - search znode branch. + * @c: UBIFS file-system description object + * @znode: znode to search in + * @key: key to search for + * @n: znode branch slot number is returned here + * + * This is a helper function which search branch with key @key in @znode using + * binary search. The result of the search may be: + *   o exact match, then %1 is returned, and the slot number of the branch is + *     stored in @n; + *   o no exact match, then %0 is returned and the slot number of the left + *     closest branch is returned in @n; the slot if all keys in this znode are + *     greater than @key, then %-1 is returned in @n. 
+ */ +int ubifs_search_zbranch(const struct ubifs_info *c, +			 const struct ubifs_znode *znode, +			 const union ubifs_key *key, int *n) +{ +	int beg = 0, end = znode->child_cnt, uninitialized_var(mid); +	int uninitialized_var(cmp); +	const struct ubifs_zbranch *zbr = &znode->zbranch[0]; + +	ubifs_assert(end > beg); + +	while (end > beg) { +		mid = (beg + end) >> 1; +		cmp = keys_cmp(c, key, &zbr[mid].key); +		if (cmp > 0) +			beg = mid + 1; +		else if (cmp < 0) +			end = mid; +		else { +			*n = mid; +			return 1; +		} +	} + +	*n = end - 1; + +	/* The insert point is after *n */ +	ubifs_assert(*n >= -1 && *n < znode->child_cnt); +	if (*n == -1) +		ubifs_assert(keys_cmp(c, key, &zbr[0].key) < 0); +	else +		ubifs_assert(keys_cmp(c, key, &zbr[*n].key) > 0); +	if (*n + 1 < znode->child_cnt) +		ubifs_assert(keys_cmp(c, key, &zbr[*n + 1].key) < 0); + +	return 0; +} + +/** + * ubifs_tnc_postorder_first - find first znode to do postorder tree traversal. + * @znode: znode to start at (root of the sub-tree to traverse) + * + * Find the lowest leftmost znode in a subtree of the TNC tree. The LNC is + * ignored. + */ +struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode) +{ +	if (unlikely(!znode)) +		return NULL; + +	while (znode->level > 0) { +		struct ubifs_znode *child; + +		child = ubifs_tnc_find_child(znode, 0); +		if (!child) +			return znode; +		znode = child; +	} + +	return znode; +} + +/** + * ubifs_tnc_postorder_next - next TNC tree element in postorder traversal. + * @znode: previous znode + * + * This function implements postorder TNC traversal. The LNC is ignored. + * Returns the next element or %NULL if @znode is already the last one. 
+ */ +struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode) +{ +	struct ubifs_znode *zn; + +	ubifs_assert(znode); +	if (unlikely(!znode->parent)) +		return NULL; + +	/* Switch to the next index in the parent */ +	zn = ubifs_tnc_find_child(znode->parent, znode->iip + 1); +	if (!zn) +		/* This is in fact the last child, return parent */ +		return znode->parent; + +	/* Go to the first znode in this new subtree */ +	return ubifs_tnc_postorder_first(zn); +} + +/** + * read_znode - read an indexing node from flash and fill znode. + * @c: UBIFS file-system description object + * @lnum: LEB of the indexing node to read + * @offs: node offset + * @len: node length + * @znode: znode to read to + * + * This function reads an indexing node from the flash media and fills znode + * with the read data. Returns zero in case of success and a negative error + * code in case of failure. The read indexing node is validated and if anything + * is wrong with it, this function prints complaint messages and returns + * %-EINVAL. 
+ */ +static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, +		      struct ubifs_znode *znode) +{ +	int i, err, type, cmp; +	struct ubifs_idx_node *idx; + +	idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); +	if (!idx) +		return -ENOMEM; + +	err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); +	if (err < 0) { +		kfree(idx); +		return err; +	} + +	znode->child_cnt = le16_to_cpu(idx->child_cnt); +	znode->level = le16_to_cpu(idx->level); + +	dbg_tnc("LEB %d:%d, level %d, %d branch", +		lnum, offs, znode->level, znode->child_cnt); + +	if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) { +		dbg_err("current fanout %d, branch count %d", +			c->fanout, znode->child_cnt); +		dbg_err("max levels %d, znode level %d", +			UBIFS_MAX_LEVELS, znode->level); +		err = 1; +		goto out_dump; +	} + +	for (i = 0; i < znode->child_cnt; i++) { +		const struct ubifs_branch *br = ubifs_idx_branch(c, idx, i); +		struct ubifs_zbranch *zbr = &znode->zbranch[i]; + +		key_read(c, &br->key, &zbr->key); +		zbr->lnum = le32_to_cpu(br->lnum); +		zbr->offs = le32_to_cpu(br->offs); +		zbr->len  = le32_to_cpu(br->len); +		zbr->znode = NULL; + +		/* Validate branch */ + +		if (zbr->lnum < c->main_first || +		    zbr->lnum >= c->leb_cnt || zbr->offs < 0 || +		    zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) { +			dbg_err("bad branch %d", i); +			err = 2; +			goto out_dump; +		} + +		switch (key_type(c, &zbr->key)) { +		case UBIFS_INO_KEY: +		case UBIFS_DATA_KEY: +		case UBIFS_DENT_KEY: +		case UBIFS_XENT_KEY: +			break; +		default: +			dbg_msg("bad key type at slot %d: %s", i, +				DBGKEY(&zbr->key)); +			err = 3; +			goto out_dump; +		} + +		if (znode->level) +			continue; + +		type = key_type(c, &zbr->key); +		if (c->ranges[type].max_len == 0) { +			if (zbr->len != c->ranges[type].len) { +				dbg_err("bad target node (type %d) length (%d)", +					type, zbr->len); +				dbg_err("have to be %d", c->ranges[type].len); +				err = 4; +				goto out_dump; +	
		} +		} else if (zbr->len < c->ranges[type].min_len || +			   zbr->len > c->ranges[type].max_len) { +			dbg_err("bad target node (type %d) length (%d)", +				type, zbr->len); +			dbg_err("have to be in range of %d-%d", +				c->ranges[type].min_len, +				c->ranges[type].max_len); +			err = 5; +			goto out_dump; +		} +	} + +	/* +	 * Ensure that the next key is greater or equivalent to the +	 * previous one. +	 */ +	for (i = 0; i < znode->child_cnt - 1; i++) { +		const union ubifs_key *key1, *key2; + +		key1 = &znode->zbranch[i].key; +		key2 = &znode->zbranch[i + 1].key; + +		cmp = keys_cmp(c, key1, key2); +		if (cmp > 0) { +			dbg_err("bad key order (keys %d and %d)", i, i + 1); +			err = 6; +			goto out_dump; +		} else if (cmp == 0 && !is_hash_key(c, key1)) { +			/* These can only be keys with colliding hash */ +			dbg_err("keys %d and %d are not hashed but equivalent", +				i, i + 1); +			err = 7; +			goto out_dump; +		} +	} + +	kfree(idx); +	return 0; + +out_dump: +	ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err); +	dbg_dump_node(c, idx); +	kfree(idx); +	return -EINVAL; +} + +/** + * ubifs_load_znode - load znode to TNC cache. + * @c: UBIFS file-system description object + * @zbr: znode branch + * @parent: znode's parent + * @iip: index in parent + * + * This function loads znode pointed to by @zbr into the TNC cache and + * returns pointer to it in case of success and a negative error code in case + * of failure. + */ +struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, +				     struct ubifs_zbranch *zbr, +				     struct ubifs_znode *parent, int iip) +{ +	int err; +	struct ubifs_znode *znode; + +	ubifs_assert(!zbr->znode); +	/* +	 * A slab cache is not presently used for znodes because the znode size +	 * depends on the fanout which is stored in the superblock. 
+	 */ +	znode = kzalloc(c->max_znode_sz, GFP_NOFS); +	if (!znode) +		return ERR_PTR(-ENOMEM); + +	err = read_znode(c, zbr->lnum, zbr->offs, zbr->len, znode); +	if (err) +		goto out; + +	zbr->znode = znode; +	znode->parent = parent; +	znode->time = get_seconds(); +	znode->iip = iip; + +	return znode; + +out: +	kfree(znode); +	return ERR_PTR(err); +} + +/** + * ubifs_tnc_read_node - read a leaf node from the flash media. + * @c: UBIFS file-system description object + * @zbr: key and position of the node + * @node: node is returned here + * + * This function reads a node defined by @zbr from the flash media. Returns + * zero in case of success or a negative negative error code in case of + * failure. + */ +int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, +			void *node) +{ +	union ubifs_key key1, *key = &zbr->key; +	int err, type = key_type(c, key); + +	err = ubifs_read_node(c, node, type, zbr->len, zbr->lnum, zbr->offs); + +	if (err) { +		dbg_tnc("key %s", DBGKEY(key)); +		return err; +	} + +	/* Make sure the key of the read node is correct */ +	key_read(c, node + UBIFS_KEY_OFFSET, &key1); +	if (!keys_eq(c, key, &key1)) { +		ubifs_err("bad key in node at LEB %d:%d", +			  zbr->lnum, zbr->offs); +		dbg_tnc("looked for key %s found node's key %s", +			DBGKEY(key), DBGKEY1(&key1)); +		dbg_dump_node(c, node); +		return -EINVAL; +	} + +	return 0; +} diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h new file mode 100644 index 000000000..b25fc36cf --- /dev/null +++ b/fs/ubifs/ubifs-media.h @@ -0,0 +1,751 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + *          Adrian Hunter + */ + +/* + * This file describes UBIFS on-flash format and contains definitions of all the + * relevant data structures and constants. + * + * All UBIFS on-flash objects are stored in the form of nodes. All nodes start + * with the UBIFS node magic number and have the same common header. Nodes + * always sit at 8-byte aligned positions on the media and node header sizes are + * also 8-byte aligned (except for the indexing node and the padding node). + */ + +#ifndef __UBIFS_MEDIA_H__ +#define __UBIFS_MEDIA_H__ + +/* UBIFS node magic number (must not have the padding byte first or last) */ +#define UBIFS_NODE_MAGIC  0x06101831 + +/* UBIFS on-flash format version */ +#define UBIFS_FORMAT_VERSION 4 + +/* Minimum logical eraseblock size in bytes */ +#define UBIFS_MIN_LEB_SZ (15*1024) + +/* Initial CRC32 value used when calculating CRC checksums */ +#define UBIFS_CRC32_INIT 0xFFFFFFFFU + +/* + * UBIFS does not try to compress data if its length is less than the below + * constant. + */ +#define UBIFS_MIN_COMPR_LEN 128 + +/* + * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes + * shorter than uncompressed data length, UBIFS prefers to leave this data + * node uncompressed, because it'll be read faster.
+ */ +#define UBIFS_MIN_COMPRESS_DIFF 64 + +/* Root inode number */ +#define UBIFS_ROOT_INO 1 + +/* Lowest inode number used for regular inodes (not UBIFS-only internal ones) */ +#define UBIFS_FIRST_INO 64 + +/* + * Maximum file name and extended attribute length (must be a multiple of 8, + * minus 1). + */ +#define UBIFS_MAX_NLEN 255 + +/* Maximum number of data journal heads */ +#define UBIFS_MAX_JHEADS 1 + +/* + * Size of UBIFS data block. Note, UBIFS is not a block oriented file-system, + * which means that it does not treat the underlying media as consisting of + * blocks like in case of hard drives. Do not be confused. UBIFS block is just + * the maximum amount of data which one data node can have or which can be + * attached to an inode node. + */ +#define UBIFS_BLOCK_SIZE  4096 +#define UBIFS_BLOCK_SHIFT 12 + +/* UBIFS padding byte pattern (must not be first or last byte of node magic) */ +#define UBIFS_PADDING_BYTE 0xCE + +/* Maximum possible key length */ +#define UBIFS_MAX_KEY_LEN 16 + +/* Key length ("simple" format) */ +#define UBIFS_SK_LEN 8 + +/* Minimum index tree fanout */ +#define UBIFS_MIN_FANOUT 3 + +/* Maximum number of levels in UBIFS indexing B-tree */ +#define UBIFS_MAX_LEVELS 512 + +/* Maximum amount of data attached to an inode in bytes */ +#define UBIFS_MAX_INO_DATA UBIFS_BLOCK_SIZE + +/* LEB Properties Tree fanout (must be power of 2) and fanout shift */ +#define UBIFS_LPT_FANOUT 4 +#define UBIFS_LPT_FANOUT_SHIFT 2 + +/* LEB Properties Tree bit field sizes */ +#define UBIFS_LPT_CRC_BITS 16 +#define UBIFS_LPT_CRC_BYTES 2 +#define UBIFS_LPT_TYPE_BITS 4 + +/* The key is always at the same position in all keyed nodes */ +#define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key) + +/* + * LEB Properties Tree node types. 
+ * + * UBIFS_LPT_PNODE: LPT leaf node (contains LEB properties) + * UBIFS_LPT_NNODE: LPT internal node + * UBIFS_LPT_LTAB: LPT's own lprops table + * UBIFS_LPT_LSAVE: LPT's save table (big model only) + * UBIFS_LPT_NODE_CNT: count of LPT node types + * UBIFS_LPT_NOT_A_NODE: all ones (15 for 4 bits) is never a valid node type + */ +enum { +	UBIFS_LPT_PNODE, +	UBIFS_LPT_NNODE, +	UBIFS_LPT_LTAB, +	UBIFS_LPT_LSAVE, +	UBIFS_LPT_NODE_CNT, +	UBIFS_LPT_NOT_A_NODE = (1 << UBIFS_LPT_TYPE_BITS) - 1, +}; + +/* + * UBIFS inode types. + * + * UBIFS_ITYPE_REG: regular file + * UBIFS_ITYPE_DIR: directory + * UBIFS_ITYPE_LNK: soft link + * UBIFS_ITYPE_BLK: block device node + * UBIFS_ITYPE_CHR: character device node + * UBIFS_ITYPE_FIFO: fifo + * UBIFS_ITYPE_SOCK: socket + * UBIFS_ITYPES_CNT: count of supported file types + */ +enum { +	UBIFS_ITYPE_REG, +	UBIFS_ITYPE_DIR, +	UBIFS_ITYPE_LNK, +	UBIFS_ITYPE_BLK, +	UBIFS_ITYPE_CHR, +	UBIFS_ITYPE_FIFO, +	UBIFS_ITYPE_SOCK, +	UBIFS_ITYPES_CNT, +}; + +/* + * Supported key hash functions. + * + * UBIFS_KEY_HASH_R5: R5 hash + * UBIFS_KEY_HASH_TEST: test hash which just returns first 4 bytes of the name + */ +enum { +	UBIFS_KEY_HASH_R5, +	UBIFS_KEY_HASH_TEST, +}; + +/* + * Supported key formats. + * + * UBIFS_SIMPLE_KEY_FMT: simple key format + */ +enum { +	UBIFS_SIMPLE_KEY_FMT, +}; + +/* + * The simple key format uses 29 bits for storing UBIFS block number and hash + * value. + */ +#define UBIFS_S_KEY_BLOCK_BITS 29 +#define UBIFS_S_KEY_BLOCK_MASK 0x1FFFFFFF +#define UBIFS_S_KEY_HASH_BITS  UBIFS_S_KEY_BLOCK_BITS +#define UBIFS_S_KEY_HASH_MASK  UBIFS_S_KEY_BLOCK_MASK + +/* + * Key types. 
+ * + * UBIFS_INO_KEY: inode node key + * UBIFS_DATA_KEY: data node key + * UBIFS_DENT_KEY: directory entry node key + * UBIFS_XENT_KEY: extended attribute entry key + * UBIFS_KEY_TYPES_CNT: number of supported key types + */ +enum { +	UBIFS_INO_KEY, +	UBIFS_DATA_KEY, +	UBIFS_DENT_KEY, +	UBIFS_XENT_KEY, +	UBIFS_KEY_TYPES_CNT, +}; + +/* Count of LEBs reserved for the superblock area */ +#define UBIFS_SB_LEBS 1 +/* Count of LEBs reserved for the master area */ +#define UBIFS_MST_LEBS 2 + +/* First LEB of the superblock area */ +#define UBIFS_SB_LNUM 0 +/* First LEB of the master area */ +#define UBIFS_MST_LNUM (UBIFS_SB_LNUM + UBIFS_SB_LEBS) +/* First LEB of the log area */ +#define UBIFS_LOG_LNUM (UBIFS_MST_LNUM + UBIFS_MST_LEBS) + +/* + * The below constants define the absolute minimum values for various UBIFS + * media areas. Many of them actually depend on flash geometry and the FS + * configuration (number of journal heads, orphan LEBs, etc). This means that + * the smallest volume size which can be used for UBIFS cannot be pre-defined + * by these constants. A file-system that meets the below limits will not + * necessarily mount. UBIFS does run-time calculations and validates the FS + * size. + */ + +/* Minimum number of logical eraseblocks in the log */ +#define UBIFS_MIN_LOG_LEBS 2 +/* Minimum number of bud logical eraseblocks (one for each head) */ +#define UBIFS_MIN_BUD_LEBS 3 +/* Minimum number of journal logical eraseblocks */ +#define UBIFS_MIN_JNL_LEBS (UBIFS_MIN_LOG_LEBS + UBIFS_MIN_BUD_LEBS) +/* Minimum number of LPT area logical eraseblocks */ +#define UBIFS_MIN_LPT_LEBS 2 +/* Minimum number of orphan area logical eraseblocks */ +#define UBIFS_MIN_ORPH_LEBS 1 +/* + * Minimum number of main area logical eraseblocks (buds, 3 for the index, 1 + * for GC, 1 for deletions, and at least 1 for committed data).
+ */ +#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 6) + +/* Minimum number of logical eraseblocks */ +#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \ +			   UBIFS_MIN_LOG_LEBS + UBIFS_MIN_LPT_LEBS + \ +			   UBIFS_MIN_ORPH_LEBS + UBIFS_MIN_MAIN_LEBS) + +/* Node sizes (N.B. these are guaranteed to be multiples of 8) */ +#define UBIFS_CH_SZ        sizeof(struct ubifs_ch) +#define UBIFS_INO_NODE_SZ  sizeof(struct ubifs_ino_node) +#define UBIFS_DATA_NODE_SZ sizeof(struct ubifs_data_node) +#define UBIFS_DENT_NODE_SZ sizeof(struct ubifs_dent_node) +#define UBIFS_TRUN_NODE_SZ sizeof(struct ubifs_trun_node) +#define UBIFS_PAD_NODE_SZ  sizeof(struct ubifs_pad_node) +#define UBIFS_SB_NODE_SZ   sizeof(struct ubifs_sb_node) +#define UBIFS_MST_NODE_SZ  sizeof(struct ubifs_mst_node) +#define UBIFS_REF_NODE_SZ  sizeof(struct ubifs_ref_node) +#define UBIFS_IDX_NODE_SZ  sizeof(struct ubifs_idx_node) +#define UBIFS_CS_NODE_SZ   sizeof(struct ubifs_cs_node) +#define UBIFS_ORPH_NODE_SZ sizeof(struct ubifs_orph_node) +/* Extended attribute entry nodes are identical to directory entry nodes */ +#define UBIFS_XENT_NODE_SZ UBIFS_DENT_NODE_SZ +/* Only this does not have to be multiple of 8 bytes */ +#define UBIFS_BRANCH_SZ    sizeof(struct ubifs_branch) + +/* Maximum node sizes (N.B. these are guaranteed to be multiples of 8) */ +#define UBIFS_MAX_DATA_NODE_SZ  (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE) +#define UBIFS_MAX_INO_NODE_SZ   (UBIFS_INO_NODE_SZ + UBIFS_MAX_INO_DATA) +#define UBIFS_MAX_DENT_NODE_SZ  (UBIFS_DENT_NODE_SZ + UBIFS_MAX_NLEN + 1) +#define UBIFS_MAX_XENT_NODE_SZ  UBIFS_MAX_DENT_NODE_SZ + +/* The largest UBIFS node */ +#define UBIFS_MAX_NODE_SZ UBIFS_MAX_INO_NODE_SZ + +/* + * On-flash inode flags. 
+ * + * UBIFS_COMPR_FL: use compression for this inode + * UBIFS_SYNC_FL:  I/O on this inode has to be synchronous + * UBIFS_IMMUTABLE_FL: inode is immutable + * UBIFS_APPEND_FL: writes to the inode may only append data + * UBIFS_DIRSYNC_FL: I/O on this directory inode has to be synchronous + * UBIFS_XATTR_FL: this inode is the inode for an extended attribute value + * + * Note, these are on-flash flags which correspond to ioctl flags + * (@FS_COMPR_FL, etc). They have the same values now, but generally, do not + * have to be the same. + */ +enum { +	UBIFS_COMPR_FL     = 0x01, +	UBIFS_SYNC_FL      = 0x02, +	UBIFS_IMMUTABLE_FL = 0x04, +	UBIFS_APPEND_FL    = 0x08, +	UBIFS_DIRSYNC_FL   = 0x10, +	UBIFS_XATTR_FL     = 0x20, +}; + +/* Inode flag bits used by UBIFS */ +#define UBIFS_FL_MASK 0x0000001F + +/* + * UBIFS compression algorithms. + * + * UBIFS_COMPR_NONE: no compression + * UBIFS_COMPR_LZO: LZO compression + * UBIFS_COMPR_ZLIB: ZLIB compression + * UBIFS_COMPR_TYPES_CNT: count of supported compression types + */ +enum { +	UBIFS_COMPR_NONE, +	UBIFS_COMPR_LZO, +	UBIFS_COMPR_ZLIB, +	UBIFS_COMPR_TYPES_CNT, +}; + +/* + * UBIFS node types. + * + * UBIFS_INO_NODE: inode node + * UBIFS_DATA_NODE: data node + * UBIFS_DENT_NODE: directory entry node + * UBIFS_XENT_NODE: extended attribute node + * UBIFS_TRUN_NODE: truncation node + * UBIFS_PAD_NODE: padding node + * UBIFS_SB_NODE: superblock node + * UBIFS_MST_NODE: master node + * UBIFS_REF_NODE: LEB reference node + * UBIFS_IDX_NODE: index node + * UBIFS_CS_NODE: commit start node + * UBIFS_ORPH_NODE: orphan node + * UBIFS_NODE_TYPES_CNT: count of supported node types + * + * Note, we index arrays by these numbers, so keep them low and contiguous. + * Node type constants for inodes, direntries and so on have to be the same as + * corresponding key type constants. 
+ */ +enum { +	UBIFS_INO_NODE, +	UBIFS_DATA_NODE, +	UBIFS_DENT_NODE, +	UBIFS_XENT_NODE, +	UBIFS_TRUN_NODE, +	UBIFS_PAD_NODE, +	UBIFS_SB_NODE, +	UBIFS_MST_NODE, +	UBIFS_REF_NODE, +	UBIFS_IDX_NODE, +	UBIFS_CS_NODE, +	UBIFS_ORPH_NODE, +	UBIFS_NODE_TYPES_CNT, +}; + +/* + * Master node flags. + * + * UBIFS_MST_DIRTY: rebooted uncleanly - master node is dirty + * UBIFS_MST_NO_ORPHS: no orphan inodes present + * UBIFS_MST_RCVRY: written by recovery + */ +enum { +	UBIFS_MST_DIRTY = 1, +	UBIFS_MST_NO_ORPHS = 2, +	UBIFS_MST_RCVRY = 4, +}; + +/* + * Node group type (used by recovery to recover whole group or none). + * + * UBIFS_NO_NODE_GROUP: this node is not part of a group + * UBIFS_IN_NODE_GROUP: this node is a part of a group + * UBIFS_LAST_OF_NODE_GROUP: this node is the last in a group + */ +enum { +	UBIFS_NO_NODE_GROUP = 0, +	UBIFS_IN_NODE_GROUP, +	UBIFS_LAST_OF_NODE_GROUP, +}; + +/* + * Superblock flags. + * + * UBIFS_FLG_BIGLPT: "big" LPT model is used if set + */ +enum { +	UBIFS_FLG_BIGLPT = 0x02, +}; + +/** + * struct ubifs_ch - common header node. + * @magic: UBIFS node magic number (%UBIFS_NODE_MAGIC) + * @crc: CRC-32 checksum of the node header + * @sqnum: sequence number + * @len: full node length + * @node_type: node type + * @group_type: node group type + * @padding: reserved for future, zeroes + * + * Every UBIFS node starts with this common part. If the node has a key, the + * key always goes next. + */ +struct ubifs_ch { +	__le32 magic; +	__le32 crc; +	__le64 sqnum; +	__le32 len; +	__u8 node_type; +	__u8 group_type; +	__u8 padding[2]; +} __attribute__ ((packed)); + +/** + * union ubifs_dev_desc - device node descriptor. + * @new: new type device descriptor + * @huge: huge type device descriptor + * + * This data structure describes major/minor numbers of a device node. If an + * inode is a device node then its data contains an object of this type. UBIFS + * uses standard Linux "new" and "huge" device node encodings. 
+ */ +union ubifs_dev_desc { +	__le32 new; +	__le64 huge; +} __attribute__ ((packed)); + +/** + * struct ubifs_ino_node - inode node. + * @ch: common header + * @key: node key + * @creat_sqnum: sequence number at time of creation + * @size: inode size in bytes (amount of uncompressed data) + * @atime_sec: access time seconds + * @ctime_sec: creation time seconds + * @mtime_sec: modification time seconds + * @atime_nsec: access time nanoseconds + * @ctime_nsec: creation time nanoseconds + * @mtime_nsec: modification time nanoseconds + * @nlink: number of hard links + * @uid: owner ID + * @gid: group ID + * @mode: access flags + * @flags: per-inode flags (%UBIFS_COMPR_FL, %UBIFS_SYNC_FL, etc) + * @data_len: inode data length + * @xattr_cnt: count of extended attributes this inode has + * @xattr_size: summarized size of all extended attributes in bytes + * @padding1: reserved for future, zeroes + * @xattr_names: sum of lengths of all extended attribute names belonging to + *               this inode + * @compr_type: compression type used for this inode + * @padding2: reserved for future, zeroes + * @data: data attached to the inode + * + * Note, even though inode compression type is defined by @compr_type, some + * nodes of this inode may be compressed with different compressor - this + * happens if compression type is changed while the inode already has data + * nodes. But @compr_type will be used for further writes to the inode. + * + * Note, do not forget to amend 'zero_ino_node_unused()' function when changing + * the padding fields. 
+ */ +struct ubifs_ino_node { +	struct ubifs_ch ch; +	__u8 key[UBIFS_MAX_KEY_LEN]; +	__le64 creat_sqnum; +	__le64 size; +	__le64 atime_sec; +	__le64 ctime_sec; +	__le64 mtime_sec; +	__le32 atime_nsec; +	__le32 ctime_nsec; +	__le32 mtime_nsec; +	__le32 nlink; +	__le32 uid; +	__le32 gid; +	__le32 mode; +	__le32 flags; +	__le32 data_len; +	__le32 xattr_cnt; +	__le32 xattr_size; +	__u8 padding1[4]; /* Watch 'zero_ino_node_unused()' if changing! */ +	__le32 xattr_names; +	__le16 compr_type; +	__u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ +	__u8 data[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_dent_node - directory entry node. + * @ch: common header + * @key: node key + * @inum: target inode number + * @padding1: reserved for future, zeroes + * @type: type of the target inode (%UBIFS_ITYPE_REG, %UBIFS_ITYPE_DIR, etc) + * @nlen: name length + * @padding2: reserved for future, zeroes + * @name: zero-terminated name + * + * Note, do not forget to amend 'zero_dent_node_unused()' function when + * changing the padding fields. + */ +struct ubifs_dent_node { +	struct ubifs_ch ch; +	__u8 key[UBIFS_MAX_KEY_LEN]; +	__le64 inum; +	__u8 padding1; +	__u8 type; +	__le16 nlen; +	__u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ +	__u8 name[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_data_node - data node. + * @ch: common header + * @key: node key + * @size: uncompressed data size in bytes + * @compr_type: compression type (%UBIFS_COMPR_NONE, %UBIFS_COMPR_LZO, etc) + * @padding: reserved for future, zeroes + * @data: data + * + * Note, do not forget to amend 'zero_data_node_unused()' function when + * changing the padding fields. + */ +struct ubifs_data_node { +	struct ubifs_ch ch; +	__u8 key[UBIFS_MAX_KEY_LEN]; +	__le32 size; +	__le16 compr_type; +	__u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ +	__u8 data[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_trun_node - truncation node. 
+ * @ch: common header + * @inum: truncated inode number + * @padding: reserved for future, zeroes + * @old_size: size before truncation + * @new_size: size after truncation + * + * This node exists only in the journal and never goes to the main area. Note, + * do not forget to amend 'zero_trun_node_unused()' function when changing the + * padding fields. + */ +struct ubifs_trun_node { +	struct ubifs_ch ch; +	__le32 inum; +	__u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ +	__le64 old_size; +	__le64 new_size; +} __attribute__ ((packed)); + +/** + * struct ubifs_pad_node - padding node. + * @ch: common header + * @pad_len: how many bytes after this node are unused (because padded) + * @padding: reserved for future, zeroes + */ +struct ubifs_pad_node { +	struct ubifs_ch ch; +	__le32 pad_len; +} __attribute__ ((packed)); + +/** + * struct ubifs_sb_node - superblock node. + * @ch: common header + * @padding: reserved for future, zeroes + * @key_hash: type of hash function used in keys + * @key_fmt: format of the key + * @flags: file-system flags (%UBIFS_FLG_BIGLPT, etc) + * @min_io_size: minimal input/output unit size + * @leb_size: logical eraseblock size in bytes + * @leb_cnt: count of LEBs used by file-system + * @max_leb_cnt: maximum count of LEBs used by file-system + * @max_bud_bytes: maximum amount of data stored in buds + * @log_lebs: log size in logical eraseblocks + * @lpt_lebs: number of LEBs used for lprops table + * @orph_lebs: number of LEBs used for recording orphans + * @jhead_cnt: count of journal heads + * @fanout: tree fanout (max. 
number of links per indexing node) + * @lsave_cnt: number of LEB numbers in LPT's save table + * @fmt_version: UBIFS on-flash format version + * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) + * @padding1: reserved for future, zeroes + * @rp_uid: reserve pool UID + * @rp_gid: reserve pool GID + * @rp_size: size of the reserved pool in bytes + * @padding2: reserved for future, zeroes + * @time_gran: time granularity in nanoseconds + * @uuid: UUID generated when the file system image was created + */ +struct ubifs_sb_node { +	struct ubifs_ch ch; +	__u8 padding[2]; +	__u8 key_hash; +	__u8 key_fmt; +	__le32 flags; +	__le32 min_io_size; +	__le32 leb_size; +	__le32 leb_cnt; +	__le32 max_leb_cnt; +	__le64 max_bud_bytes; +	__le32 log_lebs; +	__le32 lpt_lebs; +	__le32 orph_lebs; +	__le32 jhead_cnt; +	__le32 fanout; +	__le32 lsave_cnt; +	__le32 fmt_version; +	__le16 default_compr; +	__u8 padding1[2]; +	__le32 rp_uid; +	__le32 rp_gid; +	__le64 rp_size; +	__le32 time_gran; +	__u8 uuid[16]; +	__u8 padding2[3972]; +} __attribute__ ((packed)); + +/** + * struct ubifs_mst_node - master node. 
+ * @ch: common header + * @highest_inum: highest inode number in the committed index + * @cmt_no: commit number + * @flags: various flags (%UBIFS_MST_DIRTY, etc) + * @log_lnum: start of the log + * @root_lnum: LEB number of the root indexing node + * @root_offs: offset within @root_lnum + * @root_len: root indexing node length + * @gc_lnum: LEB reserved for garbage collection (%-1 value means the LEB was + * not reserved and should be reserved on mount) + * @ihead_lnum: LEB number of index head + * @ihead_offs: offset of index head + * @index_size: size of index on flash + * @total_free: total free space in bytes + * @total_dirty: total dirty space in bytes + * @total_used: total used space in bytes (includes only data LEBs) + * @total_dead: total dead space in bytes (includes only data LEBs) + * @total_dark: total dark space in bytes (includes only data LEBs) + * @lpt_lnum: LEB number of LPT root nnode + * @lpt_offs: offset of LPT root nnode + * @nhead_lnum: LEB number of LPT head + * @nhead_offs: offset of LPT head + * @ltab_lnum: LEB number of LPT's own lprops table + * @ltab_offs: offset of LPT's own lprops table + * @lsave_lnum: LEB number of LPT's save table (big model only) + * @lsave_offs: offset of LPT's save table (big model only) + * @lscan_lnum: LEB number of last LPT scan + * @empty_lebs: number of empty logical eraseblocks + * @idx_lebs: number of indexing logical eraseblocks + * @leb_cnt: count of LEBs used by file-system + * @padding: reserved for future, zeroes + */ +struct ubifs_mst_node { +	struct ubifs_ch ch; +	__le64 highest_inum; +	__le64 cmt_no; +	__le32 flags; +	__le32 log_lnum; +	__le32 root_lnum; +	__le32 root_offs; +	__le32 root_len; +	__le32 gc_lnum; +	__le32 ihead_lnum; +	__le32 ihead_offs; +	__le64 index_size; +	__le64 total_free; +	__le64 total_dirty; +	__le64 total_used; +	__le64 total_dead; +	__le64 total_dark; +	__le32 lpt_lnum; +	__le32 lpt_offs; +	__le32 nhead_lnum; +	__le32 nhead_offs; +	__le32 ltab_lnum; +	__le32 ltab_offs; +	
__le32 lsave_lnum; +	__le32 lsave_offs; +	__le32 lscan_lnum; +	__le32 empty_lebs; +	__le32 idx_lebs; +	__le32 leb_cnt; +	__u8 padding[344]; +} __attribute__ ((packed)); + +/** + * struct ubifs_ref_node - logical eraseblock reference node. + * @ch: common header + * @lnum: the referred logical eraseblock number + * @offs: start offset in the referred LEB + * @jhead: journal head number + * @padding: reserved for future, zeroes + */ +struct ubifs_ref_node { +	struct ubifs_ch ch; +	__le32 lnum; +	__le32 offs; +	__le32 jhead; +	__u8 padding[28]; +} __attribute__ ((packed)); + +/** + * struct ubifs_branch - key/reference/length branch + * @lnum: LEB number of the target node + * @offs: offset within @lnum + * @len: target node length + * @key: key + */ +struct ubifs_branch { +	__le32 lnum; +	__le32 offs; +	__le32 len; +	__u8 key[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_idx_node - indexing node. + * @ch: common header + * @child_cnt: number of child index nodes + * @level: tree level + * @branches: LEB number / offset / length / key branches + */ +struct ubifs_idx_node { +	struct ubifs_ch ch; +	__le16 child_cnt; +	__le16 level; +	__u8 branches[]; +} __attribute__ ((packed)); + +/** + * struct ubifs_cs_node - commit start node. + * @ch: common header + * @cmt_no: commit number + */ +struct ubifs_cs_node { +	struct ubifs_ch ch; +	__le64 cmt_no; +} __attribute__ ((packed)); + +/** + * struct ubifs_orph_node - orphan node. + * @ch: common header + * @cmt_no: commit number (also top bit is set on the last node of the commit) + * @inos: inode numbers of orphans + */ +struct ubifs_orph_node { +	struct ubifs_ch ch; +	__le64 cmt_no; +	__le64 inos[]; +} __attribute__ ((packed)); + +#endif /* __UBIFS_MEDIA_H__ */ diff --git a/fs/ubifs/ubifs.c b/fs/ubifs/ubifs.c new file mode 100644 index 000000000..d91786388 --- /dev/null +++ b/fs/ubifs/ubifs.c @@ -0,0 +1,684 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation. 
+ * + * (C) Copyright 2008-2009 + * Stefan Roese, DENX Software Engineering, sr@denx.de. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + *          Adrian Hunter + */ + +#include "ubifs.h" + +#if !defined(CONFIG_SYS_64BIT_VSPRINTF) +#warning Please define CONFIG_SYS_64BIT_VSPRINTF for correct output! +#endif + +DECLARE_GLOBAL_DATA_PTR; + +/* compress.c */ + +/* + * We need a wrapper for gunzip() because the parameters are + * incompatible with the lzo decompressor. 
+ */ +static int gzip_decompress(const unsigned char *in, size_t in_len, +			   unsigned char *out, size_t *out_len) +{ +	unsigned long len = in_len; +	return gunzip(out, *out_len, (unsigned char *)in, &len); +} + +/* Fake description object for the "none" compressor */ +static struct ubifs_compressor none_compr = { +	.compr_type = UBIFS_COMPR_NONE, +	.name = "no compression", +	.capi_name = "", +	.decompress = NULL, +}; + +static struct ubifs_compressor lzo_compr = { +	.compr_type = UBIFS_COMPR_LZO, +	.name = "LZO", +	.capi_name = "lzo", +	.decompress = lzo1x_decompress_safe, +}; + +static struct ubifs_compressor zlib_compr = { +	.compr_type = UBIFS_COMPR_ZLIB, +	.name = "zlib", +	.capi_name = "deflate", +	.decompress = gzip_decompress, +}; + +/* All UBIFS compressors */ +struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; + +/** + * ubifs_decompress - decompress data. + * @in_buf: data to decompress + * @in_len: length of the data to decompress + * @out_buf: output buffer where decompressed data should + * @out_len: output length is returned here + * @compr_type: type of compression + * + * This function decompresses data from buffer @in_buf into buffer @out_buf. + * The length of the uncompressed data is returned in @out_len. This functions + * returns %0 on success or a negative error code on failure. 
+ */ +int ubifs_decompress(const void *in_buf, int in_len, void *out_buf, +		     int *out_len, int compr_type) +{ +	int err; +	struct ubifs_compressor *compr; + +	if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) { +		ubifs_err("invalid compression type %d", compr_type); +		return -EINVAL; +	} + +	compr = ubifs_compressors[compr_type]; + +	if (unlikely(!compr->capi_name)) { +		ubifs_err("%s compression is not compiled in", compr->name); +		return -EINVAL; +	} + +	if (compr_type == UBIFS_COMPR_NONE) { +		memcpy(out_buf, in_buf, in_len); +		*out_len = in_len; +		return 0; +	} + +	err = compr->decompress(in_buf, in_len, out_buf, (size_t *)out_len); +	if (err) +		ubifs_err("cannot decompress %d bytes, compressor %s, " +			  "error %d", in_len, compr->name, err); + +	return err; +} + +/** + * compr_init - initialize a compressor. + * @compr: compressor description object + * + * This function initializes the requested compressor and returns zero in case + * of success or a negative error code in case of failure. + */ +static int __init compr_init(struct ubifs_compressor *compr) +{ +	ubifs_compressors[compr->compr_type] = compr; +	ubifs_compressors[compr->compr_type]->name += gd->reloc_off; +	ubifs_compressors[compr->compr_type]->capi_name += gd->reloc_off; +	ubifs_compressors[compr->compr_type]->decompress += gd->reloc_off; +	return 0; +} + +/** + * ubifs_compressors_init - initialize UBIFS compressors. + * + * This function initializes the compressor which were compiled in. Returns + * zero in case of success and a negative error code in case of failure. + */ +int __init ubifs_compressors_init(void) +{ +	int err; + +	err = compr_init(&lzo_compr); +	if (err) +		return err; + +	err = compr_init(&zlib_compr); +	if (err) +		return err; + +	ubifs_compressors[UBIFS_COMPR_NONE] = &none_compr; +	return 0; +} + +/* + * ubifsls... 
+ */ + +static int filldir(struct ubifs_info *c, const char *name, int namlen, +		   u64 ino, unsigned int d_type) +{ +	struct inode *inode; +	char filetime[32]; + +	switch (d_type) { +	case UBIFS_ITYPE_REG: +		printf("\t"); +		break; +	case UBIFS_ITYPE_DIR: +		printf("<DIR>\t"); +		break; +	case UBIFS_ITYPE_LNK: +		printf("<LNK>\t"); +		break; +	default: +		printf("other\t"); +		break; +	} + +	inode = ubifs_iget(c->vfs_sb, ino); +	if (IS_ERR(inode)) { +		printf("%s: Error in ubifs_iget(), ino=%lld ret=%p!\n", +		       __func__, ino, inode); +		return -1; +	} +	ctime_r((time_t *)&inode->i_mtime, filetime); +	printf("%9lld  %24.24s  ", inode->i_size, filetime); +	ubifs_iput(inode); + +	printf("%s\n", name); + +	return 0; +} + +static int ubifs_printdir(struct file *file, void *dirent) +{ +	int err, over = 0; +	struct qstr nm; +	union ubifs_key key; +	struct ubifs_dent_node *dent; +	struct inode *dir = file->f_path.dentry->d_inode; +	struct ubifs_info *c = dir->i_sb->s_fs_info; + +	dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos); + +	if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2) +		/* +		 * The directory was seek'ed to a senseless position or there +		 * are no more entries. +		 */ +		return 0; + +	if (file->f_pos == 1) { +		/* Find the first entry in TNC and save it */ +		lowest_dent_key(c, &key, dir->i_ino); +		nm.name = NULL; +		dent = ubifs_tnc_next_ent(c, &key, &nm); +		if (IS_ERR(dent)) { +			err = PTR_ERR(dent); +			goto out; +		} + +		file->f_pos = key_hash_flash(c, &dent->key); +		file->private_data = dent; +	} + +	dent = file->private_data; +	if (!dent) { +		/* +		 * The directory was seek'ed to and is now readdir'ed. +		 * Find the entry corresponding to @file->f_pos or the +		 * closest one. 
+		 */ +		dent_key_init_hash(c, &key, dir->i_ino, file->f_pos); +		nm.name = NULL; +		dent = ubifs_tnc_next_ent(c, &key, &nm); +		if (IS_ERR(dent)) { +			err = PTR_ERR(dent); +			goto out; +		} +		file->f_pos = key_hash_flash(c, &dent->key); +		file->private_data = dent; +	} + +	while (1) { +		dbg_gen("feed '%s', ino %llu, new f_pos %#x", +			dent->name, (unsigned long long)le64_to_cpu(dent->inum), +			key_hash_flash(c, &dent->key)); +		ubifs_assert(le64_to_cpu(dent->ch.sqnum) > ubifs_inode(dir)->creat_sqnum); + +		nm.len = le16_to_cpu(dent->nlen); +		over = filldir(c, (char *)dent->name, nm.len, +			       le64_to_cpu(dent->inum), dent->type); +		if (over) +			return 0; + +		/* Switch to the next entry */ +		key_read(c, &dent->key, &key); +		nm.name = (char *)dent->name; +		dent = ubifs_tnc_next_ent(c, &key, &nm); +		if (IS_ERR(dent)) { +			err = PTR_ERR(dent); +			goto out; +		} + +		kfree(file->private_data); +		file->f_pos = key_hash_flash(c, &dent->key); +		file->private_data = dent; +		cond_resched(); +	} + +out: +	if (err != -ENOENT) { +		ubifs_err("cannot find next direntry, error %d", err); +		return err; +	} + +	kfree(file->private_data); +	file->private_data = NULL; +	file->f_pos = 2; +	return 0; +} + +static int ubifs_finddir(struct super_block *sb, char *dirname, +			 unsigned long root_inum, unsigned long *inum) +{ +	int err; +	struct qstr nm; +	union ubifs_key key; +	struct ubifs_dent_node *dent; +	struct ubifs_info *c; +	struct file *file; +	struct dentry *dentry; +	struct inode *dir; + +	file = kzalloc(sizeof(struct file), 0); +	dentry = kzalloc(sizeof(struct dentry), 0); +	dir = kzalloc(sizeof(struct inode), 0); +	if (!file || !dentry || !dir) { +		printf("%s: Error, no memory for malloc!\n", __func__); +		err = -ENOMEM; +		goto out; +	} + +	dir->i_sb = sb; +	file->f_path.dentry = dentry; +	file->f_path.dentry->d_parent = dentry; +	file->f_path.dentry->d_inode = dir; +	file->f_path.dentry->d_inode->i_ino = root_inum; +	c = sb->s_fs_info; + +	
dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos); + +	/* Find the first entry in TNC and save it */ +	lowest_dent_key(c, &key, dir->i_ino); +	nm.name = NULL; +	dent = ubifs_tnc_next_ent(c, &key, &nm); +	if (IS_ERR(dent)) { +		err = PTR_ERR(dent); +		goto out; +	} + +	file->f_pos = key_hash_flash(c, &dent->key); +	file->private_data = dent; + +	while (1) { +		dbg_gen("feed '%s', ino %llu, new f_pos %#x", +			dent->name, (unsigned long long)le64_to_cpu(dent->inum), +			key_hash_flash(c, &dent->key)); +		ubifs_assert(le64_to_cpu(dent->ch.sqnum) > ubifs_inode(dir)->creat_sqnum); + +		nm.len = le16_to_cpu(dent->nlen); +		if ((strncmp(dirname, (char *)dent->name, nm.len) == 0) && +		    (strlen(dirname) == nm.len)) { +			*inum = le64_to_cpu(dent->inum); +			return 1; +		} + +		/* Switch to the next entry */ +		key_read(c, &dent->key, &key); +		nm.name = (char *)dent->name; +		dent = ubifs_tnc_next_ent(c, &key, &nm); +		if (IS_ERR(dent)) { +			err = PTR_ERR(dent); +			goto out; +		} + +		kfree(file->private_data); +		file->f_pos = key_hash_flash(c, &dent->key); +		file->private_data = dent; +		cond_resched(); +	} + +out: +	if (err != -ENOENT) { +		ubifs_err("cannot find next direntry, error %d", err); +		return err; +	} + +	if (file) +		free(file); +	if (dentry) +		free(dentry); +	if (dir) +		free(dir); + +	if (file->private_data) +		kfree(file->private_data); +	file->private_data = NULL; +	file->f_pos = 2; +	return 0; +} + +static unsigned long ubifs_findfile(struct super_block *sb, char *filename) +{ +	int ret; +	char *next; +	char fpath[128]; +	char *name = fpath; +	unsigned long root_inum = 1; +	unsigned long inum; + +	strcpy(fpath, filename); + +	/* Remove all leading slashes */ +	while (*name == '/') +		name++; + +	/* +	 * Handle root-direcoty ('/') +	 */ +	inum = root_inum; +	if (!name || *name == '\0') +		return inum; + +	for (;;) { +		/* Extract the actual part from the pathname.  
*/ +		next = strchr(name, '/'); +		if (next) { +			/* Remove all leading slashes.  */ +			while (*next == '/') +				*(next++) = '\0'; +		} + +		ret = ubifs_finddir(sb, name, root_inum, &inum); + +		/* +		 * Check if directory with this name exists +		 */ + +		/* Found the node!  */ +		if (!next || *next == '\0') { +			if (ret) +				return inum; + +			break; +		} + +		root_inum = inum; +		name = next; +	} + +	return 0; +} + +int ubifs_ls(char *filename) +{ +	struct ubifs_info *c = ubifs_sb->s_fs_info; +	struct file *file; +	struct dentry *dentry; +	struct inode *dir; +	void *dirent = NULL; +	unsigned long inum; +	int ret = 0; + +	c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READONLY); +	inum = ubifs_findfile(ubifs_sb, filename); +	if (!inum) { +		ret = -1; +		goto out; +	} + +	file = kzalloc(sizeof(struct file), 0); +	dentry = kzalloc(sizeof(struct dentry), 0); +	dir = kzalloc(sizeof(struct inode), 0); +	if (!file || !dentry || !dir) { +		printf("%s: Error, no memory for malloc!\n", __func__); +		ret = -ENOMEM; +		goto out_mem; +	} + +	dir->i_sb = ubifs_sb; +	file->f_path.dentry = dentry; +	file->f_path.dentry->d_parent = dentry; +	file->f_path.dentry->d_inode = dir; +	file->f_path.dentry->d_inode->i_ino = inum; +	file->f_pos = 1; +	file->private_data = NULL; +	ubifs_printdir(file, dirent); + +out_mem: +	if (file) +		free(file); +	if (dentry) +		free(dentry); +	if (dir) +		free(dir); + +out: +	ubi_close_volume(c->ubi); +	return ret; +} + +/* + * ubifsload... 
+ */ + +/* file.c */ + +static inline void *kmap(struct page *page) +{ +	return page->addr; +} + +static int read_block(struct inode *inode, void *addr, unsigned int block, +		      struct ubifs_data_node *dn) +{ +	struct ubifs_info *c = inode->i_sb->s_fs_info; +	int err, len, out_len; +	union ubifs_key key; +	unsigned int dlen; + +	data_key_init(c, &key, inode->i_ino, block); +	err = ubifs_tnc_lookup(c, &key, dn); +	if (err) { +		if (err == -ENOENT) +			/* Not found, so it must be a hole */ +			memset(addr, 0, UBIFS_BLOCK_SIZE); +		return err; +	} + +	ubifs_assert(le64_to_cpu(dn->ch.sqnum) > ubifs_inode(inode)->creat_sqnum); + +	len = le32_to_cpu(dn->size); +	if (len <= 0 || len > UBIFS_BLOCK_SIZE) +		goto dump; + +	dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; +	out_len = UBIFS_BLOCK_SIZE; +	err = ubifs_decompress(&dn->data, dlen, addr, &out_len, +			       le16_to_cpu(dn->compr_type)); +	if (err || len != out_len) +		goto dump; + +	/* +	 * Data length can be less than a full block, even for blocks that are +	 * not the last in the file (e.g., as a result of making a hole and +	 * appending data). Ensure that the remainder is zeroed out. 
+	 */ +	if (len < UBIFS_BLOCK_SIZE) +		memset(addr + len, 0, UBIFS_BLOCK_SIZE - len); + +	return 0; + +dump: +	ubifs_err("bad data node (block %u, inode %lu)", +		  block, inode->i_ino); +	dbg_dump_node(c, dn); +	return -EINVAL; +} + +static int do_readpage(struct ubifs_info *c, struct inode *inode, struct page *page) +{ +	void *addr; +	int err = 0, i; +	unsigned int block, beyond; +	struct ubifs_data_node *dn; +	loff_t i_size = inode->i_size; + +	dbg_gen("ino %lu, pg %lu, i_size %lld", +		inode->i_ino, page->index, i_size); + +	addr = kmap(page); + +	block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; +	beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; +	if (block >= beyond) { +		/* Reading beyond inode */ +		memset(addr, 0, PAGE_CACHE_SIZE); +		goto out; +	} + +	dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS); +	if (!dn) { +		err = -ENOMEM; +		goto error; +	} + +	i = 0; +	while (1) { +		int ret; + +		if (block >= beyond) { +			/* Reading beyond inode */ +			err = -ENOENT; +			memset(addr, 0, UBIFS_BLOCK_SIZE); +		} else { +			ret = read_block(inode, addr, block, dn); +			if (ret) { +				err = ret; +				if (err != -ENOENT) +					break; +			} else if (block + 1 == beyond) { +				int dlen = le32_to_cpu(dn->size); +				int ilen = i_size & (UBIFS_BLOCK_SIZE - 1); + +				if (ilen && ilen < dlen) +					memset(addr + ilen, 0, dlen - ilen); +			} +		} +		if (++i >= UBIFS_BLOCKS_PER_PAGE) +			break; +		block += 1; +		addr += UBIFS_BLOCK_SIZE; +	} +	if (err) { +		if (err == -ENOENT) { +			/* Not found, so it must be a hole */ +			dbg_gen("hole"); +			goto out_free; +		} +		ubifs_err("cannot read page %lu of inode %lu, error %d", +			  page->index, inode->i_ino, err); +		goto error; +	} + +out_free: +	kfree(dn); +out: +	return 0; + +error: +	kfree(dn); +	return err; +} + +int ubifs_load(char *filename, u32 addr, u32 size) +{ +	struct ubifs_info *c = ubifs_sb->s_fs_info; +	unsigned long inum; +	struct inode *inode; +	struct page page; +	int err = 0; +	int i; +	int 
count; +	char link_name[64]; +	struct ubifs_inode *ui; + +	c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READONLY); +	inum = ubifs_findfile(ubifs_sb, filename); +	if (!inum) { +		err = -1; +		goto out; +	} + +	/* +	 * Read file inode +	 */ +	inode = ubifs_iget(ubifs_sb, inum); +	if (IS_ERR(inode)) { +		printf("%s: Error reading inode %ld!\n", __func__, inum); +		err = PTR_ERR(inode); +		goto out; +	} + +	/* +	 * Check for symbolic link +	 */ +	ui = ubifs_inode(inode); +	if (((inode->i_mode & S_IFMT) == S_IFLNK) && ui->data_len) { +		memcpy(link_name, ui->data, ui->data_len); +		printf("%s is linked to %s!\n", filename, link_name); +		ubifs_iput(inode); + +		/* +		 * Now we have the "real" filename, call ubifs_load() +		 * again (recursive call) to load this file instead +		 */ +		return ubifs_load(link_name, addr, size); +	} + +	/* +	 * If no size was specified or if size bigger than filesize +	 * set size to filesize +	 */ +	if ((size == 0) || (size > inode->i_size)) +		size = inode->i_size; + +	count = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; +	printf("Loading file '%s' to addr 0x%08x with size %d (0x%08x)...\n", +	       filename, addr, size, size); + +	page.addr = (void *)addr; +	page.index = 0; +	page.inode = inode; +	for (i = 0; i < count; i++) { +		err = do_readpage(c, inode, &page); +		if (err) +			break; + +		page.addr += PAGE_SIZE; +		page.index++; +	} + +	if (err) +		printf("Error reading file '%s'\n", filename); +	else +		printf("Done\n"); + +	ubifs_iput(inode); + +out: +	ubi_close_volume(c->ubi); +	return err; +} diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h new file mode 100644 index 000000000..f342dd8bf --- /dev/null +++ b/fs/ubifs/ubifs.h @@ -0,0 +1,2173 @@ +/* + * This file is part of UBIFS. + * + * Copyright (C) 2006-2008 Nokia Corporation + * + * (C) Copyright 2008-2009 + * Stefan Roese, DENX Software Engineering, sr@denx.de. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём) + *          Adrian Hunter + */ + +#ifndef __UBIFS_H__ +#define __UBIFS_H__ + +#if 0	/* Enable for debugging output */ +#define CONFIG_UBIFS_FS_DEBUG +#define CONFIG_UBIFS_FS_DEBUG_MSG_LVL	3 +#endif + +#include <ubi_uboot.h> +#include <linux/ctype.h> +#include <linux/time.h> +#include <linux/math64.h> +#include "ubifs-media.h" + +struct dentry; +struct file; +struct iattr; +struct kstat; +struct vfsmount; + +extern struct super_block *ubifs_sb; + +extern unsigned int ubifs_msg_flags; +extern unsigned int ubifs_chk_flags; +extern unsigned int ubifs_tst_flags; + +#define pgoff_t		unsigned long + +/* + * We "simulate" the Linux page struct much simpler here + */ +struct page { +	pgoff_t index; +	void *addr; +	struct inode *inode; +}; + +void iput(struct inode *inode); + +/* + * The atomic operations are used for budgeting etc which is not + * needed for the read-only U-Boot implementation: + */ +#define atomic_long_inc(a) +#define atomic_long_dec(a) +#define	atomic_long_sub(a, b) + +/* linux/include/time.h */ + +struct timespec { +	time_t	tv_sec;		/* seconds */ +	long	tv_nsec;	/* nanoseconds */ +}; + +/* linux/include/dcache.h */ + +/* + * "quick string" -- eases parameter passing, but more importantly + * saves "metadata" about the string (ie length and the hash). 
+ * + * hash comes first so it snuggles against d_parent in the + * dentry. + */ +struct qstr { +	unsigned int hash; +	unsigned int len; +	const char *name; +}; + +struct inode { +	struct hlist_node	i_hash; +	struct list_head	i_list; +	struct list_head	i_sb_list; +	struct list_head	i_dentry; +	unsigned long		i_ino; +	unsigned int		i_nlink; +	uid_t			i_uid; +	gid_t			i_gid; +	dev_t			i_rdev; +	u64			i_version; +	loff_t			i_size; +#ifdef __NEED_I_SIZE_ORDERED +	seqcount_t		i_size_seqcount; +#endif +	struct timespec		i_atime; +	struct timespec		i_mtime; +	struct timespec		i_ctime; +	unsigned int		i_blkbits; +	unsigned short          i_bytes; +	umode_t			i_mode; +	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */ +	struct mutex		i_mutex; +	struct rw_semaphore	i_alloc_sem; +	const struct inode_operations	*i_op; +	const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */ +	struct super_block	*i_sb; +	struct file_lock	*i_flock; +#ifdef CONFIG_QUOTA +	struct dquot		*i_dquot[MAXQUOTAS]; +#endif +	struct list_head	i_devices; +	int			i_cindex; + +	__u32			i_generation; + +#ifdef CONFIG_DNOTIFY +	unsigned long		i_dnotify_mask; /* Directory notify events */ +	struct dnotify_struct	*i_dnotify; /* for directory notifications */ +#endif + +#ifdef CONFIG_INOTIFY +	struct list_head	inotify_watches; /* watches on this inode */ +	struct mutex		inotify_mutex;	/* protects the watches list */ +#endif + +	unsigned long		i_state; +	unsigned long		dirtied_when;	/* jiffies of first dirtying */ + +	unsigned int		i_flags; + +#ifdef CONFIG_SECURITY +	void			*i_security; +#endif +	void			*i_private; /* fs or device private pointer */ +}; + +struct super_block { +	struct list_head	s_list;		/* Keep this first */ +	dev_t			s_dev;		/* search index; _not_ kdev_t */ +	unsigned long		s_blocksize; +	unsigned char		s_blocksize_bits; +	unsigned char		s_dirt; +	unsigned long long	s_maxbytes;	/* Max file size */ +	struct file_system_type	*s_type; +	const struct super_operations	
*s_op; +	struct dquot_operations	*dq_op; +	struct quotactl_ops	*s_qcop; +	const struct export_operations *s_export_op; +	unsigned long		s_flags; +	unsigned long		s_magic; +	struct dentry		*s_root; +	struct rw_semaphore	s_umount; +	struct mutex		s_lock; +	int			s_count; +	int			s_syncing; +	int			s_need_sync_fs; +#ifdef CONFIG_SECURITY +	void                    *s_security; +#endif +	struct xattr_handler	**s_xattr; + +	struct list_head	s_inodes;	/* all inodes */ +	struct list_head	s_dirty;	/* dirty inodes */ +	struct list_head	s_io;		/* parked for writeback */ +	struct list_head	s_more_io;	/* parked for more writeback */ +	struct hlist_head	s_anon;		/* anonymous dentries for (nfs) exporting */ +	struct list_head	s_files; +	/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ +	struct list_head	s_dentry_lru;	/* unused dentry lru */ +	int			s_nr_dentry_unused;	/* # of dentry on lru */ + +	struct block_device	*s_bdev; +	struct mtd_info		*s_mtd; +	struct list_head	s_instances; + +	int			s_frozen; +	wait_queue_head_t	s_wait_unfrozen; + +	char s_id[32];				/* Informational name */ + +	void 			*s_fs_info;	/* Filesystem private info */ + +	/* +	 * The next field is for VFS *only*. No filesystems have any business +	 * even looking at it. You had been warned. +	 */ +	struct mutex s_vfs_rename_mutex;	/* Kludge */ + +	/* Granularity of c/m/atime in ns. +	   Cannot be worse than a second */ +	u32		   s_time_gran; + +	/* +	 * Filesystem subtype.  
If non-empty the filesystem type field +	 * in /proc/mounts will be "type.subtype" +	 */ +	char *s_subtype; + +	/* +	 * Saved mount options for lazy filesystems using +	 * generic_show_options() +	 */ +	char *s_options; +}; + +struct file_system_type { +	const char *name; +	int fs_flags; +	int (*get_sb) (struct file_system_type *, int, +		       const char *, void *, struct vfsmount *); +	void (*kill_sb) (struct super_block *); +	struct module *owner; +	struct file_system_type * next; +	struct list_head fs_supers; +}; + +struct vfsmount { +	struct list_head mnt_hash; +	struct vfsmount *mnt_parent;	/* fs we are mounted on */ +	struct dentry *mnt_mountpoint;	/* dentry of mountpoint */ +	struct dentry *mnt_root;	/* root of the mounted tree */ +	struct super_block *mnt_sb;	/* pointer to superblock */ +	struct list_head mnt_mounts;	/* list of children, anchored here */ +	struct list_head mnt_child;	/* and going through their mnt_child */ +	int mnt_flags; +	/* 4 bytes hole on 64bits arches */ +	const char *mnt_devname;	/* Name of device e.g. 
/dev/dsk/hda1 */ +	struct list_head mnt_list; +	struct list_head mnt_expire;	/* link in fs-specific expiry list */ +	struct list_head mnt_share;	/* circular list of shared mounts */ +	struct list_head mnt_slave_list;/* list of slave mounts */ +	struct list_head mnt_slave;	/* slave list entry */ +	struct vfsmount *mnt_master;	/* slave is on master->mnt_slave_list */ +	struct mnt_namespace *mnt_ns;	/* containing namespace */ +	int mnt_id;			/* mount identifier */ +	int mnt_group_id;		/* peer group identifier */ +	/* +	 * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount +	 * to let these frequently modified fields in a separate cache line +	 * (so that reads of mnt_flags wont ping-pong on SMP machines) +	 */ +	int mnt_expiry_mark;		/* true if marked for expiry */ +	int mnt_pinned; +	int mnt_ghosts; +	/* +	 * This value is not stable unless all of the mnt_writers[] spinlocks +	 * are held, and all mnt_writer[]s on this mount have 0 as their ->count +	 */ +}; + +struct path { +	struct vfsmount *mnt; +	struct dentry *dentry; +}; + +struct file { +	struct path		f_path; +#define f_dentry	f_path.dentry +#define f_vfsmnt	f_path.mnt +	const struct file_operations	*f_op; +	unsigned int 		f_flags; +	loff_t			f_pos; +	unsigned int		f_uid, f_gid; + +	u64			f_version; +#ifdef CONFIG_SECURITY +	void			*f_security; +#endif +	/* needed for tty driver, and maybe others */ +	void			*private_data; + +#ifdef CONFIG_EPOLL +	/* Used by fs/eventpoll.c to link all the hooks to this file */ +	struct list_head	f_ep_links; +	spinlock_t		f_ep_lock; +#endif /* #ifdef CONFIG_EPOLL */ +#ifdef CONFIG_DEBUG_WRITECOUNT +	unsigned long f_mnt_write_state; +#endif +}; + +/* + * get_seconds() not really needed in the read-only implementation + */ +#define get_seconds()		0 + +/* 4k page size */ +#define PAGE_CACHE_SHIFT	12 +#define PAGE_CACHE_SIZE		(1 << PAGE_CACHE_SHIFT) + +/* Page cache limit. 
The filesystems should put that into their s_maxbytes +   limits, otherwise bad things can happen in VM. */ +#if BITS_PER_LONG==32 +#define MAX_LFS_FILESIZE	(((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) +#elif BITS_PER_LONG==64 +#define MAX_LFS_FILESIZE 	0x7fffffffffffffffUL +#endif + +#define INT_MAX		((int)(~0U>>1)) +#define INT_MIN		(-INT_MAX - 1) +#define LLONG_MAX	((long long)(~0ULL>>1)) + +/* + * These are the fs-independent mount-flags: up to 32 flags are supported + */ +#define MS_RDONLY	 1	/* Mount read-only */ +#define MS_NOSUID	 2	/* Ignore suid and sgid bits */ +#define MS_NODEV	 4	/* Disallow access to device special files */ +#define MS_NOEXEC	 8	/* Disallow program execution */ +#define MS_SYNCHRONOUS	16	/* Writes are synced at once */ +#define MS_REMOUNT	32	/* Alter flags of a mounted FS */ +#define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */ +#define MS_DIRSYNC	128	/* Directory modifications are synchronous */ +#define MS_NOATIME	1024	/* Do not update access times. */ +#define MS_NODIRATIME	2048	/* Do not update directory access times */ +#define MS_BIND		4096 +#define MS_MOVE		8192 +#define MS_REC		16384 +#define MS_VERBOSE	32768	/* War is peace. Verbosity is silence. +				   MS_VERBOSE is deprecated. */ +#define MS_SILENT	32768 +#define MS_POSIXACL	(1<<16)	/* VFS does not apply the umask */ +#define MS_UNBINDABLE	(1<<17)	/* change to unbindable */ +#define MS_PRIVATE	(1<<18)	/* change to private */ +#define MS_SLAVE	(1<<19)	/* change to slave */ +#define MS_SHARED	(1<<20)	/* change to shared */ +#define MS_RELATIME	(1<<21)	/* Update atime relative to mtime/ctime. 
*/ +#define MS_KERNMOUNT	(1<<22) /* this is a kern_mount call */ +#define MS_I_VERSION	(1<<23) /* Update inode I_version field */ +#define MS_ACTIVE	(1<<30) +#define MS_NOUSER	(1<<31) + +#define I_NEW			8 + +/* Inode flags - they have nothing to superblock flags now */ + +#define S_SYNC		1	/* Writes are synced at once */ +#define S_NOATIME	2	/* Do not update access times */ +#define S_APPEND	4	/* Append-only file */ +#define S_IMMUTABLE	8	/* Immutable file */ +#define S_DEAD		16	/* removed, but still open directory */ +#define S_NOQUOTA	32	/* Inode is not counted to quota */ +#define S_DIRSYNC	64	/* Directory modifications are synchronous */ +#define S_NOCMTIME	128	/* Do not update file c/mtime */ +#define S_SWAPFILE	256	/* Do not truncate: swapon got its bmaps */ +#define S_PRIVATE	512	/* Inode is fs-internal */ + +/* include/linux/stat.h */ + +#define S_IFMT  00170000 +#define S_IFSOCK 0140000 +#define S_IFLNK	 0120000 +#define S_IFREG  0100000 +#define S_IFBLK  0060000 +#define S_IFDIR  0040000 +#define S_IFCHR  0020000 +#define S_IFIFO  0010000 +#define S_ISUID  0004000 +#define S_ISGID  0002000 +#define S_ISVTX  0001000 + +/* include/linux/fs.h */ + +/* + * File types + * + * NOTE! These match bits 12..15 of stat.st_mode + * (ie "(i_mode >> 12) & 15"). 
+ */ +#define DT_UNKNOWN	0 +#define DT_FIFO		1 +#define DT_CHR		2 +#define DT_DIR		4 +#define DT_BLK		6 +#define DT_REG		8 +#define DT_LNK		10 +#define DT_SOCK		12 +#define DT_WHT		14 + +#define I_DIRTY_SYNC		1 +#define I_DIRTY_DATASYNC	2 +#define I_DIRTY_PAGES		4 +#define I_NEW			8 +#define I_WILL_FREE		16 +#define I_FREEING		32 +#define I_CLEAR			64 +#define __I_LOCK		7 +#define I_LOCK			(1 << __I_LOCK) +#define __I_SYNC		8 +#define I_SYNC			(1 << __I_SYNC) + +#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) + +/* linux/include/dcache.h */ + +#define DNAME_INLINE_LEN_MIN 36 + +struct dentry { +	unsigned int d_flags;		/* protected by d_lock */ +	spinlock_t d_lock;		/* per dentry lock */ +	struct inode *d_inode;		/* Where the name belongs to - NULL is +					 * negative */ +	/* +	 * The next three fields are touched by __d_lookup.  Place them here +	 * so they all fit in a cache line. +	 */ +	struct hlist_node d_hash;	/* lookup hash list */ +	struct dentry *d_parent;	/* parent directory */ +	struct qstr d_name; + +	struct list_head d_lru;		/* LRU list */ +	/* +	 * d_child and d_rcu can share memory +	 */ +	struct list_head d_subdirs;	/* our children */ +	struct list_head d_alias;	/* inode alias list */ +	unsigned long d_time;		/* used by d_revalidate */ +	struct super_block *d_sb;	/* The root of the dentry tree */ +	void *d_fsdata;			/* fs-specific data */ +#ifdef CONFIG_PROFILING +	struct dcookie_struct *d_cookie; /* cookie, if any */ +#endif +	int d_mounted; +	unsigned char d_iname[DNAME_INLINE_LEN_MIN];	/* small names */ +}; + +static inline ino_t parent_ino(struct dentry *dentry) +{ +	ino_t res; + +	spin_lock(&dentry->d_lock); +	res = dentry->d_parent->d_inode->i_ino; +	spin_unlock(&dentry->d_lock); +	return res; +} + +/* linux/include/linux/bitops.h */ + +#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG)) +#define BIT_WORD(nr)		((nr) / BITS_PER_LONG) + +/* linux/include/asm-generic/bitops/non-atomic.h */ + +/** + * __set_bit - Set a bit in 
memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static inline void __set_bit(int nr, volatile unsigned long *addr) +{ +	unsigned long mask = BIT_MASK(nr); +	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + +	*p  |= mask; +} + +static inline void __clear_bit(int nr, volatile unsigned long *addr) +{ +	unsigned long mask = BIT_MASK(nr); +	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + +	*p &= ~mask; +} + +/* debug.c */ + +#define DEFINE_SPINLOCK(...) +#define module_param_named(...) + +/* misc.h */ +#define mutex_lock_nested(...) +#define mutex_unlock_nested(...) +#define mutex_is_locked(...)	0 + +/* Version of this UBIFS implementation */ +#define UBIFS_VERSION 1 + +/* Normal UBIFS messages */ +#define ubifs_msg(fmt, ...) \ +		printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__) +/* UBIFS error messages */ +#define ubifs_err(fmt, ...)                                                  \ +	printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", 0, \ +	       __func__, ##__VA_ARGS__) +/* UBIFS warning messages */ +#define ubifs_warn(fmt, ...)                                         \ +	printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \ +	       0, __func__, ##__VA_ARGS__) + +/* UBIFS file system VFS magic number */ +#define UBIFS_SUPER_MAGIC 0x24051905 + +/* Number of UBIFS blocks per VFS page */ +#define UBIFS_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / UBIFS_BLOCK_SIZE) +#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_CACHE_SHIFT - UBIFS_BLOCK_SHIFT) + +/* "File system end of life" sequence number watermark */ +#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL +#define SQNUM_WATERMARK      0xFFFFFFFFFF000000ULL + +/* + * Minimum amount of LEBs reserved for the index. 
At present the index needs at + * least 2 LEBs: one for the index head and one for in-the-gaps method (which + * currently does not cater for the index head and so excludes it from + * consideration). + */ +#define MIN_INDEX_LEBS 2 + +/* Minimum amount of data UBIFS writes to the flash */ +#define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8) + +/* + * Currently we do not support inode number overlapping and re-using, so this + * watermark defines dangerous inode number level. This should be fixed later, + * although it is difficult to exceed current limit. Another option is to use + * 64-bit inode numbers, but this means more overhead. + */ +#define INUM_WARN_WATERMARK 0xFFF00000 +#define INUM_WATERMARK      0xFFFFFF00 + +/* Largest key size supported in this implementation */ +#define CUR_MAX_KEY_LEN UBIFS_SK_LEN + +/* Maximum number of entries in each LPT (LEB category) heap */ +#define LPT_HEAP_SZ 256 + +/* + * Background thread name pattern. The numbers are UBI device and volume + * numbers. + */ +#define BGT_NAME_PATTERN "ubifs_bgt%d_%d" + +/* Default write-buffer synchronization timeout (5 secs) */ +#define DEFAULT_WBUF_TIMEOUT (5 * HZ) + +/* Maximum possible inode number (only 32-bit inodes are supported now) */ +#define MAX_INUM 0xFFFFFFFF + +/* Number of non-data journal heads */ +#define NONDATA_JHEADS_CNT 2 + +/* Garbage collector head */ +#define GCHD   0 +/* Base journal head number */ +#define BASEHD 1 +/* First "general purpose" journal head */ +#define DATAHD 2 + +/* 'No change' value for 'ubifs_change_lp()' */ +#define LPROPS_NC 0x80000001 + +/* + * There is no notion of truncation key because truncation nodes do not exist + * in TNC. However, when replaying, it is handy to introduce fake "truncation" + * keys for truncation nodes because the code becomes simpler. So we define + * %UBIFS_TRUN_KEY type. + */ +#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT + +/* + * How much a directory entry/extended attribute entry adds to the parent/host + * inode. 
+ */ +#define CALC_DENT_SIZE(name_len) ALIGN(UBIFS_DENT_NODE_SZ + (name_len) + 1, 8) + +/* How much an extended attribute adds to the host inode */ +#define CALC_XATTR_BYTES(data_len) ALIGN(UBIFS_INO_NODE_SZ + (data_len) + 1, 8) + +/* + * Znodes which were not touched for 'OLD_ZNODE_AGE' seconds are considered + * "old", and znode which were touched last 'YOUNG_ZNODE_AGE' seconds ago are + * considered "young". This is used by shrinker when selecting znode to trim + * off. + */ +#define OLD_ZNODE_AGE 20 +#define YOUNG_ZNODE_AGE 5 + +/* + * Some compressors, like LZO, may end up with more data than the input buffer. + * So UBIFS always allocates larger output buffer, to be sure the compressor + * will not corrupt memory in case of worst case compression. + */ +#define WORST_COMPR_FACTOR 2 + +/* Maximum expected tree height for use by bottom_up_buf */ +#define BOTTOM_UP_HEIGHT 64 + +/* Maximum number of data nodes to bulk-read */ +#define UBIFS_MAX_BULK_READ 32 + +/* + * Lockdep classes for UBIFS inode @ui_mutex. + */ +enum { +	WB_MUTEX_1 = 0, +	WB_MUTEX_2 = 1, +	WB_MUTEX_3 = 2, +}; + +/* + * Znode flags (actually, bit numbers which store the flags). + * + * DIRTY_ZNODE: znode is dirty + * COW_ZNODE: znode is being committed and a new instance of this znode has to + *            be created before changing this znode + * OBSOLETE_ZNODE: znode is obsolete, which means it was deleted, but it is + *                 still in the commit list and the ongoing commit operation + *                 will commit it, and delete this znode after it is done + */ +enum { +	DIRTY_ZNODE    = 0, +	COW_ZNODE      = 1, +	OBSOLETE_ZNODE = 2, +}; + +/* + * Commit states. 
+ * + * COMMIT_RESTING: commit is not wanted + * COMMIT_BACKGROUND: background commit has been requested + * COMMIT_REQUIRED: commit is required + * COMMIT_RUNNING_BACKGROUND: background commit is running + * COMMIT_RUNNING_REQUIRED: commit is running and it is required + * COMMIT_BROKEN: commit failed + */ +enum { +	COMMIT_RESTING = 0, +	COMMIT_BACKGROUND, +	COMMIT_REQUIRED, +	COMMIT_RUNNING_BACKGROUND, +	COMMIT_RUNNING_REQUIRED, +	COMMIT_BROKEN, +}; + +/* + * 'ubifs_scan_a_node()' return values. + * + * SCANNED_GARBAGE:  scanned garbage + * SCANNED_EMPTY_SPACE: scanned empty space + * SCANNED_A_NODE: scanned a valid node + * SCANNED_A_CORRUPT_NODE: scanned a corrupted node + * SCANNED_A_BAD_PAD_NODE: scanned a padding node with invalid pad length + * + * Greater than zero means: 'scanned that number of padding bytes' + */ +enum { +	SCANNED_GARBAGE        = 0, +	SCANNED_EMPTY_SPACE    = -1, +	SCANNED_A_NODE         = -2, +	SCANNED_A_CORRUPT_NODE = -3, +	SCANNED_A_BAD_PAD_NODE = -4, +}; + +/* + * LPT cnode flag bits. + * + * DIRTY_CNODE: cnode is dirty + * COW_CNODE: cnode is being committed and must be copied before writing + * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), + * so it can (and must) be freed when the commit is finished + */ +enum { +	DIRTY_CNODE    = 0, +	COW_CNODE      = 1, +	OBSOLETE_CNODE = 2, +}; + +/* + * Dirty flag bits (lpt_drty_flgs) for LPT special nodes. + * + * LTAB_DIRTY: ltab node is dirty + * LSAVE_DIRTY: lsave node is dirty + */ +enum { +	LTAB_DIRTY  = 1, +	LSAVE_DIRTY = 2, +}; + +/* + * Return codes used by the garbage collector. + * @LEB_FREED: the logical eraseblock was freed and is ready to use + * @LEB_FREED_IDX: indexing LEB was freed and can be used only after the commit + * @LEB_RETAINED: the logical eraseblock was freed and retained for GC purposes + */ +enum { +	LEB_FREED, +	LEB_FREED_IDX, +	LEB_RETAINED, +}; + +/** + * struct ubifs_old_idx - index node obsoleted since last commit start. 
+ * @rb: rb-tree node + * @lnum: LEB number of obsoleted index node + * @offs: offset of obsoleted index node + */ +struct ubifs_old_idx { +	struct rb_node rb; +	int lnum; +	int offs; +}; + +/* The below union makes it easier to deal with keys */ +union ubifs_key { +	uint8_t u8[CUR_MAX_KEY_LEN]; +	uint32_t u32[CUR_MAX_KEY_LEN/4]; +	uint64_t u64[CUR_MAX_KEY_LEN/8]; +	__le32 j32[CUR_MAX_KEY_LEN/4]; +}; + +/** + * struct ubifs_scan_node - UBIFS scanned node information. + * @list: list of scanned nodes + * @key: key of node scanned (if it has one) + * @sqnum: sequence number + * @type: type of node scanned + * @offs: offset with LEB of node scanned + * @len: length of node scanned + * @node: raw node + */ +struct ubifs_scan_node { +	struct list_head list; +	union ubifs_key key; +	unsigned long long sqnum; +	int type; +	int offs; +	int len; +	void *node; +}; + +/** + * struct ubifs_scan_leb - UBIFS scanned LEB information. + * @lnum: logical eraseblock number + * @nodes_cnt: number of nodes scanned + * @nodes: list of struct ubifs_scan_node + * @endpt: end point (and therefore the start of empty space) + * @ecc: read returned -EBADMSG + * @buf: buffer containing entire LEB scanned + */ +struct ubifs_scan_leb { +	int lnum; +	int nodes_cnt; +	struct list_head nodes; +	int endpt; +	int ecc; +	void *buf; +}; + +/** + * struct ubifs_gced_idx_leb - garbage-collected indexing LEB. + * @list: list + * @lnum: LEB number + * @unmap: OK to unmap this LEB + * + * This data structure is used to temporary store garbage-collected indexing + * LEBs - they are not released immediately, but only after the next commit. + * This is needed to guarantee recoverability. + */ +struct ubifs_gced_idx_leb { +	struct list_head list; +	int lnum; +	int unmap; +}; + +/** + * struct ubifs_inode - UBIFS in-memory inode description. 
+ * @vfs_inode: VFS inode description object + * @creat_sqnum: sequence number at time of creation + * @del_cmtno: commit number corresponding to the time the inode was deleted, + *             protected by @c->commit_sem; + * @xattr_size: summarized size of all extended attributes in bytes + * @xattr_cnt: count of extended attributes this inode has + * @xattr_names: sum of lengths of all extended attribute names belonging to + *               this inode + * @dirty: non-zero if the inode is dirty + * @xattr: non-zero if this is an extended attribute inode + * @bulk_read: non-zero if bulk-read should be used + * @ui_mutex: serializes inode write-back with the rest of VFS operations, + *            serializes "clean <-> dirty" state changes, serializes bulk-read, + *            protects @dirty, @bulk_read, @ui_size, and @xattr_size + * @ui_lock: protects @synced_i_size + * @synced_i_size: synchronized size of inode, i.e. the value of inode size + *                 currently stored on the flash; used only for regular file + *                 inodes + * @ui_size: inode size used by UBIFS when writing to flash + * @flags: inode flags (@UBIFS_COMPR_FL, etc) + * @compr_type: default compression type used for this inode + * @last_page_read: page number of last page read (for bulk read) + * @read_in_a_row: number of consecutive pages read in a row (for bulk read) + * @data_len: length of the data attached to the inode + * @data: inode's data + * + * @ui_mutex exists for two main reasons. At first it prevents inodes from + * being written back while UBIFS changing them, being in the middle of an VFS + * operation. This way UBIFS makes sure the inode fields are consistent. For + * example, in 'ubifs_rename()' we change 3 inodes simultaneously, and + * write-back must not write any of them before we have finished. + * + * The second reason is budgeting - UBIFS has to budget all operations. 
If an + * operation is going to mark an inode dirty, it has to allocate budget for + * this. It cannot just mark it dirty because there is no guarantee there will + * be enough flash space to write the inode back later. This means UBIFS has + * to have full control over inode "clean <-> dirty" transitions (and pages + * actually). But unfortunately, VFS marks inodes dirty in many places, and it + * does not ask the file-system if it is allowed to do so (there is a notifier, + * but it is not enough), i.e., there is no mechanism to synchronize with this. + * So UBIFS has its own inode dirty flag and its own mutex to serialize + * "clean <-> dirty" transitions. + * + * The @synced_i_size field is used to make sure we never write pages which are + * beyond last synchronized inode size. See 'ubifs_writepage()' for more + * information. + * + * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses + * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot + * make sure @inode->i_size is always changed under @ui_mutex, because it + * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock + * with 'ubifs_writepage()' (see file.c). All the other inode fields are + * changed under @ui_mutex, so they do not need "shadow" fields. Note, one + * could consider to rework locking and base it on "shadow" fields. + */ +struct ubifs_inode { +	struct inode vfs_inode; +	unsigned long long creat_sqnum; +	unsigned long long del_cmtno; +	unsigned int xattr_size; +	unsigned int xattr_cnt; +	unsigned int xattr_names; +	unsigned int dirty:1; +	unsigned int xattr:1; +	unsigned int bulk_read:1; +	unsigned int compr_type:2; +	struct mutex ui_mutex; +	spinlock_t ui_lock; +	loff_t synced_i_size; +	loff_t ui_size; +	int flags; +	pgoff_t last_page_read; +	pgoff_t read_in_a_row; +	int data_len; +	void *data; +}; + +/** + * struct ubifs_unclean_leb - records a LEB recovered under read-only mode. 
+ * @list: list + * @lnum: LEB number of recovered LEB + * @endpt: offset where recovery ended + * + * This structure records a LEB identified during recovery that needs to be + * cleaned but was not because UBIFS was mounted read-only. The information + * is used to clean the LEB when remounting to read-write mode. + */ +struct ubifs_unclean_leb { +	struct list_head list; +	int lnum; +	int endpt; +}; + +/* + * LEB properties flags. + * + * LPROPS_UNCAT: not categorized + * LPROPS_DIRTY: dirty > free, dirty >= @c->dead_wm, not index + * LPROPS_DIRTY_IDX: dirty + free > @c->min_idx_node_sze and index + * LPROPS_FREE: free > 0, dirty < @c->dead_wm, not empty, not index + * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs + * LPROPS_EMPTY: LEB is empty, not taken + * LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken + * LPROPS_FRDI_IDX: free + dirty == leb_size and index, may be taken + * LPROPS_CAT_MASK: mask for the LEB categories above + * LPROPS_TAKEN: LEB was taken (this flag is not saved on the media) + * LPROPS_INDEX: LEB contains indexing nodes (this flag also exists on flash) + */ +enum { +	LPROPS_UNCAT     =  0, +	LPROPS_DIRTY     =  1, +	LPROPS_DIRTY_IDX =  2, +	LPROPS_FREE      =  3, +	LPROPS_HEAP_CNT  =  3, +	LPROPS_EMPTY     =  4, +	LPROPS_FREEABLE  =  5, +	LPROPS_FRDI_IDX  =  6, +	LPROPS_CAT_MASK  = 15, +	LPROPS_TAKEN     = 16, +	LPROPS_INDEX     = 32, +}; + +/** + * struct ubifs_lprops - logical eraseblock properties. 
+ * @free: amount of free space in bytes + * @dirty: amount of dirty space in bytes + * @flags: LEB properties flags (see above) + * @lnum: LEB number + * @list: list of same-category lprops (for LPROPS_EMPTY and LPROPS_FREEABLE) + * @hpos: heap position in heap of same-category lprops (other categories) + */ +struct ubifs_lprops { +	int free; +	int dirty; +	int flags; +	int lnum; +	union { +		struct list_head list; +		int hpos; +	}; +}; + +/** + * struct ubifs_lpt_lprops - LPT logical eraseblock properties. + * @free: amount of free space in bytes + * @dirty: amount of dirty space in bytes + * @tgc: trivial GC flag (1 => unmap after commit end) + * @cmt: commit flag (1 => reserved for commit) + */ +struct ubifs_lpt_lprops { +	int free; +	int dirty; +	unsigned tgc:1; +	unsigned cmt:1; +}; + +/** + * struct ubifs_lp_stats - statistics of eraseblocks in the main area. + * @empty_lebs: number of empty LEBs + * @taken_empty_lebs: number of taken LEBs + * @idx_lebs: number of indexing LEBs + * @total_free: total free space in bytes (includes all LEBs) + * @total_dirty: total dirty space in bytes (includes all LEBs) + * @total_used: total used space in bytes (does not include index LEBs) + * @total_dead: total dead space in bytes (does not include index LEBs) + * @total_dark: total dark space in bytes (does not include index LEBs) + * + * The @taken_empty_lebs field counts the LEBs that are in the transient state + * of having been "taken" for use but not yet written to. @taken_empty_lebs is + * needed to account correctly for @gc_lnum, otherwise @empty_lebs could be + * used by itself (in which case 'unused_lebs' would be a better name). In the + * case of @gc_lnum, it is "taken" at mount time or whenever a LEB is retained + * by GC, but unlike other empty LEBs that are "taken", it may not be written + * straight away (i.e. before the next commit start or unmount), so either + * @gc_lnum must be specially accounted for, or the current approach followed + * i.e. 
count it under @taken_empty_lebs. + * + * @empty_lebs includes @taken_empty_lebs. + * + * @total_used, @total_dead and @total_dark fields do not account indexing + * LEBs. + */ +struct ubifs_lp_stats { +	int empty_lebs; +	int taken_empty_lebs; +	int idx_lebs; +	long long total_free; +	long long total_dirty; +	long long total_used; +	long long total_dead; +	long long total_dark; +}; + +struct ubifs_nnode; + +/** + * struct ubifs_cnode - LEB Properties Tree common node. + * @parent: parent nnode + * @cnext: next cnode to commit + * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE) + * @iip: index in parent + * @level: level in the tree (zero for pnodes, greater than zero for nnodes) + * @num: node number + */ +struct ubifs_cnode { +	struct ubifs_nnode *parent; +	struct ubifs_cnode *cnext; +	unsigned long flags; +	int iip; +	int level; +	int num; +}; + +/** + * struct ubifs_pnode - LEB Properties Tree leaf node. + * @parent: parent nnode + * @cnext: next cnode to commit + * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE) + * @iip: index in parent + * @level: level in the tree (always zero for pnodes) + * @num: node number + * @lprops: LEB properties array + */ +struct ubifs_pnode { +	struct ubifs_nnode *parent; +	struct ubifs_cnode *cnext; +	unsigned long flags; +	int iip; +	int level; +	int num; +	struct ubifs_lprops lprops[UBIFS_LPT_FANOUT]; +}; + +/** + * struct ubifs_nbranch - LEB Properties Tree internal node branch. + * @lnum: LEB number of child + * @offs: offset of child + * @nnode: nnode child + * @pnode: pnode child + * @cnode: cnode child + */ +struct ubifs_nbranch { +	int lnum; +	int offs; +	union { +		struct ubifs_nnode *nnode; +		struct ubifs_pnode *pnode; +		struct ubifs_cnode *cnode; +	}; +}; + +/** + * struct ubifs_nnode - LEB Properties Tree internal node. 
+ * @parent: parent nnode + * @cnext: next cnode to commit + * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE) + * @iip: index in parent + * @level: level in the tree (always greater than zero for nnodes) + * @num: node number + * @nbranch: branches to child nodes + */ +struct ubifs_nnode { +	struct ubifs_nnode *parent; +	struct ubifs_cnode *cnext; +	unsigned long flags; +	int iip; +	int level; +	int num; +	struct ubifs_nbranch nbranch[UBIFS_LPT_FANOUT]; +}; + +/** + * struct ubifs_lpt_heap - heap of categorized lprops. + * @arr: heap array + * @cnt: number in heap + * @max_cnt: maximum number allowed in heap + * + * There are %LPROPS_HEAP_CNT heaps. + */ +struct ubifs_lpt_heap { +	struct ubifs_lprops **arr; +	int cnt; +	int max_cnt; +}; + +/* + * Return codes for LPT scan callback function. + * + * LPT_SCAN_CONTINUE: continue scanning + * LPT_SCAN_ADD: add the LEB properties scanned to the tree in memory + * LPT_SCAN_STOP: stop scanning + */ +enum { +	LPT_SCAN_CONTINUE = 0, +	LPT_SCAN_ADD = 1, +	LPT_SCAN_STOP = 2, +}; + +struct ubifs_info; + +/* Callback used by the 'ubifs_lpt_scan_nolock()' function */ +typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, +				       const struct ubifs_lprops *lprops, +				       int in_tree, void *data); + +/** + * struct ubifs_wbuf - UBIFS write-buffer. + * @c: UBIFS file-system description object + * @buf: write-buffer (of min. flash I/O unit size) + * @lnum: logical eraseblock number the write-buffer points to + * @offs: write-buffer offset in this logical eraseblock + * @avail: number of bytes available in the write-buffer + * @used:  number of used bytes in the write-buffer + * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, + * %UBI_UNKNOWN) + * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep + *         up by 'mutex_lock_nested()). 
+ * @sync_callback: write-buffer synchronization callback + * @io_mutex: serializes write-buffer I/O + * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes + *        fields + * @timer: write-buffer timer + * @timeout: timer expire interval in jiffies + * @need_sync: it is set if its timer expired and needs sync + * @next_ino: points to the next position of the following inode number + * @inodes: stores the inode numbers of the nodes which are in wbuf + * + * The write-buffer synchronization callback is called when the write-buffer is + * synchronized in order to notify how much space was wasted due to + * write-buffer padding and how much free space is left in the LEB. + * + * Note: the fields @buf, @lnum, @offs, @avail and @used can be read under + * spin-lock or mutex because they are written under both mutex and spin-lock. + * @buf is appended to under mutex but overwritten under both mutex and + * spin-lock. Thus the data between @buf and @buf + @used can be read under + * spinlock. + */ +struct ubifs_wbuf { +	struct ubifs_info *c; +	void *buf; +	int lnum; +	int offs; +	int avail; +	int used; +	int dtype; +	int jhead; +	int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); +	struct mutex io_mutex; +	spinlock_t lock; +	int timeout; +	int need_sync; +	int next_ino; +	ino_t *inodes; +}; + +/** + * struct ubifs_bud - bud logical eraseblock. + * @lnum: logical eraseblock number + * @start: where the (uncommitted) bud data starts + * @jhead: journal head number this bud belongs to + * @list: link in the list buds belonging to the same journal head + * @rb: link in the tree of all buds + */ +struct ubifs_bud { +	int lnum; +	int start; +	int jhead; +	struct list_head list; +	struct rb_node rb; +}; + +/** + * struct ubifs_jhead - journal head. + * @wbuf: head's write-buffer + * @buds_list: list of bud LEBs belonging to this journal head + * + * Note, the @buds list is protected by the @c->buds_lock. 
+ */ +struct ubifs_jhead { +	struct ubifs_wbuf wbuf; +	struct list_head buds_list; +}; + +/** + * struct ubifs_zbranch - key/coordinate/length branch stored in znodes. + * @key: key + * @znode: znode address in memory + * @lnum: LEB number of the target node (indexing node or data node) + * @offs: target node offset within @lnum + * @len: target node length + */ +struct ubifs_zbranch { +	union ubifs_key key; +	union { +		struct ubifs_znode *znode; +		void *leaf; +	}; +	int lnum; +	int offs; +	int len; +}; + +/** + * struct ubifs_znode - in-memory representation of an indexing node. + * @parent: parent znode or NULL if it is the root + * @cnext: next znode to commit + * @flags: znode flags (%DIRTY_ZNODE, %COW_ZNODE or %OBSOLETE_ZNODE) + * @time: last access time (seconds) + * @level: level of the entry in the TNC tree + * @child_cnt: count of child znodes + * @iip: index in parent's zbranch array + * @alt: lower bound of key range has altered i.e. child inserted at slot 0 + * @lnum: LEB number of the corresponding indexing node + * @offs: offset of the corresponding indexing node + * @len: length  of the corresponding indexing node + * @zbranch: array of znode branches (@c->fanout elements) + */ +struct ubifs_znode { +	struct ubifs_znode *parent; +	struct ubifs_znode *cnext; +	unsigned long flags; +	unsigned long time; +	int level; +	int child_cnt; +	int iip; +	int alt; +#ifdef CONFIG_UBIFS_FS_DEBUG +	int lnum, offs, len; +#endif +	struct ubifs_zbranch zbranch[]; +}; + +/** + * struct bu_info - bulk-read information. 
+ * @key: first data node key + * @zbranch: zbranches of data nodes to bulk read + * @buf: buffer to read into + * @buf_len: buffer length + * @gc_seq: GC sequence number to detect races with GC + * @cnt: number of data nodes for bulk read + * @blk_cnt: number of data blocks including holes + * @eof: end of file reached + */ +struct bu_info { +	union ubifs_key key; +	struct ubifs_zbranch zbranch[UBIFS_MAX_BULK_READ]; +	void *buf; +	int buf_len; +	int gc_seq; +	int cnt; +	int blk_cnt; +	int eof; +}; + +/** + * struct ubifs_node_range - node length range description data structure. + * @len: fixed node length + * @min_len: minimum possible node length + * @max_len: maximum possible node length + * + * If @max_len is %0, the node has fixed length @len. + */ +struct ubifs_node_range { +	union { +		int len; +		int min_len; +	}; +	int max_len; +}; + +/** + * struct ubifs_compressor - UBIFS compressor description structure. + * @compr_type: compressor type (%UBIFS_COMPR_LZO, etc) + * @cc: cryptoapi compressor handle + * @comp_mutex: mutex used during compression + * @decomp_mutex: mutex used during decompression + * @name: compressor name + * @capi_name: cryptoapi compressor name + */ +struct ubifs_compressor { +	int compr_type; +	char *name; +	char *capi_name; +	int (*decompress)(const unsigned char *in, size_t in_len, +			  unsigned char *out, size_t *out_len); +}; + +/** + * struct ubifs_budget_req - budget requirements of an operation. 
+ * + * @fast: non-zero if the budgeting should try to acquire budget quickly and + *        should not try to call write-back + * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields + *               have to be re-calculated + * @new_page: non-zero if the operation adds a new page + * @dirtied_page: non-zero if the operation makes a page dirty + * @new_dent: non-zero if the operation adds a new directory entry + * @mod_dent: non-zero if the operation removes or modifies an existing + *            directory entry + * @new_ino: non-zero if the operation adds a new inode + * @new_ino_d: how much data newly created inode contains + * @dirtied_ino: how many inodes the operation makes dirty + * @dirtied_ino_d: how much data dirtied inode contains + * @idx_growth: how much the index will supposedly grow + * @data_growth: how much new data the operation will supposedly add + * @dd_growth: how much data that makes other data dirty the operation will + *             supposedly add + * + * @idx_growth, @data_growth and @dd_growth are not used in budget request. The + * budgeting subsystem caches index and data growth values there to avoid + * re-calculating them when the budget is released. However, if @idx_growth is + * %-1, it is calculated by the release function using other fields. + * + * An inode may contain 4KiB of data at max., thus the width of @new_ino_d + * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made + * dirty by the re-name operation. + * + * Note, UBIFS aligns node lengths to 8-bytes boundary, so the requester has to + * make sure the amount of inode data which contribute to @new_ino_d and + * @dirtied_ino_d fields are aligned. 
+ */ +struct ubifs_budget_req { +	unsigned int fast:1; +	unsigned int recalculate:1; +#ifndef UBIFS_DEBUG +	unsigned int new_page:1; +	unsigned int dirtied_page:1; +	unsigned int new_dent:1; +	unsigned int mod_dent:1; +	unsigned int new_ino:1; +	unsigned int new_ino_d:13; +	unsigned int dirtied_ino:4; +	unsigned int dirtied_ino_d:15; +#else +	/* Not bit-fields to check for overflows */ +	unsigned int new_page; +	unsigned int dirtied_page; +	unsigned int new_dent; +	unsigned int mod_dent; +	unsigned int new_ino; +	unsigned int new_ino_d; +	unsigned int dirtied_ino; +	unsigned int dirtied_ino_d; +#endif +	int idx_growth; +	int data_growth; +	int dd_growth; +}; + +/** + * struct ubifs_orphan - stores the inode number of an orphan. + * @rb: rb-tree node of rb-tree of orphans sorted by inode number + * @list: list head of list of orphans in order added + * @new_list: list head of list of orphans added since the last commit + * @cnext: next orphan to commit + * @dnext: next orphan to delete + * @inum: inode number + * @new: %1 => added since the last commit, otherwise %0 + */ +struct ubifs_orphan { +	struct rb_node rb; +	struct list_head list; +	struct list_head new_list; +	struct ubifs_orphan *cnext; +	struct ubifs_orphan *dnext; +	ino_t inum; +	int new; +}; + +/** + * struct ubifs_mount_opts - UBIFS-specific mount options information. 
+ * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) + * @bulk_read: enable/disable bulk-reads (%0 default, %1 disable, %2 enable) + * @chk_data_crc: enable/disable CRC data checking when reading data nodes + *                (%0 default, %1 disable, %2 enable) + * @override_compr: override default compressor (%0 - do not override and use + *                  superblock compressor, %1 - override and use compressor + *                  specified in @compr_type) + * @compr_type: compressor type to override the superblock compressor with + *              (%UBIFS_COMPR_NONE, etc) + */ +struct ubifs_mount_opts { +	unsigned int unmount_mode:2; +	unsigned int bulk_read:2; +	unsigned int chk_data_crc:2; +	unsigned int override_compr:1; +	unsigned int compr_type:2; +}; + +struct ubifs_debug_info; + +/** + * struct ubifs_info - UBIFS file-system description data structure + * (per-superblock). + * @vfs_sb: VFS @struct super_block object + * @bdi: backing device info object to make VFS happy and disable read-ahead + * + * @highest_inum: highest used inode number + * @max_sqnum: current global sequence number + * @cmt_no: commit number of the last successfully completed commit, protected + *          by @commit_sem + * @cnt_lock: protects @highest_inum and @max_sqnum counters + * @fmt_version: UBIFS on-flash format version + * @uuid: UUID from super block + * + * @lhead_lnum: log head logical eraseblock number + * @lhead_offs: log head offset + * @ltail_lnum: log tail logical eraseblock number (offset is always 0) + * @log_mutex: protects the log, @lhead_lnum, @lhead_offs, @ltail_lnum, and + *             @bud_bytes + * @min_log_bytes: minimum required number of bytes in the log + * @cmt_bud_bytes: used during commit to temporarily amount of bytes in + *                 committed buds + * + * @buds: tree of all buds indexed by bud LEB number + * @bud_bytes: how many bytes of flash is used by buds + * @buds_lock: protects the @buds tree, @bud_bytes, and 
per-journal head bud + *             lists + * @jhead_cnt: count of journal heads + * @jheads: journal heads (head zero is base head) + * @max_bud_bytes: maximum number of bytes allowed in buds + * @bg_bud_bytes: number of bud bytes when background commit is initiated + * @old_buds: buds to be released after commit ends + * @max_bud_cnt: maximum number of buds + * + * @commit_sem: synchronizes committer with other processes + * @cmt_state: commit state + * @cs_lock: commit state lock + * @cmt_wq: wait queue to sleep on if the log is full and a commit is running + * + * @big_lpt: flag that LPT is too big to write whole during commit + * @no_chk_data_crc: do not check CRCs when reading data nodes (except during + *                   recovery) + * @bulk_read: enable bulk-reads + * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) + * + * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and + *             @calc_idx_sz + * @zroot: zbranch which points to the root index node and znode + * @cnext: next znode to commit + * @enext: next znode to commit to empty space + * @gap_lebs: array of LEBs used by the in-gaps commit method + * @cbuf: commit buffer + * @ileb_buf: buffer for commit in-the-gaps method + * @ileb_len: length of data in ileb_buf + * @ihead_lnum: LEB number of index head + * @ihead_offs: offset of index head + * @ilebs: pre-allocated index LEBs + * @ileb_cnt: number of pre-allocated index LEBs + * @ileb_nxt: next pre-allocated index LEBs + * @old_idx: tree of index nodes obsoleted since the last commit start + * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c + * + * @mst_node: master node + * @mst_offs: offset of valid master node + * @mst_mutex: protects the master node area, @mst_node, and @mst_offs + * + * @max_bu_buf_len: maximum bulk-read buffer length + * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu + * @bu: pre-allocated bulk-read information + * + * 
@log_lebs: number of logical eraseblocks in the log + * @log_bytes: log size in bytes + * @log_last: last LEB of the log + * @lpt_lebs: number of LEBs used for lprops table + * @lpt_first: first LEB of the lprops table area + * @lpt_last: last LEB of the lprops table area + * @orph_lebs: number of LEBs used for the orphan area + * @orph_first: first LEB of the orphan area + * @orph_last: last LEB of the orphan area + * @main_lebs: count of LEBs in the main area + * @main_first: first LEB of the main area + * @main_bytes: main area size in bytes + * + * @key_hash_type: type of the key hash + * @key_hash: direntry key hash function + * @key_fmt: key format + * @key_len: key length + * @fanout: fanout of the index tree (number of links per indexing node) + * + * @min_io_size: minimal input/output unit size + * @min_io_shift: number of bits in @min_io_size minus one + * @leb_size: logical eraseblock size in bytes + * @half_leb_size: half LEB size + * @leb_cnt: count of logical eraseblocks + * @max_leb_cnt: maximum count of logical eraseblocks + * @old_leb_cnt: count of logical eraseblocks before re-size + * @ro_media: the underlying UBI volume is read-only + * + * @dirty_pg_cnt: number of dirty pages (not used) + * @dirty_zn_cnt: number of dirty znodes + * @clean_zn_cnt: number of clean znodes + * + * @budg_idx_growth: amount of bytes budgeted for index growth + * @budg_data_growth: amount of bytes budgeted for cached data + * @budg_dd_growth: amount of bytes budgeted for cached data that will make + *                  other data dirty + * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, + *                        but which still have to be taken into account because + *                        the index has not been committed so far + * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, + *              @budg_uncommitted_idx, @min_idx_lebs, @old_idx_sz, @lst, + *              @nospace, and @nospace_rp; + * 
@min_idx_lebs: minimum number of LEBs required for the index + * @old_idx_sz: size of index on flash + * @calc_idx_sz: temporary variable which is used to calculate new index size + *               (contains accurate new index size at end of TNC commit start) + * @lst: lprops statistics + * @nospace: non-zero if the file-system does not have flash space (used as + *           optimization) + * @nospace_rp: the same as @nospace, but additionally means that even reserved + *              pool is full + * + * @page_budget: budget for a page + * @inode_budget: budget for an inode + * @dent_budget: budget for a directory entry + * + * @ref_node_alsz: size of the LEB reference node aligned to the min. flash + * I/O unit + * @mst_node_alsz: master node aligned size + * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary + * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary + * @max_inode_sz: maximum possible inode size in bytes + * @max_znode_sz: size of znode in bytes + * + * @leb_overhead: how many bytes are wasted in an LEB when it is filled with + *                data nodes of maximum size - used in free space reporting + * @dead_wm: LEB dead space watermark + * @dark_wm: LEB dark space watermark + * @block_cnt: count of 4KiB blocks on the FS + * + * @ranges: UBIFS node length ranges + * @ubi: UBI volume descriptor + * @di: UBI device information + * @vi: UBI volume information + * + * @orph_tree: rb-tree of orphan inode numbers + * @orph_list: list of orphan inode numbers in order added + * @orph_new: list of orphan inode numbers added since last commit + * @orph_cnext: next orphan to commit + * @orph_dnext: next orphan to delete + * @orphan_lock: lock for orph_tree and orph_new + * @orph_buf: buffer for orphan nodes + * @new_orphans: number of orphans since last commit + * @cmt_orphans: number of orphans being committed + * @tot_orphans: number of orphans in the rb_tree + * @max_orphans: maximum number of orphans allowed + * 
@ohead_lnum: orphan head LEB number + * @ohead_offs: orphan head offset + * @no_orphs: non-zero if there are no orphans + * + * @bgt: UBIFS background thread + * @bgt_name: background thread name + * @need_bgt: if background thread should run + * @need_wbuf_sync: if write-buffers have to be synchronized + * + * @gc_lnum: LEB number used for garbage collection + * @sbuf: a buffer of LEB size used by GC and replay for scanning + * @idx_gc: list of index LEBs that have been garbage collected + * @idx_gc_cnt: number of elements on the idx_gc list + * @gc_seq: incremented for every non-index LEB garbage collected + * @gced_lnum: last non-index LEB that was garbage collected + * + * @infos_list: links all 'ubifs_info' objects + * @umount_mutex: serializes shrinker and un-mount + * @shrinker_run_no: shrinker run number + * + * @space_bits: number of bits needed to record free or dirty space + * @lpt_lnum_bits: number of bits needed to record a LEB number in the LPT + * @lpt_offs_bits: number of bits needed to record an offset in the LPT + * @lpt_spc_bits: number of bits needed to space in the LPT + * @pcnt_bits: number of bits needed to record pnode or nnode number + * @lnum_bits: number of bits needed to record LEB number + * @nnode_sz: size of on-flash nnode + * @pnode_sz: size of on-flash pnode + * @ltab_sz: size of on-flash LPT lprops table + * @lsave_sz: size of on-flash LPT save table + * @pnode_cnt: number of pnodes + * @nnode_cnt: number of nnodes + * @lpt_hght: height of the LPT + * @pnodes_have: number of pnodes in memory + * + * @lp_mutex: protects lprops table and all the other lprops-related fields + * @lpt_lnum: LEB number of the root nnode of the LPT + * @lpt_offs: offset of the root nnode of the LPT + * @nhead_lnum: LEB number of LPT head + * @nhead_offs: offset of LPT head + * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. 
ltab + * @dirty_nn_cnt: number of dirty nnodes + * @dirty_pn_cnt: number of dirty pnodes + * @check_lpt_free: flag that indicates LPT GC may be needed + * @lpt_sz: LPT size + * @lpt_nod_buf: buffer for an on-flash nnode or pnode + * @lpt_buf: buffer of LEB size used by LPT + * @nroot: address in memory of the root nnode of the LPT + * @lpt_cnext: next LPT node to commit + * @lpt_heap: array of heaps of categorized lprops + * @dirty_idx: a (reverse sorted) copy of the LPROPS_DIRTY_IDX heap as at + *             previous commit start + * @uncat_list: list of un-categorized LEBs + * @empty_list: list of empty LEBs + * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size) + * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size) + * @freeable_cnt: number of freeable LEBs in @freeable_list + * + * @ltab_lnum: LEB number of LPT's own lprops table + * @ltab_offs: offset of LPT's own lprops table + * @ltab: LPT's own lprops table + * @ltab_cmt: LPT's own lprops table (commit copy) + * @lsave_cnt: number of LEB numbers in LPT's save table + * @lsave_lnum: LEB number of LPT's save table + * @lsave_offs: offset of LPT's save table + * @lsave: LPT's save table + * @lscan_lnum: LEB number of last LPT scan + * + * @rp_size: size of the reserved pool in bytes + * @report_rp_size: size of the reserved pool reported to user-space + * @rp_uid: reserved pool user ID + * @rp_gid: reserved pool group ID + * + * @empty: if the UBI device is empty + * @replay_tree: temporary tree used during journal replay + * @replay_list: temporary list used during journal replay + * @replay_buds: list of buds to replay + * @cs_sqnum: sequence number of first node in the log (commit start node) + * @replay_sqnum: sequence number of node currently being replayed + * @need_recovery: file-system needs recovery + * @replaying: set to %1 during journal replay + * @unclean_leb_list: LEBs to recover when mounting ro to rw + * @rcvrd_mst_node: recovered master node to 
write when mounting ro to rw + * @size_tree: inode size information for recovery + * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) + * @always_chk_crc: always check CRCs (while mounting and remounting rw) + * @mount_opts: UBIFS-specific mount options + * + * @dbg: debugging-related information + */ +struct ubifs_info { +	struct super_block *vfs_sb; + +	ino_t highest_inum; +	unsigned long long max_sqnum; +	unsigned long long cmt_no; +	spinlock_t cnt_lock; +	int fmt_version; +	unsigned char uuid[16]; + +	int lhead_lnum; +	int lhead_offs; +	int ltail_lnum; +	struct mutex log_mutex; +	int min_log_bytes; +	long long cmt_bud_bytes; + +	struct rb_root buds; +	long long bud_bytes; +	spinlock_t buds_lock; +	int jhead_cnt; +	struct ubifs_jhead *jheads; +	long long max_bud_bytes; +	long long bg_bud_bytes; +	struct list_head old_buds; +	int max_bud_cnt; + +	struct rw_semaphore commit_sem; +	int cmt_state; +	spinlock_t cs_lock; +	wait_queue_head_t cmt_wq; + +	unsigned int big_lpt:1; +	unsigned int no_chk_data_crc:1; +	unsigned int bulk_read:1; +	unsigned int default_compr:2; + +	struct mutex tnc_mutex; +	struct ubifs_zbranch zroot; +	struct ubifs_znode *cnext; +	struct ubifs_znode *enext; +	int *gap_lebs; +	void *cbuf; +	void *ileb_buf; +	int ileb_len; +	int ihead_lnum; +	int ihead_offs; +	int *ilebs; +	int ileb_cnt; +	int ileb_nxt; +	struct rb_root old_idx; +	int *bottom_up_buf; + +	struct ubifs_mst_node *mst_node; +	int mst_offs; +	struct mutex mst_mutex; + +	int max_bu_buf_len; +	struct mutex bu_mutex; +	struct bu_info bu; + +	int log_lebs; +	long long log_bytes; +	int log_last; +	int lpt_lebs; +	int lpt_first; +	int lpt_last; +	int orph_lebs; +	int orph_first; +	int orph_last; +	int main_lebs; +	int main_first; +	long long main_bytes; + +	uint8_t key_hash_type; +	uint32_t (*key_hash)(const char *str, int len); +	int key_fmt; +	int key_len; +	int fanout; + +	int min_io_size; +	int min_io_shift; +	int leb_size; +	int half_leb_size; +	int 
leb_cnt; +	int max_leb_cnt; +	int old_leb_cnt; +	int ro_media; + +	long long budg_idx_growth; +	long long budg_data_growth; +	long long budg_dd_growth; +	long long budg_uncommitted_idx; +	spinlock_t space_lock; +	int min_idx_lebs; +	unsigned long long old_idx_sz; +	unsigned long long calc_idx_sz; +	struct ubifs_lp_stats lst; +	unsigned int nospace:1; +	unsigned int nospace_rp:1; + +	int page_budget; +	int inode_budget; +	int dent_budget; + +	int ref_node_alsz; +	int mst_node_alsz; +	int min_idx_node_sz; +	int max_idx_node_sz; +	long long max_inode_sz; +	int max_znode_sz; + +	int leb_overhead; +	int dead_wm; +	int dark_wm; +	int block_cnt; + +	struct ubifs_node_range ranges[UBIFS_NODE_TYPES_CNT]; +	struct ubi_volume_desc *ubi; +	struct ubi_device_info di; +	struct ubi_volume_info vi; + +	struct rb_root orph_tree; +	struct list_head orph_list; +	struct list_head orph_new; +	struct ubifs_orphan *orph_cnext; +	struct ubifs_orphan *orph_dnext; +	spinlock_t orphan_lock; +	void *orph_buf; +	int new_orphans; +	int cmt_orphans; +	int tot_orphans; +	int max_orphans; +	int ohead_lnum; +	int ohead_offs; +	int no_orphs; + +	struct task_struct *bgt; +	char bgt_name[sizeof(BGT_NAME_PATTERN) + 9]; +	int need_bgt; +	int need_wbuf_sync; + +	int gc_lnum; +	void *sbuf; +	struct list_head idx_gc; +	int idx_gc_cnt; +	int gc_seq; +	int gced_lnum; + +	struct list_head infos_list; +	struct mutex umount_mutex; +	unsigned int shrinker_run_no; + +	int space_bits; +	int lpt_lnum_bits; +	int lpt_offs_bits; +	int lpt_spc_bits; +	int pcnt_bits; +	int lnum_bits; +	int nnode_sz; +	int pnode_sz; +	int ltab_sz; +	int lsave_sz; +	int pnode_cnt; +	int nnode_cnt; +	int lpt_hght; +	int pnodes_have; + +	struct mutex lp_mutex; +	int lpt_lnum; +	int lpt_offs; +	int nhead_lnum; +	int nhead_offs; +	int lpt_drty_flgs; +	int dirty_nn_cnt; +	int dirty_pn_cnt; +	int check_lpt_free; +	long long lpt_sz; +	void *lpt_nod_buf; +	void *lpt_buf; +	struct ubifs_nnode *nroot; +	struct ubifs_cnode *lpt_cnext; +	struct 
ubifs_lpt_heap lpt_heap[LPROPS_HEAP_CNT]; +	struct ubifs_lpt_heap dirty_idx; +	struct list_head uncat_list; +	struct list_head empty_list; +	struct list_head freeable_list; +	struct list_head frdi_idx_list; +	int freeable_cnt; + +	int ltab_lnum; +	int ltab_offs; +	struct ubifs_lpt_lprops *ltab; +	struct ubifs_lpt_lprops *ltab_cmt; +	int lsave_cnt; +	int lsave_lnum; +	int lsave_offs; +	int *lsave; +	int lscan_lnum; + +	long long rp_size; +	long long report_rp_size; +	uid_t rp_uid; +	gid_t rp_gid; + +	/* The below fields are used only during mounting and re-mounting */ +	int empty; +	struct rb_root replay_tree; +	struct list_head replay_list; +	struct list_head replay_buds; +	unsigned long long cs_sqnum; +	unsigned long long replay_sqnum; +	int need_recovery; +	int replaying; +	struct list_head unclean_leb_list; +	struct ubifs_mst_node *rcvrd_mst_node; +	struct rb_root size_tree; +	int remounting_rw; +	int always_chk_crc; +	struct ubifs_mount_opts mount_opts; + +#ifdef CONFIG_UBIFS_FS_DEBUG +	struct ubifs_debug_info *dbg; +#endif +}; + +extern spinlock_t ubifs_infos_lock; +extern struct kmem_cache *ubifs_inode_slab; +extern const struct super_operations ubifs_super_operations; +extern const struct address_space_operations ubifs_file_address_operations; +extern const struct file_operations ubifs_file_operations; +extern const struct inode_operations ubifs_file_inode_operations; +extern const struct file_operations ubifs_dir_operations; +extern const struct inode_operations ubifs_dir_inode_operations; +extern const struct inode_operations ubifs_symlink_inode_operations; +extern struct backing_dev_info ubifs_backing_dev_info; +extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; + +/* io.c */ +void ubifs_ro_mode(struct ubifs_info *c, int err); +int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); +int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, +			   int dtype); +int ubifs_wbuf_init(struct ubifs_info *c, 
struct ubifs_wbuf *wbuf); +int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, +		    int lnum, int offs); +int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, +			 int lnum, int offs); +int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, +		     int offs, int dtype); +int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, +		     int offs, int quiet, int must_chk_crc); +void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); +void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); +int ubifs_io_init(struct ubifs_info *c); +void ubifs_pad(const struct ubifs_info *c, void *buf, int pad); +int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf); +int ubifs_bg_wbufs_sync(struct ubifs_info *c); +void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum); +int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode); + +/* scan.c */ +struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, +				  int offs, void *sbuf); +void ubifs_scan_destroy(struct ubifs_scan_leb *sleb); +int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, +		      int offs, int quiet); +struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, +					int offs, void *sbuf); +void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, +		    int lnum, int offs); +int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, +		   void *buf, int offs); +void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, +			      void *buf); + +/* log.c */ +void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud); +void ubifs_create_buds_lists(struct ubifs_info *c); +int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs); +struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum); +struct ubifs_wbuf 
*ubifs_get_wbuf(struct ubifs_info *c, int lnum); +int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum); +int ubifs_log_end_commit(struct ubifs_info *c, int new_ltail_lnum); +int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum); +int ubifs_consolidate_log(struct ubifs_info *c); + +/* journal.c */ +int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, +		     const struct qstr *nm, const struct inode *inode, +		     int deletion, int xent); +int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, +			 const union ubifs_key *key, const void *buf, int len); +int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode); +int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode); +int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, +		     const struct dentry *old_dentry, +		     const struct inode *new_dir, +		     const struct dentry *new_dentry, int sync); +int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, +		       loff_t old_size, loff_t new_size); +int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, +			   const struct inode *inode, const struct qstr *nm); +int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode1, +			   const struct inode *inode2); + +/* budget.c */ +int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req); +void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req); +void ubifs_release_dirty_inode_budget(struct ubifs_info *c, +				      struct ubifs_inode *ui); +int ubifs_budget_inode_op(struct ubifs_info *c, struct inode *inode, +			  struct ubifs_budget_req *req); +void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, +				struct ubifs_budget_req *req); +void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, +			 struct ubifs_budget_req *req); +long long ubifs_get_free_space(struct ubifs_info *c); +long 
long ubifs_get_free_space_nolock(struct ubifs_info *c); +int ubifs_calc_min_idx_lebs(struct ubifs_info *c); +void ubifs_convert_page_budget(struct ubifs_info *c); +long long ubifs_reported_space(const struct ubifs_info *c, long long free); +long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); + +/* find.c */ +int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, +			  int squeeze); +int ubifs_find_free_leb_for_idx(struct ubifs_info *c); +int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, +			 int min_space, int pick_free); +int ubifs_find_dirty_idx_leb(struct ubifs_info *c); +int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); + +/* tnc.c */ +int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, +			struct ubifs_znode **zn, int *n); +int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, +			void *node, const struct qstr *nm); +int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, +		     void *node, int *lnum, int *offs); +int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, +		  int offs, int len); +int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, +		      int old_lnum, int old_offs, int lnum, int offs, int len); +int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, +		     int lnum, int offs, int len, const struct qstr *nm); +int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key); +int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, +			const struct qstr *nm); +int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, +			   union ubifs_key *to_key); +int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum); +struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, +					   union ubifs_key *key, +					   const struct qstr *nm); +void ubifs_tnc_close(struct ubifs_info *c); +int ubifs_tnc_has_node(struct 
ubifs_info *c, union ubifs_key *key, int level, +		       int lnum, int offs, int is_idx); +int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level, +			 int lnum, int offs); +/* Shared by tnc.c for tnc_commit.c */ +void destroy_old_idx(struct ubifs_info *c); +int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, +		       int lnum, int offs); +int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode); +int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu); +int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu); + +/* tnc_misc.c */ +struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, +					      struct ubifs_znode *znode); +int ubifs_search_zbranch(const struct ubifs_info *c, +			 const struct ubifs_znode *znode, +			 const union ubifs_key *key, int *n); +struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode); +struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode); +long ubifs_destroy_tnc_subtree(struct ubifs_znode *zr); +struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c, +				     struct ubifs_zbranch *zbr, +				     struct ubifs_znode *parent, int iip); +int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, +			void *node); + +/* tnc_commit.c */ +int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); +int ubifs_tnc_end_commit(struct ubifs_info *c); + +/* shrinker.c */ +int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); + +/* commit.c */ +int ubifs_bg_thread(void *info); +void ubifs_commit_required(struct ubifs_info *c); +void ubifs_request_bg_commit(struct ubifs_info *c); +int ubifs_run_commit(struct ubifs_info *c); +void ubifs_recovery_commit(struct ubifs_info *c); +int ubifs_gc_should_commit(struct ubifs_info *c); +void ubifs_wait_for_commit(struct ubifs_info *c); + +/* master.c */ +int ubifs_read_master(struct ubifs_info *c); +int ubifs_write_master(struct ubifs_info 
*c); + +/* sb.c */ +int ubifs_read_superblock(struct ubifs_info *c); +struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); +int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); + +/* replay.c */ +int ubifs_validate_entry(struct ubifs_info *c, +			 const struct ubifs_dent_node *dent); +int ubifs_replay_journal(struct ubifs_info *c); + +/* gc.c */ +int ubifs_garbage_collect(struct ubifs_info *c, int anyway); +int ubifs_gc_start_commit(struct ubifs_info *c); +int ubifs_gc_end_commit(struct ubifs_info *c); +void ubifs_destroy_idx_gc(struct ubifs_info *c); +int ubifs_get_idx_gc_leb(struct ubifs_info *c); +int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp); + +/* orphan.c */ +int ubifs_add_orphan(struct ubifs_info *c, ino_t inum); +void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum); +int ubifs_orphan_start_commit(struct ubifs_info *c); +int ubifs_orphan_end_commit(struct ubifs_info *c); +int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only); +int ubifs_clear_orphans(struct ubifs_info *c); + +/* lpt.c */ +int ubifs_calc_lpt_geom(struct ubifs_info *c); +int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, +			  int *lpt_lebs, int *big_lpt); +int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr); +struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum); +struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum); +int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum, +			  ubifs_lpt_scan_callback scan_cb, void *data); + +/* Shared by lpt.c for lpt_commit.c */ +void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave); +void ubifs_pack_ltab(struct ubifs_info *c, void *buf, +		     struct ubifs_lpt_lprops *ltab); +void ubifs_pack_pnode(struct ubifs_info *c, void *buf, +		      struct ubifs_pnode *pnode); +void ubifs_pack_nnode(struct ubifs_info *c, void *buf, +		      struct ubifs_nnode *nnode); 
+struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c, +				    struct ubifs_nnode *parent, int iip); +struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c, +				    struct ubifs_nnode *parent, int iip); +int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip); +void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty); +void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode); +uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits); +struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght); +/* Needed only in debugging code in lpt_commit.c */ +int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf, +		       struct ubifs_nnode *nnode); + +/* lpt_commit.c */ +int ubifs_lpt_start_commit(struct ubifs_info *c); +int ubifs_lpt_end_commit(struct ubifs_info *c); +int ubifs_lpt_post_commit(struct ubifs_info *c); +void ubifs_lpt_free(struct ubifs_info *c, int wr_only); + +/* lprops.c */ +const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, +					   const struct ubifs_lprops *lp, +					   int free, int dirty, int flags, +					   int idx_gc_cnt); +void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst); +void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, +		      int cat); +void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, +		       struct ubifs_lprops *new_lprops); +void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops); +int ubifs_categorize_lprops(const struct ubifs_info *c, +			    const struct ubifs_lprops *lprops); +int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, +			int flags_set, int flags_clean, int idx_gc_cnt); +int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty, +			int flags_set, int flags_clean); +int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp); +const struct ubifs_lprops 
*ubifs_fast_find_free(struct ubifs_info *c); +const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c); +const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c); +const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); + +/* file.c */ +int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); +int ubifs_setattr(struct dentry *dentry, struct iattr *attr); + +/* dir.c */ +struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, +			      int mode); +int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, +		  struct kstat *stat); + +/* xattr.c */ +int ubifs_setxattr(struct dentry *dentry, const char *name, +		   const void *value, size_t size, int flags); +ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, +		       size_t size); +ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); +int ubifs_removexattr(struct dentry *dentry, const char *name); + +/* super.c */ +struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); +int ubifs_iput(struct inode *inode); + +/* recovery.c */ +int ubifs_recover_master_node(struct ubifs_info *c); +int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); +struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, +					 int offs, void *sbuf, int grouped); +struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, +					     int offs, void *sbuf); +int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); +int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); +int ubifs_rcvry_gc_commit(struct ubifs_info *c); +int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, +			     int deletion, loff_t new_size); +int ubifs_recover_size(struct ubifs_info *c); +void ubifs_destroy_size_tree(struct ubifs_info *c); + +/* ioctl.c */ +long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +void 
ubifs_set_inode_flags(struct inode *inode); +#ifdef CONFIG_COMPAT +long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +#endif + +/* compressor.c */ +int __init ubifs_compressors_init(void); +void __exit ubifs_compressors_exit(void); +void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, +		    int *compr_type); +int ubifs_decompress(const void *buf, int len, void *out, int *out_len, +		     int compr_type); + +#include "debug.h" +#include "misc.h" +#include "key.h" + +/* todo: Move these to a common U-Boot header */ +int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, +			  unsigned char *out, size_t *out_len); +int gunzip(void *dst, int dstlen, unsigned char *src, unsigned long *lenp); + +#endif /* !__UBIFS_H__ */ diff --git a/include/linux/math64.h b/include/linux/math64.h new file mode 100644 index 000000000..6d760d75c --- /dev/null +++ b/include/linux/math64.h @@ -0,0 +1,85 @@ +#ifndef _LINUX_MATH64_H +#define _LINUX_MATH64_H + +#include <linux/types.h> + +#if BITS_PER_LONG == 64 + +/** + * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder + * + * This is commonly provided by 32bit archs to provide an optimized 64bit + * divide. 
+ */ +static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) +{ +	*remainder = dividend % divisor; +	return dividend / divisor; +} + +/** + * div_s64_rem - signed 64bit divide with 32bit divisor with remainder + */ +static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) +{ +	*remainder = dividend % divisor; +	return dividend / divisor; +} + +/** + * div64_u64 - unsigned 64bit divide with 64bit divisor + */ +static inline u64 div64_u64(u64 dividend, u64 divisor) +{ +	return dividend / divisor; +} + +#elif BITS_PER_LONG == 32 + +#ifndef div_u64_rem +static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) +{ +	*remainder = do_div(dividend, divisor); +	return dividend; +} +#endif + +#ifndef div_s64_rem +extern s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder); +#endif + +#ifndef div64_u64 +extern u64 div64_u64(u64 dividend, u64 divisor); +#endif + +#endif /* BITS_PER_LONG */ + +/** + * div_u64 - unsigned 64bit divide with 32bit divisor + * + * This is the most common 64bit divide and should be used if possible, + * as many 32bit archs can optimize this variant better than a full 64bit + * divide. 
+ */ +#ifndef div_u64 +static inline u64 div_u64(u64 dividend, u32 divisor) +{ +	u32 remainder; +	return div_u64_rem(dividend, divisor, &remainder); +} +#endif + +/** + * div_s64 - signed 64bit divide with 32bit divisor + */ +#ifndef div_s64 +static inline s64 div_s64(s64 dividend, s32 divisor) +{ +	s32 remainder; +	return div_s64_rem(dividend, divisor, &remainder); +} +#endif + +u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder); + +#endif /* _LINUX_MATH64_H */ diff --git a/include/ubi_uboot.h b/include/ubi_uboot.h index b4152192a..74312abf0 100644 --- a/include/ubi_uboot.h +++ b/include/ubi_uboot.h @@ -18,7 +18,12 @@  #include <malloc.h>  #include <div64.h>  #include <linux/crc32.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/string.h>  #include <linux/mtd/mtd.h> +#include <linux/mtd/ubi.h>  #ifdef CONFIG_CMD_ONENAND  #include <onenand_uboot.h> @@ -193,7 +198,7 @@ static inline long IS_ERR(const void *ptr)  /* module */  #define THIS_MODULE		0 -#define try_module_get(...)	0 +#define try_module_get(...)	1  #define module_put(...)		do { } while (0)  #define module_init(...)  #define module_exit(...) @@ -206,7 +211,9 @@ static inline long IS_ERR(const void *ptr)  #define MODULE_AUTHOR(...)  #define MODULE_LICENSE(...) +#ifndef __UBIFS_H__  #include "../drivers/mtd/ubi/ubi.h" +#endif  /* functions */  extern int ubi_mtd_param_parse(const char *val, struct kernel_param *kp); |