Diffstat (limited to 'fs/ext4/ialloc.c')
 -rw-r--r--  fs/ext4/ialloc.c | 260
 1 file changed, 107 insertions, 153 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 25d8c9781ad..409c2ee7750 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -92,6 +92,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
 	return EXT4_INODES_PER_GROUP(sb);
 }
 
+void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate)
+{
+	if (uptodate) {
+		set_buffer_uptodate(bh);
+		set_bitmap_uptodate(bh);
+	}
+	unlock_buffer(bh);
+	put_bh(bh);
+}
+
 /*
  * Read the inode allocation bitmap for a given block_group, reading
  * into the specified slot in the superblock's bitmap cache.
@@ -147,18 +157,18 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 		return bh;
 	}
 	/*
-	 * submit the buffer_head for read. We can
-	 * safely mark the bitmap as uptodate now.
-	 * We do it here so the bitmap uptodate bit
-	 * get set with buffer lock held.
+	 * submit the buffer_head for reading
 	 */
 	trace_ext4_load_inode_bitmap(sb, block_group);
-	set_bitmap_uptodate(bh);
-	if (bh_submit_read(bh) < 0) {
+	bh->b_end_io = ext4_end_bitmap_read;
+	get_bh(bh);
+	submit_bh(READ, bh);
+	wait_on_buffer(bh);
+	if (!buffer_uptodate(bh)) {
 		put_bh(bh);
 		ext4_error(sb, "Cannot read inode bitmap - "
-			    "block_group = %u, inode_bitmap = %llu",
-			    block_group, bitmap_blk);
+			   "block_group = %u, inode_bitmap = %llu",
+			   block_group, bitmap_blk);
 		return NULL;
 	}
 	return bh;
@@ -194,19 +204,20 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 	struct ext4_sb_info *sbi;
 	int fatal = 0, err, count, cleared;
 
-	if (atomic_read(&inode->i_count) > 1) {
-		printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
-		       atomic_read(&inode->i_count));
+	if (!sb) {
+		printk(KERN_ERR "EXT4-fs: %s:%d: inode on "
+		       "nonexistent device\n", __func__, __LINE__);
 		return;
 	}
-	if (inode->i_nlink) {
-		printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n",
-		       inode->i_nlink);
+	if (atomic_read(&inode->i_count) > 1) {
+		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: count=%d",
+			 __func__, __LINE__, inode->i_ino,
+			 atomic_read(&inode->i_count));
 		return;
 	}
-	if (!sb) {
-		printk(KERN_ERR "ext4_free_inode: inode on "
-		       "nonexistent device\n");
+	if (inode->i_nlink) {
+		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: nlink=%d\n",
+			 __func__, __LINE__, inode->i_ino, inode->i_nlink);
 		return;
 	}
 	sbi = EXT4_SB(sb);
@@ -593,94 +604,6 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 }
 
 /*
- * claim the inode from the inode bitmap. If the group
- * is uninit we need to take the groups's ext4_group_lock
- * and clear the uninit flag. The inode bitmap update
- * and group desc uninit flag clear should be done
- * after holding ext4_group_lock so that ext4_read_inode_bitmap
- * doesn't race with the ext4_claim_inode
- */
-static int ext4_claim_inode(struct super_block *sb,
-			struct buffer_head *inode_bitmap_bh,
-			unsigned long ino, ext4_group_t group, umode_t mode)
-{
-	int free = 0, retval = 0, count;
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
-	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
-
-	/*
-	 * We have to be sure that new inode allocation does not race with
-	 * inode table initialization, because otherwise we may end up
-	 * allocating and writing new inode right before sb_issue_zeroout
-	 * takes place and overwriting our new inode with zeroes. So we
-	 * take alloc_sem to prevent it.
-	 */
-	down_read(&grp->alloc_sem);
-	ext4_lock_group(sb, group);
-	if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
-		/* not a free inode */
-		retval = 1;
-		goto err_ret;
-	}
-	ino++;
-	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
-			ino > EXT4_INODES_PER_GROUP(sb)) {
-		ext4_unlock_group(sb, group);
-		up_read(&grp->alloc_sem);
-		ext4_error(sb, "reserved inode or inode > inodes count - "
-			   "block_group = %u, inode=%lu", group,
-			   ino + group * EXT4_INODES_PER_GROUP(sb));
-		return 1;
-	}
-	/* If we didn't allocate from within the initialized part of the inode
-	 * table then we need to initialize up to this inode. */
-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
-
-		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
-			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
-			/* When marking the block group with
-			 * ~EXT4_BG_INODE_UNINIT we don't want to depend
-			 * on the value of bg_itable_unused even though
-			 * mke2fs could have initialized the same for us.
-			 * Instead we calculated the value below
-			 */
-
-			free = 0;
-		} else {
-			free = EXT4_INODES_PER_GROUP(sb) -
-				ext4_itable_unused_count(sb, gdp);
-		}
-
-		/*
-		 * Check the relative inode number against the last used
-		 * relative inode number in this group. if it is greater
-		 * we need to  update the bg_itable_unused count
-		 *
-		 */
-		if (ino > free)
-			ext4_itable_unused_set(sb, gdp,
-					(EXT4_INODES_PER_GROUP(sb) - ino));
-	}
-	count = ext4_free_inodes_count(sb, gdp) - 1;
-	ext4_free_inodes_set(sb, gdp, count);
-	if (S_ISDIR(mode)) {
-		count = ext4_used_dirs_count(sb, gdp) + 1;
-		ext4_used_dirs_set(sb, gdp, count);
-		if (sbi->s_log_groups_per_flex) {
-			ext4_group_t f = ext4_flex_group(sbi, group);
-
-			atomic_inc(&sbi->s_flex_groups[f].used_dirs);
-		}
-	}
-	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
-err_ret:
-	ext4_unlock_group(sb, group);
-	up_read(&grp->alloc_sem);
-	return retval;
-}
-
-/*
  * There are two policies for allocating an inode.  If the new inode is
  * a directory, then a forward search is made for a block group with both
  * free space and a low directory-to-inode ratio; if that fails, then of
@@ -741,6 +664,11 @@ got_group:
 	if (ret2 == -1)
 		goto out;
 
+	/*
+	 * Normally we will only go through one pass of this loop,
+	 * unless we get unlucky and it turns out the group we selected
+	 * had its last inode grabbed by someone else.
+	 */
 	for (i = 0; i < ngroups; i++, ino = 0) {
 		err = -EIO;
 
@@ -757,51 +685,24 @@ repeat_in_this_group:
 		ino = ext4_find_next_zero_bit((unsigned long *)
 					      inode_bitmap_bh->b_data,
 					      EXT4_INODES_PER_GROUP(sb), ino);
-
-		if (ino < EXT4_INODES_PER_GROUP(sb)) {
-
-			BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
-			err = ext4_journal_get_write_access(handle,
-							    inode_bitmap_bh);
-			if (err)
-				goto fail;
-
-			BUFFER_TRACE(group_desc_bh, "get_write_access");
-			err = ext4_journal_get_write_access(handle,
-								group_desc_bh);
-			if (err)
-				goto fail;
-			if (!ext4_claim_inode(sb, inode_bitmap_bh,
-						ino, group, mode)) {
-				/* we won it */
-				BUFFER_TRACE(inode_bitmap_bh,
-					"call ext4_handle_dirty_metadata");
-				err = ext4_handle_dirty_metadata(handle,
-								 NULL,
-							inode_bitmap_bh);
-				if (err)
-					goto fail;
-				/* zero bit is inode number 1*/
-				ino++;
-				goto got;
-			}
-			/* we lost it */
-			ext4_handle_release_buffer(handle, inode_bitmap_bh);
-			ext4_handle_release_buffer(handle, group_desc_bh);
-
-			if (++ino < EXT4_INODES_PER_GROUP(sb))
-				goto repeat_in_this_group;
+		if (ino >= EXT4_INODES_PER_GROUP(sb)) {
+			if (++group == ngroups)
+				group = 0;
+			continue;
 		}
-
-		/*
-		 * This case is possible in concurrent environment.  It is very
-		 * rare.  We cannot repeat the find_group_xxx() call because
-		 * that will simply return the same blockgroup, because the
-		 * group descriptor metadata has not yet been updated.
-		 * So we just go onto the next blockgroup.
-		 */
-		if (++group == ngroups)
-			group = 0;
+		if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
+			ext4_error(sb, "reserved inode found cleared - "
+				   "inode=%lu", ino + 1);
+			continue;
+		}
+		ext4_lock_group(sb, group);
+		ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
+		ext4_unlock_group(sb, group);
+		ino++;		/* the inode bitmap is zero-based */
+		if (!ret2)
+			goto got; /* we grabbed the inode! */
+		if (ino < EXT4_INODES_PER_GROUP(sb))
+			goto repeat_in_this_group;
 	}
 	err = -ENOSPC;
 	goto out;
@@ -838,6 +739,59 @@ got:
 		if (err)
 			goto fail;
 	}
+
+	BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+	err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
+	if (err)
+		goto fail;
+
+	BUFFER_TRACE(group_desc_bh, "get_write_access");
+	err = ext4_journal_get_write_access(handle, group_desc_bh);
+	if (err)
+		goto fail;
+
+	/* Update the relevant bg descriptor fields */
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+		int free;
+		struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+
+		down_read(&grp->alloc_sem); /* protect vs itable lazyinit */
+		ext4_lock_group(sb, group); /* while we modify the bg desc */
+		free = EXT4_INODES_PER_GROUP(sb) -
+			ext4_itable_unused_count(sb, gdp);
+		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
+			free = 0;
+		}
+		/*
+		 * Check the relative inode number against the last used
+		 * relative inode number in this group. if it is greater
+		 * we need to update the bg_itable_unused count
+		 */
+		if (ino > free)
+			ext4_itable_unused_set(sb, gdp,
+					(EXT4_INODES_PER_GROUP(sb) - ino));
+		up_read(&grp->alloc_sem);
+	}
+	ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
+	if (S_ISDIR(mode)) {
+		ext4_used_dirs_set(sb, gdp, ext4_used_dirs_count(sb, gdp) + 1);
+		if (sbi->s_log_groups_per_flex) {
+			ext4_group_t f = ext4_flex_group(sbi, group);
+
+			atomic_inc(&sbi->s_flex_groups[f].used_dirs);
+		}
+	}
+	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
+		gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+		ext4_unlock_group(sb, group);
+	}
+
+	BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
+	err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
+	if (err)
+		goto fail;
+
 	BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
 	err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
 	if (err)
@@ -1101,7 +1055,7 @@ unsigned long ext4_count_dirs(struct super_block * sb)
  * where it is called from on active part of filesystem is ext4lazyinit
  * thread, so we do not need any special locks, however we have to prevent
  * inode allocation from the current group, so we take alloc_sem lock, to
- * block ext4_claim_inode until we are finished.
+ * block ext4_new_inode() until we are finished.
  */
 int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
 				 int barrier)
@@ -1149,9 +1103,9 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
 			    sbi->s_inodes_per_block);
 
 	if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
-		ext4_error(sb, "Something is wrong with group %u\n"
-			   "Used itable blocks: %d"
-			   "itable unused count: %u\n",
+		ext4_error(sb, "Something is wrong with group %u: "
+			   "used itable blocks: %d; "
+			   "itable unused count: %u",
 			   group, used_blks,
 			   ext4_itable_unused_count(sb, gdp));
 		ret = 1;
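
Commentary: the first two hunks change how the inode bitmap is read. The old path called set_bitmap_uptodate() before submitting the I/O, so the bitmap could be marked uptodate even though the read had not completed (or had failed); the new ext4_end_bitmap_read() completion handler sets both uptodate bits only when the I/O actually succeeds, with the buffer still locked, and ext4_read_inode_bitmap() now does an explicit submit_bh()/wait_on_buffer() pair. Below is a minimal userspace sketch of that submit/complete/wait shape, assuming POSIX threads; struct buf, end_bitmap_read(), io_thread(), and wait_on_buf() are toy stand-ins, not the kernel's buffer_head API.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for a buffer_head: a done/uptodate pair plus a callback. */
struct buf {
	pthread_mutex_t lock;
	pthread_cond_t  done_cv;
	bool done;
	bool uptodate;			/* set only by the completion callback */
	void (*end_io)(struct buf *b, bool ok);
};

/* Analogue of ext4_end_bitmap_read(): runs when the "I/O" finishes. */
static void end_bitmap_read(struct buf *b, bool ok)
{
	pthread_mutex_lock(&b->lock);
	if (ok)
		b->uptodate = true;	/* mark uptodate only on success */
	b->done = true;
	pthread_cond_signal(&b->done_cv);
	pthread_mutex_unlock(&b->lock);
}

struct io_args { struct buf *b; bool ok; };

/* The "device": completes the read asynchronously via the callback. */
static void *io_thread(void *arg)
{
	struct io_args *a = arg;
	a->b->end_io(a->b, a->ok);
	return NULL;
}

/* Analogue of wait_on_buffer(): block until the callback has run. */
static void wait_on_buf(struct buf *b)
{
	pthread_mutex_lock(&b->lock);
	while (!b->done)
		pthread_cond_wait(&b->done_cv, &b->lock);
	pthread_mutex_unlock(&b->lock);
}

int main(void)
{
	static struct buf b = {
		.lock    = PTHREAD_MUTEX_INITIALIZER,
		.done_cv = PTHREAD_COND_INITIALIZER,
		.end_io  = end_bitmap_read,
	};
	struct io_args a = { .b = &b, .ok = true };
	pthread_t t;

	pthread_create(&t, NULL, io_thread, &a);	/* cf. submit_bh() */
	wait_on_buf(&b);				/* cf. wait_on_buffer() */
	pthread_join(&t, NULL);
	printf("uptodate=%d\n", (int)b.uptodate);
	return 0;
}

The point of the pattern is that the uptodate state is owned by the completion callback: a waiter that wakes up and finds it unset knows the read failed, rather than merely not-yet-finished.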
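Commentary: the core of the patch is folding ext4_claim_inode() into ext4_new_inode(). The allocation loop now scans for a zero bit with no lock held, takes ext4_lock_group() only around the ext4_test_and_set_bit() call, and on losing the race simply retries from the next bit; the journal write-access calls and block-group descriptor updates move below the got: label, so they run only once a bit has actually been won. Here is a minimal userspace sketch of that scan-then-locked-test-and-set loop, again assuming POSIX threads; bitmap_claim(), find_next_zero_bit64(), NBITS, and group_lock are hypothetical names, not the kernel helpers.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define NBITS 64				/* "inodes per group" in this toy */

static _Atomic unsigned long long bitmap;	/* one group's inode bitmap */
static pthread_mutex_t group_lock = PTHREAD_MUTEX_INITIALIZER;

/* Unlocked scan, like ext4_find_next_zero_bit(): the result is a hint. */
static int find_next_zero_bit64(unsigned long long map, int start)
{
	for (int i = start; i < NBITS; i++)
		if (!(map & (1ULL << i)))
			return i;
	return NBITS;
}

/* Returns the claimed bit number, or -1 if the group is full. */
static int bitmap_claim(void)
{
	int bit = 0;

	while ((bit = find_next_zero_bit64(bitmap, bit)) < NBITS) {
		bool lost;

		/* like ext4_lock_group(): only the test-and-set is locked */
		pthread_mutex_lock(&group_lock);
		lost = bitmap & (1ULL << bit);
		bitmap |= 1ULL << bit;
		pthread_mutex_unlock(&group_lock);

		if (!lost)
			return bit;	/* we grabbed it */
		bit++;			/* raced with another thread; rescan */
	}
	return -1;
}

int main(void)
{
	printf("claimed bit %d\n", bitmap_claim());
	printf("claimed bit %d\n", bitmap_claim());
	return 0;
}

The unlocked scan may return a stale answer; the locked test-and-set is the authoritative claim, which is what keeps the retry loop correct under concurrency while shrinking the section that holds the group lock.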