diff options
Diffstat (limited to 'fs/btrfs')
| -rw-r--r-- | fs/btrfs/backref.c | 105 | ||||
| -rw-r--r-- | fs/btrfs/btrfs_inode.h | 1 | ||||
| -rw-r--r-- | fs/btrfs/check-integrity.c | 16 | ||||
| -rw-r--r-- | fs/btrfs/ctree.c | 88 | ||||
| -rw-r--r-- | fs/btrfs/ctree.h | 11 | ||||
| -rw-r--r-- | fs/btrfs/delayed-inode.c | 18 | ||||
| -rw-r--r-- | fs/btrfs/delayed-inode.h | 3 | ||||
| -rw-r--r-- | fs/btrfs/disk-io.c | 77 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 7 | ||||
| -rw-r--r-- | fs/btrfs/inode.c | 73 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 118 | ||||
| -rw-r--r-- | fs/btrfs/ordered-data.c | 22 | ||||
| -rw-r--r-- | fs/btrfs/rcu-string.h | 56 | ||||
| -rw-r--r-- | fs/btrfs/scrub.c | 30 | ||||
| -rw-r--r-- | fs/btrfs/super.c | 33 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 14 | ||||
| -rw-r--r-- | fs/btrfs/volumes.c | 92 | ||||
| -rw-r--r-- | fs/btrfs/volumes.h | 2 | 
18 files changed, 535 insertions, 231 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 3f75895c919..7301cdb4b2c 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -179,60 +179,74 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,  static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,  				struct ulist *parents, int level, -				struct btrfs_key *key, u64 wanted_disk_byte, +				struct btrfs_key *key_for_search, u64 time_seq, +				u64 wanted_disk_byte,  				const u64 *extent_item_pos)  { -	int ret; -	int slot = path->slots[level]; -	struct extent_buffer *eb = path->nodes[level]; +	int ret = 0; +	int slot; +	struct extent_buffer *eb; +	struct btrfs_key key;  	struct btrfs_file_extent_item *fi;  	struct extent_inode_elem *eie = NULL;  	u64 disk_byte; -	u64 wanted_objectid = key->objectid; -add_parent: -	if (level == 0 && extent_item_pos) { -		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); -		ret = check_extent_in_eb(key, eb, fi, *extent_item_pos, &eie); +	if (level != 0) { +		eb = path->nodes[level]; +		ret = ulist_add(parents, eb->start, 0, GFP_NOFS);  		if (ret < 0)  			return ret; -	} -	ret = ulist_add(parents, eb->start, (unsigned long)eie, GFP_NOFS); -	if (ret < 0) -		return ret; - -	if (level != 0)  		return 0; +	}  	/* -	 * if the current leaf is full with EXTENT_DATA items, we must -	 * check the next one if that holds a reference as well. -	 * ref->count cannot be used to skip this check. -	 * repeat this until we don't find any additional EXTENT_DATA items. +	 * We normally enter this function with the path already pointing to +	 * the first item to check. But sometimes, we may enter it with +	 * slot==nritems. In that case, go to the next leaf before we continue.  	 */ -	while (1) { -		eie = NULL; -		ret = btrfs_next_leaf(root, path); -		if (ret < 0) -			return ret; -		if (ret) -			return 0; +	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) +		ret = btrfs_next_old_leaf(root, path, time_seq); +	while (!ret) {  		eb = path->nodes[0]; -		for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) { -			btrfs_item_key_to_cpu(eb, key, slot); -			if (key->objectid != wanted_objectid || -			    key->type != BTRFS_EXTENT_DATA_KEY) -				return 0; -			fi = btrfs_item_ptr(eb, slot, -						struct btrfs_file_extent_item); -			disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); -			if (disk_byte == wanted_disk_byte) -				goto add_parent; +		slot = path->slots[0]; + +		btrfs_item_key_to_cpu(eb, &key, slot); + +		if (key.objectid != key_for_search->objectid || +		    key.type != BTRFS_EXTENT_DATA_KEY) +			break; + +		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); +		disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); + +		if (disk_byte == wanted_disk_byte) { +			eie = NULL; +			if (extent_item_pos) { +				ret = check_extent_in_eb(&key, eb, fi, +						*extent_item_pos, +						&eie); +				if (ret < 0) +					break; +			} +			if (!ret) { +				ret = ulist_add(parents, eb->start, +						(unsigned long)eie, GFP_NOFS); +				if (ret < 0) +					break; +				if (!extent_item_pos) { +					ret = btrfs_next_old_leaf(root, path, +							time_seq); +					continue; +				} +			}  		} +		ret = btrfs_next_old_item(root, path, time_seq);  	} -	return 0; +	if (ret > 0) +		ret = 0; +	return ret;  }  /* @@ -249,7 +263,6 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,  	struct btrfs_path *path;  	struct btrfs_root *root;  	struct btrfs_key root_key; -	struct btrfs_key key = {0};  	struct extent_buffer *eb;  	int ret = 0;  	int root_level; @@ -294,19 +307,9 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,  		goto out;  	} -	if (level == 0) { -		if (ret == 1 && path->slots[0] >= btrfs_header_nritems(eb)) { -			ret = btrfs_next_leaf(root, path); -			if (ret) -				goto out; -			eb = path->nodes[0]; -		} - -		btrfs_item_key_to_cpu(eb, &key, path->slots[0]); -	} - -	ret = add_all_parents(root, path, parents, level, &key, -				ref->wanted_disk_byte, extent_item_pos); +	ret = add_all_parents(root, path, parents, level, &ref->key_for_search, +				time_seq, ref->wanted_disk_byte, +				extent_item_pos);  out:  	btrfs_free_path(path);  	return ret; diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index e616f8872e6..12394a90d60 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -37,6 +37,7 @@  #define BTRFS_INODE_IN_DEFRAG			3  #define BTRFS_INODE_DELALLOC_META_RESERVED	4  #define BTRFS_INODE_HAS_ORPHAN_ITEM		5 +#define BTRFS_INODE_HAS_ASYNC_EXTENT		6  /* in memory btrfs inode */  struct btrfs_inode { diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 9cebb1fd6a3..da6e9364a5e 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -93,6 +93,7 @@  #include "print-tree.h"  #include "locking.h"  #include "check-integrity.h" +#include "rcu-string.h"  #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000  #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 @@ -843,13 +844,14 @@ static int btrfsic_process_superblock_dev_mirror(  		superblock_tmp->never_written = 0;  		superblock_tmp->mirror_num = 1 + superblock_mirror_num;  		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) -			printk(KERN_INFO "New initial S-block (bdev %p, %s)" -			       " @%llu (%s/%llu/%d)\n", -			       superblock_bdev, device->name, -			       (unsigned long long)dev_bytenr, -			       dev_state->name, -			       (unsigned long long)dev_bytenr, -			       superblock_mirror_num); +			printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)" +				     " @%llu (%s/%llu/%d)\n", +				     superblock_bdev, +				     rcu_str_deref(device->name), +				     (unsigned long long)dev_bytenr, +				     dev_state->name, +				     (unsigned long long)dev_bytenr, +				     superblock_mirror_num);  		list_add(&superblock_tmp->all_blocks_node,  			 &state->all_blocks_list);  		btrfsic_block_hashtable_add(superblock_tmp, diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d7a96cfdc50..15cbc2bf4ff 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -467,6 +467,15 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,  	return 0;  } +/* + * This allocates memory and gets a tree modification sequence number when + * needed. + * + * Returns 0 when no sequence number is needed, < 0 on error. + * Returns 1 when a sequence number was added. In this case, + * fs_info->tree_mod_seq_lock was acquired and must be released by the caller + * after inserting into the rb tree. + */  static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,  				 struct tree_mod_elem **tm_ret)  { @@ -491,11 +500,11 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,  		 */  		kfree(tm);  		seq = 0; +		spin_unlock(&fs_info->tree_mod_seq_lock);  	} else {  		__get_tree_mod_seq(fs_info, &tm->elem);  		seq = tm->elem.seq;  	} -	spin_unlock(&fs_info->tree_mod_seq_lock);  	return seq;  } @@ -521,7 +530,9 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,  	tm->slot = slot;  	tm->generation = btrfs_node_ptr_generation(eb, slot); -	return __tree_mod_log_insert(fs_info, tm); +	ret = __tree_mod_log_insert(fs_info, tm); +	spin_unlock(&fs_info->tree_mod_seq_lock); +	return ret;  }  static noinline int @@ -559,7 +570,9 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,  	tm->move.nr_items = nr_items;  	tm->op = MOD_LOG_MOVE_KEYS; -	return __tree_mod_log_insert(fs_info, tm); +	ret = __tree_mod_log_insert(fs_info, tm); +	spin_unlock(&fs_info->tree_mod_seq_lock); +	return ret;  }  static noinline int @@ -580,7 +593,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,  	tm->generation = btrfs_header_generation(old_root);  	tm->op = MOD_LOG_ROOT_REPLACE; -	return __tree_mod_log_insert(fs_info, tm); +	ret = __tree_mod_log_insert(fs_info, tm); +	spin_unlock(&fs_info->tree_mod_seq_lock); +	return ret;  }  static struct tree_mod_elem * @@ -1023,6 +1038,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,  		looped = 1;  	} +	/* if there's no old root to return, return what we found instead */ +	if (!found) +		found = tm; +  	return found;  } @@ -1143,22 +1162,36 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,  	return eb_rewin;  } +/* + * get_old_root() rewinds the state of @root's root node to the given @time_seq + * value. If there are no changes, the current root->root_node is returned. If + * anything changed in between, there's a fresh buffer allocated on which the + * rewind operations are done. In any case, the returned buffer is read locked. + * Returns NULL on error (with no locks held). + */  static inline struct extent_buffer *  get_old_root(struct btrfs_root *root, u64 time_seq)  {  	struct tree_mod_elem *tm;  	struct extent_buffer *eb; -	struct tree_mod_root *old_root; -	u64 old_generation; +	struct tree_mod_root *old_root = NULL; +	u64 old_generation = 0; +	u64 logical; +	eb = btrfs_read_lock_root_node(root);  	tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);  	if (!tm)  		return root->node; -	old_root = &tm->old_root; -	old_generation = tm->generation; +	if (tm->op == MOD_LOG_ROOT_REPLACE) { +		old_root = &tm->old_root; +		old_generation = tm->generation; +		logical = old_root->logical; +	} else { +		logical = root->node->start; +	} -	tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); +	tm = tree_mod_log_search(root->fs_info, logical, time_seq);  	/*  	 * there was an item in the log when __tree_mod_log_oldest_root  	 * returned. this one must not go away, because the time_seq passed to @@ -1166,22 +1199,25 @@ get_old_root(struct btrfs_root *root, u64 time_seq)  	 */  	BUG_ON(!tm); -	if (old_root->logical == root->node->start) { -		/* there are logged operations for the current root */ -		eb = btrfs_clone_extent_buffer(root->node); -	} else { -		/* there's a root replace operation for the current root */ +	if (old_root)  		eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,  					       root->nodesize); +	else +		eb = btrfs_clone_extent_buffer(root->node); +	btrfs_tree_read_unlock(root->node); +	free_extent_buffer(root->node); +	if (!eb) +		return NULL; +	btrfs_tree_read_lock(eb); +	if (old_root) {  		btrfs_set_header_bytenr(eb, eb->start);  		btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);  		btrfs_set_header_owner(eb, root->root_key.objectid); +		btrfs_set_header_level(eb, old_root->level); +		btrfs_set_header_generation(eb, old_generation);  	} -	if (!eb) -		return NULL; -	btrfs_set_header_level(eb, old_root->level); -	btrfs_set_header_generation(eb, old_generation);  	__tree_mod_log_rewind(eb, time_seq, tm); +	extent_buffer_get(eb);  	return eb;  } @@ -1650,8 +1686,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,  	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)  		return 0; -	btrfs_header_nritems(mid); -  	left = read_node_slot(root, parent, pslot - 1);  	if (left) {  		btrfs_tree_lock(left); @@ -1681,7 +1715,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,  		wret = push_node_left(trans, root, left, mid, 1);  		if (wret < 0)  			ret = wret; -		btrfs_header_nritems(mid);  	}  	/* @@ -2615,9 +2648,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,  again:  	b = get_old_root(root, time_seq); -	extent_buffer_get(b);  	level = btrfs_header_level(b); -	btrfs_tree_read_lock(b);  	p->locks[level] = BTRFS_READ_LOCK;  	while (b) { @@ -5001,6 +5032,12 @@ next:   */  int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)  { +	return btrfs_next_old_leaf(root, path, 0); +} + +int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, +			u64 time_seq) +{  	int slot;  	int level;  	struct extent_buffer *c; @@ -5025,7 +5062,10 @@ again:  	path->keep_locks = 1;  	path->leave_spinning = 1; -	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); +	if (time_seq) +		ret = btrfs_search_old_slot(root, &key, path, time_seq); +	else +		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);  	path->keep_locks = 0;  	if (ret < 0) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0236d03c673..fa5c45b3907 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2753,13 +2753,20 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,  }  int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); -static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) +int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, +			u64 time_seq); +static inline int btrfs_next_old_item(struct btrfs_root *root, +				      struct btrfs_path *p, u64 time_seq)  {  	++p->slots[0];  	if (p->slots[0] >= btrfs_header_nritems(p->nodes[0])) -		return btrfs_next_leaf(root, p); +		return btrfs_next_old_leaf(root, p, time_seq);  	return 0;  } +static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) +{ +	return btrfs_next_old_item(root, p, 0); +}  int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);  int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);  int __must_check btrfs_drop_snapshot(struct btrfs_root *root, diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index c18d0442ae6..2399f408691 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1879,3 +1879,21 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)  		}  	}  } + +void btrfs_destroy_delayed_inodes(struct btrfs_root *root) +{ +	struct btrfs_delayed_root *delayed_root; +	struct btrfs_delayed_node *curr_node, *prev_node; + +	delayed_root = btrfs_get_delayed_root(root); + +	curr_node = btrfs_first_delayed_node(delayed_root); +	while (curr_node) { +		__btrfs_kill_delayed_node(curr_node); + +		prev_node = curr_node; +		curr_node = btrfs_next_delayed_node(curr_node); +		btrfs_release_delayed_node(prev_node); +	} +} + diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 7083d08b2a2..f5aa4023d3e 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -124,6 +124,9 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev);  /* Used for drop dead root */  void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); +/* Used for clean the transaction */ +void btrfs_destroy_delayed_inodes(struct btrfs_root *root); +  /* Used for readdir() */  void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,  			     struct list_head *del_list); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7ae51decf6d..7b845ff4af9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -44,6 +44,7 @@  #include "free-space-cache.h"  #include "inode-map.h"  #include "check-integrity.h" +#include "rcu-string.h"  static struct extent_io_ops btree_extent_io_ops;  static void end_workqueue_fn(struct btrfs_work *work); @@ -2118,7 +2119,7 @@ int open_ctree(struct super_block *sb,  	features = btrfs_super_incompat_flags(disk_super);  	features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; -	if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) +	if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)  		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;  	/* @@ -2575,8 +2576,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)  		struct btrfs_device *device = (struct btrfs_device *)  			bh->b_private; -		printk_ratelimited(KERN_WARNING "lost page write due to " -				   "I/O error on %s\n", device->name); +		printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to " +					  "I/O error on %s\n", +					  rcu_str_deref(device->name));  		/* note, we dont' set_buffer_write_io_error because we have  		 * our own ways of dealing with the IO errors  		 */ @@ -2749,8 +2751,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)  		wait_for_completion(&device->flush_wait);  		if (bio_flagged(bio, BIO_EOPNOTSUPP)) { -			printk("btrfs: disabling barriers on dev %s\n", -			       device->name); +			printk_in_rcu("btrfs: disabling barriers on dev %s\n", +				      rcu_str_deref(device->name));  			device->nobarriers = 1;  		}  		if (!bio_flagged(bio, BIO_UPTODATE)) { @@ -3400,7 +3402,6 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,  	delayed_refs = &trans->delayed_refs; -again:  	spin_lock(&delayed_refs->lock);  	if (delayed_refs->num_entries == 0) {  		spin_unlock(&delayed_refs->lock); @@ -3408,31 +3409,37 @@ again:  		return ret;  	} -	node = rb_first(&delayed_refs->root); -	while (node) { +	while ((node = rb_first(&delayed_refs->root)) != NULL) {  		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); -		node = rb_next(node); - -		ref->in_tree = 0; -		rb_erase(&ref->rb_node, &delayed_refs->root); -		delayed_refs->num_entries--;  		atomic_set(&ref->refs, 1);  		if (btrfs_delayed_ref_is_head(ref)) {  			struct btrfs_delayed_ref_head *head;  			head = btrfs_delayed_node_to_head(ref); -			spin_unlock(&delayed_refs->lock); -			mutex_lock(&head->mutex); +			if (!mutex_trylock(&head->mutex)) { +				atomic_inc(&ref->refs); +				spin_unlock(&delayed_refs->lock); + +				/* Need to wait for the delayed ref to run */ +				mutex_lock(&head->mutex); +				mutex_unlock(&head->mutex); +				btrfs_put_delayed_ref(ref); + +				spin_lock(&delayed_refs->lock); +				continue; +			} +  			kfree(head->extent_op);  			delayed_refs->num_heads--;  			if (list_empty(&head->cluster))  				delayed_refs->num_heads_ready--;  			list_del_init(&head->cluster); -			mutex_unlock(&head->mutex); -			btrfs_put_delayed_ref(ref); -			goto again;  		} +		ref->in_tree = 0; +		rb_erase(&ref->rb_node, &delayed_refs->root); +		delayed_refs->num_entries--; +  		spin_unlock(&delayed_refs->lock);  		btrfs_put_delayed_ref(ref); @@ -3520,11 +3527,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,  			     &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,  					       offset >> PAGE_CACHE_SHIFT);  			spin_unlock(&dirty_pages->buffer_lock); -			if (eb) { +			if (eb)  				ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,  							 &eb->bflags); -				atomic_set(&eb->refs, 1); -			}  			if (PageWriteback(page))  				end_page_writeback(page); @@ -3538,8 +3543,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,  				spin_unlock_irq(&page->mapping->tree_lock);  			} -			page->mapping->a_ops->invalidatepage(page, 0);  			unlock_page(page); +			page_cache_release(page);  		}  	} @@ -3553,8 +3558,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,  	u64 start;  	u64 end;  	int ret; +	bool loop = true;  	unpin = pinned_extents; +again:  	while (1) {  		ret = find_first_extent_bit(unpin, 0, &start, &end,  					    EXTENT_DIRTY); @@ -3572,6 +3579,15 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,  		cond_resched();  	} +	if (loop) { +		if (unpin == &root->fs_info->freed_extents[0]) +			unpin = &root->fs_info->freed_extents[1]; +		else +			unpin = &root->fs_info->freed_extents[0]; +		loop = false; +		goto again; +	} +  	return 0;  } @@ -3585,21 +3601,23 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,  	/* FIXME: cleanup wait for commit */  	cur_trans->in_commit = 1;  	cur_trans->blocked = 1; -	if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) -		wake_up(&root->fs_info->transaction_blocked_wait); +	wake_up(&root->fs_info->transaction_blocked_wait);  	cur_trans->blocked = 0; -	if (waitqueue_active(&root->fs_info->transaction_wait)) -		wake_up(&root->fs_info->transaction_wait); +	wake_up(&root->fs_info->transaction_wait);  	cur_trans->commit_done = 1; -	if (waitqueue_active(&cur_trans->commit_wait)) -		wake_up(&cur_trans->commit_wait); +	wake_up(&cur_trans->commit_wait); + +	btrfs_destroy_delayed_inodes(root); +	btrfs_assert_delayed_root_empty(root);  	btrfs_destroy_pending_snapshots(cur_trans);  	btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,  				     EXTENT_DIRTY); +	btrfs_destroy_pinned_extent(root, +				    root->fs_info->pinned_extents);  	/*  	memset(cur_trans, 0, sizeof(*cur_trans)); @@ -3648,6 +3666,9 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)  		if (waitqueue_active(&t->commit_wait))  			wake_up(&t->commit_wait); +		btrfs_destroy_delayed_inodes(root); +		btrfs_assert_delayed_root_empty(root); +  		btrfs_destroy_pending_snapshots(t);  		btrfs_destroy_delalloc_inodes(root); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2c8f7b20461..aaa12c1eb34 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -20,6 +20,7 @@  #include "volumes.h"  #include "check-integrity.h"  #include "locking.h" +#include "rcu-string.h"  static struct kmem_cache *extent_state_cache;  static struct kmem_cache *extent_buffer_cache; @@ -1917,9 +1918,9 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,  		return -EIO;  	} -	printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s " -			"sector %llu)\n", page->mapping->host->i_ino, start, -			dev->name, sector); +	printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu " +		      "(dev %s sector %llu)\n", page->mapping->host->i_ino, +		      start, rcu_str_deref(dev->name), sector);  	bio_put(bio);  	return 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f6ab6f5e635..d8bb0dbc494 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -830,7 +830,7 @@ static noinline int cow_file_range(struct inode *inode,  	if (IS_ERR(trans)) {  		extent_clear_unlock_delalloc(inode,  			     &BTRFS_I(inode)->io_tree, -			     start, end, NULL, +			     start, end, locked_page,  			     EXTENT_CLEAR_UNLOCK_PAGE |  			     EXTENT_CLEAR_UNLOCK |  			     EXTENT_CLEAR_DELALLOC | @@ -963,7 +963,7 @@ out:  out_unlock:  	extent_clear_unlock_delalloc(inode,  		     &BTRFS_I(inode)->io_tree, -		     start, end, NULL, +		     start, end, locked_page,  		     EXTENT_CLEAR_UNLOCK_PAGE |  		     EXTENT_CLEAR_UNLOCK |  		     EXTENT_CLEAR_DELALLOC | @@ -986,8 +986,10 @@ static noinline void async_cow_start(struct btrfs_work *work)  	compress_file_range(async_cow->inode, async_cow->locked_page,  			    async_cow->start, async_cow->end, async_cow,  			    &num_added); -	if (num_added == 0) +	if (num_added == 0) { +		btrfs_add_delayed_iput(async_cow->inode);  		async_cow->inode = NULL; +	}  }  /* @@ -1020,6 +1022,8 @@ static noinline void async_cow_free(struct btrfs_work *work)  {  	struct async_cow *async_cow;  	async_cow = container_of(work, struct async_cow, work); +	if (async_cow->inode) +		btrfs_add_delayed_iput(async_cow->inode);  	kfree(async_cow);  } @@ -1038,7 +1042,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,  	while (start < end) {  		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);  		BUG_ON(!async_cow); /* -ENOMEM */ -		async_cow->inode = inode; +		async_cow->inode = igrab(inode);  		async_cow->root = root;  		async_cow->locked_page = locked_page;  		async_cow->start = start; @@ -1136,8 +1140,18 @@ static noinline int run_delalloc_nocow(struct inode *inode,  	u64 ino = btrfs_ino(inode);  	path = btrfs_alloc_path(); -	if (!path) +	if (!path) { +		extent_clear_unlock_delalloc(inode, +			     &BTRFS_I(inode)->io_tree, +			     start, end, locked_page, +			     EXTENT_CLEAR_UNLOCK_PAGE | +			     EXTENT_CLEAR_UNLOCK | +			     EXTENT_CLEAR_DELALLOC | +			     EXTENT_CLEAR_DIRTY | +			     EXTENT_SET_WRITEBACK | +			     EXTENT_END_WRITEBACK);  		return -ENOMEM; +	}  	nolock = btrfs_is_free_space_inode(root, inode); @@ -1147,6 +1161,15 @@ static noinline int run_delalloc_nocow(struct inode *inode,  		trans = btrfs_join_transaction(root);  	if (IS_ERR(trans)) { +		extent_clear_unlock_delalloc(inode, +			     &BTRFS_I(inode)->io_tree, +			     start, end, locked_page, +			     EXTENT_CLEAR_UNLOCK_PAGE | +			     EXTENT_CLEAR_UNLOCK | +			     EXTENT_CLEAR_DELALLOC | +			     EXTENT_CLEAR_DIRTY | +			     EXTENT_SET_WRITEBACK | +			     EXTENT_END_WRITEBACK);  		btrfs_free_path(path);  		return PTR_ERR(trans);  	} @@ -1327,8 +1350,11 @@ out_check:  	}  	btrfs_release_path(path); -	if (cur_offset <= end && cow_start == (u64)-1) +	if (cur_offset <= end && cow_start == (u64)-1) {  		cow_start = cur_offset; +		cur_offset = end; +	} +  	if (cow_start != (u64)-1) {  		ret = cow_file_range(inode, locked_page, cow_start, end,  				     page_started, nr_written, 1); @@ -1347,6 +1373,17 @@ error:  	if (!ret)  		ret = err; +	if (ret && cur_offset < end) +		extent_clear_unlock_delalloc(inode, +			     &BTRFS_I(inode)->io_tree, +			     cur_offset, end, locked_page, +			     EXTENT_CLEAR_UNLOCK_PAGE | +			     EXTENT_CLEAR_UNLOCK | +			     EXTENT_CLEAR_DELALLOC | +			     EXTENT_CLEAR_DIRTY | +			     EXTENT_SET_WRITEBACK | +			     EXTENT_END_WRITEBACK); +  	btrfs_free_path(path);  	return ret;  } @@ -1361,20 +1398,23 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,  	int ret;  	struct btrfs_root *root = BTRFS_I(inode)->root; -	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) +	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) {  		ret = run_delalloc_nocow(inode, locked_page, start, end,  					 page_started, 1, nr_written); -	else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) +	} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) {  		ret = run_delalloc_nocow(inode, locked_page, start, end,  					 page_started, 0, nr_written); -	else if (!btrfs_test_opt(root, COMPRESS) && -		 !(BTRFS_I(inode)->force_compress) && -		 !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) +	} else if (!btrfs_test_opt(root, COMPRESS) && +		   !(BTRFS_I(inode)->force_compress) && +		   !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) {  		ret = cow_file_range(inode, locked_page, start, end,  				      page_started, nr_written, 1); -	else +	} else { +		set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, +			&BTRFS_I(inode)->runtime_flags);  		ret = cow_file_range_async(inode, locked_page, start, end,  					   page_started, nr_written); +	}  	return ret;  } @@ -7054,10 +7094,13 @@ static void fixup_inode_flags(struct inode *dir, struct inode *inode)  	else  		b_inode->flags &= ~BTRFS_INODE_NODATACOW; -	if (b_dir->flags & BTRFS_INODE_COMPRESS) +	if (b_dir->flags & BTRFS_INODE_COMPRESS) {  		b_inode->flags |= BTRFS_INODE_COMPRESS; -	else -		b_inode->flags &= ~BTRFS_INODE_COMPRESS; +		b_inode->flags &= ~BTRFS_INODE_NOCOMPRESS; +	} else { +		b_inode->flags &= ~(BTRFS_INODE_COMPRESS | +				    BTRFS_INODE_NOCOMPRESS); +	}  }  static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 24b776c08d9..0e92e576300 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -52,6 +52,7 @@  #include "locking.h"  #include "inode-map.h"  #include "backref.h" +#include "rcu-string.h"  /* Mask out flags that are inappropriate for the given type of inode. */  static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) @@ -785,39 +786,57 @@ none:  	return -ENOENT;  } -/* - * Validaty check of prev em and next em: - * 1) no prev/next em - * 2) prev/next em is an hole/inline extent - */ -static int check_adjacent_extents(struct inode *inode, struct extent_map *em) +static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)  {  	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; -	struct extent_map *prev = NULL, *next = NULL; -	int ret = 0; +	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; +	struct extent_map *em; +	u64 len = PAGE_CACHE_SIZE; +	/* +	 * hopefully we have this extent in the tree already, try without +	 * the full extent lock +	 */  	read_lock(&em_tree->lock); -	prev = lookup_extent_mapping(em_tree, em->start - 1, (u64)-1); -	next = lookup_extent_mapping(em_tree, em->start + em->len, (u64)-1); +	em = lookup_extent_mapping(em_tree, start, len);  	read_unlock(&em_tree->lock); -	if ((!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) && -	    (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)) -		ret = 1; -	free_extent_map(prev); -	free_extent_map(next); +	if (!em) { +		/* get the big lock and read metadata off disk */ +		lock_extent(io_tree, start, start + len - 1); +		em = btrfs_get_extent(inode, NULL, 0, start, len, 0); +		unlock_extent(io_tree, start, start + len - 1); + +		if (IS_ERR(em)) +			return NULL; +	} + +	return em; +} + +static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) +{ +	struct extent_map *next; +	bool ret = true; +	/* this is the last extent */ +	if (em->start + em->len >= i_size_read(inode)) +		return false; + +	next = defrag_lookup_extent(inode, em->start + em->len); +	if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) +		ret = false; + +	free_extent_map(next);  	return ret;  } -static int should_defrag_range(struct inode *inode, u64 start, u64 len, -			       int thresh, u64 *last_len, u64 *skip, -			       u64 *defrag_end) +static int should_defrag_range(struct inode *inode, u64 start, int thresh, +			       u64 *last_len, u64 *skip, u64 *defrag_end)  { -	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; -	struct extent_map *em = NULL; -	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; +	struct extent_map *em;  	int ret = 1; +	bool next_mergeable = true;  	/*  	 * make sure that once we start defragging an extent, we keep on @@ -828,23 +847,9 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,  	*skip = 0; -	/* -	 * hopefully we have this extent in the tree already, try without -	 * the full extent lock -	 */ -	read_lock(&em_tree->lock); -	em = lookup_extent_mapping(em_tree, start, len); -	read_unlock(&em_tree->lock); - -	if (!em) { -		/* get the big lock and read metadata off disk */ -		lock_extent(io_tree, start, start + len - 1); -		em = btrfs_get_extent(inode, NULL, 0, start, len, 0); -		unlock_extent(io_tree, start, start + len - 1); - -		if (IS_ERR(em)) -			return 0; -	} +	em = defrag_lookup_extent(inode, start); +	if (!em) +		return 0;  	/* this will cover holes, and inline extents */  	if (em->block_start >= EXTENT_MAP_LAST_BYTE) { @@ -852,18 +857,15 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,  		goto out;  	} -	/* If we have nothing to merge with us, just skip. */ -	if (check_adjacent_extents(inode, em)) { -		ret = 0; -		goto out; -	} +	next_mergeable = defrag_check_next_extent(inode, em);  	/* -	 * we hit a real extent, if it is big don't bother defragging it again +	 * we hit a real extent, if it is big or the next extent is not a +	 * real extent, don't bother defragging it  	 */ -	if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) +	if ((*last_len == 0 || *last_len >= thresh) && +	    (em->len >= thresh || !next_mergeable))  		ret = 0; -  out:  	/*  	 * last_len ends up being a counter of how many bytes we've defragged. @@ -1142,8 +1144,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,  			break;  		if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, -					 PAGE_CACHE_SIZE, extent_thresh, -					 &last_len, &skip, &defrag_end)) { +					 extent_thresh, &last_len, &skip, +					 &defrag_end)) {  			unsigned long next;  			/*  			 * the should_defrag function tells us how much to skip @@ -1304,6 +1306,14 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,  		ret = -EINVAL;  		goto out_free;  	} +	if (device->fs_devices && device->fs_devices->seeding) { +		printk(KERN_INFO "btrfs: resizer unable to apply on " +		       "seeding device %llu\n", +		       (unsigned long long)devid); +		ret = -EINVAL; +		goto out_free; +	} +  	if (!strcmp(sizestr, "max"))  		new_size = device->bdev->bd_inode->i_size;  	else { @@ -1345,8 +1355,9 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,  	do_div(new_size, root->sectorsize);  	new_size *= root->sectorsize; -	printk(KERN_INFO "btrfs: new size for %s is %llu\n", -		device->name, (unsigned long long)new_size); +	printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", +		      rcu_str_deref(device->name), +		      (unsigned long long)new_size);  	if (new_size > old_size) {  		trans = btrfs_start_transaction(root, 0); @@ -2264,7 +2275,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)  	di_args->total_bytes = dev->total_bytes;  	memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));  	if (dev->name) { -		strncpy(di_args->path, dev->name, sizeof(di_args->path)); +		struct rcu_string *name; + +		rcu_read_lock(); +		name = rcu_dereference(dev->name); +		strncpy(di_args->path, name->str, sizeof(di_args->path)); +		rcu_read_unlock();  		di_args->path[sizeof(di_args->path) - 1] = 0;  	} else {  		di_args->path[0] = '\0'; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 9e138cdc36c..643335a4fe3 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -627,7 +627,27 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)  	/* start IO across the range first to instantiate any delalloc  	 * extents  	 */ -	filemap_write_and_wait_range(inode->i_mapping, start, orig_end); +	filemap_fdatawrite_range(inode->i_mapping, start, orig_end); + +	/* +	 * So with compression we will find and lock a dirty page and clear the +	 * first one as dirty, setup an async extent, and immediately return +	 * with the entire range locked but with nobody actually marked with +	 * writeback.  So we can't just filemap_write_and_wait_range() and +	 * expect it to work since it will just kick off a thread to do the +	 * actual work.  So we need to call filemap_fdatawrite_range _again_ +	 * since it will wait on the page lock, which won't be unlocked until +	 * after the pages have been marked as writeback and so we're good to go +	 * from there.  We have to do this otherwise we'll miss the ordered +	 * extents and that results in badness.  Please Josef, do not think you +	 * know better and pull this out at some point in the future, it is +	 * right and you are wrong. +	 */ +	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, +		     &BTRFS_I(inode)->runtime_flags)) +		filemap_fdatawrite_range(inode->i_mapping, start, orig_end); + +	filemap_fdatawait_range(inode->i_mapping, start, orig_end);  	end = orig_end;  	found = 0; diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h new file mode 100644 index 00000000000..9e111e4576d --- /dev/null +++ b/fs/btrfs/rcu-string.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2012 Red Hat.  All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +struct rcu_string { +	struct rcu_head rcu; +	char str[0]; +}; + +static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask) +{ +	size_t len = strlen(src) + 1; +	struct rcu_string *ret = kzalloc(sizeof(struct rcu_string) + +					 (len * sizeof(char)), mask); +	if (!ret) +		return ret; +	strncpy(ret->str, src, len); +	return ret; +} + +static inline void rcu_string_free(struct rcu_string *str) +{ +	if (str) +		kfree_rcu(str, rcu); +} + +#define printk_in_rcu(fmt, ...) do {	\ +	rcu_read_lock();		\ +	printk(fmt, __VA_ARGS__);	\ +	rcu_read_unlock();		\ +} while (0) + +#define printk_ratelimited_in_rcu(fmt, ...) do {	\ +	rcu_read_lock();				\ +	printk_ratelimited(fmt, __VA_ARGS__);		\ +	rcu_read_unlock();				\ +} while (0) + +#define rcu_str_deref(rcu_str) ({				\ +	struct rcu_string *__str = rcu_dereference(rcu_str);	\ +	__str->str;						\ +}) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index a38cfa4f251..b223620cd5a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -26,6 +26,7 @@  #include "backref.h"  #include "extent_io.h"  #include "check-integrity.h" +#include "rcu-string.h"  /*   * This is only the first step towards a full-features scrub. It reads all @@ -320,10 +321,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)  	 * hold all of the paths here  	 */  	for (i = 0; i < ipath->fspath->elem_cnt; ++i) -		printk(KERN_WARNING "btrfs: %s at logical %llu on dev " +		printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "  			"%s, sector %llu, root %llu, inode %llu, offset %llu, "  			"length %llu, links %u (path: %s)\n", swarn->errstr, -			swarn->logical, swarn->dev->name, +			swarn->logical, rcu_str_deref(swarn->dev->name),  			(unsigned long long)swarn->sector, root, inum, offset,  			min(isize - offset, (u64)PAGE_SIZE), nlink,  			(char *)(unsigned long)ipath->fspath->val[i]); @@ -332,10 +333,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)  	return 0;  err: -	printk(KERN_WARNING "btrfs: %s at logical %llu on dev " +	printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "  		"%s, sector %llu, root %llu, inode %llu, offset %llu: path "  		"resolving failed with ret=%d\n", swarn->errstr, -		swarn->logical, swarn->dev->name, +		swarn->logical, rcu_str_deref(swarn->dev->name),  		(unsigned long long)swarn->sector, root, inum, offset, ret);  	free_ipath(ipath); @@ -390,10 +391,11 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)  		do {  			ret = tree_backref_for_extent(&ptr, eb, ei, item_size,  							&ref_root, &ref_level); -			printk(KERN_WARNING +			printk_in_rcu(KERN_WARNING  				"btrfs: %s at logical %llu on dev %s, "  				"sector %llu: metadata %s (level %d) in tree " -				"%llu\n", errstr, swarn.logical, dev->name, +				"%llu\n", errstr, swarn.logical, +				rcu_str_deref(dev->name),  				(unsigned long long)swarn.sector,  				ref_level ? "node" : "leaf",  				ret < 0 ? -1 : ref_level, @@ -580,9 +582,11 @@ out:  		spin_lock(&sdev->stat_lock);  		++sdev->stat.uncorrectable_errors;  		spin_unlock(&sdev->stat_lock); -		printk_ratelimited(KERN_ERR + +		printk_ratelimited_in_rcu(KERN_ERR  			"btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", -			(unsigned long long)fixup->logical, sdev->dev->name); +			(unsigned long long)fixup->logical, +			rcu_str_deref(sdev->dev->name));  	}  	btrfs_free_path(path); @@ -936,18 +940,20 @@ corrected_error:  			spin_lock(&sdev->stat_lock);  			sdev->stat.corrected_errors++;  			spin_unlock(&sdev->stat_lock); -			printk_ratelimited(KERN_ERR +			printk_ratelimited_in_rcu(KERN_ERR  				"btrfs: fixed up error at logical %llu on dev %s\n", -				(unsigned long long)logical, sdev->dev->name); +				(unsigned long long)logical, +				rcu_str_deref(sdev->dev->name));  		}  	} else {  did_not_correct_error:  		spin_lock(&sdev->stat_lock);  		sdev->stat.uncorrectable_errors++;  		spin_unlock(&sdev->stat_lock); -		printk_ratelimited(KERN_ERR +		printk_ratelimited_in_rcu(KERN_ERR  			"btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", -			(unsigned long long)logical, sdev->dev->name); +			(unsigned long long)logical, +			rcu_str_deref(sdev->dev->name));  	}  out: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 96eb9fef7bd..0eb9a4da069 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -54,6 +54,7 @@  #include "version.h"  #include "export.h"  #include "compression.h" +#include "rcu-string.h"  #define CREATE_TRACE_POINTS  #include <trace/events/btrfs.h> @@ -1482,12 +1483,44 @@ static void btrfs_fs_dirty_inode(struct inode *inode, int flags)  				   "error %d\n", btrfs_ino(inode), ret);  } +static int btrfs_show_devname(struct seq_file *m, struct dentry *root) +{ +	struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); +	struct btrfs_fs_devices *cur_devices; +	struct btrfs_device *dev, *first_dev = NULL; +	struct list_head *head; +	struct rcu_string *name; + +	mutex_lock(&fs_info->fs_devices->device_list_mutex); +	cur_devices = fs_info->fs_devices; +	while (cur_devices) { +		head = &cur_devices->devices; +		list_for_each_entry(dev, head, dev_list) { +			if (!first_dev || dev->devid < first_dev->devid) +				first_dev = dev; +		} +		cur_devices = cur_devices->seed; +	} + +	if (first_dev) { +		rcu_read_lock(); +		name = rcu_dereference(first_dev->name); +		seq_escape(m, name->str, " \t\n\\"); +		rcu_read_unlock(); +	} else { +		WARN_ON(1); +	} +	mutex_unlock(&fs_info->fs_devices->device_list_mutex); +	return 0; +} +  static const struct super_operations btrfs_super_ops = {  	.drop_inode	= btrfs_drop_inode,  	.evict_inode	= btrfs_evict_inode,  	.put_super	= btrfs_put_super,  	.sync_fs	= btrfs_sync_fs,  	.show_options	= btrfs_show_options, +	.show_devname	= btrfs_show_devname,  	.write_inode	= btrfs_write_inode,  	.dirty_inode	= btrfs_fs_dirty_inode,  	.alloc_inode	= btrfs_alloc_inode, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1791c6e3d83..b72b068183e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -100,6 +100,10 @@ loop:  		kmem_cache_free(btrfs_transaction_cachep, cur_trans);  		cur_trans = fs_info->running_transaction;  		goto loop; +	} else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { +		spin_unlock(&root->fs_info->trans_lock); +		kmem_cache_free(btrfs_transaction_cachep, cur_trans); +		return -EROFS;  	}  	atomic_set(&cur_trans->num_writers, 1); @@ -1213,14 +1217,20 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,  static void cleanup_transaction(struct btrfs_trans_handle *trans, -				struct btrfs_root *root) +				struct btrfs_root *root, int err)  {  	struct btrfs_transaction *cur_trans = trans->transaction;  	WARN_ON(trans->use_count > 1); +	btrfs_abort_transaction(trans, root, err); +  	spin_lock(&root->fs_info->trans_lock);  	list_del_init(&cur_trans->list); +	if (cur_trans == root->fs_info->running_transaction) { +		root->fs_info->running_transaction = NULL; +		root->fs_info->trans_no_join = 0; +	}  	spin_unlock(&root->fs_info->trans_lock);  	btrfs_cleanup_one_transaction(trans->transaction, root); @@ -1526,7 +1536,7 @@ cleanup_transaction:  //	WARN_ON(1);  	if (current->journal_info == trans)  		current->journal_info = NULL; -	cleanup_transaction(trans, root); +	cleanup_transaction(trans, root, ret);  	return ret;  } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7782020996f..8a3d2594b80 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -35,6 +35,7 @@  #include "volumes.h"  #include "async-thread.h"  #include "check-integrity.h" +#include "rcu-string.h"  static int init_first_rw_device(struct btrfs_trans_handle *trans,  				struct btrfs_root *root, @@ -64,7 +65,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)  		device = list_entry(fs_devices->devices.next,  				    struct btrfs_device, dev_list);  		list_del(&device->dev_list); -		kfree(device->name); +		rcu_string_free(device->name);  		kfree(device);  	}  	kfree(fs_devices); @@ -334,8 +335,8 @@ static noinline int device_list_add(const char *path,  {  	struct btrfs_device *device;  	struct btrfs_fs_devices *fs_devices; +	struct rcu_string *name;  	u64 found_transid = btrfs_super_generation(disk_super); -	char *name;  	fs_devices = find_fsid(disk_super->fsid);  	if (!fs_devices) { @@ -369,11 +370,13 @@ static noinline int device_list_add(const char *path,  		memcpy(device->uuid, disk_super->dev_item.uuid,  		       BTRFS_UUID_SIZE);  		spin_lock_init(&device->io_lock); -		device->name = kstrdup(path, GFP_NOFS); -		if (!device->name) { + +		name = rcu_string_strdup(path, GFP_NOFS); +		if (!name) {  			kfree(device);  			return -ENOMEM;  		} +		rcu_assign_pointer(device->name, name);  		INIT_LIST_HEAD(&device->dev_alloc_list);  		/* init readahead state */ @@ -390,12 +393,12 @@ static noinline int device_list_add(const char *path,  		device->fs_devices = fs_devices;  		fs_devices->num_devices++; -	} else if (!device->name || strcmp(device->name, path)) { -		name = kstrdup(path, GFP_NOFS); +	} else if (!device->name || strcmp(device->name->str, path)) { +		name = rcu_string_strdup(path, GFP_NOFS);  		if (!name)  			return -ENOMEM; -		kfree(device->name); -		device->name = name; +		rcu_string_free(device->name); +		rcu_assign_pointer(device->name, name);  		if (device->missing) {  			fs_devices->missing_devices--;  			device->missing = 0; @@ -430,15 +433,22 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)  	/* We have held the volume lock, it is safe to get the devices. */  	list_for_each_entry(orig_dev, &orig->devices, dev_list) { +		struct rcu_string *name; +  		device = kzalloc(sizeof(*device), GFP_NOFS);  		if (!device)  			goto error; -		device->name = kstrdup(orig_dev->name, GFP_NOFS); -		if (!device->name) { +		/* +		 * This is ok to do without rcu read locked because we hold the +		 * uuid mutex so nothing we touch in here is going to disappear. +		 */ +		name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); +		if (!name) {  			kfree(device);  			goto error;  		} +		rcu_assign_pointer(device->name, name);  		device->devid = orig_dev->devid;  		device->work.func = pending_bios_fn; @@ -491,7 +501,7 @@ again:  		}  		list_del_init(&device->dev_list);  		fs_devices->num_devices--; -		kfree(device->name); +		rcu_string_free(device->name);  		kfree(device);  	} @@ -516,7 +526,7 @@ static void __free_device(struct work_struct *work)  	if (device->bdev)  		blkdev_put(device->bdev, device->mode); -	kfree(device->name); +	rcu_string_free(device->name);  	kfree(device);  } @@ -540,6 +550,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)  	mutex_lock(&fs_devices->device_list_mutex);  	list_for_each_entry(device, &fs_devices->devices, dev_list) {  		struct btrfs_device *new_device; +		struct rcu_string *name;  		if (device->bdev)  			fs_devices->open_devices--; @@ -555,8 +566,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)  		new_device = kmalloc(sizeof(*new_device), GFP_NOFS);  		BUG_ON(!new_device); /* -ENOMEM */  		memcpy(new_device, device, sizeof(*new_device)); -		new_device->name = kstrdup(device->name, GFP_NOFS); -		BUG_ON(device->name && !new_device->name); /* -ENOMEM */ + +		/* Safe because we are under uuid_mutex */ +		name = rcu_string_strdup(device->name->str, GFP_NOFS); +		BUG_ON(device->name && !name); /* -ENOMEM */ +		rcu_assign_pointer(new_device->name, name);  		new_device->bdev = NULL;  		new_device->writeable = 0;  		new_device->in_fs_metadata = 0; @@ -621,9 +635,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,  		if (!device->name)  			continue; -		bdev = blkdev_get_by_path(device->name, flags, holder); +		bdev = blkdev_get_by_path(device->name->str, flags, holder);  		if (IS_ERR(bdev)) { -			printk(KERN_INFO "open %s failed\n", device->name); +			printk(KERN_INFO "open %s failed\n", device->name->str);  			goto error;  		}  		filemap_write_and_wait(bdev->bd_inode->i_mapping); @@ -1632,6 +1646,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)  	struct block_device *bdev;  	struct list_head *devices;  	struct super_block *sb = root->fs_info->sb; +	struct rcu_string *name;  	u64 total_bytes;  	int seeding_dev = 0;  	int ret = 0; @@ -1671,23 +1686,24 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)  		goto error;  	} -	device->name = kstrdup(device_path, GFP_NOFS); -	if (!device->name) { +	name = rcu_string_strdup(device_path, GFP_NOFS); +	if (!name) {  		kfree(device);  		ret = -ENOMEM;  		goto error;  	} +	rcu_assign_pointer(device->name, name);  	ret = find_next_devid(root, &device->devid);  	if (ret) { -		kfree(device->name); +		rcu_string_free(device->name);  		kfree(device);  		goto error;  	}  	trans = btrfs_start_transaction(root, 0);  	if (IS_ERR(trans)) { -		kfree(device->name); +		rcu_string_free(device->name);  		kfree(device);  		ret = PTR_ERR(trans);  		goto error; @@ -1796,7 +1812,7 @@ error_trans:  	unlock_chunks(root);  	btrfs_abort_transaction(trans, root, ret);  	btrfs_end_transaction(trans, root); -	kfree(device->name); +	rcu_string_free(device->name);  	kfree(device);  error:  	blkdev_put(bdev, FMODE_EXCL); @@ -4204,10 +4220,17 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,  		bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;  		dev = bbio->stripes[dev_nr].dev;  		if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { +#ifdef DEBUG +			struct rcu_string *name; + +			rcu_read_lock(); +			name = rcu_dereference(dev->name);  			pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "  				 "(%s id %llu), size=%u\n", rw,  				 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, -				 dev->name, dev->devid, bio->bi_size); +				 name->str, dev->devid, bio->bi_size); +			rcu_read_unlock(); +#endif  			bio->bi_bdev = dev->bdev;  			if (async_submit)  				schedule_bio(root, dev, rw, bio); @@ -4694,8 +4717,9 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)  		key.offset = device->devid;  		ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);  		if (ret) { -			printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", -			       device->name, (unsigned long long)device->devid); +			printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", +				      rcu_str_deref(device->name), +				      (unsigned long long)device->devid);  			__btrfs_reset_dev_stats(device);  			device->dev_stats_valid = 1;  			btrfs_release_path(path); @@ -4747,8 +4771,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,  	BUG_ON(!path);  	ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);  	if (ret < 0) { -		printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", -		       ret, device->name); +		printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", +			      ret, rcu_str_deref(device->name));  		goto out;  	} @@ -4757,8 +4781,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,  		/* need to delete old one and insert a new one */  		ret = btrfs_del_item(trans, dev_root, path);  		if (ret != 0) { -			printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", -			       device->name, ret); +			printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", +				      rcu_str_deref(device->name), ret);  			goto out;  		}  		ret = 1; @@ -4770,8 +4794,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,  		ret = btrfs_insert_empty_item(trans, dev_root, path,  					      &key, sizeof(*ptr));  		if (ret < 0) { -			printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", -			       device->name, ret); +			printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", +				      rcu_str_deref(device->name), ret);  			goto out;  		}  	} @@ -4823,9 +4847,9 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)  {  	if (!dev->dev_stats_valid)  		return; -	printk_ratelimited(KERN_ERR +	printk_ratelimited_in_rcu(KERN_ERR  			   "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", -			   dev->name, +			   rcu_str_deref(dev->name),  			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),  			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),  			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), @@ -4837,8 +4861,8 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)  static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)  { -	printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", -	       dev->name, +	printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", +	       rcu_str_deref(dev->name),  	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),  	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),  	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 3406a88ca83..74366f27a76 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -58,7 +58,7 @@ struct btrfs_device {  	/* the mode sent to blkdev_get */  	fmode_t mode; -	char *name; +	struct rcu_string *name;  	/* the internal btrfs device id */  	u64 devid;  |