diff options
Diffstat (limited to 'fs')
60 files changed, 917 insertions, 510 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 3f75895c919..7301cdb4b2c 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -179,60 +179,74 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,  static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,  				struct ulist *parents, int level, -				struct btrfs_key *key, u64 wanted_disk_byte, +				struct btrfs_key *key_for_search, u64 time_seq, +				u64 wanted_disk_byte,  				const u64 *extent_item_pos)  { -	int ret; -	int slot = path->slots[level]; -	struct extent_buffer *eb = path->nodes[level]; +	int ret = 0; +	int slot; +	struct extent_buffer *eb; +	struct btrfs_key key;  	struct btrfs_file_extent_item *fi;  	struct extent_inode_elem *eie = NULL;  	u64 disk_byte; -	u64 wanted_objectid = key->objectid; -add_parent: -	if (level == 0 && extent_item_pos) { -		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); -		ret = check_extent_in_eb(key, eb, fi, *extent_item_pos, &eie); +	if (level != 0) { +		eb = path->nodes[level]; +		ret = ulist_add(parents, eb->start, 0, GFP_NOFS);  		if (ret < 0)  			return ret; -	} -	ret = ulist_add(parents, eb->start, (unsigned long)eie, GFP_NOFS); -	if (ret < 0) -		return ret; - -	if (level != 0)  		return 0; +	}  	/* -	 * if the current leaf is full with EXTENT_DATA items, we must -	 * check the next one if that holds a reference as well. -	 * ref->count cannot be used to skip this check. -	 * repeat this until we don't find any additional EXTENT_DATA items. +	 * We normally enter this function with the path already pointing to +	 * the first item to check. But sometimes, we may enter it with +	 * slot==nritems. In that case, go to the next leaf before we continue.  	 */ -	while (1) { -		eie = NULL; -		ret = btrfs_next_leaf(root, path); -		if (ret < 0) -			return ret; -		if (ret) -			return 0; +	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) +		ret = btrfs_next_old_leaf(root, path, time_seq); +	while (!ret) {  		eb = path->nodes[0]; -		for (slot = 0; slot < btrfs_header_nritems(eb); ++slot) { -			btrfs_item_key_to_cpu(eb, key, slot); -			if (key->objectid != wanted_objectid || -			    key->type != BTRFS_EXTENT_DATA_KEY) -				return 0; -			fi = btrfs_item_ptr(eb, slot, -						struct btrfs_file_extent_item); -			disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); -			if (disk_byte == wanted_disk_byte) -				goto add_parent; +		slot = path->slots[0]; + +		btrfs_item_key_to_cpu(eb, &key, slot); + +		if (key.objectid != key_for_search->objectid || +		    key.type != BTRFS_EXTENT_DATA_KEY) +			break; + +		fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); +		disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); + +		if (disk_byte == wanted_disk_byte) { +			eie = NULL; +			if (extent_item_pos) { +				ret = check_extent_in_eb(&key, eb, fi, +						*extent_item_pos, +						&eie); +				if (ret < 0) +					break; +			} +			if (!ret) { +				ret = ulist_add(parents, eb->start, +						(unsigned long)eie, GFP_NOFS); +				if (ret < 0) +					break; +				if (!extent_item_pos) { +					ret = btrfs_next_old_leaf(root, path, +							time_seq); +					continue; +				} +			}  		} +		ret = btrfs_next_old_item(root, path, time_seq);  	} -	return 0; +	if (ret > 0) +		ret = 0; +	return ret;  }  /* @@ -249,7 +263,6 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,  	struct btrfs_path *path;  	struct btrfs_root *root;  	struct btrfs_key root_key; -	struct btrfs_key key = {0};  	struct extent_buffer *eb;  	int ret = 0;  	int root_level; @@ -294,19 +307,9 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,  		goto out;  	} -	if (level == 0) { -		if (ret == 1 && path->slots[0] >= btrfs_header_nritems(eb)) { -			ret = btrfs_next_leaf(root, path); -			if (ret) -				goto out; -			eb = path->nodes[0]; -		} - -		btrfs_item_key_to_cpu(eb, &key, path->slots[0]); -	} - -	ret = add_all_parents(root, path, parents, level, &key, -				ref->wanted_disk_byte, extent_item_pos); +	ret = add_all_parents(root, path, parents, level, &ref->key_for_search, +				time_seq, ref->wanted_disk_byte, +				extent_item_pos);  out:  	btrfs_free_path(path);  	return ret; diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index e616f8872e6..12394a90d60 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -37,6 +37,7 @@  #define BTRFS_INODE_IN_DEFRAG			3  #define BTRFS_INODE_DELALLOC_META_RESERVED	4  #define BTRFS_INODE_HAS_ORPHAN_ITEM		5 +#define BTRFS_INODE_HAS_ASYNC_EXTENT		6  /* in memory btrfs inode */  struct btrfs_inode { diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 9cebb1fd6a3..da6e9364a5e 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -93,6 +93,7 @@  #include "print-tree.h"  #include "locking.h"  #include "check-integrity.h" +#include "rcu-string.h"  #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000  #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 @@ -843,13 +844,14 @@ static int btrfsic_process_superblock_dev_mirror(  		superblock_tmp->never_written = 0;  		superblock_tmp->mirror_num = 1 + superblock_mirror_num;  		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) -			printk(KERN_INFO "New initial S-block (bdev %p, %s)" -			       " @%llu (%s/%llu/%d)\n", -			       superblock_bdev, device->name, -			       (unsigned long long)dev_bytenr, -			       dev_state->name, -			       (unsigned long long)dev_bytenr, -			       superblock_mirror_num); +			printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)" +				     " @%llu (%s/%llu/%d)\n", +				     superblock_bdev, +				     rcu_str_deref(device->name), +				     (unsigned long long)dev_bytenr, +				     dev_state->name, +				     (unsigned long long)dev_bytenr, +				     superblock_mirror_num);  		list_add(&superblock_tmp->all_blocks_node,  			 &state->all_blocks_list);  		btrfsic_block_hashtable_add(superblock_tmp, diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d7a96cfdc50..15cbc2bf4ff 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -467,6 +467,15 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,  	return 0;  } +/* + * This allocates memory and gets a tree modification sequence number when + * needed. + * + * Returns 0 when no sequence number is needed, < 0 on error. + * Returns 1 when a sequence number was added. In this case, + * fs_info->tree_mod_seq_lock was acquired and must be released by the caller + * after inserting into the rb tree. + */  static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,  				 struct tree_mod_elem **tm_ret)  { @@ -491,11 +500,11 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,  		 */  		kfree(tm);  		seq = 0; +		spin_unlock(&fs_info->tree_mod_seq_lock);  	} else {  		__get_tree_mod_seq(fs_info, &tm->elem);  		seq = tm->elem.seq;  	} -	spin_unlock(&fs_info->tree_mod_seq_lock);  	return seq;  } @@ -521,7 +530,9 @@ tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,  	tm->slot = slot;  	tm->generation = btrfs_node_ptr_generation(eb, slot); -	return __tree_mod_log_insert(fs_info, tm); +	ret = __tree_mod_log_insert(fs_info, tm); +	spin_unlock(&fs_info->tree_mod_seq_lock); +	return ret;  }  static noinline int @@ -559,7 +570,9 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,  	tm->move.nr_items = nr_items;  	tm->op = MOD_LOG_MOVE_KEYS; -	return __tree_mod_log_insert(fs_info, tm); +	ret = __tree_mod_log_insert(fs_info, tm); +	spin_unlock(&fs_info->tree_mod_seq_lock); +	return ret;  }  static noinline int @@ -580,7 +593,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,  	tm->generation = btrfs_header_generation(old_root);  	tm->op = MOD_LOG_ROOT_REPLACE; -	return __tree_mod_log_insert(fs_info, tm); +	ret = __tree_mod_log_insert(fs_info, tm); +	spin_unlock(&fs_info->tree_mod_seq_lock); +	return ret;  }  static struct tree_mod_elem * @@ -1023,6 +1038,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,  		looped = 1;  	} +	/* if there's no old root to return, return what we found instead */ +	if (!found) +		found = tm; +  	return found;  } @@ -1143,22 +1162,36 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,  	return eb_rewin;  } +/* + * get_old_root() rewinds the state of @root's root node to the given @time_seq + * value. If there are no changes, the current root->root_node is returned. If + * anything changed in between, there's a fresh buffer allocated on which the + * rewind operations are done. In any case, the returned buffer is read locked. + * Returns NULL on error (with no locks held). + */  static inline struct extent_buffer *  get_old_root(struct btrfs_root *root, u64 time_seq)  {  	struct tree_mod_elem *tm;  	struct extent_buffer *eb; -	struct tree_mod_root *old_root; -	u64 old_generation; +	struct tree_mod_root *old_root = NULL; +	u64 old_generation = 0; +	u64 logical; +	eb = btrfs_read_lock_root_node(root);  	tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);  	if (!tm)  		return root->node; -	old_root = &tm->old_root; -	old_generation = tm->generation; +	if (tm->op == MOD_LOG_ROOT_REPLACE) { +		old_root = &tm->old_root; +		old_generation = tm->generation; +		logical = old_root->logical; +	} else { +		logical = root->node->start; +	} -	tm = tree_mod_log_search(root->fs_info, old_root->logical, time_seq); +	tm = tree_mod_log_search(root->fs_info, logical, time_seq);  	/*  	 * there was an item in the log when __tree_mod_log_oldest_root  	 * returned. this one must not go away, because the time_seq passed to @@ -1166,22 +1199,25 @@ get_old_root(struct btrfs_root *root, u64 time_seq)  	 */  	BUG_ON(!tm); -	if (old_root->logical == root->node->start) { -		/* there are logged operations for the current root */ -		eb = btrfs_clone_extent_buffer(root->node); -	} else { -		/* there's a root replace operation for the current root */ +	if (old_root)  		eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,  					       root->nodesize); +	else +		eb = btrfs_clone_extent_buffer(root->node); +	btrfs_tree_read_unlock(root->node); +	free_extent_buffer(root->node); +	if (!eb) +		return NULL; +	btrfs_tree_read_lock(eb); +	if (old_root) {  		btrfs_set_header_bytenr(eb, eb->start);  		btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);  		btrfs_set_header_owner(eb, root->root_key.objectid); +		btrfs_set_header_level(eb, old_root->level); +		btrfs_set_header_generation(eb, old_generation);  	} -	if (!eb) -		return NULL; -	btrfs_set_header_level(eb, old_root->level); -	btrfs_set_header_generation(eb, old_generation);  	__tree_mod_log_rewind(eb, time_seq, tm); +	extent_buffer_get(eb);  	return eb;  } @@ -1650,8 +1686,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,  	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)  		return 0; -	btrfs_header_nritems(mid); -  	left = read_node_slot(root, parent, pslot - 1);  	if (left) {  		btrfs_tree_lock(left); @@ -1681,7 +1715,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,  		wret = push_node_left(trans, root, left, mid, 1);  		if (wret < 0)  			ret = wret; -		btrfs_header_nritems(mid);  	}  	/* @@ -2615,9 +2648,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,  again:  	b = get_old_root(root, time_seq); -	extent_buffer_get(b);  	level = btrfs_header_level(b); -	btrfs_tree_read_lock(b);  	p->locks[level] = BTRFS_READ_LOCK;  	while (b) { @@ -5001,6 +5032,12 @@ next:   */  int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)  { +	return btrfs_next_old_leaf(root, path, 0); +} + +int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, +			u64 time_seq) +{  	int slot;  	int level;  	struct extent_buffer *c; @@ -5025,7 +5062,10 @@ again:  	path->keep_locks = 1;  	path->leave_spinning = 1; -	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); +	if (time_seq) +		ret = btrfs_search_old_slot(root, &key, path, time_seq); +	else +		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);  	path->keep_locks = 0;  	if (ret < 0) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0236d03c673..fa5c45b3907 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2753,13 +2753,20 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,  }  int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); -static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) +int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, +			u64 time_seq); +static inline int btrfs_next_old_item(struct btrfs_root *root, +				      struct btrfs_path *p, u64 time_seq)  {  	++p->slots[0];  	if (p->slots[0] >= btrfs_header_nritems(p->nodes[0])) -		return btrfs_next_leaf(root, p); +		return btrfs_next_old_leaf(root, p, time_seq);  	return 0;  } +static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p) +{ +	return btrfs_next_old_item(root, p, 0); +}  int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);  int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);  int __must_check btrfs_drop_snapshot(struct btrfs_root *root, diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index c18d0442ae6..2399f408691 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1879,3 +1879,21 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)  		}  	}  } + +void btrfs_destroy_delayed_inodes(struct btrfs_root *root) +{ +	struct btrfs_delayed_root *delayed_root; +	struct btrfs_delayed_node *curr_node, *prev_node; + +	delayed_root = btrfs_get_delayed_root(root); + +	curr_node = btrfs_first_delayed_node(delayed_root); +	while (curr_node) { +		__btrfs_kill_delayed_node(curr_node); + +		prev_node = curr_node; +		curr_node = btrfs_next_delayed_node(curr_node); +		btrfs_release_delayed_node(prev_node); +	} +} + diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 7083d08b2a2..f5aa4023d3e 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -124,6 +124,9 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev);  /* Used for drop dead root */  void btrfs_kill_all_delayed_nodes(struct btrfs_root *root); +/* Used for clean the transaction */ +void btrfs_destroy_delayed_inodes(struct btrfs_root *root); +  /* Used for readdir() */  void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,  			     struct list_head *del_list); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7ae51decf6d..7b845ff4af9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -44,6 +44,7 @@  #include "free-space-cache.h"  #include "inode-map.h"  #include "check-integrity.h" +#include "rcu-string.h"  static struct extent_io_ops btree_extent_io_ops;  static void end_workqueue_fn(struct btrfs_work *work); @@ -2118,7 +2119,7 @@ int open_ctree(struct super_block *sb,  	features = btrfs_super_incompat_flags(disk_super);  	features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; -	if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) +	if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)  		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;  	/* @@ -2575,8 +2576,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)  		struct btrfs_device *device = (struct btrfs_device *)  			bh->b_private; -		printk_ratelimited(KERN_WARNING "lost page write due to " -				   "I/O error on %s\n", device->name); +		printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to " +					  "I/O error on %s\n", +					  rcu_str_deref(device->name));  		/* note, we dont' set_buffer_write_io_error because we have  		 * our own ways of dealing with the IO errors  		 */ @@ -2749,8 +2751,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait)  		wait_for_completion(&device->flush_wait);  		if (bio_flagged(bio, BIO_EOPNOTSUPP)) { -			printk("btrfs: disabling barriers on dev %s\n", -			       device->name); +			printk_in_rcu("btrfs: disabling barriers on dev %s\n", +				      rcu_str_deref(device->name));  			device->nobarriers = 1;  		}  		if (!bio_flagged(bio, BIO_UPTODATE)) { @@ -3400,7 +3402,6 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,  	delayed_refs = &trans->delayed_refs; -again:  	spin_lock(&delayed_refs->lock);  	if (delayed_refs->num_entries == 0) {  		spin_unlock(&delayed_refs->lock); @@ -3408,31 +3409,37 @@ again:  		return ret;  	} -	node = rb_first(&delayed_refs->root); -	while (node) { +	while ((node = rb_first(&delayed_refs->root)) != NULL) {  		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); -		node = rb_next(node); - -		ref->in_tree = 0; -		rb_erase(&ref->rb_node, &delayed_refs->root); -		delayed_refs->num_entries--;  		atomic_set(&ref->refs, 1);  		if (btrfs_delayed_ref_is_head(ref)) {  			struct btrfs_delayed_ref_head *head;  			head = btrfs_delayed_node_to_head(ref); -			spin_unlock(&delayed_refs->lock); -			mutex_lock(&head->mutex); +			if (!mutex_trylock(&head->mutex)) { +				atomic_inc(&ref->refs); +				spin_unlock(&delayed_refs->lock); + +				/* Need to wait for the delayed ref to run */ +				mutex_lock(&head->mutex); +				mutex_unlock(&head->mutex); +				btrfs_put_delayed_ref(ref); + +				spin_lock(&delayed_refs->lock); +				continue; +			} +  			kfree(head->extent_op);  			delayed_refs->num_heads--;  			if (list_empty(&head->cluster))  				delayed_refs->num_heads_ready--;  			list_del_init(&head->cluster); -			mutex_unlock(&head->mutex); -			btrfs_put_delayed_ref(ref); -			goto again;  		} +		ref->in_tree = 0; +		rb_erase(&ref->rb_node, &delayed_refs->root); +		delayed_refs->num_entries--; +  		spin_unlock(&delayed_refs->lock);  		btrfs_put_delayed_ref(ref); @@ -3520,11 +3527,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,  			     &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,  					       offset >> PAGE_CACHE_SHIFT);  			spin_unlock(&dirty_pages->buffer_lock); -			if (eb) { +			if (eb)  				ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,  							 &eb->bflags); -				atomic_set(&eb->refs, 1); -			}  			if (PageWriteback(page))  				end_page_writeback(page); @@ -3538,8 +3543,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,  				spin_unlock_irq(&page->mapping->tree_lock);  			} -			page->mapping->a_ops->invalidatepage(page, 0);  			unlock_page(page); +			page_cache_release(page);  		}  	} @@ -3553,8 +3558,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,  	u64 start;  	u64 end;  	int ret; +	bool loop = true;  	unpin = pinned_extents; +again:  	while (1) {  		ret = find_first_extent_bit(unpin, 0, &start, &end,  					    EXTENT_DIRTY); @@ -3572,6 +3579,15 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,  		cond_resched();  	} +	if (loop) { +		if (unpin == &root->fs_info->freed_extents[0]) +			unpin = &root->fs_info->freed_extents[1]; +		else +			unpin = &root->fs_info->freed_extents[0]; +		loop = false; +		goto again; +	} +  	return 0;  } @@ -3585,21 +3601,23 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,  	/* FIXME: cleanup wait for commit */  	cur_trans->in_commit = 1;  	cur_trans->blocked = 1; -	if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) -		wake_up(&root->fs_info->transaction_blocked_wait); +	wake_up(&root->fs_info->transaction_blocked_wait);  	cur_trans->blocked = 0; -	if (waitqueue_active(&root->fs_info->transaction_wait)) -		wake_up(&root->fs_info->transaction_wait); +	wake_up(&root->fs_info->transaction_wait);  	cur_trans->commit_done = 1; -	if (waitqueue_active(&cur_trans->commit_wait)) -		wake_up(&cur_trans->commit_wait); +	wake_up(&cur_trans->commit_wait); + +	btrfs_destroy_delayed_inodes(root); +	btrfs_assert_delayed_root_empty(root);  	btrfs_destroy_pending_snapshots(cur_trans);  	btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,  				     EXTENT_DIRTY); +	btrfs_destroy_pinned_extent(root, +				    root->fs_info->pinned_extents);  	/*  	memset(cur_trans, 0, sizeof(*cur_trans)); @@ -3648,6 +3666,9 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)  		if (waitqueue_active(&t->commit_wait))  			wake_up(&t->commit_wait); +		btrfs_destroy_delayed_inodes(root); +		btrfs_assert_delayed_root_empty(root); +  		btrfs_destroy_pending_snapshots(t);  		btrfs_destroy_delalloc_inodes(root); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2c8f7b20461..aaa12c1eb34 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -20,6 +20,7 @@  #include "volumes.h"  #include "check-integrity.h"  #include "locking.h" +#include "rcu-string.h"  static struct kmem_cache *extent_state_cache;  static struct kmem_cache *extent_buffer_cache; @@ -1917,9 +1918,9 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,  		return -EIO;  	} -	printk(KERN_INFO "btrfs read error corrected: ino %lu off %llu (dev %s " -			"sector %llu)\n", page->mapping->host->i_ino, start, -			dev->name, sector); +	printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu " +		      "(dev %s sector %llu)\n", page->mapping->host->i_ino, +		      start, rcu_str_deref(dev->name), sector);  	bio_put(bio);  	return 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f6ab6f5e635..d8bb0dbc494 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -830,7 +830,7 @@ static noinline int cow_file_range(struct inode *inode,  	if (IS_ERR(trans)) {  		extent_clear_unlock_delalloc(inode,  			     &BTRFS_I(inode)->io_tree, -			     start, end, NULL, +			     start, end, locked_page,  			     EXTENT_CLEAR_UNLOCK_PAGE |  			     EXTENT_CLEAR_UNLOCK |  			     EXTENT_CLEAR_DELALLOC | @@ -963,7 +963,7 @@ out:  out_unlock:  	extent_clear_unlock_delalloc(inode,  		     &BTRFS_I(inode)->io_tree, -		     start, end, NULL, +		     start, end, locked_page,  		     EXTENT_CLEAR_UNLOCK_PAGE |  		     EXTENT_CLEAR_UNLOCK |  		     EXTENT_CLEAR_DELALLOC | @@ -986,8 +986,10 @@ static noinline void async_cow_start(struct btrfs_work *work)  	compress_file_range(async_cow->inode, async_cow->locked_page,  			    async_cow->start, async_cow->end, async_cow,  			    &num_added); -	if (num_added == 0) +	if (num_added == 0) { +		btrfs_add_delayed_iput(async_cow->inode);  		async_cow->inode = NULL; +	}  }  /* @@ -1020,6 +1022,8 @@ static noinline void async_cow_free(struct btrfs_work *work)  {  	struct async_cow *async_cow;  	async_cow = container_of(work, struct async_cow, work); +	if (async_cow->inode) +		btrfs_add_delayed_iput(async_cow->inode);  	kfree(async_cow);  } @@ -1038,7 +1042,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,  	while (start < end) {  		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);  		BUG_ON(!async_cow); /* -ENOMEM */ -		async_cow->inode = inode; +		async_cow->inode = igrab(inode);  		async_cow->root = root;  		async_cow->locked_page = locked_page;  		async_cow->start = start; @@ -1136,8 +1140,18 @@ static noinline int run_delalloc_nocow(struct inode *inode,  	u64 ino = btrfs_ino(inode);  	path = btrfs_alloc_path(); -	if (!path) +	if (!path) { +		extent_clear_unlock_delalloc(inode, +			     &BTRFS_I(inode)->io_tree, +			     start, end, locked_page, +			     EXTENT_CLEAR_UNLOCK_PAGE | +			     EXTENT_CLEAR_UNLOCK | +			     EXTENT_CLEAR_DELALLOC | +			     EXTENT_CLEAR_DIRTY | +			     EXTENT_SET_WRITEBACK | +			     EXTENT_END_WRITEBACK);  		return -ENOMEM; +	}  	nolock = btrfs_is_free_space_inode(root, inode); @@ -1147,6 +1161,15 @@ static noinline int run_delalloc_nocow(struct inode *inode,  		trans = btrfs_join_transaction(root);  	if (IS_ERR(trans)) { +		extent_clear_unlock_delalloc(inode, +			     &BTRFS_I(inode)->io_tree, +			     start, end, locked_page, +			     EXTENT_CLEAR_UNLOCK_PAGE | +			     EXTENT_CLEAR_UNLOCK | +			     EXTENT_CLEAR_DELALLOC | +			     EXTENT_CLEAR_DIRTY | +			     EXTENT_SET_WRITEBACK | +			     EXTENT_END_WRITEBACK);  		btrfs_free_path(path);  		return PTR_ERR(trans);  	} @@ -1327,8 +1350,11 @@ out_check:  	}  	btrfs_release_path(path); -	if (cur_offset <= end && cow_start == (u64)-1) +	if (cur_offset <= end && cow_start == (u64)-1) {  		cow_start = cur_offset; +		cur_offset = end; +	} +  	if (cow_start != (u64)-1) {  		ret = cow_file_range(inode, locked_page, cow_start, end,  				     page_started, nr_written, 1); @@ -1347,6 +1373,17 @@ error:  	if (!ret)  		ret = err; +	if (ret && cur_offset < end) +		extent_clear_unlock_delalloc(inode, +			     &BTRFS_I(inode)->io_tree, +			     cur_offset, end, locked_page, +			     EXTENT_CLEAR_UNLOCK_PAGE | +			     EXTENT_CLEAR_UNLOCK | +			     EXTENT_CLEAR_DELALLOC | +			     EXTENT_CLEAR_DIRTY | +			     EXTENT_SET_WRITEBACK | +			     EXTENT_END_WRITEBACK); +  	btrfs_free_path(path);  	return ret;  } @@ -1361,20 +1398,23 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,  	int ret;  	struct btrfs_root *root = BTRFS_I(inode)->root; -	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) +	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) {  		ret = run_delalloc_nocow(inode, locked_page, start, end,  					 page_started, 1, nr_written); -	else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) +	} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) {  		ret = run_delalloc_nocow(inode, locked_page, start, end,  					 page_started, 0, nr_written); -	else if (!btrfs_test_opt(root, COMPRESS) && -		 !(BTRFS_I(inode)->force_compress) && -		 !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) +	} else if (!btrfs_test_opt(root, COMPRESS) && +		   !(BTRFS_I(inode)->force_compress) && +		   !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) {  		ret = cow_file_range(inode, locked_page, start, end,  				      page_started, nr_written, 1); -	else +	} else { +		set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, +			&BTRFS_I(inode)->runtime_flags);  		ret = cow_file_range_async(inode, locked_page, start, end,  					   page_started, nr_written); +	}  	return ret;  } @@ -7054,10 +7094,13 @@ static void fixup_inode_flags(struct inode *dir, struct inode *inode)  	else  		b_inode->flags &= ~BTRFS_INODE_NODATACOW; -	if (b_dir->flags & BTRFS_INODE_COMPRESS) +	if (b_dir->flags & BTRFS_INODE_COMPRESS) {  		b_inode->flags |= BTRFS_INODE_COMPRESS; -	else -		b_inode->flags &= ~BTRFS_INODE_COMPRESS; +		b_inode->flags &= ~BTRFS_INODE_NOCOMPRESS; +	} else { +		b_inode->flags &= ~(BTRFS_INODE_COMPRESS | +				    BTRFS_INODE_NOCOMPRESS); +	}  }  static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 24b776c08d9..0e92e576300 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -52,6 +52,7 @@  #include "locking.h"  #include "inode-map.h"  #include "backref.h" +#include "rcu-string.h"  /* Mask out flags that are inappropriate for the given type of inode. */  static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) @@ -785,39 +786,57 @@ none:  	return -ENOENT;  } -/* - * Validaty check of prev em and next em: - * 1) no prev/next em - * 2) prev/next em is an hole/inline extent - */ -static int check_adjacent_extents(struct inode *inode, struct extent_map *em) +static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)  {  	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; -	struct extent_map *prev = NULL, *next = NULL; -	int ret = 0; +	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; +	struct extent_map *em; +	u64 len = PAGE_CACHE_SIZE; +	/* +	 * hopefully we have this extent in the tree already, try without +	 * the full extent lock +	 */  	read_lock(&em_tree->lock); -	prev = lookup_extent_mapping(em_tree, em->start - 1, (u64)-1); -	next = lookup_extent_mapping(em_tree, em->start + em->len, (u64)-1); +	em = lookup_extent_mapping(em_tree, start, len);  	read_unlock(&em_tree->lock); -	if ((!prev || prev->block_start >= EXTENT_MAP_LAST_BYTE) && -	    (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)) -		ret = 1; -	free_extent_map(prev); -	free_extent_map(next); +	if (!em) { +		/* get the big lock and read metadata off disk */ +		lock_extent(io_tree, start, start + len - 1); +		em = btrfs_get_extent(inode, NULL, 0, start, len, 0); +		unlock_extent(io_tree, start, start + len - 1); + +		if (IS_ERR(em)) +			return NULL; +	} + +	return em; +} + +static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) +{ +	struct extent_map *next; +	bool ret = true; +	/* this is the last extent */ +	if (em->start + em->len >= i_size_read(inode)) +		return false; + +	next = defrag_lookup_extent(inode, em->start + em->len); +	if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) +		ret = false; + +	free_extent_map(next);  	return ret;  } -static int should_defrag_range(struct inode *inode, u64 start, u64 len, -			       int thresh, u64 *last_len, u64 *skip, -			       u64 *defrag_end) +static int should_defrag_range(struct inode *inode, u64 start, int thresh, +			       u64 *last_len, u64 *skip, u64 *defrag_end)  { -	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; -	struct extent_map *em = NULL; -	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; +	struct extent_map *em;  	int ret = 1; +	bool next_mergeable = true;  	/*  	 * make sure that once we start defragging an extent, we keep on @@ -828,23 +847,9 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,  	*skip = 0; -	/* -	 * hopefully we have this extent in the tree already, try without -	 * the full extent lock -	 */ -	read_lock(&em_tree->lock); -	em = lookup_extent_mapping(em_tree, start, len); -	read_unlock(&em_tree->lock); - -	if (!em) { -		/* get the big lock and read metadata off disk */ -		lock_extent(io_tree, start, start + len - 1); -		em = btrfs_get_extent(inode, NULL, 0, start, len, 0); -		unlock_extent(io_tree, start, start + len - 1); - -		if (IS_ERR(em)) -			return 0; -	} +	em = defrag_lookup_extent(inode, start); +	if (!em) +		return 0;  	/* this will cover holes, and inline extents */  	if (em->block_start >= EXTENT_MAP_LAST_BYTE) { @@ -852,18 +857,15 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,  		goto out;  	} -	/* If we have nothing to merge with us, just skip. */ -	if (check_adjacent_extents(inode, em)) { -		ret = 0; -		goto out; -	} +	next_mergeable = defrag_check_next_extent(inode, em);  	/* -	 * we hit a real extent, if it is big don't bother defragging it again +	 * we hit a real extent, if it is big or the next extent is not a +	 * real extent, don't bother defragging it  	 */ -	if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) +	if ((*last_len == 0 || *last_len >= thresh) && +	    (em->len >= thresh || !next_mergeable))  		ret = 0; -  out:  	/*  	 * last_len ends up being a counter of how many bytes we've defragged. @@ -1142,8 +1144,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,  			break;  		if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, -					 PAGE_CACHE_SIZE, extent_thresh, -					 &last_len, &skip, &defrag_end)) { +					 extent_thresh, &last_len, &skip, +					 &defrag_end)) {  			unsigned long next;  			/*  			 * the should_defrag function tells us how much to skip @@ -1304,6 +1306,14 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,  		ret = -EINVAL;  		goto out_free;  	} +	if (device->fs_devices && device->fs_devices->seeding) { +		printk(KERN_INFO "btrfs: resizer unable to apply on " +		       "seeding device %llu\n", +		       (unsigned long long)devid); +		ret = -EINVAL; +		goto out_free; +	} +  	if (!strcmp(sizestr, "max"))  		new_size = device->bdev->bd_inode->i_size;  	else { @@ -1345,8 +1355,9 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,  	do_div(new_size, root->sectorsize);  	new_size *= root->sectorsize; -	printk(KERN_INFO "btrfs: new size for %s is %llu\n", -		device->name, (unsigned long long)new_size); +	printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", +		      rcu_str_deref(device->name), +		      (unsigned long long)new_size);  	if (new_size > old_size) {  		trans = btrfs_start_transaction(root, 0); @@ -2264,7 +2275,12 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)  	di_args->total_bytes = dev->total_bytes;  	memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));  	if (dev->name) { -		strncpy(di_args->path, dev->name, sizeof(di_args->path)); +		struct rcu_string *name; + +		rcu_read_lock(); +		name = rcu_dereference(dev->name); +		strncpy(di_args->path, name->str, sizeof(di_args->path)); +		rcu_read_unlock();  		di_args->path[sizeof(di_args->path) - 1] = 0;  	} else {  		di_args->path[0] = '\0'; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 9e138cdc36c..643335a4fe3 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -627,7 +627,27 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)  	/* start IO across the range first to instantiate any delalloc  	 * extents  	 */ -	filemap_write_and_wait_range(inode->i_mapping, start, orig_end); +	filemap_fdatawrite_range(inode->i_mapping, start, orig_end); + +	/* +	 * So with compression we will find and lock a dirty page and clear the +	 * first one as dirty, setup an async extent, and immediately return +	 * with the entire range locked but with nobody actually marked with +	 * writeback.  So we can't just filemap_write_and_wait_range() and +	 * expect it to work since it will just kick off a thread to do the +	 * actual work.  So we need to call filemap_fdatawrite_range _again_ +	 * since it will wait on the page lock, which won't be unlocked until +	 * after the pages have been marked as writeback and so we're good to go +	 * from there.  We have to do this otherwise we'll miss the ordered +	 * extents and that results in badness.  Please Josef, do not think you +	 * know better and pull this out at some point in the future, it is +	 * right and you are wrong. +	 */ +	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, +		     &BTRFS_I(inode)->runtime_flags)) +		filemap_fdatawrite_range(inode->i_mapping, start, orig_end); + +	filemap_fdatawait_range(inode->i_mapping, start, orig_end);  	end = orig_end;  	found = 0; diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h new file mode 100644 index 00000000000..9e111e4576d --- /dev/null +++ b/fs/btrfs/rcu-string.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2012 Red Hat.  All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +struct rcu_string { +	struct rcu_head rcu; +	char str[0]; +}; + +static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask) +{ +	size_t len = strlen(src) + 1; +	struct rcu_string *ret = kzalloc(sizeof(struct rcu_string) + +					 (len * sizeof(char)), mask); +	if (!ret) +		return ret; +	strncpy(ret->str, src, len); +	return ret; +} + +static inline void rcu_string_free(struct rcu_string *str) +{ +	if (str) +		kfree_rcu(str, rcu); +} + +#define printk_in_rcu(fmt, ...) do {	\ +	rcu_read_lock();		\ +	printk(fmt, __VA_ARGS__);	\ +	rcu_read_unlock();		\ +} while (0) + +#define printk_ratelimited_in_rcu(fmt, ...) do {	\ +	rcu_read_lock();				\ +	printk_ratelimited(fmt, __VA_ARGS__);		\ +	rcu_read_unlock();				\ +} while (0) + +#define rcu_str_deref(rcu_str) ({				\ +	struct rcu_string *__str = rcu_dereference(rcu_str);	\ +	__str->str;						\ +}) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index a38cfa4f251..b223620cd5a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -26,6 +26,7 @@  #include "backref.h"  #include "extent_io.h"  #include "check-integrity.h" +#include "rcu-string.h"  /*   * This is only the first step towards a full-features scrub. It reads all @@ -320,10 +321,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)  	 * hold all of the paths here  	 */  	for (i = 0; i < ipath->fspath->elem_cnt; ++i) -		printk(KERN_WARNING "btrfs: %s at logical %llu on dev " +		printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "  			"%s, sector %llu, root %llu, inode %llu, offset %llu, "  			"length %llu, links %u (path: %s)\n", swarn->errstr, -			swarn->logical, swarn->dev->name, +			swarn->logical, rcu_str_deref(swarn->dev->name),  			(unsigned long long)swarn->sector, root, inum, offset,  			min(isize - offset, (u64)PAGE_SIZE), nlink,  			(char *)(unsigned long)ipath->fspath->val[i]); @@ -332,10 +333,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)  	return 0;  err: -	printk(KERN_WARNING "btrfs: %s at logical %llu on dev " +	printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "  		"%s, sector %llu, root %llu, inode %llu, offset %llu: path "  		"resolving failed with ret=%d\n", swarn->errstr, -		swarn->logical, swarn->dev->name, +		swarn->logical, rcu_str_deref(swarn->dev->name),  		(unsigned long long)swarn->sector, root, inum, offset, ret);  	free_ipath(ipath); @@ -390,10 +391,11 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)  		do {  			ret = tree_backref_for_extent(&ptr, eb, ei, item_size,  							&ref_root, &ref_level); -			printk(KERN_WARNING +			printk_in_rcu(KERN_WARNING  				"btrfs: %s at logical %llu on dev %s, "  				"sector %llu: metadata %s (level %d) in tree " -				"%llu\n", errstr, swarn.logical, dev->name, +				"%llu\n", errstr, swarn.logical, +				rcu_str_deref(dev->name),  				(unsigned long long)swarn.sector,  				ref_level ? "node" : "leaf",  				ret < 0 ? -1 : ref_level, @@ -580,9 +582,11 @@ out:  		spin_lock(&sdev->stat_lock);  		++sdev->stat.uncorrectable_errors;  		spin_unlock(&sdev->stat_lock); -		printk_ratelimited(KERN_ERR + +		printk_ratelimited_in_rcu(KERN_ERR  			"btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", -			(unsigned long long)fixup->logical, sdev->dev->name); +			(unsigned long long)fixup->logical, +			rcu_str_deref(sdev->dev->name));  	}  	btrfs_free_path(path); @@ -936,18 +940,20 @@ corrected_error:  			spin_lock(&sdev->stat_lock);  			sdev->stat.corrected_errors++;  			spin_unlock(&sdev->stat_lock); -			printk_ratelimited(KERN_ERR +			printk_ratelimited_in_rcu(KERN_ERR  				"btrfs: fixed up error at logical %llu on dev %s\n", -				(unsigned long long)logical, sdev->dev->name); +				(unsigned long long)logical, +				rcu_str_deref(sdev->dev->name));  		}  	} else {  did_not_correct_error:  		spin_lock(&sdev->stat_lock);  		sdev->stat.uncorrectable_errors++;  		spin_unlock(&sdev->stat_lock); -		printk_ratelimited(KERN_ERR +		printk_ratelimited_in_rcu(KERN_ERR  			"btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", -			(unsigned long long)logical, sdev->dev->name); +			(unsigned long long)logical, +			rcu_str_deref(sdev->dev->name));  	}  out: diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 96eb9fef7bd..0eb9a4da069 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -54,6 +54,7 @@  #include "version.h"  #include "export.h"  #include "compression.h" +#include "rcu-string.h"  #define CREATE_TRACE_POINTS  #include <trace/events/btrfs.h> @@ -1482,12 +1483,44 @@ static void btrfs_fs_dirty_inode(struct inode *inode, int flags)  				   "error %d\n", btrfs_ino(inode), ret);  } +static int btrfs_show_devname(struct seq_file *m, struct dentry *root) +{ +	struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); +	struct btrfs_fs_devices *cur_devices; +	struct btrfs_device *dev, *first_dev = NULL; +	struct list_head *head; +	struct rcu_string *name; + +	mutex_lock(&fs_info->fs_devices->device_list_mutex); +	cur_devices = fs_info->fs_devices; +	while (cur_devices) { +		head = &cur_devices->devices; +		list_for_each_entry(dev, head, dev_list) { +			if (!first_dev || dev->devid < first_dev->devid) +				first_dev = dev; +		} +		cur_devices = cur_devices->seed; +	} + +	if (first_dev) { +		rcu_read_lock(); +		name = rcu_dereference(first_dev->name); +		seq_escape(m, name->str, " \t\n\\"); +		rcu_read_unlock(); +	} else { +		WARN_ON(1); +	} +	mutex_unlock(&fs_info->fs_devices->device_list_mutex); +	return 0; +} +  static const struct super_operations btrfs_super_ops = {  	.drop_inode	= btrfs_drop_inode,  	.evict_inode	= btrfs_evict_inode,  	.put_super	= btrfs_put_super,  	.sync_fs	= btrfs_sync_fs,  	.show_options	= btrfs_show_options, +	.show_devname	= btrfs_show_devname,  	.write_inode	= btrfs_write_inode,  	.dirty_inode	= btrfs_fs_dirty_inode,  	.alloc_inode	= btrfs_alloc_inode, diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 1791c6e3d83..b72b068183e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -100,6 +100,10 @@ loop:  		kmem_cache_free(btrfs_transaction_cachep, cur_trans);  		cur_trans = fs_info->running_transaction;  		goto loop; +	} else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { +		spin_unlock(&root->fs_info->trans_lock); +		kmem_cache_free(btrfs_transaction_cachep, cur_trans); +		return -EROFS;  	}  	atomic_set(&cur_trans->num_writers, 1); @@ -1213,14 +1217,20 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,  static void cleanup_transaction(struct btrfs_trans_handle *trans, -				struct btrfs_root *root) +				struct btrfs_root *root, int err)  {  	struct btrfs_transaction *cur_trans = trans->transaction;  	WARN_ON(trans->use_count > 1); +	btrfs_abort_transaction(trans, root, err); +  	spin_lock(&root->fs_info->trans_lock);  	list_del_init(&cur_trans->list); +	if (cur_trans == root->fs_info->running_transaction) { +		root->fs_info->running_transaction = NULL; +		root->fs_info->trans_no_join = 0; +	}  	spin_unlock(&root->fs_info->trans_lock);  	btrfs_cleanup_one_transaction(trans->transaction, root); @@ -1526,7 +1536,7 @@ cleanup_transaction:  //	WARN_ON(1);  	if (current->journal_info == trans)  		current->journal_info = NULL; -	cleanup_transaction(trans, root); +	cleanup_transaction(trans, root, ret);  	return ret;  } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 7782020996f..8a3d2594b80 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -35,6 +35,7 @@  #include "volumes.h"  #include "async-thread.h"  #include "check-integrity.h" +#include "rcu-string.h"  static int init_first_rw_device(struct btrfs_trans_handle *trans,  				struct btrfs_root *root, @@ -64,7 +65,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)  		device = list_entry(fs_devices->devices.next,  				    struct btrfs_device, dev_list);  		list_del(&device->dev_list); -		kfree(device->name); +		rcu_string_free(device->name);  		kfree(device);  	}  	kfree(fs_devices); @@ -334,8 +335,8 @@ static noinline int device_list_add(const char *path,  {  	struct btrfs_device *device;  	struct btrfs_fs_devices *fs_devices; +	struct rcu_string *name;  	u64 found_transid = btrfs_super_generation(disk_super); -	char *name;  	fs_devices = find_fsid(disk_super->fsid);  	if (!fs_devices) { @@ -369,11 +370,13 @@ static noinline int device_list_add(const char *path,  		memcpy(device->uuid, disk_super->dev_item.uuid,  		       BTRFS_UUID_SIZE);  		spin_lock_init(&device->io_lock); -		device->name = kstrdup(path, GFP_NOFS); -		if (!device->name) { + +		name = rcu_string_strdup(path, GFP_NOFS); +		if (!name) {  			kfree(device);  			return -ENOMEM;  		} +		rcu_assign_pointer(device->name, name);  		INIT_LIST_HEAD(&device->dev_alloc_list);  		/* init readahead state */ @@ -390,12 +393,12 @@ static noinline int device_list_add(const char *path,  		device->fs_devices = fs_devices;  		fs_devices->num_devices++; -	} else if (!device->name || strcmp(device->name, path)) { -		name = kstrdup(path, GFP_NOFS); +	} else if (!device->name || strcmp(device->name->str, path)) { +		name = rcu_string_strdup(path, GFP_NOFS);  		if (!name)  			return -ENOMEM; -		kfree(device->name); -		device->name = name; +		rcu_string_free(device->name); +		rcu_assign_pointer(device->name, name);  		if (device->missing) {  			fs_devices->missing_devices--;  			device->missing = 0; @@ -430,15 +433,22 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)  	/* We have held the volume lock, it is safe to get the devices. */  	list_for_each_entry(orig_dev, &orig->devices, dev_list) { +		struct rcu_string *name; +  		device = kzalloc(sizeof(*device), GFP_NOFS);  		if (!device)  			goto error; -		device->name = kstrdup(orig_dev->name, GFP_NOFS); -		if (!device->name) { +		/* +		 * This is ok to do without rcu read locked because we hold the +		 * uuid mutex so nothing we touch in here is going to disappear. +		 */ +		name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); +		if (!name) {  			kfree(device);  			goto error;  		} +		rcu_assign_pointer(device->name, name);  		device->devid = orig_dev->devid;  		device->work.func = pending_bios_fn; @@ -491,7 +501,7 @@ again:  		}  		list_del_init(&device->dev_list);  		fs_devices->num_devices--; -		kfree(device->name); +		rcu_string_free(device->name);  		kfree(device);  	} @@ -516,7 +526,7 @@ static void __free_device(struct work_struct *work)  	if (device->bdev)  		blkdev_put(device->bdev, device->mode); -	kfree(device->name); +	rcu_string_free(device->name);  	kfree(device);  } @@ -540,6 +550,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)  	mutex_lock(&fs_devices->device_list_mutex);  	list_for_each_entry(device, &fs_devices->devices, dev_list) {  		struct btrfs_device *new_device; +		struct rcu_string *name;  		if (device->bdev)  			fs_devices->open_devices--; @@ -555,8 +566,11 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)  		new_device = kmalloc(sizeof(*new_device), GFP_NOFS);  		BUG_ON(!new_device); /* -ENOMEM */  		memcpy(new_device, device, sizeof(*new_device)); -		new_device->name = kstrdup(device->name, GFP_NOFS); -		BUG_ON(device->name && !new_device->name); /* -ENOMEM */ + +		/* Safe because we are under uuid_mutex */ +		name = rcu_string_strdup(device->name->str, GFP_NOFS); +		BUG_ON(device->name && !name); /* -ENOMEM */ +		rcu_assign_pointer(new_device->name, name);  		new_device->bdev = NULL;  		new_device->writeable = 0;  		new_device->in_fs_metadata = 0; @@ -621,9 +635,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,  		if (!device->name)  			continue; -		bdev = blkdev_get_by_path(device->name, flags, holder); +		bdev = blkdev_get_by_path(device->name->str, flags, holder);  		if (IS_ERR(bdev)) { -			printk(KERN_INFO "open %s failed\n", device->name); +			printk(KERN_INFO "open %s failed\n", device->name->str);  			goto error;  		}  		filemap_write_and_wait(bdev->bd_inode->i_mapping); @@ -1632,6 +1646,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)  	struct block_device *bdev;  	struct list_head *devices;  	struct super_block *sb = root->fs_info->sb; +	struct rcu_string *name;  	u64 total_bytes;  	int seeding_dev = 0;  	int ret = 0; @@ -1671,23 +1686,24 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)  		goto error;  	} -	device->name = kstrdup(device_path, GFP_NOFS); -	if (!device->name) { +	name = rcu_string_strdup(device_path, GFP_NOFS); +	if (!name) {  		kfree(device);  		ret = -ENOMEM;  		goto error;  	} +	rcu_assign_pointer(device->name, name);  	ret = find_next_devid(root, &device->devid);  	if (ret) { -		kfree(device->name); +		rcu_string_free(device->name);  		kfree(device);  		goto error;  	}  	trans = btrfs_start_transaction(root, 0);  	if (IS_ERR(trans)) { -		kfree(device->name); +		rcu_string_free(device->name);  		kfree(device);  		ret = PTR_ERR(trans);  		goto error; @@ -1796,7 +1812,7 @@ error_trans:  	unlock_chunks(root);  	btrfs_abort_transaction(trans, root, ret);  	btrfs_end_transaction(trans, root); -	kfree(device->name); +	rcu_string_free(device->name);  	kfree(device);  error:  	blkdev_put(bdev, FMODE_EXCL); @@ -4204,10 +4220,17 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,  		bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;  		dev = bbio->stripes[dev_nr].dev;  		if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { +#ifdef DEBUG +			struct rcu_string *name; + +			rcu_read_lock(); +			name = rcu_dereference(dev->name);  			pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "  				 "(%s id %llu), size=%u\n", rw,  				 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, -				 dev->name, dev->devid, bio->bi_size); +				 name->str, dev->devid, bio->bi_size); +			rcu_read_unlock(); +#endif  			bio->bi_bdev = dev->bdev;  			if (async_submit)  				schedule_bio(root, dev, rw, bio); @@ -4694,8 +4717,9 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)  		key.offset = device->devid;  		ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);  		if (ret) { -			printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", -			       device->name, (unsigned long long)device->devid); +			printk_in_rcu(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", +				      rcu_str_deref(device->name), +				      (unsigned long long)device->devid);  			__btrfs_reset_dev_stats(device);  			device->dev_stats_valid = 1;  			btrfs_release_path(path); @@ -4747,8 +4771,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,  	BUG_ON(!path);  	ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);  	if (ret < 0) { -		printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", -		       ret, device->name); +		printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", +			      ret, rcu_str_deref(device->name));  		goto out;  	} @@ -4757,8 +4781,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,  		/* need to delete old one and insert a new one */  		ret = btrfs_del_item(trans, dev_root, path);  		if (ret != 0) { -			printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", -			       device->name, ret); +			printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", +				      rcu_str_deref(device->name), ret);  			goto out;  		}  		ret = 1; @@ -4770,8 +4794,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,  		ret = btrfs_insert_empty_item(trans, dev_root, path,  					      &key, sizeof(*ptr));  		if (ret < 0) { -			printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", -			       device->name, ret); +			printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", +				      rcu_str_deref(device->name), ret);  			goto out;  		}  	} @@ -4823,9 +4847,9 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)  {  	if (!dev->dev_stats_valid)  		return; -	printk_ratelimited(KERN_ERR +	printk_ratelimited_in_rcu(KERN_ERR  			   "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", -			   dev->name, +			   rcu_str_deref(dev->name),  			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),  			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),  			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), @@ -4837,8 +4861,8 @@ void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)  static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)  { -	printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", -	       dev->name, +	printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", +	       rcu_str_deref(dev->name),  	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),  	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),  	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 3406a88ca83..74366f27a76 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -58,7 +58,7 @@ struct btrfs_device {  	/* the mode sent to blkdev_get */  	fmode_t mode; -	char *name; +	struct rcu_string *name;  	/* the internal btrfs device id */  	u64 devid; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 173b1d22e59..8b67304e4b8 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -54,7 +54,12 @@  	(CONGESTION_ON_THRESH(congestion_kb) -				\  	 (CONGESTION_ON_THRESH(congestion_kb) >> 2)) - +static inline struct ceph_snap_context *page_snap_context(struct page *page) +{ +	if (PagePrivate(page)) +		return (void *)page->private; +	return NULL; +}  /*   * Dirty a page.  Optimistically adjust accounting, on the assumption @@ -142,10 +147,9 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset)  {  	struct inode *inode;  	struct ceph_inode_info *ci; -	struct ceph_snap_context *snapc = (void *)page->private; +	struct ceph_snap_context *snapc = page_snap_context(page);  	BUG_ON(!PageLocked(page)); -	BUG_ON(!page->private);  	BUG_ON(!PagePrivate(page));  	BUG_ON(!page->mapping); @@ -182,7 +186,6 @@ static int ceph_releasepage(struct page *page, gfp_t g)  	struct inode *inode = page->mapping ? page->mapping->host : NULL;  	dout("%p releasepage %p idx %lu\n", inode, page, page->index);  	WARN_ON(PageDirty(page)); -	WARN_ON(page->private);  	WARN_ON(PagePrivate(page));  	return 0;  } @@ -443,7 +446,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)  	osdc = &fsc->client->osdc;  	/* verify this is a writeable snap context */ -	snapc = (void *)page->private; +	snapc = page_snap_context(page);  	if (snapc == NULL) {  		dout("writepage %p page %p not dirty?\n", inode, page);  		goto out; @@ -451,7 +454,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)  	oldest = get_oldest_context(inode, &snap_size);  	if (snapc->seq > oldest->seq) {  		dout("writepage %p page %p snapc %p not writeable - noop\n", -		     inode, page, (void *)page->private); +		     inode, page, snapc);  		/* we should only noop if called by kswapd */  		WARN_ON((current->flags & PF_MEMALLOC) == 0);  		ceph_put_snap_context(oldest); @@ -591,7 +594,7 @@ static void writepages_finish(struct ceph_osd_request *req,  			clear_bdi_congested(&fsc->backing_dev_info,  					    BLK_RW_ASYNC); -		ceph_put_snap_context((void *)page->private); +		ceph_put_snap_context(page_snap_context(page));  		page->private = 0;  		ClearPagePrivate(page);  		dout("unlocking %d %p\n", i, page); @@ -795,7 +798,7 @@ get_more_pages:  			}  			/* only if matching snap context */ -			pgsnapc = (void *)page->private; +			pgsnapc = page_snap_context(page);  			if (pgsnapc->seq > snapc->seq) {  				dout("page snapc %p %lld > oldest %p %lld\n",  				     pgsnapc, pgsnapc->seq, snapc, snapc->seq); @@ -984,7 +987,7 @@ retry_locked:  	BUG_ON(!ci->i_snap_realm);  	down_read(&mdsc->snap_rwsem);  	BUG_ON(!ci->i_snap_realm->cached_context); -	snapc = (void *)page->private; +	snapc = page_snap_context(page);  	if (snapc && snapc != ci->i_head_snapc) {  		/*  		 * this page is already dirty in another (older) snap diff --git a/fs/exec.c b/fs/exec.c index a79786a8d2c..da27b91ff1e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -819,10 +819,10 @@ static int exec_mmap(struct mm_struct *mm)  	/* Notify parent that we're no longer interested in the old VM */  	tsk = current;  	old_mm = current->mm; -	sync_mm_rss(old_mm);  	mm_release(tsk, old_mm);  	if (old_mm) { +		sync_mm_rss(old_mm);  		/*  		 * Make sure that if there is a core dump in progress  		 * for the old mm, we get out and die instead of going diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c index e32bc919e4e..5a7b691e748 100644 --- a/fs/exofs/sys.c +++ b/fs/exofs/sys.c @@ -109,7 +109,7 @@ static struct kobj_type odev_ktype = {  static struct kobj_type uuid_ktype = {  }; -void exofs_sysfs_dbg_print() +void exofs_sysfs_dbg_print(void)  {  #ifdef CONFIG_EXOFS_DEBUG  	struct kobject *k_name, *k_tmp; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8d2fb8c88cf..41a3ccff18d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -664,6 +664,7 @@ static long writeback_sb_inodes(struct super_block *sb,  			/* Wait for I_SYNC. This function drops i_lock... */  			inode_sleep_on_writeback(inode);  			/* Inode may be gone, start again */ +			spin_lock(&wb->list_lock);  			continue;  		}  		inode->i_state |= I_SYNC; diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index c640ba57074..09addc8615f 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c @@ -31,6 +31,7 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)  	struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);  	struct hfsplus_vh *vh = sbi->s_vhdr;  	struct hfsplus_vh *bvh = sbi->s_backup_vhdr; +	u32 cnid = (unsigned long)dentry->d_fsdata;  	if (!capable(CAP_SYS_ADMIN))  		return -EPERM; @@ -41,8 +42,12 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)  	vh->finder_info[0] = bvh->finder_info[0] =  		cpu_to_be32(parent_ino(dentry)); -	/* Bootloader */ -	vh->finder_info[1] = bvh->finder_info[1] = cpu_to_be32(inode->i_ino); +	/* +	 * Bootloader. Just using the inode here breaks in the case of +	 * hard links - the firmware wants the ID of the hard link file, +	 * but the inode points at the indirect inode +	 */ +	vh->finder_info[1] = bvh->finder_info[1] = cpu_to_be32(cnid);  	/* Per spec, the OS X system folder - same as finder_info[0] here */  	vh->finder_info[5] = bvh->finder_info[5] = diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 7daf4b852d1..90effcccca9 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -56,7 +56,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector,  	DECLARE_COMPLETION_ONSTACK(wait);  	struct bio *bio;  	int ret = 0; -	unsigned int io_size; +	u64 io_size;  	loff_t start;  	int offset; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 970659daa32..23ff18fe080 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -17,7 +17,6 @@  #include <linux/kthread.h>  #include <linux/sunrpc/svcauth_gss.h>  #include <linux/sunrpc/bc_xprt.h> -#include <linux/nsproxy.h>  #include <net/inet_sock.h> @@ -107,7 +106,7 @@ nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)  {  	int ret; -	ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET, +	ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET,  				nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);  	if (ret <= 0)  		goto out_err; @@ -115,7 +114,7 @@ nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)  	dprintk("NFS: Callback listener port = %u (af %u)\n",  			nfs_callback_tcpport, PF_INET); -	ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET6, +	ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6,  				nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);  	if (ret > 0) {  		nfs_callback_tcpport6 = ret; @@ -184,7 +183,7 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)  	 * fore channel connection.  	 * Returns the input port (0) and sets the svc_serv bc_xprt on success  	 */ -	ret = svc_create_xprt(serv, "tcp-bc", xprt->xprt_net, PF_INET, 0, +	ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0,  			      SVC_SOCK_ANONYMOUS);  	if (ret < 0) {  		rqstp = ERR_PTR(ret); @@ -254,7 +253,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)  	char svc_name[12];  	int ret = 0;  	int minorversion_setup; -	struct net *net = current->nsproxy->net_ns; +	struct net *net = &init_net;  	mutex_lock(&nfs_callback_mutex);  	if (cb_info->users++ || cb_info->task != NULL) { @@ -330,7 +329,7 @@ void nfs_callback_down(int minorversion)  	cb_info->users--;  	if (cb_info->users == 0 && cb_info->task != NULL) {  		kthread_stop(cb_info->task); -		svc_shutdown_net(cb_info->serv, current->nsproxy->net_ns); +		svc_shutdown_net(cb_info->serv, &init_net);  		svc_exit_thread(cb_info->rqst);  		cb_info->serv = NULL;  		cb_info->rqst = NULL; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 95bfc243992..e64b01d2a33 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -455,9 +455,9 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,  	args->csa_nrclists = ntohl(*p++);  	args->csa_rclists = NULL;  	if (args->csa_nrclists) { -		args->csa_rclists = kmalloc(args->csa_nrclists * -					    sizeof(*args->csa_rclists), -					    GFP_KERNEL); +		args->csa_rclists = kmalloc_array(args->csa_nrclists, +						  sizeof(*args->csa_rclists), +						  GFP_KERNEL);  		if (unlikely(args->csa_rclists == NULL))  			goto out; @@ -696,7 +696,7 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,  				       const struct cb_sequenceres *res)  {  	__be32 *p; -	unsigned status = res->csr_status; +	__be32 status = res->csr_status;  	if (unlikely(status != 0))  		goto out; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 7d108753af8..f005b5bebdc 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -207,7 +207,6 @@ error_0:  static void nfs4_shutdown_session(struct nfs_client *clp)  {  	if (nfs4_has_session(clp)) { -		nfs4_deviceid_purge_client(clp);  		nfs4_destroy_session(clp->cl_session);  		nfs4_destroy_clientid(clp);  	} @@ -544,8 +543,6 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,  	smp_rmb(); -	BUG_ON(clp->cl_cons_state != NFS_CS_READY); -  	dprintk("<-- %s found nfs_client %p for %s\n",  		__func__, clp, cl_init->hostname ?: "");  	return clp; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index ad2775d3e21..9a4cbfc85d8 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -490,6 +490,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)  			dreq->error = -EIO;  			spin_unlock(cinfo.lock);  		} +		nfs_release_request(req);  	}  	nfs_pageio_complete(&desc); @@ -523,9 +524,9 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)  		nfs_list_remove_request(req);  		if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {  			/* Note the rewrite will go through mds */ -			kref_get(&req->wb_kref);  			nfs_mark_request_commit(req, NULL, &cinfo); -		} +		} else +			nfs_release_request(req);  		nfs_unlock_and_release_request(req);  	} @@ -716,12 +717,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)  			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)  				bit = NFS_IOHDR_NEED_RESCHED;  			else if (dreq->flags == 0) { -				memcpy(&dreq->verf, &req->wb_verf, +				memcpy(&dreq->verf, hdr->verf,  				       sizeof(dreq->verf));  				bit = NFS_IOHDR_NEED_COMMIT;  				dreq->flags = NFS_ODIRECT_DO_COMMIT;  			} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { -				if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) { +				if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {  					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;  					bit = NFS_IOHDR_NEED_RESCHED;  				} else diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b5b86a05059..864c51e4b40 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -57,6 +57,11 @@ unsigned int nfs_idmap_cache_timeout = 600;  static const struct cred *id_resolver_cache;  static struct key_type key_type_id_resolver_legacy; +struct idmap { +	struct rpc_pipe		*idmap_pipe; +	struct key_construction	*idmap_key_cons; +	struct mutex		idmap_mutex; +};  /**   * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields @@ -310,9 +315,11 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,  					    name, namelen, type, data,  					    data_size, NULL);  	if (ret < 0) { +		mutex_lock(&idmap->idmap_mutex);  		ret = nfs_idmap_request_key(&key_type_id_resolver_legacy,  					    name, namelen, type, data,  					    data_size, idmap); +		mutex_unlock(&idmap->idmap_mutex);  	}  	return ret;  } @@ -354,11 +361,6 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ  /* idmap classic begins here */  module_param(nfs_idmap_cache_timeout, int, 0644); -struct idmap { -	struct rpc_pipe		*idmap_pipe; -	struct key_construction	*idmap_key_cons; -}; -  enum {  	Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err  }; @@ -469,6 +471,7 @@ nfs_idmap_new(struct nfs_client *clp)  		return error;  	}  	idmap->idmap_pipe = pipe; +	mutex_init(&idmap->idmap_mutex);  	clp->cl_idmap = idmap;  	return 0; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e605d695dbc..f7296983eba 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1530,7 +1530,6 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)  	nfsi->delegation_state = 0;  	init_rwsem(&nfsi->rwsem);  	nfsi->layout = NULL; -	atomic_set(&nfsi->commit_info.rpcs_out, 0);  #endif  } @@ -1545,6 +1544,7 @@ static void init_once(void *foo)  	INIT_LIST_HEAD(&nfsi->commit_info.list);  	nfsi->npages = 0;  	nfsi->commit_info.ncommit = 0; +	atomic_set(&nfsi->commit_info.rpcs_out, 0);  	atomic_set(&nfsi->silly_count, 1);  	INIT_HLIST_HEAD(&nfsi->silly_list);  	init_waitqueue_head(&nfsi->waitqueue); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c6827f93ab5..cc5900ac61b 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -295,7 +295,7 @@ is_ds_client(struct nfs_client *clp)  extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; -extern const u32 nfs4_fattr_bitmap[2]; +extern const u32 nfs4_fattr_bitmap[3];  extern const u32 nfs4_statfs_bitmap[2];  extern const u32 nfs4_pathconf_bitmap[2];  extern const u32 nfs4_fsinfo_bitmap[3]; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d48dbefa0e7..15fc7e4664e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -105,6 +105,8 @@ static int nfs4_map_errors(int err)  		return -EINVAL;  	case -NFS4ERR_SHARE_DENIED:  		return -EACCES; +	case -NFS4ERR_MINOR_VERS_MISMATCH: +		return -EPROTONOSUPPORT;  	default:  		dprintk("%s could not handle NFSv4 error %d\n",  				__func__, -err); @@ -116,7 +118,7 @@ static int nfs4_map_errors(int err)  /*   * This is our standard bitmap for GETATTR requests.   */ -const u32 nfs4_fattr_bitmap[2] = { +const u32 nfs4_fattr_bitmap[3] = {  	FATTR4_WORD0_TYPE  	| FATTR4_WORD0_CHANGE  	| FATTR4_WORD0_SIZE @@ -133,6 +135,24 @@ const u32 nfs4_fattr_bitmap[2] = {  	| FATTR4_WORD1_TIME_MODIFY  }; +static const u32 nfs4_pnfs_open_bitmap[3] = { +	FATTR4_WORD0_TYPE +	| FATTR4_WORD0_CHANGE +	| FATTR4_WORD0_SIZE +	| FATTR4_WORD0_FSID +	| FATTR4_WORD0_FILEID, +	FATTR4_WORD1_MODE +	| FATTR4_WORD1_NUMLINKS +	| FATTR4_WORD1_OWNER +	| FATTR4_WORD1_OWNER_GROUP +	| FATTR4_WORD1_RAWDEV +	| FATTR4_WORD1_SPACE_USED +	| FATTR4_WORD1_TIME_ACCESS +	| FATTR4_WORD1_TIME_METADATA +	| FATTR4_WORD1_TIME_MODIFY, +	FATTR4_WORD2_MDSTHRESHOLD +}; +  const u32 nfs4_statfs_bitmap[2] = {  	FATTR4_WORD0_FILES_AVAIL  	| FATTR4_WORD0_FILES_FREE @@ -844,6 +864,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,  	p->o_arg.name = &dentry->d_name;  	p->o_arg.server = server;  	p->o_arg.bitmask = server->attr_bitmask; +	p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0];  	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;  	if (attrs != NULL && attrs->ia_valid != 0) {  		__be32 verf[2]; @@ -1820,6 +1841,7 @@ static int _nfs4_do_open(struct inode *dir,  		opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();  		if (!opendata->f_attr.mdsthreshold)  			goto err_opendata_put; +		opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0];  	}  	if (dentry->d_inode != NULL)  		opendata->state = nfs4_get_open_state(dentry->d_inode, sp); @@ -1880,6 +1902,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,  	struct nfs4_state *res;  	int status; +	fmode &= FMODE_READ|FMODE_WRITE;  	do {  		status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred,  				       &res, ctx_th); @@ -2526,6 +2549,14 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,  	nfs_fattr_init(fattr); +	/* Deal with open(O_TRUNC) */ +	if (sattr->ia_valid & ATTR_OPEN) +		sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); + +	/* Optimization: if the end result is no change, don't RPC */ +	if ((sattr->ia_valid & ~(ATTR_FILE)) == 0) +		return 0; +  	/* Search for an existing open(O_WRITE) file */  	if (sattr->ia_valid & ATTR_FILE) {  		struct nfs_open_context *ctx; @@ -2537,10 +2568,6 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,  		}  	} -	/* Deal with open(O_TRUNC) */ -	if (sattr->ia_valid & ATTR_OPEN) -		sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); -  	status = nfs4_do_setattr(inode, cred, fattr, sattr, state);  	if (status == 0)  		nfs_setattr_update_inode(inode, sattr); @@ -5275,7 +5302,7 @@ static int _nfs4_proc_destroy_clientid(struct nfs_client *clp,  	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);  	if (status) -		pr_warn("NFS: Got error %d from the server %s on " +		dprintk("NFS: Got error %d from the server %s on "  			"DESTROY_CLIENTID.", status, clp->cl_hostname);  	return status;  } @@ -5746,8 +5773,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session,  	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);  	if (status) -		printk(KERN_WARNING -			"NFS: Got error %d from the server on DESTROY_SESSION. " +		dprintk("NFS: Got error %d from the server on DESTROY_SESSION. "  			"Session has been destroyed regardless...\n", status);  	dprintk("<-- nfs4_proc_destroy_session\n"); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c679b9ecef6..f38300e9f17 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -244,6 +244,16 @@ static int nfs4_begin_drain_session(struct nfs_client *clp)  	return nfs4_wait_on_slot_tbl(&ses->fc_slot_table);  } +static void nfs41_finish_session_reset(struct nfs_client *clp) +{ +	clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); +	clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); +	/* create_session negotiated new slot table */ +	clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); +	clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); +	nfs41_setup_state_renewal(clp); +} +  int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)  {  	int status; @@ -259,8 +269,7 @@ do_confirm:  	status = nfs4_proc_create_session(clp, cred);  	if (status != 0)  		goto out; -	clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); -	nfs41_setup_state_renewal(clp); +	nfs41_finish_session_reset(clp);  	nfs_mark_client_ready(clp, NFS_CS_READY);  out:  	return status; @@ -1772,16 +1781,9 @@ static int nfs4_reset_session(struct nfs_client *clp)  		status = nfs4_handle_reclaim_lease_error(clp, status);  		goto out;  	} -	clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); -	/* create_session negotiated new slot table */ -	clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); -	clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); +	nfs41_finish_session_reset(clp);  	dprintk("%s: session reset was successful for server %s!\n",  			__func__, clp->cl_hostname); - -	 /* Let the state manager reestablish state */ -	if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) -		nfs41_setup_state_renewal(clp);  out:  	if (cred)  		put_rpccred(cred); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ee4a74db95d..18fae29b030 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1198,12 +1198,13 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c  }  static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, +				 const u32 *open_bitmap,  				 struct compound_hdr *hdr)  {  	encode_getattr_three(xdr, -			     bitmask[0] & nfs4_fattr_bitmap[0], -			     bitmask[1] & nfs4_fattr_bitmap[1], -			     bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD, +			     bitmask[0] & open_bitmap[0], +			     bitmask[1] & open_bitmap[1], +			     bitmask[2] & open_bitmap[2],  			     hdr);  } @@ -2221,7 +2222,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,  	encode_putfh(xdr, args->fh, &hdr);  	encode_open(xdr, args, &hdr);  	encode_getfh(xdr, &hdr); -	encode_getfattr_open(xdr, args->bitmask, &hdr); +	encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);  	encode_nops(&hdr);  } @@ -4359,7 +4360,10 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr,  	if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U)))  		return -EIO; -	if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) { +	if (bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD) { +		/* Did the server return an unrequested attribute? */ +		if (unlikely(res == NULL)) +			return -EREMOTEIO;  		p = xdr_inline_decode(xdr, 4);  		if (unlikely(!p))  			goto out_overflow; @@ -4372,6 +4376,7 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr,  				__func__);  		status = decode_first_threshold_item4(xdr, res); +		bitmap[2] &= ~FATTR4_WORD2_MDSTHRESHOLD;  	}  	return status;  out_overflow: diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b8323aa7b54..bbc49caa7a8 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -70,6 +70,10 @@ find_pnfs_driver(u32 id)  	spin_lock(&pnfs_spinlock);  	local = find_pnfs_driver_locked(id); +	if (local != NULL && !try_module_get(local->owner)) { +		dprintk("%s: Could not grab reference on module\n", __func__); +		local = NULL; +	}  	spin_unlock(&pnfs_spinlock);  	return local;  } @@ -80,6 +84,9 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)  	if (nfss->pnfs_curr_ld) {  		if (nfss->pnfs_curr_ld->clear_layoutdriver)  			nfss->pnfs_curr_ld->clear_layoutdriver(nfss); +		/* Decrement the MDS count. Purge the deviceid cache if zero */ +		if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count)) +			nfs4_deviceid_purge_client(nfss->nfs_client);  		module_put(nfss->pnfs_curr_ld->owner);  	}  	nfss->pnfs_curr_ld = NULL; @@ -115,10 +122,6 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,  			goto out_no_driver;  		}  	} -	if (!try_module_get(ld_type->owner)) { -		dprintk("%s: Could not grab reference on module\n", __func__); -		goto out_no_driver; -	}  	server->pnfs_curr_ld = ld_type;  	if (ld_type->set_layoutdriver  	    && ld_type->set_layoutdriver(server, mntfh)) { @@ -127,6 +130,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,  		module_put(ld_type->owner);  		goto out_no_driver;  	} +	/* Bump the MDS count */ +	atomic_inc(&server->nfs_client->cl_mds_count);  	dprintk("%s: pNFS module for %u set\n", __func__, id);  	return; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 29fd23c0efd..64f90d845f6 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -365,7 +365,7 @@ static inline bool  pnfs_use_threshold(struct nfs4_threshold **dst, struct nfs4_threshold *src,  		   struct nfs_server *nfss)  { -	return (dst && src && src->bm != 0 && +	return (dst && src && src->bm != 0 && nfss->pnfs_curr_ld &&  					nfss->pnfs_curr_ld->id == src->l_type);  } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index a706b6bcc28..617c7419a08 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -651,7 +651,7 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)  		/* Emulate the eof flag, which isn't normally needed in NFSv2  		 * as it is guaranteed to always return the file attributes  		 */ -		if (data->args.offset + data->args.count >= data->res.fattr->size) +		if (data->args.offset + data->res.count >= data->res.fattr->size)  			data->res.eof = 1;  	}  	return 0; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ff656c02268..906f09c7d84 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1867,6 +1867,7 @@ static int nfs23_validate_mount_data(void *options,  	if (data == NULL)  		goto out_no_data; +	args->version = NFS_DEFAULT_VERSION;  	switch (data->version) {  	case 1:  		data->namlen = 0; @@ -2637,6 +2638,8 @@ static int nfs4_validate_mount_data(void *options,  	if (data == NULL)  		goto out_no_data; +	args->version = 4; +  	switch (data->version) {  	case 1:  		if (data->host_addrlen > sizeof(args->nfs_server.address)) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e6fe3d69d14..4d6861c0dc1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -80,6 +80,7 @@ struct nfs_write_header *nfs_writehdr_alloc(void)  		INIT_LIST_HEAD(&hdr->rpc_list);  		spin_lock_init(&hdr->lock);  		atomic_set(&hdr->refcnt, 0); +		hdr->verf = &p->verf;  	}  	return p;  } @@ -619,6 +620,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)  			goto next;  		}  		if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { +			memcpy(&req->wb_verf, hdr->verf, sizeof(req->wb_verf));  			nfs_mark_request_commit(req, hdr->lseg, &cinfo);  			goto next;  		} @@ -1255,15 +1257,14 @@ static void nfs_writeback_release_common(void *calldata)  	struct nfs_write_data	*data = calldata;  	struct nfs_pgio_header *hdr = data->header;  	int status = data->task.tk_status; -	struct nfs_page *req = hdr->req;  	if ((status >= 0) && nfs_write_need_commit(data)) {  		spin_lock(&hdr->lock);  		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))  			; /* Do nothing */  		else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) -			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); -		else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) +			memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf)); +		else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf)))  			set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);  		spin_unlock(&hdr->lock);  	} diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8fdc9ec5c5d..94effd5bc4a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -900,7 +900,7 @@ static void free_session(struct kref *kref)  	struct nfsd4_session *ses;  	int mem; -	BUG_ON(!spin_is_locked(&client_lock)); +	lockdep_assert_held(&client_lock);  	ses = container_of(kref, struct nfsd4_session, se_ref);  	nfsd4_del_conns(ses);  	spin_lock(&nfsd_drc_lock); @@ -1080,7 +1080,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)  static inline void  free_client(struct nfs4_client *clp)  { -	BUG_ON(!spin_is_locked(&client_lock)); +	lockdep_assert_held(&client_lock);  	while (!list_empty(&clp->cl_sessions)) {  		struct nfsd4_session *ses;  		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 08a07a218d2..57ceaf33d17 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -191,6 +191,8 @@ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs)  	while (!list_empty(head)) {  		ii = list_first_entry(head, struct nilfs_inode_info, i_dirty);  		list_del_init(&ii->i_dirty); +		truncate_inode_pages(&ii->vfs_inode.i_data, 0); +		nilfs_btnode_cache_clear(&ii->i_btnode_cache);  		iput(&ii->vfs_inode);  	}  } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 0e72ad6f22a..88e11fb346b 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2309,6 +2309,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)  		if (!test_bit(NILFS_I_UPDATED, &ii->i_state))  			continue;  		list_del_init(&ii->i_dirty); +		truncate_inode_pages(&ii->vfs_inode.i_data, 0); +		nilfs_btnode_cache_clear(&ii->i_btnode_cache);  		iput(&ii->vfs_inode);  	}  } diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index aeb19e68e08..11a2aa2a56c 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -258,7 +258,7 @@ fail:  	return rc;  } -int pstore_fill_super(struct super_block *sb, void *data, int silent) +static int pstore_fill_super(struct super_block *sb, void *data, int silent)  {  	struct inode *inode; diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 82c585f715e..03ce7a9b81c 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -94,20 +94,15 @@ static const char *get_reason_str(enum kmsg_dump_reason reason)   * as we can from the end of the buffer.   */  static void pstore_dump(struct kmsg_dumper *dumper, -	    enum kmsg_dump_reason reason, -	    const char *s1, unsigned long l1, -	    const char *s2, unsigned long l2) +			enum kmsg_dump_reason reason)  { -	unsigned long	s1_start, s2_start; -	unsigned long	l1_cpy, l2_cpy; -	unsigned long	size, total = 0; -	char		*dst; +	unsigned long	total = 0;  	const char	*why;  	u64		id; -	int		hsize, ret;  	unsigned int	part = 1;  	unsigned long	flags = 0;  	int		is_locked = 0; +	int		ret;  	why = get_reason_str(reason); @@ -119,30 +114,25 @@ static void pstore_dump(struct kmsg_dumper *dumper,  		spin_lock_irqsave(&psinfo->buf_lock, flags);  	oopscount++;  	while (total < kmsg_bytes) { +		char *dst; +		unsigned long size; +		int hsize; +		size_t len; +  		dst = psinfo->buf;  		hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part);  		size = psinfo->bufsize - hsize;  		dst += hsize; -		l2_cpy = min(l2, size); -		l1_cpy = min(l1, size - l2_cpy); - -		if (l1_cpy + l2_cpy == 0) +		if (!kmsg_dump_get_buffer(dumper, true, dst, size, &len))  			break; -		s2_start = l2 - l2_cpy; -		s1_start = l1 - l1_cpy; - -		memcpy(dst, s1 + s1_start, l1_cpy); -		memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); -  		ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part, -				   hsize + l1_cpy + l2_cpy, psinfo); +				    hsize + len, psinfo);  		if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted())  			pstore_new_entry = 1; -		l1 -= l1_cpy; -		l2 -= l2_cpy; -		total += l1_cpy + l2_cpy; + +		total += hsize + len;  		part++;  	}  	if (in_nmi()) { diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 9123cce28c1..453030f9c5b 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -106,6 +106,8 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,  	time->tv_sec = 0;  	time->tv_nsec = 0; +	/* Update old/shadowed buffer. */ +	persistent_ram_save_old(prz);  	size = persistent_ram_old_size(prz);  	*buf = kmalloc(size, GFP_KERNEL);  	if (*buf == NULL) @@ -184,6 +186,7 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id,  		return -EINVAL;  	persistent_ram_free_old(cxt->przs[id]); +	persistent_ram_zap(cxt->przs[id]);  	return 0;  } diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 31f8d184f3a..c5fbdbbf81a 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -250,23 +250,24 @@ static void notrace persistent_ram_update(struct persistent_ram_zone *prz,  	persistent_ram_update_ecc(prz, start, count);  } -static void __init -persistent_ram_save_old(struct persistent_ram_zone *prz) +void persistent_ram_save_old(struct persistent_ram_zone *prz)  {  	struct persistent_ram_buffer *buffer = prz->buffer;  	size_t size = buffer_size(prz);  	size_t start = buffer_start(prz); -	char *dest; -	persistent_ram_ecc_old(prz); +	if (!size) +		return; -	dest = kmalloc(size, GFP_KERNEL); -	if (dest == NULL) { +	if (!prz->old_log) { +		persistent_ram_ecc_old(prz); +		prz->old_log = kmalloc(size, GFP_KERNEL); +	} +	if (!prz->old_log) {  		pr_err("persistent_ram: failed to allocate buffer\n");  		return;  	} -	prz->old_log = dest;  	prz->old_log_size = size;  	memcpy(prz->old_log, &buffer->data[start], size - start);  	memcpy(prz->old_log + size - start, &buffer->data[0], start); @@ -319,6 +320,13 @@ void persistent_ram_free_old(struct persistent_ram_zone *prz)  	prz->old_log_size = 0;  } +void persistent_ram_zap(struct persistent_ram_zone *prz) +{ +	atomic_set(&prz->buffer->start, 0); +	atomic_set(&prz->buffer->size, 0); +	persistent_ram_update_header_ecc(prz); +} +  static void *persistent_ram_vmap(phys_addr_t start, size_t size)  {  	struct page **pages; @@ -405,6 +413,7 @@ static int __init persistent_ram_post_init(struct persistent_ram_zone *prz, bool  				" size %zu, start %zu\n",  			       buffer_size(prz), buffer_start(prz));  			persistent_ram_save_old(prz); +			return 0;  		}  	} else {  		pr_info("persistent_ram: no valid data in buffer" @@ -412,8 +421,7 @@ static int __init persistent_ram_post_init(struct persistent_ram_zone *prz, bool  	}  	prz->buffer->sig = PERSISTENT_RAM_SIG; -	atomic_set(&prz->buffer->start, 0); -	atomic_set(&prz->buffer->size, 0); +	persistent_ram_zap(prz);  	return 0;  } @@ -448,7 +456,6 @@ struct persistent_ram_zone * __init persistent_ram_new(phys_addr_t start,  		goto err;  	persistent_ram_post_init(prz, ecc); -	persistent_ram_update_header_ecc(prz);  	return prz;  err: diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 84a7e6f3c04..92df3b08153 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2918,7 +2918,7 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)  	struct dentry *dent;  	struct ubifs_debug_info *d = c->dbg; -	if (!IS_ENABLED(DEBUG_FS)) +	if (!IS_ENABLED(CONFIG_DEBUG_FS))  		return 0;  	n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, @@ -3013,7 +3013,7 @@ out:   */  void dbg_debugfs_exit_fs(struct ubifs_info *c)  { -	if (IS_ENABLED(DEBUG_FS)) +	if (IS_ENABLED(CONFIG_DEBUG_FS))  		debugfs_remove_recursive(c->dbg->dfs_dir);  } @@ -3099,7 +3099,7 @@ int dbg_debugfs_init(void)  	const char *fname;  	struct dentry *dent; -	if (!IS_ENABLED(DEBUG_FS)) +	if (!IS_ENABLED(CONFIG_DEBUG_FS))  		return 0;  	fname = "ubifs"; @@ -3166,7 +3166,7 @@ out:   */  void dbg_debugfs_exit(void)  { -	if (IS_ENABLED(DEBUG_FS)) +	if (IS_ENABLED(CONFIG_DEBUG_FS))  		debugfs_remove_recursive(dfs_rootdir);  } diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 2559d174e00..28ec13af28d 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -939,8 +939,8 @@ static int find_dirtiest_idx_leb(struct ubifs_info *c)  	}  	dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty,  		 lp->free, lp->flags); -	ubifs_assert(lp->flags | LPROPS_TAKEN); -	ubifs_assert(lp->flags | LPROPS_INDEX); +	ubifs_assert(lp->flags & LPROPS_TAKEN); +	ubifs_assert(lp->flags & LPROPS_INDEX);  	return lnum;  } diff --git a/fs/udf/super.c b/fs/udf/super.c index ac8a348dcb6..8d86a8706c0 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -56,6 +56,7 @@  #include <linux/seq_file.h>  #include <linux/bitmap.h>  #include <linux/crc-itu-t.h> +#include <linux/log2.h>  #include <asm/byteorder.h>  #include "udf_sb.h" @@ -1215,16 +1216,65 @@ out_bh:  	return ret;  } +static int udf_load_sparable_map(struct super_block *sb, +				 struct udf_part_map *map, +				 struct sparablePartitionMap *spm) +{ +	uint32_t loc; +	uint16_t ident; +	struct sparingTable *st; +	struct udf_sparing_data *sdata = &map->s_type_specific.s_sparing; +	int i; +	struct buffer_head *bh; + +	map->s_partition_type = UDF_SPARABLE_MAP15; +	sdata->s_packet_len = le16_to_cpu(spm->packetLength); +	if (!is_power_of_2(sdata->s_packet_len)) { +		udf_err(sb, "error loading logical volume descriptor: " +			"Invalid packet length %u\n", +			(unsigned)sdata->s_packet_len); +		return -EIO; +	} +	if (spm->numSparingTables > 4) { +		udf_err(sb, "error loading logical volume descriptor: " +			"Too many sparing tables (%d)\n", +			(int)spm->numSparingTables); +		return -EIO; +	} + +	for (i = 0; i < spm->numSparingTables; i++) { +		loc = le32_to_cpu(spm->locSparingTable[i]); +		bh = udf_read_tagged(sb, loc, loc, &ident); +		if (!bh) +			continue; + +		st = (struct sparingTable *)bh->b_data; +		if (ident != 0 || +		    strncmp(st->sparingIdent.ident, UDF_ID_SPARING, +			    strlen(UDF_ID_SPARING)) || +		    sizeof(*st) + le16_to_cpu(st->reallocationTableLen) > +							sb->s_blocksize) { +			brelse(bh); +			continue; +		} + +		sdata->s_spar_map[i] = bh; +	} +	map->s_partition_func = udf_get_pblock_spar15; +	return 0; +} +  static int udf_load_logicalvol(struct super_block *sb, sector_t block,  			       struct kernel_lb_addr *fileset)  {  	struct logicalVolDesc *lvd; -	int i, j, offset; +	int i, offset;  	uint8_t type;  	struct udf_sb_info *sbi = UDF_SB(sb);  	struct genericPartitionMap *gpm;  	uint16_t ident;  	struct buffer_head *bh; +	unsigned int table_len;  	int ret = 0;  	bh = udf_read_tagged(sb, block, block, &ident); @@ -1232,15 +1282,20 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,  		return 1;  	BUG_ON(ident != TAG_IDENT_LVD);  	lvd = (struct logicalVolDesc *)bh->b_data; - -	i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); -	if (i != 0) { -		ret = i; +	table_len = le32_to_cpu(lvd->mapTableLength); +	if (sizeof(*lvd) + table_len > sb->s_blocksize) { +		udf_err(sb, "error loading logical volume descriptor: " +			"Partition table too long (%u > %lu)\n", table_len, +			sb->s_blocksize - sizeof(*lvd));  		goto out_bh;  	} +	ret = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); +	if (ret) +		goto out_bh; +  	for (i = 0, offset = 0; -	     i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength); +	     i < sbi->s_partitions && offset < table_len;  	     i++, offset += gpm->partitionMapLength) {  		struct udf_part_map *map = &sbi->s_partmaps[i];  		gpm = (struct genericPartitionMap *) @@ -1275,38 +1330,9 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,  			} else if (!strncmp(upm2->partIdent.ident,  						UDF_ID_SPARABLE,  						strlen(UDF_ID_SPARABLE))) { -				uint32_t loc; -				struct sparingTable *st; -				struct sparablePartitionMap *spm = -					(struct sparablePartitionMap *)gpm; - -				map->s_partition_type = UDF_SPARABLE_MAP15; -				map->s_type_specific.s_sparing.s_packet_len = -						le16_to_cpu(spm->packetLength); -				for (j = 0; j < spm->numSparingTables; j++) { -					struct buffer_head *bh2; - -					loc = le32_to_cpu( -						spm->locSparingTable[j]); -					bh2 = udf_read_tagged(sb, loc, loc, -							     &ident); -					map->s_type_specific.s_sparing. -							s_spar_map[j] = bh2; - -					if (bh2 == NULL) -						continue; - -					st = (struct sparingTable *)bh2->b_data; -					if (ident != 0 || strncmp( -						st->sparingIdent.ident, -						UDF_ID_SPARING, -						strlen(UDF_ID_SPARING))) { -						brelse(bh2); -						map->s_type_specific.s_sparing. -							s_spar_map[j] = NULL; -					} -				} -				map->s_partition_func = udf_get_pblock_spar15; +				if (udf_load_sparable_map(sb, map, +				    (struct sparablePartitionMap *)gpm) < 0) +					goto out_bh;  			} else if (!strncmp(upm2->partIdent.ident,  						UDF_ID_METADATA,  						strlen(UDF_ID_METADATA))) { diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 229641fb8e6..9d1aeb7e273 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -1080,6 +1080,7 @@ restart:  			goto restart;  		} +		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);  		trace_xfs_alloc_size_neither(args);  		args->agbno = NULLAGBLOCK;  		return 0; @@ -2441,7 +2442,7 @@ xfs_alloc_vextent(  	DECLARE_COMPLETION_ONSTACK(done);  	args->done = &done; -	INIT_WORK(&args->work, xfs_alloc_vextent_worker); +	INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);  	queue_work(xfs_alloc_wq, &args->work);  	wait_for_completion(&done);  	return args->result; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index ae31c313a79..8dad722c004 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -981,10 +981,15 @@ xfs_vm_writepage(  				imap_valid = 0;  			}  		} else { -			if (PageUptodate(page)) { +			if (PageUptodate(page))  				ASSERT(buffer_mapped(bh)); -				imap_valid = 0; -			} +			/* +			 * This buffer is not uptodate and will not be +			 * written to disk.  Ensure that we will put any +			 * subsequent writeable buffers into a new +			 * ioend. +			 */ +			imap_valid = 0;  			continue;  		} diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 172d3cc8f8c..a4beb421018 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -201,14 +201,7 @@ xfs_buf_alloc(  	bp->b_length = numblks;  	bp->b_io_length = numblks;  	bp->b_flags = flags; - -	/* -	 * We do not set the block number here in the buffer because we have not -	 * finished initialising the buffer. We insert the buffer into the cache -	 * in this state, so this ensures that we are unable to do IO on a -	 * buffer that hasn't been fully initialised. -	 */ -	bp->b_bn = XFS_BUF_DADDR_NULL; +	bp->b_bn = blkno;  	atomic_set(&bp->b_pin_count, 0);  	init_waitqueue_head(&bp->b_waiters); @@ -567,11 +560,6 @@ xfs_buf_get(  	if (bp != new_bp)  		xfs_buf_free(new_bp); -	/* -	 * Now we have a workable buffer, fill in the block number so -	 * that we can do IO on it. -	 */ -	bp->b_bn = blkno;  	bp->b_io_length = bp->b_length;  found: @@ -772,7 +760,7 @@ xfs_buf_get_uncached(  	int			error, i;  	xfs_buf_t		*bp; -	bp = xfs_buf_alloc(target, 0, numblks, 0); +	bp = xfs_buf_alloc(target, XFS_BUF_DADDR_NULL, numblks, 0);  	if (unlikely(bp == NULL))  		goto fail; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 6cdbf90c6f7..d041d47d9d8 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -505,6 +505,14 @@ xfs_inode_item_push(  	}  	/* +	 * Stale inode items should force out the iclog. +	 */ +	if (ip->i_flags & XFS_ISTALE) { +		rval = XFS_ITEM_PINNED; +		goto out_unlock; +	} + +	/*  	 * Someone else is already flushing the inode.  Nothing we can do  	 * here but wait for the flush to finish and remove the item from  	 * the AIL. @@ -514,15 +522,6 @@ xfs_inode_item_push(  		goto out_unlock;  	} -	/* -	 * Stale inode items should force out the iclog. -	 */ -	if (ip->i_flags & XFS_ISTALE) { -		xfs_ifunlock(ip); -		xfs_iunlock(ip, XFS_ILOCK_SHARED); -		return XFS_ITEM_PINNED; -	} -  	ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));  	ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f30d9807dc4..d90d4a38860 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -38,13 +38,21 @@  kmem_zone_t	*xfs_log_ticket_zone;  /* Local miscellaneous function prototypes */ -STATIC int	 xlog_commit_record(struct log *log, struct xlog_ticket *ticket, -				    xlog_in_core_t **, xfs_lsn_t *); +STATIC int +xlog_commit_record( +	struct xlog		*log, +	struct xlog_ticket	*ticket, +	struct xlog_in_core	**iclog, +	xfs_lsn_t		*commitlsnp); +  STATIC xlog_t *  xlog_alloc_log(xfs_mount_t	*mp,  				xfs_buftarg_t	*log_target,  				xfs_daddr_t	blk_offset,  				int		num_bblks); -STATIC int	 xlog_space_left(struct log *log, atomic64_t *head); +STATIC int +xlog_space_left( +	struct xlog		*log, +	atomic64_t		*head);  STATIC int	 xlog_sync(xlog_t *log, xlog_in_core_t *iclog);  STATIC void	 xlog_dealloc_log(xlog_t *log); @@ -64,8 +72,10 @@ STATIC void xlog_state_switch_iclogs(xlog_t		*log,  				     int		eventual_size);  STATIC void xlog_state_want_sync(xlog_t	*log, xlog_in_core_t *iclog); -STATIC void xlog_grant_push_ail(struct log	*log, -				int		need_bytes); +STATIC void +xlog_grant_push_ail( +	struct xlog	*log, +	int		need_bytes);  STATIC void xlog_regrant_reserve_log_space(xlog_t	 *log,  					   xlog_ticket_t *ticket);  STATIC void xlog_ungrant_log_space(xlog_t	 *log, @@ -73,7 +83,9 @@ STATIC void xlog_ungrant_log_space(xlog_t	 *log,  #if defined(DEBUG)  STATIC void	xlog_verify_dest_ptr(xlog_t *log, char *ptr); -STATIC void	xlog_verify_grant_tail(struct log *log); +STATIC void +xlog_verify_grant_tail( +	struct xlog	*log);  STATIC void	xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,  				  int count, boolean_t syncing);  STATIC void	xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, @@ -89,9 +101,9 @@ STATIC int	xlog_iclogs_empty(xlog_t *log);  static void  xlog_grant_sub_space( -	struct log	*log, -	atomic64_t	*head, -	int		bytes) +	struct xlog		*log, +	atomic64_t		*head, +	int			bytes)  {  	int64_t	head_val = atomic64_read(head);  	int64_t new, old; @@ -115,9 +127,9 @@ xlog_grant_sub_space(  static void  xlog_grant_add_space( -	struct log	*log, -	atomic64_t	*head, -	int		bytes) +	struct xlog		*log, +	atomic64_t		*head, +	int			bytes)  {  	int64_t	head_val = atomic64_read(head);  	int64_t new, old; @@ -165,7 +177,7 @@ xlog_grant_head_wake_all(  static inline int  xlog_ticket_reservation( -	struct log		*log, +	struct xlog		*log,  	struct xlog_grant_head	*head,  	struct xlog_ticket	*tic)  { @@ -182,7 +194,7 @@ xlog_ticket_reservation(  STATIC bool  xlog_grant_head_wake( -	struct log		*log, +	struct xlog		*log,  	struct xlog_grant_head	*head,  	int			*free_bytes)  { @@ -204,7 +216,7 @@ xlog_grant_head_wake(  STATIC int  xlog_grant_head_wait( -	struct log		*log, +	struct xlog		*log,  	struct xlog_grant_head	*head,  	struct xlog_ticket	*tic,  	int			need_bytes) @@ -256,7 +268,7 @@ shutdown:   */  STATIC int  xlog_grant_head_check( -	struct log		*log, +	struct xlog		*log,  	struct xlog_grant_head	*head,  	struct xlog_ticket	*tic,  	int			*need_bytes) @@ -323,7 +335,7 @@ xfs_log_regrant(  	struct xfs_mount	*mp,  	struct xlog_ticket	*tic)  { -	struct log		*log = mp->m_log; +	struct xlog		*log = mp->m_log;  	int			need_bytes;  	int			error = 0; @@ -389,7 +401,7 @@ xfs_log_reserve(  	bool			permanent,  	uint		 	t_type)  { -	struct log		*log = mp->m_log; +	struct xlog		*log = mp->m_log;  	struct xlog_ticket	*tic;  	int			need_bytes;  	int			error = 0; @@ -465,7 +477,7 @@ xfs_log_done(  	struct xlog_in_core	**iclog,  	uint			flags)  { -	struct log		*log = mp->m_log; +	struct xlog		*log = mp->m_log;  	xfs_lsn_t		lsn = 0;  	if (XLOG_FORCED_SHUTDOWN(log) || @@ -810,6 +822,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)  void  xfs_log_unmount(xfs_mount_t *mp)  { +	cancel_delayed_work_sync(&mp->m_sync_work);  	xfs_trans_ail_destroy(mp);  	xlog_dealloc_log(mp->m_log);  } @@ -838,7 +851,7 @@ void  xfs_log_space_wake(  	struct xfs_mount	*mp)  { -	struct log		*log = mp->m_log; +	struct xlog		*log = mp->m_log;  	int			free_bytes;  	if (XLOG_FORCED_SHUTDOWN(log)) @@ -916,7 +929,7 @@ xfs_lsn_t  xlog_assign_tail_lsn_locked(  	struct xfs_mount	*mp)  { -	struct log		*log = mp->m_log; +	struct xlog		*log = mp->m_log;  	struct xfs_log_item	*lip;  	xfs_lsn_t		tail_lsn; @@ -965,7 +978,7 @@ xlog_assign_tail_lsn(   */  STATIC int  xlog_space_left( -	struct log	*log, +	struct xlog	*log,  	atomic64_t	*head)  {  	int		free_bytes; @@ -1277,7 +1290,7 @@ out:   */  STATIC int  xlog_commit_record( -	struct log		*log, +	struct xlog		*log,  	struct xlog_ticket	*ticket,  	struct xlog_in_core	**iclog,  	xfs_lsn_t		*commitlsnp) @@ -1311,7 +1324,7 @@ xlog_commit_record(   */  STATIC void  xlog_grant_push_ail( -	struct log	*log, +	struct xlog	*log,  	int		need_bytes)  {  	xfs_lsn_t	threshold_lsn = 0; @@ -1790,7 +1803,7 @@ xlog_write_start_rec(  static xlog_op_header_t *  xlog_write_setup_ophdr( -	struct log		*log, +	struct xlog		*log,  	struct xlog_op_header	*ophdr,  	struct xlog_ticket	*ticket,  	uint			flags) @@ -1873,7 +1886,7 @@ xlog_write_setup_copy(  static int  xlog_write_copy_finish( -	struct log		*log, +	struct xlog		*log,  	struct xlog_in_core	*iclog,  	uint			flags,  	int			*record_cnt, @@ -1958,7 +1971,7 @@ xlog_write_copy_finish(   */  int  xlog_write( -	struct log		*log, +	struct xlog		*log,  	struct xfs_log_vec	*log_vector,  	struct xlog_ticket	*ticket,  	xfs_lsn_t		*start_lsn, @@ -2821,7 +2834,7 @@ _xfs_log_force(  	uint			flags,  	int			*log_flushed)  { -	struct log		*log = mp->m_log; +	struct xlog		*log = mp->m_log;  	struct xlog_in_core	*iclog;  	xfs_lsn_t		lsn; @@ -2969,7 +2982,7 @@ _xfs_log_force_lsn(  	uint			flags,  	int			*log_flushed)  { -	struct log		*log = mp->m_log; +	struct xlog		*log = mp->m_log;  	struct xlog_in_core	*iclog;  	int			already_slept = 0; @@ -3147,7 +3160,7 @@ xfs_log_ticket_get(   */  xlog_ticket_t *  xlog_ticket_alloc( -	struct log	*log, +	struct xlog	*log,  	int		unit_bytes,  	int		cnt,  	char		client, @@ -3278,7 +3291,7 @@ xlog_ticket_alloc(   */  void  xlog_verify_dest_ptr( -	struct log	*log, +	struct xlog	*log,  	char		*ptr)  {  	int i; @@ -3307,7 +3320,7 @@ xlog_verify_dest_ptr(   */  STATIC void  xlog_verify_grant_tail( -	struct log	*log) +	struct xlog	*log)  {  	int		tail_cycle, tail_blocks;  	int		cycle, space; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 7d6197c5849..ddc4529d07d 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -44,7 +44,7 @@   */  static struct xlog_ticket *  xlog_cil_ticket_alloc( -	struct log	*log) +	struct xlog	*log)  {  	struct xlog_ticket *tic; @@ -72,7 +72,7 @@ xlog_cil_ticket_alloc(   */  void  xlog_cil_init_post_recovery( -	struct log	*log) +	struct xlog	*log)  {  	log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);  	log->l_cilp->xc_ctx->sequence = 1; @@ -182,7 +182,7 @@ xlog_cil_prepare_log_vecs(   */  STATIC void  xfs_cil_prepare_item( -	struct log		*log, +	struct xlog		*log,  	struct xfs_log_vec	*lv,  	int			*len,  	int			*diff_iovecs) @@ -231,7 +231,7 @@ xfs_cil_prepare_item(   */  static void  xlog_cil_insert_items( -	struct log		*log, +	struct xlog		*log,  	struct xfs_log_vec	*log_vector,  	struct xlog_ticket	*ticket)  { @@ -373,7 +373,7 @@ xlog_cil_committed(   */  STATIC int  xlog_cil_push( -	struct log		*log) +	struct xlog		*log)  {  	struct xfs_cil		*cil = log->l_cilp;  	struct xfs_log_vec	*lv; @@ -601,7 +601,7 @@ xlog_cil_push_work(   */  static void  xlog_cil_push_background( -	struct log	*log) +	struct xlog	*log)  {  	struct xfs_cil	*cil = log->l_cilp; @@ -629,7 +629,7 @@ xlog_cil_push_background(  static void  xlog_cil_push_foreground( -	struct log	*log, +	struct xlog	*log,  	xfs_lsn_t	push_seq)  {  	struct xfs_cil	*cil = log->l_cilp; @@ -683,7 +683,7 @@ xfs_log_commit_cil(  	xfs_lsn_t		*commit_lsn,  	int			flags)  { -	struct log		*log = mp->m_log; +	struct xlog		*log = mp->m_log;  	int			log_flags = 0;  	struct xfs_log_vec	*log_vector; @@ -754,7 +754,7 @@ xfs_log_commit_cil(   */  xfs_lsn_t  xlog_cil_force_lsn( -	struct log	*log, +	struct xlog	*log,  	xfs_lsn_t	sequence)  {  	struct xfs_cil		*cil = log->l_cilp; @@ -833,7 +833,7 @@ xfs_log_item_in_current_chkpt(   */  int  xlog_cil_init( -	struct log	*log) +	struct xlog	*log)  {  	struct xfs_cil	*cil;  	struct xfs_cil_ctx *ctx; @@ -869,7 +869,7 @@ xlog_cil_init(  void  xlog_cil_destroy( -	struct log	*log) +	struct xlog	*log)  {  	if (log->l_cilp->xc_ctx) {  		if (log->l_cilp->xc_ctx->ticket) diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 5bc33261f5b..72eba2201b1 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -19,7 +19,7 @@  #define __XFS_LOG_PRIV_H__  struct xfs_buf; -struct log; +struct xlog;  struct xlog_ticket;  struct xfs_mount; @@ -352,7 +352,7 @@ typedef struct xlog_in_core {  	struct xlog_in_core	*ic_next;  	struct xlog_in_core	*ic_prev;  	struct xfs_buf		*ic_bp; -	struct log		*ic_log; +	struct xlog		*ic_log;  	int			ic_size;  	int			ic_offset;  	int			ic_bwritecnt; @@ -409,7 +409,7 @@ struct xfs_cil_ctx {   * operations almost as efficient as the old logging methods.   */  struct xfs_cil { -	struct log		*xc_log; +	struct xlog		*xc_log;  	struct list_head	xc_cil;  	spinlock_t		xc_cil_lock;  	struct xfs_cil_ctx	*xc_ctx; @@ -487,7 +487,7 @@ struct xlog_grant_head {   * overflow 31 bits worth of byte offset, so using a byte number will mean   * that round off problems won't occur when releasing partial reservations.   */ -typedef struct log { +typedef struct xlog {  	/* The following fields don't need locking */  	struct xfs_mount	*l_mp;	        /* mount point */  	struct xfs_ail		*l_ailp;	/* AIL log is working with */ @@ -553,9 +553,14 @@ extern int	 xlog_recover_finish(xlog_t *log);  extern void	 xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);  extern kmem_zone_t *xfs_log_ticket_zone; -struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, -				int count, char client, bool permanent, -				xfs_km_flags_t alloc_flags); +struct xlog_ticket * +xlog_ticket_alloc( +	struct xlog	*log, +	int		unit_bytes, +	int		count, +	char		client, +	bool		permanent, +	xfs_km_flags_t	alloc_flags);  static inline void @@ -567,9 +572,14 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)  }  void	xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); -int	xlog_write(struct log *log, struct xfs_log_vec *log_vector, -				struct xlog_ticket *tic, xfs_lsn_t *start_lsn, -				xlog_in_core_t **commit_iclog, uint flags); +int +xlog_write( +	struct xlog		*log, +	struct xfs_log_vec	*log_vector, +	struct xlog_ticket	*tic, +	xfs_lsn_t		*start_lsn, +	struct xlog_in_core	**commit_iclog, +	uint			flags);  /*   * When we crack an atomic LSN, we sample it first so that the value will not @@ -629,17 +639,23 @@ xlog_assign_grant_head(atomic64_t *head, int cycle, int space)  /*   * Committed Item List interfaces   */ -int	xlog_cil_init(struct log *log); -void	xlog_cil_init_post_recovery(struct log *log); -void	xlog_cil_destroy(struct log *log); +int +xlog_cil_init(struct xlog *log); +void +xlog_cil_init_post_recovery(struct xlog *log); +void +xlog_cil_destroy(struct xlog *log);  /*   * CIL force routines   */ -xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence); +xfs_lsn_t +xlog_cil_force_lsn( +	struct xlog *log, +	xfs_lsn_t sequence);  static inline void -xlog_cil_force(struct log *log) +xlog_cil_force(struct xlog *log)  {  	xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);  } diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index ca386909131..a7be98abd6a 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1471,8 +1471,8 @@ xlog_recover_add_item(  STATIC int  xlog_recover_add_to_cont_trans( -	struct log		*log, -	xlog_recover_t		*trans, +	struct xlog		*log, +	struct xlog_recover	*trans,  	xfs_caddr_t		dp,  	int			len)  { @@ -1517,8 +1517,8 @@ xlog_recover_add_to_cont_trans(   */  STATIC int  xlog_recover_add_to_trans( -	struct log		*log, -	xlog_recover_t		*trans, +	struct xlog		*log, +	struct xlog_recover	*trans,  	xfs_caddr_t		dp,  	int			len)  { @@ -1588,8 +1588,8 @@ xlog_recover_add_to_trans(   */  STATIC int  xlog_recover_reorder_trans( -	struct log		*log, -	xlog_recover_t		*trans, +	struct xlog		*log, +	struct xlog_recover	*trans,  	int			pass)  {  	xlog_recover_item_t	*item, *n; @@ -1642,8 +1642,8 @@ xlog_recover_reorder_trans(   */  STATIC int  xlog_recover_buffer_pass1( -	struct log		*log, -	xlog_recover_item_t	*item) +	struct xlog			*log, +	struct xlog_recover_item	*item)  {  	xfs_buf_log_format_t	*buf_f = item->ri_buf[0].i_addr;  	struct list_head	*bucket; @@ -1696,7 +1696,7 @@ xlog_recover_buffer_pass1(   */  STATIC int  xlog_check_buffer_cancelled( -	struct log		*log, +	struct xlog		*log,  	xfs_daddr_t		blkno,  	uint			len,  	ushort			flags) @@ -2689,9 +2689,9 @@ xlog_recover_free_trans(  STATIC int  xlog_recover_commit_pass1( -	struct log		*log, -	struct xlog_recover	*trans, -	xlog_recover_item_t	*item) +	struct xlog			*log, +	struct xlog_recover		*trans, +	struct xlog_recover_item	*item)  {  	trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS1); @@ -2716,10 +2716,10 @@ xlog_recover_commit_pass1(  STATIC int  xlog_recover_commit_pass2( -	struct log		*log, -	struct xlog_recover	*trans, -	struct list_head	*buffer_list, -	xlog_recover_item_t	*item) +	struct xlog			*log, +	struct xlog_recover		*trans, +	struct list_head		*buffer_list, +	struct xlog_recover_item	*item)  {  	trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); @@ -2753,7 +2753,7 @@ xlog_recover_commit_pass2(   */  STATIC int  xlog_recover_commit_trans( -	struct log		*log, +	struct xlog		*log,  	struct xlog_recover	*trans,  	int			pass)  { @@ -2793,8 +2793,8 @@ out:  STATIC int  xlog_recover_unmount_trans( -	struct log		*log, -	xlog_recover_t		*trans) +	struct xlog		*log, +	struct xlog_recover	*trans)  {  	/* Do nothing now */  	xfs_warn(log->l_mp, "%s: Unmount LR", __func__); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 8b89c5ac72d..90c1fc9eaea 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -53,7 +53,7 @@ typedef struct xfs_trans_reservations {  #include "xfs_sync.h" -struct log; +struct xlog;  struct xfs_mount_args;  struct xfs_inode;  struct xfs_bmbt_irec; @@ -133,7 +133,7 @@ typedef struct xfs_mount {  	uint			m_readio_blocks; /* min read size blocks */  	uint			m_writeio_log;	/* min write size log bytes */  	uint			m_writeio_blocks; /* min write size blocks */ -	struct log		*m_log;		/* log specific stuff */ +	struct xlog		*m_log;		/* log specific stuff */  	int			m_logbufs;	/* number of log buffers */  	int			m_logbsize;	/* size of each log buffer */  	uint			m_rsumlevels;	/* rt summary levels */ diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index c9d3409c5ca..1e9ee064dbb 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c @@ -386,23 +386,23 @@ xfs_sync_worker(  	 * We shouldn't write/force the log if we are in the mount/unmount  	 * process or on a read only filesystem. The workqueue still needs to be  	 * active in both cases, however, because it is used for inode reclaim -	 * during these times.  Use the s_umount semaphore to provide exclusion -	 * with unmount. +	 * during these times.  Use the MS_ACTIVE flag to avoid doing anything +	 * during mount.  Doing work during unmount is avoided by calling +	 * cancel_delayed_work_sync on this work queue before tearing down +	 * the ail and the log in xfs_log_unmount.  	 */ -	if (down_read_trylock(&mp->m_super->s_umount)) { -		if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { -			/* dgc: errors ignored here */ -			if (mp->m_super->s_frozen == SB_UNFROZEN && -			    xfs_log_need_covered(mp)) -				error = xfs_fs_log_dummy(mp); -			else -				xfs_log_force(mp, 0); +	if (!(mp->m_super->s_flags & MS_ACTIVE) && +	    !(mp->m_flags & XFS_MOUNT_RDONLY)) { +		/* dgc: errors ignored here */ +		if (mp->m_super->s_frozen == SB_UNFROZEN && +		    xfs_log_need_covered(mp)) +			error = xfs_fs_log_dummy(mp); +		else +			xfs_log_force(mp, 0); -			/* start pushing all the metadata that is currently -			 * dirty */ -			xfs_ail_push_all(mp->m_ail); -		} -		up_read(&mp->m_super->s_umount); +		/* start pushing all the metadata that is currently +		 * dirty */ +		xfs_ail_push_all(mp->m_ail);  	}  	/* queue us up again */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 7cf9d3529e5..caf5dabfd55 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -32,7 +32,7 @@ struct xfs_da_node_entry;  struct xfs_dquot;  struct xfs_log_item;  struct xlog_ticket; -struct log; +struct xlog;  struct xlog_recover;  struct xlog_recover_item;  struct xfs_buf_log_format; @@ -762,7 +762,7 @@ DEFINE_DQUOT_EVENT(xfs_dqflush_force);  DEFINE_DQUOT_EVENT(xfs_dqflush_done);  DECLARE_EVENT_CLASS(xfs_loggrant_class, -	TP_PROTO(struct log *log, struct xlog_ticket *tic), +	TP_PROTO(struct xlog *log, struct xlog_ticket *tic),  	TP_ARGS(log, tic),  	TP_STRUCT__entry(  		__field(dev_t, dev) @@ -830,7 +830,7 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,  #define DEFINE_LOGGRANT_EVENT(name) \  DEFINE_EVENT(xfs_loggrant_class, name, \ -	TP_PROTO(struct log *log, struct xlog_ticket *tic), \ +	TP_PROTO(struct xlog *log, struct xlog_ticket *tic), \  	TP_ARGS(log, tic))  DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);  DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); @@ -1664,7 +1664,7 @@ DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);  DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);  DECLARE_EVENT_CLASS(xfs_log_recover_item_class, -	TP_PROTO(struct log *log, struct xlog_recover *trans, +	TP_PROTO(struct xlog *log, struct xlog_recover *trans,  		struct xlog_recover_item *item, int pass),  	TP_ARGS(log, trans, item, pass),  	TP_STRUCT__entry( @@ -1698,7 +1698,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_item_class,  #define DEFINE_LOG_RECOVER_ITEM(name) \  DEFINE_EVENT(xfs_log_recover_item_class, name, \ -	TP_PROTO(struct log *log, struct xlog_recover *trans, \ +	TP_PROTO(struct xlog *log, struct xlog_recover *trans, \  		struct xlog_recover_item *item, int pass), \  	TP_ARGS(log, trans, item, pass)) @@ -1709,7 +1709,7 @@ DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);  DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);  DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class, -	TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), +	TP_PROTO(struct xlog *log, struct xfs_buf_log_format *buf_f),  	TP_ARGS(log, buf_f),  	TP_STRUCT__entry(  		__field(dev_t, dev) @@ -1739,7 +1739,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,  #define DEFINE_LOG_RECOVER_BUF_ITEM(name) \  DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \ -	TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \ +	TP_PROTO(struct xlog *log, struct xfs_buf_log_format *buf_f), \  	TP_ARGS(log, buf_f))  DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel); @@ -1752,7 +1752,7 @@ DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);  DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);  DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class, -	TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), +	TP_PROTO(struct xlog *log, struct xfs_inode_log_format *in_f),  	TP_ARGS(log, in_f),  	TP_STRUCT__entry(  		__field(dev_t, dev) @@ -1790,7 +1790,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,  )  #define DEFINE_LOG_RECOVER_INO_ITEM(name) \  DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \ -	TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \ +	TP_PROTO(struct xlog *log, struct xfs_inode_log_format *in_f), \  	TP_ARGS(log, in_f))  DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);  |