-rw-r--r--   fs/btrfs/backref.c            |  15
-rw-r--r--   fs/btrfs/ctree.c              |  60
-rw-r--r--   fs/btrfs/disk-io.c            |  34
-rw-r--r--   fs/btrfs/extent-tree.c        |  11
-rw-r--r--   fs/btrfs/extent_io.c          |  14
-rw-r--r--   fs/btrfs/file.c               |  13
-rw-r--r--   fs/btrfs/free-space-cache.c   | 143
-rw-r--r--   fs/btrfs/inode.c              |  57
-rw-r--r--   fs/btrfs/ioctl.h              |   2
-rw-r--r--   fs/btrfs/super.c              |   4
-rw-r--r--   fs/btrfs/tree-log.c           |   6
-rw-r--r--   fs/btrfs/volumes.c            |  95
-rw-r--r--   fs/btrfs/volumes.h            |   3
13 files changed, 257 insertions(+), 200 deletions(-)
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 7301cdb4b2c..a383c18e74e 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -301,10 +301,14 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 		goto out;
 	eb = path->nodes[level];
-	if (!eb) {
-		WARN_ON(1);
-		ret = 1;
-		goto out;
+	while (!eb) {
+		if (!level) {
+			WARN_ON(1);
+			ret = 1;
+			goto out;
+		}
+		level--;
+		eb = path->nodes[level];
 	}
 	ret = add_all_parents(root, path, parents, level, &ref->key_for_search,
@@ -835,6 +839,7 @@ again:
 			}
 			ret = __add_delayed_refs(head, delayed_ref_seq,
 						 &prefs_delayed);
+			mutex_unlock(&head->mutex);
 			if (ret) {
 				spin_unlock(&delayed_refs->lock);
 				goto out;
@@ -928,8 +933,6 @@ again:
 	}
 
 out:
-	if (head)
-		mutex_unlock(&head->mutex);
 	btrfs_free_path(path);
 	while (!list_empty(&prefs)) {
 		ref = list_first_entry(&prefs, struct __prelim_ref, list);
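The backref.c fix above stops treating a missing node at the expected level as fatal and instead walks down toward the leaf until it finds the block the path actually tracks, only warning when even level 0 is empty. A minimal userspace sketch of that walk, with an invented fake_path standing in for the kernel's btrfs_path:

#include <stdio.h>

#define MAX_LEVEL 8

struct fake_path {
	void *nodes[MAX_LEVEL];	/* nodes[0] is the leaf level */
};

/* Returns the lowest populated level at or below 'level', or -1 if none. */
static int find_tracked_level(struct fake_path *path, int level)
{
	while (!path->nodes[level]) {
		if (!level)
			return -1;	/* nothing tracked at all: caller warns */
		level--;
	}
	return level;
}

int main(void)
{
	struct fake_path path = { .nodes = { 0 } };
	int leaf = 1;

	path.nodes[1] = &leaf;	/* only level 1 is populated */
	printf("tracked level: %d\n", find_tracked_level(&path, 3));
	return 0;
}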
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 15cbc2bf4ff..8206b390058 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1024,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
 		if (!looped && !tm)
 			return 0;
 		/*
-		 * we must have key remove operations in the log before the
-		 * replace operation.
+		 * if there are no tree operation for the oldest root, we simply
+		 * return it. this should only happen if that (old) root is at
+		 * level 0.
 		 */
-		BUG_ON(!tm);
+		if (!tm)
+			break;
+		/*
+		 * if there's an operation that's not a root replacement, we
+		 * found the oldest version of our root. normally, we'll find a
+		 * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
+		 */
 		if (tm->op != MOD_LOG_ROOT_REPLACE)
 			break;
@@ -1087,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
 						      tm->generation);
 			break;
 		case MOD_LOG_KEY_ADD:
-			if (tm->slot != n - 1) {
-				o_dst = btrfs_node_key_ptr_offset(tm->slot);
-				o_src = btrfs_node_key_ptr_offset(tm->slot + 1);
-				memmove_extent_buffer(eb, o_dst, o_src, p_size);
-			}
+			/* if a move operation is needed it's in the log */
 			n--;
 			break;
 		case MOD_LOG_MOVE_KEYS:
@@ -1192,16 +1195,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 	}
 
 	tm = tree_mod_log_search(root->fs_info, logical, time_seq);
-	/*
-	 * there was an item in the log when __tree_mod_log_oldest_root
-	 * returned. this one must not go away, because the time_seq passed to
-	 * us must be blocking its removal.
-	 */
-	BUG_ON(!tm);
-
 	if (old_root)
-		eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT,
-					       root->nodesize);
+		eb = alloc_dummy_extent_buffer(logical, root->nodesize);
 	else
 		eb = btrfs_clone_extent_buffer(root->node);
 	btrfs_tree_read_unlock(root->node);
@@ -1216,7 +1211,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
 		btrfs_set_header_level(eb, old_root->level);
 		btrfs_set_header_generation(eb, old_generation);
 	}
-	__tree_mod_log_rewind(eb, time_seq, tm);
+	if (tm)
+		__tree_mod_log_rewind(eb, time_seq, tm);
+	else
+		WARN_ON(btrfs_header_level(eb) != 0);
 	extent_buffer_get(eb);
 
 	return eb;
@@ -2995,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 static void insert_ptr(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct btrfs_path *path,
 		       struct btrfs_disk_key *key, u64 bytenr,
-		       int slot, int level, int tree_mod_log)
+		       int slot, int level)
 {
 	struct extent_buffer *lower;
 	int nritems;
@@ -3008,7 +3006,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
 	BUG_ON(slot > nritems);
 	BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root));
 	if (slot != nritems) {
-		if (tree_mod_log && level)
+		if (level)
 			tree_mod_log_eb_move(root->fs_info, lower, slot + 1,
 					     slot, nritems - slot);
 		memmove_extent_buffer(lower,
@@ -3016,7 +3014,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
 			      btrfs_node_key_ptr_offset(slot),
 			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
 	}
-	if (tree_mod_log && level) {
+	if (level) {
 		ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
 					      MOD_LOG_KEY_ADD);
 		BUG_ON(ret < 0);
@@ -3104,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(split);
 
 	insert_ptr(trans, root, path, &disk_key, split->start,
-		   path->slots[level + 1] + 1, level + 1, 1);
+		   path->slots[level + 1] + 1, level + 1);
 
 	if (path->slots[level] >= mid) {
 		path->slots[level] -= mid;
@@ -3641,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
 	btrfs_set_header_nritems(l, mid);
 	btrfs_item_key(right, &disk_key, 0);
 	insert_ptr(trans, root, path, &disk_key, right->start,
-		   path->slots[1] + 1, 1, 0);
+		   path->slots[1] + 1, 1);
 
 	btrfs_mark_buffer_dirty(right);
 	btrfs_mark_buffer_dirty(l);
@@ -3848,7 +3846,7 @@ again:
 		if (mid <= slot) {
 			btrfs_set_header_nritems(right, 0);
 			insert_ptr(trans, root, path, &disk_key, right->start,
-				   path->slots[1] + 1, 1, 0);
+				   path->slots[1] + 1, 1);
 			btrfs_tree_unlock(path->nodes[0]);
 			free_extent_buffer(path->nodes[0]);
 			path->nodes[0] = right;
@@ -3857,7 +3855,7 @@ again:
 		} else {
 			btrfs_set_header_nritems(right, 0);
 			insert_ptr(trans, root, path, &disk_key, right->start,
-					  path->slots[1], 1, 0);
+					  path->slots[1], 1);
 			btrfs_tree_unlock(path->nodes[0]);
 			free_extent_buffer(path->nodes[0]);
 			path->nodes[0] = right;
@@ -5121,6 +5119,18 @@ again:
 		if (!path->skip_locking) {
 			ret = btrfs_try_tree_read_lock(next);
+			if (!ret && time_seq) {
+				/*
+				 * If we don't get the lock, we may be racing
+				 * with push_leaf_left, holding that lock while
+				 * itself waiting for the leaf we've currently
+				 * locked. To solve this situation, we give up
+				 * on our lock and cycle.
+				 */
+				btrfs_release_path(path);
+				cond_resched();
+				goto again;
+			}
 			if (!ret) {
 				btrfs_set_path_blocking(path);
 				btrfs_tree_read_lock(next);
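The btrfs_next_leaf() hunk at the end of the ctree.c diff resolves a potential ABBA deadlock: rather than blocking on a lock that push_leaf_left may hold while it in turn waits on ours, the reader drops everything and restarts. A compilable pthread sketch of the same give-up-and-cycle pattern (all names invented, sched_yield() standing in for cond_resched()):

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t cur_leaf = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t next_leaf = PTHREAD_MUTEX_INITIALIZER;

static void lock_both(void)
{
again:
	pthread_mutex_lock(&cur_leaf);
	if (pthread_mutex_trylock(&next_leaf) != 0) {
		/* Racing with a locker going the other way: give up and cycle. */
		pthread_mutex_unlock(&cur_leaf);
		sched_yield();
		goto again;
	}
	/* ... work on both leaves ... */
	pthread_mutex_unlock(&next_leaf);
	pthread_mutex_unlock(&cur_leaf);
}

int main(void)
{
	lock_both();
	printf("locked and released both leaves without deadlocking\n");
	return 0;
}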
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7b845ff4af9..2936ca49b3b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2354,12 +2354,17 @@ retry_root_backup:
 				  BTRFS_CSUM_TREE_OBJECTID, csum_root);
 	if (ret)
 		goto recovery_tree_root;
-
 	csum_root->track_dirty = 1;
 
 	fs_info->generation = generation;
 	fs_info->last_trans_committed = generation;
 
+	ret = btrfs_recover_balance(fs_info);
+	if (ret) {
+		printk(KERN_WARNING "btrfs: failed to recover balance\n");
+		goto fail_block_groups;
+	}
+
 	ret = btrfs_init_dev_stats(fs_info);
 	if (ret) {
 		printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
@@ -2485,20 +2490,23 @@ retry_root_backup:
 		goto fail_trans_kthread;
 	}
 
-	if (!(sb->s_flags & MS_RDONLY)) {
-		down_read(&fs_info->cleanup_work_sem);
-		err = btrfs_orphan_cleanup(fs_info->fs_root);
-		if (!err)
-			err = btrfs_orphan_cleanup(fs_info->tree_root);
-		up_read(&fs_info->cleanup_work_sem);
+	if (sb->s_flags & MS_RDONLY)
+		return 0;
 
-		if (!err)
-			err = btrfs_recover_balance(fs_info->tree_root);
+	down_read(&fs_info->cleanup_work_sem);
+	if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
+	    (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
+		up_read(&fs_info->cleanup_work_sem);
+		close_ctree(tree_root);
+		return ret;
+	}
+	up_read(&fs_info->cleanup_work_sem);
 
-		if (err) {
-			close_ctree(tree_root);
-			return err;
-		}
+	ret = btrfs_resume_balance_async(fs_info);
+	if (ret) {
+		printk(KERN_WARNING "btrfs: failed to resume balance\n");
+		close_ctree(tree_root);
+		return ret;
 	}
 
 	return 0;
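The reworked mount path above chains the two orphan cleanups with the "(ret = f()) || (ret = g())" idiom, so the second call runs only if the first succeeded and ret always holds the first failure. A tiny standalone illustration of that idiom (both cleanup functions are fakes):

#include <stdio.h>

static int cleanup_a(void) { return 0; }
static int cleanup_b(void) { return -1; /* pretend this one fails */ }

int main(void)
{
	int ret;

	if ((ret = cleanup_a()) || (ret = cleanup_b())) {
		printf("cleanup failed: %d\n", ret);
		return 1;
	}
	printf("all cleanups succeeded\n");
	return 0;
}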
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4b5a1e1bdef..6e1d36702ff 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2347,12 +2347,10 @@ next:
 	return count;
 }
 
-
 static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs,
-			unsigned long num_refs)
+			       unsigned long num_refs,
+			       struct list_head *first_seq)
 {
-	struct list_head *first_seq = delayed_refs->seq_head.next;
-
 	spin_unlock(&delayed_refs->lock);
 	pr_debug("waiting for more refs (num %ld, first %p)\n",
 		 num_refs, first_seq);
@@ -2381,6 +2379,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 	struct list_head cluster;
+	struct list_head *first_seq = NULL;
 	int ret;
 	u64 delayed_start;
 	int run_all = count == (unsigned long)-1;
@@ -2436,8 +2435,10 @@ again:
 				 */
 				consider_waiting = 1;
 				num_refs = delayed_refs->num_entries;
+				first_seq = root->fs_info->tree_mod_seq_list.next;
 			} else {
-				wait_for_more_refs(delayed_refs, num_refs);
+				wait_for_more_refs(delayed_refs,
						   num_refs, first_seq);
 				/*
 				 * after waiting, things have changed. we
 				 * dropped the lock and someone else might have
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index aaa12c1eb34..01c21b6c6d4 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3324,6 +3324,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 			     writepage_t writepage, void *data,
 			     void (*flush_fn)(void *))
 {
+	struct inode *inode = mapping->host;
 	int ret = 0;
 	int done = 0;
 	int nr_to_write_done = 0;
@@ -3334,6 +3335,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 	int scanned = 0;
 	int tag;
 
+	/*
+	 * We have to hold onto the inode so that ordered extents can do their
+	 * work when the IO finishes.  The alternative to this is failing to add
+	 * an ordered extent if the igrab() fails there and that is a huge pain
+	 * to deal with, so instead just hold onto the inode throughout the
+	 * writepages operation.  If it fails here we are freeing up the inode
+	 * anyway and we'd rather not waste our time writing out stuff that is
+	 * going to be truncated anyway.
+	 */
+	if (!igrab(inode))
+		return 0;
+
 	pagevec_init(&pvec, 0);
 	if (wbc->range_cyclic) {
 		index = mapping->writeback_index; /* Start from prev offset */
@@ -3428,6 +3441,7 @@ retry:
 		index = 0;
 		goto retry;
 	}
+	btrfs_add_delayed_iput(inode);
 	return ret;
 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 70dc8ca73e2..9aa01ec2138 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1334,7 +1334,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 				    loff_t *ppos, size_t count, size_t ocount)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = fdentry(file)->d_inode;
 	struct iov_iter i;
 	ssize_t written;
 	ssize_t written_buffered;
@@ -1344,18 +1343,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 	written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
 					    count, ocount);
 
-	/*
-	 * the generic O_DIRECT will update in-memory i_size after the
-	 * DIOs are done.  But our endio handlers that update the on
-	 * disk i_size never update past the in memory i_size.  So we
-	 * need one more update here to catch any additions to the
-	 * file
-	 */
-	if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
-		btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
-		mark_inode_dirty(inode);
-	}
-
 	if (written < 0 || written == count)
 		return written;
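The extent_io.c hunk pins the inode with igrab() for the whole writepages run so ordered-extent completion never races with eviction, dropping the reference via a delayed iput afterwards. A userspace sketch of the same grab-or-skip refcount pattern, using C11 atomics in place of igrab()/iput() (obj_grab and friends are invented names):

#include <stdatomic.h>
#include <stdio.h>

struct obj {
	atomic_int refs;
};

/* Like igrab(): fails (returns 0) once the object is going away. */
static int obj_grab(struct obj *o)
{
	int old = atomic_load(&o->refs);
	while (old > 0)
		if (atomic_compare_exchange_weak(&o->refs, &old, old + 1))
			return 1;
	return 0;
}

static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refs, 1) == 1)
		printf("last reference dropped, freeing\n");
}

static void write_pages(struct obj *o)
{
	if (!obj_grab(o))
		return;	/* object is being torn down: skip the writeback */
	/* ... issue IO; completion can safely dereference o ... */
	obj_put(o);
}

int main(void)
{
	struct obj o = { .refs = 1 };
	write_pages(&o);
	obj_put(&o);
	return 0;
}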
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 81296c57405..6c4e2baa929 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1543,29 +1543,26 @@ again:
 	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
 
 	/*
-	 * XXX - this can go away after a few releases.
-	 *
-	 * since the only user of btrfs_remove_free_space is the tree logging
-	 * stuff, and the only way to test that is under crash conditions, we
-	 * want to have this debug stuff here just in case somethings not
-	 * working.  Search the bitmap for the space we are trying to use to
-	 * make sure its actually there.  If its not there then we need to stop
-	 * because something has gone wrong.
+	 * We need to search for bits in this bitmap.  We could only cover some
+	 * of the extent in this bitmap thanks to how we add space, so we need
+	 * to search for as much as it as we can and clear that amount, and then
+	 * go searching for the next bit.
 	 */
 	search_start = *offset;
-	search_bytes = *bytes;
+	search_bytes = ctl->unit;
 	search_bytes = min(search_bytes, end - search_start + 1);
 	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
 	BUG_ON(ret < 0 || search_start != *offset);
 
-	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
-		bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
-		*bytes -= end - *offset + 1;
-		*offset = end + 1;
-	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
-		bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
-		*bytes = 0;
-	}
+	/* We may have found more bits than what we need */
+	search_bytes = min(search_bytes, *bytes);
+
+	/* Cannot clear past the end of the bitmap */
+	search_bytes = min(search_bytes, end - search_start + 1);
+
+	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
+	*offset += search_bytes;
+	*bytes -= search_bytes;
 
 	if (*bytes) {
 		struct rb_node *next = rb_next(&bitmap_info->offset_index);
@@ -1596,7 +1593,7 @@ again:
 		 * everything over again.
 		 */
 		search_start = *offset;
-		search_bytes = *bytes;
+		search_bytes = ctl->unit;
 		ret = search_bitmap(ctl, bitmap_info, &search_start,
 				    &search_bytes);
 		if (ret < 0 || search_start != *offset)
@@ -1879,12 +1876,14 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	struct btrfs_free_space *info;
-	struct btrfs_free_space *next_info = NULL;
 	int ret = 0;
 
 	spin_lock(&ctl->tree_lock);
 
again:
+	if (!bytes)
+		goto out_lock;
+
 	info = tree_search_offset(ctl, offset, 0, 0);
 	if (!info) {
 		/*
@@ -1905,88 +1904,48 @@ again:
 		}
 	}
 
-	if (info->bytes < bytes && rb_next(&info->offset_index)) {
-		u64 end;
-		next_info = rb_entry(rb_next(&info->offset_index),
-					     struct btrfs_free_space,
-					     offset_index);
-
-		if (next_info->bitmap)
-			end = next_info->offset +
-			      BITS_PER_BITMAP * ctl->unit - 1;
-		else
-			end = next_info->offset + next_info->bytes;
-
-		if (next_info->bytes < bytes ||
-		    next_info->offset > offset || offset > end) {
-			printk(KERN_CRIT "Found free space at %llu, size %llu,"
-			      " trying to use %llu\n",
-			      (unsigned long long)info->offset,
-			      (unsigned long long)info->bytes,
-			      (unsigned long long)bytes);
-			WARN_ON(1);
-			ret = -EINVAL;
-			goto out_lock;
-		}
-
-		info = next_info;
-	}
-
-	if (info->bytes == bytes) {
+	if (!info->bitmap) {
 		unlink_free_space(ctl, info);
-		if (info->bitmap) {
-			kfree(info->bitmap);
-			ctl->total_bitmaps--;
-		}
-		kmem_cache_free(btrfs_free_space_cachep, info);
-		ret = 0;
-		goto out_lock;
-	}
+		if (offset == info->offset) {
+			u64 to_free = min(bytes, info->bytes);
 
-	if (!info->bitmap && info->offset == offset) {
-		unlink_free_space(ctl, info);
-		info->offset += bytes;
-		info->bytes -= bytes;
-		ret = link_free_space(ctl, info);
-		WARN_ON(ret);
-		goto out_lock;
-	}
+			info->bytes -= to_free;
+			info->offset += to_free;
+			if (info->bytes) {
+				ret = link_free_space(ctl, info);
+				WARN_ON(ret);
+			} else {
+				kmem_cache_free(btrfs_free_space_cachep, info);
+			}
 
-	if (!info->bitmap && info->offset <= offset &&
-	    info->offset + info->bytes >= offset + bytes) {
-		u64 old_start = info->offset;
-		/*
-		 * we're freeing space in the middle of the info,
-		 * this can happen during tree log replay
-		 *
-		 * first unlink the old info and then
-		 * insert it again after the hole we're creating
-		 */
-		unlink_free_space(ctl, info);
-		if (offset + bytes < info->offset + info->bytes) {
-			u64 old_end = info->offset + info->bytes;
+			offset += to_free;
+			bytes -= to_free;
+			goto again;
+		} else {
+			u64 old_end = info->bytes + info->offset;
 
-			info->offset = offset + bytes;
-			info->bytes = old_end - info->offset;
+			info->bytes = offset - info->offset;
 			ret = link_free_space(ctl, info);
 			WARN_ON(ret);
 			if (ret)
 				goto out_lock;
-		} else {
-			/* the hole we're creating ends at the end
-			 * of the info struct, just free the info
-			 */
-			kmem_cache_free(btrfs_free_space_cachep, info);
-		}
-		spin_unlock(&ctl->tree_lock);
-		/* step two, insert a new info struct to cover
-		 * anything before the hole
-		 */
-		ret = btrfs_add_free_space(block_group, old_start,
-					   offset - old_start);
-		WARN_ON(ret); /* -ENOMEM */
-		goto out;
+
+			/* Not enough bytes in this entry to satisfy us */
+			if (old_end < offset + bytes) {
+				bytes -= old_end - offset;
+				offset = old_end;
+				goto again;
+			} else if (old_end == offset + bytes) {
+				/* all done */
+				goto out_lock;
+			}
+			spin_unlock(&ctl->tree_lock);
+
+			ret = btrfs_add_free_space(block_group, offset + bytes,
+						   old_end - (offset + bytes));
+			WARN_ON(ret);
+			goto out;
+		}
 	}
 
 	ret = remove_from_bitmap(ctl, info, &offset, &bytes);
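The rewritten remove_from_bitmap() above clears at most one pass's worth of bits at a time: search for what is actually set at the offset, trim that to what the caller still needs and to the bitmap's end, clear it, advance, and repeat. A self-contained sketch of that loop over a toy byte-array "bitmap" (all names invented, sizes shrunk for illustration):

#include <stdio.h>

#define NBITS	16

static unsigned char map[NBITS];	/* one byte == one allocation unit */

static unsigned long clear_range(unsigned long offset, unsigned long bytes)
{
	while (bytes) {
		unsigned long run = 0;

		/* like search_bitmap(): how much is actually set here? */
		while (offset + run < NBITS && map[offset + run])
			run++;
		if (!run)
			break;	/* nothing left to clear in this bitmap */

		/* we may have found more bits than what we need */
		if (run > bytes)
			run = bytes;

		/* like bitmap_clear_bits() */
		for (unsigned long i = 0; i < run; i++)
			map[offset + i] = 0;

		offset += run;
		bytes -= run;
	}
	return bytes;	/* non-zero: caller moves on to the next entry */
}

int main(void)
{
	for (int i = 2; i < 10; i++)
		map[i] = 1;
	printf("left over: %lu\n", clear_range(2, 8));
	return 0;
}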
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d8bb0dbc494..a7d1921ac76 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3754,7 +3754,7 @@ void btrfs_evict_inode(struct inode *inode)
 	btrfs_wait_ordered_range(inode, 0, (u64)-1);
 
 	if (root->fs_info->log_root_recovering) {
-		BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+		BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
 				 &BTRFS_I(inode)->runtime_flags));
 		goto no_delete;
 	}
@@ -5876,8 +5876,17 @@ map:
 	bh_result->b_size = len;
 	bh_result->b_bdev = em->bdev;
 	set_buffer_mapped(bh_result);
-	if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
-		set_buffer_new(bh_result);
+	if (create) {
+		if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+			set_buffer_new(bh_result);
+
+		/*
+		 * Need to update the i_size under the extent lock so buffered
+		 * readers will get the updated i_size when we unlock.
+		 */
+		if (start + len > i_size_read(inode))
+			i_size_write(inode, start + len);
+	}
 
 	free_extent_map(em);
 
@@ -6360,12 +6369,48 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 		 */
 		ordered = btrfs_lookup_ordered_range(inode, lockstart,
 						     lockend - lockstart + 1);
-		if (!ordered)
+
+		/*
+		 * We need to make sure there are no buffered pages in this
+		 * range either, we could have raced between the invalidate in
+		 * generic_file_direct_write and locking the extent.  The
+		 * invalidate needs to happen so that reads after a write do not
+		 * get stale data.
+		 */
+		if (!ordered && (!writing ||
+		    !test_range_bit(&BTRFS_I(inode)->io_tree,
+				    lockstart, lockend, EXTENT_UPTODATE, 0,
+				    cached_state)))
 			break;
+
 		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
 				     &cached_state, GFP_NOFS);
-		btrfs_start_ordered_extent(inode, ordered, 1);
-		btrfs_put_ordered_extent(ordered);
+
+		if (ordered) {
+			btrfs_start_ordered_extent(inode, ordered, 1);
+			btrfs_put_ordered_extent(ordered);
+		} else {
+			/* Screw you mmap */
+			ret = filemap_write_and_wait_range(file->f_mapping,
+							   lockstart,
+							   lockend);
+			if (ret)
+				goto out;
+
+			/*
+			 * If we found a page that couldn't be invalidated just
+			 * fall back to buffered.
+			 */
+			ret = invalidate_inode_pages2_range(file->f_mapping,
+					lockstart >> PAGE_CACHE_SHIFT,
+					lockend >> PAGE_CACHE_SHIFT);
+			if (ret) {
+				if (ret == -EBUSY)
+					ret = 0;
+				goto out;
+			}
+		}
+
 		cond_resched();
 	}
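The btrfs_direct_IO() loop above now also handles buffered pages that raced in after generic_file_direct_write's invalidate: wait for ordered IO if there is any, otherwise flush the range, try to invalidate it, and fall back to buffered IO if a page (typically one pinned by mmap) returns -EBUSY. A control-flow-only sketch with stubbed helpers (every function here is fake and only models the branching):

#include <errno.h>
#include <stdio.h>

static int lookup_ordered(void)          { return 0; }  /* no ordered extent */
static int range_has_cached_pages(void)  { static int n = 1; return n--; }
static int flush_range(void)             { return 0; }
static int invalidate_range(void)        { return -EBUSY; } /* mmap'ed page */

static int prepare_direct_io(int writing)
{
	for (;;) {
		int ordered = lookup_ordered();

		if (!ordered && (!writing || !range_has_cached_pages()))
			return 0;	/* range is clean: do direct IO */

		if (ordered)
			continue;	/* wait for the ordered extent, retry */

		if (flush_range())
			return -EIO;
		if (invalidate_range() == -EBUSY)
			return 1;	/* fall back to buffered IO */
	}
}

int main(void)
{
	printf("prepare_direct_io(write) = %d\n", prepare_direct_io(1));
	return 0;
}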
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 497c530724c..e440aa653c3 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -339,7 +339,7 @@ struct btrfs_ioctl_get_dev_stats {
 #define BTRFS_IOC_WAIT_SYNC  _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
 #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
 				   struct btrfs_ioctl_vol_args_v2)
-#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
+#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
 #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
 #define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
 			      struct btrfs_ioctl_scrub_args)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0eb9a4da069..e23991574fd 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1187,6 +1187,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 		if (ret)
 			goto restore;
 
+		ret = btrfs_resume_balance_async(fs_info);
+		if (ret)
+			goto restore;
+
 		sb->s_flags &= ~MS_RDONLY;
 	}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2017d0ff511..8abeae4224f 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -690,6 +690,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
 	kfree(name);
 	iput(inode);
+
+	btrfs_run_delayed_items(trans, root);
 	return ret;
 }
@@ -895,6 +897,7 @@ again:
 				ret = btrfs_unlink_inode(trans, root, dir,
 							 inode, victim_name,
 							 victim_name_len);
+				btrfs_run_delayed_items(trans, root);
 			}
 			kfree(victim_name);
 			ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
@@ -1475,6 +1478,9 @@ again:
 			ret = btrfs_unlink_inode(trans, root, dir, inode,
 						 name, name_len);
 			BUG_ON(ret);
+
+			btrfs_run_delayed_items(trans, root);
+
 			kfree(name);
 			iput(inode);
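The one-character ioctl.h fix above matters because the direction bits are encoded into the ioctl number that userspace must pass: SUBVOL_GETFLAGS copies data to userspace, so it needs _IOR, not _IOW. This snippet prints both encodings to show they differ only in the direction field (0x94 is BTRFS_IOCTL_MAGIC):

#include <stdio.h>
#include <linux/ioctl.h>
#include <linux/types.h>

int main(void)
{
	/* _IOW encodes "userspace writes to the kernel", _IOR the reverse */
	printf("_IOW(0x94, 25, __u64) = %#lx\n",
	       (unsigned long)_IOW(0x94, 25, __u64));
	printf("_IOR(0x94, 25, __u64) = %#lx\n",
	       (unsigned long)_IOR(0x94, 25, __u64));
	return 0;
}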
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8a3d2594b80..ecaad40e7ef 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2845,31 +2845,48 @@ out:
 
 static int balance_kthread(void *data)
 {
-	struct btrfs_balance_control *bctl =
-			(struct btrfs_balance_control *)data;
-	struct btrfs_fs_info *fs_info = bctl->fs_info;
+	struct btrfs_fs_info *fs_info = data;
 	int ret = 0;
 
 	mutex_lock(&fs_info->volume_mutex);
 	mutex_lock(&fs_info->balance_mutex);
 
-	set_balance_control(bctl);
-
-	if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
-		printk(KERN_INFO "btrfs: force skipping balance\n");
-	} else {
+	if (fs_info->balance_ctl) {
 		printk(KERN_INFO "btrfs: continuing balance\n");
-		ret = btrfs_balance(bctl, NULL);
+		ret = btrfs_balance(fs_info->balance_ctl, NULL);
 	}
 
 	mutex_unlock(&fs_info->balance_mutex);
 	mutex_unlock(&fs_info->volume_mutex);
+
 	return ret;
 }
 
-int btrfs_recover_balance(struct btrfs_root *tree_root)
+int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
 {
 	struct task_struct *tsk;
+
+	spin_lock(&fs_info->balance_lock);
+	if (!fs_info->balance_ctl) {
+		spin_unlock(&fs_info->balance_lock);
+		return 0;
+	}
+	spin_unlock(&fs_info->balance_lock);
+
+	if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
+		printk(KERN_INFO "btrfs: force skipping balance\n");
+		return 0;
+	}
+
+	tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
+	if (IS_ERR(tsk))
+		return PTR_ERR(tsk);
+
+	return 0;
+}
+
+int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
+{
 	struct btrfs_balance_control *bctl;
 	struct btrfs_balance_item *item;
 	struct btrfs_disk_balance_args disk_bargs;
@@ -2882,29 +2899,30 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
 	if (!path)
 		return -ENOMEM;
 
-	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
-	if (!bctl) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
 	key.objectid = BTRFS_BALANCE_OBJECTID;
 	key.type = BTRFS_BALANCE_ITEM_KEY;
 	key.offset = 0;
 
-	ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
 	if (ret < 0)
-		goto out_bctl;
+		goto out;
 	if (ret > 0) { /* ret = -ENOENT; */
 		ret = 0;
-		goto out_bctl;
+		goto out;
+	}
+
+	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
+	if (!bctl) {
+		ret = -ENOMEM;
+		goto out;
 	}
 
 	leaf = path->nodes[0];
 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
 
-	bctl->fs_info = tree_root->fs_info;
-	bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME;
+	bctl->fs_info = fs_info;
+	bctl->flags = btrfs_balance_flags(leaf, item);
+	bctl->flags |= BTRFS_BALANCE_RESUME;
 
 	btrfs_balance_data(leaf, item, &disk_bargs);
 	btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
@@ -2913,14 +2931,13 @@ int btrfs_recover_balance(struct btrfs_root *tree_root)
 	btrfs_balance_sys(leaf, item, &disk_bargs);
 	btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
 
-	tsk = kthread_run(balance_kthread, bctl, "btrfs-balance");
-	if (IS_ERR(tsk))
-		ret = PTR_ERR(tsk);
-	else
-		goto out;
+	mutex_lock(&fs_info->volume_mutex);
+	mutex_lock(&fs_info->balance_mutex);
 
-out_bctl:
-	kfree(bctl);
+	set_balance_control(bctl);
+
+	mutex_unlock(&fs_info->balance_mutex);
+	mutex_unlock(&fs_info->volume_mutex);
out:
 	btrfs_free_path(path);
 	return ret;
@@ -4061,16 +4078,18 @@ static void btrfs_end_bio(struct bio *bio, int err)
 			BUG_ON(stripe_index >= bbio->num_stripes);
 			dev = bbio->stripes[stripe_index].dev;
-			if (bio->bi_rw & WRITE)
-				btrfs_dev_stat_inc(dev,
-						   BTRFS_DEV_STAT_WRITE_ERRS);
-			else
-				btrfs_dev_stat_inc(dev,
-						   BTRFS_DEV_STAT_READ_ERRS);
-			if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
-				btrfs_dev_stat_inc(dev,
-						   BTRFS_DEV_STAT_FLUSH_ERRS);
-			btrfs_dev_stat_print_on_error(dev);
+			if (dev->bdev) {
+				if (bio->bi_rw & WRITE)
+					btrfs_dev_stat_inc(dev,
+						BTRFS_DEV_STAT_WRITE_ERRS);
+				else
+					btrfs_dev_stat_inc(dev,
+						BTRFS_DEV_STAT_READ_ERRS);
+				if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH)
+					btrfs_dev_stat_inc(dev,
+						BTRFS_DEV_STAT_FLUSH_ERRS);
+				btrfs_dev_stat_print_on_error(dev);
+			}
 		}
 	}
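The volumes.c restructuring above splits balance resume in two phases: btrfs_recover_balance() only reinstalls the saved state under the volume and balance mutexes during mount, and btrfs_resume_balance_async() later spawns a kthread that re-checks fs_info->balance_ctl under the same locks before running. A pthread sketch of that two-phase shape (locking collapsed to a single mutex, all names invented):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t balance_mutex = PTHREAD_MUTEX_INITIALIZER;
static int balance_ctl;		/* non-zero: an interrupted balance exists */

static void *balance_worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&balance_mutex);
	if (balance_ctl)	/* re-check: it may have been cancelled */
		printf("continuing balance\n");
	pthread_mutex_unlock(&balance_mutex);
	return NULL;
}

/* phase one (mount): just reinstall the on-disk state, nothing runs yet */
static void recover_balance(void)
{
	pthread_mutex_lock(&balance_mutex);
	balance_ctl = 1;
	pthread_mutex_unlock(&balance_mutex);
}

/* phase two (after mount): only spawn the worker if recovery found state */
static int resume_balance_async(void)
{
	pthread_t tsk;

	pthread_mutex_lock(&balance_mutex);
	if (!balance_ctl) {
		pthread_mutex_unlock(&balance_mutex);
		return 0;
	}
	pthread_mutex_unlock(&balance_mutex);

	if (pthread_create(&tsk, NULL, balance_worker, NULL))
		return -1;
	return pthread_join(tsk, NULL);
}

int main(void)
{
	recover_balance();
	return resume_balance_async();
}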
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 74366f27a76..95f6637614d 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -281,7 +281,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
 int btrfs_init_new_device(struct btrfs_root *root, char *path);
 int btrfs_balance(struct btrfs_balance_control *bctl,
 		  struct btrfs_ioctl_balance_args *bargs);
-int btrfs_recover_balance(struct btrfs_root *tree_root);
+int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
+int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
 int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
 int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);