diff options
Diffstat (limited to 'fs/btrfs/ctree.c')
| -rw-r--r-- | fs/btrfs/ctree.c | 268 | 
1 files changed, 209 insertions, 59 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d2431284b91..0ad48e782d3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -54,8 +54,13 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)  {  	int i;  	for (i = 0; i < BTRFS_MAX_LEVEL; i++) { -		if (p->nodes[i] && p->locks[i]) -			btrfs_set_lock_blocking(p->nodes[i]); +		if (!p->nodes[i] || !p->locks[i]) +			continue; +		btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]); +		if (p->locks[i] == BTRFS_READ_LOCK) +			p->locks[i] = BTRFS_READ_LOCK_BLOCKING; +		else if (p->locks[i] == BTRFS_WRITE_LOCK) +			p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;  	}  } @@ -68,7 +73,7 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)   * for held   */  noinline void btrfs_clear_path_blocking(struct btrfs_path *p, -					struct extent_buffer *held) +					struct extent_buffer *held, int held_rw)  {  	int i; @@ -79,19 +84,29 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,  	 * really sure by forcing the path to blocking before we clear  	 * the path blocking.  	 */ -	if (held) -		btrfs_set_lock_blocking(held); +	if (held) { +		btrfs_set_lock_blocking_rw(held, held_rw); +		if (held_rw == BTRFS_WRITE_LOCK) +			held_rw = BTRFS_WRITE_LOCK_BLOCKING; +		else if (held_rw == BTRFS_READ_LOCK) +			held_rw = BTRFS_READ_LOCK_BLOCKING; +	}  	btrfs_set_path_blocking(p);  #endif  	for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { -		if (p->nodes[i] && p->locks[i]) -			btrfs_clear_lock_blocking(p->nodes[i]); +		if (p->nodes[i] && p->locks[i]) { +			btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]); +			if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING) +				p->locks[i] = BTRFS_WRITE_LOCK; +			else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING) +				p->locks[i] = BTRFS_READ_LOCK; +		}  	}  #ifdef CONFIG_DEBUG_LOCK_ALLOC  	if (held) -		btrfs_clear_lock_blocking(held); +		btrfs_clear_lock_blocking_rw(held, held_rw);  #endif  } @@ -119,7 +134,7 @@ noinline void btrfs_release_path(struct btrfs_path *p)  		if (!p->nodes[i])  			continue;  		if (p->locks[i]) { -			btrfs_tree_unlock(p->nodes[i]); +			btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);  			p->locks[i] = 0;  		}  		free_extent_buffer(p->nodes[i]); @@ -167,6 +182,25 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)  	return eb;  } +/* loop around taking references on and locking the root node of the + * tree until you end up with a lock on the root.  A locked buffer + * is returned, with a reference held. + */ +struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) +{ +	struct extent_buffer *eb; + +	while (1) { +		eb = btrfs_root_node(root); +		btrfs_tree_read_lock(eb); +		if (eb == root->node) +			break; +		btrfs_tree_read_unlock(eb); +		free_extent_buffer(eb); +	} +	return eb; +} +  /* cowonly root (everything not a reference counted cow subvolume), just get   * put onto a simple dirty list.  transaction.c walks this to make sure they   * get properly updated on disk. @@ -862,7 +896,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,  	mid = path->nodes[level]; -	WARN_ON(!path->locks[level]); +	WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK && +		path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);  	WARN_ON(btrfs_header_generation(mid) != trans->transid);  	orig_ptr = btrfs_node_blockptr(mid, orig_slot); @@ -1360,7 +1395,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level,  		t = path->nodes[i];  		if (i >= lowest_unlock && i > skip_level && path->locks[i]) { -			btrfs_tree_unlock(t); +			btrfs_tree_unlock_rw(t, path->locks[i]);  			path->locks[i] = 0;  		}  	} @@ -1387,7 +1422,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)  			continue;  		if (!path->locks[i])  			continue; -		btrfs_tree_unlock(path->nodes[i]); +		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);  		path->locks[i] = 0;  	}  } @@ -1436,6 +1471,8 @@ read_block_for_search(struct btrfs_trans_handle *trans,  			 * we can trust our generation number  			 */  			free_extent_buffer(tmp); +			btrfs_set_path_blocking(p); +  			tmp = read_tree_block(root, blocknr, blocksize, gen);  			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {  				*eb_ret = tmp; @@ -1491,20 +1528,27 @@ read_block_for_search(struct btrfs_trans_handle *trans,  static int  setup_nodes_for_search(struct btrfs_trans_handle *trans,  		       struct btrfs_root *root, struct btrfs_path *p, -		       struct extent_buffer *b, int level, int ins_len) +		       struct extent_buffer *b, int level, int ins_len, +		       int *write_lock_level)  {  	int ret;  	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=  	    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {  		int sret; +		if (*write_lock_level < level + 1) { +			*write_lock_level = level + 1; +			btrfs_release_path(p); +			goto again; +		} +  		sret = reada_for_balance(root, p, level);  		if (sret)  			goto again;  		btrfs_set_path_blocking(p);  		sret = split_node(trans, root, p, level); -		btrfs_clear_path_blocking(p, NULL); +		btrfs_clear_path_blocking(p, NULL, 0);  		BUG_ON(sret > 0);  		if (sret) { @@ -1516,13 +1560,19 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,  		   BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {  		int sret; +		if (*write_lock_level < level + 1) { +			*write_lock_level = level + 1; +			btrfs_release_path(p); +			goto again; +		} +  		sret = reada_for_balance(root, p, level);  		if (sret)  			goto again;  		btrfs_set_path_blocking(p);  		sret = balance_level(trans, root, p, level); -		btrfs_clear_path_blocking(p, NULL); +		btrfs_clear_path_blocking(p, NULL, 0);  		if (sret) {  			ret = sret; @@ -1566,27 +1616,78 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root  	int err;  	int level;  	int lowest_unlock = 1; +	int root_lock; +	/* everything at write_lock_level or lower must be write locked */ +	int write_lock_level = 0;  	u8 lowest_level = 0;  	lowest_level = p->lowest_level;  	WARN_ON(lowest_level && ins_len > 0);  	WARN_ON(p->nodes[0] != NULL); -	if (ins_len < 0) +	if (ins_len < 0) {  		lowest_unlock = 2; +		/* when we are removing items, we might have to go up to level +		 * two as we update tree pointers  Make sure we keep write +		 * for those levels as well +		 */ +		write_lock_level = 2; +	} else if (ins_len > 0) { +		/* +		 * for inserting items, make sure we have a write lock on +		 * level 1 so we can update keys +		 */ +		write_lock_level = 1; +	} + +	if (!cow) +		write_lock_level = -1; + +	if (cow && (p->keep_locks || p->lowest_level)) +		write_lock_level = BTRFS_MAX_LEVEL; +  again: +	/* +	 * we try very hard to do read locks on the root +	 */ +	root_lock = BTRFS_READ_LOCK; +	level = 0;  	if (p->search_commit_root) { +		/* +		 * the commit roots are read only +		 * so we always do read locks +		 */  		b = root->commit_root;  		extent_buffer_get(b); +		level = btrfs_header_level(b);  		if (!p->skip_locking) -			btrfs_tree_lock(b); +			btrfs_tree_read_lock(b);  	} else { -		if (p->skip_locking) +		if (p->skip_locking) {  			b = btrfs_root_node(root); -		else -			b = btrfs_lock_root_node(root); +			level = btrfs_header_level(b); +		} else { +			/* we don't know the level of the root node +			 * until we actually have it read locked +			 */ +			b = btrfs_read_lock_root_node(root); +			level = btrfs_header_level(b); +			if (level <= write_lock_level) { +				/* whoops, must trade for write lock */ +				btrfs_tree_read_unlock(b); +				free_extent_buffer(b); +				b = btrfs_lock_root_node(root); +				root_lock = BTRFS_WRITE_LOCK; + +				/* the level might have changed, check again */ +				level = btrfs_header_level(b); +			} +		}  	} +	p->nodes[level] = b; +	if (!p->skip_locking) +		p->locks[level] = root_lock;  	while (b) {  		level = btrfs_header_level(b); @@ -1595,10 +1696,6 @@ again:  		 * setup the path here so we can release it under lock  		 * contention with the cow code  		 */ -		p->nodes[level] = b; -		if (!p->skip_locking) -			p->locks[level] = 1; -  		if (cow) {  			/*  			 * if we don't really need to cow this block @@ -1610,6 +1707,16 @@ again:  			btrfs_set_path_blocking(p); +			/* +			 * must have write locks on this node and the +			 * parent +			 */ +			if (level + 1 > write_lock_level) { +				write_lock_level = level + 1; +				btrfs_release_path(p); +				goto again; +			} +  			err = btrfs_cow_block(trans, root, b,  					      p->nodes[level + 1],  					      p->slots[level + 1], &b); @@ -1622,10 +1729,7 @@ cow_done:  		BUG_ON(!cow && ins_len);  		p->nodes[level] = b; -		if (!p->skip_locking) -			p->locks[level] = 1; - -		btrfs_clear_path_blocking(p, NULL); +		btrfs_clear_path_blocking(p, NULL, 0);  		/*  		 * we have a lock on b and as long as we aren't changing @@ -1651,7 +1755,7 @@ cow_done:  			}  			p->slots[level] = slot;  			err = setup_nodes_for_search(trans, root, p, b, level, -						     ins_len); +					     ins_len, &write_lock_level);  			if (err == -EAGAIN)  				goto again;  			if (err) { @@ -1661,6 +1765,19 @@ cow_done:  			b = p->nodes[level];  			slot = p->slots[level]; +			/* +			 * slot 0 is special, if we change the key +			 * we have to update the parent pointer +			 * which means we must have a write lock +			 * on the parent +			 */ +			if (slot == 0 && cow && +			    write_lock_level < level + 1) { +				write_lock_level = level + 1; +				btrfs_release_path(p); +				goto again; +			} +  			unlock_up(p, level, lowest_unlock);  			if (level == lowest_level) { @@ -1679,23 +1796,42 @@ cow_done:  			}  			if (!p->skip_locking) { -				btrfs_clear_path_blocking(p, NULL); -				err = btrfs_try_spin_lock(b); - -				if (!err) { -					btrfs_set_path_blocking(p); -					btrfs_tree_lock(b); -					btrfs_clear_path_blocking(p, b); +				level = btrfs_header_level(b); +				if (level <= write_lock_level) { +					err = btrfs_try_tree_write_lock(b); +					if (!err) { +						btrfs_set_path_blocking(p); +						btrfs_tree_lock(b); +						btrfs_clear_path_blocking(p, b, +								  BTRFS_WRITE_LOCK); +					} +					p->locks[level] = BTRFS_WRITE_LOCK; +				} else { +					err = btrfs_try_tree_read_lock(b); +					if (!err) { +						btrfs_set_path_blocking(p); +						btrfs_tree_read_lock(b); +						btrfs_clear_path_blocking(p, b, +								  BTRFS_READ_LOCK); +					} +					p->locks[level] = BTRFS_READ_LOCK;  				} +				p->nodes[level] = b;  			}  		} else {  			p->slots[level] = slot;  			if (ins_len > 0 &&  			    btrfs_leaf_free_space(root, b) < ins_len) { +				if (write_lock_level < 1) { +					write_lock_level = 1; +					btrfs_release_path(p); +					goto again; +				} +  				btrfs_set_path_blocking(p);  				err = split_leaf(trans, root, key,  						 p, ins_len, ret == 0); -				btrfs_clear_path_blocking(p, NULL); +				btrfs_clear_path_blocking(p, NULL, 0);  				BUG_ON(err > 0);  				if (err) { @@ -1976,7 +2112,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,  	add_root_to_dirty_list(root);  	extent_buffer_get(c);  	path->nodes[level] = c; -	path->locks[level] = 1; +	path->locks[level] = BTRFS_WRITE_LOCK;  	path->slots[level] = 0;  	return 0;  } @@ -3819,11 +3955,11 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,  	WARN_ON(!path->keep_locks);  again: -	cur = btrfs_lock_root_node(root); +	cur = btrfs_read_lock_root_node(root);  	level = btrfs_header_level(cur);  	WARN_ON(path->nodes[level]);  	path->nodes[level] = cur; -	path->locks[level] = 1; +	path->locks[level] = BTRFS_READ_LOCK;  	if (btrfs_header_generation(cur) < min_trans) {  		ret = 1; @@ -3913,12 +4049,12 @@ find_next_key:  		cur = read_node_slot(root, cur, slot);  		BUG_ON(!cur); -		btrfs_tree_lock(cur); +		btrfs_tree_read_lock(cur); -		path->locks[level - 1] = 1; +		path->locks[level - 1] = BTRFS_READ_LOCK;  		path->nodes[level - 1] = cur;  		unlock_up(path, level, 1); -		btrfs_clear_path_blocking(path, NULL); +		btrfs_clear_path_blocking(path, NULL, 0);  	}  out:  	if (ret == 0) @@ -4034,6 +4170,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)  	int ret;  	int old_spinning = path->leave_spinning;  	int force_blocking = 0; +	int next_rw_lock = 0;  	nritems = btrfs_header_nritems(path->nodes[0]);  	if (nritems == 0) @@ -4051,6 +4188,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)  again:  	level = 1;  	next = NULL; +	next_rw_lock = 0;  	btrfs_release_path(path);  	path->keep_locks = 1; @@ -4096,11 +4234,12 @@ again:  		}  		if (next) { -			btrfs_tree_unlock(next); +			btrfs_tree_unlock_rw(next, next_rw_lock);  			free_extent_buffer(next);  		}  		next = c; +		next_rw_lock = path->locks[level];  		ret = read_block_for_search(NULL, root, path, &next, level,  					    slot, &key);  		if (ret == -EAGAIN) @@ -4112,15 +4251,22 @@ again:  		}  		if (!path->skip_locking) { -			ret = btrfs_try_spin_lock(next); +			ret = btrfs_try_tree_read_lock(next);  			if (!ret) {  				btrfs_set_path_blocking(path); -				btrfs_tree_lock(next); -				if (!force_blocking) -					btrfs_clear_path_blocking(path, next); +				btrfs_tree_read_lock(next); +				if (!force_blocking) { +					btrfs_clear_path_blocking(path, next, +							  BTRFS_READ_LOCK); +				} +			} +			if (force_blocking) { +				btrfs_set_lock_blocking_rw(next, +							   BTRFS_READ_LOCK); +				next_rw_lock = BTRFS_READ_LOCK_BLOCKING; +			} else { +				next_rw_lock = BTRFS_READ_LOCK;  			} -			if (force_blocking) -				btrfs_set_lock_blocking(next);  		}  		break;  	} @@ -4129,14 +4275,13 @@ again:  		level--;  		c = path->nodes[level];  		if (path->locks[level]) -			btrfs_tree_unlock(c); +			btrfs_tree_unlock_rw(c, path->locks[level]);  		free_extent_buffer(c);  		path->nodes[level] = next;  		path->slots[level] = 0;  		if (!path->skip_locking) -			path->locks[level] = 1; - +			path->locks[level] = next_rw_lock;  		if (!level)  			break; @@ -4151,16 +4296,21 @@ again:  		}  		if (!path->skip_locking) { -			btrfs_assert_tree_locked(path->nodes[level]); -			ret = btrfs_try_spin_lock(next); +			ret = btrfs_try_tree_read_lock(next);  			if (!ret) {  				btrfs_set_path_blocking(path); -				btrfs_tree_lock(next); +				btrfs_tree_read_lock(next);  				if (!force_blocking) -					btrfs_clear_path_blocking(path, next); +					btrfs_clear_path_blocking(path, next, +							  BTRFS_READ_LOCK); +			} +			if (force_blocking) { +				btrfs_set_lock_blocking_rw(next, +						   BTRFS_READ_LOCK); +				next_rw_lock = BTRFS_READ_LOCK_BLOCKING; +			} else { +				next_rw_lock = BTRFS_READ_LOCK;  			} -			if (force_blocking) -				btrfs_set_lock_blocking(next);  		}  	}  	ret = 0;  |