Diffstat (limited to 'mm/shmem.c')
-rw-r--r--  mm/shmem.c  |  148
1 file changed, 83 insertions, 65 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index 8fa27e4e582..ba4ad28b7db 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -852,7 +852,7 @@ static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_

 static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
 {
-        struct inode *inode;
+        struct address_space *mapping;
         unsigned long idx;
         unsigned long size;
         unsigned long limit;
@@ -875,8 +875,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
         if (size > SHMEM_NR_DIRECT)
                 size = SHMEM_NR_DIRECT;
         offset = shmem_find_swp(entry, ptr, ptr+size);
-        if (offset >= 0)
+        if (offset >= 0) {
+                shmem_swp_balance_unmap();
                 goto found;
+        }
         if (!info->i_indirect)
                 goto lost2;
@@ -917,6 +919,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
                         shmem_swp_unmap(ptr);
                         if (offset >= 0) {
                                 shmem_dir_unmap(dir);
+                                ptr = shmem_swp_map(subdir);
                                 goto found;
                         }
                 }
@@ -928,8 +931,7 @@ lost2:
         return 0;
 found:
         idx += offset;
-        inode = igrab(&info->vfs_inode);
-        spin_unlock(&info->lock);
+        ptr += offset;

         /*
          * Move _head_ to start search for next from here.
@@ -940,37 +942,18 @@ found:
          */
         if (shmem_swaplist.next != &info->swaplist)
                 list_move_tail(&shmem_swaplist, &info->swaplist);
-        mutex_unlock(&shmem_swaplist_mutex);
-        error = 1;
-        if (!inode)
-                goto out;
         /*
-         * Charge page using GFP_KERNEL while we can wait.
-         * Charged back to the user(not to caller) when swap account is used.
-         * add_to_page_cache() will be called with GFP_NOWAIT.
+         * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
+         * but also to hold up shmem_evict_inode(): so inode cannot be freed
+         * beneath us (pagelock doesn't help until the page is in pagecache).
          */
-        error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
-        if (error)
-                goto out;
-        error = radix_tree_preload(GFP_KERNEL);
-        if (error) {
-                mem_cgroup_uncharge_cache_page(page);
-                goto out;
-        }
-        error = 1;
-
-        spin_lock(&info->lock);
-        ptr = shmem_swp_entry(info, idx, NULL);
-        if (ptr && ptr->val == entry.val) {
-                error = add_to_page_cache_locked(page, inode->i_mapping,
-                                                idx, GFP_NOWAIT);
-                /* does mem_cgroup_uncharge_cache_page on error */
-        } else  /* we must compensate for our precharge above */
-                mem_cgroup_uncharge_cache_page(page);
+        mapping = info->vfs_inode.i_mapping;
+        error = add_to_page_cache_locked(page, mapping, idx, GFP_NOWAIT);
+        /* which does mem_cgroup_uncharge_cache_page on error */
         if (error == -EEXIST) {
-                struct page *filepage = find_get_page(inode->i_mapping, idx);
+                struct page *filepage = find_get_page(mapping, idx);
                 error = 1;
                 if (filepage) {
                         /*
@@ -990,14 +973,8 @@ found:
                 swap_free(entry);
                 error = 1;      /* not an error, but entry was found */
         }
-        if (ptr)
-                shmem_swp_unmap(ptr);
+        shmem_swp_unmap(ptr);
         spin_unlock(&info->lock);
-        radix_tree_preload_end();
-out:
-        unlock_page(page);
-        page_cache_release(page);
-        iput(inode);            /* allows for NULL */
         return error;
 }

@@ -1009,6 +986,26 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
         struct list_head *p, *next;
         struct shmem_inode_info *info;
         int found = 0;
+        int error;
+
+        /*
+         * Charge page using GFP_KERNEL while we can wait, before taking
+         * the shmem_swaplist_mutex which might hold up shmem_writepage().
+         * Charged back to the user (not to caller) when swap account is used.
+         * add_to_page_cache() will be called with GFP_NOWAIT.
+         */
+        error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
+        if (error)
+                goto out;
+        /*
+         * Try to preload while we can wait, to not make a habit of
+         * draining atomic reserves; but don't latch on to this cpu,
+         * it's okay if sometimes we get rescheduled after this.
+         */
+        error = radix_tree_preload(GFP_KERNEL);
+        if (error)
+                goto uncharge;
+        radix_tree_preload_end();

         mutex_lock(&shmem_swaplist_mutex);
         list_for_each_safe(p, next, &shmem_swaplist) {
@@ -1016,17 +1013,19 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
                 found = shmem_unuse_inode(info, entry, page);
                 cond_resched();
                 if (found)
-                        goto out;
+                        break;
         }
         mutex_unlock(&shmem_swaplist_mutex);
-        /*
-         * Can some race bring us here?  We've been holding page lock,
-         * so I think not; but would rather try again later than BUG()
-         */
+
+uncharge:
+        if (!found)
+                mem_cgroup_uncharge_cache_page(page);
+        if (found < 0)
+                error = found;
+out:
         unlock_page(page);
         page_cache_release(page);
-out:
-        return (found < 0) ? found : 0;
+        return error;
 }

 /*
@@ -1064,7 +1063,25 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
         else
                 swap.val = 0;

+        /*
+         * Add inode to shmem_unuse()'s list of swapped-out inodes,
+         * if it's not already there.  Do it now because we cannot take
+         * mutex while holding spinlock, and must do so before the page
+         * is moved to swap cache, when its pagelock no longer protects
+         * the inode from eviction.  But don't unlock the mutex until
+         * we've taken the spinlock, because shmem_unuse_inode() will
+         * prune a !swapped inode from the swaplist under both locks.
+         */
+        if (swap.val) {
+                mutex_lock(&shmem_swaplist_mutex);
+                if (list_empty(&info->swaplist))
+                        list_add_tail(&info->swaplist, &shmem_swaplist);
+        }
+
         spin_lock(&info->lock);
+        if (swap.val)
+                mutex_unlock(&shmem_swaplist_mutex);
+
         if (index >= info->next_index) {
                 BUG_ON(!(info->flags & SHMEM_TRUNCATE));
                 goto unlock;
@@ -1084,21 +1101,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
                 delete_from_page_cache(page);
                 shmem_swp_set(info, entry, swap.val);
                 shmem_swp_unmap(entry);
-                if (list_empty(&info->swaplist))
-                        inode = igrab(inode);
-                else
-                        inode = NULL;
                 spin_unlock(&info->lock);
                 swap_shmem_alloc(swap);
                 BUG_ON(page_mapped(page));
                 swap_writepage(page, wbc);
-                if (inode) {
-                        mutex_lock(&shmem_swaplist_mutex);
-                        /* move instead of add in case we're racing */
-                        list_move_tail(&info->swaplist, &shmem_swaplist);
-                        mutex_unlock(&shmem_swaplist_mutex);
-                        iput(inode);
-                }
                 return 0;
         }

@@ -1400,20 +1406,14 @@ repeat:
                 if (sbinfo->max_blocks) {
                         if (percpu_counter_compare(&sbinfo->used_blocks,
                                                 sbinfo->max_blocks) >= 0 ||
-                            shmem_acct_block(info->flags)) {
-                                spin_unlock(&info->lock);
-                                error = -ENOSPC;
-                                goto failed;
-                        }
+                            shmem_acct_block(info->flags))
+                                goto nospace;
                         percpu_counter_inc(&sbinfo->used_blocks);
                         spin_lock(&inode->i_lock);
                         inode->i_blocks += BLOCKS_PER_PAGE;
                         spin_unlock(&inode->i_lock);
-                } else if (shmem_acct_block(info->flags)) {
-                        spin_unlock(&info->lock);
-                        error = -ENOSPC;
-                        goto failed;
-                }
+                } else if (shmem_acct_block(info->flags))
+                        goto nospace;

                 if (!filepage) {
                         int ret;
@@ -1493,6 +1493,24 @@ done:
         error = 0;
         goto out;

+nospace:
+        /*
+         * Perhaps the page was brought in from swap between find_lock_page
+         * and taking info->lock?  We allow for that at add_to_page_cache_lru,
+         * but must also avoid reporting a spurious ENOSPC while working on a
+         * full tmpfs.  (When filepage has been passed in to shmem_getpage, it
+         * is already in page cache, which prevents this race from occurring.)
+         */
+        if (!filepage) {
+                struct page *page = find_get_page(mapping, idx);
+                if (page) {
+                        spin_unlock(&info->lock);
+                        page_cache_release(page);
+                        goto repeat;
+                }
+        }
+        spin_unlock(&info->lock);
+        error = -ENOSPC;
 failed:
         if (*pagep != filepage) {
                 unlock_page(filepage);
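The comment added to shmem_unuse() above captures a general pattern: do everything that may block (the mem_cgroup charge with GFP_KERNEL, the radix-tree preload) before taking shmem_swaplist_mutex, do only non-blocking work once the locks are held (add_to_page_cache with GFP_NOWAIT), and unwind the preparation through the uncharge/out labels on failure. A minimal userspace sketch of that shape follows; it is not kernel code, and list_mutex, reserve_buffer(), release_buffer() and publish() are hypothetical stand-ins, not names from mm/shmem.c.

/*
 * Sketch only: blocking preparation before the lock, non-blocking work
 * under it, and an explicit unwind path on failure (like the uncharge:
 * label in shmem_unuse() above).  All names are illustrative stand-ins.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t list_mutex = PTHREAD_MUTEX_INITIALIZER;
static char *shared_slot;               /* state that list_mutex protects */

static char *reserve_buffer(size_t len) /* may block: done before the mutex */
{
        return malloc(len);
}

static void release_buffer(char *buf)   /* undo the reservation on failure */
{
        free(buf);
}

static int publish(const char *msg)
{
        int error = 0;
        char *buf = reserve_buffer(strlen(msg) + 1);    /* blocking work first */

        if (!buf)
                return -1;

        pthread_mutex_lock(&list_mutex);
        if (shared_slot) {              /* slot already taken: back out */
                error = -1;
                goto unlock;
        }
        strcpy(buf, msg);               /* only non-blocking work under the lock */
        shared_slot = buf;
        buf = NULL;                     /* ownership transferred */
unlock:
        pthread_mutex_unlock(&list_mutex);
        if (buf)
                release_buffer(buf);    /* analogous to the uncharge path */
        return error;
}

int main(void)
{
        printf("first publish: %d\n", publish("hello"));
        printf("second publish: %d\n", publish("world"));
        return 0;
}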
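The shmem_writepage() hunk relies on a specific lock handover: take the sleeping shmem_swaplist_mutex before info->lock, and release it only after the spinlock is held, so that shmem_unuse_inode() (which prunes a !swapped inode from the swaplist while holding both locks) can never act in the window between them. Below is a small pthreads sketch of the same ordering; all names (list_mutex, struct item, on_list, swapped) are illustrative stand-ins, not the kernel's types or API.

/*
 * Sketch only: "take mutex, then spinlock, then drop mutex" against a
 * pruner that removes entries while holding BOTH locks.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t list_mutex = PTHREAD_MUTEX_INITIALIZER; /* may sleep */

struct item {
        pthread_spinlock_t lock;        /* plays the role of info->lock */
        bool on_list;
        bool swapped;
};

static void item_init(struct item *it)
{
        pthread_spin_init(&it->lock, PTHREAD_PROCESS_PRIVATE);
        it->on_list = false;
        it->swapped = false;
}

/*
 * Writer: add the entry to the list first, and keep the mutex held until
 * the spinlock is taken, so there is no window in which the pruner could
 * drop a not-yet-swapped entry the writer still depends on.
 */
static void writer(struct item *it)
{
        pthread_mutex_lock(&list_mutex);
        if (!it->on_list)
                it->on_list = true;

        pthread_spin_lock(&it->lock);
        pthread_mutex_unlock(&list_mutex);      /* safe: spinlock already held */

        it->swapped = true;                     /* update under the spinlock */
        pthread_spin_unlock(&it->lock);
}

/*
 * Pruner: only removes an unswapped entry while holding both locks,
 * so it can never interleave with the writer's handover above.
 */
static void pruner(struct item *it)
{
        pthread_mutex_lock(&list_mutex);
        pthread_spin_lock(&it->lock);
        if (!it->swapped)
                it->on_list = false;
        pthread_spin_unlock(&it->lock);
        pthread_mutex_unlock(&list_mutex);
}

int main(void)
{
        struct item it;

        item_init(&it);
        writer(&it);
        pruner(&it);
        printf("on_list=%d swapped=%d\n", it.on_list, it.swapped);
        return 0;
}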