| | | |
|---|---|---|
| author | David Woodhouse <David.Woodhouse@intel.com> | 2008-07-11 14:36:25 +0100 |
| committer | David Woodhouse <David.Woodhouse@intel.com> | 2008-07-11 14:36:25 +0100 |
| commit | a8931ef380c92d121ae74ecfb03b2d63f72eea6f (patch) | |
| tree | 980fb6b019e11e6cb1ece55b7faff184721a8053 /mm/memory.c | |
| parent | 90574d0a4d4b73308ae54a2a57a4f3f1fa98e984 (diff) | |
| parent | e5a5816f7875207cb0a0a7032e39a4686c5e10a4 (diff) | |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'mm/memory.c')
| | | |
|---|---|---|
| -rw-r--r-- | mm/memory.c | 111 |

1 file changed, 99 insertions, 12 deletions
```diff
diff --git a/mm/memory.c b/mm/memory.c
index bbab1e37055..2302d228fe0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -311,6 +311,21 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
 	if (!new)
 		return -ENOMEM;
 
+	/*
+	 * Ensure all pte setup (eg. pte page lock and page clearing) are
+	 * visible before the pte is made visible to other CPUs by being
+	 * put into page tables.
+	 *
+	 * The other side of the story is the pointer chasing in the page
+	 * table walking code (when walking the page table without locking;
+	 * ie. most of the time). Fortunately, these data accesses consist
+	 * of a chain of data-dependent loads, meaning most CPUs (alpha
+	 * being the notable exception) will already guarantee loads are
+	 * seen in-order. See the alpha page table accessors for the
+	 * smp_read_barrier_depends() barriers in page table walking code.
+	 */
+	smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
+
 	spin_lock(&mm->page_table_lock);
 	if (!pmd_present(*pmd)) {	/* Has another populated it ? */
 		mm->nr_ptes++;
@@ -329,6 +344,8 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
 	if (!new)
 		return -ENOMEM;
 
+	smp_wmb(); /* See comment in __pte_alloc */
+
 	spin_lock(&init_mm.page_table_lock);
 	if (!pmd_present(*pmd)) {	/* Has another populated it ? */
 		pmd_populate_kernel(&init_mm, pmd, new);
@@ -969,7 +986,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 		goto no_page_table;
 
 	pmd = pmd_offset(pud, address);
-	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+	if (pmd_none(*pmd))
 		goto no_page_table;
 
 	if (pmd_huge(*pmd)) {
@@ -978,18 +995,19 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 		goto out;
 	}
 
+	if (unlikely(pmd_bad(*pmd)))
+		goto no_page_table;
+
 	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
-	if (!ptep)
-		goto out;
 
 	pte = *ptep;
 	if (!pte_present(pte))
-		goto unlock;
+		goto no_page;
 	if ((flags & FOLL_WRITE) && !pte_write(pte))
 		goto unlock;
 	page = vm_normal_page(vma, address, pte);
 	if (unlikely(!page))
-		goto unlock;
+		goto bad_page;
 
 	if (flags & FOLL_GET)
 		get_page(page);
@@ -1004,6 +1022,15 @@ unlock:
 out:
 	return page;
 
+bad_page:
+	pte_unmap_unlock(ptep, ptl);
+	return ERR_PTR(-EFAULT);
+
+no_page:
+	pte_unmap_unlock(ptep, ptl);
+	if (!pte_none(pte))
+		return page;
+	/* Fall through to ZERO_PAGE handling */
 no_page_table:
 	/*
 	 * When core dumping an enormous anonymous area that nobody
@@ -1018,6 +1045,26 @@ no_page_table:
 	return page;
 }
 
+/* Can we do the FOLL_ANON optimization? */
+static inline int use_zero_page(struct vm_area_struct *vma)
+{
+	/*
+	 * We don't want to optimize FOLL_ANON for make_pages_present()
+	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
+	 * we want to get the page from the page tables to make sure
+	 * that we serialize and update with any other user of that
+	 * mapping.
+	 */
+	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
+		return 0;
+	/*
+	 * And if we have a fault or a nopfn routine, it's not an
+	 * anonymous region.
+	 */
+	return !vma->vm_ops ||
+		(!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+}
+
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, int len, int write, int force,
 		struct page **pages, struct vm_area_struct **vmas)
@@ -1092,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		foll_flags = FOLL_TOUCH;
 		if (pages)
 			foll_flags |= FOLL_GET;
-		if (!write && !(vma->vm_flags & VM_LOCKED) &&
-		    (!vma->vm_ops || !vma->vm_ops->fault))
+		if (!write && use_zero_page(vma))
 			foll_flags |= FOLL_ANON;
 
 		do {
@@ -1105,7 +1151,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			 * be processed until returning to user space.
 			 */
 			if (unlikely(test_tsk_thread_flag(tsk, TIF_MEMDIE)))
-				return -ENOMEM;
+				return i ? i : -ENOMEM;
 
 			if (write)
 				foll_flags |= FOLL_WRITE;
@@ -1139,6 +1185,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				cond_resched();
 			}
+			if (IS_ERR(page))
+				return i ? i : PTR_ERR(page);
 			if (pages) {
 				pages[i] = page;
@@ -1649,8 +1697,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct page *dirty_page = NULL;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
-	if (!old_page)
+	if (!old_page) {
+		/*
+		 * VM_MIXEDMAP !pfn_valid() case
+		 *
+		 * We should not cow pages in a shared writeable mapping.
+		 * Just mark the pages writable as we can't do any dirty
+		 * accounting on raw pfn maps.
+		 */
+		if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+				     (VM_WRITE|VM_SHARED))
+			goto reuse;
 		goto gotten;
+	}
 
 	/*
 	 * Take out anonymous pages first, anonymous shared vmas are
@@ -1703,6 +1762,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	if (reuse) {
+reuse:
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = pte_mkyoung(orig_pte);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -1737,7 +1797,6 @@ gotten:
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 	if (likely(pte_same(*page_table, orig_pte))) {
 		if (old_page) {
-			page_remove_rmap(old_page, vma);
 			if (!PageAnon(old_page)) {
 				dec_mm_counter(mm, file_rss);
 				inc_mm_counter(mm, anon_rss);
@@ -1759,6 +1818,32 @@ gotten:
 		lru_cache_add_active(new_page);
 		page_add_new_anon_rmap(new_page, vma, address);
 
+		if (old_page) {
+			/*
+			 * Only after switching the pte to the new page may
+			 * we remove the mapcount here. Otherwise another
+			 * process may come and find the rmap count decremented
+			 * before the pte is switched to the new page, and
+			 * "reuse" the old page writing into it while our pte
+			 * here still points into it and can be read by other
+			 * threads.
+			 *
+			 * The critical issue is to order this
+			 * page_remove_rmap with the ptp_clear_flush above.
+			 * Those stores are ordered by (if nothing else,)
+			 * the barrier present in the atomic_add_negative
+			 * in page_remove_rmap.
+			 *
+			 * Then the TLB flush in ptep_clear_flush ensures that
+			 * no process can access the old page before the
+			 * decremented mapcount is visible. And the old page
+			 * cannot be reused until after the decremented
+			 * mapcount is visible. So transitively, TLBs to
+			 * old page will be flushed before it can be reused.
+			 */
+			page_remove_rmap(old_page, vma);
+		}
+
 		/* Free the old page.. */
 		new_page = old_page;
 		ret |= VM_FAULT_WRITE;
@@ -2275,8 +2360,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	vmf.flags = flags;
 	vmf.page = NULL;
 
-	BUG_ON(vma->vm_flags & VM_PFNMAP);
-
 	ret = vma->vm_ops->fault(vma, &vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
 		return ret;
@@ -2616,6 +2699,8 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 	if (!new)
 		return -ENOMEM;
 
+	smp_wmb(); /* See comment in __pte_alloc */
+
 	spin_lock(&mm->page_table_lock);
 	if (pgd_present(*pgd))		/* Another has populated it */
 		pud_free(mm, new);
@@ -2637,6 +2722,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 	if (!new)
 		return -ENOMEM;
 
+	smp_wmb(); /* See comment in __pte_alloc */
+
 	spin_lock(&mm->page_table_lock);
 #ifndef __ARCH_HAS_4LEVEL_HACK
 	if (pud_present(*pud))		/* Another has populated it */
```
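The new comment in __pte_alloc() describes a general initialize-then-publish ordering pattern: finish every store that sets up an object, issue smp_wmb(), and only then store the pointer that makes the object reachable, while lockless readers rely on the data dependency of pointer chasing (plus smp_read_barrier_depends() on Alpha) to see the initialized contents. The sketch below is illustrative only and is not part of the commit; the struct, the `published` pointer and the function names are invented for the example, and it assumes a kernel-style context where the smp_wmb()/smp_read_barrier_depends() barrier macros are already in scope.

```c
/*
 * Illustrative sketch only -- not from the commit. Assumes the kernel's
 * barrier macros (smp_wmb, smp_read_barrier_depends) are available, as
 * they are in any file that pulls in the usual kernel headers.
 */
struct node {
	int payload;
};

static struct node *published;	/* read locklessly, like a page-table entry */

/* Writer side: mirrors __pte_alloc() publishing a freshly set-up pte page. */
static void publish_node(struct node *n)
{
	n->payload = 42;	/* complete all initialization first */
	smp_wmb();		/* order the init stores before the pointer store */
	published = n;		/* only now may other CPUs see the pointer */
}

/* Reader side: mirrors the lockless page-table walk the comment describes. */
static int read_payload(void)
{
	struct node *n = published;

	if (!n)
		return -1;
	smp_read_barrier_depends();	/* no-op everywhere except Alpha */
	return n->payload;		/* data-dependent load sees 42, not garbage */
}
```

Because the reader's loads are data-dependent, every architecture except Alpha orders them automatically, which is why the commit only adds the write barrier on the producer side, before the new pte page is published via pmd_populate().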