Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c	72
1 file changed, 42 insertions, 30 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e187454d82f..dbe99a5f207 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -650,10 +650,10 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag)
 
 static inline struct page *alloc_hugepage_vma(int defrag,
 					      struct vm_area_struct *vma,
-					      unsigned long haddr)
+					      unsigned long haddr, int nd)
 {
 	return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
-			       HPAGE_PMD_ORDER, vma, haddr);
+			       HPAGE_PMD_ORDER, vma, haddr, nd);
 }
 
 #ifndef CONFIG_NUMA
@@ -678,7 +678,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(khugepaged_enter(vma)))
 			return VM_FAULT_OOM;
 		page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-					  vma, haddr);
+					  vma, haddr, numa_node_id());
 		if (unlikely(!page))
 			goto out;
 		if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
@@ -799,8 +799,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 	}
 
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
-		pages[i] = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
-					  vma, address);
+		pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE,
+					       vma, address, page_to_nid(page));
 		if (unlikely(!pages[i] ||
 			     mem_cgroup_newpage_charge(pages[i], mm,
 						       GFP_KERNEL))) {
@@ -902,7 +902,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow())
 		new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-					      vma, haddr);
+					      vma, haddr, numa_node_id());
 	else
 		new_page = NULL;
 
@@ -1162,7 +1162,12 @@ static void __split_huge_page_refcount(struct page *page)
 		/* after clearing PageTail the gup refcount can be released */
 		smp_mb();
 
-		page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
+		/*
+		 * retain hwpoison flag of the poisoned tail page:
+		 *   fix for the unsuitable process killed on Guest Machine(KVM)
+		 *   by the memory-failure.
+		 */
+		page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON;
 		page_tail->flags |= (page->flags &
 				     ((1L << PG_referenced) |
 				      (1L << PG_swapbacked) |
@@ -1740,7 +1745,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 static void collapse_huge_page(struct mm_struct *mm,
 			       unsigned long address,
 			       struct page **hpage,
-			       struct vm_area_struct *vma)
+			       struct vm_area_struct *vma,
+			       int node)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -1768,7 +1774,8 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 * mmap_sem in read mode is good idea also to allow greater
 	 * scalability.
 	 */
-	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
+				      node);
 	if (unlikely(!new_page)) {
 		up_read(&mm->mmap_sem);
 		*hpage = ERR_PTR(-ENOMEM);
@@ -1806,6 +1813,8 @@ static void collapse_huge_page(struct mm_struct *mm,
 	/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
 	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
 		goto out;
+	if (is_vma_temporary_stack(vma))
+		goto out;
 	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
 
 	pgd = pgd_offset(mm, address);
@@ -1847,7 +1856,6 @@ static void collapse_huge_page(struct mm_struct *mm,
 		set_pmd_at(mm, address, pmd, _pmd);
 		spin_unlock(&mm->page_table_lock);
 		anon_vma_unlock(vma->anon_vma);
-		mem_cgroup_uncharge_page(new_page);
 		goto out;
 	}
 
@@ -1893,6 +1901,7 @@ out_up_write:
 	return;
 
 out:
+	mem_cgroup_uncharge_page(new_page);
 #ifdef CONFIG_NUMA
 	put_page(new_page);
 #endif
@@ -1912,6 +1921,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 	struct page *page;
 	unsigned long _address;
 	spinlock_t *ptl;
+	int node = -1;
 
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
@@ -1942,6 +1952,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		page = vm_normal_page(vma, _address, pteval);
 		if (unlikely(!page))
 			goto out_unmap;
+		/*
+		 * Chose the node of the first page. This could
+		 * be more sophisticated and look at more pages,
+		 * but isn't for now.
+		 */
+		if (node == -1)
+			node = page_to_nid(page);
 		VM_BUG_ON(PageCompound(page));
 		if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
 			goto out_unmap;
@@ -1958,7 +1975,7 @@ out_unmap:
 	pte_unmap_unlock(pte, ptl);
 	if (ret)
 		/* collapse_huge_page will return with the mmap_sem released */
-		collapse_huge_page(mm, address, hpage, vma);
+		collapse_huge_page(mm, address, hpage, vma, node);
 out:
 	return ret;
 }
@@ -2027,32 +2044,27 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 		if ((!(vma->vm_flags & VM_HUGEPAGE) &&
 		     !khugepaged_always()) ||
 		    (vma->vm_flags & VM_NOHUGEPAGE)) {
+		skip:
 			progress++;
 			continue;
 		}
-
 		/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
-		if (!vma->anon_vma || vma->vm_ops || vma->vm_file) {
-			khugepaged_scan.address = vma->vm_end;
-			progress++;
-			continue;
-		}
+		if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+			goto skip;
+		if (is_vma_temporary_stack(vma))
+			goto skip;
+
 		VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
 
 		hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 		hend = vma->vm_end & HPAGE_PMD_MASK;
-		if (hstart >= hend) {
-			progress++;
-			continue;
-		}
+		if (hstart >= hend)
+			goto skip;
+		if (khugepaged_scan.address > hend)
+			goto skip;
 		if (khugepaged_scan.address < hstart)
 			khugepaged_scan.address = hstart;
-		if (khugepaged_scan.address > hend) {
-			khugepaged_scan.address = hend + HPAGE_PMD_SIZE;
-			progress++;
-			continue;
-		}
-		BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
+		VM_BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
 
 		while (khugepaged_scan.address < hend) {
 			int ret;
@@ -2081,7 +2093,7 @@ breakouterloop:
 
 breakouterloop_mmap_sem:
 	spin_lock(&khugepaged_mm_lock);
-	BUG_ON(khugepaged_scan.mm_slot != mm_slot);
+	VM_BUG_ON(khugepaged_scan.mm_slot != mm_slot);
 	/*
	 * Release the current mm_slot if this mm is about to die, or
	 * if we scanned all vmas of this mm.
@@ -2236,9 +2248,9 @@ static int khugepaged(void *none)
 
 	for (;;) {
 		mutex_unlock(&khugepaged_mutex);
-		BUG_ON(khugepaged_thread != current);
+		VM_BUG_ON(khugepaged_thread != current);
 		khugepaged_loop();
-		BUG_ON(khugepaged_thread != current);
+		VM_BUG_ON(khugepaged_thread != current);
 
 		mutex_lock(&khugepaged_mutex);
 		if (!khugepaged_enabled())
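The one-line tail-flags change in __split_huge_page_refcount() leans on C operator precedence: '~' binds tighter than '|', so the expression parses as flags &= (~PAGE_FLAGS_CHECK_AT_PREP) | __PG_HWPOISON, clearing every prep-checked bit except the hwpoison bit so a poisoned tail page stays marked after the split. A minimal userspace sketch of that masking; the bit positions below are invented for illustration and are not the kernel's real definitions:

#include <assert.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel's page-flag masks. */
#define PG_HWPOISON			3UL
#define __PG_HWPOISON			(1UL << PG_HWPOISON)
#define PAGE_FLAGS_CHECK_AT_PREP	0x0fUL	/* pretend bits 0-3 are prep-checked */

int main(void)
{
	unsigned long flags = 0x1aUL;	/* bits 1, 3 (hwpoison) and 4 set */

	/*
	 * Same shape as the patched line: '~' is applied before '|',
	 * so every PAGE_FLAGS_CHECK_AT_PREP bit is cleared except
	 * the hwpoison bit.
	 */
	flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON;

	assert(flags & __PG_HWPOISON);	/* poison survives the split */
	assert(!(flags & 0x2UL));	/* other prep-checked bits are gone */
	printf("flags after mask: %#lx\n", flags);	/* prints 0x18 */
	return 0;
}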
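On the NUMA side, khugepaged_scan_pmd() now records the node of the first page it finds in the PMD range (the in-diff comment notes this could look at more pages) and hands it down so collapse_huge_page() allocates the huge page on that node rather than on whichever node khugepaged happens to be running. A standalone sketch of that first-page heuristic; struct page, page_to_nid() and pick_collapse_node() here are simplified stand-ins, not the kernel's types:

#include <stdio.h>

/* Simplified stand-in: a "page" carries only its NUMA node id. */
struct page { int nid; };

static int page_to_nid(const struct page *p)
{
	return p->nid;
}

/*
 * Mirror of the patched scan loop: remember the node of the first
 * page seen and use it as the allocation hint for the collapse.
 */
static int pick_collapse_node(const struct page *pages, int npages)
{
	int node = -1;
	int i;

	for (i = 0; i < npages; i++) {
		if (node == -1)
			node = page_to_nid(&pages[i]);
		/* ... per-pte checks (PageLRU, PageAnon, ...) go here ... */
	}
	return node;
}

int main(void)
{
	struct page ptes[] = { { 1 }, { 1 }, { 0 } };

	/* The first page lives on node 1, so collapse there. */
	printf("collapse on node %d\n", pick_collapse_node(ptes, 3));
	return 0;
}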