-rw-r--r--  include/linux/hugetlb.h |  1
-rw-r--r--  include/linux/pagemap.h |  8
-rw-r--r--  include/linux/poison.h  |  9
-rw-r--r--  include/linux/rmap.h    |  5
-rw-r--r--  mm/hugetlb.c            | 44
-rw-r--r--  mm/rmap.c               | 59
6 files changed, 114 insertions(+), 12 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d47a7c41745..e688fd89354 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -99,6 +99,7 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
 #define hugetlb_fault(mm, vma, addr, flags)	({ BUG(); 0; })
+#define huge_pte_offset(mm, address)	0

 #define hugetlb_change_protection(vma, address, end, newprot)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index b2bd2bae977..a547d968917 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -282,10 +282,16 @@ static inline loff_t page_offset(struct page *page)
 	return ((loff_t)page->index) << PAGE_CACHE_SHIFT;
 }

+extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
+				     unsigned long address);
+
 static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 					unsigned long address)
 {
-	pgoff_t pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
+	pgoff_t pgoff;
+	if (unlikely(is_vm_hugetlb_page(vma)))
+		return linear_hugepage_index(vma, address);
+	pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
 	pgoff += vma->vm_pgoff;
 	return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 }
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 34066ffd893..2110a81c5e2 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -48,15 +48,6 @@
 #define POISON_FREE	0x6b	/* for use-after-free poisoning */
 #define	POISON_END	0xa5	/* end-byte of poisoning */

-/********** mm/hugetlb.c **********/
-/*
- * Private mappings of hugetlb pages use this poisoned value for
- * page->mapping. The core VM should not be doing anything with this mapping
- * but futex requires the existence of some page->mapping value even though it
- * is unused if PAGE_MAPPING_ANON is set.
- */
-#define HUGETLB_POISON	((void *)(0x00300300 + POISON_POINTER_DELTA + PAGE_MAPPING_ANON))
-
 /********** arch/$ARCH/mm/init.c **********/
 #define POISON_FREE_INITMEM	0xcc
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 77216742c17..9d50e7ef5f5 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -140,6 +140,11 @@ void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned lon
 void page_add_file_rmap(struct page *);
 void page_remove_rmap(struct page *);

+void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
+			    unsigned long);
+void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
+				unsigned long);
+
 static inline void page_dup_rmap(struct page *page)
 {
 	atomic_inc(&page->_mapcount);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 54d42b009db..aa3c5173937 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -18,6 +18,7 @@
 #include <linux/bootmem.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
+#include <linux/rmap.h>

 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -220,6 +221,12 @@ static pgoff_t vma_hugecache_offset(struct hstate *h,
 			(vma->vm_pgoff >> huge_page_order(h));
 }

+pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
+				     unsigned long address)
+{
+	return vma_hugecache_offset(hstate_vma(vma), vma, address);
+}
+
 /*
  * Return the size of the pages allocated when backing a VMA. In the majority
  * cases this will be same size as used by the page table entries.
@@ -552,6 +559,7 @@ static void free_huge_page(struct page *page)
 	set_page_private(page, 0);
 	page->mapping = NULL;
 	BUG_ON(page_count(page));
+	BUG_ON(page_mapcount(page));
 	INIT_LIST_HEAD(&page->lru);

 	spin_lock(&hugetlb_lock);
@@ -2129,6 +2137,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			entry = huge_ptep_get(src_pte);
 			ptepage = pte_page(entry);
 			get_page(ptepage);
+			page_dup_rmap(ptepage);
 			set_huge_pte_at(dst, addr, dst_pte, entry);
 		}
 		spin_unlock(&src->page_table_lock);
@@ -2207,6 +2216,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	flush_tlb_range(vma, start, end);
 	mmu_notifier_invalidate_range_end(mm, start, end);
 	list_for_each_entry_safe(page, tmp, &page_list, lru) {
+		page_remove_rmap(page);
 		list_del(&page->lru);
 		put_page(page);
 	}
@@ -2272,6 +2282,9 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	return 1;
 }

+/*
+ * Hugetlb_cow() should be called with page lock of the original hugepage held.
+ */
 static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, pte_t pte,
 			struct page *pagecache_page)
@@ -2286,8 +2299,11 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 retry_avoidcopy:
 	/* If no-one else is actually using this page, avoid the copy
 	 * and just make the page writable */
-	avoidcopy = (page_count(old_page) == 1);
+	avoidcopy = (page_mapcount(old_page) == 1);
 	if (avoidcopy) {
+		if (!trylock_page(old_page))
+			if (PageAnon(old_page))
+				page_move_anon_rmap(old_page, vma, address);
 		set_huge_ptep_writable(vma, address, ptep);
 		return 0;
 	}
@@ -2338,6 +2354,13 @@ retry_avoidcopy:
 		return -PTR_ERR(new_page);
 	}

+	/*
+	 * When the original hugepage is shared one, it does not have
+	 * anon_vma prepared.
+	 */
+	if (unlikely(anon_vma_prepare(vma)))
+		return VM_FAULT_OOM;
+
 	copy_huge_page(new_page, old_page, address, vma);
 	__SetPageUptodate(new_page);
@@ -2352,6 +2375,8 @@ retry_avoidcopy:
 		huge_ptep_clear_flush(vma, address, ptep);
 		set_huge_pte_at(mm, address, ptep,
 				make_huge_pte(vma, new_page, 1));
+		page_remove_rmap(old_page);
+		hugepage_add_anon_rmap(new_page, vma, address);
 		/* Make the old page be freed below */
 		new_page = old_page;
 	}
@@ -2452,10 +2477,17 @@ retry:
 			spin_lock(&inode->i_lock);
 			inode->i_blocks += blocks_per_huge_page(h);
 			spin_unlock(&inode->i_lock);
+			page_dup_rmap(page);
 		} else {
 			lock_page(page);
-			page->mapping = HUGETLB_POISON;
+			if (unlikely(anon_vma_prepare(vma))) {
+				ret = VM_FAULT_OOM;
+				goto backout_unlocked;
+			}
+			hugepage_add_new_anon_rmap(page, vma, address);
 		}
+	} else {
+		page_dup_rmap(page);
 	}

 	/*
@@ -2507,6 +2539,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t *ptep;
 	pte_t entry;
 	int ret;
+	struct page *page = NULL;
 	struct page *pagecache_page = NULL;
 	static DEFINE_MUTEX(hugetlb_instantiation_mutex);
 	struct hstate *h = hstate_vma(vma);
@@ -2548,6 +2581,11 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 								vma, address);
 	}

+	if (!pagecache_page) {
+		page = pte_page(entry);
+		lock_page(page);
+	}
+
 	spin_lock(&mm->page_table_lock);
 	/* Check for a racing update before calling hugetlb_cow */
 	if (unlikely(!pte_same(entry, huge_ptep_get(ptep))))
@@ -2573,6 +2611,8 @@ out_page_table_lock:
 	if (pagecache_page) {
 		unlock_page(pagecache_page);
 		put_page(pagecache_page);
+	} else {
+		unlock_page(page);
 	}

 out_mutex:
diff --git a/mm/rmap.c b/mm/rmap.c
index 38a336e2eea..0ad53572eaf 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -56,6 +56,7 @@
 #include <linux/memcontrol.h>
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
+#include <linux/hugetlb.h>

 #include <asm/tlbflush.h>
@@ -326,6 +327,8 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	unsigned long address;

+	if (unlikely(is_vm_hugetlb_page(vma)))
+		pgoff = page->index << huge_page_order(page_hstate(page));
 	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
 	if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
 		/* page should be within @vma mapping range */
@@ -369,6 +372,12 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 	pte_t *pte;
 	spinlock_t *ptl;

+	if (unlikely(PageHuge(page))) {
+		pte = huge_pte_offset(mm, address);
+		ptl = &mm->page_table_lock;
+		goto check;
+	}
+
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
 		return NULL;
@@ -389,6 +398,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 	}

 	ptl = pte_lockptr(mm, pmd);
+check:
 	spin_lock(ptl);
 	if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
 		*ptlp = ptl;
@@ -873,6 +883,12 @@ void page_remove_rmap(struct page *page)
 		page_clear_dirty(page);
 		set_page_dirty(page);
 	}
+	/*
+	 * Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED
+	 * and not charged by memcg for now.
+	 */
+	if (unlikely(PageHuge(page)))
+		return;
 	if (PageAnon(page)) {
 		mem_cgroup_uncharge_page(page);
 		__dec_zone_page_state(page, NR_ANON_PAGES);
@@ -1445,3 +1461,46 @@ int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
 		return rmap_walk_file(page, rmap_one, arg);
 }
 #endif /* CONFIG_MIGRATION */
+
+#ifdef CONFIG_HUGETLBFS
+/*
+ * The following three functions are for anonymous (private mapped) hugepages.
+ * Unlike common anonymous pages, anonymous hugepages have no accounting code
+ * and no lru code, because we handle hugepages differently from common pages.
+ */
+static void __hugepage_set_anon_rmap(struct page *page,
+	struct vm_area_struct *vma, unsigned long address, int exclusive)
+{
+	struct anon_vma *anon_vma = vma->anon_vma;
+	BUG_ON(!anon_vma);
+	if (!exclusive) {
+		struct anon_vma_chain *avc;
+		avc = list_entry(vma->anon_vma_chain.prev,
+				 struct anon_vma_chain, same_vma);
+		anon_vma = avc->anon_vma;
+	}
+	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
+	page->mapping = (struct address_space *) anon_vma;
+	page->index = linear_page_index(vma, address);
+}
+
+void hugepage_add_anon_rmap(struct page *page,
+			    struct vm_area_struct *vma, unsigned long address)
+{
+	struct anon_vma *anon_vma = vma->anon_vma;
+	int first;
+	BUG_ON(!anon_vma);
+	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+	first = atomic_inc_and_test(&page->_mapcount);
+	if (first)
+		__hugepage_set_anon_rmap(page, vma, address, 0);
+}
+
+void hugepage_add_new_anon_rmap(struct page *page,
+			struct vm_area_struct *vma, unsigned long address)
+{
+	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+	atomic_set(&page->_mapcount, 0);
+	__hugepage_set_anon_rmap(page, vma, address, 1);
+}
+#endif /* CONFIG_HUGETLBFS */
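Note on the page->mapping encoding used above: __hugepage_set_anon_rmap() stores the anon_vma pointer in page->mapping with the low PAGE_MAPPING_ANON bit set, the same tagging convention ordinary anonymous pages use, which is also why the HUGETLB_POISON placeholder in poison.h can be dropped. The snippet below is a minimal standalone sketch of that pointer-tagging idea only; it is not kernel code, and the struct definitions and helper names are simplified stand-ins for illustration.

/*
 * Userspace sketch of the anon_vma pointer tagging mirrored by
 * __hugepage_set_anon_rmap() above: the anon_vma address is stored in
 * page->mapping with the low PAGE_MAPPING_ANON bit set so rmap code can
 * tell an anonymous mapping from a file-backed address_space.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_MAPPING_ANON	0x1UL	/* low bit flags an anonymous mapping */

struct anon_vma { int dummy; };
struct page { void *mapping; unsigned long index; };

/* Encode step, as in __hugepage_set_anon_rmap(). */
static void set_anon_mapping(struct page *page, struct anon_vma *anon_vma)
{
	page->mapping = (void *)((uintptr_t)anon_vma + PAGE_MAPPING_ANON);
}

/* Decode step: mask the tag bit off before using the pointer. */
static struct anon_vma *get_anon_mapping(struct page *page)
{
	uintptr_t m = (uintptr_t)page->mapping;

	if (!(m & PAGE_MAPPING_ANON))
		return NULL;	/* file-backed: mapping is an address_space */
	return (struct anon_vma *)(m & ~PAGE_MAPPING_ANON);
}

int main(void)
{
	static struct anon_vma av;	/* word-aligned, so the low bit is free */
	struct page page = { 0 };

	set_anon_mapping(&page, &av);
	assert(get_anon_mapping(&page) == &av);
	printf("anon_vma round-trips through page->mapping\n");
	return 0;
}

Because anon_vma structures are word-aligned, the low bit of the pointer is always free to carry the anonymous/file distinction, and the kernel's rmap code masks it off before dereferencing (cf. PageAnon()).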