 Documentation/filesystems/vfat.txt    |  11
 Documentation/vm/hugetlbpage.txt      |  10
 arch/x86/mm/hugetlbpage.c             |  21
 drivers/block/cciss_scsi.c            |  11
 drivers/clocksource/cs5535-clockevt.c |   4
 drivers/misc/sgi-xp/xpc_uv.c          |  84
 drivers/rapidio/devices/tsi721.c      |  12
 drivers/rtc/rtc-pcf2123.c             |   2
 drivers/rtc/rtc-rs5c348.c             |   7
 include/linux/compaction.h            |   4
 include/linux/string.h                |   2
 mm/compaction.c                       | 156
 mm/internal.h                         |   1
 mm/mmap.c                             |   2
 mm/page_alloc.c                       |  38
 scripts/checkpatch.pl                 |   3
 16 files changed, 258 insertions(+), 110 deletions(-)
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index ead764b2728..de1e6c4dccf 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -137,6 +137,17 @@ errors=panic|continue|remount-ro
 		 without doing anything or remount the partition in
 		 read-only mode (default behavior).
 
+discard       -- If set, issues discard/TRIM commands to the block
+		 device when blocks are freed. This is useful for SSD devices
+		 and sparse/thinly-provisioned LUNs.
+
+nfs           -- This option maintains an index (cache) of directory
+		 inodes by i_logstart which is used by the nfs-related code to
+		 improve look-ups.
+
+		 Enable this only if you want to export the FAT filesystem
+		 over NFS
+
 <bool>: 0,1,yes,no,true,false
 
 TODO
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index f8551b3879f..4ac359b7aa1 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -299,11 +299,17 @@ map_hugetlb.c.
 *******************************************************************
 
 /*
- * hugepage-shm:  see Documentation/vm/hugepage-shm.c
+ * map_hugetlb: see tools/testing/selftests/vm/map_hugetlb.c
  */
 
 *******************************************************************
 
 /*
- * hugepage-mmap:  see Documentation/vm/hugepage-mmap.c
+ * hugepage-shm:  see tools/testing/selftests/vm/hugepage-shm.c
+ */
+
+*******************************************************************
+
+/*
+ * hugepage-mmap:  see tools/testing/selftests/vm/hugepage-mmap.c
  */
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index f6679a7fb8c..b91e4851242 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -56,9 +56,16 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
 }
 
 /*
- * search for a shareable pmd page for hugetlb.
+ * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
+ * and returns the corresponding pte. While this is not necessary for the
+ * !shared pmd case because we can allocate the pmd later as well, it makes the
+ * code much cleaner. pmd allocation is essential for the shared case because
+ * pud has to be populated inside the same i_mmap_mutex section - otherwise
+ * racing tasks could either miss the sharing (see huge_pte_offset) or select a
+ * bad pmd for sharing.
  */
-static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+static pte_t *
+huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 {
 	struct vm_area_struct *vma = find_vma(mm, addr);
 	struct address_space *mapping = vma->vm_file->f_mapping;
@@ -68,9 +75,10 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	struct vm_area_struct *svma;
 	unsigned long saddr;
 	pte_t *spte = NULL;
+	pte_t *pte;
 
 	if (!vma_shareable(vma, addr))
-		return;
+		return (pte_t *)pmd_alloc(mm, pud, addr);
 
 	mutex_lock(&mapping->i_mmap_mutex);
 	vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
@@ -97,7 +105,9 @@ static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 		put_page(virt_to_page(spte));
 	spin_unlock(&mm->page_table_lock);
 out:
+	pte = (pte_t *)pmd_alloc(mm, pud, addr);
 	mutex_unlock(&mapping->i_mmap_mutex);
+	return pte;
 }
 
 /*
@@ -142,8 +152,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 		} else {
 			BUG_ON(sz != PMD_SIZE);
 			if (pud_none(*pud))
-				huge_pmd_share(mm, addr, pud);
-			pte = (pte_t *) pmd_alloc(mm, pud, addr);
+				pte = huge_pmd_share(mm, addr, pud);
+			else
+				pte = (pte_t *)pmd_alloc(mm, pud, addr);
 		}
 	}
 	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index acda773b372..38aa6dda6b8 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -763,16 +763,7 @@ static void complete_scsi_command(CommandList_struct *c, int timeout,
 		{
 			case CMD_TARGET_STATUS:
 				/* Pass it up to the upper layers... */
-				if( ei->ScsiStatus)
-                		{
-#if 0
-                    			printk(KERN_WARNING "cciss: cmd %p "
-						"has SCSI Status = %x\n",
-						c, ei->ScsiStatus);
-#endif
-					cmd->result |= (ei->ScsiStatus << 1);
-                		}
-				else {  /* scsi status is zero??? How??? */
+				if (!ei->ScsiStatus) {
 
 	/* Ordinarily, this case should never happen, but there is a bug
 	   in some released firmware revisions that allows it to happen
diff --git a/drivers/clocksource/cs5535-clockevt.c b/drivers/clocksource/cs5535-clockevt.c
index 540795cd076..d9279385304 100644
--- a/drivers/clocksource/cs5535-clockevt.c
+++ b/drivers/clocksource/cs5535-clockevt.c
@@ -53,7 +53,7 @@ static struct cs5535_mfgpt_timer *cs5535_event_clock;
 #define MFGPT_PERIODIC (MFGPT_HZ / HZ)
 
 /*
- * The MFPGT timers on the CS5536 provide us with suitable timers to use
+ * The MFGPT timers on the CS5536 provide us with suitable timers to use
  * as clock event sources - not as good as a HPET or APIC, but certainly
  * better than the PIT.  This isn't a general purpose MFGPT driver, but
 * a simplified one designed specifically to act as a clock event source.
@@ -144,7 +144,7 @@ static int __init cs5535_mfgpt_init(void)
 
 	timer = cs5535_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING);
 	if (!timer) {
-		printk(KERN_ERR DRV_NAME ": Could not allocate MFPGT timer\n");
+		printk(KERN_ERR DRV_NAME ": Could not allocate MFGPT timer\n");
 		return -ENODEV;
 	}
 	cs5535_event_clock = timer;
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 87b251ab6ec..b9e2000969f 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -18,6 +18,8 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <asm/uv/uv_hub.h>
@@ -59,6 +61,8 @@ static struct xpc_heartbeat_uv *xpc_heartbeat_uv;
 					 XPC_NOTIFY_MSG_SIZE_UV)
 #define XPC_NOTIFY_IRQ_NAME		"xpc_notify"
 
+static int xpc_mq_node = -1;
+
 static struct xpc_gru_mq_uv *xpc_activate_mq_uv;
 static struct xpc_gru_mq_uv *xpc_notify_mq_uv;
 
@@ -109,11 +113,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
 #if defined CONFIG_X86_64
 	mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
 			UV_AFFINITY_CPU);
-	if (mq->irq < 0) {
-		dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
-			-mq->irq);
+	if (mq->irq < 0)
 		return mq->irq;
-	}
 
 	mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset);
 
@@ -238,8 +239,9 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name,
 	mq->mmr_blade = uv_cpu_to_blade_id(cpu);
 
 	nid = cpu_to_node(cpu);
-	page = alloc_pages_exact_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
-				pg_order);
+	page = alloc_pages_exact_node(nid,
+				      GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+				      pg_order);
 	if (page == NULL) {
 		dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d "
 			"bytes of memory on nid=%d for GRU mq\n", mq_size, nid);
@@ -1731,9 +1733,50 @@ static struct xpc_arch_operations xpc_arch_ops_uv = {
 	.notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv,
 };
 
+static int
+xpc_init_mq_node(int nid)
+{
+	int cpu;
+
+	get_online_cpus();
+
+	for_each_cpu(cpu, cpumask_of_node(nid)) {
+		xpc_activate_mq_uv =
+			xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid,
+					     XPC_ACTIVATE_IRQ_NAME,
+					     xpc_handle_activate_IRQ_uv);
+		if (!IS_ERR(xpc_activate_mq_uv))
+			break;
+	}
+	if (IS_ERR(xpc_activate_mq_uv)) {
+		put_online_cpus();
+		return PTR_ERR(xpc_activate_mq_uv);
+	}
+
+	for_each_cpu(cpu, cpumask_of_node(nid)) {
+		xpc_notify_mq_uv =
+			xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid,
+					     XPC_NOTIFY_IRQ_NAME,
+					     xpc_handle_notify_IRQ_uv);
+		if (!IS_ERR(xpc_notify_mq_uv))
+			break;
+	}
+	if (IS_ERR(xpc_notify_mq_uv)) {
+		xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
+		put_online_cpus();
+		return PTR_ERR(xpc_notify_mq_uv);
+	}
+
+	put_online_cpus();
+	return 0;
+}
+
 int
 xpc_init_uv(void)
 {
+	int nid;
+	int ret = 0;
+
 	xpc_arch_ops = xpc_arch_ops_uv;
 
 	if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) {
@@ -1742,21 +1785,21 @@ xpc_init_uv(void)
 		return -E2BIG;
 	}
 
-	xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0,
-						  XPC_ACTIVATE_IRQ_NAME,
-						  xpc_handle_activate_IRQ_uv);
-	if (IS_ERR(xpc_activate_mq_uv))
-		return PTR_ERR(xpc_activate_mq_uv);
+	if (xpc_mq_node < 0)
+		for_each_online_node(nid) {
+			ret = xpc_init_mq_node(nid);
 
-	xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0,
-						XPC_NOTIFY_IRQ_NAME,
-						xpc_handle_notify_IRQ_uv);
-	if (IS_ERR(xpc_notify_mq_uv)) {
-		xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
-		return PTR_ERR(xpc_notify_mq_uv);
-	}
+			if (!ret)
+				break;
+		}
+	else
+		ret = xpc_init_mq_node(xpc_mq_node);
 
-	return 0;
+	if (ret < 0)
+		dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n",
+			-ret);
+
+	return ret;
 }
 
 void
@@ -1765,3 +1808,6 @@ xpc_exit_uv(void)
 	xpc_destroy_gru_mq_uv(xpc_notify_mq_uv);
 	xpc_destroy_gru_mq_uv(xpc_activate_mq_uv);
 }
+
+module_param(xpc_mq_node, int, 0);
+MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues.");
diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c
index 722246cf20a..5d44252b734 100644
--- a/drivers/rapidio/devices/tsi721.c
+++ b/drivers/rapidio/devices/tsi721.c
@@ -435,6 +435,9 @@ static void tsi721_db_dpc(struct work_struct *work)
 				" info %4.4x\n", DBELL_SID(idb.bytes),
 				DBELL_TID(idb.bytes), DBELL_INF(idb.bytes));
 		}
+
+		wr_ptr = ioread32(priv->regs +
+				  TSI721_IDQ_WP(IDB_QUEUE)) % IDB_QSIZE;
 	}
 
 	iowrite32(rd_ptr & (IDB_QSIZE - 1),
@@ -445,6 +448,10 @@ static void tsi721_db_dpc(struct work_struct *work)
 	regval |= TSI721_SR_CHINT_IDBQRCV;
 	iowrite32(regval,
 		priv->regs + TSI721_SR_CHINTE(IDB_QUEUE));
+
+	wr_ptr = ioread32(priv->regs + TSI721_IDQ_WP(IDB_QUEUE)) % IDB_QSIZE;
+	if (wr_ptr != rd_ptr)
+		schedule_work(&priv->idb_work);
 }
 
 /**
@@ -2212,7 +2219,7 @@ static int __devinit tsi721_probe(struct pci_dev *pdev,
 				  const struct pci_device_id *id)
 {
 	struct tsi721_device *priv;
-	int i, cap;
+	int cap;
 	int err;
 	u32 regval;
 
@@ -2232,12 +2239,15 @@ static int __devinit tsi721_probe(struct pci_dev *pdev,
 	priv->pdev = pdev;
 
 #ifdef DEBUG
+	{
+	int i;
 	for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
 		dev_dbg(&pdev->dev, "res[%d] @ 0x%llx (0x%lx, 0x%lx)\n",
 			i, (unsigned long long)pci_resource_start(pdev, i),
 			(unsigned long)pci_resource_len(pdev, i),
 			pci_resource_flags(pdev, i));
 	}
+	}
 #endif
 	/*
 	 * Verify BAR configuration
diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c
index 836118795c0..13e4df63974 100644
--- a/drivers/rtc/rtc-pcf2123.c
+++ b/drivers/rtc/rtc-pcf2123.c
@@ -43,6 +43,7 @@
 #include <linux/rtc.h>
 #include <linux/spi/spi.h>
 #include <linux/module.h>
+#include <linux/sysfs.h>
 
 #define DRV_VERSION "0.6"
 
@@ -292,6 +293,7 @@ static int __devinit pcf2123_probe(struct spi_device *spi)
 	pdata->rtc = rtc;
 
 	for (i = 0; i < 16; i++) {
+		sysfs_attr_init(&pdata->regs[i].attr.attr);
 		sprintf(pdata->regs[i].name, "%1x", i);
 		pdata->regs[i].attr.attr.mode = S_IRUGO | S_IWUSR;
 		pdata->regs[i].attr.attr.name = pdata->regs[i].name;
diff --git a/drivers/rtc/rtc-rs5c348.c b/drivers/rtc/rtc-rs5c348.c
index 77074ccd285..fd5c7af04ae 100644
--- a/drivers/rtc/rtc-rs5c348.c
+++ b/drivers/rtc/rtc-rs5c348.c
@@ -122,9 +122,12 @@ rs5c348_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	tm->tm_min = bcd2bin(rxbuf[RS5C348_REG_MINS] & RS5C348_MINS_MASK);
 	tm->tm_hour = bcd2bin(rxbuf[RS5C348_REG_HOURS] & RS5C348_HOURS_MASK);
 	if (!pdata->rtc_24h) {
-		tm->tm_hour %= 12;
-		if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM)
+		if (rxbuf[RS5C348_REG_HOURS] & RS5C348_BIT_PM) {
+			tm->tm_hour -= 20;
+			tm->tm_hour %= 12;
 			tm->tm_hour += 12;
+		} else
+			tm->tm_hour %= 12;
 	}
 	tm->tm_wday = bcd2bin(rxbuf[RS5C348_REG_WDAY] & RS5C348_WDAY_MASK);
 	tm->tm_mday = bcd2bin(rxbuf[RS5C348_REG_DAY] & RS5C348_DAY_MASK);
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 133ddcf8339..ef658147e4e 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			bool sync);
+			bool sync, bool *contended);
 extern int compact_pgdat(pg_data_t *pgdat, int order);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
 
@@ -64,7 +64,7 @@ static inline bool compaction_deferred(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync)
+			bool sync, bool *contended)
 {
 	return COMPACT_CONTINUE;
 }
diff --git a/include/linux/string.h b/include/linux/string.h
index ffe0442e18d..b9178812d9d 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -144,8 +144,8 @@ static inline bool strstarts(const char *str, const char *prefix)
 {
 	return strncmp(str, prefix, strlen(prefix)) == 0;
 }
-#endif
 
 extern size_t memweight(const void *ptr, size_t bytes);
+#endif /* __KERNEL__ */
 
 #endif /* _LINUX_STRING_H_ */
diff --git a/mm/compaction.c b/mm/compaction.c
index e78cb968842..7fcd3a52e68 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -51,6 +51,47 @@ static inline bool migrate_async_suitable(int migratetype)
 }
 
 /*
+ * Compaction requires the taking of some coarse locks that are potentially
+ * very heavily contended. Check if the process needs to be scheduled or
+ * if the lock is contended. For async compaction, back out in the event
+ * if contention is severe. For sync compaction, schedule.
+ *
+ * Returns true if the lock is held.
+ * Returns false if the lock is released and compaction should abort
+ */
+static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
+				      bool locked, struct compact_control *cc)
+{
+	if (need_resched() || spin_is_contended(lock)) {
+		if (locked) {
+			spin_unlock_irqrestore(lock, *flags);
+			locked = false;
+		}
+
+		/* async aborts if taking too long or contended */
+		if (!cc->sync) {
+			if (cc->contended)
+				*cc->contended = true;
+			return false;
+		}
+
+		cond_resched();
+		if (fatal_signal_pending(current))
+			return false;
+	}
+
+	if (!locked)
+		spin_lock_irqsave(lock, *flags);
+	return true;
+}
+
+static inline bool compact_trylock_irqsave(spinlock_t *lock,
+			unsigned long *flags, struct compact_control *cc)
+{
+	return compact_checklock_irqsave(lock, flags, false, cc);
+}
+
+/*
 * Isolate free pages onto a private freelist. Caller must hold zone->lock.
 * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
 * pages inside of the pageblock (even though it may still end up isolating
@@ -173,7 +214,7 @@ isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn)
 }
 
 /* Update the number of anon and file isolated pages in the zone */
-static void acct_isolated(struct zone *zone, struct compact_control *cc)
+static void acct_isolated(struct zone *zone, bool locked, struct compact_control *cc)
 {
 	struct page *page;
 	unsigned int count[2] = { 0, };
@@ -181,8 +222,14 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc)
 	list_for_each_entry(page, &cc->migratepages, lru)
 		count[!!page_is_file_cache(page)]++;
 
-	__mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
-	__mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+	/* If locked we can use the interrupt unsafe versions */
+	if (locked) {
+		__mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
+		__mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+	} else {
+		mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
+		mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+	}
 }
 
 /* Similar to reclaim, but different enough that they don't share logic */
@@ -228,6 +275,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 	struct list_head *migratelist = &cc->migratepages;
 	isolate_mode_t mode = 0;
 	struct lruvec *lruvec;
+	unsigned long flags;
+	bool locked;
 
 	/*
 	 * Ensure that there are not too many pages isolated from the LRU
@@ -247,25 +296,22 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 	/* Time to isolate some pages for migration */
 	cond_resched();
 
-	spin_lock_irq(&zone->lru_lock);
+	spin_lock_irqsave(&zone->lru_lock, flags);
+	locked = true;
 	for (; low_pfn < end_pfn; low_pfn++) {
 		struct page *page;
-		bool locked = true;
 
 		/* give a chance to irqs before checking need_resched() */
 		if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) {
-			spin_unlock_irq(&zone->lru_lock);
+			spin_unlock_irqrestore(&zone->lru_lock, flags);
 			locked = false;
 		}
-		if (need_resched() || spin_is_contended(&zone->lru_lock)) {
-			if (locked)
-				spin_unlock_irq(&zone->lru_lock);
-			cond_resched();
-			spin_lock_irq(&zone->lru_lock);
-			if (fatal_signal_pending(current))
-				break;
-		} else if (!locked)
-			spin_lock_irq(&zone->lru_lock);
+
+		/* Check if it is ok to still hold the lock */
+		locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
+								locked, cc);
+		if (!locked)
+			break;
 
 		/*
 		 * migrate_pfn does not necessarily start aligned to a
@@ -349,9 +395,10 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 		}
 	}
 
-	acct_isolated(zone, cc);
+	acct_isolated(zone, locked, cc);
 
-	spin_unlock_irq(&zone->lru_lock);
+	if (locked)
+		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
 	trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
 
@@ -384,6 +431,20 @@ static bool suitable_migration_target(struct page *page)
 }
 
 /*
+ * Returns the start pfn of the last page block in a zone.  This is the starting
+ * point for full compaction of a zone.  Compaction searches for free pages from
+ * the end of each zone, while isolate_freepages_block scans forward inside each
+ * page block.
+ */
+static unsigned long start_free_pfn(struct zone *zone)
+{
+	unsigned long free_pfn;
+	free_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	free_pfn &= ~(pageblock_nr_pages-1);
+	return free_pfn;
+}
+
+/*
  * Based on information in the current compact_control, find blocks
  * suitable for isolating free pages from and then isolate them.
  */
@@ -422,17 +483,6 @@ static void isolate_freepages(struct zone *zone,
 					pfn -= pageblock_nr_pages) {
 		unsigned long isolated;
 
-		/*
-		 * Skip ahead if another thread is compacting in the area
-		 * simultaneously. If we wrapped around, we can only skip
-		 * ahead if zone->compact_cached_free_pfn also wrapped to
-		 * above our starting point.
-		 */
-		if (cc->order > 0 && (!cc->wrapped ||
-				      zone->compact_cached_free_pfn >
-				      cc->start_free_pfn))
-			pfn = min(pfn, zone->compact_cached_free_pfn);
-
 		if (!pfn_valid(pfn))
 			continue;
 
@@ -458,7 +508,16 @@ static void isolate_freepages(struct zone *zone,
 		 * are disabled
 		 */
 		isolated = 0;
-		spin_lock_irqsave(&zone->lock, flags);
+
+		/*
+		 * The zone lock must be held to isolate freepages.
+		 * Unfortunately this is a very coarse lock and can be
+		 * heavily contended if there are parallel allocations
+		 * or parallel compactions. For async compaction do not
+		 * spin on the lock
+		 */
+		if (!compact_trylock_irqsave(&zone->lock, &flags, cc))
+			break;
 		if (suitable_migration_target(page)) {
 			end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
 			isolated = isolate_freepages_block(pfn, end_pfn,
@@ -474,7 +533,15 @@ static void isolate_freepages(struct zone *zone,
 		 */
 		if (isolated) {
 			high_pfn = max(high_pfn, pfn);
-			if (cc->order > 0)
+
+			/*
+			 * If the free scanner has wrapped, update
+			 * compact_cached_free_pfn to point to the highest
+			 * pageblock with free pages. This reduces excessive
+			 * scanning of full pageblocks near the end of the
+			 * zone
+			 */
+			if (cc->order > 0 && cc->wrapped)
 				zone->compact_cached_free_pfn = high_pfn;
 		}
 	}
@@ -484,6 +551,11 @@ static void isolate_freepages(struct zone *zone,
 
 	cc->free_pfn = high_pfn;
 	cc->nr_freepages = nr_freepages;
+
+	/* If compact_cached_free_pfn is reset then set it now */
+	if (cc->order > 0 && !cc->wrapped &&
+			zone->compact_cached_free_pfn == start_free_pfn(zone))
+		zone->compact_cached_free_pfn = high_pfn;
 }
 
 /*
@@ -570,20 +642,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 
 	return ISOLATE_SUCCESS;
 }
 
-/*
- * Returns the start pfn of the last page block in a zone.  This is the starting
- * point for full compaction of a zone.  Compaction searches for free pages from
- * the end of each zone, while isolate_freepages_block scans forward inside each
- * page block.
- */
-static unsigned long start_free_pfn(struct zone *zone)
-{
-	unsigned long free_pfn;
-	free_pfn = zone->zone_start_pfn + zone->spanned_pages;
-	free_pfn &= ~(pageblock_nr_pages-1);
-	return free_pfn;
-}
-
 static int compact_finished(struct zone *zone,
 			    struct compact_control *cc)
 {
@@ -771,7 +829,7 @@ out:
 
 static unsigned long compact_zone_order(struct zone *zone,
 				 int order, gfp_t gfp_mask,
-				 bool sync)
+				 bool sync, bool *contended)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -780,6 +838,7 @@ static unsigned long compact_zone_order(struct zone *zone,
 		.migratetype = allocflags_to_migratetype(gfp_mask),
 		.zone = zone,
 		.sync = sync,
+		.contended = contended,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -801,7 +860,7 @@ int sysctl_extfrag_threshold = 500;
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync)
+			bool sync, bool *contended)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	int may_enter_fs = gfp_mask & __GFP_FS;
@@ -825,7 +884,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 								nodemask) {
 		int status;
 
-		status = compact_zone_order(zone, order, gfp_mask, sync);
+		status = compact_zone_order(zone, order, gfp_mask, sync,
+						contended);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
@@ -861,7 +921,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
 		if (cc->order > 0) {
 			int ok = zone_watermark_ok(zone, cc->order,
 						low_wmark_pages(zone), 0, 0);
-			if (ok && cc->order > zone->compact_order_failed)
+			if (ok && cc->order >= zone->compact_order_failed)
 				zone->compact_order_failed = cc->order + 1;
 			/* Currently async compaction is never deferred. */
 			else if (!ok && cc->sync)
diff --git a/mm/internal.h b/mm/internal.h
index 3314f79d775..b8c91b342e2 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -130,6 +130,7 @@ struct compact_control {
 	int order;			/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
+	bool *contended;		/* True if a lock was contended */
 };
 
 unsigned long
diff --git a/mm/mmap.c b/mm/mmap.c
index e3e86914f11..9adee9fc0d8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2309,7 +2309,7 @@ void exit_mmap(struct mm_struct *mm)
 	}
 	vm_unacct_memory(nr_accounted);
 
-	BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
+	WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
 }
 
 /* Insert vm structure into process list sorted by address
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 009ac285fea..c66fb875104 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1928,6 +1928,17 @@ this_zone_full:
 		zlc_active = 0;
 		goto zonelist_scan;
 	}
+
+	if (page)
+		/*
+		 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
+		 * necessary to allocate the page. The expectation is
+		 * that the caller is taking steps that will free more
+		 * memory. The caller should avoid the page being used
+		 * for !PFMEMALLOC purposes.
+		 */
+		page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
+
 	return page;
 }
 
@@ -2091,7 +2102,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
 	int migratetype, bool sync_migration,
-	bool *deferred_compaction,
+	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
 	struct page *page;
@@ -2106,7 +2117,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
-						nodemask, sync_migration);
+						nodemask, sync_migration,
+						contended_compaction);
 	current->flags &= ~PF_MEMALLOC;
 
 	if (*did_some_progress != COMPACT_SKIPPED) {
@@ -2152,7 +2164,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
 	int migratetype, bool sync_migration,
-	bool *deferred_compaction,
+	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
 	return NULL;
@@ -2325,6 +2337,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	unsigned long did_some_progress;
 	bool sync_migration = false;
 	bool deferred_compaction = false;
+	bool contended_compaction = false;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -2389,14 +2402,6 @@ rebalance:
 				zonelist, high_zoneidx, nodemask,
 				preferred_zone, migratetype);
 		if (page) {
-			/*
-			 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
-			 * necessary to allocate the page. The expectation is
-			 * that the caller is taking steps that will free more
-			 * memory. The caller should avoid the page being used
-			 * for !PFMEMALLOC purposes.
-			 */
-			page->pfmemalloc = true;
-
 			goto got_pg;
 		}
 	}
@@ -2422,6 +2427,7 @@ rebalance:
 					nodemask,
 					alloc_flags, preferred_zone,
 					migratetype, sync_migration,
+					&contended_compaction,
 					&deferred_compaction,
 					&did_some_progress);
 	if (page)
@@ -2431,10 +2437,11 @@ rebalance:
 
 	/*
 	 * If compaction is deferred for high-order allocations, it is because
 	 * sync compaction recently failed. In this is the case and the caller
-	 * has requested the system not be heavily disrupted, fail the
-	 * allocation now instead of entering direct reclaim
+	 * requested a movable allocation that does not heavily disrupt the
+	 * system then fail the allocation instead of entering direct reclaim.
 	 */
-	if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD))
+	if ((deferred_compaction || contended_compaction) &&
+						(gfp_mask & __GFP_NO_KSWAPD))
 		goto nopage;
 
 	/* Try direct reclaim and then allocating */
@@ -2505,6 +2512,7 @@ rebalance:
 					nodemask,
 					alloc_flags, preferred_zone,
 					migratetype, sync_migration,
+					&contended_compaction,
 					&deferred_compaction,
 					&did_some_progress);
 		if (page)
@@ -2569,8 +2577,6 @@ retry_cpuset:
 		page = __alloc_pages_slowpath(gfp_mask, order,
 				zonelist, high_zoneidx, nodemask,
 				preferred_zone, migratetype);
-	else
-		page->pfmemalloc = false;
 
 	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
 
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 913d6bdfdda..ca05ba217f5 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3016,7 +3016,8 @@ sub process {
 					$herectx .= raw_line($linenr, $n) . "\n";
 				}
 
-				if (($stmts =~ tr/;/;/) == 1) {
+				if (($stmts =~ tr/;/;/) == 1 &&
+				    $stmts !~ /^\s*(if|while|for|switch)\b/) {
 					WARN("SINGLE_STATEMENT_DO_WHILE_MACRO",
 					     "Single statement macros should not use a do {} while (0) loop\n" . "$herectx");
 				}