Diffstat (limited to 'arch/x86')
120 files changed, 768 insertions, 429 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0eacb1ffb42..9458685902b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1216,8 +1216,8 @@ config NUMA_EMU
 
 config NODES_SHIFT
 	int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
-	range 1 9
-	default "9" if MAXSMP
+	range 1 10
+	default "10" if MAXSMP
 	default "6" if X86_64
 	default "4" if X86_NUMAQ
 	default "3"
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index daef6cd2b45..1a8f8649c03 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <asm/i387.h>
 
 struct crypto_fpu_ctx {
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 280c019cfad..0350311906a 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -21,7 +21,6 @@
 #include <linux/fcntl.h>
 #include <linux/ptrace.h>
 #include <linux/user.h>
-#include <linux/slab.h>
 #include <linux/binfmts.h>
 #include <linux/personality.h>
 #include <linux/init.h>
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 59b4556a5b9..e790bc1fbfa 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -626,7 +626,7 @@ ia32_sys_call_table:
 	.quad stub32_sigreturn
 	.quad stub32_clone		/* 120 */
 	.quad sys_setdomainname
-	.quad sys_uname
+	.quad sys_newuname
 	.quad sys_modify_ldt
 	.quad compat_sys_adjtimex
 	.quad sys32_mprotect		/* 125 */
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 74c35431b7d..626be156d88 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -40,6 +40,7 @@
 #include <linux/ptrace.h>
 #include <linux/highuid.h>
 #include <linux/sysctl.h>
+#include <linux/slab.h>
 #include <asm/mman.h>
 #include <asm/types.h>
 #include <asm/uaccess.h>
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index ba19ad4c47d..86a0ff0aeac 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -21,6 +21,7 @@
 #define _ASM_X86_AMD_IOMMU_TYPES_H
 
 #include <linux/types.h>
+#include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 
@@ -140,6 +141,7 @@
 
 /* constants to configure the command buffer */
 #define CMD_BUFFER_SIZE    8192
+#define CMD_BUFFER_UNINITIALIZED 1
 #define CMD_BUFFER_ENTRIES 512
 #define MMIO_CMD_SIZE_SHIFT 56
 #define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
@@ -237,6 +239,7 @@ struct protection_domain {
 	struct list_head list;  /* for list of all protection domains */
 	struct list_head dev_list; /* List of all devices in this domain */
 	spinlock_t lock;	/* mostly used to lock the page table*/
+	struct mutex api_lock;	/* protect page tables in the iommu-api path */
 	u16 id;			/* the domain id written to the device table */
 	int mode;		/* paging mode (0-6 levels) */
 	u64 *pt_root;		/* page table root pointer */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 635f03bb499..d07b44f7d1d 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -82,6 +82,9 @@ enum fixed_addresses {
 #endif
 	FIX_DBGP_BASE,
 	FIX_EARLYCON_MEM_BASE,
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+	FIX_OHCI1394_BASE,
+#endif
#ifdef CONFIG_X86_LOCAL_APIC
 	FIX_APIC_BASE,	/* local (CPU) APIC) -- required for SMP or not */
 #endif
@@ -132,9 +135,6 @@ enum fixed_addresses {
 	   (__end_of_permanent_fixed_addresses & (TOTAL_FIX_BTMAPS - 1))
 	 : __end_of_permanent_fixed_addresses,
 	FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
-	FIX_OHCI1394_BASE,
-#endif
 #ifdef CONFIG_X86_32
 	FIX_WP_TEST,
 #endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index a929c9ede33..46c0fe05f23 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -133,6 +133,7 @@ extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
 
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
+extern void setup_vector_irq(int cpu);
 
 #ifdef CONFIG_X86_IO_APIC
 extern void lock_vector_lock(void);
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index a1dcfa3ab17..30a3e977612 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -347,6 +347,7 @@ extern void __iomem *early_ioremap(resource_size_t phys_addr,
 extern void __iomem *early_memremap(resource_size_t phys_addr,
 				    unsigned long size);
 extern void early_iounmap(void __iomem *addr, unsigned long size);
+extern void fixup_early_ioremap(void);
 
 #define IO_SPACE_LIMIT 0xffff
 
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index ba0eed8aa1a..b60f2924c41 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -28,22 +28,39 @@
 
 #ifndef __ASSEMBLY__
 #include <asm/hw_irq.h>
-#include <asm/kvm_para.h>
 
 /*G:030
  * But first, how does our Guest contact the Host to ask for privileged
  * operations?  There are two ways: the direct way is to make a "hypercall",
  * to make requests of the Host Itself.
  *
- * We use the KVM hypercall mechanism, though completely different hypercall
- * numbers. Seventeen hypercalls are available: the hypercall number is put in
- * the %eax register, and the arguments (when required) are placed in %ebx,
- * %ecx, %edx and %esi.  If a return value makes sense, it's returned in %eax.
+ * Our hypercall mechanism uses the highest unused trap code (traps 32 and
+ * above are used by real hardware interrupts).  Seventeen hypercalls are
+ * available: the hypercall number is put in the %eax register, and the
+ * arguments (when required) are placed in %ebx, %ecx, %edx and %esi.
+ * If a return value makes sense, it's returned in %eax.
  *
  * Grossly invalid calls result in Sudden Death at the hands of the vengeful
  * Host, rather than returning failure.  This reflects Winston Churchill's
  * definition of a gentleman: "someone who is only rude intentionally".
-:*/
+ */
+static inline unsigned long
+hcall(unsigned long call,
+      unsigned long arg1, unsigned long arg2, unsigned long arg3,
+      unsigned long arg4)
+{
+	/* "int" is the Intel instruction to trigger a trap. */
+	asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY)
+		     /* The call in %eax (aka "a") might be overwritten */
+		     : "=a"(call)
+		       /* The arguments are in %eax, %ebx, %ecx, %edx & %esi */
+		     : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4)
+		       /* "memory" means this might write somewhere in memory.
+			* This isn't true for all calls, but it's safe to tell
+			* gcc that it might happen so it doesn't get clever. */
+		     : "memory");
+	return call;
+}
 
 /* Can't use our min() macro here: needs to be a constant */
 #define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32)
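For readers following the lguest change: a guest-side call through the new hcall() helper above is an ordinary C function call; the trap and the register shuffling are hidden inside the inline asm. A minimal usage sketch (using the LHCALL_FLUSH_ASYNC and LHCALL_HALT hypercall numbers defined near the top of this same header):

	/* Ask the Host to run any queued asynchronous hypercalls now. */
	hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);

	/* Pause this Guest CPU until the next interrupt arrives. */
	hcall(LHCALL_HALT, 0, 0, 0, 0);

Unused argument slots are simply passed as zero; the Host ignores them for hypercalls that take fewer than four arguments.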
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1cd58cdbc03..4604e6a54d3 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -105,6 +105,8 @@
 #define MSR_AMD64_PATCH_LEVEL		0x0000008b
 #define MSR_AMD64_NB_CFG		0xc001001f
 #define MSR_AMD64_PATCH_LOADER		0xc0010020
+#define MSR_AMD64_OSVW_ID_LENGTH	0xc0010140
+#define MSR_AMD64_OSVW_STATUS		0xc0010141
 #define MSR_AMD64_IBSFETCHCTL		0xc0011030
 #define MSR_AMD64_IBSFETCHLINAD		0xc0011031
 #define MSR_AMD64_IBSFETCHPHYSAD	0xc0011032
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 47339a1ac7b..2984a25ff38 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -19,7 +19,6 @@
 #include <asm/paravirt.h>
 
 #include <linux/bitops.h>
-#include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 0061ea26306..cd40aba6aa9 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -31,6 +31,7 @@
 #include <linux/module.h>
 #include <linux/dmi.h>
 #include <linux/irq.h>
+#include <linux/slab.h>
 #include <linux/bootmem.h>
 #include <linux/ioport.h>
 #include <linux/pci.h>
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 3a4bf35c179..1a160d5d44d 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -8,6 +8,7 @@
 #include <linux/vmalloc.h>
 #include <linux/memory.h>
 #include <linux/stop_machine.h>
+#include <linux/slab.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index adb0ba02570..f854d89b7ed 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -18,8 +18,8 @@
  */
 
 #include <linux/pci.h>
-#include <linux/gfp.h>
 #include <linux/bitmap.h>
+#include <linux/slab.h>
 #include <linux/debugfs.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
@@ -118,7 +118,7 @@ static bool check_device(struct device *dev)
 		return false;
 
 	/* No device or no PCI device */
-	if (!dev || dev->bus != &pci_bus_type)
+	if (dev->bus != &pci_bus_type)
 		return false;
 
 	devid = get_device_id(dev);
@@ -392,6 +392,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 	u32 tail, head;
 	u8 *target;
 
+	WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED);
 	tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 	target = iommu->cmd_buf + tail;
 	memcpy_toio(target, cmd, sizeof(*cmd));
@@ -2186,7 +2187,7 @@ static void prealloc_protection_domains(void)
 	struct dma_ops_domain *dma_dom;
 	u16 devid;
 
-	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+	for_each_pci_dev(dev) {
 
 		/* Do we handle this device? */
 		if (!check_device(&dev->dev))
@@ -2298,7 +2299,7 @@ static void cleanup_domain(struct protection_domain *domain)
 	list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) {
 		struct device *dev = dev_data->dev;
 
-		do_detach(dev);
+		__detach_device(dev);
 		atomic_set(&dev_data->bind, 0);
 	}
 
@@ -2327,6 +2328,7 @@ static struct protection_domain *protection_domain_alloc(void)
 		return NULL;
 
 	spin_lock_init(&domain->lock);
+	mutex_init(&domain->api_lock);
 	domain->id = domain_id_alloc();
 	if (!domain->id)
 		goto out_err;
@@ -2379,9 +2381,7 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom)
 
 	free_pagetable(domain);
 
-	domain_id_free(domain->id);
-
-	kfree(domain);
+	protection_domain_free(domain);
 
 	dom->priv = NULL;
 }
@@ -2456,6 +2456,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom,
 	iova  &= PAGE_MASK;
 	paddr &= PAGE_MASK;
 
+	mutex_lock(&domain->api_lock);
+
 	for (i = 0; i < npages; ++i) {
 		ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k);
 		if (ret)
@@ -2465,6 +2467,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom,
 		paddr += PAGE_SIZE;
 	}
 
+	mutex_unlock(&domain->api_lock);
+
 	return 0;
 }
 
@@ -2477,12 +2481,16 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom,
 
 	iova  &= PAGE_MASK;
 
+	mutex_lock(&domain->api_lock);
+
 	for (i = 0; i < npages; ++i) {
 		iommu_unmap_page(domain, iova, PM_MAP_4k);
 		iova  += PAGE_SIZE;
 	}
 
 	iommu_flush_tlb_pde(domain);
+
+	mutex_unlock(&domain->api_lock);
 }
 
 static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 9dc91b43147..6360abf993d 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -19,8 +19,8 @@
 
 #include <linux/pci.h>
 #include <linux/acpi.h>
-#include <linux/gfp.h>
 #include <linux/list.h>
+#include <linux/slab.h>
 #include <linux/sysdev.h>
 #include <linux/interrupt.h>
 #include <linux/msi.h>
@@ -138,9 +138,9 @@ int amd_iommus_present;
 bool amd_iommu_np_cache __read_mostly;
 
 /*
- * Set to true if ACPI table parsing and hardware intialization went properly
+ * The ACPI table parsing functions set this variable on an error
 */
-static bool amd_iommu_initialized;
+static int __initdata amd_iommu_init_err;
 
 /*
  * List of protection domains - used during resume
 */
@@ -391,9 +391,11 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table)
 	 */
 	for (i = 0; i < table->length; ++i)
 		checksum += p[i];
-	if (checksum != 0)
+	if (checksum != 0) {
 		/* ACPI table corrupt */
-		return -ENODEV;
+		amd_iommu_init_err = -ENODEV;
+		return 0;
+	}
 
 	p += IVRS_HEADER_LENGTH;
 
@@ -436,7 +438,7 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
 	if (cmd_buf == NULL)
 		return NULL;
 
-	iommu->cmd_buf_size = CMD_BUFFER_SIZE;
+	iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED;
 
 	return cmd_buf;
 }
@@ -472,12 +474,13 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu)
 		    &entry, sizeof(entry));
 
 	amd_iommu_reset_cmd_buffer(iommu);
+	iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED);
 }
 
 static void __init free_command_buffer(struct amd_iommu *iommu)
 {
 	free_pages((unsigned long)iommu->cmd_buf,
-		   get_order(iommu->cmd_buf_size));
+		   get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED)));
 }
 
 /* allocates the memory where the IOMMU will log its events to */
@@ -920,11 +923,16 @@ static int __init init_iommu_all(struct acpi_table_header *table)
 				    h->mmio_phys);
+
 			iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
-			if (iommu == NULL)
-				return -ENOMEM;
+			if (iommu == NULL) {
+				amd_iommu_init_err = -ENOMEM;
+				return 0;
+			}
+
 			ret = init_iommu_one(iommu, h);
-			if (ret)
-				return ret;
+			if (ret) {
+				amd_iommu_init_err = ret;
+				return 0;
+			}
 			break;
 		default:
 			break;
@@ -934,8 +942,6 @@ static int __init init_iommu_all(struct acpi_table_header *table)
 	}
 	WARN_ON(p != end);
 
-	amd_iommu_initialized = true;
-
 	return 0;
 }
@@ -1211,6 +1217,10 @@ static int __init amd_iommu_init(void)
 	if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
 		return -ENODEV;
 
+	ret = amd_iommu_init_err;
+	if (ret)
+		goto out;
+
 	dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE);
 	alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
 	rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
@@ -1270,12 +1280,19 @@ static int __init amd_iommu_init(void)
 	if (acpi_table_parse("IVRS", init_iommu_all) != 0)
 		goto free;
 
-	if (!amd_iommu_initialized)
+	if (amd_iommu_init_err) {
+		ret = amd_iommu_init_err;
 		goto free;
+	}
 
 	if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
 		goto free;
 
+	if (amd_iommu_init_err) {
+		ret = amd_iommu_init_err;
+		goto free;
+	}
+
 	ret = sysdev_class_register(&amd_iommu_sysdev_class);
 	if (ret)
 		goto free;
@@ -1288,6 +1305,8 @@ static int __init amd_iommu_init(void)
 	if (ret)
 		goto free;
 
+	enable_iommus();
+
 	if (iommu_pass_through)
 		ret = amd_iommu_init_passthrough();
 	else
@@ -1300,8 +1319,6 @@ static int __init amd_iommu_init(void)
 
 	amd_iommu_init_notifier();
 
-	enable_iommus();
-
 	if (iommu_pass_through)
 		goto out;
 
@@ -1315,6 +1332,7 @@ out:
 	return ret;
 
 free:
+	disable_iommus();
 
 	amd_iommu_uninit_devices();
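A note on the CMD_BUFFER_UNINITIALIZED handling above: CMD_BUFFER_SIZE (8192) leaves the low bits of cmd_buf_size zero, so bit 0 is free to carry an "IOMMU does not own this buffer yet" marker without adding a field. A minimal sketch of the pattern, with hypothetical local names:

	u32 size;

	/* Allocation path: mark the buffer as not yet handed to the IOMMU. */
	size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED;

	/* Enable path: clear the marker once the hardware owns the buffer. */
	size &= ~CMD_BUFFER_UNINITIALIZED;

	/* Anyone needing the real size must mask the marker off first. */
	order = get_order(size & ~CMD_BUFFER_UNINITIALIZED);

This is what lets __iommu_queue_command() WARN_ON() any attempt to queue a command into a buffer the hardware has not been told about yet.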
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 4b7099526d2..a35347501d3 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -33,6 +33,7 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/sysdev.h>
+#include <linux/slab.h>
 #include <linux/pm.h>
 #include <linux/pci.h>
 #include <linux/sfi.h>
@@ -428,7 +429,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n,
 
 static __init int apbt_late_init(void)
 {
-	if (disable_apbt_percpu)
+	if (disable_apbt_percpu || !apb_timer_block_enabled)
 		return 0;
 	/* This notifier should be called after workqueue is ready */
 	hotcpu_notifier(apbt_cpuhp_notify, -20);
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 3704997e8b2..b5d8b0bcf23 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -393,6 +393,7 @@ void __init gart_iommu_hole_init(void)
 	for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
 		int bus;
 		int dev_base, dev_limit;
+		u32 ctl;
 
 		bus = bus_dev_ranges[i].bus;
 		dev_base = bus_dev_ranges[i].dev_base;
@@ -406,7 +407,19 @@ void __init gart_iommu_hole_init(void)
 			gart_iommu_aperture = 1;
 			x86_init.iommu.iommu_init = gart_iommu_init;
 
-			aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7;
+			ctl = read_pci_config(bus, slot, 3,
+					      AMD64_GARTAPERTURECTL);
+
+			/*
+			 * Before we do anything else disable the GART. It may
+			 * still be enabled if we boot into a crash-kernel here.
+			 * Reconfiguring the GART while it is enabled could have
+			 * unknown side-effects.
+			 */
+			ctl &= ~GARTEN;
+			write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
+
+			aper_order = (ctl >> 1) & 7;
 			aper_size = (32 * 1024 * 1024) << aper_order;
 			aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
 			aper_base <<= 25;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 00187f1fcfb..e5a4a1e0161 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1640,8 +1640,10 @@ int __init APIC_init_uniprocessor(void)
 	}
 #endif
 
+#ifndef CONFIG_SMP
 	enable_IR_x2apic();
 	default_setup_apic_routing();
+#endif
 
 	verify_local_APIC();
 	connect_bsp_APIC();
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index dd2b5f26464..03ba1b895f5 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -42,6 +42,7 @@
 #include <linux/errno.h>
 #include <linux/acpi.h>
 #include <linux/init.h>
+#include <linux/gfp.h>
 #include <linux/nmi.h>
 #include <linux/smp.h>
 #include <linux/io.h>
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e4e0ddcb154..eb2789c3f72 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -36,6 +36,7 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/jiffies.h>	/* time_after() */
+#include <linux/slab.h>
 #ifdef CONFIG_ACPI
 #include <acpi/acpi_bus.h>
 #endif
@@ -1268,6 +1269,14 @@ void __setup_vector_irq(int cpu)
 	/* Mark the inuse vectors */
 	for_each_irq_desc(irq, desc) {
 		cfg = desc->chip_data;
+
+		/*
+		 * If it is a legacy IRQ handled by the legacy PIC, this cpu
+		 * will be part of the irq_cfg's domain.
+		 */
+		if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq))
+			cpumask_set_cpu(cpu, cfg->domain);
+
 		if (!cpumask_test_cpu(cpu, cfg->domain))
 			continue;
 		vector = cfg->vector;
@@ -2536,6 +2545,9 @@ void irq_force_complete_move(int irq)
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg = desc->chip_data;
 
+	if (!cfg)
+		return;
+
 	__irq_complete_move(&desc, cfg->vector);
 }
 #else
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index 8aa65adbd25..1edaf15c0b8 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -18,6 +18,7 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/sysdev.h>
 #include <linux/sysctl.h>
 #include <linux/percpu.h>
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 49dbeaef2a2..c085d52dbaf 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -17,6 +17,7 @@
 #include <linux/ctype.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
+#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/init.h>
 #include <linux/io.h>
diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c
index 30f25a75fe2..5de7f4c5697 100644
--- a/arch/x86/kernel/bootflag.c
+++ b/arch/x86/kernel/bootflag.c
@@ -5,7 +5,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/string.h>
-#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/acpi.h>
 #include <asm/io.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 1b1920fa7c8..459168083b7 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,6 +33,7 @@
 #include <linux/cpufreq.h>
 #include <linux/compiler.h>
 #include <linux/dmi.h>
+#include <linux/slab.h>
 #include <trace/events/power.h>
 
 #include <linux/acpi.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 006b278b0d5..c587db472a7 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -20,7 +20,6 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/cpufreq.h>
 
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
index ac27ec2264d..16e3483be9e 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -80,6 +80,7 @@
 #include <linux/cpufreq.h>
 #include <linux/pci.h>
 #include <linux/errno.h>
+#include <linux/slab.h>
 
 #include <asm/processor-cyrix.h>
 
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index da5f70fcb76..e7b559d74c5 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -9,7 +9,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/cpufreq.h>
 #include <linux/timex.h>
 
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 86961519372..7b8a8ba67b0 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -25,7 +25,6 @@
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/cpufreq.h>
-#include <linux/slab.h>
 #include <linux/cpumask.h>
 #include <linux/timex.h>
 
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
index ff36d2979a9..ce7cde713e7 100644
--- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c
@@ -30,6 +30,7 @@
 #include <linux/sched.h>
 #include <linux/cpufreq.h>
 #include <linux/compiler.h>
+#include <linux/slab.h>
 
 #include <linux/acpi.h>
 #include <linux/io.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
index cb01dac267d..b3379d6a5c5 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c
@@ -13,7 +13,6 @@
 #include <linux/init.h>
 #include <linux/cpufreq.h>
 #include <linux/ioport.h>
-#include <linux/slab.h>
 #include <linux/timex.h>
 #include <linux/io.h>
 
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index d360b56e982..b6215b9798e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -929,7 +929,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
 		powernow_table[i].index = index;
 
 		/* Frequency may be rounded for these */
-		if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) {
+		if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
+				 || boot_cpu_data.x86 == 0x11) {
 			powernow_table[i].frequency =
 				freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7);
 		} else
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 8d672ef162c..9b1ff37de46 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -20,6 +20,7 @@
 #include <linux/sched.h>	/* current */
 #include <linux/delay.h>
 #include <linux/compiler.h>
+#include <linux/gfp.h>
 
 #include <asm/msr.h>
 #include <asm/processor.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 2ce8e0b5cc5..561758e9518 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -23,7 +23,6 @@
 #include <linux/init.h>
 #include <linux/cpufreq.h>
 #include <linux/pci.h>
-#include <linux/slab.h>
 #include <linux/sched.h>
 
 #include "speedstep-lib.h"
 
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index ad0083abfa2..a94ec6be69f 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -13,7 +13,6 @@
 #include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/cpufreq.h>
-#include <linux/slab.h>
 
 #include <asm/msr.h>
 #include <asm/tsc.h>
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
index 04d73c114e4..8abd869baab 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c
@@ -17,7 +17,6 @@
 #include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/cpufreq.h>
-#include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <asm/ist.h>
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 7e1cca13af3..1366c7cfd48 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -47,6 +47,27 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 		(c->x86 == 0x6 && c->x86_model >= 0x0e))
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 
+	/*
+	 * Atom erratum AAE44/AAF40/AAG38/AAH41:
+	 *
+	 * A race condition between speculative fetches and invalidating
+	 * a large page.  This is worked around in microcode, but we
+	 * need the microcode to have already been loaded... so if it is
+	 * not, recommend a BIOS update and disable large pages.
+	 */
+	if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2) {
+		u32 ucode, junk;
+
+		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+		sync_core();
+		rdmsr(MSR_IA32_UCODE_REV, junk, ucode);
+
+		if (ucode < 0x20e) {
+			printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n");
+			clear_cpu_cap(c, X86_FEATURE_PSE);
+		}
+	}
+
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
 #else
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 73734baa50f..e7dbde7bfed 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -22,6 +22,7 @@
 #include <linux/kdebug.h>
 #include <linux/cpu.h>
 #include <linux/sched.h>
+#include <linux/gfp.h>
 #include <asm/mce.h>
 #include <asm/apic.h>
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3ab9c886b61..8a6f0afa767 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -26,6 +26,7 @@
 #include <linux/sched.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
 #include <linux/poll.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index cda932ca3ad..224392d8fe8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -21,6 +21,7 @@
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/sysfs.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index d15df6e49bf..62b48e40920 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -5,6 +5,7 @@
 * Author: Andi Kleen
 */
 
+#include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 9aa5dc76ff4..fd31a441c61 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -6,7 +6,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/io.h>
 #include <linux/mm.h>
 
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index e006e56f699..79289632cb2 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -5,6 +5,7 @@
 #include <linux/module.h>
 #include <linux/ctype.h>
 #include <linux/string.h>
+#include <linux/slab.h>
 #include <linux/init.h>
 
 #define LINE_SIZE 80
 
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 42aafd11e17..db5bdc8addf 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -21,6 +21,7 @@
 #include <linux/kdebug.h>
 #include <linux/sched.h>
 #include <linux/uaccess.h>
+#include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/cpu.h>
 #include <linux/bitops.h>
@@ -28,6 +29,7 @@
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
+#include <asm/compat.h>
 
 static u64 perf_event_mask __read_mostly;
 
@@ -133,8 +135,8 @@ struct x86_pmu {
 	int		(*handle_irq)(struct pt_regs *);
 	void		(*disable_all)(void);
 	void		(*enable_all)(void);
-	void		(*enable)(struct hw_perf_event *, int);
-	void		(*disable)(struct hw_perf_event *, int);
+	void		(*enable)(struct perf_event *);
+	void		(*disable)(struct perf_event *);
 	unsigned	eventsel;
 	unsigned	perfctr;
 	u64		(*event_map)(int);
@@ -157,6 +159,11 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
+
+	int		(*cpu_prepare)(int cpu);
+	void		(*cpu_starting)(int cpu);
+	void		(*cpu_dying)(int cpu);
+	void		(*cpu_dead)(int cpu);
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -165,8 +172,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };
 
-static int x86_perf_event_set_period(struct perf_event *event,
-			     struct hw_perf_event *hwc, int idx);
+static int x86_perf_event_set_period(struct perf_event *event);
 
 /*
  * Generalized hw caching related hw_event table, filled
@@ -189,11 +195,12 @@ static u64 __read_mostly hw_cache_event_ids
  * Returns the delta events processed.
 */
 static u64
-x86_perf_event_update(struct perf_event *event,
-			struct hw_perf_event *hwc, int idx)
+x86_perf_event_update(struct perf_event *event)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	int shift = 64 - x86_pmu.event_bits;
 	u64 prev_raw_count, new_raw_count;
+	int idx = hwc->idx;
 	s64 delta;
 
 	if (idx == X86_PMC_IDX_FIXED_BTS)
@@ -293,7 +300,7 @@ static inline bool bts_available(void)
 	return x86_pmu.enable_bts != NULL;
 }
 
-static inline void init_debug_store_on_cpu(int cpu)
+static void init_debug_store_on_cpu(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 
@@ -305,7 +312,7 @@ static inline void init_debug_store_on_cpu(int cpu)
 		     (u32)((u64)(unsigned long)ds >> 32));
 }
 
-static inline void fini_debug_store_on_cpu(int cpu)
+static void fini_debug_store_on_cpu(int cpu)
 {
 	if (!per_cpu(cpu_hw_events, cpu).ds)
 		return;
@@ -638,7 +645,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		if (test_bit(hwc->idx, used_mask))
 			break;
 
-		set_bit(hwc->idx, used_mask);
+		__set_bit(hwc->idx, used_mask);
 		if (assign)
 			assign[i] = hwc->idx;
 	}
@@ -687,7 +694,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			if (j == X86_PMC_IDX_MAX)
 				break;
 
-			set_bit(j, used_mask);
+			__set_bit(j, used_mask);
 
 			if (assign)
 				assign[i] = j;
@@ -780,6 +787,7 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
 		hwc->last_tag == cpuc->tags[i];
 }
 
+static int x86_pmu_start(struct perf_event *event);
 static void x86_pmu_stop(struct perf_event *event);
 
 void hw_perf_enable(void)
@@ -796,6 +804,7 @@ void hw_perf_enable(void)
 		return;
 
 	if (cpuc->n_added) {
+		int n_running = cpuc->n_events - cpuc->n_added;
 		/*
 		 * apply assignment obtained either from
 		 * hw_perf_group_sched_in() or x86_pmu_enable()
@@ -803,8 +812,7 @@ void hw_perf_enable(void)
 		 * step1: save events moving to new counters
 		 * step2: reprogram moved events into new counters
 		 */
-		for (i = 0; i < cpuc->n_events; i++) {
-
+		for (i = 0; i < n_running; i++) {
 			event = cpuc->event_list[i];
 			hwc = &event->hw;
 
@@ -819,29 +827,18 @@ void hw_perf_enable(void)
 				continue;
 
 			x86_pmu_stop(event);
-
-			hwc->idx = -1;
 		}
 
 		for (i = 0; i < cpuc->n_events; i++) {
-
 			event = cpuc->event_list[i];
 			hwc = &event->hw;
 
-			if (hwc->idx == -1) {
+			if (!match_prev_assignment(hwc, cpuc, i))
 				x86_assign_hw_event(event, cpuc, i);
-				x86_perf_event_set_period(event, hwc, hwc->idx);
-			}
-			/*
-			 * need to mark as active because x86_pmu_disable()
-			 * clear active_mask and events[] yet it preserves
-			 * idx
-			 */
-			set_bit(hwc->idx, cpuc->active_mask);
-			cpuc->events[hwc->idx] = event;
+			else if (i < n_running)
+				continue;
 
-			x86_pmu.enable(hwc, hwc->idx);
-			perf_event_update_userpage(event);
+			x86_pmu_start(event);
 		}
 		cpuc->n_added = 0;
 		perf_events_lapic_init();
@@ -853,15 +850,16 @@
 	x86_pmu.enable_all();
 }
 
-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
 {
-	(void)checking_wrmsrl(hwc->config_base + idx,
+	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
 			      hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
 }
 
-static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+static inline void x86_pmu_disable_event(struct perf_event *event)
 {
-	(void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
+	struct hw_perf_event *hwc = &event->hw;
+	(void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config);
 }
 
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -871,12 +869,12 @@ static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
 * To be called with the event disabled in hw:
 */
 static int
-x86_perf_event_set_period(struct perf_event *event,
-			     struct hw_perf_event *hwc, int idx)
+x86_perf_event_set_period(struct perf_event *event)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	s64 left = atomic64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
-	int err, ret = 0;
+	int err, ret = 0, idx = hwc->idx;
 
 	if (idx == X86_PMC_IDX_FIXED_BTS)
 		return 0;
@@ -922,11 +920,11 @@ x86_perf_event_set_period(struct perf_event *event,
 	return ret;
 }
 
-static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static void x86_pmu_enable_event(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	if (cpuc->enabled)
-		__x86_pmu_enable_event(hwc, idx);
+		__x86_pmu_enable_event(&event->hw);
 }
 
 /*
@@ -962,34 +960,32 @@ static int x86_pmu_enable(struct perf_event *event)
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 
 	cpuc->n_events = n;
-	cpuc->n_added  = n - n0;
+	cpuc->n_added += n - n0;
 
 	return 0;
 }
 
 static int x86_pmu_start(struct perf_event *event)
 {
-	struct hw_perf_event *hwc = &event->hw;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int idx = event->hw.idx;
 
-	if (hwc->idx == -1)
+	if (idx == -1)
 		return -EAGAIN;
 
-	x86_perf_event_set_period(event, hwc, hwc->idx);
-	x86_pmu.enable(hwc, hwc->idx);
+	x86_perf_event_set_period(event);
+	cpuc->events[idx] = event;
+	__set_bit(idx, cpuc->active_mask);
+	x86_pmu.enable(event);
+	perf_event_update_userpage(event);
 
 	return 0;
 }
 
 static void x86_pmu_unthrottle(struct perf_event *event)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct hw_perf_event *hwc = &event->hw;
-
-	if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
-				cpuc->events[hwc->idx] != event))
-		return;
-
-	x86_pmu.enable(hwc, hwc->idx);
+	int ret = x86_pmu_start(event);
+	WARN_ON_ONCE(ret);
 }
 
 void perf_event_print_debug(void)
@@ -1049,18 +1045,16 @@ static void x86_pmu_stop(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
-	/*
-	 * Must be done before we disable, otherwise the nmi handler
-	 * could reenable again:
-	 */
-	clear_bit(idx, cpuc->active_mask);
-	x86_pmu.disable(hwc, idx);
+	if (!__test_and_clear_bit(idx, cpuc->active_mask))
+		return;
+
+	x86_pmu.disable(event);
 
 	/*
 	 * Drain the remaining delta count out of a event
 	 * that we are disabling:
 	 */
-	x86_perf_event_update(event, hwc, idx);
+	x86_perf_event_update(event);
 
 	cpuc->events[idx] = NULL;
 }
@@ -1108,7 +1102,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 		event = cpuc->events[idx];
 		hwc = &event->hw;
 
-		val = x86_perf_event_update(event, hwc, idx);
+		val = x86_perf_event_update(event);
 		if (val & (1ULL << (x86_pmu.event_bits - 1)))
 			continue;
 
@@ -1118,11 +1112,11 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 		handled		= 1;
 		data.period	= event->hw.last_period;
 
-		if (!x86_perf_event_set_period(event, hwc, idx))
+		if (!x86_perf_event_set_period(event))
 			continue;
 
 		if (perf_event_overflow(event, 1, &data, regs))
-			x86_pmu.disable(hwc, idx);
+			x86_pmu_stop(event);
 	}
 
 	if (handled)
@@ -1309,7 +1303,7 @@ int hw_perf_group_sched_in(struct perf_event *leader,
 	memcpy(cpuc->assign, assign, n0*sizeof(int));
 
 	cpuc->n_events  = n0;
-	cpuc->n_added   = n1;
+	cpuc->n_added  += n1;
 	ctx->nr_active += n1;
 
 	/*
@@ -1337,6 +1331,41 @@ undo:
 
 #include "perf_event_p6.c"
 #include "perf_event_intel.c"
 
+static int __cpuinit
+x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+	int ret = NOTIFY_OK;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		if (x86_pmu.cpu_prepare)
+			ret = x86_pmu.cpu_prepare(cpu);
+		break;
+
+	case CPU_STARTING:
+		if (x86_pmu.cpu_starting)
+			x86_pmu.cpu_starting(cpu);
+		break;
+
+	case CPU_DYING:
+		if (x86_pmu.cpu_dying)
+			x86_pmu.cpu_dying(cpu);
+		break;
+
+	case CPU_UP_CANCELED:
+	case CPU_DEAD:
+		if (x86_pmu.cpu_dead)
+			x86_pmu.cpu_dead(cpu);
+		break;
+
+	default:
+		break;
+	}
+
+	return ret;
+}
+
 static void __init pmu_check_apic(void)
 {
 	if (cpu_has_apic)
@@ -1415,11 +1444,13 @@ void __init init_hw_perf_events(void)
 	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
 	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
 	pr_info("... event mask:             %016Lx\n", perf_event_mask);
+
+	perf_cpu_notifier(x86_pmu_notifier);
 }
 
 static inline void x86_pmu_read(struct perf_event *event)
 {
-	x86_perf_event_update(event, &event->hw, event->hw.idx);
+	x86_perf_event_update(event);
 }
 
 static const struct pmu pmu = {
@@ -1601,14 +1632,42 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	return len;
 }
 
-static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+#ifdef CONFIG_COMPAT
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
-	unsigned long bytes;
+	/* 32-bit process in 64-bit kernel. */
+	struct stack_frame_ia32 frame;
+	const void __user *fp;
 
-	bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
+	if (!test_thread_flag(TIF_IA32))
+		return 0;
+
+	fp = compat_ptr(regs->bp);
+	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+		unsigned long bytes;
+		frame.next_frame     = 0;
+		frame.return_address = 0;
+
+		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
+		if (bytes != sizeof(frame))
+			break;
 
-	return bytes == sizeof(*frame);
+		if (fp < compat_ptr(regs->sp))
+			break;
+
+		callchain_store(entry, frame.return_address);
+		fp = compat_ptr(frame.next_frame);
+	}
+	return 1;
 }
+#else
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+    return 0;
+}
+#endif
 
 static void
 perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
@@ -1624,11 +1683,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 	callchain_store(entry, PERF_CONTEXT_USER);
 	callchain_store(entry, regs->ip);
 
+	if (perf_callchain_user32(regs, entry))
+		return;
+
 	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+		unsigned long bytes;
 		frame.next_frame	     = NULL;
 		frame.return_address = 0;
 
-		if (!copy_stack_frame(fp, &frame))
+		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
+		if (bytes != sizeof(frame))
 			break;
 
 		if ((unsigned long)fp < regs->sp)
@@ -1675,28 +1739,14 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 	return entry;
 }
 
-void hw_perf_event_setup_online(int cpu)
-{
-	init_debug_store_on_cpu(cpu);
-
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		amd_pmu_cpu_online(cpu);
-		break;
-	default:
-		return;
-	}
-}
-
-void hw_perf_event_setup_offline(int cpu)
+void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
 {
-	init_debug_store_on_cpu(cpu);
-
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		amd_pmu_cpu_offline(cpu);
-		break;
-	default:
-		return;
-	}
+	regs->ip = ip;
+	/*
+	 * perf_arch_fetch_caller_regs adds another call, we need to increment
+	 * the skip level
+	 */
+	regs->bp = rewind_frame_pointer(skip + 1);
+	regs->cs = __KERNEL_CS;
+	local_save_flags(regs->flags);
 }
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 8f3dbfda3c4..db6f7d4056e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -137,6 +137,13 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
 	return (hwc->config & 0xe0) == 0xe0;
 }
 
+static inline int amd_has_nb(struct cpu_hw_events *cpuc)
+{
+	struct amd_nb *nb = cpuc->amd_nb;
+
+	return nb && nb->nb_id != -1;
+}
+
 static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
 				      struct perf_event *event)
 {
@@ -147,7 +154,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
 	/*
 	 * only care about NB events
 	 */
-	if (!(nb && amd_is_nb_event(hwc)))
+	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
 		return;
 
 	/*
@@ -214,7 +221,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	/*
 	 * if not NB event or no NB, then no constraints
 	 */
-	if (!(nb && amd_is_nb_event(hwc)))
+	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
 		return &unconstrained;
 
 	/*
@@ -271,28 +278,6 @@ done:
 	return &emptyconstraint;
 }
 
-static __initconst struct x86_pmu amd_pmu = {
-	.name			= "AMD",
-	.handle_irq		= x86_pmu_handle_irq,
-	.disable_all		= x86_pmu_disable_all,
-	.enable_all		= x86_pmu_enable_all,
-	.enable			= x86_pmu_enable_event,
-	.disable		= x86_pmu_disable_event,
-	.eventsel		= MSR_K7_EVNTSEL0,
-	.perfctr		= MSR_K7_PERFCTR0,
-	.event_map		= amd_pmu_event_map,
-	.raw_event		= amd_pmu_raw_event,
-	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_events		= 4,
-	.event_bits		= 48,
-	.event_mask		= (1ULL << 48) - 1,
-	.apic			= 1,
-	/* use highest bit to detect overflow */
-	.max_period		= (1ULL << 47) - 1,
-	.get_event_constraints	= amd_get_event_constraints,
-	.put_event_constraints	= amd_put_event_constraints
-};
-
 static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
 {
 	struct amd_nb *nb;
@@ -309,57 +294,61 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
 	 * initialize all possible NB constraints
 	 */
 	for (i = 0; i < x86_pmu.num_events; i++) {
-		set_bit(i, nb->event_constraints[i].idxmsk);
+		__set_bit(i, nb->event_constraints[i].idxmsk);
 		nb->event_constraints[i].weight = 1;
 	}
 	return nb;
 }
 
-static void amd_pmu_cpu_online(int cpu)
+static int amd_pmu_cpu_prepare(int cpu)
 {
-	struct cpu_hw_events *cpu1, *cpu2;
-	struct amd_nb *nb = NULL;
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+	WARN_ON_ONCE(cpuc->amd_nb);
+
+	if (boot_cpu_data.x86_max_cores < 2)
+		return NOTIFY_OK;
+
+	cpuc->amd_nb = amd_alloc_nb(cpu, -1);
+	if (!cpuc->amd_nb)
+		return NOTIFY_BAD;
+
+	return NOTIFY_OK;
+}
+
+static void amd_pmu_cpu_starting(int cpu)
+{
+	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+	struct amd_nb *nb;
 	int i, nb_id;
 
 	if (boot_cpu_data.x86_max_cores < 2)
 		return;
 
-	/*
-	 * function may be called too early in the
-	 * boot process, in which case nb_id is bogus
-	 */
 	nb_id = amd_get_nb_id(cpu);
-	if (nb_id == BAD_APICID)
-		return;
-
-	cpu1 = &per_cpu(cpu_hw_events, cpu);
-	cpu1->amd_nb = NULL;
+	WARN_ON_ONCE(nb_id == BAD_APICID);
 
 	raw_spin_lock(&amd_nb_lock);
 
 	for_each_online_cpu(i) {
-		cpu2 = &per_cpu(cpu_hw_events, i);
-		nb = cpu2->amd_nb;
-		if (!nb)
+		nb = per_cpu(cpu_hw_events, i).amd_nb;
+		if (WARN_ON_ONCE(!nb))
 			continue;
 
-		if (nb->nb_id == nb_id)
-			goto found;
-	}
-
-	nb = amd_alloc_nb(cpu, nb_id);
-	if (!nb) {
-		pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
-		raw_spin_unlock(&amd_nb_lock);
-		return;
+		if (nb->nb_id == nb_id) {
+			kfree(cpuc->amd_nb);
+			cpuc->amd_nb = nb;
+			break;
+		}
 	}
 
-found:
-	nb->refcnt++;
-	cpu1->amd_nb = nb;
+	cpuc->amd_nb->nb_id = nb_id;
+	cpuc->amd_nb->refcnt++;
 
 	raw_spin_unlock(&amd_nb_lock);
 }
 
-static void amd_pmu_cpu_offline(int cpu)
+static void amd_pmu_cpu_dead(int cpu)
 {
 	struct cpu_hw_events *cpuhw;
 
@@ -370,14 +359,44 @@ static void amd_pmu_cpu_offline(int cpu)
 
 	raw_spin_lock(&amd_nb_lock);
 
-	if (--cpuhw->amd_nb->refcnt == 0)
-		kfree(cpuhw->amd_nb);
+	if (cpuhw->amd_nb) {
+		struct amd_nb *nb = cpuhw->amd_nb;
+
+		if (nb->nb_id == -1 || --nb->refcnt == 0)
+			kfree(nb);
 
-	cpuhw->amd_nb = NULL;
+		cpuhw->amd_nb = NULL;
+	}
 
 	raw_spin_unlock(&amd_nb_lock);
 }
 
+static __initconst struct x86_pmu amd_pmu = {
+	.name			= "AMD",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= x86_pmu_disable_all,
+	.enable_all		= x86_pmu_enable_all,
+	.enable			= x86_pmu_enable_event,
+	.disable		= x86_pmu_disable_event,
+	.eventsel		= MSR_K7_EVNTSEL0,
+	.perfctr		= MSR_K7_PERFCTR0,
+	.event_map		= amd_pmu_event_map,
+	.raw_event		= amd_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
+	.num_events		= 4,
+	.event_bits		= 48,
+	.event_mask		= (1ULL << 48) - 1,
+	.apic			= 1,
+	/* use highest bit to detect overflow */
+	.max_period		= (1ULL << 47) - 1,
+	.get_event_constraints	= amd_get_event_constraints,
+	.put_event_constraints	= amd_put_event_constraints,
+
+	.cpu_prepare		= amd_pmu_cpu_prepare,
+	.cpu_starting		= amd_pmu_cpu_starting,
+	.cpu_dead		= amd_pmu_cpu_dead,
+};
+
 static __init int amd_pmu_init(void)
 {
 	/* Performance-monitoring supported from K7 and later: */
@@ -390,11 +409,6 @@ static __init int amd_pmu_init(void)
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
 	       sizeof(hw_cache_event_ids));
 
-	/*
-	 * explicitly initialize the boot cpu, other cpus will get
-	 * the cpu hotplug callbacks from smp_init()
-	 */
-	amd_pmu_cpu_online(smp_processor_id());
 
 	return 0;
 }
 
@@ -405,12 +419,4 @@ static int amd_pmu_init(void)
 	return 0;
 }
 
-static void amd_pmu_cpu_online(int cpu)
-{
-}
-
-static void amd_pmu_cpu_offline(int cpu)
-{
-}
-
 #endif
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 44b60c85210..9c794ac8783 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -548,9 +548,9 @@ static inline void intel_pmu_ack_status(u64 ack)
 }
 
 static inline void
-intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
+intel_pmu_disable_fixed(struct hw_perf_event *hwc)
 {
-	int idx = __idx - X86_PMC_IDX_FIXED;
+	int idx = hwc->idx - X86_PMC_IDX_FIXED;
 	u64 ctrl_val, mask;
 
 	mask = 0xfULL << (idx * 4);
@@ -621,26 +621,28 @@ static void intel_pmu_drain_bts_buffer(void)
 }
 
 static inline void
-intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+intel_pmu_disable_event(struct perf_event *event)
 {
-	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
 		intel_pmu_disable_bts();
 		intel_pmu_drain_bts_buffer();
 		return;
 	}
 
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
-		intel_pmu_disable_fixed(hwc, idx);
+		intel_pmu_disable_fixed(hwc);
 		return;
 	}
 
-	x86_pmu_disable_event(hwc, idx);
+	x86_pmu_disable_event(event);
 }
 
 static inline void
-intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
+intel_pmu_enable_fixed(struct hw_perf_event *hwc)
 {
-	int idx = __idx - X86_PMC_IDX_FIXED;
+	int idx = hwc->idx - X86_PMC_IDX_FIXED;
 	u64 ctrl_val, bits, mask;
 	int err;
 
@@ -670,9 +672,11 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
 	err = checking_wrmsrl(hwc->config_base, ctrl_val);
 }
 
-static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static void intel_pmu_enable_event(struct perf_event *event)
 {
-	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
 		if (!__get_cpu_var(cpu_hw_events).enabled)
 			return;
 
@@ -681,11 +685,11 @@ static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 	}
 
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
-		intel_pmu_enable_fixed(hwc, idx);
+		intel_pmu_enable_fixed(hwc);
 		return;
 	}
 
-	__x86_pmu_enable_event(hwc, idx);
+	__x86_pmu_enable_event(hwc);
 }
 
 /*
@@ -694,14 +698,8 @@ static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 */
 static int intel_pmu_save_and_restart(struct perf_event *event)
 {
-	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-	int ret;
-
-	x86_perf_event_update(event, hwc, idx);
-	ret = x86_perf_event_set_period(event, hwc, idx);
-
-	return ret;
+	x86_perf_event_update(event);
+	return x86_perf_event_set_period(event);
 }
 
 static void intel_pmu_reset(void)
@@ -745,11 +743,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
-	perf_disable();
+	intel_pmu_disable_all();
 	intel_pmu_drain_bts_buffer();
 	status = intel_pmu_get_status();
 	if (!status) {
-		perf_enable();
+		intel_pmu_enable_all();
 		return 0;
 	}
 
@@ -759,8 +757,7 @@ again:
 		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
 		perf_event_print_debug();
 		intel_pmu_reset();
-		perf_enable();
-		return 1;
+		goto done;
 	}
 
 	inc_irq_stat(apic_perf_irqs);
@@ -768,7 +765,6 @@ again:
 	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
 		struct perf_event *event = cpuc->events[bit];
 
-		clear_bit(bit, (unsigned long *) &status);
 		if (!test_bit(bit, cpuc->active_mask))
 			continue;
 
@@ -778,7 +774,7 @@ again:
 		data.period = event->hw.last_period;
 
 		if (perf_event_overflow(event, 1, &data, regs))
-			intel_pmu_disable_event(&event->hw, bit);
+			x86_pmu_stop(event);
 	}
 
 	intel_pmu_ack_status(ack);
@@ -790,8 +786,8 @@ again:
 	if (status)
 		goto again;
 
-	perf_enable();
-
+done:
+	intel_pmu_enable_all();
 	return 1;
 }
 
@@ -870,7 +866,10 @@ static __initconst struct x86_pmu intel_pmu = {
 	.max_period		= (1ULL << 31) - 1,
 	.enable_bts		= intel_pmu_enable_bts,
 	.disable_bts		= intel_pmu_disable_bts,
-	.get_event_constraints	= intel_get_event_constraints
+	.get_event_constraints	= intel_get_event_constraints,
+
+	.cpu_starting		= init_debug_store_on_cpu,
+	.cpu_dying		= fini_debug_store_on_cpu,
 };
 
 static __init int intel_pmu_init(void)
@@ -937,6 +936,7 @@ static __init int intel_pmu_init(void)
 
 	case 26: /* 45 nm nehalem, "Bloomfield" */
 	case 30: /* 45 nm nehalem, "Lynnfield" */
+	case 46: /* 45 nm nehalem-ex, "Beckton" */
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index a4e67b99d91..a330485d14d 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -77,27 +77,29 @@ static void p6_pmu_enable_all(void)
 }
 
 static inline void
-p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+p6_pmu_disable_event(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
 	u64 val = P6_NOP_EVENT;
 
 	if (cpuc->enabled)
 		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 
-	(void)checking_wrmsrl(hwc->config_base + idx, val);
+	(void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
 }
 
-static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static void p6_pmu_enable_event(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
 	u64 val;
 
 	val = hwc->config;
 	if (cpuc->enabled)
 		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 
-	(void)checking_wrmsrl(hwc->config_base + idx, val);
+	(void)checking_wrmsrl(hwc->config_base + hwc->idx, val);
 }
 
 static __initconst struct x86_pmu p6_pmu = {
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 1cbed97b59c..dfdb4dba232 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -22,6 +22,7 @@
 */
 
 #include <linux/dmi.h>
+#include <linux/module.h>
 #include <asm/div64.h>
 #include <asm/vmware.h>
 #include <asm/x86_init.h>
@@ -101,6 +102,7 @@ int vmware_platform(void)
 
 	return 0;
 }
+EXPORT_SYMBOL(vmware_platform);
 
 /*
 * VMware hypervisor takes care of exporting a reliable TSC to the guest.
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 83e5e628de7..8b862d5900f 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -40,6 +40,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/uaccess.h>
+#include <linux/gfp.h>
 
 #include <asm/processor.h>
 #include <asm/msr.h>
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index a4849c10a77..ebd4c51d096 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -27,7 +27,6 @@
 #include <asm/cpu.h>
 #include <asm/reboot.h>
 #include <asm/virtext.h>
-#include <asm/x86_init.h>
 
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
@@ -103,10 +102,5 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 #ifdef CONFIG_HPET_TIMER
 	hpet_disable();
 #endif
-
-#ifdef CONFIG_X86_64
-	x86_platform.iommu_shutdown();
-#endif
-
 	crash_save_cpu(regs, safe_smp_processor_id());
 }
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index cd97ce18c29..67414550c3c 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -5,6 +5,7 @@
 *	Copyright (C) IBM Corporation, 2004. All rights reserved
 */
 
+#include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/highmem.h>
 #include <linux/crash_dump.h>
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
index 4fd1420faff..e1a93be4fd4 100644
--- a/arch/x86/kernel/dumpstack.h
+++ b/arch/x86/kernel/dumpstack.h
@@ -14,6 +14,8 @@
 #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
 #endif
 
+#include <linux/uaccess.h>
+
 extern void
 show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 		unsigned long *stack, unsigned long bp, char *log_lvl);
@@ -29,4 +31,26 @@ struct stack_frame {
 	struct stack_frame *next_frame;
 	unsigned long return_address;
 };
+
+struct stack_frame_ia32 {
+    u32 next_frame;
+    u32 return_address;
+};
+
+static inline unsigned long rewind_frame_pointer(int n)
+{
+	struct stack_frame *frame;
+
+	get_bp(frame);
+
+#ifdef CONFIG_FRAME_POINTER
+	while (n--) {
+		if (probe_kernel_address(&frame->next_frame, frame))
+			break;
+	}
 #endif
+
+	return (unsigned long)frame;
+}
+
+#endif /* DUMPSTACK_H */
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index d5e2a2ebb62..272c9f1f05f 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -208,7 +208,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 			if (in_irq_stack(stack, irq_stack, irq_stack_end)) {
 				if (ops->stack(data, "IRQ") < 0)
 					break;
-				bp = print_context_stack(tinfo, stack, bp,
+				bp = ops->walk_stack(tinfo, stack, bp,
 					ops, data, irq_stack_end, &graph);
 				/*
 				 * We link to the next stack (which would be
@@ -229,7 +229,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 	/*
 	 * This handles the process stack:
 	 */
-	bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph);
+	bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
 	put_cpu();
 }
 EXPORT_SYMBOL(dump_trace);
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index adedeef1ded..b2e24603739 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -7,6 +7,7 @@  #include <linux/init.h>  #include <linux/start_kernel.h> +#include <linux/mm.h>  #include <asm/setup.h>  #include <asm/sections.h> @@ -44,9 +45,10 @@ void __init i386_start_kernel(void)  #ifdef CONFIG_BLK_DEV_INITRD  	/* Reserve INITRD */  	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { +		/* Assume only end is not page aligned */  		u64 ramdisk_image = boot_params.hdr.ramdisk_image;  		u64 ramdisk_size  = boot_params.hdr.ramdisk_size; -		u64 ramdisk_end   = ramdisk_image + ramdisk_size; +		u64 ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);  		reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");  	}  #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b5a9896ca1e..7147143fd61 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -103,9 +103,10 @@ void __init x86_64_start_reservations(char *real_mode_data)  #ifdef CONFIG_BLK_DEV_INITRD  	/* Reserve INITRD */  	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { +		/* Assume only end is not page aligned */  		unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;  		unsigned long ramdisk_size  = boot_params.hdr.ramdisk_size; -		unsigned long ramdisk_end   = ramdisk_image + ramdisk_size; +		unsigned long ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);  		reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");  	}  #endif diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ee4fa1bfcb3..23b4ecdffa9 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -4,6 +4,7 @@  #include <linux/sysdev.h>  #include <linux/delay.h>  #include <linux/errno.h> +#include <linux/slab.h>  #include <linux/hpet.h>  #include <linux/init.h>  #include <linux/cpu.h> @@ -399,9 +400,15 @@ static int hpet_next_event(unsigned long delta,  	 * then we might have a real hardware problem. We can not do  	 * much about it here, but at least alert the user/admin with  	 * a prominent warning. +	 * An erratum on some chipsets (ICH9, ...) causes a comparator read +	 * immediately following a write to return the old value. The +	 * workaround is to read the comparator a second time when the +	 * first read returns the old value.  	 */ -	WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt, +	if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) { +		WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt, 		  KERN_WARNING "hpet: compare register read back failed.\n"); +	}  	return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;  } @@ -1143,6 +1150,7 @@ int hpet_set_periodic_freq(unsigned long freq)  		do_div(clc, freq);  		clc >>= hpet_clockevent.shift;  		hpet_pie_delta = clc; +		hpet_pie_limit = 0;  	}  	return 1;  }
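The hpet_next_event() change is the read-twice workaround named in the new comment: on affected chipsets the first comparator read after a write can return stale data, so the warning must only fire when a second read still disagrees. A minimal user-space model of the pattern (write_comparator()/read_comparator() are hypothetical stand-ins for hpet_writel()/hpet_readl(), with the erratum simulated in software):

#include <stdint.h>
#include <stdio.h>

/* The first read after a write deliberately returns stale data,
 * modeling the chipset erratum. */
static uint32_t comparator;
static int reads_since_write;

static void write_comparator(uint32_t val)
{
	comparator = val;
	reads_since_write = 0;
}

static uint32_t read_comparator(void)
{
	if (reads_since_write++ == 0)
		return comparator - 1;	/* stale value, as on ICH9 */
	return comparator;
}

int main(void)
{
	uint32_t cnt = 42;

	write_comparator(cnt);

	/* Warn only when the second read also disagrees: a single
	 * mismatch may just be the erratum, not a real failure. */
	if (read_comparator() != cnt && read_comparator() != cnt)
		fprintf(stderr, "hpet: compare register read back failed\n");
	else
		printf("comparator verified after retry: %u\n", cnt);

	return 0;
}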
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index c01a2b846d4..54c31c28548 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -8,6 +8,7 @@  #include <linux/module.h>  #include <linux/regset.h>  #include <linux/sched.h> +#include <linux/slab.h>  #include <asm/sigcontext.h>  #include <asm/processor.h> diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index fb725ee15f5..7c9f02c130f 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -5,7 +5,6 @@  #include <linux/ioport.h>  #include <linux/interrupt.h>  #include <linux/timex.h> -#include <linux/slab.h>  #include <linux/random.h>  #include <linux/init.h>  #include <linux/kernel_stat.h> diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index ef257fc2921..0ed2d300cd4 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -5,7 +5,6 @@  #include <linux/ioport.h>  #include <linux/interrupt.h>  #include <linux/timex.h> -#include <linux/slab.h>  #include <linux/random.h>  #include <linux/kprobes.h>  #include <linux/init.h> @@ -141,6 +140,28 @@ void __init init_IRQ(void)  	x86_init.irqs.intr_init();  } +/* + * Set up the vector-to-irq mappings. + */ +void setup_vector_irq(int cpu) +{ +#ifndef CONFIG_X86_IO_APIC +	int irq; + +	/* +	 * On most platforms the legacy PIC delivers the interrupts on the +	 * boot cpu. But there are certain platforms where PIC interrupts are +	 * delivered to multiple cpus. If the legacy IRQ is handled by the +	 * legacy PIC, then for the new cpu that is coming online, set up the +	 * static legacy vector-to-irq mapping: +	 */ +	for (irq = 0; irq < legacy_pic->nr_legacy_irqs; irq++) +		per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; +#endif + +	__setup_vector_irq(cpu); +} +  static void __init smp_intr_init(void)  {  #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index 9b895464dd0..0f7bc20cfcd 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c @@ -2,8 +2,8 @@   * Shared support code for AMD K8 northbridges and derivatives.   * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.   */ -#include <linux/gfp.h>  #include <linux/types.h> +#include <linux/slab.h>  #include <linux/init.h>  #include <linux/errno.h>  #include <linux/module.h> diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index e444357375c..8afd9f321f1 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -9,6 +9,7 @@  #include <linux/debugfs.h>  #include <linux/uaccess.h>  #include <linux/module.h> +#include <linux/slab.h>  #include <linux/init.h>  #include <linux/stat.h>  #include <linux/io.h> diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index bfba6019d76..b2258ca9100 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -618,8 +618,8 @@ int kgdb_arch_init(void)  	 * portion of kgdb because this operation requires mutexes to  	 * complete.  	 
*/ +	hw_breakpoint_init(&attr);  	attr.bp_addr = (unsigned long)kgdb_arch_init; -	attr.type = PERF_TYPE_BREAKPOINT;  	attr.bp_len = HW_BREAKPOINT_LEN_1;  	attr.bp_type = HW_BREAKPOINT_W;  	attr.disabled = 1; diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index ec6ef60cbd1..ea697263b37 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -7,6 +7,7 @@   */  #include <linux/errno.h> +#include <linux/gfp.h>  #include <linux/sched.h>  #include <linux/string.h>  #include <linux/mm.h> diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 4a8bb82248a..035c8c52918 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -9,6 +9,7 @@  #include <linux/mm.h>  #include <linux/kexec.h>  #include <linux/string.h> +#include <linux/gfp.h>  #include <linux/reboot.h>  #include <linux/numa.h>  #include <linux/ftrace.h> diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c index 845d80ce1ef..63eaf659623 100644 --- a/arch/x86/kernel/mca_32.c +++ b/arch/x86/kernel/mca_32.c @@ -42,6 +42,7 @@  #include <linux/kernel.h>  #include <linux/mca.h>  #include <linux/kprobes.h> +#include <linux/slab.h>  #include <asm/system.h>  #include <asm/io.h>  #include <linux/proc_fs.h> diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 89f386f044e..e0bc186d750 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -23,6 +23,7 @@  #include <linux/kernel.h>  #include <linux/bug.h>  #include <linux/mm.h> +#include <linux/gfp.h>  #include <asm/system.h>  #include <asm/page.h> diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index a2c1edd2d3a..e81030f71a8 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -664,7 +664,7 @@ static void __init smp_reserve_memory(struct mpf_intel *mpf)  {  	unsigned long size = get_mpc_size(mpf->physptr); -	reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc"); +	reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc");  }  static int __init smp_scan_config(unsigned long base, unsigned long length) @@ -693,7 +693,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length)  			       mpf, (u64)virt_to_phys(mpf));  			mem = virt_to_phys(mpf); -			reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf"); +			reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf");  			if (mpf->physptr)  				smp_reserve_memory(mpf); diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 206735ac8cb..4d4468e9f47 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -37,6 +37,7 @@  #include <linux/cpu.h>  #include <linux/notifier.h>  #include <linux/uaccess.h> +#include <linux/gfp.h>  #include <asm/processor.h>  #include <asm/msr.h> diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index a4ac764a688..4b7e3d8b01d 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -2,6 +2,7 @@  #include <linux/dma-debug.h>  #include <linux/dmar.h>  #include <linux/bootmem.h> +#include <linux/gfp.h>  #include <linux/pci.h>  #include <linux/kmemleak.h> diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index f3af115a573..0f7f130caa6 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -29,6 +29,7 @@  #include <linux/iommu-helper.h>  #include <linux/sysdev.h>  #include <linux/io.h> +#include <linux/gfp.h>  #include <asm/atomic.h>  #include <asm/mtrr.h>  #include <asm/pgtable.h> @@ -564,6 +565,9 @@ 
static void enable_gart_translations(void)  		enable_gart_translation(dev, __pa(agp_gatt_table));  	} + +	/* Flush the GART-TLB to remove stale entries */ +	k8_flush_garts();  }  /* diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 22be12b60a8..3af4af810c0 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -4,6 +4,7 @@  #include <linux/scatterlist.h>  #include <linux/string.h>  #include <linux/init.h> +#include <linux/gfp.h>  #include <linux/pci.h>  #include <linux/mm.h> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ad9540676fc..28ad9f4d8b9 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -526,21 +526,37 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)  }  /* - * Check for AMD CPUs, which have potentially C1E support + * Check for AMD CPUs, where APIC timer interrupt does not wake up CPU from C1e. + * For more information see + * - Erratum #400 for NPT family 0xf and family 0x10 CPUs + * - Erratum #365 for family 0x11 (not affected because C1e not in use)   */  static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)  { +	u64 val;  	if (c->x86_vendor != X86_VENDOR_AMD) -		return 0; - -	if (c->x86 < 0x0F) -		return 0; +		goto no_c1e_idle;  	/* Family 0x0f models < rev F do not have C1E */ -	if (c->x86 == 0x0f && c->x86_model < 0x40) -		return 0; +	if (c->x86 == 0x0F && c->x86_model >= 0x40) +		return 1; -	return 1; +	if (c->x86 == 0x10) { +		/* +		 * check OSVW bit for CPUs that are not affected +		 * by erratum #400 +		 */ +		rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); +		if (val >= 2) { +			rdmsrl(MSR_AMD64_OSVW_STATUS, val); +			if (!(val & BIT(1))) +				goto no_c1e_idle; +		} +		return 1; +	} + +no_c1e_idle: +	return 0;  }  static cpumask_var_t c1e_mask; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index dc9690b4c4c..17cb3295cbf 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -276,12 +276,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,  	set_tsk_thread_flag(p, TIF_FORK); -	p->thread.fs = me->thread.fs; -	p->thread.gs = me->thread.gs;  	p->thread.io_bitmap_ptr = NULL;  	savesegment(gs, p->thread.gsindex); +	p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;  	savesegment(fs, p->thread.fsindex); +	p->thread.fs = p->thread.fsindex ? 
0 : me->thread.fs;  	savesegment(es, p->thread.es);  	savesegment(ds, p->thread.ds); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a503b1fd04e..2e9b55027b7 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -12,6 +12,7 @@  #include <linux/mm.h>  #include <linux/smp.h>  #include <linux/errno.h> +#include <linux/slab.h>  #include <linux/ptrace.h>  #include <linux/regset.h>  #include <linux/tracehook.h> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5d7ba1a449b..c4851eff57b 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -55,7 +55,6 @@  #include <linux/stddef.h>  #include <linux/unistd.h>  #include <linux/ptrace.h> -#include <linux/slab.h>  #include <linux/user.h>  #include <linux/delay.h> @@ -314,16 +313,17 @@ static void __init reserve_brk(void)  #define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)  static void __init relocate_initrd(void)  { - +	/* Assume only end is not page aligned */  	u64 ramdisk_image = boot_params.hdr.ramdisk_image;  	u64 ramdisk_size  = boot_params.hdr.ramdisk_size; +	u64 area_size     = PAGE_ALIGN(ramdisk_size);  	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;  	u64 ramdisk_here;  	unsigned long slop, clen, mapaddr;  	char *p, *q;  	/* We need to move the initrd down into lowmem */ -	ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size, +	ramdisk_here = find_e820_area(0, end_of_lowmem, area_size,  					 PAGE_SIZE);  	if (ramdisk_here == -1ULL) @@ -332,7 +332,7 @@ static void __init relocate_initrd(void)  	/* Note: this includes all the lowmem currently occupied by  	   the initrd, we rely on that fact to keep the data intact. */ -	reserve_early(ramdisk_here, ramdisk_here + ramdisk_size, +	reserve_early(ramdisk_here, ramdisk_here + area_size,  			 "NEW RAMDISK");  	initrd_start = ramdisk_here + PAGE_OFFSET;  	initrd_end   = initrd_start + ramdisk_size; @@ -376,9 +376,10 @@ static void __init relocate_initrd(void)  static void __init reserve_initrd(void)  { +	/* Assume only end is not page aligned */  	u64 ramdisk_image = boot_params.hdr.ramdisk_image;  	u64 ramdisk_size  = boot_params.hdr.ramdisk_size; -	u64 ramdisk_end   = ramdisk_image + ramdisk_size; +	u64 ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);  	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;  	if (!boot_params.hdr.type_of_loader || @@ -606,6 +607,16 @@ static int __init setup_elfcorehdr(char *arg)  early_param("elfcorehdr", setup_elfcorehdr);  #endif +static __init void reserve_ibft_region(void) +{ +	unsigned long addr, size = 0; + +	addr = find_ibft_region(&size); + +	if (size) +		reserve_early_overlap_ok(addr, addr + size, "ibft"); +} +  #ifdef CONFIG_X86_RESERVE_LOW_64K  static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)  { @@ -908,6 +919,8 @@ void __init setup_arch(char **cmdline_p)  	 */  	find_smp_config(); +	reserve_ibft_region(); +  	reserve_trampoline_memory();  #ifdef CONFIG_ACPI_SLEEP @@ -975,8 +988,6 @@ void __init setup_arch(char **cmdline_p)  	dma32_reserve_bootmem(); -	reserve_ibft_region(); -  #ifdef CONFIG_KVM_CLOCK  	kvmclock_init();  #endif diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index ec1de97600e..d801210945d 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -21,6 +21,7 @@  #include <linux/cache.h>  #include <linux/interrupt.h>  #include <linux/cpu.h> +#include <linux/gfp.h>  #include <asm/mtrr.h>  #include <asm/tlbflush.h> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 
a02e80c3c54..763d815e27a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -49,6 +49,7 @@  #include <linux/nmi.h>  #include <linux/tboot.h>  #include <linux/stackprotector.h> +#include <linux/gfp.h>  #include <asm/acpi.h>  #include <asm/desc.h> @@ -242,12 +243,10 @@ static void __cpuinit smp_callin(void)  	end_local_APIC_setup();  	map_cpu_to_logical_apicid(); -	notify_cpu_starting(cpuid); -  	/*  	 * Need to setup vector mappings before we enable interrupts.  	 */ -	__setup_vector_irq(smp_processor_id()); +	setup_vector_irq(smp_processor_id());  	/*  	 * Get our bogomips.  	 * @@ -264,6 +263,8 @@ static void __cpuinit smp_callin(void)  	 */  	smp_store_cpu_info(cpuid); +	notify_cpu_starting(cpuid); +  	/*  	 * Allow the master to continue.  	 */ diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 364d015efeb..17b03dd3a6b 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -9,6 +9,7 @@  #include <linux/seq_file.h>  #include <linux/proc_fs.h>  #include <linux/kernel.h> +#include <linux/slab.h>  #include <asm/mmu_context.h>  #include <asm/uv/uv.h> diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index ece73d8e324..1d40336b030 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -10,6 +10,7 @@  #include <linux/module.h>  #include <linux/rbtree.h> +#include <linux/slab.h>  #include <linux/irq.h>  #include <asm/apic.h> diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index 2b75ef638db..56e421bc379 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c @@ -19,6 +19,7 @@   *  Copyright (c) Dimitri Sivanich   */  #include <linux/clockchips.h> +#include <linux/slab.h>  #include <asm/uv/uv_mmrs.h>  #include <asm/uv/uv_hub.h> diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 7dd599deca4..ce9fbacb752 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -28,6 +28,7 @@  #include <linux/mm.h>  #include <linux/highmem.h>  #include <linux/sched.h> +#include <linux/gfp.h>  #include <asm/vmi.h>  #include <asm/io.h>  #include <asm/fixmap.h> diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 44879df5569..2cc249718c4 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -291,8 +291,8 @@ SECTIONS  	.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {  		__smp_locks = .;  		*(.smp_locks) -		__smp_locks_end = .;  		. = ALIGN(PAGE_SIZE); +		__smp_locks_end = .;  	}  #ifdef CONFIG_X86_64 diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 294698b6daf..0150affad25 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -32,6 +32,7 @@  #define pr_fmt(fmt) "pit: " fmt  #include <linux/kvm_host.h> +#include <linux/slab.h>  #include "irq.h"  #include "i8254.h" diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 07771da85de..a790fa128a9 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -26,6 +26,7 @@   *   Port from Qemu.   
*/  #include <linux/mm.h> +#include <linux/slab.h>  #include <linux/bitops.h>  #include "irq.h" diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4b224f90087..1eb7a4ae0c9 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -26,6 +26,7 @@  #include <linux/io.h>  #include <linux/module.h>  #include <linux/math64.h> +#include <linux/slab.h>  #include <asm/processor.h>  #include <asm/msr.h>  #include <asm/page.h> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 741373e8ca7..19a8906bcaa 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -31,6 +31,7 @@  #include <linux/hugetlb.h>  #include <linux/compiler.h>  #include <linux/srcu.h> +#include <linux/slab.h>  #include <asm/page.h>  #include <asm/cmpxchg.h> @@ -1489,8 +1490,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,  		for_each_sp(pages, sp, parents, i) {  			kvm_mmu_zap_page(kvm, sp);  			mmu_pages_clear_parents(&parents); +			zapped++;  		} -		zapped += pages.nr;  		kvm_mmu_pages_init(parent, &parents, &pages);  	} @@ -1541,14 +1542,16 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)  	 */  	if (used_pages > kvm_nr_mmu_pages) { -		while (used_pages > kvm_nr_mmu_pages) { +		while (used_pages > kvm_nr_mmu_pages && +			!list_empty(&kvm->arch.active_mmu_pages)) {  			struct kvm_mmu_page *page;  			page = container_of(kvm->arch.active_mmu_pages.prev,  					    struct kvm_mmu_page, link); -			kvm_mmu_zap_page(kvm, page); +			used_pages -= kvm_mmu_zap_page(kvm, page);  			used_pages--;  		} +		kvm_nr_mmu_pages = used_pages;  		kvm->arch.n_free_mmu_pages = 0;  	}  	else @@ -1595,7 +1598,8 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)  		    && !sp->role.invalid) {  			pgprintk("%s: zap %lx %x\n",  				 __func__, gfn, sp->role.word); -			kvm_mmu_zap_page(kvm, sp); +			if (kvm_mmu_zap_page(kvm, sp)) +				nn = bucket->first;  		}  	}  } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 52f78dd0301..2ba58206812 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -26,6 +26,7 @@  #include <linux/highmem.h>  #include <linux/sched.h>  #include <linux/ftrace_event.h> +#include <linux/slab.h>  #include <asm/desc.h> @@ -705,29 +706,28 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)  	if (err)  		goto free_svm; +	err = -ENOMEM;  	page = alloc_page(GFP_KERNEL); -	if (!page) { -		err = -ENOMEM; +	if (!page)  		goto uninit; -	} -	err = -ENOMEM;  	msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);  	if (!msrpm_pages) -		goto uninit; +		goto free_page1;  	nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);  	if (!nested_msrpm_pages) -		goto uninit; - -	svm->msrpm = page_address(msrpm_pages); -	svm_vcpu_init_msrpm(svm->msrpm); +		goto free_page2;  	hsave_page = alloc_page(GFP_KERNEL);  	if (!hsave_page) -		goto uninit; +		goto free_page3; +  	svm->nested.hsave = page_address(hsave_page); +	svm->msrpm = page_address(msrpm_pages); +	svm_vcpu_init_msrpm(svm->msrpm); +  	svm->nested.msrpm = page_address(nested_msrpm_pages);  	svm->vmcb = page_address(page); @@ -743,6 +743,12 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)  	return &svm->vcpu; +free_page3: +	__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); +free_page2: +	__free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); +free_page1: +	__free_page(page);  uninit:  	kvm_vcpu_uninit(&svm->vcpu);  free_svm: diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 14873b9f843..bc933cfb4e6 100644 --- a/arch/x86/kvm/vmx.c +++ 
b/arch/x86/kvm/vmx.c @@ -26,6 +26,7 @@  #include <linux/sched.h>  #include <linux/moduleparam.h>  #include <linux/ftrace_event.h> +#include <linux/slab.h>  #include "kvm_cache_regs.h"  #include "x86.h" @@ -76,6 +77,8 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);  #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)  #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) +#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) +  /*   * These 2 parameters are used to config the controls for Pause-Loop Exiting:   * ple_gap:    upper bound on the amount of time between two successive @@ -130,7 +133,7 @@ struct vcpu_vmx {  	} host_state;  	struct {  		int vm86_active; -		u8 save_iopl; +		ulong save_rflags;  		struct kvm_save_segment {  			u16 selector;  			unsigned long base; @@ -817,18 +820,23 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)  static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)  { -	unsigned long rflags; +	unsigned long rflags, save_rflags;  	rflags = vmcs_readl(GUEST_RFLAGS); -	if (to_vmx(vcpu)->rmode.vm86_active) -		rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM); +	if (to_vmx(vcpu)->rmode.vm86_active) { +		rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; +		save_rflags = to_vmx(vcpu)->rmode.save_rflags; +		rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; +	}  	return rflags;  }  static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)  { -	if (to_vmx(vcpu)->rmode.vm86_active) +	if (to_vmx(vcpu)->rmode.vm86_active) { +		to_vmx(vcpu)->rmode.save_rflags = rflags;  		rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; +	}  	vmcs_writel(GUEST_RFLAGS, rflags);  } @@ -1482,8 +1490,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu)  	vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);  	flags = vmcs_readl(GUEST_RFLAGS); -	flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM); -	flags |= (vmx->rmode.save_iopl << IOPL_SHIFT); +	flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; +	flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS;  	vmcs_writel(GUEST_RFLAGS, flags);  	vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | @@ -1556,8 +1564,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)  	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);  	flags = vmcs_readl(GUEST_RFLAGS); -	vmx->rmode.save_iopl -		= (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; +	vmx->rmode.save_rflags = flags;  	flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e46282a5656..3c4ca98ad27 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -39,6 +39,7 @@  #include <linux/cpufreq.h>  #include <linux/user-return-notifier.h>  #include <linux/srcu.h> +#include <linux/slab.h>  #include <trace/events/kvm.h>  #undef TRACE_INCLUDE_FILE  #define CREATE_TRACE_POINTS @@ -432,8 +433,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)  #ifdef CONFIG_X86_64  	if (cr0 & 0xffffffff00000000UL) { -		printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", -		       cr0, kvm_read_cr0(vcpu));  		kvm_inject_gp(vcpu, 0);  		return;  	} @@ -442,14 +441,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)  	cr0 &= ~CR0_RESERVED_BITS;  	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { -		printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");  		kvm_inject_gp(vcpu, 0);  		return;  	}  	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { -		printk(KERN_DEBUG "set_cr0: #GP, set PG flag " -		       "and a clear PE flag\n");  		kvm_inject_gp(vcpu, 0);  		return;  	} @@ 
-460,15 +456,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)  			int cs_db, cs_l;  			if (!is_pae(vcpu)) { -				printk(KERN_DEBUG "set_cr0: #GP, start paging " -				       "in long mode while PAE is disabled\n");  				kvm_inject_gp(vcpu, 0);  				return;  			}  			kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);  			if (cs_l) { -				printk(KERN_DEBUG "set_cr0: #GP, start paging " -				       "in long mode while CS.L == 1\n");  				kvm_inject_gp(vcpu, 0);  				return; @@ -476,8 +468,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)  		} else  #endif  		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { -			printk(KERN_DEBUG "set_cr0: #GP, pdptrs " -			       "reserved bits\n");  			kvm_inject_gp(vcpu, 0);  			return;  		} @@ -504,28 +494,23 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)  	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE;  	if (cr4 & CR4_RESERVED_BITS) { -		printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");  		kvm_inject_gp(vcpu, 0);  		return;  	}  	if (is_long_mode(vcpu)) {  		if (!(cr4 & X86_CR4_PAE)) { -			printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " -			       "in long mode\n");  			kvm_inject_gp(vcpu, 0);  			return;  		}  	} else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)  		   && ((cr4 ^ old_cr4) & pdptr_bits)  		   && !load_pdptrs(vcpu, vcpu->arch.cr3)) { -		printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");  		kvm_inject_gp(vcpu, 0);  		return;  	}  	if (cr4 & X86_CR4_VMXE) { -		printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");  		kvm_inject_gp(vcpu, 0);  		return;  	} @@ -546,21 +531,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)  	if (is_long_mode(vcpu)) {  		if (cr3 & CR3_L_MODE_RESERVED_BITS) { -			printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");  			kvm_inject_gp(vcpu, 0);  			return;  		}  	} else {  		if (is_pae(vcpu)) {  			if (cr3 & CR3_PAE_RESERVED_BITS) { -				printk(KERN_DEBUG -				       "set_cr3: #GP, reserved bits\n");  				kvm_inject_gp(vcpu, 0);  				return;  			}  			if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { -				printk(KERN_DEBUG "set_cr3: #GP, pdptrs " -				       "reserved bits\n");  				kvm_inject_gp(vcpu, 0);  				return;  			} @@ -592,7 +572,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3);  void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)  {  	if (cr8 & CR8_RESERVED_BITS) { -		printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);  		kvm_inject_gp(vcpu, 0);  		return;  	} @@ -648,15 +627,12 @@ static u32 emulated_msrs[] = {  static void set_efer(struct kvm_vcpu *vcpu, u64 efer)  {  	if (efer & efer_reserved_bits) { -		printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n", -		       efer);  		kvm_inject_gp(vcpu, 0);  		return;  	}  	if (is_paging(vcpu)  	    && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { -		printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");  		kvm_inject_gp(vcpu, 0);  		return;  	} @@ -666,7 +642,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)  		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);  		if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { -			printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n");  			kvm_inject_gp(vcpu, 0);  			return;  		} @@ -677,7 +652,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)  		feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);  		if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { -			printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");  			kvm_inject_gp(vcpu, 0);  			return;  		} @@ 
-966,9 +940,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data)  		if (msr >= MSR_IA32_MC0_CTL &&  		    msr < MSR_IA32_MC0_CTL + 4 * bank_num) {  			u32 offset = msr - MSR_IA32_MC0_CTL; -			/* only 0 or all 1s can be written to IA32_MCi_CTL */ +			/* only 0 or all 1s can be written to IA32_MCi_CTL; +			 * some Linux kernels, though, clear bit 10 in bank 4 to +			 * work around a BIOS/GART TLB issue on AMD K8s. Ignore +			 * this to avoid an uncaught #GP in the guest. +			 */  			if ((offset & 0x3) == 0 && -			    data != 0 && data != ~(u64)0) +			    data != 0 && (data | (1 << 10)) != ~(u64)0)  				return -1;  			vcpu->arch.mce_banks[offset] = data;  			break; @@ -2634,8 +2612,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,  int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,  				      struct kvm_dirty_log *log)  { -	int r, n, i; +	int r, i;  	struct kvm_memory_slot *memslot; +	unsigned long n;  	unsigned long is_dirty = 0;  	unsigned long *dirty_bitmap = NULL; @@ -2650,7 +2629,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,  	if (!memslot->dirty_bitmap)  		goto out; -	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; +	n = kvm_dirty_bitmap_bytes(memslot);  	r = -ENOMEM;  	dirty_bitmap = vmalloc(n); @@ -4482,7 +4461,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)  		kvm_set_cr8(vcpu, kvm_run->cr8);  	if (vcpu->arch.pio.cur_count) { +		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);  		r = complete_pio(vcpu); +		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);  		if (r)  			goto out;  	} @@ -5145,6 +5126,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)  	int ret = 0;  	u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);  	u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); +	u32 desc_limit;  	old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); @@ -5167,7 +5149,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)  		}  	} -	if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) { +	desc_limit = get_desc_limit(&nseg_desc); +	if (!nseg_desc.p || +	    ((desc_limit < 0x67 && (nseg_desc.type & 8)) || +	     desc_limit < 0x2b)) {  		kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);  		return 1;  	} diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 7e59dc1d3fc..2bdf628066b 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -115,7 +115,7 @@ static void async_hcall(unsigned long call, unsigned long arg1,  	local_irq_save(flags);  	if (lguest_data.hcall_status[next_call] != 0xFF) {  		/* Table full, so do normal hcall which will flush table. */ -		kvm_hypercall4(call, arg1, arg2, arg3, arg4); +		hcall(call, arg1, arg2, arg3, arg4);  	} else {  		lguest_data.hcalls[next_call].arg0 = call;  		lguest_data.hcalls[next_call].arg1 = arg1; @@ -145,46 +145,45 @@ static void async_hcall(unsigned long call, unsigned long arg1,   * So, when we're in lazy mode, we call async_hcall() to store the call for   * future processing:   */ -static void lazy_hcall1(unsigned long call, -		       unsigned long arg1) +static void lazy_hcall1(unsigned long call, unsigned long arg1)  {  	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) -		kvm_hypercall1(call, arg1); +		hcall(call, arg1, 0, 0, 0);  	else  		async_hcall(call, arg1, 0, 0, 0);  }  /* You can imagine what lazy_hcall2, 3 and 4 look like. :*/
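All of these lguest call sites now go through one hcall() primitive; async_hcall() keeps its role of parking calls in lguest_data.hcalls[] while in lazy mode, falling back to a synchronous hcall (which also flushes the backlog) when the table is full. The batching idea, reduced to a self-contained sketch (the ring size, struct hcall_entry and do_hcall() are invented here, not the lguest ABI):

#include <stdio.h>

#define HCALL_RING 64

struct hcall_entry { unsigned long call, arg1, arg2, arg3, arg4; };

static struct hcall_entry ring[HCALL_RING];
static unsigned int next_call;	/* next free slot, wraps at HCALL_RING */
static unsigned int pending;	/* entries not yet seen by the host */

/* Stand-in for the synchronous trap into the hypervisor; a real trap
 * would drain any queued entries before handling this call. */
static void do_hcall(unsigned long call, unsigned long a1, unsigned long a2,
		     unsigned long a3, unsigned long a4)
{
	printf("sync hcall %lu(%lu, %lu, %lu, %lu)\n", call, a1, a2, a3, a4);
	pending = 0;
}

static void async_hcall(unsigned long call, unsigned long a1, unsigned long a2,
			unsigned long a3, unsigned long a4)
{
	if (pending == HCALL_RING) {
		/* Ring full: a synchronous call flushes the backlog too. */
		do_hcall(call, a1, a2, a3, a4);
		return;
	}
	ring[next_call] = (struct hcall_entry){ call, a1, a2, a3, a4 };
	next_call = (next_call + 1) % HCALL_RING;
	pending++;
}

int main(void)
{
	async_hcall(1, 10, 0, 0, 0);	/* queued */
	async_hcall(2, 20, 21, 0, 0);	/* queued */
	do_hcall(3, 30, 0, 0, 0);	/* synchronous; drains the queue */
	printf("pending after flush: %u\n", pending);
	return 0;
}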
static void lazy_hcall2(unsigned long call, -		       unsigned long arg1, -		       unsigned long arg2) +			unsigned long arg1, +			unsigned long arg2)  {  	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) -		kvm_hypercall2(call, arg1, arg2); +		hcall(call, arg1, arg2, 0, 0);  	else  		async_hcall(call, arg1, arg2, 0, 0);  }  static void lazy_hcall3(unsigned long call, -		       unsigned long arg1, -		       unsigned long arg2, -		       unsigned long arg3) +			unsigned long arg1, +			unsigned long arg2, +			unsigned long arg3)  {  	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) -		kvm_hypercall3(call, arg1, arg2, arg3); +		hcall(call, arg1, arg2, arg3, 0);  	else  		async_hcall(call, arg1, arg2, arg3, 0);  }  #ifdef CONFIG_X86_PAE  static void lazy_hcall4(unsigned long call, -		       unsigned long arg1, -		       unsigned long arg2, -		       unsigned long arg3, -		       unsigned long arg4) +			unsigned long arg1, +			unsigned long arg2, +			unsigned long arg3, +			unsigned long arg4)  {  	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) -		kvm_hypercall4(call, arg1, arg2, arg3, arg4); +		hcall(call, arg1, arg2, arg3, arg4);  	else  		async_hcall(call, arg1, arg2, arg3, arg4);  } @@ -196,13 +195,13 @@ static void lazy_hcall4(unsigned long call,  :*/  static void lguest_leave_lazy_mmu_mode(void)  { -	kvm_hypercall0(LHCALL_FLUSH_ASYNC); +	hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);  	paravirt_leave_lazy_mmu();  }  static void lguest_end_context_switch(struct task_struct *next)  { -	kvm_hypercall0(LHCALL_FLUSH_ASYNC); +	hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0);  	paravirt_end_context_switch(next);  } @@ -286,7 +285,7 @@ static void lguest_write_idt_entry(gate_desc *dt,  	/* Keep the local copy up to date. */  	native_write_idt_entry(dt, entrynum, g);  	/* Tell Host about this new entry. */ -	kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]); +	hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1], 0);  }  /* @@ -300,7 +299,7 @@ static void lguest_load_idt(const struct desc_ptr *desc)  	struct desc_struct *idt = (void *)desc->address;  	for (i = 0; i < (desc->size+1)/8; i++) -		kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b); +		hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b, 0);  }  /* @@ -321,7 +320,7 @@ static void lguest_load_gdt(const struct desc_ptr *desc)  	struct desc_struct *gdt = (void *)desc->address;  	for (i = 0; i < (desc->size+1)/8; i++) -		kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b); +		hcall(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b, 0);  }  /* @@ -334,8 +333,8 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,  {  	native_write_gdt_entry(dt, entrynum, desc, type);  	/* Tell Host about this new entry. */ -	kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, entrynum, -		       dt[entrynum].a, dt[entrynum].b); +	hcall(LHCALL_LOAD_GDT_ENTRY, entrynum, +	      dt[entrynum].a, dt[entrynum].b, 0);  }  /* @@ -931,7 +930,7 @@ static int lguest_clockevent_set_next_event(unsigned long delta,  	}  	/* Please wake us this far in the future. */ -	kvm_hypercall1(LHCALL_SET_CLOCKEVENT, delta); +	hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0, 0);  	return 0;  } @@ -942,7 +941,7 @@ static void lguest_clockevent_set_mode(enum clock_event_mode mode,  	case CLOCK_EVT_MODE_UNUSED:  	case CLOCK_EVT_MODE_SHUTDOWN:  		/* A 0 argument shuts the clock down. */ -		kvm_hypercall0(LHCALL_SET_CLOCKEVENT); +		hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0, 0);  		break;  	case CLOCK_EVT_MODE_ONESHOT:  		/* This is what we expect. 
*/ @@ -1100,7 +1099,7 @@ static void set_lguest_basic_apic_ops(void)  /* STOP!  Until an interrupt comes in. */  static void lguest_safe_halt(void)  { -	kvm_hypercall0(LHCALL_HALT); +	hcall(LHCALL_HALT, 0, 0, 0, 0);  }  /* @@ -1112,8 +1111,8 @@ static void lguest_safe_halt(void)   */  static void lguest_power_off(void)  { -	kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"), -					LGUEST_SHUTDOWN_POWEROFF); +	hcall(LHCALL_SHUTDOWN, __pa("Power down"), +	      LGUEST_SHUTDOWN_POWEROFF, 0, 0);  }  /* @@ -1123,7 +1122,7 @@ static void lguest_power_off(void)   */  static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)  { -	kvm_hypercall2(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF); +	hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0, 0);  	/* The hcall won't return, but to keep gcc happy, we're "done". */  	return NOTIFY_DONE;  } @@ -1162,7 +1161,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)  		len = sizeof(scratch) - 1;  	scratch[len] = '\0';  	memcpy(scratch, buf, len); -	kvm_hypercall1(LHCALL_NOTIFY, __pa(scratch)); +	hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0, 0);  	/* This routine returns the number of bytes actually written. */  	return len; @@ -1174,7 +1173,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)   */  static void lguest_restart(char *reason)  { -	kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART); +	hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0, 0);  }  /*G:050 diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 27eac0faee4..4f420c2f2d5 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -32,7 +32,7 @@ ENTRY(lguest_entry)  	 */  	movl $LHCALL_LGUEST_INIT, %eax  	movl $lguest_data - __PAGE_OFFSET, %ebx -	.byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ +	int $LGUEST_TRAP_ENTRY  	/* Set up the initial stack so we can run C code. */  	movl	$(init_thread_union+THREAD_SIZE),%esp
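The i386_head.S hunk is the boot-side half of the same conversion: instead of hand-assembling the VMCALL opcode bytes, the guest now enters the host through its reserved software interrupt. A compile-only sketch of what the C-level wrapper plausibly looks like (the register assignments and the 0x1f vector are assumptions based on the lguest ABI of this era; the trap only does something useful when actually running under an lguest host):

#include <stdio.h>

#define LGUEST_TRAP_ENTRY 0x1f	/* assumed lguest trap vector */

static unsigned long hcall(unsigned long call, unsigned long arg1,
			   unsigned long arg2, unsigned long arg3,
			   unsigned long arg4)
{
	/* call number in %eax, args in %ebx/%ecx/%edx/%esi, result in %eax */
	asm volatile("int $0x1f"	/* LGUEST_TRAP_ENTRY */
		     : "=a"(call)
		     : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4)
		     : "memory");
	return call;
}

int main(void)
{
	/* Outside a guest the trap would fault, so just show the stub. */
	printf("hcall() compiled at %p; int $%#x traps to the host\n",
	       (void *)hcall, LGUEST_TRAP_ENTRY);
	return 0;
}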
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S index 15acecf0d7a..41fcf00e49d 100644 --- a/arch/x86/lib/rwsem_64.S +++ b/arch/x86/lib/rwsem_64.S @@ -60,7 +60,7 @@ ENTRY(call_rwsem_down_write_failed)  	ENDPROC(call_rwsem_down_write_failed)  ENTRY(call_rwsem_wake) -	decw %dx    /* do nothing if still outstanding active readers */ +	decl %edx	/* do nothing if still outstanding active readers */  	jnz 1f  	save_common_regs  	movq %rax,%rdi diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index f46c340727b..069ce7c37c0 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -9,7 +9,6 @@  #include <linux/mm.h>  #include <linux/hugetlb.h>  #include <linux/pagemap.h> -#include <linux/slab.h>  #include <linux/err.h>  #include <linux/sysctl.h>  #include <asm/mman.h> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index e71c5cbc8f3..b278535b14a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1,3 +1,4 @@ +#include <linux/gfp.h>  #include <linux/initrd.h>  #include <linux/ioport.h>  #include <linux/swap.h> @@ -331,11 +332,23 @@ int devmem_is_allowed(unsigned long pagenr)  void free_init_pages(char *what, unsigned long begin, unsigned long end)  { -	unsigned long addr = begin; +	unsigned long addr; +	unsigned long begin_aligned, end_aligned; -	if (addr >= end) +	/* Make sure boundaries are page aligned */ +	begin_aligned = PAGE_ALIGN(begin); +	end_aligned   = end & PAGE_MASK; + +	if (WARN_ON(begin_aligned != begin || end_aligned != end)) { +		begin = begin_aligned; +		end   = end_aligned; +	} + +	if (begin >= end)  		return; +	addr = begin; +  	/*  	 * If debugging page accesses then do not free this memory but  	 * mark them not present - any buggy init-section access will @@ -343,7 +356,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)  	 */  #ifdef CONFIG_DEBUG_PAGEALLOC  	printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", -		begin, PAGE_ALIGN(end)); +		begin, end);  	set_memory_np(begin, (end - begin) >> PAGE_SHIFT);  #else  	/* @@ -358,8 +371,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)  	for (; addr < end; addr += PAGE_SIZE) {  		ClearPageReserved(virt_to_page(addr));  		init_page_count(virt_to_page(addr)); -		memset((void *)(addr & ~(PAGE_SIZE-1)), -			POISON_FREE_INITMEM, PAGE_SIZE); +		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);  		free_page(addr);  		totalram_pages++;  	} @@ -376,6 +388,15 @@ void free_initmem(void)  #ifdef CONFIG_BLK_DEV_INITRD  void free_initrd_mem(unsigned long start, unsigned long end)  { -	free_init_pages("initrd memory", start, end); +	/* +	 * end may not be page aligned, and we cannot align it here: +	 * the decompressor would be confused by an aligned initrd_end. +	 * The partial end page was already reserved earlier in +	 *   - i386_start_kernel() +	 *   - x86_64_start_kernel() +	 *   - relocate_initrd() +	 * so PAGE_ALIGN() is safe here and lets the partial page be freed. +	 */ +	free_init_pages("initrd memory", start, PAGE_ALIGN(end));  }  #endif diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 5cb3f0f54f4..bca79091b9d 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -25,11 +25,11 @@  #include <linux/pfn.h>  #include <linux/poison.h>  #include <linux/bootmem.h> -#include <linux/slab.h>  #include <linux/proc_fs.h>  #include <linux/memory_hotplug.h>  #include <linux/initrd.h>  #include <linux/cpumask.h> +#include <linux/gfp.h>  #include <asm/asm.h> 
 #include <asm/bios_ebda.h> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e9b040e1cde..ee41bba315d 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -29,6 +29,7 @@  #include <linux/module.h>  #include <linux/memory_hotplug.h>  #include <linux/nmi.h> +#include <linux/gfp.h>  #include <asm/processor.h>  #include <asm/bios_ebda.h> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 5eb1ba74a3a..12e4d2d3c11 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -448,6 +448,20 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)  static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;  static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; +void __init fixup_early_ioremap(void) +{ +	int i; + +	for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { +		if (prev_map[i]) { +			WARN_ON(1); +			break; +		} +	} + +	early_ioremap_init(); +} +  static int __init check_early_ioremap_leak(void)  {  	int count = 0; diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 536fb682336..5d0e67fff1a 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -21,6 +21,7 @@  #include <linux/kdebug.h>  #include <linux/mutex.h>  #include <linux/io.h> +#include <linux/slab.h>  #include <asm/cacheflush.h>  #include <asm/tlbflush.h>  #include <linux/errno.h> diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 34a3291ca10..3adff7dcc14 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -26,6 +26,7 @@  #include <linux/module.h>  #include <linux/debugfs.h> +#include <linux/slab.h>  #include <linux/uaccess.h>  #include <linux/io.h>  #include <linux/version.h> diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index cf07c26d9a4..28195c350b9 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -6,13 +6,13 @@  #include <linux/bootmem.h>  #include <linux/module.h>  #include <linux/sched.h> -#include <linux/slab.h>  #include <linux/mm.h>  #include <linux/interrupt.h>  #include <linux/seq_file.h>  #include <linux/debugfs.h>  #include <linux/pfn.h>  #include <linux/percpu.h> +#include <linux/gfp.h>  #include <asm/e820.h>  #include <asm/processor.h> diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index ae9648eb1c7..edc8b95afc1 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -12,7 +12,7 @@  #include <linux/debugfs.h>  #include <linux/kernel.h>  #include <linux/module.h> -#include <linux/gfp.h> +#include <linux/slab.h>  #include <linux/mm.h>  #include <linux/fs.h>  #include <linux/rbtree.h> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index c9ba9deafe8..5c4ee422590 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -1,4 +1,5 @@  #include <linux/mm.h> +#include <linux/gfp.h>  #include <asm/pgalloc.h>  #include <asm/pgtable.h>  #include <asm/tlb.h> diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 46c8834aedc..792854003ed 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -6,7 +6,6 @@  #include <linux/swap.h>  #include <linux/smp.h>  #include <linux/highmem.h> -#include <linux/slab.h>  #include <linux/pagemap.h>  #include <linux/spinlock.h>  #include <linux/module.h> @@ -19,6 +18,7 @@  #include <asm/e820.h>  #include <asm/tlb.h>  #include <asm/tlbflush.h> +#include <asm/io.h>  unsigned int __VMALLOC_RESERVE = 128 << 20; @@ -129,6 +129,7 @@ static int __init parse_reservetop(char *arg)  	address = memparse(arg, &arg);  	reserve_top_address(address); +	fixup_early_ioremap();  	return 0;  }  early_param("reservetop", 
parse_reservetop); diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 6e22454bfaa..31930fd30ea 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -3,6 +3,7 @@  #include <linux/init.h>  #include <linux/irq.h>  #include <linux/dmi.h> +#include <linux/slab.h>  #include <asm/numa.h>  #include <asm/pci_x86.h> @@ -65,14 +66,44 @@ resource_to_addr(struct acpi_resource *resource,  			struct acpi_resource_address64 *addr)  {  	acpi_status status; +	struct acpi_resource_memory24 *memory24; +	struct acpi_resource_memory32 *memory32; +	struct acpi_resource_fixed_memory32 *fixed_memory32; -	status = acpi_resource_to_address64(resource, addr); -	if (ACPI_SUCCESS(status) && -	    (addr->resource_type == ACPI_MEMORY_RANGE || -	    addr->resource_type == ACPI_IO_RANGE) && -	    addr->address_length > 0 && -	    addr->producer_consumer == ACPI_PRODUCER) { +	memset(addr, 0, sizeof(*addr)); +	switch (resource->type) { +	case ACPI_RESOURCE_TYPE_MEMORY24: +		memory24 = &resource->data.memory24; +		addr->resource_type = ACPI_MEMORY_RANGE; +		addr->minimum = memory24->minimum; +		addr->address_length = memory24->address_length; +		addr->maximum = addr->minimum + addr->address_length - 1;  		return AE_OK; +	case ACPI_RESOURCE_TYPE_MEMORY32: +		memory32 = &resource->data.memory32; +		addr->resource_type = ACPI_MEMORY_RANGE; +		addr->minimum = memory32->minimum; +		addr->address_length = memory32->address_length; +		addr->maximum = addr->minimum + addr->address_length - 1; +		return AE_OK; +	case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: +		fixed_memory32 = &resource->data.fixed_memory32; +		addr->resource_type = ACPI_MEMORY_RANGE; +		addr->minimum = fixed_memory32->address; +		addr->address_length = fixed_memory32->address_length; +		addr->maximum = addr->minimum + addr->address_length - 1; +		return AE_OK; +	case ACPI_RESOURCE_TYPE_ADDRESS16: +	case ACPI_RESOURCE_TYPE_ADDRESS32: +	case ACPI_RESOURCE_TYPE_ADDRESS64: +		status = acpi_resource_to_address64(resource, addr); +		if (ACPI_SUCCESS(status) && +		    (addr->resource_type == ACPI_MEMORY_RANGE || +		    addr->resource_type == ACPI_IO_RANGE) && +		    addr->address_length > 0) { +			return AE_OK; +		} +		break;  	}  	return AE_ERROR;  } @@ -90,30 +121,6 @@ count_resource(struct acpi_resource *acpi_res, void *data)  	return AE_OK;  } -static void -align_resource(struct acpi_device *bridge, struct resource *res) -{ -	int align = (res->flags & IORESOURCE_MEM) ? 16 : 4; - -	/* -	 * Host bridge windows are not BARs, but the decoders on the PCI side -	 * that claim this address space have starting alignment and length -	 * constraints, so fix any obvious BIOS goofs. 
-	 */ -	if (!IS_ALIGNED(res->start, align)) { -		dev_printk(KERN_DEBUG, &bridge->dev, -			   "host bridge window %pR invalid; " -			   "aligning start to %d-byte boundary\n", res, align); -		res->start &= ~(align - 1); -	} -	if (!IS_ALIGNED(res->end + 1, align)) { -		dev_printk(KERN_DEBUG, &bridge->dev, -			   "host bridge window %pR invalid; " -			   "aligning end to %d-byte boundary\n", res, align); -		res->end = ALIGN(res->end, align) - 1; -	} -} -  static acpi_status  setup_resource(struct acpi_resource *acpi_res, void *data)  { @@ -122,7 +129,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data)  	struct acpi_resource_address64 addr;  	acpi_status status;  	unsigned long flags; -	struct resource *root; +	struct resource *root, *conflict;  	u64 start, end;  	status = resource_to_addr(acpi_res, &addr); @@ -141,7 +148,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data)  		return AE_OK;  	start = addr.minimum + addr.translation_offset; -	end = start + addr.address_length - 1; +	end = addr.maximum + addr.translation_offset;  	res = &info->res[info->res_num];  	res->name = info->name; @@ -149,7 +156,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)  	res->start = start;  	res->end = end;  	res->child = NULL; -	align_resource(info->bridge, res);  	if (!pci_use_crs) {  		dev_printk(KERN_DEBUG, &info->bridge->dev, @@ -157,9 +163,12 @@ setup_resource(struct acpi_resource *acpi_res, void *data)  		return AE_OK;  	} -	if (insert_resource(root, res)) { +	conflict = insert_resource_conflict(root, res); +	if (conflict) {  		dev_err(&info->bridge->dev, -			"can't allocate host bridge window %pR\n", res); +			"address space collision: host bridge window %pR " +			"conflicts with %s %pR\n", +			res, conflict->name, conflict);  	} else {  		pci_bus_add_resource(info->bus, res, 0);  		info->res_num++; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 294e10cb11e..cf2e93869c4 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -9,6 +9,7 @@  #include <linux/ioport.h>  #include <linux/init.h>  #include <linux/dmi.h> +#include <linux/slab.h>  #include <asm/acpi.h>  #include <asm/segment.h> diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index dece3eb9c90..97da2ba9344 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -72,6 +72,9 @@ pcibios_align_resource(void *data, const struct resource *res,  			return start;  		if (start & 0x300)  			start = (start + 0x3ff) & ~0x3ff; +	} else if (res->flags & IORESOURCE_MEM) { +		if (start < BIOS_END) +			start = BIOS_END;  	}  	return start;  } @@ -127,9 +130,6 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)  					continue;  				if (!r->start ||  				    pci_claim_resource(dev, idx) < 0) { -					dev_info(&dev->dev, -						 "can't reserve window %pR\n", -						 r);  					/*  					 * Something is wrong with the region.  					 
* Invalidate the resource to prevent @@ -181,8 +181,6 @@ static void __init pcibios_allocate_resources(int pass)  					"BAR %d: reserving %pr (d=%d, p=%d)\n",  					idx, r, disabled, pass);  				if (pci_claim_resource(dev, idx) < 0) { -					dev_info(&dev->dev, -						 "can't reserve %pR\n", r);  					/* We'll assign a new address later */  					r->end -= r->start;  					r->start = 0; diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 8b107521d24..5d362b5ba06 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -8,7 +8,6 @@  #include <linux/kernel.h>  #include <linux/pci.h>  #include <linux/init.h> -#include <linux/slab.h>  #include <linux/interrupt.h>  #include <linux/dmi.h>  #include <linux/io.h> diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 8f3f9a50b1e..39b9ebe8f88 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -16,6 +16,7 @@  #include <linux/sfi_acpi.h>  #include <linux/bitmap.h>  #include <linux/dmi.h> +#include <linux/slab.h>  #include <asm/e820.h>  #include <asm/pci_x86.h>  #include <asm/acpi.h> diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 1c975cc9839..59a225c17b8 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -4,6 +4,7 @@  #include <linux/pci.h>  #include <linux/init.h> +#include <linux/slab.h>  #include <linux/module.h>  #include <linux/uaccess.h>  #include <asm/pci_x86.h> diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index 81197c62d5b..3769079874d 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -6,6 +6,7 @@   * Copyright (c) 2006 Rafael J. Wysocki <rjw@sisk.pl>   */ +#include <linux/gfp.h>  #include <linux/suspend.h>  #include <linux/bootmem.h> diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 65fdc86e923..d24f983ba1e 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -8,6 +8,7 @@   * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>   */ +#include <linux/gfp.h>  #include <linux/smp.h>  #include <linux/suspend.h>  #include <asm/proto.h> diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S index b641388d828..ad47daeafa4 100644 --- a/arch/x86/power/hibernate_asm_32.S +++ b/arch/x86/power/hibernate_asm_32.S @@ -27,10 +27,17 @@ ENTRY(swsusp_arch_suspend)  	ret  ENTRY(restore_image) +	movl	mmu_cr4_features, %ecx  	movl	resume_pg_dir, %eax  	subl	$__PAGE_OFFSET, %eax  	movl	%eax, %cr3 +	jecxz	1f	# cr4 Pentium and higher, skip if zero +	andl	$~(X86_CR4_PGE), %ecx +	movl	%ecx, %cr4;  # turn off PGE +	movl	%cr3, %eax;  # flush TLB +	movl	%eax, %cr3 +1:  	movl	restore_pblist, %edx  	.p2align 4,,7 @@ -54,16 +61,8 @@ done:  	movl	$swapper_pg_dir, %eax  	subl	$__PAGE_OFFSET, %eax  	movl	%eax, %cr3 -	/* Flush TLB, including "global" things (vmalloc) */  	movl	mmu_cr4_features, %ecx  	jecxz	1f	# cr4 Pentium and higher, skip if zero -	movl	%ecx, %edx -	andl	$~(X86_CR4_PGE), %edx -	movl	%edx, %cr4;  # turn off PGE -1: -	movl	%cr3, %eax;  # flush TLB -	movl	%eax, %cr3 -	jecxz	1f	# cr4 Pentium and higher, skip if zero  	movl	%ecx, %cr4;  # turn PGE back on  1: diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 21e1aeb9f3e..ac74869b814 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -6,6 +6,7 @@  #include <linux/mm.h>  #include <linux/err.h>  #include <linux/sched.h> +#include <linux/slab.h>  #include <linux/init.h>  #include <linux/random.h>  #include <linux/elf.h> diff --git 
a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c index e133ce25e29..1304bcec8ee 100644 --- a/arch/x86/xen/debugfs.c +++ b/arch/x86/xen/debugfs.c @@ -1,5 +1,6 @@  #include <linux/init.h>  #include <linux/debugfs.h> +#include <linux/slab.h>  #include <linux/module.h>  #include "debugfs.h" diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b607239c1ba..65d8d79b46a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -28,6 +28,7 @@  #include <linux/highmem.h>  #include <linux/console.h>  #include <linux/pci.h> +#include <linux/gfp.h>  #include <xen/xen.h>  #include <xen/interface/xen.h> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f9eb7de74f4..914f04695ce 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -43,6 +43,7 @@  #include <linux/debugfs.h>  #include <linux/bug.h>  #include <linux/module.h> +#include <linux/gfp.h>  #include <asm/pgtable.h>  #include <asm/tlbflush.h> diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index deafb65ef44..a29693fd313 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -14,6 +14,7 @@   */  #include <linux/sched.h>  #include <linux/err.h> +#include <linux/slab.h>  #include <linux/smp.h>  #include <asm/paravirt.h> diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 24ded31b5ae..e0500646585 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -6,6 +6,7 @@  #include <linux/spinlock.h>  #include <linux/debugfs.h>  #include <linux/log2.h> +#include <linux/gfp.h>  #include <asm/paravirt.h> diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 0d3f07cd1b5..32764b8880b 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -13,6 +13,7 @@  #include <linux/clockchips.h>  #include <linux/kernel_stat.h>  #include <linux/math64.h> +#include <linux/gfp.h>  #include <asm/pvclock.h>  #include <asm/xen/hypervisor.h>
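Stepping back, most hunks in this section are the mechanical side of one tree-wide change: slab.h is being decoupled from the headers (notably percpu.h) that used to drag it in implicitly, so every file must now directly include the header for the allocator interface it actually calls. The rule of thumb, sketched as a hypothetical module (the include_demo_* names are illustrative):

/*
 * kmalloc()/kfree() come from <linux/slab.h>, while page-level
 * allocation and the GFP_* flags come from <linux/gfp.h>. After the
 * decoupling, relying on slab.h arriving indirectly no longer builds.
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/slab.h>	/* kmalloc(), kfree() */
#include <linux/gfp.h>	/* __get_free_page(), GFP_KERNEL */

static void *buf;
static unsigned long page;

static int __init include_demo_init(void)
{
	buf = kmalloc(64, GFP_KERNEL);		/* needs slab.h */
	page = __get_free_page(GFP_KERNEL);	/* needs gfp.h */
	if (!buf || !page) {
		kfree(buf);			/* kfree(NULL) is safe */
		if (page)
			free_page(page);
		return -ENOMEM;
	}
	return 0;
}

static void __exit include_demo_exit(void)
{
	free_page(page);
	kfree(buf);
}

module_init(include_demo_init);
module_exit(include_demo_exit);
MODULE_LICENSE("GPL");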