Diffstat (limited to 'arch/x86/kernel')
64 files changed, 3538 insertions, 1230 deletions
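
Before the per-file hunks, a minimal sketch of the interface change most of them share: apic->cpu_mask_to_apicid_and() no longer returns an APIC ID with BAD_APICID as its failure sentinel; it now returns 0 or a negative errno and hands the ID back through an out-parameter, so callers in the diff such as msi_compose_msg() and __ioapic_set_affinity() can propagate the error. The sketch below is a standalone userspace model, not kernel code: the cpu_to_apicid table, the cpu_online array, NR_CPUS and the plain unsigned-long bitmask are invented stand-ins for the per-cpu x86_cpu_to_apicid data and struct cpumask, and the two function names are illustrative rather than copied from the tree.

/*
 * Userspace model of the callback-contract change in this diff:
 * old style returns an APIC ID and uses BAD_APICID as an error sentinel,
 * new style (mirroring default_cpu_mask_to_apicid_and) returns 0 or a
 * negative errno and writes the APIC ID through an out-parameter.
 */
#include <errno.h>
#include <stdio.h>

#define NR_CPUS		4
#define BAD_APICID	0xFFFFu

/* Simplified stand-ins for per-cpu x86_cpu_to_apicid and cpu_online_mask. */
static const unsigned int cpu_to_apicid[NR_CPUS] = { 0x00, 0x02, 0x04, 0x06 };
static const int cpu_online[NR_CPUS] = { 1, 1, 0, 1 };

/* Old style: sentinel return value, no distinct error code for callers. */
static unsigned int old_cpu_mask_to_apicid(unsigned long mask)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if ((mask & (1UL << cpu)) && cpu_online[cpu])
			return cpu_to_apicid[cpu];
	return BAD_APICID;
}

/* New style: pick the first online cpu in both masks, report -EINVAL if none. */
static int new_cpu_mask_to_apicid_and(unsigned long mask, unsigned long andmask,
				      unsigned int *apicid)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if ((mask & andmask & (1UL << cpu)) && cpu_online[cpu]) {
			*apicid = cpu_to_apicid[cpu];
			return 0;
		}
	}
	return -EINVAL;	/* callers in the diff propagate this error */
}

int main(void)
{
	unsigned int dest;
	int err;

	printf("old style: %#x\n", old_cpu_mask_to_apicid(0x9));

	err = new_cpu_mask_to_apicid_and(0x9, 0xf, &dest);
	if (!err)
		printf("new style: %#x\n", dest);
	else
		printf("new style: error %d\n", err);
	return 0;
}
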
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8afb6931981..b2297e58c6e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -422,12 +422,14 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,  		return 0;  	} -	if (intsrc->source_irq == 0 && intsrc->global_irq == 2) { +	if (intsrc->source_irq == 0) {  		if (acpi_skip_timer_override) { -			printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); +			printk(PREFIX "BIOS IRQ0 override ignored.\n");  			return 0;  		} -		if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) { + +		if ((intsrc->global_irq == 2) && acpi_fix_pin2_polarity +			&& (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {  			intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK;  			printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n");  		} @@ -1334,17 +1336,12 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d)  }  /* - * Force ignoring BIOS IRQ0 pin2 override + * Force ignoring BIOS IRQ0 override   */  static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)  { -	/* -	 * The ati_ixp4x0_rev() early PCI quirk should have set -	 * the acpi_skip_timer_override flag already: -	 */  	if (!acpi_skip_timer_override) { -		WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n"); -		pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", +		pr_notice("%s detected: Ignoring BIOS IRQ0 override\n",  			d->ident);  		acpi_skip_timer_override = 1;  	} @@ -1438,7 +1435,7 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {  	 * is enabled.  This input is incorrectly designated the  	 * ISA IRQ 0 via an interrupt source override even though  	 * it is wired to the output of the master 8259A and INTIN0 -	 * is not connected at all.  Force ignoring BIOS IRQ0 pin2 +	 * is not connected at all.  Force ignoring BIOS IRQ0  	 * override in that cases.  	 */  	{ @@ -1473,6 +1470,14 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {  		     DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"),  		     },  	 }, +	{ +	 .callback = dmi_ignore_irq0_timer_override, +	 .ident = "FUJITSU SIEMENS", +	 .matches = { +		     DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), +		     DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"), +		     }, +	 },  	{}  }; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 1f84794f075..931280ff829 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) "SMP alternatives: " fmt +  #include <linux/module.h>  #include <linux/sched.h>  #include <linux/mutex.h> @@ -63,8 +65,11 @@ static int __init setup_noreplace_paravirt(char *str)  __setup("noreplace-paravirt", setup_noreplace_paravirt);  #endif -#define DPRINTK(fmt, args...) if (debug_alternative) \ -	printk(KERN_DEBUG fmt, args) +#define DPRINTK(fmt, ...)				
\ +do {							\ +	if (debug_alternative)				\ +		printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\ +} while (0)  /*   * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes @@ -428,7 +433,7 @@ void alternatives_smp_switch(int smp)  	 * If this still occurs then you should see a hang  	 * or crash shortly after this line:  	 */ -	printk("lockdep: fixing up alternatives.\n"); +	pr_info("lockdep: fixing up alternatives\n");  #endif  	if (noreplace_smp || smp_alt_once || skip_smp_alternatives) @@ -444,14 +449,14 @@ void alternatives_smp_switch(int smp)  	if (smp == smp_mode) {  		/* nothing */  	} else if (smp) { -		printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); +		pr_info("switching to SMP code\n");  		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);  		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);  		list_for_each_entry(mod, &smp_alt_modules, next)  			alternatives_smp_lock(mod->locks, mod->locks_end,  					      mod->text, mod->text_end);  	} else { -		printk(KERN_INFO "SMP alternatives: switching to UP code\n"); +		pr_info("switching to UP code\n");  		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);  		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);  		list_for_each_entry(mod, &smp_alt_modules, next) @@ -546,7 +551,7 @@ void __init alternative_instructions(void)  #ifdef CONFIG_SMP  	if (smp_alt_once) {  		if (1 == num_possible_cpus()) { -			printk(KERN_INFO "SMP alternatives: switching to UP code\n"); +			pr_info("switching to UP code\n");  			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);  			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); @@ -664,7 +669,7 @@ static int __kprobes stop_machine_text_poke(void *data)  	struct text_poke_param *p;  	int i; -	if (atomic_dec_and_test(&stop_machine_first)) { +	if (atomic_xchg(&stop_machine_first, 0)) {  		for (i = 0; i < tpp->nparams; i++) {  			p = &tpp->params[i];  			text_poke(p->addr, p->opcode, p->len); diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index be16854591c..aadf3359e2a 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -2,6 +2,9 @@   * Shared support code for AMD K8 northbridges and derivates.   * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.   
*/ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/types.h>  #include <linux/slab.h>  #include <linux/init.h> @@ -16,6 +19,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {  	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },  	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },  	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, +	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },  	{}  };  EXPORT_SYMBOL(amd_nb_misc_ids); @@ -258,7 +262,7 @@ void amd_flush_garts(void)  	}  	spin_unlock_irqrestore(&gart_lock, flags);  	if (!flushed) -		printk("nothing to flush?\n"); +		pr_notice("nothing to flush?\n");  }  EXPORT_SYMBOL_GPL(amd_flush_garts); @@ -269,11 +273,10 @@ static __init int init_amd_nbs(void)  	err = amd_cache_northbridges();  	if (err < 0) -		printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n"); +		pr_notice("Cannot enumerate AMD northbridges\n");  	if (amd_cache_gart() < 0) -		printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, " -		       "GART support disabled.\n"); +		pr_notice("Cannot initialize GART flush words, GART support disabled\n");  	return err;  } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 39a222e094a..c421512ca5e 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2123,6 +2123,25 @@ void default_init_apic_ldr(void)  	apic_write(APIC_LDR, val);  } +int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, +				   const struct cpumask *andmask, +				   unsigned int *apicid) +{ +	unsigned int cpu; + +	for_each_cpu_and(cpu, cpumask, andmask) { +		if (cpumask_test_cpu(cpu, cpu_online_mask)) +			break; +	} + +	if (likely(cpu < nr_cpu_ids)) { +		*apicid = per_cpu(x86_cpu_to_apicid, cpu); +		return 0; +	} + +	return -EINVAL; +} +  /*   * Power management   */ diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 0e881c46e8c..00c77cf78e9 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -36,25 +36,6 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)  	return 1;  } -static const struct cpumask *flat_target_cpus(void) -{ -	return cpu_online_mask; -} - -static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	/* Careful. Some cpus do not strictly honor the set of cpus -	 * specified in the interrupt destination when using lowest -	 * priority interrupt delivery mode. -	 * -	 * In particular there was a hyperthreading cpu observed to -	 * deliver interrupts to the wrong hyperthread when only one -	 * hyperthread was specified in the interrupt desitination. -	 */ -	cpumask_clear(retmask); -	cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} -  /*   * Set up the logical destination ID.   
* @@ -92,7 +73,7 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)  }  static void - flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) +flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)  {  	unsigned long mask = cpumask_bits(cpumask)[0];  	int cpu = smp_processor_id(); @@ -186,7 +167,7 @@ static struct apic apic_flat =  {  	.irq_delivery_mode		= dest_LowestPrio,  	.irq_dest_mode			= 1, /* logical */ -	.target_cpus			= flat_target_cpus, +	.target_cpus			= online_target_cpus,  	.disable_esr			= 0,  	.dest_logical			= APIC_DEST_LOGICAL,  	.check_apicid_used		= NULL, @@ -210,8 +191,7 @@ static struct apic apic_flat =  {  	.set_apic_id			= set_apic_id,  	.apic_id_mask			= 0xFFu << 24, -	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid, -	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and, +	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,  	.send_IPI_mask			= flat_send_IPI_mask,  	.send_IPI_mask_allbutself	= flat_send_IPI_mask_allbutself, @@ -262,17 +242,6 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)  	return 0;  } -static const struct cpumask *physflat_target_cpus(void) -{ -	return cpu_online_mask; -} - -static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	cpumask_clear(retmask); -	cpumask_set_cpu(cpu, retmask); -} -  static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)  {  	default_send_IPI_mask_sequence_phys(cpumask, vector); @@ -294,38 +263,6 @@ static void physflat_send_IPI_all(int vector)  	physflat_send_IPI_mask(cpu_online_mask, vector);  } -static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ -	int cpu; - -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. -	 */ -	cpu = cpumask_first(cpumask); -	if ((unsigned)cpu < nr_cpu_ids) -		return per_cpu(x86_cpu_to_apicid, cpu); -	else -		return BAD_APICID; -} - -static unsigned int -physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, -				const struct cpumask *andmask) -{ -	int cpu; - -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. 
-	 */ -	for_each_cpu_and(cpu, cpumask, andmask) { -		if (cpumask_test_cpu(cpu, cpu_online_mask)) -			break; -	} -	return per_cpu(x86_cpu_to_apicid, cpu); -} -  static int physflat_probe(void)  {  	if (apic == &apic_physflat || num_possible_cpus() > 8) @@ -345,13 +282,13 @@ static struct apic apic_physflat =  {  	.irq_delivery_mode		= dest_Fixed,  	.irq_dest_mode			= 0, /* physical */ -	.target_cpus			= physflat_target_cpus, +	.target_cpus			= online_target_cpus,  	.disable_esr			= 0,  	.dest_logical			= 0,  	.check_apicid_used		= NULL,  	.check_apicid_present		= NULL, -	.vector_allocation_domain	= physflat_vector_allocation_domain, +	.vector_allocation_domain	= default_vector_allocation_domain,  	/* not needed, but shouldn't hurt: */  	.init_apic_ldr			= flat_init_apic_ldr, @@ -370,8 +307,7 @@ static struct apic apic_physflat =  {  	.set_apic_id			= set_apic_id,  	.apic_id_mask			= 0xFFu << 24, -	.cpu_mask_to_apicid		= physflat_cpu_mask_to_apicid, -	.cpu_mask_to_apicid_and		= physflat_cpu_mask_to_apicid_and, +	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,  	.send_IPI_mask			= physflat_send_IPI_mask,  	.send_IPI_mask_allbutself	= physflat_send_IPI_mask_allbutself, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index a6e4c6e06c0..e145f28b409 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -100,12 +100,12 @@ static unsigned long noop_check_apicid_present(int bit)  	return physid_isset(bit, phys_cpu_present_map);  } -static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) +static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask, +					  const struct cpumask *mask)  {  	if (cpu != 0)  		pr_warning("APIC: Vector allocated for non-BSP cpu\n"); -	cpumask_clear(retmask); -	cpumask_set_cpu(cpu, retmask); +	cpumask_copy(retmask, cpumask_of(cpu));  }  static u32 noop_apic_read(u32 reg) @@ -159,8 +159,7 @@ struct apic apic_noop = {  	.set_apic_id			= NULL,  	.apic_id_mask			= 0x0F << 24, -	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid, -	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and, +	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,  	.send_IPI_mask			= noop_send_IPI_mask,  	.send_IPI_mask_allbutself	= noop_send_IPI_mask_allbutself, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 6ec6d5d297c..bc552cff257 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -72,17 +72,6 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)  	return initial_apic_id >> index_msb;  } -static const struct cpumask *numachip_target_cpus(void) -{ -	return cpu_online_mask; -} - -static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	cpumask_clear(retmask); -	cpumask_set_cpu(cpu, retmask); -} -  static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)  {  	union numachip_csr_g3_ext_irq_gen int_gen; @@ -157,38 +146,6 @@ static void numachip_send_IPI_self(int vector)  	__default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);  } -static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ -	int cpu; - -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. 
-	 */ -	cpu = cpumask_first(cpumask); -	if (likely((unsigned)cpu < nr_cpu_ids)) -		return per_cpu(x86_cpu_to_apicid, cpu); - -	return BAD_APICID; -} - -static unsigned int -numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask, -				const struct cpumask *andmask) -{ -	int cpu; - -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. -	 */ -	for_each_cpu_and(cpu, cpumask, andmask) { -		if (cpumask_test_cpu(cpu, cpu_online_mask)) -			break; -	} -	return per_cpu(x86_cpu_to_apicid, cpu); -} -  static int __init numachip_probe(void)  {  	return apic == &apic_numachip; @@ -253,13 +210,13 @@ static struct apic apic_numachip __refconst = {  	.irq_delivery_mode		= dest_Fixed,  	.irq_dest_mode			= 0, /* physical */ -	.target_cpus			= numachip_target_cpus, +	.target_cpus			= online_target_cpus,  	.disable_esr			= 0,  	.dest_logical			= 0,  	.check_apicid_used		= NULL,  	.check_apicid_present		= NULL, -	.vector_allocation_domain	= numachip_vector_allocation_domain, +	.vector_allocation_domain	= default_vector_allocation_domain,  	.init_apic_ldr			= flat_init_apic_ldr,  	.ioapic_phys_id_map		= NULL, @@ -277,8 +234,7 @@ static struct apic apic_numachip __refconst = {  	.set_apic_id			= set_apic_id,  	.apic_id_mask			= 0xffU << 24, -	.cpu_mask_to_apicid		= numachip_cpu_mask_to_apicid, -	.cpu_mask_to_apicid_and		= numachip_cpu_mask_to_apicid_and, +	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,  	.send_IPI_mask			= numachip_send_IPI_mask,  	.send_IPI_mask_allbutself	= numachip_send_IPI_mask_allbutself, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 31fbdbfbf96..d50e3640d5a 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -26,15 +26,6 @@ static int bigsmp_apic_id_registered(void)  	return 1;  } -static const struct cpumask *bigsmp_target_cpus(void) -{ -#ifdef CONFIG_SMP -	return cpu_online_mask; -#else -	return cpumask_of(0); -#endif -} -  static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)  {  	return 0; @@ -105,32 +96,6 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid)  	return 1;  } -/* As we are using single CPU as destination, pick only one CPU here */ -static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ -	int cpu = cpumask_first(cpumask); - -	if (cpu < nr_cpu_ids) -		return cpu_physical_id(cpu); -	return BAD_APICID; -} - -static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, -			      const struct cpumask *andmask) -{ -	int cpu; - -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. 
-	 */ -	for_each_cpu_and(cpu, cpumask, andmask) { -		if (cpumask_test_cpu(cpu, cpu_online_mask)) -			return cpu_physical_id(cpu); -	} -	return BAD_APICID; -} -  static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)  {  	return cpuid_apic >> index_msb; @@ -177,12 +142,6 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {  	{ } /* NULL entry stops DMI scanning */  }; -static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	cpumask_clear(retmask); -	cpumask_set_cpu(cpu, retmask); -} -  static int probe_bigsmp(void)  {  	if (def_to_bigsmp) @@ -205,13 +164,13 @@ static struct apic apic_bigsmp = {  	/* phys delivery to target CPU: */  	.irq_dest_mode			= 0, -	.target_cpus			= bigsmp_target_cpus, +	.target_cpus			= default_target_cpus,  	.disable_esr			= 1,  	.dest_logical			= 0,  	.check_apicid_used		= bigsmp_check_apicid_used,  	.check_apicid_present		= bigsmp_check_apicid_present, -	.vector_allocation_domain	= bigsmp_vector_allocation_domain, +	.vector_allocation_domain	= default_vector_allocation_domain,  	.init_apic_ldr			= bigsmp_init_apic_ldr,  	.ioapic_phys_id_map		= bigsmp_ioapic_phys_id_map, @@ -229,8 +188,7 @@ static struct apic apic_bigsmp = {  	.set_apic_id			= NULL,  	.apic_id_mask			= 0xFF << 24, -	.cpu_mask_to_apicid		= bigsmp_cpu_mask_to_apicid, -	.cpu_mask_to_apicid_and		= bigsmp_cpu_mask_to_apicid_and, +	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,  	.send_IPI_mask			= bigsmp_send_IPI_mask,  	.send_IPI_mask_allbutself	= NULL, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index db4ab1be3c7..0874799a98c 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -394,21 +394,6 @@ static void es7000_enable_apic_mode(void)  		WARN(1, "Command failed, status = %x\n", mip_status);  } -static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	/* Careful. Some cpus do not strictly honor the set of cpus -	 * specified in the interrupt destination when using lowest -	 * priority interrupt delivery mode. -	 * -	 * In particular there was a hyperthreading cpu observed to -	 * deliver interrupts to the wrong hyperthread when only one -	 * hyperthread was specified in the interrupt desitination. -	 */ -	cpumask_clear(retmask); -	cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} - -  static void es7000_wait_for_init_deassert(atomic_t *deassert)  {  	while (!atomic_read(deassert)) @@ -540,45 +525,49 @@ static int es7000_check_phys_apicid_present(int cpu_physical_apicid)  	return 1;  } -static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask) +static inline int +es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)  {  	unsigned int round = 0; -	int cpu, uninitialized_var(apicid); +	unsigned int cpu, uninitialized_var(apicid);  	/*  	 * The cpus in the mask must all be on the apic cluster.  	 
*/ -	for_each_cpu(cpu, cpumask) { +	for_each_cpu_and(cpu, cpumask, cpu_online_mask) {  		int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);  		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {  			WARN(1, "Not a valid mask!"); -			return BAD_APICID; +			return -EINVAL;  		} -		apicid = new_apicid; +		apicid |= new_apicid;  		round++;  	} -	return apicid; +	if (!round) +		return -EINVAL; +	*dest_id = apicid; +	return 0;  } -static unsigned int +static int  es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, -			      const struct cpumask *andmask) +			      const struct cpumask *andmask, +			      unsigned int *apicid)  { -	int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);  	cpumask_var_t cpumask; +	*apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);  	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) -		return apicid; +		return 0;  	cpumask_and(cpumask, inmask, andmask); -	cpumask_and(cpumask, cpumask, cpu_online_mask); -	apicid = es7000_cpu_mask_to_apicid(cpumask); +	es7000_cpu_mask_to_apicid(cpumask, apicid);  	free_cpumask_var(cpumask); -	return apicid; +	return 0;  }  static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) @@ -638,7 +627,7 @@ static struct apic __refdata apic_es7000_cluster = {  	.check_apicid_used		= es7000_check_apicid_used,  	.check_apicid_present		= es7000_check_apicid_present, -	.vector_allocation_domain	= es7000_vector_allocation_domain, +	.vector_allocation_domain	= flat_vector_allocation_domain,  	.init_apic_ldr			= es7000_init_apic_ldr_cluster,  	.ioapic_phys_id_map		= es7000_ioapic_phys_id_map, @@ -656,7 +645,6 @@ static struct apic __refdata apic_es7000_cluster = {  	.set_apic_id			= NULL,  	.apic_id_mask			= 0xFF << 24, -	.cpu_mask_to_apicid		= es7000_cpu_mask_to_apicid,  	.cpu_mask_to_apicid_and		= es7000_cpu_mask_to_apicid_and,  	.send_IPI_mask			= es7000_send_IPI_mask, @@ -705,7 +693,7 @@ static struct apic __refdata apic_es7000 = {  	.check_apicid_used		= es7000_check_apicid_used,  	.check_apicid_present		= es7000_check_apicid_present, -	.vector_allocation_domain	= es7000_vector_allocation_domain, +	.vector_allocation_domain	= flat_vector_allocation_domain,  	.init_apic_ldr			= es7000_init_apic_ldr,  	.ioapic_phys_id_map		= es7000_ioapic_phys_id_map, @@ -723,7 +711,6 @@ static struct apic __refdata apic_es7000 = {  	.set_apic_id			= NULL,  	.apic_id_mask			= 0xFF << 24, -	.cpu_mask_to_apicid		= es7000_cpu_mask_to_apicid,  	.cpu_mask_to_apicid_and		= es7000_cpu_mask_to_apicid_and,  	.send_IPI_mask			= es7000_send_IPI_mask, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5f0ff597437..406eee78468 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -448,8 +448,8 @@ static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pi  	entry = alloc_irq_pin_list(node);  	if (!entry) { -		printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", -				node, apic, pin); +		pr_err("can not alloc irq_pin_list (%d,%d,%d)\n", +		       node, apic, pin);  		return -ENOMEM;  	}  	entry->apic = apic; @@ -661,7 +661,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)  	ioapic_mask_entry(apic, pin);  	entry = ioapic_read_entry(apic, pin);  	if (entry.irr) -		printk(KERN_ERR "Unable to reset IRR for apic: %d, pin :%d\n", +		pr_err("Unable to reset IRR for apic: %d, pin :%d\n",  		       mpc_ioapic_id(apic), pin);  } @@ -895,7 +895,7 @@ static int irq_polarity(int idx)  		}  		case 2: /* reserved */  		{ -			
printk(KERN_WARNING "broken BIOS!!\n"); +			pr_warn("broken BIOS!!\n");  			polarity = 1;  			break;  		} @@ -906,7 +906,7 @@ static int irq_polarity(int idx)  		}  		default: /* invalid */  		{ -			printk(KERN_WARNING "broken BIOS!!\n"); +			pr_warn("broken BIOS!!\n");  			polarity = 1;  			break;  		} @@ -948,7 +948,7 @@ static int irq_trigger(int idx)  				}  				default:  				{ -					printk(KERN_WARNING "broken BIOS!!\n"); +					pr_warn("broken BIOS!!\n");  					trigger = 1;  					break;  				} @@ -962,7 +962,7 @@ static int irq_trigger(int idx)  		}  		case 2: /* reserved */  		{ -			printk(KERN_WARNING "broken BIOS!!\n"); +			pr_warn("broken BIOS!!\n");  			trigger = 1;  			break;  		} @@ -973,7 +973,7 @@ static int irq_trigger(int idx)  		}  		default: /* invalid */  		{ -			printk(KERN_WARNING "broken BIOS!!\n"); +			pr_warn("broken BIOS!!\n");  			trigger = 0;  			break;  		} @@ -991,7 +991,7 @@ static int pin_2_irq(int idx, int apic, int pin)  	 * Debugging check, we are in big trouble if this message pops up!  	 */  	if (mp_irqs[idx].dstirq != pin) -		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); +		pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");  	if (test_bit(bus, mp_bus_not_pci)) {  		irq = mp_irqs[idx].srcbusirq; @@ -1112,8 +1112,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)  	 * 0x80, because int 0x80 is hm, kind of importantish. ;)  	 */  	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; -	static int current_offset = VECTOR_OFFSET_START % 8; -	unsigned int old_vector; +	static int current_offset = VECTOR_OFFSET_START % 16;  	int cpu, err;  	cpumask_var_t tmp_mask; @@ -1123,35 +1122,45 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)  	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))  		return -ENOMEM; -	old_vector = cfg->vector; -	if (old_vector) { -		cpumask_and(tmp_mask, mask, cpu_online_mask); -		cpumask_and(tmp_mask, cfg->domain, tmp_mask); -		if (!cpumask_empty(tmp_mask)) { -			free_cpumask_var(tmp_mask); -			return 0; -		} -	} -  	/* Only try and allocate irqs on cpus that are present */  	err = -ENOSPC; -	for_each_cpu_and(cpu, mask, cpu_online_mask) { -		int new_cpu; -		int vector, offset; +	cpumask_clear(cfg->old_domain); +	cpu = cpumask_first_and(mask, cpu_online_mask); +	while (cpu < nr_cpu_ids) { +		int new_cpu, vector, offset; -		apic->vector_allocation_domain(cpu, tmp_mask); +		apic->vector_allocation_domain(cpu, tmp_mask, mask); + +		if (cpumask_subset(tmp_mask, cfg->domain)) { +			err = 0; +			if (cpumask_equal(tmp_mask, cfg->domain)) +				break; +			/* +			 * New cpumask using the vector is a proper subset of +			 * the current in use mask. So cleanup the vector +			 * allocation for the members that are not used anymore. +			 */ +			cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask); +			cfg->move_in_progress = 1; +			cpumask_and(cfg->domain, cfg->domain, tmp_mask); +			break; +		}  		vector = current_vector;  		offset = current_offset;  next: -		vector += 8; +		vector += 16;  		if (vector >= first_system_vector) { -			/* If out of vectors on large boxen, must share them. 
*/ -			offset = (offset + 1) % 8; +			offset = (offset + 1) % 16;  			vector = FIRST_EXTERNAL_VECTOR + offset;  		} -		if (unlikely(current_vector == vector)) + +		if (unlikely(current_vector == vector)) { +			cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask); +			cpumask_andnot(tmp_mask, mask, cfg->old_domain); +			cpu = cpumask_first_and(tmp_mask, cpu_online_mask);  			continue; +		}  		if (test_bit(vector, used_vectors))  			goto next; @@ -1162,7 +1171,7 @@ next:  		/* Found one! */  		current_vector = vector;  		current_offset = offset; -		if (old_vector) { +		if (cfg->vector) {  			cfg->move_in_progress = 1;  			cpumask_copy(cfg->old_domain, cfg->domain);  		} @@ -1346,18 +1355,18 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,  	if (!IO_APIC_IRQ(irq))  		return; -	/* -	 * For legacy irqs, cfg->domain starts with cpu 0 for legacy -	 * controllers like 8259. Now that IO-APIC can handle this irq, update -	 * the cfg->domain. -	 */ -	if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) -		apic->vector_allocation_domain(0, cfg->domain);  	if (assign_irq_vector(irq, cfg, apic->target_cpus()))  		return; -	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); +	if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(), +					 &dest)) { +		pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n", +			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); +		__clear_irq_vector(irq, cfg); + +		return; +	}  	apic_printk(APIC_VERBOSE,KERN_DEBUG  		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " @@ -1366,7 +1375,7 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,  		    cfg->vector, irq, attr->trigger, attr->polarity, dest);  	if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { -		pr_warn("Failed to setup ioapic entry for ioapic  %d, pin %d\n", +		pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",  			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);  		__clear_irq_vector(irq, cfg); @@ -1469,9 +1478,10 @@ void setup_IO_APIC_irq_extra(u32 gsi)   * Set up the timer pin, possibly with the 8259A-master behind.   */  static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, -					 unsigned int pin, int vector) +					unsigned int pin, int vector)  {  	struct IO_APIC_route_entry entry; +	unsigned int dest;  	if (irq_remapping_enabled)  		return; @@ -1482,9 +1492,13 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,  	 * We use logical delivery to get the timer IRQ  	 * to the first CPU.  	 */ +	if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(), +						  apic->target_cpus(), &dest))) +		dest = BAD_APICID; +  	entry.dest_mode = apic->irq_dest_mode;  	entry.mask = 0;			/* don't mask IRQ for edge */ -	entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); +	entry.dest = dest;  	entry.delivery_mode = apic->irq_delivery_mode;  	entry.polarity = 0;  	entry.trigger = 0; @@ -1521,7 +1535,6 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)  		reg_03.raw = io_apic_read(ioapic_idx, 3);  	raw_spin_unlock_irqrestore(&ioapic_lock, flags); -	printk("\n");  	printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));  	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);  	printk(KERN_DEBUG ".......    
: physical APIC id: %02X\n", reg_00.bits.ID); @@ -1578,7 +1591,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)  				i,  				ir_entry->index  			); -			printk("%1d   %1d    %1d    %1d   %1d   " +			pr_cont("%1d   %1d    %1d    %1d   %1d   "  				"%1d    %1d     %X    %02X\n",  				ir_entry->format,  				ir_entry->mask, @@ -1598,7 +1611,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)  				i,  				entry.dest  			); -			printk("%1d    %1d    %1d   %1d   %1d    " +			pr_cont("%1d    %1d    %1d   %1d   %1d    "  				"%1d    %1d    %02X\n",  				entry.mask,  				entry.trigger, @@ -1651,8 +1664,8 @@ __apicdebuginit(void) print_IO_APICs(void)  			continue;  		printk(KERN_DEBUG "IRQ%d ", irq);  		for_each_irq_pin(entry, cfg->irq_2_pin) -			printk("-> %d:%d", entry->apic, entry->pin); -		printk("\n"); +			pr_cont("-> %d:%d", entry->apic, entry->pin); +		pr_cont("\n");  	}  	printk(KERN_INFO ".................................... done.\n"); @@ -1665,9 +1678,9 @@ __apicdebuginit(void) print_APIC_field(int base)  	printk(KERN_DEBUG);  	for (i = 0; i < 8; i++) -		printk(KERN_CONT "%08x", apic_read(base + i*0x10)); +		pr_cont("%08x", apic_read(base + i*0x10)); -	printk(KERN_CONT "\n"); +	pr_cont("\n");  }  __apicdebuginit(void) print_local_APIC(void *dummy) @@ -1769,7 +1782,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy)  			printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);  		}  	} -	printk("\n"); +	pr_cont("\n");  }  __apicdebuginit(void) print_local_APICs(int maxcpu) @@ -2065,7 +2078,7 @@ void __init setup_ioapic_ids_from_mpc_nocheck(void)  		reg_00.raw = io_apic_read(ioapic_idx, 0);  		raw_spin_unlock_irqrestore(&ioapic_lock, flags);  		if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) -			printk("could not set ID!\n"); +			pr_cont("could not set ID!\n");  		else  			apic_printk(APIC_VERBOSE, " ok.\n");  	} @@ -2210,71 +2223,6 @@ void send_cleanup_vector(struct irq_cfg *cfg)  	cfg->move_in_progress = 0;  } -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) -{ -	int apic, pin; -	struct irq_pin_list *entry; -	u8 vector = cfg->vector; - -	for_each_irq_pin(entry, cfg->irq_2_pin) { -		unsigned int reg; - -		apic = entry->apic; -		pin = entry->pin; -		/* -		 * With interrupt-remapping, destination information comes -		 * from interrupt-remapping table entry. -		 */ -		if (!irq_remapped(cfg)) -			io_apic_write(apic, 0x11 + pin*2, dest); -		reg = io_apic_read(apic, 0x10 + pin*2); -		reg &= ~IO_APIC_REDIR_VECTOR_MASK; -		reg |= vector; -		io_apic_modify(apic, 0x10 + pin*2, reg); -	} -} - -/* - * Either sets data->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and - * leaves data->affinity untouched. - */ -int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, -			  unsigned int *dest_id) -{ -	struct irq_cfg *cfg = data->chip_data; - -	if (!cpumask_intersects(mask, cpu_online_mask)) -		return -1; - -	if (assign_irq_vector(data->irq, data->chip_data, mask)) -		return -1; - -	cpumask_copy(data->affinity, mask); - -	*dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain); -	return 0; -} - -static int -ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, -		    bool force) -{ -	unsigned int dest, irq = data->irq; -	unsigned long flags; -	int ret; - -	raw_spin_lock_irqsave(&ioapic_lock, flags); -	ret = __ioapic_set_affinity(data, mask, &dest); -	if (!ret) { -		/* Only the high 8 bits are valid. 
*/ -		dest = SET_APIC_LOGICAL_ID(dest); -		__target_IO_APIC_irq(irq, dest, data->chip_data); -	} -	raw_spin_unlock_irqrestore(&ioapic_lock, flags); -	return ret; -} -  asmlinkage void smp_irq_move_cleanup_interrupt(void)  {  	unsigned vector, me; @@ -2362,6 +2310,87 @@ void irq_force_complete_move(int irq)  static inline void irq_complete_move(struct irq_cfg *cfg) { }  #endif +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) +{ +	int apic, pin; +	struct irq_pin_list *entry; +	u8 vector = cfg->vector; + +	for_each_irq_pin(entry, cfg->irq_2_pin) { +		unsigned int reg; + +		apic = entry->apic; +		pin = entry->pin; +		/* +		 * With interrupt-remapping, destination information comes +		 * from interrupt-remapping table entry. +		 */ +		if (!irq_remapped(cfg)) +			io_apic_write(apic, 0x11 + pin*2, dest); +		reg = io_apic_read(apic, 0x10 + pin*2); +		reg &= ~IO_APIC_REDIR_VECTOR_MASK; +		reg |= vector; +		io_apic_modify(apic, 0x10 + pin*2, reg); +	} +} + +/* + * Either sets data->affinity to a valid value, and returns + * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and + * leaves data->affinity untouched. + */ +int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, +			  unsigned int *dest_id) +{ +	struct irq_cfg *cfg = data->chip_data; +	unsigned int irq = data->irq; +	int err; + +	if (!config_enabled(CONFIG_SMP)) +		return -1; + +	if (!cpumask_intersects(mask, cpu_online_mask)) +		return -EINVAL; + +	err = assign_irq_vector(irq, cfg, mask); +	if (err) +		return err; + +	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id); +	if (err) { +		if (assign_irq_vector(irq, cfg, data->affinity)) +			pr_err("Failed to recover vector for irq %d\n", irq); +		return err; +	} + +	cpumask_copy(data->affinity, mask); + +	return 0; +} + +static int +ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, +		    bool force) +{ +	unsigned int dest, irq = data->irq; +	unsigned long flags; +	int ret; + +	if (!config_enabled(CONFIG_SMP)) +		return -1; + +	raw_spin_lock_irqsave(&ioapic_lock, flags); +	ret = __ioapic_set_affinity(data, mask, &dest); +	if (!ret) { +		/* Only the high 8 bits are valid. 
*/ +		dest = SET_APIC_LOGICAL_ID(dest); +		__target_IO_APIC_irq(irq, dest, data->chip_data); +		ret = IRQ_SET_MASK_OK_NOCOPY; +	} +	raw_spin_unlock_irqrestore(&ioapic_lock, flags); +	return ret; +} +  static void ack_apic_edge(struct irq_data *data)  {  	irq_complete_move(data->chip_data); @@ -2541,9 +2570,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip)  	chip->irq_ack = ir_ack_apic_edge;  	chip->irq_eoi = ir_ack_apic_level; -#ifdef CONFIG_SMP  	chip->irq_set_affinity = set_remapped_irq_affinity; -#endif  }  #endif /* CONFIG_IRQ_REMAP */ @@ -2554,9 +2581,7 @@ static struct irq_chip ioapic_chip __read_mostly = {  	.irq_unmask		= unmask_ioapic_irq,  	.irq_ack		= ack_apic_edge,  	.irq_eoi		= ack_apic_level, -#ifdef CONFIG_SMP  	.irq_set_affinity	= ioapic_set_affinity, -#endif  	.irq_retrigger		= ioapic_retrigger_irq,  }; @@ -3038,7 +3063,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,  	if (err)  		return err; -	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); +	err = apic->cpu_mask_to_apicid_and(cfg->domain, +					   apic->target_cpus(), &dest); +	if (err) +		return err;  	if (irq_remapped(cfg)) {  		compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); @@ -3072,7 +3100,6 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,  	return err;  } -#ifdef CONFIG_SMP  static int  msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)  { @@ -3092,9 +3119,8 @@ msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)  	__write_msi_msg(data->msi_desc, &msg); -	return 0; +	return IRQ_SET_MASK_OK_NOCOPY;  } -#endif /* CONFIG_SMP */  /*   * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, @@ -3105,9 +3131,7 @@ static struct irq_chip msi_chip = {  	.irq_unmask		= unmask_msi_irq,  	.irq_mask		= mask_msi_irq,  	.irq_ack		= ack_apic_edge, -#ifdef CONFIG_SMP  	.irq_set_affinity	= msi_set_affinity, -#endif  	.irq_retrigger		= ioapic_retrigger_irq,  }; @@ -3192,7 +3216,6 @@ void native_teardown_msi_irq(unsigned int irq)  }  #ifdef CONFIG_DMAR_TABLE -#ifdef CONFIG_SMP  static int  dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,  		      bool force) @@ -3214,19 +3237,15 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,  	dmar_msi_write(irq, &msg); -	return 0; +	return IRQ_SET_MASK_OK_NOCOPY;  } -#endif /* CONFIG_SMP */ -  static struct irq_chip dmar_msi_type = {  	.name			= "DMAR_MSI",  	.irq_unmask		= dmar_msi_unmask,  	.irq_mask		= dmar_msi_mask,  	.irq_ack		= ack_apic_edge, -#ifdef CONFIG_SMP  	.irq_set_affinity	= dmar_msi_set_affinity, -#endif  	.irq_retrigger		= ioapic_retrigger_irq,  }; @@ -3247,7 +3266,6 @@ int arch_setup_dmar_msi(unsigned int irq)  #ifdef CONFIG_HPET_TIMER -#ifdef CONFIG_SMP  static int hpet_msi_set_affinity(struct irq_data *data,  				 const struct cpumask *mask, bool force)  { @@ -3267,19 +3285,15 @@ static int hpet_msi_set_affinity(struct irq_data *data,  	hpet_msi_write(data->handler_data, &msg); -	return 0; +	return IRQ_SET_MASK_OK_NOCOPY;  } -#endif /* CONFIG_SMP */ -  static struct irq_chip hpet_msi_type = {  	.name = "HPET_MSI",  	.irq_unmask = hpet_msi_unmask,  	.irq_mask = hpet_msi_mask,  	.irq_ack = ack_apic_edge, -#ifdef CONFIG_SMP  	.irq_set_affinity = hpet_msi_set_affinity, -#endif  	.irq_retrigger = ioapic_retrigger_irq,  }; @@ -3314,8 +3328,6 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id)   */  #ifdef CONFIG_HT_IRQ -#ifdef CONFIG_SMP -  static void target_ht_irq(unsigned int 
irq, unsigned int dest, u8 vector)  {  	struct ht_irq_msg msg; @@ -3340,25 +3352,23 @@ ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)  		return -1;  	target_ht_irq(data->irq, dest, cfg->vector); -	return 0; +	return IRQ_SET_MASK_OK_NOCOPY;  } -#endif -  static struct irq_chip ht_irq_chip = {  	.name			= "PCI-HT",  	.irq_mask		= mask_ht_irq,  	.irq_unmask		= unmask_ht_irq,  	.irq_ack		= ack_apic_edge, -#ifdef CONFIG_SMP  	.irq_set_affinity	= ht_set_affinity, -#endif  	.irq_retrigger		= ioapic_retrigger_irq,  };  int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)  {  	struct irq_cfg *cfg; +	struct ht_irq_msg msg; +	unsigned dest;  	int err;  	if (disable_apic) @@ -3366,36 +3376,37 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)  	cfg = irq_cfg(irq);  	err = assign_irq_vector(irq, cfg, apic->target_cpus()); -	if (!err) { -		struct ht_irq_msg msg; -		unsigned dest; +	if (err) +		return err; + +	err = apic->cpu_mask_to_apicid_and(cfg->domain, +					   apic->target_cpus(), &dest); +	if (err) +		return err; -		dest = apic->cpu_mask_to_apicid_and(cfg->domain, -						    apic->target_cpus()); +	msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); -		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); +	msg.address_lo = +		HT_IRQ_LOW_BASE | +		HT_IRQ_LOW_DEST_ID(dest) | +		HT_IRQ_LOW_VECTOR(cfg->vector) | +		((apic->irq_dest_mode == 0) ? +			HT_IRQ_LOW_DM_PHYSICAL : +			HT_IRQ_LOW_DM_LOGICAL) | +		HT_IRQ_LOW_RQEOI_EDGE | +		((apic->irq_delivery_mode != dest_LowestPrio) ? +			HT_IRQ_LOW_MT_FIXED : +			HT_IRQ_LOW_MT_ARBITRATED) | +		HT_IRQ_LOW_IRQ_MASKED; -		msg.address_lo = -			HT_IRQ_LOW_BASE | -			HT_IRQ_LOW_DEST_ID(dest) | -			HT_IRQ_LOW_VECTOR(cfg->vector) | -			((apic->irq_dest_mode == 0) ? -				HT_IRQ_LOW_DM_PHYSICAL : -				HT_IRQ_LOW_DM_LOGICAL) | -			HT_IRQ_LOW_RQEOI_EDGE | -			((apic->irq_delivery_mode != dest_LowestPrio) ? 
-				HT_IRQ_LOW_MT_FIXED : -				HT_IRQ_LOW_MT_ARBITRATED) | -			HT_IRQ_LOW_IRQ_MASKED; +	write_ht_irq_msg(irq, &msg); -		write_ht_irq_msg(irq, &msg); +	irq_set_chip_and_handler_name(irq, &ht_irq_chip, +				      handle_edge_irq, "edge"); -		irq_set_chip_and_handler_name(irq, &ht_irq_chip, -					      handle_edge_irq, "edge"); +	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); -		dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); -	} -	return err; +	return 0;  }  #endif /* CONFIG_HT_IRQ */ @@ -3563,7 +3574,8 @@ static int __init io_apic_get_unique_id(int ioapic, int apic_id)  		/* Sanity check */  		if (reg_00.bits.ID != apic_id) { -			printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); +			pr_err("IOAPIC[%d]: Unable to change apic_id!\n", +			       ioapic);  			return -1;  		}  	} diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index f00a68cca37..d661ee95cab 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -406,16 +406,13 @@ static inline int numaq_check_phys_apicid_present(int phys_apicid)   * We use physical apicids here, not logical, so just return the default   * physical broadcast to stop people from breaking us   */ -static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ -	return 0x0F; -} - -static inline unsigned int +static int  numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, -			     const struct cpumask *andmask) +			     const struct cpumask *andmask, +			     unsigned int *apicid)  { -	return 0x0F; +	*apicid = 0x0F; +	return 0;  }  /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ @@ -441,20 +438,6 @@ static int probe_numaq(void)  	return found_numaq;  } -static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	/* Careful. Some cpus do not strictly honor the set of cpus -	 * specified in the interrupt destination when using lowest -	 * priority interrupt delivery mode. -	 * -	 * In particular there was a hyperthreading cpu observed to -	 * deliver interrupts to the wrong hyperthread when only one -	 * hyperthread was specified in the interrupt desitination. -	 */ -	cpumask_clear(retmask); -	cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} -  static void numaq_setup_portio_remap(void)  {  	int num_quads = num_online_nodes(); @@ -491,7 +474,7 @@ static struct apic __refdata apic_numaq = {  	.check_apicid_used		= numaq_check_apicid_used,  	.check_apicid_present		= numaq_check_apicid_present, -	.vector_allocation_domain	= numaq_vector_allocation_domain, +	.vector_allocation_domain	= flat_vector_allocation_domain,  	.init_apic_ldr			= numaq_init_apic_ldr,  	.ioapic_phys_id_map		= numaq_ioapic_phys_id_map, @@ -509,7 +492,6 @@ static struct apic __refdata apic_numaq = {  	.set_apic_id			= NULL,  	.apic_id_mask			= 0x0F << 24, -	.cpu_mask_to_apicid		= numaq_cpu_mask_to_apicid,  	.cpu_mask_to_apicid_and		= numaq_cpu_mask_to_apicid_and,  	.send_IPI_mask			= numaq_send_IPI_mask, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1b291da09e6..eb35ef9ee63 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -66,21 +66,6 @@ static void setup_apic_flat_routing(void)  #endif  } -static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	/* -	 * Careful. Some cpus do not strictly honor the set of cpus -	 * specified in the interrupt destination when using lowest -	 * priority interrupt delivery mode. 
-	 * -	 * In particular there was a hyperthreading cpu observed to -	 * deliver interrupts to the wrong hyperthread when only one -	 * hyperthread was specified in the interrupt desitination. -	 */ -	cpumask_clear(retmask); -	cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} -  /* should be called last. */  static int probe_default(void)  { @@ -105,7 +90,7 @@ static struct apic apic_default = {  	.check_apicid_used		= default_check_apicid_used,  	.check_apicid_present		= default_check_apicid_present, -	.vector_allocation_domain	= default_vector_allocation_domain, +	.vector_allocation_domain	= flat_vector_allocation_domain,  	.init_apic_ldr			= default_init_apic_ldr,  	.ioapic_phys_id_map		= default_ioapic_phys_id_map, @@ -123,8 +108,7 @@ static struct apic apic_default = {  	.set_apic_id			= NULL,  	.apic_id_mask			= 0x0F << 24, -	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid, -	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and, +	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,  	.send_IPI_mask			= default_send_IPI_mask_logical,  	.send_IPI_mask_allbutself	= default_send_IPI_mask_allbutself_logical, @@ -208,6 +192,9 @@ void __init default_setup_apic_routing(void)  	if (apic->setup_apic_routing)  		apic->setup_apic_routing(); + +	if (x86_platform.apic_post_init) +		x86_platform.apic_post_init();  }  void __init generic_apic_probe(void) diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 3fe98669892..1793dba7a74 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -23,11 +23,6 @@  #include <asm/ipi.h>  #include <asm/setup.h> -static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) -{ -	return hard_smp_processor_id() >> index_msb; -} -  /*   * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.   */ @@ -48,10 +43,8 @@ void __init default_setup_apic_routing(void)  		}  	} -	if (is_vsmp_box()) { -		/* need to update phys_pkg_id */ -		apic->phys_pkg_id = apicid_phys_pkg_id; -	} +	if (x86_platform.apic_post_init) +		x86_platform.apic_post_init();  }  /* Same for both flat and physical. */ diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 659897c0075..77c95c0e1bf 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -26,6 +26,8 @@   *   */ +#define pr_fmt(fmt) "summit: %s: " fmt, __func__ +  #include <linux/mm.h>  #include <linux/init.h>  #include <asm/io.h> @@ -235,8 +237,8 @@ static int summit_apic_id_registered(void)  static void summit_setup_apic_routing(void)  { -	printk("Enabling APIC mode:  Summit.  Using %d I/O APICs\n", -						nr_ioapics); +	pr_info("Enabling APIC mode:  Summit.  Using %d I/O APICs\n", +		nr_ioapics);  }  static int summit_cpu_present_to_apicid(int mps_cpu) @@ -263,43 +265,48 @@ static int summit_check_phys_apicid_present(int physical_apicid)  	return 1;  } -static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask) +static inline int +summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id)  {  	unsigned int round = 0; -	int cpu, apicid = 0; +	unsigned int cpu, apicid = 0;  	/*  	 * The cpus in the mask must all be on the apic cluster.  	 
*/ -	for_each_cpu(cpu, cpumask) { +	for_each_cpu_and(cpu, cpumask, cpu_online_mask) {  		int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);  		if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { -			printk("%s: Not a valid mask!\n", __func__); -			return BAD_APICID; +			pr_err("Not a valid mask!\n"); +			return -EINVAL;  		}  		apicid |= new_apicid;  		round++;  	} -	return apicid; +	if (!round) +		return -EINVAL; +	*dest_id = apicid; +	return 0;  } -static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, -			      const struct cpumask *andmask) +static int +summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, +			      const struct cpumask *andmask, +			      unsigned int *apicid)  { -	int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);  	cpumask_var_t cpumask; +	*apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);  	if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) -		return apicid; +		return 0;  	cpumask_and(cpumask, inmask, andmask); -	cpumask_and(cpumask, cpumask, cpu_online_mask); -	apicid = summit_cpu_mask_to_apicid(cpumask); +	summit_cpu_mask_to_apicid(cpumask, apicid);  	free_cpumask_var(cpumask); -	return apicid; +	return 0;  }  /* @@ -320,20 +327,6 @@ static int probe_summit(void)  	return 0;  } -static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	/* Careful. Some cpus do not strictly honor the set of cpus -	 * specified in the interrupt destination when using lowest -	 * priority interrupt delivery mode. -	 * -	 * In particular there was a hyperthreading cpu observed to -	 * deliver interrupts to the wrong hyperthread when only one -	 * hyperthread was specified in the interrupt desitination. -	 */ -	cpumask_clear(retmask); -	cpumask_bits(retmask)[0] = APIC_ALL_CPUS; -} -  #ifdef CONFIG_X86_SUMMIT_NUMA  static struct rio_table_hdr *rio_table_hdr;  static struct scal_detail   *scal_devs[MAX_NUMNODES]; @@ -355,7 +348,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)  		}  	}  	if (i == rio_table_hdr->num_rio_dev) { -		printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__); +		pr_err("Couldn't find owner Cyclone for Winnipeg!\n");  		return last_bus;  	} @@ -366,7 +359,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)  		}  	}  	if (i == rio_table_hdr->num_scal_dev) { -		printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__); +		pr_err("Couldn't find owner Twister for Cyclone!\n");  		return last_bus;  	} @@ -396,7 +389,7 @@ static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)  		num_buses = 9;  		break;  	default: -		printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__); +		pr_info("Unsupported Winnipeg type!\n");  		return last_bus;  	} @@ -411,13 +404,15 @@ static int build_detail_arrays(void)  	int i, scal_detail_size, rio_detail_size;  	if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { -		printk(KERN_WARNING "%s: MAX_NUMNODES too low!  Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); +		pr_warn("MAX_NUMNODES too low!  
Defined as %d, but system has %d nodes\n", +			MAX_NUMNODES, rio_table_hdr->num_scal_dev);  		return 0;  	}  	switch (rio_table_hdr->version) {  	default: -		printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version); +		pr_warn("Invalid Rio Grande Table Version: %d\n", +			rio_table_hdr->version);  		return 0;  	case 2:  		scal_detail_size = 11; @@ -462,7 +457,7 @@ void setup_summit(void)  		offset = *((unsigned short *)(ptr + offset));  	}  	if (!rio_table_hdr) { -		printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__); +		pr_err("Unable to locate Rio Grande Table in EBDA - bailing!\n");  		return;  	} @@ -509,7 +504,7 @@ static struct apic apic_summit = {  	.check_apicid_used		= summit_check_apicid_used,  	.check_apicid_present		= summit_check_apicid_present, -	.vector_allocation_domain	= summit_vector_allocation_domain, +	.vector_allocation_domain	= flat_vector_allocation_domain,  	.init_apic_ldr			= summit_init_apic_ldr,  	.ioapic_phys_id_map		= summit_ioapic_phys_id_map, @@ -527,7 +522,6 @@ static struct apic apic_summit = {  	.set_apic_id			= NULL,  	.apic_id_mask			= 0xFF << 24, -	.cpu_mask_to_apicid		= summit_cpu_mask_to_apicid,  	.cpu_mask_to_apicid_and		= summit_cpu_mask_to_apicid_and,  	.send_IPI_mask			= summit_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index ff35cff0e1a..c88baa4ff0e 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -81,7 +81,7 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)  }  static void - x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)  {  	__x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);  } @@ -96,36 +96,37 @@ static void x2apic_send_IPI_all(int vector)  	__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);  } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +static int +x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, +			      const struct cpumask *andmask, +			      unsigned int *apicid)  { -	/* -	 * We're using fixed IRQ delivery, can only return one logical APIC ID. -	 * May as well be the first. -	 */ -	int cpu = cpumask_first(cpumask); +	u32 dest = 0; +	u16 cluster; +	int i; -	if ((unsigned)cpu < nr_cpu_ids) -		return per_cpu(x86_cpu_to_logical_apicid, cpu); -	else -		return BAD_APICID; -} +	for_each_cpu_and(i, cpumask, andmask) { +		if (!cpumask_test_cpu(i, cpu_online_mask)) +			continue; +		dest = per_cpu(x86_cpu_to_logical_apicid, i); +		cluster = x2apic_cluster(i); +		break; +	} -static unsigned int -x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, -			      const struct cpumask *andmask) -{ -	int cpu; +	if (!dest) +		return -EINVAL; -	/* -	 * We're using fixed IRQ delivery, can only return one logical APIC ID. -	 * May as well be the first. 
-	 */ -	for_each_cpu_and(cpu, cpumask, andmask) { -		if (cpumask_test_cpu(cpu, cpu_online_mask)) -			break; +	for_each_cpu_and(i, cpumask, andmask) { +		if (!cpumask_test_cpu(i, cpu_online_mask)) +			continue; +		if (cluster != x2apic_cluster(i)) +			continue; +		dest |= per_cpu(x86_cpu_to_logical_apicid, i);  	} -	return per_cpu(x86_cpu_to_logical_apicid, cpu); +	*apicid = dest; + +	return 0;  }  static void init_x2apic_ldr(void) @@ -208,6 +209,32 @@ static int x2apic_cluster_probe(void)  		return 0;  } +static const struct cpumask *x2apic_cluster_target_cpus(void) +{ +	return cpu_all_mask; +} + +/* + * Each x2apic cluster is an allocation domain. + */ +static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, +					     const struct cpumask *mask) +{ +	/* +	 * To minimize vector pressure, default case of boot, device bringup +	 * etc will use a single cpu for the interrupt destination. +	 * +	 * On explicit migration requests coming from irqbalance etc, +	 * interrupts will be routed to the x2apic cluster (cluster-id +	 * derived from the first cpu in the mask) members specified +	 * in the mask. +	 */ +	if (mask == x2apic_cluster_target_cpus()) +		cpumask_copy(retmask, cpumask_of(cpu)); +	else +		cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); +} +  static struct apic apic_x2apic_cluster = {  	.name				= "cluster x2apic", @@ -219,13 +246,13 @@ static struct apic apic_x2apic_cluster = {  	.irq_delivery_mode		= dest_LowestPrio,  	.irq_dest_mode			= 1, /* logical */ -	.target_cpus			= x2apic_target_cpus, +	.target_cpus			= x2apic_cluster_target_cpus,  	.disable_esr			= 0,  	.dest_logical			= APIC_DEST_LOGICAL,  	.check_apicid_used		= NULL,  	.check_apicid_present		= NULL, -	.vector_allocation_domain	= x2apic_vector_allocation_domain, +	.vector_allocation_domain	= cluster_vector_allocation_domain,  	.init_apic_ldr			= init_x2apic_ldr,  	.ioapic_phys_id_map		= NULL, @@ -243,7 +270,6 @@ static struct apic apic_x2apic_cluster = {  	.set_apic_id			= x2apic_set_apic_id,  	.apic_id_mask			= 0xFFFFFFFFu, -	.cpu_mask_to_apicid		= x2apic_cpu_mask_to_apicid,  	.cpu_mask_to_apicid_and		= x2apic_cpu_mask_to_apicid_and,  	.send_IPI_mask			= x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index c17e982db27..e03a1e180e8 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -76,38 +76,6 @@ static void x2apic_send_IPI_all(int vector)  	__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);  } -static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. -	 */ -	int cpu = cpumask_first(cpumask); - -	if ((unsigned)cpu < nr_cpu_ids) -		return per_cpu(x86_cpu_to_apicid, cpu); -	else -		return BAD_APICID; -} - -static unsigned int -x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, -			      const struct cpumask *andmask) -{ -	int cpu; - -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. 
-	 */ -	for_each_cpu_and(cpu, cpumask, andmask) { -		if (cpumask_test_cpu(cpu, cpu_online_mask)) -			break; -	} - -	return per_cpu(x86_cpu_to_apicid, cpu); -} -  static void init_x2apic_ldr(void)  {  } @@ -131,13 +99,13 @@ static struct apic apic_x2apic_phys = {  	.irq_delivery_mode		= dest_Fixed,  	.irq_dest_mode			= 0, /* physical */ -	.target_cpus			= x2apic_target_cpus, +	.target_cpus			= online_target_cpus,  	.disable_esr			= 0,  	.dest_logical			= 0,  	.check_apicid_used		= NULL,  	.check_apicid_present		= NULL, -	.vector_allocation_domain	= x2apic_vector_allocation_domain, +	.vector_allocation_domain	= default_vector_allocation_domain,  	.init_apic_ldr			= init_x2apic_ldr,  	.ioapic_phys_id_map		= NULL, @@ -155,8 +123,7 @@ static struct apic apic_x2apic_phys = {  	.set_apic_id			= x2apic_set_apic_id,  	.apic_id_mask			= 0xFFFFFFFFu, -	.cpu_mask_to_apicid		= x2apic_cpu_mask_to_apicid, -	.cpu_mask_to_apicid_and		= x2apic_cpu_mask_to_apicid_and, +	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,  	.send_IPI_mask			= x2apic_send_IPI_mask,  	.send_IPI_mask_allbutself	= x2apic_send_IPI_mask_allbutself, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index c6d03f7a440..8cfade9510a 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -185,17 +185,6 @@ EXPORT_SYMBOL_GPL(uv_possible_blades);  unsigned long sn_rtc_cycles_per_second;  EXPORT_SYMBOL(sn_rtc_cycles_per_second); -static const struct cpumask *uv_target_cpus(void) -{ -	return cpu_online_mask; -} - -static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) -{ -	cpumask_clear(retmask); -	cpumask_set_cpu(cpu, retmask); -} -  static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)  {  #ifdef CONFIG_SMP @@ -280,25 +269,12 @@ static void uv_init_apic_ldr(void)  {  } -static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) -{ -	/* -	 * We're using fixed IRQ delivery, can only return one phys APIC ID. -	 * May as well be the first. -	 */ -	int cpu = cpumask_first(cpumask); - -	if ((unsigned)cpu < nr_cpu_ids) -		return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; -	else -		return BAD_APICID; -} - -static unsigned int +static int  uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, -			  const struct cpumask *andmask) +			  const struct cpumask *andmask, +			  unsigned int *apicid)  { -	int cpu; +	int unsigned cpu;  	/*  	 * We're using fixed IRQ delivery, can only return one phys APIC ID. 
@@ -308,7 +284,13 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,  		if (cpumask_test_cpu(cpu, cpu_online_mask))  			break;  	} -	return per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; + +	if (likely(cpu < nr_cpu_ids)) { +		*apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits; +		return 0; +	} + +	return -EINVAL;  }  static unsigned int x2apic_get_apic_id(unsigned long x) @@ -362,13 +344,13 @@ static struct apic __refdata apic_x2apic_uv_x = {  	.irq_delivery_mode		= dest_Fixed,  	.irq_dest_mode			= 0, /* physical */ -	.target_cpus			= uv_target_cpus, +	.target_cpus			= online_target_cpus,  	.disable_esr			= 0,  	.dest_logical			= APIC_DEST_LOGICAL,  	.check_apicid_used		= NULL,  	.check_apicid_present		= NULL, -	.vector_allocation_domain	= uv_vector_allocation_domain, +	.vector_allocation_domain	= default_vector_allocation_domain,  	.init_apic_ldr			= uv_init_apic_ldr,  	.ioapic_phys_id_map		= NULL, @@ -386,7 +368,6 @@ static struct apic __refdata apic_x2apic_uv_x = {  	.set_apic_id			= set_apic_id,  	.apic_id_mask			= 0xFFFFFFFFu, -	.cpu_mask_to_apicid		= uv_cpu_mask_to_apicid,  	.cpu_mask_to_apicid_and		= uv_cpu_mask_to_apicid_and,  	.send_IPI_mask			= uv_send_IPI_mask, diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 07b0c0db466..d65464e4350 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -201,6 +201,8 @@   *    http://www.microsoft.com/whdc/archive/amp_12.mspx]   */ +#define pr_fmt(fmt) "apm: " fmt +  #include <linux/module.h>  #include <linux/poll.h> @@ -485,11 +487,11 @@ static void apm_error(char *str, int err)  		if (error_table[i].key == err)  			break;  	if (i < ERROR_COUNT) -		printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); +		pr_notice("%s: %s\n", str, error_table[i].msg);  	else if (err < 0) -		printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err); +		pr_notice("%s: linux error code %i\n", str, err);  	else -		printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", +		pr_notice("%s: unknown error code %#2.2x\n",  		       str, err);  } @@ -1184,7 +1186,7 @@ static void queue_event(apm_event_t event, struct apm_user *sender)  			static int notified;  			if (notified++ == 0) -			    printk(KERN_ERR "apm: an event queue overflowed\n"); +				pr_err("an event queue overflowed\n");  			if (++as->event_tail >= APM_MAX_EVENTS)  				as->event_tail = 0;  		} @@ -1447,7 +1449,7 @@ static void apm_mainloop(void)  static int check_apm_user(struct apm_user *as, const char *func)  {  	if (as == NULL || as->magic != APM_BIOS_MAGIC) { -		printk(KERN_ERR "apm: %s passed bad filp\n", func); +		pr_err("%s passed bad filp\n", func);  		return 1;  	}  	return 0; @@ -1586,7 +1588,7 @@ static int do_release(struct inode *inode, struct file *filp)  		     as1 = as1->next)  			;  		if (as1 == NULL) -			printk(KERN_ERR "apm: filp not in user list\n"); +			pr_err("filp not in user list\n");  		else  			as1->next = as->next;  	} @@ -1600,11 +1602,9 @@ static int do_open(struct inode *inode, struct file *filp)  	struct apm_user *as;  	as = kmalloc(sizeof(*as), GFP_KERNEL); -	if (as == NULL) { -		printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", -		       sizeof(*as)); +	if (as == NULL)  		return -ENOMEM; -	} +  	as->magic = APM_BIOS_MAGIC;  	as->event_tail = as->event_head = 0;  	as->suspends_pending = as->standbys_pending = 0; @@ -2313,16 +2313,16 @@ static int __init apm_init(void)  	}  	if (apm_info.disabled) { -		printk(KERN_NOTICE "apm: disabled on user request.\n"); +		pr_notice("disabled 
on user request.\n");  		return -ENODEV;  	}  	if ((num_online_cpus() > 1) && !power_off && !smp) { -		printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n"); +		pr_notice("disabled - APM is not SMP safe.\n");  		apm_info.disabled = 1;  		return -ENODEV;  	}  	if (!acpi_disabled) { -		printk(KERN_NOTICE "apm: overridden by ACPI.\n"); +		pr_notice("overridden by ACPI.\n");  		apm_info.disabled = 1;  		return -ENODEV;  	} @@ -2356,8 +2356,7 @@ static int __init apm_init(void)  	kapmd_task = kthread_create(apm, NULL, "kapmd");  	if (IS_ERR(kapmd_task)) { -		printk(KERN_ERR "apm: disabled - Unable to start kernel " -				"thread.\n"); +		pr_err("disabled - Unable to start kernel thread\n");  		err = PTR_ERR(kapmd_task);  		kapmd_task = NULL;  		remove_proc_entry("apm", NULL); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 6ab6aa2fdfd..bac4c3804cc 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -32,7 +32,9 @@ obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o  ifdef CONFIG_PERF_EVENTS  obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o -obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_p4.o perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o +obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_p4.o +obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o +obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore.o  endif  obj-$(CONFIG_X86_MCE)			+= mcheck/ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 146bb6218ee..9d92e19039f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -19,6 +19,39 @@  #include "cpu.h" +static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) +{ +	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); +	u32 gprs[8] = { 0 }; +	int err; + +	WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__); + +	gprs[1] = msr; +	gprs[7] = 0x9c5a203a; + +	err = rdmsr_safe_regs(gprs); + +	*p = gprs[0] | ((u64)gprs[2] << 32); + +	return err; +} + +static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val) +{ +	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); +	u32 gprs[8] = { 0 }; + +	WARN_ONCE((c->x86 != 0xf), "%s should only be used on K8!\n", __func__); + +	gprs[0] = (u32)val; +	gprs[1] = msr; +	gprs[2] = val >> 32; +	gprs[7] = 0x9c5a203a; + +	return wrmsr_safe_regs(gprs); +} +  #ifdef CONFIG_X86_32  /*   *	B step AMD K6 before B 9730xxxx have hardware bugs that can cause @@ -586,9 +619,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)  	    !cpu_has(c, X86_FEATURE_TOPOEXT)) {  		u64 val; -		if (!rdmsrl_amd_safe(0xc0011005, &val)) { +		if (!rdmsrl_safe(0xc0011005, &val)) {  			val |= 1ULL << 54; -			wrmsrl_amd_safe(0xc0011005, val); +			wrmsrl_safe(0xc0011005, val);  			rdmsrl(0xc0011005, val);  			if (val & (1ULL << 54)) {  				set_cpu_cap(c, X86_FEATURE_TOPOEXT); @@ -679,7 +712,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)  		err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask);  		if (err == 0) {  			mask |= (1 << 10); -			checking_wrmsrl(MSR_AMD64_MCx_MASK(4), mask); +			wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask);  		}  	} diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 46674fbb62b..c97bb7b5a9f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -55,8 +55,8 @@ static void __init check_fpu(void)  	if (!boot_cpu_data.hard_math) {  #ifndef CONFIG_MATH_EMULATION -		printk(KERN_EMERG "No coprocessor 
found and no math emulation present.\n"); -		printk(KERN_EMERG "Giving up.\n"); +		pr_emerg("No coprocessor found and no math emulation present\n"); +		pr_emerg("Giving up\n");  		for (;;) ;  #endif  		return; @@ -86,7 +86,7 @@ static void __init check_fpu(void)  	boot_cpu_data.fdiv_bug = fdiv_bug;  	if (boot_cpu_data.fdiv_bug) -		printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n"); +		pr_warn("Hmm, FPU with FDIV bug\n");  }  static void __init check_hlt(void) @@ -94,16 +94,16 @@ static void __init check_hlt(void)  	if (boot_cpu_data.x86 >= 5 || paravirt_enabled())  		return; -	printk(KERN_INFO "Checking 'hlt' instruction... "); +	pr_info("Checking 'hlt' instruction... ");  	if (!boot_cpu_data.hlt_works_ok) { -		printk("disabled\n"); +		pr_cont("disabled\n");  		return;  	}  	halt();  	halt();  	halt();  	halt(); -	printk(KERN_CONT "OK.\n"); +	pr_cont("OK\n");  }  /* @@ -116,7 +116,7 @@ static void __init check_popad(void)  #ifndef CONFIG_X86_POPAD_OK  	int res, inp = (int) &res; -	printk(KERN_INFO "Checking for popad bug... "); +	pr_info("Checking for popad bug... ");  	__asm__ __volatile__(  	  "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "  	  : "=&a" (res) @@ -127,9 +127,9 @@ static void __init check_popad(void)  	 * CPU hard. Too bad.  	 */  	if (res != 12345678) -		printk(KERN_CONT "Buggy.\n"); +		pr_cont("Buggy\n");  	else -		printk(KERN_CONT "OK.\n"); +		pr_cont("OK\n");  #endif  } @@ -161,7 +161,7 @@ void __init check_bugs(void)  {  	identify_boot_cpu();  #ifndef CONFIG_SMP -	printk(KERN_INFO "CPU: "); +	pr_info("CPU: ");  	print_cpu_info(&boot_cpu_data);  #endif  	check_config(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6b9333b429b..5bbc082c47a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -947,7 +947,7 @@ static void __cpuinit __print_cpu_msr(void)  		index_max = msr_range_array[i].max;  		for (index = index_min; index < index_max; index++) { -			if (rdmsrl_amd_safe(index, &val)) +			if (rdmsrl_safe(index, &val))  				continue;  			printk(KERN_INFO " MSR%08x: %016llx\n", index, val);  		} diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index da27c5d2168..9473e8772fd 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -7,6 +7,9 @@   * Copyright 2008 Intel Corporation   * Author: Andi Kleen   */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/thread_info.h>  #include <linux/capability.h>  #include <linux/miscdevice.h> @@ -210,7 +213,7 @@ static void drain_mcelog_buffer(void)  				cpu_relax();  				if (!m->finished && retries >= 4) { -					pr_err("MCE: skipping error being logged currently!\n"); +					pr_err("skipping error being logged currently!\n");  					break;  				}  			} @@ -1167,8 +1170,9 @@ int memory_failure(unsigned long pfn, int vector, int flags)  {  	/* mce_severity() should not hand us an ACTION_REQUIRED error */  	BUG_ON(flags & MF_ACTION_REQUIRED); -	printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n" -		"Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn); +	pr_err("Uncorrected memory error in page 0x%lx ignored\n" +	       "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", +	       pfn);  	return 0;  } @@ -1186,6 +1190,7 @@ void mce_notify_process(void)  {  	unsigned long pfn;  	struct mce_info *mi = mce_find_info(); +	int flags = MF_ACTION_REQUIRED;  	if (!mi)  		mce_panic("Lost physical address for unconsumed uncorrectable 
error", NULL, NULL); @@ -1200,8 +1205,9 @@ void mce_notify_process(void)  	 * doomed. We still need to mark the page as poisoned and alert any  	 * other users of the page.  	 */ -	if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0 || -			   mi->restartable == 0) { +	if (!mi->restartable) +		flags |= MF_MUST_KILL; +	if (memory_failure(pfn, MCE_VECTOR, flags) < 0) {  		pr_err("Memory error not recovered");  		force_sig(SIGBUS, current);  	} @@ -1358,11 +1364,10 @@ static int __cpuinit __mcheck_cpu_cap_init(void)  	b = cap & MCG_BANKCNT_MASK;  	if (!banks) -		printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); +		pr_info("CPU supports %d MCE banks\n", b);  	if (b > MAX_NR_BANKS) { -		printk(KERN_WARNING -		       "MCE: Using only %u machine check banks out of %u\n", +		pr_warn("Using only %u machine check banks out of %u\n",  			MAX_NR_BANKS, b);  		b = MAX_NR_BANKS;  	} @@ -1419,7 +1424,7 @@ static void __mcheck_cpu_init_generic(void)  static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)  {  	if (c->x86_vendor == X86_VENDOR_UNKNOWN) { -		pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); +		pr_info("unknown CPU type - not enabling MCE support\n");  		return -EOPNOTSUPP;  	} @@ -1574,7 +1579,7 @@ static void __mcheck_cpu_init_timer(void)  /* Handle unconfigured int18 (should never happen) */  static void unexpected_machine_check(struct pt_regs *regs, long error_code)  { -	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", +	pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",  	       smp_processor_id());  } @@ -1893,8 +1898,7 @@ static int __init mcheck_enable(char *str)  			get_option(&str, &monarch_timeout);  		}  	} else { -		printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", -		       str); +		pr_info("mce argument %s ignored. Please use /sys\n", str);  		return 0;  	}  	return 1; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index f4873a64f46..671b95a2ffb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -1,15 +1,17 @@  /* - *  (c) 2005, 2006 Advanced Micro Devices, Inc. + *  (c) 2005-2012 Advanced Micro Devices, Inc.   *  Your use of this code is subject to the terms and conditions of the   *  GNU general public license version 2. See "COPYING" or   *  http://www.gnu.org/licenses/gpl.html   *   *  Written by Jacob Shin - AMD, Inc.   
* - *  Support : jacob.shin@amd.com + *  Support: borislav.petkov@amd.com   *   *  April 2006   *     - added support for AMD Family 0x10 processors + *  May 2012 + *     - major scrubbing   *   *  All MC4_MISCi registers are shared between multi-cores   */ @@ -25,6 +27,7 @@  #include <linux/cpu.h>  #include <linux/smp.h> +#include <asm/amd_nb.h>  #include <asm/apic.h>  #include <asm/idle.h>  #include <asm/mce.h> @@ -45,23 +48,15 @@  #define MASK_BLKPTR_LO    0xFF000000  #define MCG_XBLK_ADDR     0xC0000400 -struct threshold_block { -	unsigned int		block; -	unsigned int		bank; -	unsigned int		cpu; -	u32			address; -	u16			interrupt_enable; -	bool			interrupt_capable; -	u16			threshold_limit; -	struct kobject		kobj; -	struct list_head	miscj; +static const char * const th_names[] = { +	"load_store", +	"insn_fetch", +	"combined_unit", +	"", +	"northbridge", +	"execution_unit",  }; -struct threshold_bank { -	struct kobject		*kobj; -	struct threshold_block	*blocks; -	cpumask_var_t		cpus; -};  static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);  static unsigned char shared_bank[NR_BANKS] = { @@ -84,6 +79,26 @@ struct thresh_restart {  	u16			old_limit;  }; +static const char * const bank4_names(struct threshold_block *b) +{ +	switch (b->address) { +	/* MSR4_MISC0 */ +	case 0x00000413: +		return "dram"; + +	case 0xc0000408: +		return "ht_links"; + +	case 0xc0000409: +		return "l3_cache"; + +	default: +		WARN(1, "Funny MSR: 0x%08x\n", b->address); +		return ""; +	} +}; + +  static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)  {  	/* @@ -224,8 +239,6 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)  			if (!block)  				per_cpu(bank_map, cpu) |= (1 << bank); -			if (shared_bank[bank] && c->cpu_core_id) -				break;  			memset(&b, 0, sizeof(b));  			b.cpu			= cpu; @@ -326,7 +339,7 @@ struct threshold_attr {  #define SHOW_FIELDS(name)						\  static ssize_t show_ ## name(struct threshold_block *b, char *buf)	\  {									\ -	return sprintf(buf, "%lx\n", (unsigned long) b->name);		\ +	return sprintf(buf, "%lu\n", (unsigned long) b->name);		\  }  SHOW_FIELDS(interrupt_enable)  SHOW_FIELDS(threshold_limit) @@ -377,38 +390,21 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)  	return size;  } -struct threshold_block_cross_cpu { -	struct threshold_block	*tb; -	long			retval; -}; - -static void local_error_count_handler(void *_tbcc) -{ -	struct threshold_block_cross_cpu *tbcc = _tbcc; -	struct threshold_block *b = tbcc->tb; -	u32 low, high; - -	rdmsr(b->address, low, high); -	tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); -} -  static ssize_t show_error_count(struct threshold_block *b, char *buf)  { -	struct threshold_block_cross_cpu tbcc = { .tb = b, }; +	u32 lo, hi; -	smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1); -	return sprintf(buf, "%lx\n", tbcc.retval); -} +	rdmsr_on_cpu(b->cpu, b->address, &lo, &hi); -static ssize_t store_error_count(struct threshold_block *b, -				 const char *buf, size_t count) -{ -	struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; - -	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); -	return 1; +	return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) - +				     (THRESHOLD_MAX - b->threshold_limit)));  } +static struct threshold_attr error_count = { +	.attr = {.name = __stringify(error_count), .mode = 0444 }, +	.show = show_error_count, +}; +  #define RW_ATTR(val)							\  static struct threshold_attr val = {					\  	.attr	= 
{.name = __stringify(val), .mode = 0644 },		\ @@ -418,7 +414,6 @@ static struct threshold_attr val = {					\  RW_ATTR(interrupt_enable);  RW_ATTR(threshold_limit); -RW_ATTR(error_count);  static struct attribute *default_attrs[] = {  	&threshold_limit.attr, @@ -517,7 +512,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu,  	err = kobject_init_and_add(&b->kobj, &threshold_ktype,  				   per_cpu(threshold_banks, cpu)[bank]->kobj, -				   "misc%i", block); +				   (bank == 4 ? bank4_names(b) : th_names[bank]));  	if (err)  		goto out_free;  recurse: @@ -548,98 +543,91 @@ out_free:  	return err;  } -static __cpuinit long -local_allocate_threshold_blocks(int cpu, unsigned int bank) +static __cpuinit int __threshold_add_blocks(struct threshold_bank *b)  { -	return allocate_threshold_blocks(cpu, bank, 0, -					 MSR_IA32_MC0_MISC + bank * 4); +	struct list_head *head = &b->blocks->miscj; +	struct threshold_block *pos = NULL; +	struct threshold_block *tmp = NULL; +	int err = 0; + +	err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name); +	if (err) +		return err; + +	list_for_each_entry_safe(pos, tmp, head, miscj) { + +		err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name); +		if (err) { +			list_for_each_entry_safe_reverse(pos, tmp, head, miscj) +				kobject_del(&pos->kobj); + +			return err; +		} +	} +	return err;  } -/* symlinks sibling shared banks to first core.  first core owns dir/files. */  static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)  { -	int i, err = 0; -	struct threshold_bank *b = NULL;  	struct device *dev = per_cpu(mce_device, cpu); -	char name[32]; - -	sprintf(name, "threshold_bank%i", bank); +	struct amd_northbridge *nb = NULL; +	struct threshold_bank *b = NULL; +	const char *name = th_names[bank]; +	int err = 0; -#ifdef CONFIG_SMP -	if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) {	/* symlink */ -		i = cpumask_first(cpu_llc_shared_mask(cpu)); +	if (shared_bank[bank]) { -		/* first core not up yet */ -		if (cpu_data(i).cpu_core_id) -			goto out; +		nb = node_to_amd_nb(amd_get_nb_id(cpu)); +		WARN_ON(!nb); -		/* already linked */ -		if (per_cpu(threshold_banks, cpu)[bank]) -			goto out; +		/* threshold descriptor already initialized on this node? 
*/ +		if (nb->bank4) { +			/* yes, use it */ +			b = nb->bank4; +			err = kobject_add(b->kobj, &dev->kobj, name); +			if (err) +				goto out; -		b = per_cpu(threshold_banks, i)[bank]; +			per_cpu(threshold_banks, cpu)[bank] = b; +			atomic_inc(&b->cpus); -		if (!b) -			goto out; +			err = __threshold_add_blocks(b); -		err = sysfs_create_link(&dev->kobj, b->kobj, name); -		if (err)  			goto out; - -		cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu)); -		per_cpu(threshold_banks, cpu)[bank] = b; - -		goto out; +		}  	} -#endif  	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);  	if (!b) {  		err = -ENOMEM;  		goto out;  	} -	if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) { -		kfree(b); -		err = -ENOMEM; -		goto out; -	}  	b->kobj = kobject_create_and_add(name, &dev->kobj); -	if (!b->kobj) +	if (!b->kobj) { +		err = -EINVAL;  		goto out_free; - -#ifndef CONFIG_SMP -	cpumask_setall(b->cpus); -#else -	cpumask_set_cpu(cpu, b->cpus); -#endif +	}  	per_cpu(threshold_banks, cpu)[bank] = b; -	err = local_allocate_threshold_blocks(cpu, bank); -	if (err) -		goto out_free; - -	for_each_cpu(i, b->cpus) { -		if (i == cpu) -			continue; +	if (shared_bank[bank]) { +		atomic_set(&b->cpus, 1); -		dev = per_cpu(mce_device, i); -		if (dev) -			err = sysfs_create_link(&dev->kobj,b->kobj, name); -		if (err) -			goto out; - -		per_cpu(threshold_banks, i)[bank] = b; +		/* nb is already initialized, see above */ +		WARN_ON(nb->bank4); +		nb->bank4 = b;  	} -	goto out; +	err = allocate_threshold_blocks(cpu, bank, 0, +					MSR_IA32_MC0_MISC + bank * 4); +	if (!err) +		goto out; -out_free: -	per_cpu(threshold_banks, cpu)[bank] = NULL; -	free_cpumask_var(b->cpus); + out_free:  	kfree(b); -out: + + out:  	return err;  } @@ -660,12 +648,6 @@ static __cpuinit int threshold_create_device(unsigned int cpu)  	return err;  } -/* - * let's be hotplug friendly. - * in case of multiple core processors, the first core always takes ownership - *   of shared sysfs dir/files, and rest of the cores will be symlinked to it. 
- */ -  static void deallocate_threshold_block(unsigned int cpu,  						 unsigned int bank)  { @@ -686,41 +668,42 @@ static void deallocate_threshold_block(unsigned int cpu,  	per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;  } +static void __threshold_remove_blocks(struct threshold_bank *b) +{ +	struct threshold_block *pos = NULL; +	struct threshold_block *tmp = NULL; + +	kobject_del(b->kobj); + +	list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj) +		kobject_del(&pos->kobj); +} +  static void threshold_remove_bank(unsigned int cpu, int bank)  { +	struct amd_northbridge *nb;  	struct threshold_bank *b; -	struct device *dev; -	char name[32]; -	int i = 0;  	b = per_cpu(threshold_banks, cpu)[bank];  	if (!b)  		return; +  	if (!b->blocks)  		goto free_out; -	sprintf(name, "threshold_bank%i", bank); - -#ifdef CONFIG_SMP -	/* sibling symlink */ -	if (shared_bank[bank] && b->blocks->cpu != cpu) { -		dev = per_cpu(mce_device, cpu); -		sysfs_remove_link(&dev->kobj, name); -		per_cpu(threshold_banks, cpu)[bank] = NULL; - -		return; -	} -#endif - -	/* remove all sibling symlinks before unregistering */ -	for_each_cpu(i, b->cpus) { -		if (i == cpu) -			continue; - -		dev = per_cpu(mce_device, i); -		if (dev) -			sysfs_remove_link(&dev->kobj, name); -		per_cpu(threshold_banks, i)[bank] = NULL; +	if (shared_bank[bank]) { +		if (!atomic_dec_and_test(&b->cpus)) { +			__threshold_remove_blocks(b); +			per_cpu(threshold_banks, cpu)[bank] = NULL; +			return; +		} else { +			/* +			 * the last CPU on this node using the shared bank is +			 * going away, remove that bank now. +			 */ +			nb = node_to_amd_nb(amd_get_nb_id(cpu)); +			nb->bank4 = NULL; +		}  	}  	deallocate_threshold_block(cpu, bank); @@ -728,7 +711,6 @@ static void threshold_remove_bank(unsigned int cpu, int bank)  free_out:  	kobject_del(b->kobj);  	kobject_put(b->kobj); -	free_cpumask_var(b->cpus);  	kfree(b);  	per_cpu(threshold_banks, cpu)[bank] = NULL;  } diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl index dfea390e160..c7b3fe2d72e 100644 --- a/arch/x86/kernel/cpu/mkcapflags.pl +++ b/arch/x86/kernel/cpu/mkcapflags.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/perl -w  #  # Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h  # @@ -11,22 +11,35 @@ open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";  print OUT "#include <asm/cpufeature.h>\n\n";  print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; +%features = (); +$err = 0; +  while (defined($line = <IN>)) {  	if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) {  		$macro = $1; -		$feature = $2; +		$feature = "\L$2";  		$tail = $3;  		if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) { -			$feature = $1; +			$feature = "\L$1";  		} -		if ($feature ne '') { -			printf OUT "\t%-32s = \"%s\",\n", -				"[$macro]", "\L$feature"; +		next if ($feature eq ''); + +		if ($features{$feature}++) { +			print STDERR "$in: duplicate feature name: $feature\n"; +			$err++;  		} +		printf OUT "\t%-32s = \"%s\",\n", "[$macro]", $feature;  	}  }  print OUT "};\n";  close(IN);  close(OUT); + +if ($err) { +	unlink($out); +	exit(1); +} + +exit(0); diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index bdda2e6c673..35ffda5d072 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -258,11 +258,11 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,  		/* Compute the maximum size with which we can make a range: */  		if (range_startk) -			
max_align = ffs(range_startk) - 1; +			max_align = __ffs(range_startk);  		else -			max_align = 32; +			max_align = BITS_PER_LONG - 1; -		align = fls(range_sizek) - 1; +		align = __fls(range_sizek);  		if (align > max_align)  			align = max_align; diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 75772ae6c65..e9fe907cd24 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -361,11 +361,7 @@ static void __init print_mtrr_state(void)  	}  	pr_debug("MTRR variable ranges %sabled:\n",  		 mtrr_state.enabled & 2 ? "en" : "dis"); -	if (size_or_mask & 0xffffffffUL) -		high_width = ffs(size_or_mask & 0xffffffffUL) - 1; -	else -		high_width = ffs(size_or_mask>>32) + 32 - 1; -	high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; +	high_width = (__ffs64(size_or_mask) - (32 - PAGE_SHIFT) + 3) / 4;  	for (i = 0; i < num_var_ranges; ++i) {  		if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c4706cf9c01..29557aa06dd 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -35,17 +35,6 @@  #include "perf_event.h" -#if 0 -#undef wrmsrl -#define wrmsrl(msr, val) 					\ -do {								\ -	trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ -			(unsigned long)(val));			\ -	native_write_msr((msr), (u32)((u64)(val)), 		\ -			(u32)((u64)(val) >> 32));		\ -} while (0) -#endif -  struct x86_pmu x86_pmu __read_mostly;  DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { @@ -74,7 +63,7 @@ u64 x86_perf_event_update(struct perf_event *event)  	int idx = hwc->idx;  	s64 delta; -	if (idx == X86_PMC_IDX_FIXED_BTS) +	if (idx == INTEL_PMC_IDX_FIXED_BTS)  		return 0;  	/* @@ -86,7 +75,7 @@ u64 x86_perf_event_update(struct perf_event *event)  	 */  again:  	prev_raw_count = local64_read(&hwc->prev_count); -	rdmsrl(hwc->event_base, new_raw_count); +	rdpmcl(hwc->event_base_rdpmc, new_raw_count);  	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,  					new_raw_count) != prev_raw_count) @@ -189,7 +178,7 @@ static void release_pmc_hardware(void) {}  static bool check_hw_exists(void)  { -	u64 val, val_new = 0; +	u64 val, val_new = ~0;  	int i, reg, ret = 0;  	/* @@ -222,8 +211,9 @@ static bool check_hw_exists(void)  	 * that don't trap on the MSR access and always return 0s.  	 
*/  	val = 0xabcdUL; -	ret = checking_wrmsrl(x86_pmu_event_addr(0), val); -	ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new); +	reg = x86_pmu_event_addr(0); +	ret = wrmsrl_safe(reg, val); +	ret |= rdmsrl_safe(reg, &val_new);  	if (ret || val != val_new)  		goto msr_fail; @@ -240,6 +230,7 @@ bios_fail:  msr_fail:  	printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); +	printk(KERN_ERR "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new);  	return false;  } @@ -388,7 +379,7 @@ int x86_pmu_hw_config(struct perf_event *event)  		int precise = 0;  		/* Support for constant skid */ -		if (x86_pmu.pebs_active) { +		if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {  			precise++;  			/* Support for IP fixup */ @@ -637,8 +628,8 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)  	c = sched->constraints[sched->state.event];  	/* Prefer fixed purpose counters */ -	if (x86_pmu.num_counters_fixed) { -		idx = X86_PMC_IDX_FIXED; +	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { +		idx = INTEL_PMC_IDX_FIXED;  		for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {  			if (!__test_and_set_bit(idx, sched->state.used))  				goto done; @@ -646,7 +637,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)  	}  	/* Grab the first unused counter starting with idx */  	idx = sched->state.counter; -	for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) { +	for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) {  		if (!__test_and_set_bit(idx, sched->state.used))  			goto done;  	} @@ -704,8 +695,8 @@ static bool perf_sched_next_event(struct perf_sched *sched)  /*   * Assign a counter for each event.   */ -static int perf_assign_events(struct event_constraint **constraints, int n, -			      int wmin, int wmax, int *assign) +int perf_assign_events(struct event_constraint **constraints, int n, +			int wmin, int wmax, int *assign)  {  	struct perf_sched sched; @@ -824,15 +815,17 @@ static inline void x86_assign_hw_event(struct perf_event *event,  	hwc->last_cpu = smp_processor_id();  	hwc->last_tag = ++cpuc->tags[i]; -	if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { +	if (hwc->idx == INTEL_PMC_IDX_FIXED_BTS) {  		hwc->config_base = 0;  		hwc->event_base	= 0; -	} else if (hwc->idx >= X86_PMC_IDX_FIXED) { +	} else if (hwc->idx >= INTEL_PMC_IDX_FIXED) {  		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; -		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED); +		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - INTEL_PMC_IDX_FIXED); +		hwc->event_base_rdpmc = (hwc->idx - INTEL_PMC_IDX_FIXED) | 1<<30;  	} else {  		hwc->config_base = x86_pmu_config_addr(hwc->idx);  		hwc->event_base  = x86_pmu_event_addr(hwc->idx); +		hwc->event_base_rdpmc = hwc->idx;  	}  } @@ -930,7 +923,7 @@ int x86_perf_event_set_period(struct perf_event *event)  	s64 period = hwc->sample_period;  	int ret = 0, idx = hwc->idx; -	if (idx == X86_PMC_IDX_FIXED_BTS) +	if (idx == INTEL_PMC_IDX_FIXED_BTS)  		return 0;  	/* @@ -1316,7 +1309,6 @@ static struct attribute_group x86_pmu_format_group = {  static int __init init_hw_perf_events(void)  {  	struct x86_pmu_quirk *quirk; -	struct event_constraint *c;  	int err;  	pr_info("Performance Events: "); @@ -1347,21 +1339,8 @@ static int __init init_hw_perf_events(void)  	for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)  		quirk->func(); -	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { -		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", -		     x86_pmu.num_counters, 
X86_PMC_MAX_GENERIC); -		x86_pmu.num_counters = X86_PMC_MAX_GENERIC; -	} -	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; - -	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { -		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", -		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); -		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; -	} - -	x86_pmu.intel_ctrl |= -		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; +	if (!x86_pmu.intel_ctrl) +		x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;  	perf_events_lapic_init();  	register_nmi_handler(NMI_LOCAL, perf_event_nmi_handler, 0, "PMI"); @@ -1370,22 +1349,6 @@ static int __init init_hw_perf_events(void)  		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,  				   0, x86_pmu.num_counters, 0); -	if (x86_pmu.event_constraints) { -		/* -		 * event on fixed counter2 (REF_CYCLES) only works on this -		 * counter, so do not extend mask to generic counters -		 */ -		for_each_event_constraint(c, x86_pmu.event_constraints) { -			if (c->cmask != X86_RAW_EVENT_MASK -			    || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) { -				continue; -			} - -			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; -			c->weight += x86_pmu.num_counters; -		} -	} -  	x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */  	x86_pmu_format_group.attrs = x86_pmu.format_attrs; @@ -1620,8 +1583,8 @@ static int x86_pmu_event_idx(struct perf_event *event)  	if (!x86_pmu.attr_rdpmc)  		return 0; -	if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { -		idx -= X86_PMC_IDX_FIXED; +	if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) { +		idx -= INTEL_PMC_IDX_FIXED;  		idx |= 1 << 30;  	} @@ -1649,7 +1612,12 @@ static ssize_t set_attr_rdpmc(struct device *cdev,  			      struct device_attribute *attr,  			      const char *buf, size_t count)  { -	unsigned long val = simple_strtoul(buf, NULL, 0); +	unsigned long val; +	ssize_t ret; + +	ret = kstrtoul(buf, 0, &val); +	if (ret) +		return ret;  	if (!!val != !!x86_pmu.attr_rdpmc) {  		x86_pmu.attr_rdpmc = !!val; @@ -1682,13 +1650,20 @@ static void x86_pmu_flush_branch_stack(void)  		x86_pmu.flush_branch_stack();  } +void perf_check_microcode(void) +{ +	if (x86_pmu.check_microcode) +		x86_pmu.check_microcode(); +} +EXPORT_SYMBOL_GPL(perf_check_microcode); +  static struct pmu pmu = {  	.pmu_enable		= x86_pmu_enable,  	.pmu_disable		= x86_pmu_disable, -	.attr_groups	= x86_pmu_attr_groups, +	.attr_groups		= x86_pmu_attr_groups, -	.event_init	= x86_pmu_event_init, +	.event_init		= x86_pmu_event_init,  	.add			= x86_pmu_add,  	.del			= x86_pmu_del, @@ -1696,11 +1671,11 @@ static struct pmu pmu = {  	.stop			= x86_pmu_stop,  	.read			= x86_pmu_read, -	.start_txn	= x86_pmu_start_txn, -	.cancel_txn	= x86_pmu_cancel_txn, -	.commit_txn	= x86_pmu_commit_txn, +	.start_txn		= x86_pmu_start_txn, +	.cancel_txn		= x86_pmu_cancel_txn, +	.commit_txn		= x86_pmu_commit_txn, -	.event_idx	= x86_pmu_event_idx, +	.event_idx		= x86_pmu_event_idx,  	.flush_branch_stack	= x86_pmu_flush_branch_stack,  }; @@ -1863,7 +1838,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs)  		else  			misc |= PERF_RECORD_MISC_GUEST_KERNEL;  	} else { -		if (user_mode(regs)) +		if (!kernel_ip(regs->ip))  			misc |= PERF_RECORD_MISC_USER;  		else  			misc |= PERF_RECORD_MISC_KERNEL; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 7241e2fc3c1..a15df4be151 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -14,6 +14,18 
@@  #include <linux/perf_event.h> +#if 0 +#undef wrmsrl +#define wrmsrl(msr, val) 						\ +do {									\ +	unsigned int _msr = (msr);					\ +	u64 _val = (val);						\ +	trace_printk("wrmsrl(%x, %Lx)\n", (unsigned int)(_msr),		\ +			(unsigned long long)(_val));			\ +	native_write_msr((_msr), (u32)(_val), (u32)(_val >> 32));	\ +} while (0) +#endif +  /*   *          |   NHM/WSM    |      SNB     |   * register ------------------------------- @@ -57,7 +69,7 @@ struct amd_nb {  };  /* The maximal number of PEBS events: */ -#define MAX_PEBS_EVENTS		4 +#define MAX_PEBS_EVENTS		8  /*   * A debug store configuration. @@ -349,6 +361,8 @@ struct x86_pmu {  	void		(*cpu_starting)(int cpu);  	void		(*cpu_dying)(int cpu);  	void		(*cpu_dead)(int cpu); + +	void		(*check_microcode)(void);  	void		(*flush_branch_stack)(void);  	/* @@ -360,12 +374,16 @@ struct x86_pmu {  	/*  	 * Intel DebugStore bits  	 */ -	int		bts, pebs; -	int		bts_active, pebs_active; +	int		bts		:1, +			bts_active	:1, +			pebs		:1, +			pebs_active	:1, +			pebs_broken	:1;  	int		pebs_record_size;  	void		(*drain_pebs)(struct pt_regs *regs);  	struct event_constraint *pebs_constraints;  	void		(*pebs_aliases)(struct perf_event *event); +	int 		max_pebs_events;  	/*  	 * Intel LBR @@ -468,6 +486,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,  void x86_pmu_enable_all(int added); +int perf_assign_events(struct event_constraint **constraints, int n, +			int wmin, int wmax, int *assign);  int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);  void x86_pmu_stop(struct perf_event *event, int flags); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 11a4eb9131d..4528ae7b6ec 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -366,7 +366,7 @@ static void amd_pmu_cpu_starting(int cpu)  	cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; -	if (boot_cpu_data.x86_max_cores < 2 || boot_cpu_data.x86 == 0x15) +	if (boot_cpu_data.x86_max_cores < 2)  		return;  	nb_id = amd_get_nb_id(cpu); @@ -422,35 +422,6 @@ static struct attribute *amd_format_attr[] = {  	NULL,  }; -static __initconst const struct x86_pmu amd_pmu = { -	.name			= "AMD", -	.handle_irq		= x86_pmu_handle_irq, -	.disable_all		= x86_pmu_disable_all, -	.enable_all		= x86_pmu_enable_all, -	.enable			= x86_pmu_enable_event, -	.disable		= x86_pmu_disable_event, -	.hw_config		= amd_pmu_hw_config, -	.schedule_events	= x86_schedule_events, -	.eventsel		= MSR_K7_EVNTSEL0, -	.perfctr		= MSR_K7_PERFCTR0, -	.event_map		= amd_pmu_event_map, -	.max_events		= ARRAY_SIZE(amd_perfmon_event_map), -	.num_counters		= AMD64_NUM_COUNTERS, -	.cntval_bits		= 48, -	.cntval_mask		= (1ULL << 48) - 1, -	.apic			= 1, -	/* use highest bit to detect overflow */ -	.max_period		= (1ULL << 47) - 1, -	.get_event_constraints	= amd_get_event_constraints, -	.put_event_constraints	= amd_put_event_constraints, - -	.format_attrs		= amd_format_attr, - -	.cpu_prepare		= amd_pmu_cpu_prepare, -	.cpu_starting		= amd_pmu_cpu_starting, -	.cpu_dead		= amd_pmu_cpu_dead, -}; -  /* AMD Family 15h */  #define AMD_EVENT_TYPE_MASK	0x000000F0ULL @@ -597,8 +568,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev  	}  } -static __initconst const struct x86_pmu amd_pmu_f15h = { -	.name			= "AMD Family 15h", +static __initconst const struct x86_pmu amd_pmu = { +	.name			= "AMD",  	.handle_irq		= x86_pmu_handle_irq,  	.disable_all		= x86_pmu_disable_all,  	.enable_all		= 
x86_pmu_enable_all, @@ -606,50 +577,68 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {  	.disable		= x86_pmu_disable_event,  	.hw_config		= amd_pmu_hw_config,  	.schedule_events	= x86_schedule_events, -	.eventsel		= MSR_F15H_PERF_CTL, -	.perfctr		= MSR_F15H_PERF_CTR, +	.eventsel		= MSR_K7_EVNTSEL0, +	.perfctr		= MSR_K7_PERFCTR0,  	.event_map		= amd_pmu_event_map,  	.max_events		= ARRAY_SIZE(amd_perfmon_event_map), -	.num_counters		= AMD64_NUM_COUNTERS_F15H, +	.num_counters		= AMD64_NUM_COUNTERS,  	.cntval_bits		= 48,  	.cntval_mask		= (1ULL << 48) - 1,  	.apic			= 1,  	/* use highest bit to detect overflow */  	.max_period		= (1ULL << 47) - 1, -	.get_event_constraints	= amd_get_event_constraints_f15h, -	/* nortbridge counters not yet implemented: */ -#if 0 +	.get_event_constraints	= amd_get_event_constraints,  	.put_event_constraints	= amd_put_event_constraints, +	.format_attrs		= amd_format_attr, +  	.cpu_prepare		= amd_pmu_cpu_prepare, -	.cpu_dead		= amd_pmu_cpu_dead, -#endif  	.cpu_starting		= amd_pmu_cpu_starting, -	.format_attrs		= amd_format_attr, +	.cpu_dead		= amd_pmu_cpu_dead,  }; +static int setup_event_constraints(void) +{ +	if (boot_cpu_data.x86 >= 0x15) +		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; +	return 0; +} + +static int setup_perfctr_core(void) +{ +	if (!cpu_has_perfctr_core) { +		WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h, +		     KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!"); +		return -ENODEV; +	} + +	WARN(x86_pmu.get_event_constraints == amd_get_event_constraints, +	     KERN_ERR "hw perf events core counters need constraints handler!"); + +	/* +	 * If core performance counter extensions exists, we must use +	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also +	 * x86_pmu_addr_offset(). +	 */ +	x86_pmu.eventsel	= MSR_F15H_PERF_CTL; +	x86_pmu.perfctr		= MSR_F15H_PERF_CTR; +	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE; + +	printk(KERN_INFO "perf: AMD core performance counters detected\n"); + +	return 0; +} +  __init int amd_pmu_init(void)  {  	/* Performance-monitoring supported from K7 and later: */  	if (boot_cpu_data.x86 < 6)  		return -ENODEV; -	/* -	 * If core performance counter extensions exists, it must be -	 * family 15h, otherwise fail. See x86_pmu_addr_offset(). -	 */ -	switch (boot_cpu_data.x86) { -	case 0x15: -		if (!cpu_has_perfctr_core) -			return -ENODEV; -		x86_pmu = amd_pmu_f15h; -		break; -	default: -		if (cpu_has_perfctr_core) -			return -ENODEV; -		x86_pmu = amd_pmu; -		break; -	} +	x86_pmu = amd_pmu; + +	setup_event_constraints(); +	setup_perfctr_core();  	/* Events are common for all AMDs */  	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 187c294bc65..7a8b9d0abca 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -5,6 +5,8 @@   * among events on a single PMU.   
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/stddef.h>  #include <linux/types.h>  #include <linux/init.h> @@ -21,14 +23,14 @@   */  static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =  { -  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c, -  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0, -  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e, -  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e, -  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4, -  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5, -  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c, -  [PERF_COUNT_HW_REF_CPU_CYCLES]	= 0x0300, /* pseudo-encoding */ +	[PERF_COUNT_HW_CPU_CYCLES]		= 0x003c, +	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0, +	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e, +	[PERF_COUNT_HW_CACHE_MISSES]		= 0x412e, +	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4, +	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5, +	[PERF_COUNT_HW_BUS_CYCLES]		= 0x013c, +	[PERF_COUNT_HW_REF_CPU_CYCLES]		= 0x0300, /* pseudo-encoding */  };  static struct event_constraint intel_core_event_constraints[] __read_mostly = @@ -747,7 +749,7 @@ static void intel_pmu_disable_all(void)  	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); -	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) +	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))  		intel_pmu_disable_bts();  	intel_pmu_pebs_disable_all(); @@ -763,9 +765,9 @@ static void intel_pmu_enable_all(int added)  	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,  			x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask); -	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { +	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {  		struct perf_event *event = -			cpuc->events[X86_PMC_IDX_FIXED_BTS]; +			cpuc->events[INTEL_PMC_IDX_FIXED_BTS];  		if (WARN_ON_ONCE(!event))  			return; @@ -871,7 +873,7 @@ static inline void intel_pmu_ack_status(u64 ack)  static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)  { -	int idx = hwc->idx - X86_PMC_IDX_FIXED; +	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;  	u64 ctrl_val, mask;  	mask = 0xfULL << (idx * 4); @@ -886,7 +888,7 @@ static void intel_pmu_disable_event(struct perf_event *event)  	struct hw_perf_event *hwc = &event->hw;  	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); -	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { +	if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {  		intel_pmu_disable_bts();  		intel_pmu_drain_bts_buffer();  		return; @@ -915,7 +917,7 @@ static void intel_pmu_disable_event(struct perf_event *event)  static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)  { -	int idx = hwc->idx - X86_PMC_IDX_FIXED; +	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;  	u64 ctrl_val, bits, mask;  	/* @@ -949,7 +951,7 @@ static void intel_pmu_enable_event(struct perf_event *event)  	struct hw_perf_event *hwc = &event->hw;  	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); -	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { +	if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {  		if (!__this_cpu_read(cpu_hw_events.enabled))  			return; @@ -1000,14 +1002,14 @@ static void intel_pmu_reset(void)  	local_irq_save(flags); -	printk("clearing PMU state on CPU#%d\n", smp_processor_id()); +	pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());  	for (idx = 0; idx < x86_pmu.num_counters; idx++) { -		checking_wrmsrl(x86_pmu_config_addr(idx), 0ull); -		checking_wrmsrl(x86_pmu_event_addr(idx),  0ull); +		wrmsrl_safe(x86_pmu_config_addr(idx), 0ull); +		wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);  	}  	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) -		
checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); +		wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);  	if (ds)  		ds->bts_index = ds->bts_buffer_base; @@ -1707,16 +1709,61 @@ static __init void intel_clovertown_quirk(void)  	 * But taken together it might just make sense to not enable PEBS on  	 * these chips.  	 */ -	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); +	pr_warn("PEBS disabled due to CPU errata\n");  	x86_pmu.pebs = 0;  	x86_pmu.pebs_constraints = NULL;  } +static int intel_snb_pebs_broken(int cpu) +{ +	u32 rev = UINT_MAX; /* default to broken for unknown models */ + +	switch (cpu_data(cpu).x86_model) { +	case 42: /* SNB */ +		rev = 0x28; +		break; + +	case 45: /* SNB-EP */ +		switch (cpu_data(cpu).x86_mask) { +		case 6: rev = 0x618; break; +		case 7: rev = 0x70c; break; +		} +	} + +	return (cpu_data(cpu).microcode < rev); +} + +static void intel_snb_check_microcode(void) +{ +	int pebs_broken = 0; +	int cpu; + +	get_online_cpus(); +	for_each_online_cpu(cpu) { +		if ((pebs_broken = intel_snb_pebs_broken(cpu))) +			break; +	} +	put_online_cpus(); + +	if (pebs_broken == x86_pmu.pebs_broken) +		return; + +	/* +	 * Serialized by the microcode lock.. +	 */ +	if (x86_pmu.pebs_broken) { +		pr_info("PEBS enabled due to microcode update\n"); +		x86_pmu.pebs_broken = 0; +	} else { +		pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n"); +		x86_pmu.pebs_broken = 1; +	} +} +  static __init void intel_sandybridge_quirk(void)  { -	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); -	x86_pmu.pebs = 0; -	x86_pmu.pebs_constraints = NULL; +	x86_pmu.check_microcode = intel_snb_check_microcode; +	intel_snb_check_microcode();  }  static const struct { int id; char *name; } intel_arch_events_map[] __initconst = { @@ -1736,8 +1783,8 @@ static __init void intel_arch_events_quirk(void)  	/* disable event that reported as not presend by cpuid */  	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {  		intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0; -		printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n", -				intel_arch_events_map[bit].name); +		pr_warn("CPUID marked event: \'%s\' unavailable\n", +			intel_arch_events_map[bit].name);  	}  } @@ -1756,7 +1803,7 @@ static __init void intel_nehalem_quirk(void)  		intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;  		ebx.split.no_branch_misses_retired = 0;  		x86_pmu.events_maskl = ebx.full; -		printk(KERN_INFO "CPU erratum AAJ80 worked around\n"); +		pr_info("CPU erratum AAJ80 worked around\n");  	}  } @@ -1765,6 +1812,7 @@ __init int intel_pmu_init(void)  	union cpuid10_edx edx;  	union cpuid10_eax eax;  	union cpuid10_ebx ebx; +	struct event_constraint *c;  	unsigned int unused;  	int version; @@ -1800,6 +1848,8 @@ __init int intel_pmu_init(void)  	x86_pmu.events_maskl		= ebx.full;  	x86_pmu.events_mask_len		= eax.split.mask_length; +	x86_pmu.max_pebs_events		= min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters); +  	/*  	 * Quirk: v2 perfmon does not report fixed-purpose events, so  	 * assume at least 3 events: @@ -1951,5 +2001,37 @@ __init int intel_pmu_init(void)  		}  	} +	if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) { +		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", +		     x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC); +		x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC; +	} +	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; + +	if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) { +		WARN(1, KERN_ERR "hw perf 
events fixed %d > max(%d), clipping!", +		     x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED); +		x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; +	} + +	x86_pmu.intel_ctrl |= +		((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; + +	if (x86_pmu.event_constraints) { +		/* +		 * event on fixed counter2 (REF_CYCLES) only works on this +		 * counter, so do not extend mask to generic counters +		 */ +		for_each_event_constraint(c, x86_pmu.event_constraints) { +			if (c->cmask != X86_RAW_EVENT_MASK +			    || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { +				continue; +			} + +			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; +			c->weight += x86_pmu.num_counters; +		} +	} +  	return 0;  } diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 35e2192df9f..629ae0b7ad9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -248,7 +248,7 @@ void reserve_ds_buffers(void)   */  struct event_constraint bts_constraint = -	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); +	EVENT_CONSTRAINT(0, 1ULL << INTEL_PMC_IDX_FIXED_BTS, 0);  void intel_pmu_enable_bts(u64 config)  { @@ -295,7 +295,7 @@ int intel_pmu_drain_bts_buffer(void)  		u64	to;  		u64	flags;  	}; -	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; +	struct perf_event *event = cpuc->events[INTEL_PMC_IDX_FIXED_BTS];  	struct bts_record *at, *top;  	struct perf_output_handle handle;  	struct perf_event_header header; @@ -620,7 +620,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)  	 * Should not happen, we program the threshold at 1 and do not  	 * set a reset value.  	 */ -	WARN_ON_ONCE(n > 1); +	WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);  	at += n - 1;  	__intel_pmu_pebs_event(event, iregs, at); @@ -651,10 +651,10 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)  	 * Should not happen, we program the threshold at 1 and do not  	 * set a reset value.  	 */ -	WARN_ON_ONCE(n > MAX_PEBS_EVENTS); +	WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);  	for ( ; at < top; at++) { -		for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { +		for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {  			event = cpuc->events[bit];  			if (!test_bit(bit, cpuc->active_mask))  				continue; @@ -670,7 +670,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)  			break;  		} -		if (!event || bit >= MAX_PEBS_EVENTS) +		if (!event || bit >= x86_pmu.max_pebs_events)  			continue;  		__intel_pmu_pebs_event(event, iregs, at); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c new file mode 100644 index 00000000000..19faffc6088 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -0,0 +1,1850 @@ +#include "perf_event_intel_uncore.h" + +static struct intel_uncore_type *empty_uncore[] = { NULL, }; +static struct intel_uncore_type **msr_uncores = empty_uncore; +static struct intel_uncore_type **pci_uncores = empty_uncore; +/* pci bus to socket mapping */ +static int pcibus_to_physid[256] = { [0 ... 
255] = -1, }; + +static DEFINE_RAW_SPINLOCK(uncore_box_lock); + +/* mask of cpus that collect uncore events */ +static cpumask_t uncore_cpu_mask; + +/* constraint for the fixed counter */ +static struct event_constraint constraint_fixed = +	EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL); +static struct event_constraint constraint_empty = +	EVENT_CONSTRAINT(0, 0, 0); + +DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); +DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19"); +DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); +DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); +DEFINE_UNCORE_FORMAT_ATTR(cmask8, cmask, "config:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31"); +DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28"); +DEFINE_UNCORE_FORMAT_ATTR(occ_sel, occ_sel, "config:14-15"); +DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30"); +DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51"); +DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4"); +DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17"); +DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22"); +DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31"); +DEFINE_UNCORE_FORMAT_ATTR(filter_brand0, filter_brand0, "config1:0-7"); +DEFINE_UNCORE_FORMAT_ATTR(filter_brand1, filter_brand1, "config1:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(filter_brand2, filter_brand2, "config1:16-23"); +DEFINE_UNCORE_FORMAT_ATTR(filter_brand3, filter_brand3, "config1:24-31"); + +/* Sandy Bridge-EP uncore support */ +static struct intel_uncore_type snbep_uncore_cbox; +static struct intel_uncore_type snbep_uncore_pcu; + +static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box) +{ +	struct pci_dev *pdev = box->pci_dev; +	int box_ctl = uncore_pci_box_ctl(box); +	u32 config; + +	pci_read_config_dword(pdev, box_ctl, &config); +	config |= SNBEP_PMON_BOX_CTL_FRZ; +	pci_write_config_dword(pdev, box_ctl, config); +} + +static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) +{ +	struct pci_dev *pdev = box->pci_dev; +	int box_ctl = uncore_pci_box_ctl(box); +	u32 config; + +	pci_read_config_dword(pdev, box_ctl, &config); +	config &= ~SNBEP_PMON_BOX_CTL_FRZ; +	pci_write_config_dword(pdev, box_ctl, config); +} + +static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct pci_dev *pdev = box->pci_dev; +	struct hw_perf_event *hwc = &event->hw; + +	pci_write_config_dword(pdev, hwc->config_base, hwc->config | +				SNBEP_PMON_CTL_EN); +} + +static void snbep_uncore_pci_disable_event(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct pci_dev *pdev = box->pci_dev; +	struct hw_perf_event *hwc = &event->hw; + +	pci_write_config_dword(pdev, hwc->config_base, hwc->config); +} + +static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct pci_dev *pdev = box->pci_dev; +	struct hw_perf_event *hwc = &event->hw; +	u64 count; + +	pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); +	pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); +	return count; +} + +static void snbep_uncore_pci_init_box(struct intel_uncore_box *box) +{ +	struct pci_dev *pdev = box->pci_dev; +	pci_write_config_dword(pdev, SNBEP_PCI_PMON_BOX_CTL, +				SNBEP_PMON_BOX_CTL_INT); +} + +static void 
snbep_uncore_msr_disable_box(struct intel_uncore_box *box) +{ +	u64 config; +	unsigned msr; + +	msr = uncore_msr_box_ctl(box); +	if (msr) { +		rdmsrl(msr, config); +		config |= SNBEP_PMON_BOX_CTL_FRZ; +		wrmsrl(msr, config); +		return; +	} +} + +static void snbep_uncore_msr_enable_box(struct intel_uncore_box *box) +{ +	u64 config; +	unsigned msr; + +	msr = uncore_msr_box_ctl(box); +	if (msr) { +		rdmsrl(msr, config); +		config &= ~SNBEP_PMON_BOX_CTL_FRZ; +		wrmsrl(msr, config); +		return; +	} +} + +static void snbep_uncore_msr_enable_event(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct hw_perf_event *hwc = &event->hw; +	struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + +	if (reg1->idx != EXTRA_REG_NONE) +		wrmsrl(reg1->reg, reg1->config); + +	wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN); +} + +static void snbep_uncore_msr_disable_event(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct hw_perf_event *hwc = &event->hw; + +	wrmsrl(hwc->config_base, hwc->config); +} + +static u64 snbep_uncore_msr_read_counter(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct hw_perf_event *hwc = &event->hw; +	u64 count; + +	rdmsrl(hwc->event_base, count); +	return count; +} + +static void snbep_uncore_msr_init_box(struct intel_uncore_box *box) +{ +	unsigned msr = uncore_msr_box_ctl(box); +	if (msr) +		wrmsrl(msr, SNBEP_PMON_BOX_CTL_INT); +} + +static struct event_constraint * +snbep_uncore_get_constraint(struct intel_uncore_box *box, +			    struct perf_event *event) +{ +	struct intel_uncore_extra_reg *er; +	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; +	unsigned long flags; +	bool ok = false; + +	if (reg1->idx == EXTRA_REG_NONE || (box->phys_id >= 0 && reg1->alloc)) +		return NULL; + +	er = &box->shared_regs[reg1->idx]; +	raw_spin_lock_irqsave(&er->lock, flags); +	if (!atomic_read(&er->ref) || er->config1 == reg1->config) { +		atomic_inc(&er->ref); +		er->config1 = reg1->config; +		ok = true; +	} +	raw_spin_unlock_irqrestore(&er->lock, flags); + +	if (ok) { +		if (box->phys_id >= 0) +			reg1->alloc = 1; +		return NULL; +	} +	return &constraint_empty; +} + +static void snbep_uncore_put_constraint(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct intel_uncore_extra_reg *er; +	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; + +	if (box->phys_id < 0 || !reg1->alloc) +		return; + +	er = &box->shared_regs[reg1->idx]; +	atomic_dec(&er->ref); +	reg1->alloc = 0; +} + +static int snbep_uncore_hw_config(struct intel_uncore_box *box, +				  struct perf_event *event) +{ +	struct hw_perf_event *hwc = &event->hw; +	struct hw_perf_event_extra *reg1 = &hwc->extra_reg; + +	if (box->pmu->type == &snbep_uncore_cbox) { +		reg1->reg = SNBEP_C0_MSR_PMON_BOX_FILTER + +			SNBEP_CBO_MSR_OFFSET * box->pmu->pmu_idx; +		reg1->config = event->attr.config1 & +			SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK; +	} else if (box->pmu->type == &snbep_uncore_pcu) { +		reg1->reg = SNBEP_PCU_MSR_PMON_BOX_FILTER; +		reg1->config = event->attr.config1 & +			SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK; +	} else { +		return 0; +	} +	reg1->idx = 0; +	return 0; +} + +static struct attribute *snbep_uncore_formats_attr[] = { +	&format_attr_event.attr, +	&format_attr_umask.attr, +	&format_attr_edge.attr, +	&format_attr_inv.attr, +	&format_attr_thresh8.attr, +	NULL, +}; + +static struct attribute *snbep_uncore_ubox_formats_attr[] = { +	&format_attr_event.attr, +	&format_attr_umask.attr, +	&format_attr_edge.attr, +	&format_attr_inv.attr, +	
&format_attr_thresh5.attr, +	NULL, +}; + +static struct attribute *snbep_uncore_cbox_formats_attr[] = { +	&format_attr_event.attr, +	&format_attr_umask.attr, +	&format_attr_edge.attr, +	&format_attr_tid_en.attr, +	&format_attr_inv.attr, +	&format_attr_thresh8.attr, +	&format_attr_filter_tid.attr, +	&format_attr_filter_nid.attr, +	&format_attr_filter_state.attr, +	&format_attr_filter_opc.attr, +	NULL, +}; + +static struct attribute *snbep_uncore_pcu_formats_attr[] = { +	&format_attr_event.attr, +	&format_attr_occ_sel.attr, +	&format_attr_edge.attr, +	&format_attr_inv.attr, +	&format_attr_thresh5.attr, +	&format_attr_occ_invert.attr, +	&format_attr_occ_edge.attr, +	&format_attr_filter_brand0.attr, +	&format_attr_filter_brand1.attr, +	&format_attr_filter_brand2.attr, +	&format_attr_filter_brand3.attr, +	NULL, +}; + +static struct uncore_event_desc snbep_uncore_imc_events[] = { +	INTEL_UNCORE_EVENT_DESC(clockticks,      "event=0xff,umask=0x00"), +	INTEL_UNCORE_EVENT_DESC(cas_count_read,  "event=0x04,umask=0x03"), +	INTEL_UNCORE_EVENT_DESC(cas_count_write, "event=0x04,umask=0x0c"), +	{ /* end: all zeroes */ }, +}; + +static struct uncore_event_desc snbep_uncore_qpi_events[] = { +	INTEL_UNCORE_EVENT_DESC(clockticks,       "event=0x14"), +	INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"), +	INTEL_UNCORE_EVENT_DESC(drs_data,         "event=0x02,umask=0x08"), +	INTEL_UNCORE_EVENT_DESC(ncb_data,         "event=0x03,umask=0x04"), +	{ /* end: all zeroes */ }, +}; + +static struct attribute_group snbep_uncore_format_group = { +	.name = "format", +	.attrs = snbep_uncore_formats_attr, +}; + +static struct attribute_group snbep_uncore_ubox_format_group = { +	.name = "format", +	.attrs = snbep_uncore_ubox_formats_attr, +}; + +static struct attribute_group snbep_uncore_cbox_format_group = { +	.name = "format", +	.attrs = snbep_uncore_cbox_formats_attr, +}; + +static struct attribute_group snbep_uncore_pcu_format_group = { +	.name = "format", +	.attrs = snbep_uncore_pcu_formats_attr, +}; + +static struct intel_uncore_ops snbep_uncore_msr_ops = { +	.init_box	= snbep_uncore_msr_init_box, +	.disable_box	= snbep_uncore_msr_disable_box, +	.enable_box	= snbep_uncore_msr_enable_box, +	.disable_event	= snbep_uncore_msr_disable_event, +	.enable_event	= snbep_uncore_msr_enable_event, +	.read_counter	= snbep_uncore_msr_read_counter, +	.get_constraint = snbep_uncore_get_constraint, +	.put_constraint = snbep_uncore_put_constraint, +	.hw_config	= snbep_uncore_hw_config, +}; + +static struct intel_uncore_ops snbep_uncore_pci_ops = { +	.init_box	= snbep_uncore_pci_init_box, +	.disable_box	= snbep_uncore_pci_disable_box, +	.enable_box	= snbep_uncore_pci_enable_box, +	.disable_event	= snbep_uncore_pci_disable_event, +	.enable_event	= snbep_uncore_pci_enable_event, +	.read_counter	= snbep_uncore_pci_read_counter, +}; + +static struct event_constraint snbep_uncore_cbox_constraints[] = { +	UNCORE_EVENT_CONSTRAINT(0x01, 0x1), +	UNCORE_EVENT_CONSTRAINT(0x02, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x04, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x05, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x07, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x11, 0x1), +	UNCORE_EVENT_CONSTRAINT(0x12, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x13, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x1b, 0xc), +	UNCORE_EVENT_CONSTRAINT(0x1c, 0xc), +	UNCORE_EVENT_CONSTRAINT(0x1d, 0xc), +	UNCORE_EVENT_CONSTRAINT(0x1e, 0xc), +	EVENT_CONSTRAINT_OVERLAP(0x1f, 0xe, 0xff), +	UNCORE_EVENT_CONSTRAINT(0x21, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x23, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x31, 0x3), +	
UNCORE_EVENT_CONSTRAINT(0x32, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x33, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x34, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x35, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x36, 0x1), +	UNCORE_EVENT_CONSTRAINT(0x37, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x38, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x39, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x3b, 0x1), +	EVENT_CONSTRAINT_END +}; + +static struct event_constraint snbep_uncore_r2pcie_constraints[] = { +	UNCORE_EVENT_CONSTRAINT(0x10, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x11, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x12, 0x1), +	UNCORE_EVENT_CONSTRAINT(0x23, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x24, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x25, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x26, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x32, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x33, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x34, 0x3), +	EVENT_CONSTRAINT_END +}; + +static struct event_constraint snbep_uncore_r3qpi_constraints[] = { +	UNCORE_EVENT_CONSTRAINT(0x10, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x11, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x12, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x13, 0x1), +	UNCORE_EVENT_CONSTRAINT(0x20, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x21, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x22, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x23, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x24, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x25, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x26, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x30, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x31, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x32, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x33, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x34, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x36, 0x3), +	UNCORE_EVENT_CONSTRAINT(0x37, 0x3), +	EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type snbep_uncore_ubox = { +	.name		= "ubox", +	.num_counters   = 2, +	.num_boxes	= 1, +	.perf_ctr_bits	= 44, +	.fixed_ctr_bits	= 48, +	.perf_ctr	= SNBEP_U_MSR_PMON_CTR0, +	.event_ctl	= SNBEP_U_MSR_PMON_CTL0, +	.event_mask	= SNBEP_U_MSR_PMON_RAW_EVENT_MASK, +	.fixed_ctr	= SNBEP_U_MSR_PMON_UCLK_FIXED_CTR, +	.fixed_ctl	= SNBEP_U_MSR_PMON_UCLK_FIXED_CTL, +	.ops		= &snbep_uncore_msr_ops, +	.format_group	= &snbep_uncore_ubox_format_group, +}; + +static struct intel_uncore_type snbep_uncore_cbox = { +	.name			= "cbox", +	.num_counters		= 4, +	.num_boxes		= 8, +	.perf_ctr_bits		= 44, +	.event_ctl		= SNBEP_C0_MSR_PMON_CTL0, +	.perf_ctr		= SNBEP_C0_MSR_PMON_CTR0, +	.event_mask		= SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, +	.box_ctl		= SNBEP_C0_MSR_PMON_BOX_CTL, +	.msr_offset		= SNBEP_CBO_MSR_OFFSET, +	.num_shared_regs	= 1, +	.constraints		= snbep_uncore_cbox_constraints, +	.ops			= &snbep_uncore_msr_ops, +	.format_group		= &snbep_uncore_cbox_format_group, +}; + +static struct intel_uncore_type snbep_uncore_pcu = { +	.name			= "pcu", +	.num_counters		= 4, +	.num_boxes		= 1, +	.perf_ctr_bits		= 48, +	.perf_ctr		= SNBEP_PCU_MSR_PMON_CTR0, +	.event_ctl		= SNBEP_PCU_MSR_PMON_CTL0, +	.event_mask		= SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK, +	.box_ctl		= SNBEP_PCU_MSR_PMON_BOX_CTL, +	.num_shared_regs	= 1, +	.ops			= &snbep_uncore_msr_ops, +	.format_group		= &snbep_uncore_pcu_format_group, +}; + +static struct intel_uncore_type *snbep_msr_uncores[] = { +	&snbep_uncore_ubox, +	&snbep_uncore_cbox, +	&snbep_uncore_pcu, +	NULL, +}; + +#define SNBEP_UNCORE_PCI_COMMON_INIT()				\ +	.perf_ctr	= SNBEP_PCI_PMON_CTR0,			\ +	.event_ctl	= SNBEP_PCI_PMON_CTL0,			\ +	.event_mask	= SNBEP_PMON_RAW_EVENT_MASK,		\ +	.box_ctl	= SNBEP_PCI_PMON_BOX_CTL,		\ +	.ops		= &snbep_uncore_pci_ops,		\ +	.format_group	= &snbep_uncore_format_group + +static struct intel_uncore_type snbep_uncore_ha = { +	.name		= "ha", +	
.num_counters   = 4, +	.num_boxes	= 1, +	.perf_ctr_bits	= 48, +	SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_imc = { +	.name		= "imc", +	.num_counters   = 4, +	.num_boxes	= 4, +	.perf_ctr_bits	= 48, +	.fixed_ctr_bits	= 48, +	.fixed_ctr	= SNBEP_MC_CHy_PCI_PMON_FIXED_CTR, +	.fixed_ctl	= SNBEP_MC_CHy_PCI_PMON_FIXED_CTL, +	.event_descs	= snbep_uncore_imc_events, +	SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_qpi = { +	.name		= "qpi", +	.num_counters   = 4, +	.num_boxes	= 2, +	.perf_ctr_bits	= 48, +	.event_descs	= snbep_uncore_qpi_events, +	SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + + +static struct intel_uncore_type snbep_uncore_r2pcie = { +	.name		= "r2pcie", +	.num_counters   = 4, +	.num_boxes	= 1, +	.perf_ctr_bits	= 44, +	.constraints	= snbep_uncore_r2pcie_constraints, +	SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type snbep_uncore_r3qpi = { +	.name		= "r3qpi", +	.num_counters   = 3, +	.num_boxes	= 2, +	.perf_ctr_bits	= 44, +	.constraints	= snbep_uncore_r3qpi_constraints, +	SNBEP_UNCORE_PCI_COMMON_INIT(), +}; + +static struct intel_uncore_type *snbep_pci_uncores[] = { +	&snbep_uncore_ha, +	&snbep_uncore_imc, +	&snbep_uncore_qpi, +	&snbep_uncore_r2pcie, +	&snbep_uncore_r3qpi, +	NULL, +}; + +static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = { +	{ /* Home Agent */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA), +		.driver_data = (unsigned long)&snbep_uncore_ha, +	}, +	{ /* MC Channel 0 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0), +		.driver_data = (unsigned long)&snbep_uncore_imc, +	}, +	{ /* MC Channel 1 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1), +		.driver_data = (unsigned long)&snbep_uncore_imc, +	}, +	{ /* MC Channel 2 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2), +		.driver_data = (unsigned long)&snbep_uncore_imc, +	}, +	{ /* MC Channel 3 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3), +		.driver_data = (unsigned long)&snbep_uncore_imc, +	}, +	{ /* QPI Port 0 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0), +		.driver_data = (unsigned long)&snbep_uncore_qpi, +	}, +	{ /* QPI Port 1 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1), +		.driver_data = (unsigned long)&snbep_uncore_qpi, +	}, +	{ /* P2PCIe */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE), +		.driver_data = (unsigned long)&snbep_uncore_r2pcie, +	}, +	{ /* R3QPI Link 0 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0), +		.driver_data = (unsigned long)&snbep_uncore_r3qpi, +	}, +	{ /* R3QPI Link 1 */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1), +		.driver_data = (unsigned long)&snbep_uncore_r3qpi, +	}, +	{ /* end: all zeroes */ } +}; + +static struct pci_driver snbep_uncore_pci_driver = { +	.name		= "snbep_uncore", +	.id_table	= snbep_uncore_pci_ids, +}; + +/* + * build pci bus to socket mapping + */ +static void snbep_pci2phy_map_init(void) +{ +	struct pci_dev *ubox_dev = NULL; +	int i, bus, nodeid; +	u32 config; + +	while (1) { +		/* find the UBOX device */ +		ubox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, +					PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX, +					ubox_dev); +		if (!ubox_dev) +			break; +		bus = ubox_dev->bus->number; +		/* get the Node ID of the local register */ +		pci_read_config_dword(ubox_dev, 0x40, &config); +		nodeid = config; +		/* get the Node ID mapping */ +		pci_read_config_dword(ubox_dev, 0x54, &config); +	
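A minimal stand-alone sketch of the bus-to-socket decode that the loop below performs: the config word read from offset 0x40 carries this socket's node id, and the word read from offset 0x54 packs one 3-bit node id per socket index. The 0x40/0x54 offsets come from the code above; the explicit 0x7 masking, the helper name and the example values are assumptions for illustration only.

/* Illustrative decode of the UBOX node-id mapping register. */
#include <stdio.h>

static int decode_physid(unsigned int nodeid_cfg, unsigned int mapping_cfg)
{
	unsigned int nodeid = nodeid_cfg & 0x7;	/* assumed: node id in low bits */
	int i;

	for (i = 0; i < 8; i++) {
		if (((mapping_cfg >> (3 * i)) & 0x7) == nodeid)
			return i;		/* socket/package index */
	}
	return -1;
}

int main(void)
{
	/* hypothetical register values: node id 2 sits in mapping slot 1 */
	printf("physid = %d\n", decode_physid(0x2, (0x3 << 0) | (0x2 << 3)));
	return 0;
}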
	/* +		 * every three bits in the Node ID mapping register maps +		 * to a particular node. +		 */ +		for (i = 0; i < 8; i++) { +			if (nodeid == ((config >> (3 * i)) & 0x7)) { +				pcibus_to_physid[bus] = i; +				break; +			} +		} +	}; +	return; +} +/* end of Sandy Bridge-EP uncore support */ + + +/* Sandy Bridge uncore support */ +static void snb_uncore_msr_enable_event(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct hw_perf_event *hwc = &event->hw; + +	if (hwc->idx < UNCORE_PMC_IDX_FIXED) +		wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); +	else +		wrmsrl(hwc->config_base, SNB_UNC_CTL_EN); +} + +static void snb_uncore_msr_disable_event(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	wrmsrl(event->hw.config_base, 0); +} + +static u64 snb_uncore_msr_read_counter(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	u64 count; +	rdmsrl(event->hw.event_base, count); +	return count; +} + +static void snb_uncore_msr_init_box(struct intel_uncore_box *box) +{ +	if (box->pmu->pmu_idx == 0) { +		wrmsrl(SNB_UNC_PERF_GLOBAL_CTL, +			SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL); +	} +} + +static struct attribute *snb_uncore_formats_attr[] = { +	&format_attr_event.attr, +	&format_attr_umask.attr, +	&format_attr_edge.attr, +	&format_attr_inv.attr, +	&format_attr_cmask5.attr, +	NULL, +}; + +static struct attribute_group snb_uncore_format_group = { +	.name = "format", +	.attrs = snb_uncore_formats_attr, +}; + +static struct intel_uncore_ops snb_uncore_msr_ops = { +	.init_box	= snb_uncore_msr_init_box, +	.disable_event	= snb_uncore_msr_disable_event, +	.enable_event	= snb_uncore_msr_enable_event, +	.read_counter	= snb_uncore_msr_read_counter, +}; + +static struct event_constraint snb_uncore_cbox_constraints[] = { +	UNCORE_EVENT_CONSTRAINT(0x80, 0x1), +	UNCORE_EVENT_CONSTRAINT(0x83, 0x1), +	EVENT_CONSTRAINT_END +}; + +static struct intel_uncore_type snb_uncore_cbox = { +	.name		= "cbox", +	.num_counters   = 2, +	.num_boxes	= 4, +	.perf_ctr_bits	= 44, +	.fixed_ctr_bits	= 48, +	.perf_ctr	= SNB_UNC_CBO_0_PER_CTR0, +	.event_ctl	= SNB_UNC_CBO_0_PERFEVTSEL0, +	.fixed_ctr	= SNB_UNC_FIXED_CTR, +	.fixed_ctl	= SNB_UNC_FIXED_CTR_CTRL, +	.single_fixed	= 1, +	.event_mask	= SNB_UNC_RAW_EVENT_MASK, +	.msr_offset	= SNB_UNC_CBO_MSR_OFFSET, +	.constraints	= snb_uncore_cbox_constraints, +	.ops		= &snb_uncore_msr_ops, +	.format_group	= &snb_uncore_format_group, +}; + +static struct intel_uncore_type *snb_msr_uncores[] = { +	&snb_uncore_cbox, +	NULL, +}; +/* end of Sandy Bridge uncore support */ + +/* Nehalem uncore support */ +static void nhm_uncore_msr_disable_box(struct intel_uncore_box *box) +{ +	wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, 0); +} + +static void nhm_uncore_msr_enable_box(struct intel_uncore_box *box) +{ +	wrmsrl(NHM_UNC_PERF_GLOBAL_CTL, +		NHM_UNC_GLOBAL_CTL_EN_PC_ALL | NHM_UNC_GLOBAL_CTL_EN_FC); +} + +static void nhm_uncore_msr_enable_event(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	struct hw_perf_event *hwc = &event->hw; + +	if (hwc->idx < UNCORE_PMC_IDX_FIXED) +		wrmsrl(hwc->config_base, hwc->config | SNB_UNC_CTL_EN); +	else +		wrmsrl(hwc->config_base, NHM_UNC_FIXED_CTR_CTL_EN); +} + +static struct attribute *nhm_uncore_formats_attr[] = { +	&format_attr_event.attr, +	&format_attr_umask.attr, +	&format_attr_edge.attr, +	&format_attr_inv.attr, +	&format_attr_cmask8.attr, +	NULL, +}; + +static struct attribute_group nhm_uncore_format_group = { +	.name = "format", +	.attrs = nhm_uncore_formats_attr, +}; + +static struct 
uncore_event_desc nhm_uncore_events[] = { +	INTEL_UNCORE_EVENT_DESC(clockticks,                "event=0xff,umask=0x00"), +	INTEL_UNCORE_EVENT_DESC(qmc_writes_full_any,       "event=0x2f,umask=0x0f"), +	INTEL_UNCORE_EVENT_DESC(qmc_normal_reads_any,      "event=0x2c,umask=0x0f"), +	INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_reads,     "event=0x20,umask=0x01"), +	INTEL_UNCORE_EVENT_DESC(qhl_request_ioh_writes,    "event=0x20,umask=0x02"), +	INTEL_UNCORE_EVENT_DESC(qhl_request_remote_reads,  "event=0x20,umask=0x04"), +	INTEL_UNCORE_EVENT_DESC(qhl_request_remote_writes, "event=0x20,umask=0x08"), +	INTEL_UNCORE_EVENT_DESC(qhl_request_local_reads,   "event=0x20,umask=0x10"), +	INTEL_UNCORE_EVENT_DESC(qhl_request_local_writes,  "event=0x20,umask=0x20"), +	{ /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops nhm_uncore_msr_ops = { +	.disable_box	= nhm_uncore_msr_disable_box, +	.enable_box	= nhm_uncore_msr_enable_box, +	.disable_event	= snb_uncore_msr_disable_event, +	.enable_event	= nhm_uncore_msr_enable_event, +	.read_counter	= snb_uncore_msr_read_counter, +}; + +static struct intel_uncore_type nhm_uncore = { +	.name		= "", +	.num_counters   = 8, +	.num_boxes	= 1, +	.perf_ctr_bits	= 48, +	.fixed_ctr_bits	= 48, +	.event_ctl	= NHM_UNC_PERFEVTSEL0, +	.perf_ctr	= NHM_UNC_UNCORE_PMC0, +	.fixed_ctr	= NHM_UNC_FIXED_CTR, +	.fixed_ctl	= NHM_UNC_FIXED_CTR_CTRL, +	.event_mask	= NHM_UNC_RAW_EVENT_MASK, +	.event_descs	= nhm_uncore_events, +	.ops		= &nhm_uncore_msr_ops, +	.format_group	= &nhm_uncore_format_group, +}; + +static struct intel_uncore_type *nhm_msr_uncores[] = { +	&nhm_uncore, +	NULL, +}; +/* end of Nehalem uncore support */ + +static void uncore_assign_hw_event(struct intel_uncore_box *box, +				struct perf_event *event, int idx) +{ +	struct hw_perf_event *hwc = &event->hw; + +	hwc->idx = idx; +	hwc->last_tag = ++box->tags[idx]; + +	if (hwc->idx == UNCORE_PMC_IDX_FIXED) { +		hwc->event_base = uncore_fixed_ctr(box); +		hwc->config_base = uncore_fixed_ctl(box); +		return; +	} + +	hwc->config_base = uncore_event_ctl(box, hwc->idx); +	hwc->event_base  = uncore_perf_ctr(box, hwc->idx); +} + +static void uncore_perf_event_update(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	u64 prev_count, new_count, delta; +	int shift; + +	if (event->hw.idx >= UNCORE_PMC_IDX_FIXED) +		shift = 64 - uncore_fixed_ctr_bits(box); +	else +		shift = 64 - uncore_perf_ctr_bits(box); + +	/* the hrtimer might modify the previous event value */ +again: +	prev_count = local64_read(&event->hw.prev_count); +	new_count = uncore_read_counter(box, event); +	if (local64_xchg(&event->hw.prev_count, new_count) != prev_count) +		goto again; + +	delta = (new_count << shift) - (prev_count << shift); +	delta >>= shift; + +	local64_add(delta, &event->count); +} + +/* + * The overflow interrupt is unavailable for SandyBridge-EP, is broken + * for SandyBridge. So we use hrtimer to periodically poll the counter + * to avoid overflow. 
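The polling scheme described above only works because uncore_perf_event_update tolerates the counter wrapping at its hardware width. A self-contained sketch of that shift trick, assuming a 48-bit counter; the helper name and the sample values are made up for illustration.

#include <stdio.h>
#include <stdint.h>

/* Shift both samples up to bit 63 so a wrap of an N-bit counter still
 * yields the correct unsigned difference after shifting back down. */
static uint64_t counter_delta(uint64_t prev, uint64_t now, int ctr_bits)
{
	int shift = 64 - ctr_bits;
	uint64_t delta = (now << shift) - (prev << shift);

	return delta >> shift;
}

int main(void)
{
	/* a 48-bit counter that wrapped from near its maximum back to 5 */
	uint64_t prev = (1ULL << 48) - 10;
	uint64_t now  = 5;

	printf("delta = %llu\n",
	       (unsigned long long)counter_delta(prev, now, 48));
	/* prints 15: 10 ticks to reach the wrap point plus 5 after it */
	return 0;
}

Because the subtraction of the shifted samples wraps modulo 2^64 exactly like the hardware counter wraps at its own width, the right-shifted result is the true number of ticks as long as no more than one wrap occurs between polls, which is what the hrtimer interval guards against.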
+ */ +static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) +{ +	struct intel_uncore_box *box; +	unsigned long flags; +	int bit; + +	box = container_of(hrtimer, struct intel_uncore_box, hrtimer); +	if (!box->n_active || box->cpu != smp_processor_id()) +		return HRTIMER_NORESTART; +	/* +	 * disable local interrupt to prevent uncore_pmu_event_start/stop +	 * to interrupt the update process +	 */ +	local_irq_save(flags); + +	for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) +		uncore_perf_event_update(box, box->events[bit]); + +	local_irq_restore(flags); + +	hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL)); +	return HRTIMER_RESTART; +} + +static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) +{ +	__hrtimer_start_range_ns(&box->hrtimer, +			ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0, +			HRTIMER_MODE_REL_PINNED, 0); +} + +static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box) +{ +	hrtimer_cancel(&box->hrtimer); +} + +static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box) +{ +	hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); +	box->hrtimer.function = uncore_pmu_hrtimer; +} + +struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, +					  int cpu) +{ +	struct intel_uncore_box *box; +	int i, size; + +	size = sizeof(*box) + type->num_shared_regs * +		sizeof(struct intel_uncore_extra_reg); + +	box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)); +	if (!box) +		return NULL; + +	for (i = 0; i < type->num_shared_regs; i++) +		raw_spin_lock_init(&box->shared_regs[i].lock); + +	uncore_pmu_init_hrtimer(box); +	atomic_set(&box->refcnt, 1); +	box->cpu = -1; +	box->phys_id = -1; + +	return box; +} + +static struct intel_uncore_box * +uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) +{ +	static struct intel_uncore_box *box; + +	box = *per_cpu_ptr(pmu->box, cpu); +	if (box) +		return box; + +	raw_spin_lock(&uncore_box_lock); +	list_for_each_entry(box, &pmu->box_list, list) { +		if (box->phys_id == topology_physical_package_id(cpu)) { +			atomic_inc(&box->refcnt); +			*per_cpu_ptr(pmu->box, cpu) = box; +			break; +		} +	} +	raw_spin_unlock(&uncore_box_lock); + +	return *per_cpu_ptr(pmu->box, cpu); +} + +static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) +{ +	return container_of(event->pmu, struct intel_uncore_pmu, pmu); +} + +static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) +{ +	/* +	 * perf core schedules event on the basis of cpu, uncore events are +	 * collected by one of the cpus inside a physical package. 
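As the comment notes, all events for a physical package land on one box; uncore_pmu_to_box above resolves and caches that box per cpu. A rough user-space model of that lookup, with plain arrays standing in for the per-cpu pointers, the box list and the topology helpers; every name and size here is illustrative.

#include <stdio.h>

#define NR_CPUS   8
#define NR_BOXES  2

struct box { int phys_id; int refcnt; };

static struct box boxes[NR_BOXES] = { { .phys_id = 0 }, { .phys_id = 1 } };
static struct box *percpu_box[NR_CPUS];
static const int cpu_package[NR_CPUS] = { 0, 0, 0, 0, 1, 1, 1, 1 };

static struct box *box_for_cpu(int cpu)
{
	int i;

	if (percpu_box[cpu])			/* fast path: cached pointer */
		return percpu_box[cpu];

	for (i = 0; i < NR_BOXES; i++) {
		if (boxes[i].phys_id == cpu_package[cpu]) {
			boxes[i].refcnt++;	/* box now referenced by this cpu */
			percpu_box[cpu] = &boxes[i];
			break;
		}
	}
	return percpu_box[cpu];
}

int main(void)
{
	printf("cpu 5 -> box with phys_id %d\n", box_for_cpu(5)->phys_id);
	return 0;
}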
+	 */ +	return uncore_pmu_to_box(uncore_event_to_pmu(event), +				 smp_processor_id()); +} + +static int uncore_collect_events(struct intel_uncore_box *box, +				struct perf_event *leader, bool dogrp) +{ +	struct perf_event *event; +	int n, max_count; + +	max_count = box->pmu->type->num_counters; +	if (box->pmu->type->fixed_ctl) +		max_count++; + +	if (box->n_events >= max_count) +		return -EINVAL; + +	n = box->n_events; +	box->event_list[n] = leader; +	n++; +	if (!dogrp) +		return n; + +	list_for_each_entry(event, &leader->sibling_list, group_entry) { +		if (event->state <= PERF_EVENT_STATE_OFF) +			continue; + +		if (n >= max_count) +			return -EINVAL; + +		box->event_list[n] = event; +		n++; +	} +	return n; +} + +static struct event_constraint * +uncore_get_event_constraint(struct intel_uncore_box *box, +			    struct perf_event *event) +{ +	struct intel_uncore_type *type = box->pmu->type; +	struct event_constraint *c; + +	if (type->ops->get_constraint) { +		c = type->ops->get_constraint(box, event); +		if (c) +			return c; +	} + +	if (event->hw.config == ~0ULL) +		return &constraint_fixed; + +	if (type->constraints) { +		for_each_event_constraint(c, type->constraints) { +			if ((event->hw.config & c->cmask) == c->code) +				return c; +		} +	} + +	return &type->unconstrainted; +} + +static void uncore_put_event_constraint(struct intel_uncore_box *box, +					struct perf_event *event) +{ +	if (box->pmu->type->ops->put_constraint) +		box->pmu->type->ops->put_constraint(box, event); +} + +static int uncore_assign_events(struct intel_uncore_box *box, +				int assign[], int n) +{ +	unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; +	struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX]; +	int i, wmin, wmax, ret = 0; +	struct hw_perf_event *hwc; + +	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); + +	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { +		c = uncore_get_event_constraint(box, box->event_list[i]); +		constraints[i] = c; +		wmin = min(wmin, c->weight); +		wmax = max(wmax, c->weight); +	} + +	/* fastpath, try to reuse previous register */ +	for (i = 0; i < n; i++) { +		hwc = &box->event_list[i]->hw; +		c = constraints[i]; + +		/* never assigned */ +		if (hwc->idx == -1) +			break; + +		/* constraint still honored */ +		if (!test_bit(hwc->idx, c->idxmsk)) +			break; + +		/* not already used */ +		if (test_bit(hwc->idx, used_mask)) +			break; + +		__set_bit(hwc->idx, used_mask); +		if (assign) +			assign[i] = hwc->idx; +	} +	/* slow path */ +	if (i != n) +		ret = perf_assign_events(constraints, n, wmin, wmax, assign); + +	if (!assign || ret) { +		for (i = 0; i < n; i++) +			uncore_put_event_constraint(box, box->event_list[i]); +	} +	return ret ? 
-EINVAL : 0; +} + +static void uncore_pmu_event_start(struct perf_event *event, int flags) +{ +	struct intel_uncore_box *box = uncore_event_to_box(event); +	int idx = event->hw.idx; + +	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) +		return; + +	if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX)) +		return; + +	event->hw.state = 0; +	box->events[idx] = event; +	box->n_active++; +	__set_bit(idx, box->active_mask); + +	local64_set(&event->hw.prev_count, uncore_read_counter(box, event)); +	uncore_enable_event(box, event); + +	if (box->n_active == 1) { +		uncore_enable_box(box); +		uncore_pmu_start_hrtimer(box); +	} +} + +static void uncore_pmu_event_stop(struct perf_event *event, int flags) +{ +	struct intel_uncore_box *box = uncore_event_to_box(event); +	struct hw_perf_event *hwc = &event->hw; + +	if (__test_and_clear_bit(hwc->idx, box->active_mask)) { +		uncore_disable_event(box, event); +		box->n_active--; +		box->events[hwc->idx] = NULL; +		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); +		hwc->state |= PERF_HES_STOPPED; + +		if (box->n_active == 0) { +			uncore_disable_box(box); +			uncore_pmu_cancel_hrtimer(box); +		} +	} + +	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { +		/* +		 * Drain the remaining delta count out of a event +		 * that we are disabling: +		 */ +		uncore_perf_event_update(box, event); +		hwc->state |= PERF_HES_UPTODATE; +	} +} + +static int uncore_pmu_event_add(struct perf_event *event, int flags) +{ +	struct intel_uncore_box *box = uncore_event_to_box(event); +	struct hw_perf_event *hwc = &event->hw; +	int assign[UNCORE_PMC_IDX_MAX]; +	int i, n, ret; + +	if (!box) +		return -ENODEV; + +	ret = n = uncore_collect_events(box, event, false); +	if (ret < 0) +		return ret; + +	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; +	if (!(flags & PERF_EF_START)) +		hwc->state |= PERF_HES_ARCH; + +	ret = uncore_assign_events(box, assign, n); +	if (ret) +		return ret; + +	/* save events moving to new counters */ +	for (i = 0; i < box->n_events; i++) { +		event = box->event_list[i]; +		hwc = &event->hw; + +		if (hwc->idx == assign[i] && +			hwc->last_tag == box->tags[assign[i]]) +			continue; +		/* +		 * Ensure we don't accidentally enable a stopped +		 * counter simply because we rescheduled. 
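uncore_assign_events above first tries to keep every event on the counter it already occupied and only falls back to full constraint scheduling when that fails. A hedged stand-alone sketch of that fast path; struct fake_event, the bitmask layout and the sample constraints are inventions for illustration.

#include <stdio.h>
#include <stdbool.h>

struct fake_event {
	int prev_idx;			/* -1 if never assigned */
	unsigned int constraint_mask;	/* bit n set: counter n allowed */
};

static bool fastpath_assign(struct fake_event *ev, int n, int assign[])
{
	unsigned int used = 0;
	int i;

	for (i = 0; i < n; i++) {
		int idx = ev[i].prev_idx;

		if (idx < 0)					/* never assigned */
			return false;
		if (!(ev[i].constraint_mask & (1u << idx)))	/* constraint broken */
			return false;
		if (used & (1u << idx))				/* already claimed */
			return false;
		used |= 1u << idx;
		assign[i] = idx;
	}
	return true;	/* every event keeps its counter, no rescheduling */
}

int main(void)
{
	struct fake_event ev[2] = {
		{ .prev_idx = 0, .constraint_mask = 0x3 },
		{ .prev_idx = 1, .constraint_mask = 0x3 },
	};
	int assign[2];

	printf("fastpath ok: %d\n", fastpath_assign(ev, 2, assign));
	return 0;
}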
+		 */ +		if (hwc->state & PERF_HES_STOPPED) +			hwc->state |= PERF_HES_ARCH; + +		uncore_pmu_event_stop(event, PERF_EF_UPDATE); +	} + +	/* reprogram moved events into new counters */ +	for (i = 0; i < n; i++) { +		event = box->event_list[i]; +		hwc = &event->hw; + +		if (hwc->idx != assign[i] || +			hwc->last_tag != box->tags[assign[i]]) +			uncore_assign_hw_event(box, event, assign[i]); +		else if (i < box->n_events) +			continue; + +		if (hwc->state & PERF_HES_ARCH) +			continue; + +		uncore_pmu_event_start(event, 0); +	} +	box->n_events = n; + +	return 0; +} + +static void uncore_pmu_event_del(struct perf_event *event, int flags) +{ +	struct intel_uncore_box *box = uncore_event_to_box(event); +	int i; + +	uncore_pmu_event_stop(event, PERF_EF_UPDATE); + +	for (i = 0; i < box->n_events; i++) { +		if (event == box->event_list[i]) { +			uncore_put_event_constraint(box, event); + +			while (++i < box->n_events) +				box->event_list[i - 1] = box->event_list[i]; + +			--box->n_events; +			break; +		} +	} + +	event->hw.idx = -1; +	event->hw.last_tag = ~0ULL; +} + +static void uncore_pmu_event_read(struct perf_event *event) +{ +	struct intel_uncore_box *box = uncore_event_to_box(event); +	uncore_perf_event_update(box, event); +} + +/* + * validation ensures the group can be loaded onto the + * PMU if it was the only group available. + */ +static int uncore_validate_group(struct intel_uncore_pmu *pmu, +				struct perf_event *event) +{ +	struct perf_event *leader = event->group_leader; +	struct intel_uncore_box *fake_box; +	int ret = -EINVAL, n; + +	fake_box = uncore_alloc_box(pmu->type, smp_processor_id()); +	if (!fake_box) +		return -ENOMEM; + +	fake_box->pmu = pmu; +	/* +	 * the event is not yet connected with its +	 * siblings therefore we must first collect +	 * existing siblings, then add the new event +	 * before we can simulate the scheduling +	 */ +	n = uncore_collect_events(fake_box, leader, true); +	if (n < 0) +		goto out; + +	fake_box->n_events = n; +	n = uncore_collect_events(fake_box, event, false); +	if (n < 0) +		goto out; + +	fake_box->n_events = n; + +	ret = uncore_assign_events(fake_box, NULL, n); +out: +	kfree(fake_box); +	return ret; +} + +int uncore_pmu_event_init(struct perf_event *event) +{ +	struct intel_uncore_pmu *pmu; +	struct intel_uncore_box *box; +	struct hw_perf_event *hwc = &event->hw; +	int ret; + +	if (event->attr.type != event->pmu->type) +		return -ENOENT; + +	pmu = uncore_event_to_pmu(event); +	/* no device found for this pmu */ +	if (pmu->func_id < 0) +		return -ENOENT; + +	/* +	 * Uncore PMU does measure at all privilege level all the time. +	 * So it doesn't make sense to specify any exclude bits. 
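The checks that follow this comment reject privilege filtering, sampling and cpu-less events before the event is bound to the package's collector cpu. A simplified, self-contained version of that validation, using a stand-in attribute struct rather than the real perf_event_attr; names and return values are illustrative.

#include <stdio.h>

struct fake_attr {
	int exclude_user, exclude_kernel, exclude_hv, exclude_idle;
	unsigned long long sample_period;
	int cpu;
};

static int uncore_attr_ok(const struct fake_attr *a)
{
	if (a->exclude_user || a->exclude_kernel ||
	    a->exclude_hv || a->exclude_idle)
		return -1;		/* no privilege filtering */
	if (a->sample_period)
		return -1;		/* counting only, no sampling */
	if (a->cpu < 0)
		return -1;		/* must be bound to a cpu */
	return 0;
}

int main(void)
{
	struct fake_attr ok  = { .cpu = 0 };
	struct fake_attr bad = { .cpu = 0, .exclude_user = 1 };

	printf("%d %d\n", uncore_attr_ok(&ok), uncore_attr_ok(&bad));
	return 0;
}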
+	 */ +	if (event->attr.exclude_user || event->attr.exclude_kernel || +			event->attr.exclude_hv || event->attr.exclude_idle) +		return -EINVAL; + +	/* Sampling not supported yet */ +	if (hwc->sample_period) +		return -EINVAL; + +	/* +	 * Place all uncore events for a particular physical package +	 * onto a single cpu +	 */ +	if (event->cpu < 0) +		return -EINVAL; +	box = uncore_pmu_to_box(pmu, event->cpu); +	if (!box || box->cpu < 0) +		return -EINVAL; +	event->cpu = box->cpu; + +	event->hw.idx = -1; +	event->hw.last_tag = ~0ULL; +	event->hw.extra_reg.idx = EXTRA_REG_NONE; + +	if (event->attr.config == UNCORE_FIXED_EVENT) { +		/* no fixed counter */ +		if (!pmu->type->fixed_ctl) +			return -EINVAL; +		/* +		 * if there is only one fixed counter, only the first pmu +		 * can access the fixed counter +		 */ +		if (pmu->type->single_fixed && pmu->pmu_idx > 0) +			return -EINVAL; +		hwc->config = ~0ULL; +	} else { +		hwc->config = event->attr.config & pmu->type->event_mask; +		if (pmu->type->ops->hw_config) { +			ret = pmu->type->ops->hw_config(box, event); +			if (ret) +				return ret; +		} +	} + +	if (event->group_leader != event) +		ret = uncore_validate_group(pmu, event); +	else +		ret = 0; + +	return ret; +} + +static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) +{ +	int ret; + +	pmu->pmu = (struct pmu) { +		.attr_groups	= pmu->type->attr_groups, +		.task_ctx_nr	= perf_invalid_context, +		.event_init	= uncore_pmu_event_init, +		.add		= uncore_pmu_event_add, +		.del		= uncore_pmu_event_del, +		.start		= uncore_pmu_event_start, +		.stop		= uncore_pmu_event_stop, +		.read		= uncore_pmu_event_read, +	}; + +	if (pmu->type->num_boxes == 1) { +		if (strlen(pmu->type->name) > 0) +			sprintf(pmu->name, "uncore_%s", pmu->type->name); +		else +			sprintf(pmu->name, "uncore"); +	} else { +		sprintf(pmu->name, "uncore_%s_%d", pmu->type->name, +			pmu->pmu_idx); +	} + +	ret = perf_pmu_register(&pmu->pmu, pmu->name, -1); +	return ret; +} + +static void __init uncore_type_exit(struct intel_uncore_type *type) +{ +	int i; + +	for (i = 0; i < type->num_boxes; i++) +		free_percpu(type->pmus[i].box); +	kfree(type->pmus); +	type->pmus = NULL; +	kfree(type->attr_groups[1]); +	type->attr_groups[1] = NULL; +} + +static void uncore_types_exit(struct intel_uncore_type **types) +{ +	int i; +	for (i = 0; types[i]; i++) +		uncore_type_exit(types[i]); +} + +static int __init uncore_type_init(struct intel_uncore_type *type) +{ +	struct intel_uncore_pmu *pmus; +	struct attribute_group *events_group; +	struct attribute **attrs; +	int i, j; + +	pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL); +	if (!pmus) +		return -ENOMEM; + +	type->unconstrainted = (struct event_constraint) +		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, +				0, type->num_counters, 0); + +	for (i = 0; i < type->num_boxes; i++) { +		pmus[i].func_id = -1; +		pmus[i].pmu_idx = i; +		pmus[i].type = type; +		INIT_LIST_HEAD(&pmus[i].box_list); +		pmus[i].box = alloc_percpu(struct intel_uncore_box *); +		if (!pmus[i].box) +			goto fail; +	} + +	if (type->event_descs) { +		i = 0; +		while (type->event_descs[i].attr.attr.name) +			i++; + +		events_group = kzalloc(sizeof(struct attribute *) * (i + 1) + +					sizeof(*events_group), GFP_KERNEL); +		if (!events_group) +			goto fail; + +		attrs = (struct attribute **)(events_group + 1); +		events_group->name = "events"; +		events_group->attrs = attrs; + +		for (j = 0; j < i; j++) +			attrs[j] = &type->event_descs[j].attr.attr; + +		type->attr_groups[1] = events_group; +	} + +	
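uncore_type_init above sizes the sysfs "events" group by walking the descriptor table, then allocates the group header and its NULL-terminated pointer array in a single block. A stand-alone sketch of that allocation pattern; plain strings stand in for the kobj_attribute pointers and all names here are hypothetical.

#include <stdio.h>
#include <stdlib.h>

struct fake_desc { const char *name; const char *config; };

struct fake_group {
	const char *name;
	const char **entries;	/* NULL-terminated */
};

static struct fake_group *build_events_group(const struct fake_desc *descs)
{
	struct fake_group *grp;
	const char **entries;
	int i, n = 0;

	while (descs[n].name)		/* count descriptors */
		n++;

	grp = calloc(1, sizeof(*grp) + (n + 1) * sizeof(*entries));
	if (!grp)
		return NULL;

	entries = (const char **)(grp + 1);	/* array lives right after the group */
	grp->name = "events";
	grp->entries = entries;
	for (i = 0; i < n; i++)
		entries[i] = descs[i].name;	/* last slot stays NULL from calloc */
	return grp;
}

int main(void)
{
	const struct fake_desc descs[] = {
		{ "clockticks", "event=0xff" },
		{ "cas_count_read", "event=0x04,umask=0x03" },
		{ NULL, NULL },
	};
	struct fake_group *grp = build_events_group(descs);

	printf("%s: %s, %s\n", grp->name, grp->entries[0], grp->entries[1]);
	free(grp);
	return 0;
}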
type->pmus = pmus; +	return 0; +fail: +	uncore_type_exit(type); +	return -ENOMEM; +} + +static int __init uncore_types_init(struct intel_uncore_type **types) +{ +	int i, ret; + +	for (i = 0; types[i]; i++) { +		ret = uncore_type_init(types[i]); +		if (ret) +			goto fail; +	} +	return 0; +fail: +	while (--i >= 0) +		uncore_type_exit(types[i]); +	return ret; +} + +static struct pci_driver *uncore_pci_driver; +static bool pcidrv_registered; + +/* + * add a pci uncore device + */ +static int __devinit uncore_pci_add(struct intel_uncore_type *type, +				    struct pci_dev *pdev) +{ +	struct intel_uncore_pmu *pmu; +	struct intel_uncore_box *box; +	int i, phys_id; + +	phys_id = pcibus_to_physid[pdev->bus->number]; +	if (phys_id < 0) +		return -ENODEV; + +	box = uncore_alloc_box(type, 0); +	if (!box) +		return -ENOMEM; + +	/* +	 * for performance monitoring unit with multiple boxes, +	 * each box has a different function id. +	 */ +	for (i = 0; i < type->num_boxes; i++) { +		pmu = &type->pmus[i]; +		if (pmu->func_id == pdev->devfn) +			break; +		if (pmu->func_id < 0) { +			pmu->func_id = pdev->devfn; +			break; +		} +		pmu = NULL; +	} + +	if (!pmu) { +		kfree(box); +		return -EINVAL; +	} + +	box->phys_id = phys_id; +	box->pci_dev = pdev; +	box->pmu = pmu; +	uncore_box_init(box); +	pci_set_drvdata(pdev, box); + +	raw_spin_lock(&uncore_box_lock); +	list_add_tail(&box->list, &pmu->box_list); +	raw_spin_unlock(&uncore_box_lock); + +	return 0; +} + +static void uncore_pci_remove(struct pci_dev *pdev) +{ +	struct intel_uncore_box *box = pci_get_drvdata(pdev); +	struct intel_uncore_pmu *pmu = box->pmu; +	int cpu, phys_id = pcibus_to_physid[pdev->bus->number]; + +	if (WARN_ON_ONCE(phys_id != box->phys_id)) +		return; + +	raw_spin_lock(&uncore_box_lock); +	list_del(&box->list); +	raw_spin_unlock(&uncore_box_lock); + +	for_each_possible_cpu(cpu) { +		if (*per_cpu_ptr(pmu->box, cpu) == box) { +			*per_cpu_ptr(pmu->box, cpu) = NULL; +			atomic_dec(&box->refcnt); +		} +	} + +	WARN_ON_ONCE(atomic_read(&box->refcnt) != 1); +	kfree(box); +} + +static int __devinit uncore_pci_probe(struct pci_dev *pdev, +				const struct pci_device_id *id) +{ +	struct intel_uncore_type *type; + +	type = (struct intel_uncore_type *)id->driver_data; +	return uncore_pci_add(type, pdev); +} + +static int __init uncore_pci_init(void) +{ +	int ret; + +	switch (boot_cpu_data.x86_model) { +	case 45: /* Sandy Bridge-EP */ +		pci_uncores = snbep_pci_uncores; +		uncore_pci_driver = &snbep_uncore_pci_driver; +		snbep_pci2phy_map_init(); +		break; +	default: +		return 0; +	} + +	ret = uncore_types_init(pci_uncores); +	if (ret) +		return ret; + +	uncore_pci_driver->probe = uncore_pci_probe; +	uncore_pci_driver->remove = uncore_pci_remove; + +	ret = pci_register_driver(uncore_pci_driver); +	if (ret == 0) +		pcidrv_registered = true; +	else +		uncore_types_exit(pci_uncores); + +	return ret; +} + +static void __init uncore_pci_exit(void) +{ +	if (pcidrv_registered) { +		pcidrv_registered = false; +		pci_unregister_driver(uncore_pci_driver); +		uncore_types_exit(pci_uncores); +	} +} + +static void __cpuinit uncore_cpu_dying(int cpu) +{ +	struct intel_uncore_type *type; +	struct intel_uncore_pmu *pmu; +	struct intel_uncore_box *box; +	int i, j; + +	for (i = 0; msr_uncores[i]; i++) { +		type = msr_uncores[i]; +		for (j = 0; j < type->num_boxes; j++) { +			pmu = &type->pmus[j]; +			box = *per_cpu_ptr(pmu->box, cpu); +			*per_cpu_ptr(pmu->box, cpu) = NULL; +			if (box && atomic_dec_and_test(&box->refcnt)) +				kfree(box); +		} +	} +} + +static int 
__cpuinit uncore_cpu_starting(int cpu) +{ +	struct intel_uncore_type *type; +	struct intel_uncore_pmu *pmu; +	struct intel_uncore_box *box, *exist; +	int i, j, k, phys_id; + +	phys_id = topology_physical_package_id(cpu); + +	for (i = 0; msr_uncores[i]; i++) { +		type = msr_uncores[i]; +		for (j = 0; j < type->num_boxes; j++) { +			pmu = &type->pmus[j]; +			box = *per_cpu_ptr(pmu->box, cpu); +			/* called by uncore_cpu_init? */ +			if (box && box->phys_id >= 0) { +				uncore_box_init(box); +				continue; +			} + +			for_each_online_cpu(k) { +				exist = *per_cpu_ptr(pmu->box, k); +				if (exist && exist->phys_id == phys_id) { +					atomic_inc(&exist->refcnt); +					*per_cpu_ptr(pmu->box, cpu) = exist; +					kfree(box); +					box = NULL; +					break; +				} +			} + +			if (box) { +				box->phys_id = phys_id; +				uncore_box_init(box); +			} +		} +	} +	return 0; +} + +static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id) +{ +	struct intel_uncore_type *type; +	struct intel_uncore_pmu *pmu; +	struct intel_uncore_box *box; +	int i, j; + +	for (i = 0; msr_uncores[i]; i++) { +		type = msr_uncores[i]; +		for (j = 0; j < type->num_boxes; j++) { +			pmu = &type->pmus[j]; +			if (pmu->func_id < 0) +				pmu->func_id = j; + +			box = uncore_alloc_box(type, cpu); +			if (!box) +				return -ENOMEM; + +			box->pmu = pmu; +			box->phys_id = phys_id; +			*per_cpu_ptr(pmu->box, cpu) = box; +		} +	} +	return 0; +} + +static void __cpuinit uncore_change_context(struct intel_uncore_type **uncores, +					    int old_cpu, int new_cpu) +{ +	struct intel_uncore_type *type; +	struct intel_uncore_pmu *pmu; +	struct intel_uncore_box *box; +	int i, j; + +	for (i = 0; uncores[i]; i++) { +		type = uncores[i]; +		for (j = 0; j < type->num_boxes; j++) { +			pmu = &type->pmus[j]; +			if (old_cpu < 0) +				box = uncore_pmu_to_box(pmu, new_cpu); +			else +				box = uncore_pmu_to_box(pmu, old_cpu); +			if (!box) +				continue; + +			if (old_cpu < 0) { +				WARN_ON_ONCE(box->cpu != -1); +				box->cpu = new_cpu; +				continue; +			} + +			WARN_ON_ONCE(box->cpu != old_cpu); +			if (new_cpu >= 0) { +				uncore_pmu_cancel_hrtimer(box); +				perf_pmu_migrate_context(&pmu->pmu, +						old_cpu, new_cpu); +				box->cpu = new_cpu; +			} else { +				box->cpu = -1; +			} +		} +	} +} + +static void __cpuinit uncore_event_exit_cpu(int cpu) +{ +	int i, phys_id, target; + +	/* if exiting cpu is used for collecting uncore events */ +	if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) +		return; + +	/* find a new cpu to collect uncore events */ +	phys_id = topology_physical_package_id(cpu); +	target = -1; +	for_each_online_cpu(i) { +		if (i == cpu) +			continue; +		if (phys_id == topology_physical_package_id(i)) { +			target = i; +			break; +		} +	} + +	/* migrate uncore events to the new cpu */ +	if (target >= 0) +		cpumask_set_cpu(target, &uncore_cpu_mask); + +	uncore_change_context(msr_uncores, cpu, target); +	uncore_change_context(pci_uncores, cpu, target); +} + +static void __cpuinit uncore_event_init_cpu(int cpu) +{ +	int i, phys_id; + +	phys_id = topology_physical_package_id(cpu); +	for_each_cpu(i, &uncore_cpu_mask) { +		if (phys_id == topology_physical_package_id(i)) +			return; +	} + +	cpumask_set_cpu(cpu, &uncore_cpu_mask); + +	uncore_change_context(msr_uncores, -1, cpu); +	uncore_change_context(pci_uncores, -1, cpu); +} + +static int __cpuinit uncore_cpu_notifier(struct notifier_block *self, +					 unsigned long action, void *hcpu) +{ +	unsigned int cpu = (long)hcpu; + +	/* allocate/free data structure for uncore box */ +	
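The notifier below pairs box allocation with the collector-cpu bookkeeping shown in uncore_event_exit_cpu: when the counting cpu goes offline, another online cpu in the same physical package takes over and the events migrate there. A toy model of that replacement selection, with fixed arrays replacing the cpu masks and topology calls; sizes and values are illustrative only.

#include <stdio.h>

#define NR_CPUS 8

static const int cpu_package[NR_CPUS] = { 0, 0, 0, 0, 1, 1, 1, 1 };
static int cpu_online_map[NR_CPUS]    = { 1, 1, 1, 1, 1, 1, 1, 1 };

static int pick_new_collector(int dying_cpu)
{
	int i;

	for (i = 0; i < NR_CPUS; i++) {
		if (i == dying_cpu || !cpu_online_map[i])
			continue;
		if (cpu_package[i] == cpu_package[dying_cpu])
			return i;	/* same package, takes over counting */
	}
	return -1;			/* package has no online cpu left */
}

int main(void)
{
	cpu_online_map[4] = 0;		/* cpu 4 is going down */
	printf("new collector for package 1: %d\n", pick_new_collector(4));
	return 0;
}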
switch (action & ~CPU_TASKS_FROZEN) { +	case CPU_UP_PREPARE: +		uncore_cpu_prepare(cpu, -1); +		break; +	case CPU_STARTING: +		uncore_cpu_starting(cpu); +		break; +	case CPU_UP_CANCELED: +	case CPU_DYING: +		uncore_cpu_dying(cpu); +		break; +	default: +		break; +	} + +	/* select the cpu that collects uncore events */ +	switch (action & ~CPU_TASKS_FROZEN) { +	case CPU_DOWN_FAILED: +	case CPU_STARTING: +		uncore_event_init_cpu(cpu); +		break; +	case CPU_DOWN_PREPARE: +		uncore_event_exit_cpu(cpu); +		break; +	default: +		break; +	} + +	return NOTIFY_OK; +} + +static struct notifier_block uncore_cpu_nb __cpuinitdata = { +	.notifier_call = uncore_cpu_notifier, +	/* +	 * to migrate uncore events, our notifier should be executed +	 * before perf core's notifier. +	 */ +	.priority = CPU_PRI_PERF + 1, +}; + +static void __init uncore_cpu_setup(void *dummy) +{ +	uncore_cpu_starting(smp_processor_id()); +} + +static int __init uncore_cpu_init(void) +{ +	int ret, cpu, max_cores; + +	max_cores = boot_cpu_data.x86_max_cores; +	switch (boot_cpu_data.x86_model) { +	case 26: /* Nehalem */ +	case 30: +	case 37: /* Westmere */ +	case 44: +		msr_uncores = nhm_msr_uncores; +		break; +	case 42: /* Sandy Bridge */ +		if (snb_uncore_cbox.num_boxes > max_cores) +			snb_uncore_cbox.num_boxes = max_cores; +		msr_uncores = snb_msr_uncores; +		break; +	case 45: /* Sandy Birdge-EP */ +		if (snbep_uncore_cbox.num_boxes > max_cores) +			snbep_uncore_cbox.num_boxes = max_cores; +		msr_uncores = snbep_msr_uncores; +		break; +	default: +		return 0; +	} + +	ret = uncore_types_init(msr_uncores); +	if (ret) +		return ret; + +	get_online_cpus(); + +	for_each_online_cpu(cpu) { +		int i, phys_id = topology_physical_package_id(cpu); + +		for_each_cpu(i, &uncore_cpu_mask) { +			if (phys_id == topology_physical_package_id(i)) { +				phys_id = -1; +				break; +			} +		} +		if (phys_id < 0) +			continue; + +		uncore_cpu_prepare(cpu, phys_id); +		uncore_event_init_cpu(cpu); +	} +	on_each_cpu(uncore_cpu_setup, NULL, 1); + +	register_cpu_notifier(&uncore_cpu_nb); + +	put_online_cpus(); + +	return 0; +} + +static int __init uncore_pmus_register(void) +{ +	struct intel_uncore_pmu *pmu; +	struct intel_uncore_type *type; +	int i, j; + +	for (i = 0; msr_uncores[i]; i++) { +		type = msr_uncores[i]; +		for (j = 0; j < type->num_boxes; j++) { +			pmu = &type->pmus[j]; +			uncore_pmu_register(pmu); +		} +	} + +	for (i = 0; pci_uncores[i]; i++) { +		type = pci_uncores[i]; +		for (j = 0; j < type->num_boxes; j++) { +			pmu = &type->pmus[j]; +			uncore_pmu_register(pmu); +		} +	} + +	return 0; +} + +static int __init intel_uncore_init(void) +{ +	int ret; + +	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) +		return -ENODEV; + +	ret = uncore_pci_init(); +	if (ret) +		goto fail; +	ret = uncore_cpu_init(); +	if (ret) { +		uncore_pci_exit(); +		goto fail; +	} + +	uncore_pmus_register(); +	return 0; +fail: +	return ret; +} +device_initcall(intel_uncore_init); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h new file mode 100644 index 00000000000..b13e9ea81de --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -0,0 +1,424 @@ +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/perf_event.h> +#include "perf_event.h" + +#define UNCORE_PMU_NAME_LEN		32 +#define UNCORE_BOX_HASH_SIZE		8 + +#define UNCORE_PMU_HRTIMER_INTERVAL	(60 * NSEC_PER_SEC) + +#define UNCORE_FIXED_EVENT		0xff +#define UNCORE_PMC_IDX_MAX_GENERIC	8 +#define 
UNCORE_PMC_IDX_FIXED		UNCORE_PMC_IDX_MAX_GENERIC +#define UNCORE_PMC_IDX_MAX		(UNCORE_PMC_IDX_FIXED + 1) + +#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff) + +/* SNB event control */ +#define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff +#define SNB_UNC_CTL_UMASK_MASK			0x0000ff00 +#define SNB_UNC_CTL_EDGE_DET			(1 << 18) +#define SNB_UNC_CTL_EN				(1 << 22) +#define SNB_UNC_CTL_INVERT			(1 << 23) +#define SNB_UNC_CTL_CMASK_MASK			0x1f000000 +#define NHM_UNC_CTL_CMASK_MASK			0xff000000 +#define NHM_UNC_FIXED_CTR_CTL_EN		(1 << 0) + +#define SNB_UNC_RAW_EVENT_MASK			(SNB_UNC_CTL_EV_SEL_MASK | \ +						 SNB_UNC_CTL_UMASK_MASK | \ +						 SNB_UNC_CTL_EDGE_DET | \ +						 SNB_UNC_CTL_INVERT | \ +						 SNB_UNC_CTL_CMASK_MASK) + +#define NHM_UNC_RAW_EVENT_MASK			(SNB_UNC_CTL_EV_SEL_MASK | \ +						 SNB_UNC_CTL_UMASK_MASK | \ +						 SNB_UNC_CTL_EDGE_DET | \ +						 SNB_UNC_CTL_INVERT | \ +						 NHM_UNC_CTL_CMASK_MASK) + +/* SNB global control register */ +#define SNB_UNC_PERF_GLOBAL_CTL                 0x391 +#define SNB_UNC_FIXED_CTR_CTRL                  0x394 +#define SNB_UNC_FIXED_CTR                       0x395 + +/* SNB uncore global control */ +#define SNB_UNC_GLOBAL_CTL_CORE_ALL             ((1 << 4) - 1) +#define SNB_UNC_GLOBAL_CTL_EN                   (1 << 29) + +/* SNB Cbo register */ +#define SNB_UNC_CBO_0_PERFEVTSEL0               0x700 +#define SNB_UNC_CBO_0_PER_CTR0                  0x706 +#define SNB_UNC_CBO_MSR_OFFSET                  0x10 + +/* NHM global control register */ +#define NHM_UNC_PERF_GLOBAL_CTL                 0x391 +#define NHM_UNC_FIXED_CTR                       0x394 +#define NHM_UNC_FIXED_CTR_CTRL                  0x395 + +/* NHM uncore global control */ +#define NHM_UNC_GLOBAL_CTL_EN_PC_ALL            ((1ULL << 8) - 1) +#define NHM_UNC_GLOBAL_CTL_EN_FC                (1ULL << 32) + +/* NHM uncore register */ +#define NHM_UNC_PERFEVTSEL0                     0x3c0 +#define NHM_UNC_UNCORE_PMC0                     0x3b0 + +/* SNB-EP Box level control */ +#define SNBEP_PMON_BOX_CTL_RST_CTRL	(1 << 0) +#define SNBEP_PMON_BOX_CTL_RST_CTRS	(1 << 1) +#define SNBEP_PMON_BOX_CTL_FRZ		(1 << 8) +#define SNBEP_PMON_BOX_CTL_FRZ_EN	(1 << 16) +#define SNBEP_PMON_BOX_CTL_INT		(SNBEP_PMON_BOX_CTL_RST_CTRL | \ +					 SNBEP_PMON_BOX_CTL_RST_CTRS | \ +					 SNBEP_PMON_BOX_CTL_FRZ_EN) +/* SNB-EP event control */ +#define SNBEP_PMON_CTL_EV_SEL_MASK	0x000000ff +#define SNBEP_PMON_CTL_UMASK_MASK	0x0000ff00 +#define SNBEP_PMON_CTL_RST		(1 << 17) +#define SNBEP_PMON_CTL_EDGE_DET		(1 << 18) +#define SNBEP_PMON_CTL_EV_SEL_EXT	(1 << 21)	/* only for QPI */ +#define SNBEP_PMON_CTL_EN		(1 << 22) +#define SNBEP_PMON_CTL_INVERT		(1 << 23) +#define SNBEP_PMON_CTL_TRESH_MASK	0xff000000 +#define SNBEP_PMON_RAW_EVENT_MASK	(SNBEP_PMON_CTL_EV_SEL_MASK | \ +					 SNBEP_PMON_CTL_UMASK_MASK | \ +					 SNBEP_PMON_CTL_EDGE_DET | \ +					 SNBEP_PMON_CTL_INVERT | \ +					 SNBEP_PMON_CTL_TRESH_MASK) + +/* SNB-EP Ubox event control */ +#define SNBEP_U_MSR_PMON_CTL_TRESH_MASK		0x1f000000 +#define SNBEP_U_MSR_PMON_RAW_EVENT_MASK		\ +				(SNBEP_PMON_CTL_EV_SEL_MASK | \ +				 SNBEP_PMON_CTL_UMASK_MASK | \ +				 SNBEP_PMON_CTL_EDGE_DET | \ +				 SNBEP_PMON_CTL_INVERT | \ +				 SNBEP_U_MSR_PMON_CTL_TRESH_MASK) + +#define SNBEP_CBO_PMON_CTL_TID_EN		(1 << 19) +#define SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK	(SNBEP_PMON_RAW_EVENT_MASK | \ +						 SNBEP_CBO_PMON_CTL_TID_EN) + +/* SNB-EP PCU event control */ +#define SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK	0x0000c000 +#define SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK	
0x1f000000 +#define SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT	(1 << 30) +#define SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET	(1 << 31) +#define SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK	\ +				(SNBEP_PMON_CTL_EV_SEL_MASK | \ +				 SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ +				 SNBEP_PMON_CTL_EDGE_DET | \ +				 SNBEP_PMON_CTL_INVERT | \ +				 SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ +				 SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ +				 SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET) + +/* SNB-EP pci control register */ +#define SNBEP_PCI_PMON_BOX_CTL			0xf4 +#define SNBEP_PCI_PMON_CTL0			0xd8 +/* SNB-EP pci counter register */ +#define SNBEP_PCI_PMON_CTR0			0xa0 + +/* SNB-EP home agent register */ +#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH0	0x40 +#define SNBEP_HA_PCI_PMON_BOX_ADDRMATCH1	0x44 +#define SNBEP_HA_PCI_PMON_BOX_OPCODEMATCH	0x48 +/* SNB-EP memory controller register */ +#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTL		0xf0 +#define SNBEP_MC_CHy_PCI_PMON_FIXED_CTR		0xd0 +/* SNB-EP QPI register */ +#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH0		0x228 +#define SNBEP_Q_Py_PCI_PMON_PKT_MATCH1		0x22c +#define SNBEP_Q_Py_PCI_PMON_PKT_MASK0		0x238 +#define SNBEP_Q_Py_PCI_PMON_PKT_MASK1		0x23c + +/* SNB-EP Ubox register */ +#define SNBEP_U_MSR_PMON_CTR0			0xc16 +#define SNBEP_U_MSR_PMON_CTL0			0xc10 + +#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTL		0xc08 +#define SNBEP_U_MSR_PMON_UCLK_FIXED_CTR		0xc09 + +/* SNB-EP Cbo register */ +#define SNBEP_C0_MSR_PMON_CTR0			0xd16 +#define SNBEP_C0_MSR_PMON_CTL0			0xd10 +#define SNBEP_C0_MSR_PMON_BOX_CTL		0xd04 +#define SNBEP_C0_MSR_PMON_BOX_FILTER		0xd14 +#define SNBEP_CB0_MSR_PMON_BOX_FILTER_MASK	0xfffffc1f +#define SNBEP_CBO_MSR_OFFSET			0x20 + +/* SNB-EP PCU register */ +#define SNBEP_PCU_MSR_PMON_CTR0			0xc36 +#define SNBEP_PCU_MSR_PMON_CTL0			0xc30 +#define SNBEP_PCU_MSR_PMON_BOX_CTL		0xc24 +#define SNBEP_PCU_MSR_PMON_BOX_FILTER		0xc34 +#define SNBEP_PCU_MSR_PMON_BOX_FILTER_MASK	0xffffffff +#define SNBEP_PCU_MSR_CORE_C3_CTR		0x3fc +#define SNBEP_PCU_MSR_CORE_C6_CTR		0x3fd + +struct intel_uncore_ops; +struct intel_uncore_pmu; +struct intel_uncore_box; +struct uncore_event_desc; + +struct intel_uncore_type { +	const char *name; +	int num_counters; +	int num_boxes; +	int perf_ctr_bits; +	int fixed_ctr_bits; +	unsigned perf_ctr; +	unsigned event_ctl; +	unsigned event_mask; +	unsigned fixed_ctr; +	unsigned fixed_ctl; +	unsigned box_ctl; +	unsigned msr_offset; +	unsigned num_shared_regs:8; +	unsigned single_fixed:1; +	struct event_constraint unconstrainted; +	struct event_constraint *constraints; +	struct intel_uncore_pmu *pmus; +	struct intel_uncore_ops *ops; +	struct uncore_event_desc *event_descs; +	const struct attribute_group *attr_groups[3]; +}; + +#define format_group attr_groups[0] + +struct intel_uncore_ops { +	void (*init_box)(struct intel_uncore_box *); +	void (*disable_box)(struct intel_uncore_box *); +	void (*enable_box)(struct intel_uncore_box *); +	void (*disable_event)(struct intel_uncore_box *, struct perf_event *); +	void (*enable_event)(struct intel_uncore_box *, struct perf_event *); +	u64 (*read_counter)(struct intel_uncore_box *, struct perf_event *); +	int (*hw_config)(struct intel_uncore_box *, struct perf_event *); +	struct event_constraint *(*get_constraint)(struct intel_uncore_box *, +						   struct perf_event *); +	void (*put_constraint)(struct intel_uncore_box *, struct perf_event *); +}; + +struct intel_uncore_pmu { +	struct pmu pmu; +	char name[UNCORE_PMU_NAME_LEN]; +	int pmu_idx; +	int func_id; +	struct intel_uncore_type *type; +	struct intel_uncore_box ** __percpu box; +	struct 
list_head box_list; +}; + +struct intel_uncore_extra_reg { +	raw_spinlock_t lock; +	u64 config1; +	atomic_t ref; +}; + +struct intel_uncore_box { +	int phys_id; +	int n_active;	/* number of active events */ +	int n_events; +	int cpu;	/* cpu to collect events */ +	unsigned long flags; +	atomic_t refcnt; +	struct perf_event *events[UNCORE_PMC_IDX_MAX]; +	struct perf_event *event_list[UNCORE_PMC_IDX_MAX]; +	unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; +	u64 tags[UNCORE_PMC_IDX_MAX]; +	struct pci_dev *pci_dev; +	struct intel_uncore_pmu *pmu; +	struct hrtimer hrtimer; +	struct list_head list; +	struct intel_uncore_extra_reg shared_regs[0]; +}; + +#define UNCORE_BOX_FLAG_INITIATED	0 + +struct uncore_event_desc { +	struct kobj_attribute attr; +	const char *config; +}; + +#define INTEL_UNCORE_EVENT_DESC(_name, _config)			\ +{								\ +	.attr	= __ATTR(_name, 0444, uncore_event_show, NULL),	\ +	.config	= _config,					\ +} + +#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format)			\ +static ssize_t __uncore_##_var##_show(struct kobject *kobj,		\ +				struct kobj_attribute *attr,		\ +				char *page)				\ +{									\ +	BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE);			\ +	return sprintf(page, _format "\n");				\ +}									\ +static struct kobj_attribute format_attr_##_var =			\ +	__ATTR(_name, 0444, __uncore_##_var##_show, NULL) + + +static ssize_t uncore_event_show(struct kobject *kobj, +				struct kobj_attribute *attr, char *buf) +{ +	struct uncore_event_desc *event = +		container_of(attr, struct uncore_event_desc, attr); +	return sprintf(buf, "%s", event->config); +} + +static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box) +{ +	return box->pmu->type->box_ctl; +} + +static inline unsigned uncore_pci_fixed_ctl(struct intel_uncore_box *box) +{ +	return box->pmu->type->fixed_ctl; +} + +static inline unsigned uncore_pci_fixed_ctr(struct intel_uncore_box *box) +{ +	return box->pmu->type->fixed_ctr; +} + +static inline +unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx) +{ +	return idx * 4 + box->pmu->type->event_ctl; +} + +static inline +unsigned uncore_pci_perf_ctr(struct intel_uncore_box *box, int idx) +{ +	return idx * 8 + box->pmu->type->perf_ctr; +} + +static inline +unsigned uncore_msr_box_ctl(struct intel_uncore_box *box) +{ +	if (!box->pmu->type->box_ctl) +		return 0; +	return box->pmu->type->box_ctl + +		box->pmu->type->msr_offset * box->pmu->pmu_idx; +} + +static inline +unsigned uncore_msr_fixed_ctl(struct intel_uncore_box *box) +{ +	if (!box->pmu->type->fixed_ctl) +		return 0; +	return box->pmu->type->fixed_ctl + +		box->pmu->type->msr_offset * box->pmu->pmu_idx; +} + +static inline +unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box) +{ +	return box->pmu->type->fixed_ctr + +		box->pmu->type->msr_offset * box->pmu->pmu_idx; +} + +static inline +unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx) +{ +	return idx + box->pmu->type->event_ctl + +		box->pmu->type->msr_offset * box->pmu->pmu_idx; +} + +static inline +unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) +{ +	return idx + box->pmu->type->perf_ctr + +		box->pmu->type->msr_offset * box->pmu->pmu_idx; +} + +static inline +unsigned uncore_fixed_ctl(struct intel_uncore_box *box) +{ +	if (box->pci_dev) +		return uncore_pci_fixed_ctl(box); +	else +		return uncore_msr_fixed_ctl(box); +} + +static inline +unsigned uncore_fixed_ctr(struct intel_uncore_box *box) +{ +	if (box->pci_dev) +		return uncore_pci_fixed_ctr(box); +	else +		return 
uncore_msr_fixed_ctr(box); +} + +static inline +unsigned uncore_event_ctl(struct intel_uncore_box *box, int idx) +{ +	if (box->pci_dev) +		return uncore_pci_event_ctl(box, idx); +	else +		return uncore_msr_event_ctl(box, idx); +} + +static inline +unsigned uncore_perf_ctr(struct intel_uncore_box *box, int idx) +{ +	if (box->pci_dev) +		return uncore_pci_perf_ctr(box, idx); +	else +		return uncore_msr_perf_ctr(box, idx); +} + +static inline int uncore_perf_ctr_bits(struct intel_uncore_box *box) +{ +	return box->pmu->type->perf_ctr_bits; +} + +static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box) +{ +	return box->pmu->type->fixed_ctr_bits; +} + +static inline int uncore_num_counters(struct intel_uncore_box *box) +{ +	return box->pmu->type->num_counters; +} + +static inline void uncore_disable_box(struct intel_uncore_box *box) +{ +	if (box->pmu->type->ops->disable_box) +		box->pmu->type->ops->disable_box(box); +} + +static inline void uncore_enable_box(struct intel_uncore_box *box) +{ +	if (box->pmu->type->ops->enable_box) +		box->pmu->type->ops->enable_box(box); +} + +static inline void uncore_disable_event(struct intel_uncore_box *box, +				struct perf_event *event) +{ +	box->pmu->type->ops->disable_event(box, event); +} + +static inline void uncore_enable_event(struct intel_uncore_box *box, +				struct perf_event *event) +{ +	box->pmu->type->ops->enable_event(box, event); +} + +static inline u64 uncore_read_counter(struct intel_uncore_box *box, +				struct perf_event *event) +{ +	return box->pmu->type->ops->read_counter(box, event); +} + +static inline void uncore_box_init(struct intel_uncore_box *box) +{ +	if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { +		if (box->pmu->type->ops->init_box) +			box->pmu->type->ops->init_box(box); +	} +} diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 47124a73dd7..92c7e39a079 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -895,8 +895,8 @@ static void p4_pmu_disable_pebs(void)  	 * So at moment let leave metrics turned on forever -- it's  	 * ok for now but need to be revisited!  	 
* -	 * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0); -	 * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0); +	 * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, (u64)0); +	 * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, (u64)0);  	 */  } @@ -909,7 +909,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)  	 * state we need to clear P4_CCCR_OVF, otherwise interrupt get  	 * asserted again and again  	 */ -	(void)checking_wrmsrl(hwc->config_base, +	(void)wrmsrl_safe(hwc->config_base,  		(u64)(p4_config_unpack_cccr(hwc->config)) &  			~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);  } @@ -943,8 +943,8 @@ static void p4_pmu_enable_pebs(u64 config)  	bind = &p4_pebs_bind_map[idx]; -	(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE,	(u64)bind->metric_pebs); -	(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT,	(u64)bind->metric_vert); +	(void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE,	(u64)bind->metric_pebs); +	(void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT,	(u64)bind->metric_vert);  }  static void p4_pmu_enable_event(struct perf_event *event) @@ -978,8 +978,8 @@ static void p4_pmu_enable_event(struct perf_event *event)  	 */  	p4_pmu_enable_pebs(hwc->config); -	(void)checking_wrmsrl(escr_addr, escr_conf); -	(void)checking_wrmsrl(hwc->config_base, +	(void)wrmsrl_safe(escr_addr, escr_conf); +	(void)wrmsrl_safe(hwc->config_base,  				(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);  } @@ -1325,7 +1325,7 @@ __init int p4_pmu_init(void)  	unsigned int low, high;  	/* If we get stripped -- indexing fails */ -	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); +	BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC);  	rdmsr(MSR_IA32_MISC_ENABLE, low, high);  	if (!(low & (1 << 7))) { diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 32bcfc7dd23..e4dd0f7a045 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -71,7 +71,7 @@ p6_pmu_disable_event(struct perf_event *event)  	if (cpuc->enabled)  		val |= ARCH_PERFMON_EVENTSEL_ENABLE; -	(void)checking_wrmsrl(hwc->config_base, val); +	(void)wrmsrl_safe(hwc->config_base, val);  }  static void p6_pmu_enable_event(struct perf_event *event) @@ -84,7 +84,7 @@ static void p6_pmu_enable_event(struct perf_event *event)  	if (cpuc->enabled)  		val |= ARCH_PERFMON_EVENTSEL_ENABLE; -	(void)checking_wrmsrl(hwc->config_base, val); +	(void)wrmsrl_safe(hwc->config_base, val);  }  PMU_FORMAT_ATTR(event,	"config:0-7"	); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index addf9e82a7f..ee8e9abc859 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,7 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)  	const struct cpuid_bit *cb;  	static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { -		{ X86_FEATURE_DTS,		CR_EAX, 0, 0x00000006, 0 }, +		{ X86_FEATURE_DTHERM,		CR_EAX, 0, 0x00000006, 0 },  		{ X86_FEATURE_IDA,		CR_EAX, 1, 0x00000006, 0 },  		{ X86_FEATURE_ARAT,		CR_EAX, 2, 0x00000006, 0 },  		{ X86_FEATURE_PLN,		CR_EAX, 4, 0x00000006, 0 }, diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 571246d81ed..ae42418bc50 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -27,8 +27,8 @@ static int die_counter;  void printk_address(unsigned long address, int reliable)  { -	printk(" [<%p>] %s%pB\n", (void *) address, -			reliable ? "" : "? ", (void *) address); +	pr_cont(" [<%p>] %s%pB\n", +		(void *)address, reliable ? "" : "? 
", (void *)address);  }  #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -271,6 +271,7 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)  			current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)  		return 1; +	print_modules();  	show_regs(regs);  #ifdef CONFIG_X86_32  	if (user_mode_vm(regs)) { diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index e0b1d783daa..1038a417ea5 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -73,11 +73,11 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,  		if (kstack_end(stack))  			break;  		if (i && ((i % STACKSLOTS_PER_LINE) == 0)) -			printk(KERN_CONT "\n"); -		printk(KERN_CONT " %08lx", *stack++); +			pr_cont("\n"); +		pr_cont(" %08lx", *stack++);  		touch_nmi_watchdog();  	} -	printk(KERN_CONT "\n"); +	pr_cont("\n");  	show_trace_log_lvl(task, regs, sp, bp, log_lvl);  } @@ -86,12 +86,11 @@ void show_regs(struct pt_regs *regs)  {  	int i; -	print_modules();  	__show_regs(regs, !user_mode_vm(regs)); -	printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", -		TASK_COMM_LEN, current->comm, task_pid_nr(current), -		current_thread_info(), current, task_thread_info(current)); +	pr_emerg("Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", +		 TASK_COMM_LEN, current->comm, task_pid_nr(current), +		 current_thread_info(), current, task_thread_info(current));  	/*  	 * When in-kernel, we also print out the stack and code at the  	 * time of the fault.. @@ -102,10 +101,10 @@ void show_regs(struct pt_regs *regs)  		unsigned char c;  		u8 *ip; -		printk(KERN_EMERG "Stack:\n"); +		pr_emerg("Stack:\n");  		show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); -		printk(KERN_EMERG "Code: "); +		pr_emerg("Code:");  		ip = (u8 *)regs->ip - code_prologue;  		if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { @@ -116,16 +115,16 @@ void show_regs(struct pt_regs *regs)  		for (i = 0; i < code_len; i++, ip++) {  			if (ip < (u8 *)PAGE_OFFSET ||  					probe_kernel_address(ip, c)) { -				printk(KERN_CONT " Bad EIP value."); +				pr_cont("  Bad EIP value.");  				break;  			}  			if (ip == (u8 *)regs->ip) -				printk(KERN_CONT "<%02x> ", c); +				pr_cont(" <%02x>", c);  			else -				printk(KERN_CONT "%02x ", c); +				pr_cont(" %02x", c);  		}  	} -	printk(KERN_CONT "\n"); +	pr_cont("\n");  }  int is_valid_bugaddr(unsigned long ip) diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 791b76122aa..b653675d528 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -228,20 +228,20 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,  		if (stack >= irq_stack && stack <= irq_stack_end) {  			if (stack == irq_stack_end) {  				stack = (unsigned long *) (irq_stack_end[-1]); -				printk(KERN_CONT " <EOI> "); +				pr_cont(" <EOI> ");  			}  		} else {  		if (((long) stack & (THREAD_SIZE-1)) == 0)  			break;  		}  		if (i && ((i % STACKSLOTS_PER_LINE) == 0)) -			printk(KERN_CONT "\n"); -		printk(KERN_CONT " %016lx", *stack++); +			pr_cont("\n"); +		pr_cont(" %016lx", *stack++);  		touch_nmi_watchdog();  	}  	preempt_enable(); -	printk(KERN_CONT "\n"); +	pr_cont("\n");  	show_trace_log_lvl(task, regs, sp, bp, log_lvl);  } @@ -254,10 +254,9 @@ void show_regs(struct pt_regs *regs)  	sp = regs->sp;  	printk("CPU %d ", cpu); -	print_modules();  	__show_regs(regs, 1); -	printk("Process %s (pid: %d, threadinfo %p, task %p)\n", -		cur->comm, cur->pid, task_thread_info(cur), cur); +	printk(KERN_DEFAULT 
"Process %s (pid: %d, threadinfo %p, task %p)\n", +	       cur->comm, cur->pid, task_thread_info(cur), cur);  	/*  	 * When in-kernel, we also print out the stack and code at the @@ -284,16 +283,16 @@ void show_regs(struct pt_regs *regs)  		for (i = 0; i < code_len; i++, ip++) {  			if (ip < (u8 *)PAGE_OFFSET ||  					probe_kernel_address(ip, c)) { -				printk(KERN_CONT " Bad RIP value."); +				pr_cont(" Bad RIP value.");  				break;  			}  			if (ip == (u8 *)regs->ip) -				printk(KERN_CONT "<%02x> ", c); +				pr_cont("<%02x> ", c);  			else -				printk(KERN_CONT "%02x ", c); +				pr_cont("%02x ", c);  		}  	} -	printk(KERN_CONT "\n"); +	pr_cont("\n");  }  int is_valid_bugaddr(unsigned long ip) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7d65133b51b..111f6bbd8b3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1758,10 +1758,30 @@ end_repeat_nmi:  	 */  	call save_paranoid  	DEFAULT_FRAME 0 + +	/* +	 * Save off the CR2 register. If we take a page fault in the NMI then +	 * it could corrupt the CR2 value. If the NMI preempts a page fault +	 * handler before it was able to read the CR2 register, and then the +	 * NMI itself takes a page fault, the page fault that was preempted +	 * will read the information from the NMI page fault and not the +	 * origin fault. Save it off and restore it if it changes. +	 * Use the r12 callee-saved register. +	 */ +	movq %cr2, %r12 +  	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */  	movq %rsp,%rdi  	movq $-1,%rsi  	call do_nmi + +	/* Did the NMI take a page fault? Restore cr2 if it did */ +	movq %cr2, %rcx +	cmpq %rcx, %r12 +	je 1f +	movq %r12, %cr2 +1: +	  	testl %ebx,%ebx				/* swapgs needed? */  	jnz nmi_restore  nmi_swapgs: diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3dafc6003b7..1f5f1d5d2a0 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -294,9 +294,9 @@ void fixup_irqs(void)  		raw_spin_unlock(&desc->lock);  		if (break_affinity && set_affinity) -			printk("Broke affinity for irq %i\n", irq); +			pr_notice("Broke affinity for irq %i\n", irq);  		else if (!set_affinity) -			printk("Cannot set affinity for irq %i\n", irq); +			pr_notice("Cannot set affinity for irq %i\n", irq);  	}  	/* diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8bfb6146f75..3f61904365c 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -444,12 +444,12 @@ void kgdb_roundup_cpus(unsigned long flags)  /**   *	kgdb_arch_handle_exception - Handle architecture specific GDB packets. - *	@vector: The error vector of the exception that happened. + *	@e_vector: The error vector of the exception that happened.   *	@signo: The signal number of the exception that happened.   *	@err_code: The error code of the exception that happened. - *	@remcom_in_buffer: The buffer of the packet we have read. - *	@remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into. - *	@regs: The &struct pt_regs of the current process. + *	@remcomInBuffer: The buffer of the packet we have read. + *	@remcomOutBuffer: The buffer of %BUFMAX bytes to write a packet into. + *	@linux_regs: The &struct pt_regs of the current process.   *   *	This function MUST handle the 'c' and 's' command packets,   *	as well packets to set / remove a hardware breakpoint, if used. 
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index fbdfc691718..4873e62db6a 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -87,6 +87,7 @@  #include <asm/microcode.h>  #include <asm/processor.h>  #include <asm/cpu_device_id.h> +#include <asm/perf_event.h>  MODULE_DESCRIPTION("Microcode Update Driver");  MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); @@ -277,7 +278,6 @@ static int reload_for_cpu(int cpu)  	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;  	int err = 0; -	mutex_lock(&microcode_mutex);  	if (uci->valid) {  		enum ucode_state ustate; @@ -288,7 +288,6 @@ static int reload_for_cpu(int cpu)  			if (ustate == UCODE_ERROR)  				err = -EINVAL;  	} -	mutex_unlock(&microcode_mutex);  	return err;  } @@ -298,19 +297,31 @@ static ssize_t reload_store(struct device *dev,  			    const char *buf, size_t size)  {  	unsigned long val; -	int cpu = dev->id; -	ssize_t ret = 0; +	int cpu; +	ssize_t ret = 0, tmp_ret;  	ret = kstrtoul(buf, 0, &val);  	if (ret)  		return ret; -	if (val == 1) { -		get_online_cpus(); -		if (cpu_online(cpu)) -			ret = reload_for_cpu(cpu); -		put_online_cpus(); +	if (val != 1) +		return size; + +	get_online_cpus(); +	mutex_lock(&microcode_mutex); +	for_each_online_cpu(cpu) { +		tmp_ret = reload_for_cpu(cpu); +		if (tmp_ret != 0) +			pr_warn("Error reloading microcode on CPU %d\n", cpu); + +		/* save retval of the first encountered reload error */ +		if (!ret) +			ret = tmp_ret;  	} +	if (!ret) +		perf_check_microcode(); +	mutex_unlock(&microcode_mutex); +	put_online_cpus();  	if (!ret)  		ret = size; @@ -339,7 +350,6 @@ static DEVICE_ATTR(version, 0400, version_show, NULL);  static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL);  static struct attribute *mc_default_attrs[] = { -	&dev_attr_reload.attr,  	&dev_attr_version.attr,  	&dev_attr_processor_flags.attr,  	NULL @@ -504,7 +514,7 @@ static struct notifier_block __refdata mc_cpu_notifier = {  #ifdef MODULE  /* Autoload on Intel and AMD systems */ -static const struct x86_cpu_id microcode_id[] = { +static const struct x86_cpu_id __initconst microcode_id[] = {  #ifdef CONFIG_MICROCODE_INTEL  	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },  #endif @@ -516,6 +526,16 @@ static const struct x86_cpu_id microcode_id[] = {  MODULE_DEVICE_TABLE(x86cpu, microcode_id);  #endif +static struct attribute *cpu_root_microcode_attrs[] = { +	&dev_attr_reload.attr, +	NULL +}; + +static struct attribute_group cpu_root_microcode_group = { +	.name  = "microcode", +	.attrs = cpu_root_microcode_attrs, +}; +  static int __init microcode_init(void)  {  	struct cpuinfo_x86 *c = &cpu_data(0); @@ -540,16 +560,25 @@ static int __init microcode_init(void)  	mutex_lock(&microcode_mutex);  	error = subsys_interface_register(&mc_cpu_interface); - +	if (!error) +		perf_check_microcode();  	mutex_unlock(&microcode_mutex);  	put_online_cpus();  	if (error)  		goto out_pdev; +	error = sysfs_create_group(&cpu_subsys.dev_root->kobj, +				   &cpu_root_microcode_group); + +	if (error) { +		pr_err("Error creating microcode group!\n"); +		goto out_driver; +	} +  	error = microcode_dev_init();  	if (error) -		goto out_driver; +		goto out_ucode_group;  	register_syscore_ops(&mc_syscore_ops);  	register_hotcpu_notifier(&mc_cpu_notifier); @@ -559,7 +588,11 @@ static int __init microcode_init(void)  	return 0; -out_driver: + out_ucode_group: +	sysfs_remove_group(&cpu_subsys.dev_root->kobj, +			   &cpu_root_microcode_group); + + out_driver:  	get_online_cpus();  	mutex_lock(&microcode_mutex); @@ 
-568,7 +601,7 @@ out_driver:  	mutex_unlock(&microcode_mutex);  	put_online_cpus(); -out_pdev: + out_pdev:  	platform_device_unregister(microcode_pdev);  	return error; @@ -584,6 +617,9 @@ static void __exit microcode_exit(void)  	unregister_hotcpu_notifier(&mc_cpu_notifier);  	unregister_syscore_ops(&mc_syscore_ops); +	sysfs_remove_group(&cpu_subsys.dev_root->kobj, +			   &cpu_root_microcode_group); +  	get_online_cpus();  	mutex_lock(&microcode_mutex); diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index f21fd94ac89..202494d2ec6 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -15,6 +15,9 @@      along with this program; if not, write to the Free Software      Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/moduleloader.h>  #include <linux/elf.h>  #include <linux/vmalloc.h> @@ -30,9 +33,14 @@  #include <asm/pgtable.h>  #if 0 -#define DEBUGP printk +#define DEBUGP(fmt, ...)				\ +	printk(KERN_DEBUG fmt, ##__VA_ARGS__)  #else -#define DEBUGP(fmt...) +#define DEBUGP(fmt, ...)				\ +do {							\ +	if (0)						\ +		printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\ +} while (0)  #endif  void *module_alloc(unsigned long size) @@ -56,8 +64,8 @@ int apply_relocate(Elf32_Shdr *sechdrs,  	Elf32_Sym *sym;  	uint32_t *location; -	DEBUGP("Applying relocate section %u to %u\n", relsec, -	       sechdrs[relsec].sh_info); +	DEBUGP("Applying relocate section %u to %u\n", +	       relsec, sechdrs[relsec].sh_info);  	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {  		/* This is where to make the change */  		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr @@ -77,7 +85,7 @@ int apply_relocate(Elf32_Shdr *sechdrs,  			*location += sym->st_value - (uint32_t)location;  			break;  		default: -			printk(KERN_ERR "module %s: Unknown relocation: %u\n", +			pr_err("%s: Unknown relocation: %u\n",  			       me->name, ELF32_R_TYPE(rel[i].r_info));  			return -ENOEXEC;  		} @@ -97,8 +105,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  	void *loc;  	u64 val; -	DEBUGP("Applying relocate section %u to %u\n", relsec, -	       sechdrs[relsec].sh_info); +	DEBUGP("Applying relocate section %u to %u\n", +	       relsec, sechdrs[relsec].sh_info);  	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {  		/* This is where to make the change */  		loc = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr @@ -110,8 +118,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  			+ ELF64_R_SYM(rel[i].r_info);  		DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", -			(int)ELF64_R_TYPE(rel[i].r_info), -			sym->st_value, rel[i].r_addend, (u64)loc); +		       (int)ELF64_R_TYPE(rel[i].r_info), +		       sym->st_value, rel[i].r_addend, (u64)loc);  		val = sym->st_value + rel[i].r_addend; @@ -140,7 +148,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  #endif  			break;  		default: -			printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", +			pr_err("%s: Unknown rela relocation: %llu\n",  			       me->name, ELF64_R_TYPE(rel[i].r_info));  			return -ENOEXEC;  		} @@ -148,9 +156,9 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  	return 0;  overflow: -	printk(KERN_ERR "overflow in relocation type %d val %Lx\n", +	pr_err("overflow in relocation type %d val %Lx\n",  	       (int)ELF64_R_TYPE(rel[i].r_info), val); -	printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", +	pr_err("`%s' likely not compiled with -mcmodel=kernel\n",  	       me->name);  	return -ENOEXEC;  } diff 
--git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index a0b2f84457b..f84f5c57de3 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -365,8 +365,9 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)  #ifdef CONFIG_X86_32  /*   * For i386, NMIs use the same stack as the kernel, and we can - * add a workaround to the iret problem in C. Simply have 3 states - * the NMI can be in. + * add a workaround to the iret problem in C (preventing nested + * NMIs if an NMI takes a trap). Simply have 3 states the NMI + * can be in:   *   *  1) not running   *  2) executing @@ -383,32 +384,50 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)   * If an NMI hits a breakpoint that executes an iret, another   * NMI can preempt it. We do not want to allow this new NMI   * to run, but we want to execute it when the first one finishes. - * We set the state to "latched", and the first NMI will perform - * an cmpxchg on the state, and if it doesn't successfully - * reset the state to "not running" it will restart the next - * NMI. + * We set the state to "latched", and the exit of the first NMI will + * perform a dec_return, if the result is zero (NOT_RUNNING), then + * it will simply exit the NMI handler. If not, the dec_return + * would have set the state to NMI_EXECUTING (what we want it to + * be when we are running). In this case, we simply jump back + * to rerun the NMI handler again, and restart the 'latched' NMI. + * + * No trap (breakpoint or page fault) should be hit before nmi_restart, + * thus there is no race between the first check of state for NOT_RUNNING + * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs + * at this point. + * + * In case the NMI takes a page fault, we need to save off the CR2 + * because the NMI could have preempted another page fault and corrupt + * the CR2 that is about to be read. As nested NMIs must be restarted + * and they can not take breakpoints or page faults, the update of the + * CR2 must be done before converting the nmi state back to NOT_RUNNING. + * Otherwise, there would be a race of another nested NMI coming in + * after setting state to NOT_RUNNING but before updating the nmi_cr2.   
*/  enum nmi_states { -	NMI_NOT_RUNNING, +	NMI_NOT_RUNNING = 0,  	NMI_EXECUTING,  	NMI_LATCHED,  };  static DEFINE_PER_CPU(enum nmi_states, nmi_state); +static DEFINE_PER_CPU(unsigned long, nmi_cr2);  #define nmi_nesting_preprocess(regs)					\  	do {								\ -		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\ -			__get_cpu_var(nmi_state) = NMI_LATCHED;		\ +		if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {	\ +			this_cpu_write(nmi_state, NMI_LATCHED);		\  			return;						\  		}							\ -	nmi_restart:							\ -		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\ -	} while (0) +		this_cpu_write(nmi_state, NMI_EXECUTING);		\ +		this_cpu_write(nmi_cr2, read_cr2());			\ +	} while (0);							\ +	nmi_restart:  #define nmi_nesting_postprocess()					\  	do {								\ -		if (cmpxchg(&__get_cpu_var(nmi_state),			\ -		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\ +		if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))	\ +			write_cr2(this_cpu_read(nmi_cr2));		\ +		if (this_cpu_dec_return(nmi_state))			\  			goto nmi_restart;				\  	} while (0)  #else /* x86_64 */ diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index 149b8d9c6ad..6d9582ec032 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c @@ -42,7 +42,8 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)  static void __init init_nmi_testsuite(void)  {  	/* trap all the unknown NMIs we may generate */ -	register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); +	register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk", +			__initdata);  }  static void __init cleanup_nmi_testsuite(void) @@ -64,8 +65,8 @@ static void __init test_nmi_ipi(struct cpumask *mask)  {  	unsigned long timeout; -	if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback, -				 NMI_FLAG_FIRST, "nmi_selftest")) { +	if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, +				 NMI_FLAG_FIRST, "nmi_selftest", __initdata)) {  		nmi_fail = FAILURE;  		return;  	} diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 9ce885996fd..17fff18a103 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -352,9 +352,7 @@ struct pv_cpu_ops pv_cpu_ops = {  #endif  	.wbinvd = native_wbinvd,  	.read_msr = native_read_msr_safe, -	.rdmsr_regs = native_rdmsr_safe_regs,  	.write_msr = native_write_msr_safe, -	.wrmsr_regs = native_wrmsr_safe_regs,  	.read_tsc = native_read_tsc,  	.read_pmc = native_read_pmc,  	.read_tscp = native_read_tscp, diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index b72838bae64..299d49302e7 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -22,6 +22,8 @@   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA   */ +#define pr_fmt(fmt) "Calgary: " fmt +  #include <linux/kernel.h>  #include <linux/init.h>  #include <linux/types.h> @@ -245,7 +247,7 @@ static unsigned long iommu_range_alloc(struct device *dev,  		offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0,  					  npages, 0, boundary_size, 0);  		if (offset == ~0UL) { -			printk(KERN_WARNING "Calgary: IOMMU full.\n"); +			pr_warn("IOMMU full\n");  			spin_unlock_irqrestore(&tbl->it_lock, flags);  			if (panic_on_overflow)  				panic("Calgary: fix the allocator.\n"); @@ -271,8 +273,8 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,  	entry = iommu_range_alloc(dev, tbl, npages);  	if (unlikely(entry == DMA_ERROR_CODE)) { -		
printk(KERN_WARNING "Calgary: failed to allocate %u pages in " -		       "iommu %p\n", npages, tbl); +		pr_warn("failed to allocate %u pages in iommu %p\n", +			npages, tbl);  		return DMA_ERROR_CODE;  	} @@ -561,8 +563,7 @@ static void calgary_tce_cache_blast(struct iommu_table *tbl)  		i++;  	} while ((val & 0xff) != 0xff && i < 100);  	if (i == 100) -		printk(KERN_WARNING "Calgary: PCI bus not quiesced, " -		       "continuing anyway\n"); +		pr_warn("PCI bus not quiesced, continuing anyway\n");  	/* invalidate TCE cache */  	target = calgary_reg(bbar, tar_offset(tbl->it_busno)); @@ -604,8 +605,7 @@ begin:  		i++;  	} while ((val64 & 0xff) != 0xff && i < 100);  	if (i == 100) -		printk(KERN_WARNING "CalIOC2: PCI bus not quiesced, " -		       "continuing anyway\n"); +		pr_warn("CalIOC2: PCI bus not quiesced, continuing anyway\n");  	/* 3. poll Page Migration DEBUG for SoftStopFault */  	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG); @@ -617,8 +617,7 @@ begin:  		if (++count < 100)  			goto begin;  		else { -			printk(KERN_WARNING "CalIOC2: too many SoftStopFaults, " -			       "aborting TCE cache flush sequence!\n"); +			pr_warn("CalIOC2: too many SoftStopFaults, aborting TCE cache flush sequence!\n");  			return; /* pray for the best */  		}  	} @@ -840,8 +839,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl)  	plssr = be32_to_cpu(readl(target));  	/* If no error, the agent ID in the CSR is not valid */ -	printk(KERN_EMERG "Calgary: DMA error on Calgary PHB 0x%x, " -	       "0x%08x@CSR 0x%08x@PLSSR\n", tbl->it_busno, csr, plssr); +	pr_emerg("DMA error on Calgary PHB 0x%x, 0x%08x@CSR 0x%08x@PLSSR\n", +		 tbl->it_busno, csr, plssr);  }  static void calioc2_dump_error_regs(struct iommu_table *tbl) @@ -867,22 +866,21 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl)  	target = calgary_reg(bbar, phboff | 0x800);  	mck = be32_to_cpu(readl(target)); -	printk(KERN_EMERG "Calgary: DMA error on CalIOC2 PHB 0x%x\n", -	       tbl->it_busno); +	pr_emerg("DMA error on CalIOC2 PHB 0x%x\n", tbl->it_busno); -	printk(KERN_EMERG "Calgary: 0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n", -	       csr, plssr, csmr, mck); +	pr_emerg("0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n", +		 csr, plssr, csmr, mck);  	/* dump rest of error regs */ -	printk(KERN_EMERG "Calgary: "); +	pr_emerg("");  	for (i = 0; i < ARRAY_SIZE(errregs); i++) {  		/* err regs are at 0x810 - 0x870 */  		erroff = (0x810 + (i * 0x10));  		target = calgary_reg(bbar, phboff | erroff);  		errregs[i] = be32_to_cpu(readl(target)); -		printk("0x%08x@0x%lx ", errregs[i], erroff); +		pr_cont("0x%08x@0x%lx ", errregs[i], erroff);  	} -	printk("\n"); +	pr_cont("\n");  	/* root complex status */  	target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 735279e54e5..ef6a8456f71 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/errno.h>  #include <linux/kernel.h>  #include <linux/mm.h> @@ -145,16 +147,14 @@ void show_regs_common(void)  	/* Board Name is optional */  	board = dmi_get_system_info(DMI_BOARD_NAME); -	printk(KERN_CONT "\n"); -	printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s", -		current->pid, current->comm, print_tainted(), -		init_utsname()->release, -		(int)strcspn(init_utsname()->version, " "), -		init_utsname()->version); -	printk(KERN_CONT " %s %s", vendor, product); -	if (board) -		printk(KERN_CONT 
"/%s", board); -	printk(KERN_CONT "\n"); +	printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s %s%s%s\n", +	       current->pid, current->comm, print_tainted(), +	       init_utsname()->release, +	       (int)strcspn(init_utsname()->version, " "), +	       init_utsname()->version, +	       vendor, product, +	       board ? "/" : "", +	       board ? board : "");  }  void flush_thread(void) @@ -645,7 +645,7 @@ static void amd_e400_idle(void)  			amd_e400_c1e_detected = true;  			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))  				mark_tsc_unstable("TSC halt in AMD C1E"); -			printk(KERN_INFO "System has AMD C1E enabled\n"); +			pr_info("System has AMD C1E enabled\n");  		}  	} @@ -659,8 +659,7 @@ static void amd_e400_idle(void)  			 */  			clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,  					   &cpu); -			printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", -			       cpu); +			pr_info("Switch to broadcast mode on CPU%d\n", cpu);  		}  		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); @@ -681,8 +680,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)  {  #ifdef CONFIG_SMP  	if (pm_idle == poll_idle && smp_num_siblings > 1) { -		printk_once(KERN_WARNING "WARNING: polling idle and HT enabled," -			" performance may degrade.\n"); +		pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");  	}  #endif  	if (pm_idle) @@ -692,11 +690,11 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)  		/*  		 * One CPU supports mwait => All CPUs supports mwait  		 */ -		printk(KERN_INFO "using mwait in idle threads.\n"); +		pr_info("using mwait in idle threads\n");  		pm_idle = mwait_idle;  	} else if (cpu_has_amd_erratum(amd_erratum_400)) {  		/* E400: APIC timer interrupt does not wake up CPU from C1e */ -		printk(KERN_INFO "using AMD E400 aware idle routine\n"); +		pr_info("using AMD E400 aware idle routine\n");  		pm_idle = amd_e400_idle;  	} else  		pm_idle = default_idle; @@ -715,7 +713,7 @@ static int __init idle_setup(char *str)  		return -EINVAL;  	if (!strcmp(str, "poll")) { -		printk("using polling idle threads.\n"); +		pr_info("using polling idle threads\n");  		pm_idle = poll_idle;  		boot_option_idle_override = IDLE_POLL;  	} else if (!strcmp(str, "mwait")) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 61cdf7fdf09..0a980c9d7cb 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -117,10 +117,10 @@ void release_thread(struct task_struct *dead_task)  {  	if (dead_task->mm) {  		if (dead_task->mm->context.size) { -			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n", -					dead_task->comm, -					dead_task->mm->context.ldt, -					dead_task->mm->context.size); +			pr_warn("WARNING: dead process %8s still has LDT? 
<%p/%d>\n", +				dead_task->comm, +				dead_task->mm->context.ldt, +				dead_task->mm->context.size);  			BUG();  		}  	} @@ -466,7 +466,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)  			task->thread.gs = addr;  			if (doit) {  				load_gs_index(0); -				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); +				ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);  			}  		}  		put_cpu(); @@ -494,7 +494,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)  				/* set the selector to 0 to not confuse  				   __switch_to */  				loadsegment(fs, 0); -				ret = checking_wrmsrl(MSR_FS_BASE, addr); +				ret = wrmsrl_safe(MSR_FS_BASE, addr);  			}  		}  		put_cpu(); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 25b48edb847..52190a938b4 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/module.h>  #include <linux/reboot.h>  #include <linux/init.h> @@ -20,14 +22,12 @@  #include <asm/virtext.h>  #include <asm/cpu.h>  #include <asm/nmi.h> +#include <asm/smp.h> -#ifdef CONFIG_X86_32 -# include <linux/ctype.h> -# include <linux/mc146818rtc.h> -# include <asm/realmode.h> -#else -# include <asm/x86_init.h> -#endif +#include <linux/ctype.h> +#include <linux/mc146818rtc.h> +#include <asm/realmode.h> +#include <asm/x86_init.h>  /*   * Power off function, if any @@ -49,7 +49,7 @@ int reboot_force;   */  static int reboot_default = 1; -#if defined(CONFIG_X86_32) && defined(CONFIG_SMP) +#ifdef CONFIG_SMP  static int reboot_cpu = -1;  #endif @@ -67,8 +67,8 @@ bool port_cf9_safe = false;   * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]   * warm   Don't set the cold reboot flag   * cold   Set the cold reboot flag - * bios   Reboot by jumping through the BIOS (only for X86_32) - * smp    Reboot by executing reset on BSP or other CPU (only for X86_32) + * bios   Reboot by jumping through the BIOS + * smp    Reboot by executing reset on BSP or other CPU   * triple Force a triple fault (init)   * kbd    Use the keyboard controller. cold reset (default)   * acpi   Use the RESET_REG in the FADT @@ -95,7 +95,6 @@ static int __init reboot_setup(char *str)  			reboot_mode = 0;  			break; -#ifdef CONFIG_X86_32  #ifdef CONFIG_SMP  		case 's':  			if (isdigit(*(str+1))) { @@ -112,7 +111,6 @@ static int __init reboot_setup(char *str)  #endif /* CONFIG_SMP */  		case 'b': -#endif  		case 'a':  		case 'k':  		case 't': @@ -138,7 +136,6 @@ static int __init reboot_setup(char *str)  __setup("reboot=", reboot_setup); -#ifdef CONFIG_X86_32  /*   * Reboot options and system auto-detection code provided by   * Dell Inc. so their systems "just work". :-) @@ -152,16 +149,14 @@ static int __init set_bios_reboot(const struct dmi_system_id *d)  {  	if (reboot_type != BOOT_BIOS) {  		reboot_type = BOOT_BIOS; -		printk(KERN_INFO "%s series board detected. Selecting BIOS-method for reboots.\n", d->ident); +		pr_info("%s series board detected. Selecting %s-method for reboots.\n", +			"BIOS", d->ident);  	}  	return 0;  } -void machine_real_restart(unsigned int type) +void __noreturn machine_real_restart(unsigned int type)  { -	void (*restart_lowmem)(unsigned int) = (void (*)(unsigned int)) -		real_mode_header->machine_real_restart_asm; -  	local_irq_disable();  	/* @@ -181,25 +176,28 @@ void machine_real_restart(unsigned int type)  	/*  	 * Switch back to the initial page table.  	 
*/ +#ifdef CONFIG_X86_32  	load_cr3(initial_page_table); - -	/* -	 * Write 0x1234 to absolute memory location 0x472.  The BIOS reads -	 * this on booting to tell it to "Bypass memory test (also warm -	 * boot)".  This seems like a fairly standard thing that gets set by -	 * REBOOT.COM programs, and the previous reset routine did this -	 * too. */ -	*((unsigned short *)0x472) = reboot_mode; +#else +	write_cr3(real_mode_header->trampoline_pgd); +#endif  	/* Jump to the identity-mapped low memory code */ -	restart_lowmem(type); +#ifdef CONFIG_X86_32 +	asm volatile("jmpl *%0" : : +		     "rm" (real_mode_header->machine_real_restart_asm), +		     "a" (type)); +#else +	asm volatile("ljmpl *%0" : : +		     "m" (real_mode_header->machine_real_restart_asm), +		     "D" (type)); +#endif +	unreachable();  }  #ifdef CONFIG_APM_MODULE  EXPORT_SYMBOL(machine_real_restart);  #endif -#endif /* CONFIG_X86_32 */ -  /*   * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot   */ @@ -207,8 +205,8 @@ static int __init set_pci_reboot(const struct dmi_system_id *d)  {  	if (reboot_type != BOOT_CF9) {  		reboot_type = BOOT_CF9; -		printk(KERN_INFO "%s series board detected. " -		       "Selecting PCI-method for reboots.\n", d->ident); +		pr_info("%s series board detected. Selecting %s-method for reboots.\n", +			"PCI", d->ident);  	}  	return 0;  } @@ -217,17 +215,16 @@ static int __init set_kbd_reboot(const struct dmi_system_id *d)  {  	if (reboot_type != BOOT_KBD) {  		reboot_type = BOOT_KBD; -		printk(KERN_INFO "%s series board detected. Selecting KBD-method for reboot.\n", d->ident); +		pr_info("%s series board detected. Selecting %s-method for reboot.\n", +			"KBD", d->ident);  	}  	return 0;  }  /* - * This is a single dmi_table handling all reboot quirks.  Note that - * REBOOT_BIOS is only available for 32bit + * This is a single dmi_table handling all reboot quirks.   */  static struct dmi_system_id __initdata reboot_dmi_table[] = { -#ifdef CONFIG_X86_32  	{	/* Handle problems with rebooting on Dell E520's */  		.callback = set_bios_reboot,  		.ident = "Dell E520", @@ -377,7 +374,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {  			DMI_MATCH(DMI_BOARD_NAME, "P4S800"),  		},  	}, -#endif /* CONFIG_X86_32 */  	{	/* Handle reboot issue on Acer Aspire one */  		.callback = set_kbd_reboot, @@ -451,6 +447,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {  			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"),  		},  	}, +	{	/* Handle problems with rebooting on the Precision M6600. 
*/ +		.callback = set_pci_reboot, +		.ident = "Dell OptiPlex 990", +		.matches = { +			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), +			DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), +		}, +	},  	{ }  }; @@ -576,13 +580,11 @@ static void native_machine_emergency_restart(void)  			reboot_type = BOOT_KBD;  			break; -#ifdef CONFIG_X86_32  		case BOOT_BIOS:  			machine_real_restart(MRR_BIOS);  			reboot_type = BOOT_KBD;  			break; -#endif  		case BOOT_ACPI:  			acpi_reboot(); @@ -624,12 +626,10 @@ void native_machine_shutdown(void)  	/* The boot cpu is always logical cpu 0 */  	int reboot_cpu_id = 0; -#ifdef CONFIG_X86_32  	/* See if there has been given a command line override */  	if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) &&  		cpu_online(reboot_cpu))  		reboot_cpu_id = reboot_cpu; -#endif  	/* Make certain the cpu I'm about to reboot on is online */  	if (!cpu_online(reboot_cpu_id)) @@ -670,7 +670,7 @@ static void __machine_emergency_restart(int emergency)  static void native_machine_restart(char *__unused)  { -	printk("machine restart\n"); +	pr_notice("machine restart\n");  	if (!reboot_force)  		machine_shutdown(); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 16be6dc14db..f4b9b80e1b9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1031,8 +1031,6 @@ void __init setup_arch(char **cmdline_p)  	x86_init.timers.wallclock_init(); -	x86_platform.wallclock_init(); -  	mcheck_init();  	arch_init_ideal_nops(); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 21af737053a..b280908a376 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -6,6 +6,9 @@   *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes   *  2000-2002   x86-64 support by Andi Kleen   */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/sched.h>  #include <linux/mm.h>  #include <linux/smp.h> @@ -814,7 +817,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)  		       me->comm, me->pid, where, frame,  		       regs->ip, regs->sp, regs->orig_ax);  		print_vma_addr(" in ", regs->ip); -		printk(KERN_CONT "\n"); +		pr_cont("\n");  	}  	force_sig(SIGSEGV, me); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7bd8a082365..c1a310fb830 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1,4 +1,4 @@ -/* + /*   *	x86 SMP booting functions   *   *	(c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> @@ -39,6 +39,8 @@   *	Glauber Costa		:	i386 and x86_64 integration   */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/init.h>  #include <linux/smp.h>  #include <linux/module.h> @@ -184,7 +186,7 @@ static void __cpuinit smp_callin(void)  	 * boards)  	 */ -	pr_debug("CALLIN, before setup_local_APIC().\n"); +	pr_debug("CALLIN, before setup_local_APIC()\n");  	if (apic->smp_callin_clear_local_apic)  		apic->smp_callin_clear_local_apic();  	setup_local_APIC(); @@ -255,22 +257,13 @@ notrace static void __cpuinit start_secondary(void *unused)  	check_tsc_sync_target();  	/* -	 * We need to hold call_lock, so there is no inconsistency -	 * between the time smp_call_function() determines number of -	 * IPI recipients, and the time when the determination is made -	 * for which cpus receive the IPI. Holding this -	 * lock helps us to not include this cpu in a currently in progress -	 * smp_call_function(). -	 *  	 * We need to hold vector_lock so there the set of online cpus  	 * does not change while we are assigning vectors to cpus.  
Holding  	 * this lock ensures we don't half assign or remove an irq from a cpu.  	 */ -	ipi_call_lock();  	lock_vector_lock();  	set_cpu_online(smp_processor_id(), true);  	unlock_vector_lock(); -	ipi_call_unlock();  	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;  	x86_platform.nmi_init(); @@ -432,17 +425,16 @@ static void impress_friends(void)  	/*  	 * Allow the user to impress friends.  	 */ -	pr_debug("Before bogomips.\n"); +	pr_debug("Before bogomips\n");  	for_each_possible_cpu(cpu)  		if (cpumask_test_cpu(cpu, cpu_callout_mask))  			bogosum += cpu_data(cpu).loops_per_jiffy; -	printk(KERN_INFO -		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n", +	pr_info("Total of %d processors activated (%lu.%02lu BogoMIPS)\n",  		num_online_cpus(),  		bogosum/(500000/HZ),  		(bogosum/(5000/HZ))%100); -	pr_debug("Before bogocount - setting activated=1.\n"); +	pr_debug("Before bogocount - setting activated=1\n");  }  void __inquire_remote_apic(int apicid) @@ -452,18 +444,17 @@ void __inquire_remote_apic(int apicid)  	int timeout;  	u32 status; -	printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid); +	pr_info("Inquiring remote APIC 0x%x...\n", apicid);  	for (i = 0; i < ARRAY_SIZE(regs); i++) { -		printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]); +		pr_info("... APIC 0x%x %s: ", apicid, names[i]);  		/*  		 * Wait for idle.  		 */  		status = safe_apic_wait_icr_idle();  		if (status) -			printk(KERN_CONT -			       "a previous APIC delivery may have failed\n"); +			pr_cont("a previous APIC delivery may have failed\n");  		apic_icr_write(APIC_DM_REMRD | regs[i], apicid); @@ -476,10 +467,10 @@ void __inquire_remote_apic(int apicid)  		switch (status) {  		case APIC_ICR_RR_VALID:  			status = apic_read(APIC_RRR); -			printk(KERN_CONT "%08x\n", status); +			pr_cont("%08x\n", status);  			break;  		default: -			printk(KERN_CONT "failed\n"); +			pr_cont("failed\n");  		}  	}  } @@ -513,12 +504,12 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)  			apic_write(APIC_ESR, 0);  		accept_status = (apic_read(APIC_ESR) & 0xEF);  	} -	pr_debug("NMI sent.\n"); +	pr_debug("NMI sent\n");  	if (send_status) -		printk(KERN_ERR "APIC never delivered???\n"); +		pr_err("APIC never delivered???\n");  	if (accept_status) -		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); +		pr_err("APIC delivery error (%lx)\n", accept_status);  	return (send_status | accept_status);  } @@ -540,7 +531,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)  		apic_read(APIC_ESR);  	} -	pr_debug("Asserting INIT.\n"); +	pr_debug("Asserting INIT\n");  	/*  	 * Turn INIT on target chip @@ -556,7 +547,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)  	mdelay(10); -	pr_debug("Deasserting INIT.\n"); +	pr_debug("Deasserting INIT\n");  	/* Target chip */  	/* Send IPI */ @@ -589,14 +580,14 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)  	/*  	 * Run STARTUP IPI loop.  	 */ -	pr_debug("#startup loops: %d.\n", num_starts); +	pr_debug("#startup loops: %d\n", num_starts);  	for (j = 1; j <= num_starts; j++) { -		pr_debug("Sending STARTUP #%d.\n", j); +		pr_debug("Sending STARTUP #%d\n", j);  		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  
*/  			apic_write(APIC_ESR, 0);  		apic_read(APIC_ESR); -		pr_debug("After apic_write.\n"); +		pr_debug("After apic_write\n");  		/*  		 * STARTUP IPI @@ -613,7 +604,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)  		 */  		udelay(300); -		pr_debug("Startup point 1.\n"); +		pr_debug("Startup point 1\n");  		pr_debug("Waiting for send to finish...\n");  		send_status = safe_apic_wait_icr_idle(); @@ -628,12 +619,12 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)  		if (send_status || accept_status)  			break;  	} -	pr_debug("After Startup.\n"); +	pr_debug("After Startup\n");  	if (send_status) -		printk(KERN_ERR "APIC never delivered???\n"); +		pr_err("APIC never delivered???\n");  	if (accept_status) -		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); +		pr_err("APIC delivery error (%lx)\n", accept_status);  	return (send_status | accept_status);  } @@ -647,11 +638,11 @@ static void __cpuinit announce_cpu(int cpu, int apicid)  	if (system_state == SYSTEM_BOOTING) {  		if (node != current_node) {  			if (current_node > (-1)) -				pr_cont(" Ok.\n"); +				pr_cont(" OK\n");  			current_node = node;  			pr_info("Booting Node %3d, Processors ", node);  		} -		pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : ""); +		pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : "");  		return;  	} else  		pr_info("Booting Node %d Processor %d APIC 0x%x\n", @@ -731,9 +722,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)  		/*  		 * allow APs to start initializing.  		 */ -		pr_debug("Before Callout %d.\n", cpu); +		pr_debug("Before Callout %d\n", cpu);  		cpumask_set_cpu(cpu, cpu_callout_mask); -		pr_debug("After Callout %d.\n", cpu); +		pr_debug("After Callout %d\n", cpu);  		/*  		 * Wait 5s total for a response @@ -761,7 +752,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)  				pr_err("CPU%d: Stuck ??\n", cpu);  			else  				/* trampoline code not run */ -				pr_err("CPU%d: Not responding.\n", cpu); +				pr_err("CPU%d: Not responding\n", cpu);  			if (apic->inquire_remote_apic)  				apic->inquire_remote_apic(apicid);  		} @@ -806,7 +797,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)  	if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||  	    !physid_isset(apicid, phys_cpu_present_map) ||  	    !apic->apic_id_valid(apicid)) { -		printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu); +		pr_err("%s: bad cpu %d\n", __func__, cpu);  		return -EINVAL;  	} @@ -887,9 +878,8 @@ static int __init smp_sanity_check(unsigned max_cpus)  		unsigned int cpu;  		unsigned nr; -		printk(KERN_WARNING -		       "More than 8 CPUs detected - skipping them.\n" -		       "Use CONFIG_X86_BIGSMP.\n"); +		pr_warn("More than 8 CPUs detected - skipping them\n" +			"Use CONFIG_X86_BIGSMP\n");  		nr = 0;  		for_each_present_cpu(cpu) { @@ -910,8 +900,7 @@ static int __init smp_sanity_check(unsigned max_cpus)  #endif  	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { -		printk(KERN_WARNING -			"weird, boot CPU (#%d) not listed by the BIOS.\n", +		pr_warn("weird, boot CPU (#%d) not listed by the BIOS\n",  			hard_smp_processor_id());  		physid_set(hard_smp_processor_id(), phys_cpu_present_map); @@ -923,11 +912,10 @@ static int __init smp_sanity_check(unsigned max_cpus)  	 */  	if (!smp_found_config && !acpi_lapic) {  		preempt_enable(); -		printk(KERN_NOTICE "SMP motherboard not detected.\n"); +		pr_notice("SMP 
motherboard not detected\n");  		disable_smp();  		if (APIC_init_uniprocessor()) -			printk(KERN_NOTICE "Local APIC not detected." -					   " Using dummy APIC emulation.\n"); +			pr_notice("Local APIC not detected. Using dummy APIC emulation.\n");  		return -1;  	} @@ -936,9 +924,8 @@ static int __init smp_sanity_check(unsigned max_cpus)  	 * CPU too, but we do it for the sake of robustness anyway.  	 */  	if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { -		printk(KERN_NOTICE -			"weird, boot CPU (#%d) not listed by the BIOS.\n", -			boot_cpu_physical_apicid); +		pr_notice("weird, boot CPU (#%d) not listed by the BIOS\n", +			  boot_cpu_physical_apicid);  		physid_set(hard_smp_processor_id(), phys_cpu_present_map);  	}  	preempt_enable(); @@ -951,8 +938,7 @@ static int __init smp_sanity_check(unsigned max_cpus)  		if (!disable_apic) {  			pr_err("BIOS bug, local APIC #%d not detected!...\n",  				boot_cpu_physical_apicid); -			pr_err("... forcing use of dummy APIC emulation." -				"(tell your hw vendor)\n"); +			pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n");  		}  		smpboot_clear_io_apic();  		disable_ioapic_support(); @@ -965,7 +951,7 @@ static int __init smp_sanity_check(unsigned max_cpus)  	 * If SMP should be disabled, then really disable it!  	 */  	if (!max_cpus) { -		printk(KERN_INFO "SMP mode deactivated.\n"); +		pr_info("SMP mode deactivated\n");  		smpboot_clear_io_apic();  		connect_bsp_APIC(); @@ -1017,7 +1003,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)  	if (smp_sanity_check(max_cpus) < 0) { -		printk(KERN_INFO "SMP disabled\n"); +		pr_info("SMP disabled\n");  		disable_smp();  		goto out;  	} @@ -1055,7 +1041,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)  	 * Set up local APIC timer on boot CPU.  	 */ -	printk(KERN_INFO "CPU%d: ", 0); +	pr_info("CPU%d: ", 0);  	print_cpu_info(&cpu_data(0));  	x86_init.timers.setup_percpu_clockev(); @@ -1105,7 +1091,7 @@ void __init native_smp_prepare_boot_cpu(void)  void __init native_smp_cpus_done(unsigned int max_cpus)  { -	pr_debug("Boot done.\n"); +	pr_debug("Boot done\n");  	nmi_selftest();  	impress_friends(); @@ -1166,8 +1152,7 @@ __init void prefill_possible_map(void)  	/* nr_cpu_ids could be reduced via nr_cpus= */  	if (possible > nr_cpu_ids) { -		printk(KERN_WARNING -			"%d Processors exceeds NR_CPUS limit of %d\n", +		pr_warn("%d Processors exceeds NR_CPUS limit of %d\n",  			possible, nr_cpu_ids);  		possible = nr_cpu_ids;  	} @@ -1176,13 +1161,12 @@ __init void prefill_possible_map(void)  	if (!setup_max_cpus)  #endif  	if (possible > i) { -		printk(KERN_WARNING -			"%d Processors exceeds max_cpus limit of %u\n", +		pr_warn("%d Processors exceeds max_cpus limit of %u\n",  			possible, setup_max_cpus);  		possible = i;  	} -	printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", +	pr_info("Allowing %d CPUs, %d hotplug CPUs\n",  		possible, max_t(int, possible - num_processors, 0));  	for (i = 0; i < possible; i++) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 05b31d92f69..b481341c936 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -9,6 +9,9 @@  /*   * Handle hardware traps and faults.   
*/ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/interrupt.h>  #include <linux/kallsyms.h>  #include <linux/spinlock.h> @@ -143,12 +146,11 @@ trap_signal:  #ifdef CONFIG_X86_64  	if (show_unhandled_signals && unhandled_signal(tsk, signr) &&  	    printk_ratelimit()) { -		printk(KERN_INFO -		       "%s[%d] trap %s ip:%lx sp:%lx error:%lx", -		       tsk->comm, tsk->pid, str, -		       regs->ip, regs->sp, error_code); +		pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx", +			tsk->comm, tsk->pid, str, +			regs->ip, regs->sp, error_code);  		print_vma_addr(" in ", regs->ip); -		printk("\n"); +		pr_cont("\n");  	}  #endif @@ -269,12 +271,11 @@ do_general_protection(struct pt_regs *regs, long error_code)  	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&  			printk_ratelimit()) { -		printk(KERN_INFO -			"%s[%d] general protection ip:%lx sp:%lx error:%lx", +		pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx",  			tsk->comm, task_pid_nr(tsk),  			regs->ip, regs->sp, error_code);  		print_vma_addr(" in ", regs->ip); -		printk("\n"); +		pr_cont("\n");  	}  	force_sig(SIGSEGV, tsk); @@ -570,7 +571,7 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)  	conditional_sti(regs);  #if 0  	/* No need to warn about this any longer. */ -	printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); +	pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");  #endif  } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index fc0a147e372..cfa5d4f7ca5 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/kernel.h>  #include <linux/sched.h>  #include <linux/init.h> @@ -84,8 +86,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable);  #ifdef CONFIG_X86_TSC  int __init notsc_setup(char *str)  { -	printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " -			"cannot disable TSC completely.\n"); +	pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n");  	tsc_disabled = 1;  	return 1;  } @@ -373,7 +374,7 @@ static unsigned long quick_pit_calibrate(void)  			goto success;  		}  	} -	printk("Fast TSC calibration failed\n"); +	pr_err("Fast TSC calibration failed\n");  	return 0;  success: @@ -392,7 +393,7 @@ success:  	 */  	delta *= PIT_TICK_RATE;  	do_div(delta, i*256*1000); -	printk("Fast TSC calibration using PIT\n"); +	pr_info("Fast TSC calibration using PIT\n");  	return delta;  } @@ -487,9 +488,8 @@ unsigned long native_calibrate_tsc(void)  		 * use the reference value, as it is more precise.  		 */  		if (delta >= 90 && delta <= 110) { -			printk(KERN_INFO -			       "TSC: PIT calibration matches %s. %d loops\n", -			       hpet ? "HPET" : "PMTIMER", i + 1); +			pr_info("PIT calibration matches %s. %d loops\n", +				hpet ? 
"HPET" : "PMTIMER", i + 1);  			return tsc_ref_min;  		} @@ -511,38 +511,36 @@ unsigned long native_calibrate_tsc(void)  	 */  	if (tsc_pit_min == ULONG_MAX) {  		/* PIT gave no useful value */ -		printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); +		pr_warn("Unable to calibrate against PIT\n");  		/* We don't have an alternative source, disable TSC */  		if (!hpet && !ref1 && !ref2) { -			printk("TSC: No reference (HPET/PMTIMER) available\n"); +			pr_notice("No reference (HPET/PMTIMER) available\n");  			return 0;  		}  		/* The alternative source failed as well, disable TSC */  		if (tsc_ref_min == ULONG_MAX) { -			printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " -			       "failed.\n"); +			pr_warn("HPET/PMTIMER calibration failed\n");  			return 0;  		}  		/* Use the alternative source */ -		printk(KERN_INFO "TSC: using %s reference calibration\n", -		       hpet ? "HPET" : "PMTIMER"); +		pr_info("using %s reference calibration\n", +			hpet ? "HPET" : "PMTIMER");  		return tsc_ref_min;  	}  	/* We don't have an alternative source, use the PIT calibration value */  	if (!hpet && !ref1 && !ref2) { -		printk(KERN_INFO "TSC: Using PIT calibration value\n"); +		pr_info("Using PIT calibration value\n");  		return tsc_pit_min;  	}  	/* The alternative source failed, use the PIT calibration value */  	if (tsc_ref_min == ULONG_MAX) { -		printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " -		       "Using PIT calibration\n"); +		pr_warn("HPET/PMTIMER calibration failed. Using PIT calibration.\n");  		return tsc_pit_min;  	} @@ -551,9 +549,9 @@ unsigned long native_calibrate_tsc(void)  	 * the PIT value as we know that there are PMTIMERs around  	 * running at double speed. At least we let the user know:  	 */ -	printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", -	       hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); -	printk(KERN_INFO "TSC: Using PIT calibration value\n"); +	pr_warn("PIT calibration deviates from %s: %lu %lu\n", +		hpet ? 
"HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); +	pr_info("Using PIT calibration value\n");  	return tsc_pit_min;  } @@ -785,7 +783,7 @@ void mark_tsc_unstable(char *reason)  		tsc_unstable = 1;  		sched_clock_stable = 0;  		disable_sched_clock_irqtime(); -		printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); +		pr_info("Marking TSC unstable due to %s\n", reason);  		/* Change only the rating, when not registered */  		if (clocksource_tsc.mult)  			clocksource_mark_unstable(&clocksource_tsc); @@ -912,9 +910,9 @@ static void tsc_refine_calibration_work(struct work_struct *work)  		goto out;  	tsc_khz = freq; -	printk(KERN_INFO "Refined TSC clocksource calibration: " -		"%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000, -					(unsigned long)tsc_khz % 1000); +	pr_info("Refined TSC clocksource calibration: %lu.%03lu MHz\n", +		(unsigned long)tsc_khz / 1000, +		(unsigned long)tsc_khz % 1000);  out:  	clocksource_register_khz(&clocksource_tsc, tsc_khz); @@ -970,9 +968,9 @@ void __init tsc_init(void)  		return;  	} -	printk("Detected %lu.%03lu MHz processor.\n", -			(unsigned long)cpu_khz / 1000, -			(unsigned long)cpu_khz % 1000); +	pr_info("Detected %lu.%03lu MHz processor\n", +		(unsigned long)cpu_khz / 1000, +		(unsigned long)cpu_khz % 1000);  	/*  	 * Secondary CPUs do not run through tsc_init(), so set up diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index dc4e910a7d9..36fd42091fa 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -409,9 +409,10 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,   * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.   * @mm: the probed address space.   * @arch_uprobe: the probepoint information. + * @addr: virtual address at which to install the probepoint   * Return 0 on success or a -ve number on error.   
*/ -int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm) +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)  {  	int ret;  	struct insn insn; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 255f58ae71e..54abcc0baf2 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -28,6 +28,8 @@   *   */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/capability.h>  #include <linux/errno.h>  #include <linux/interrupt.h> @@ -137,14 +139,14 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)  	local_irq_enable();  	if (!current->thread.vm86_info) { -		printk("no vm86_info: BAD\n"); +		pr_alert("no vm86_info: BAD\n");  		do_exit(SIGSEGV);  	}  	set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask);  	tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs, regs);  	tmp += put_user(current->thread.screen_bitmap, &current->thread.vm86_info->screen_bitmap);  	if (tmp) { -		printk("vm86: could not access userspace vm86_info\n"); +		pr_alert("could not access userspace vm86_info\n");  		do_exit(SIGSEGV);  	} diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 8eeb55a551b..992f890283e 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -16,6 +16,7 @@  #include <linux/pci_ids.h>  #include <linux/pci_regs.h>  #include <linux/smp.h> +#include <linux/irq.h>  #include <asm/apic.h>  #include <asm/pci-direct.h> @@ -95,6 +96,18 @@ static void __init set_vsmp_pv_ops(void)  	ctl = readl(address + 4);  	printk(KERN_INFO "vSMP CTL: capabilities:0x%08x  control:0x%08x\n",  	       cap, ctl); + +	/* If possible, let the vSMP foundation route the interrupt optimally */ +#ifdef CONFIG_SMP +	if (cap & ctl & BIT(8)) { +		ctl &= ~BIT(8); +#ifdef CONFIG_PROC_FS +		/* Don't let users change irq affinity via procfs */ +		no_irq_affinity = 1; +#endif +	} +#endif +  	if (cap & ctl & (1 << 4)) {  		/* Setup irq ops and turn on vSMP  IRQ fastpath handling */  		pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); @@ -102,12 +115,11 @@ static void __init set_vsmp_pv_ops(void)  		pv_irq_ops.save_fl  = PV_CALLEE_SAVE(vsmp_save_fl);  		pv_irq_ops.restore_fl  = PV_CALLEE_SAVE(vsmp_restore_fl);  		pv_init_ops.patch = vsmp_patch; -  		ctl &= ~(1 << 4); -		writel(ctl, address + 4); -		ctl = readl(address + 4); -		printk(KERN_INFO "vSMP CTL: control set to:0x%08x\n", ctl);  	} +	writel(ctl, address + 4); +	ctl = readl(address + 4); +	pr_info("vSMP CTL: control set to:0x%08x\n", ctl);  	early_iounmap(address, 8);  } @@ -187,12 +199,36 @@ static void __init vsmp_cap_cpus(void)  #endif  } +static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) +{ +	return hard_smp_processor_id() >> index_msb; +} + +/* + * In vSMP, all cpus should be capable of handling interrupts, regardless of + * the APIC used. 
+ */ +static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask, +					  const struct cpumask *mask) +{ +	cpumask_setall(retmask); +} + +static void vsmp_apic_post_init(void) +{ +	/* need to update phys_pkg_id */ +	apic->phys_pkg_id = apicid_phys_pkg_id; +	apic->vector_allocation_domain = fill_vector_allocation_domain; +} +  void __init vsmp_init(void)  {  	detect_vsmp_box();  	if (!is_vsmp_box())  		return; +	x86_platform.apic_post_init = vsmp_apic_post_init; +  	vsmp_cap_cpus();  	set_vsmp_pv_ops(); diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 7515cf0e180..8d141b30904 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -18,6 +18,8 @@   *  use the vDSO.   */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/time.h>  #include <linux/init.h>  #include <linux/kernel.h> @@ -111,18 +113,13 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,  static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,  			      const char *message)  { -	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); -	struct task_struct *tsk; - -	if (!show_unhandled_signals || !__ratelimit(&rs)) +	if (!show_unhandled_signals)  		return; -	tsk = current; - -	printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", -	       level, tsk->comm, task_pid_nr(tsk), -	       message, regs->ip, regs->cs, -	       regs->sp, regs->ax, regs->si, regs->di); +	pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n", +			      level, current->comm, task_pid_nr(current), +			      message, regs->ip, regs->cs, +			      regs->sp, regs->ax, regs->si, regs->di);  }  static int addr_to_vsyscall_nr(unsigned long addr) @@ -139,6 +136,19 @@ static int addr_to_vsyscall_nr(unsigned long addr)  	return nr;  } +#ifdef CONFIG_SECCOMP +static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr) +{ +	if (!seccomp_mode(&tsk->seccomp)) +		return 0; +	task_pt_regs(tsk)->orig_ax = syscall_nr; +	task_pt_regs(tsk)->ax = syscall_nr; +	return __secure_computing(syscall_nr); +} +#else +#define vsyscall_seccomp(_tsk, _nr) 0 +#endif +  static bool write_ok_or_segv(unsigned long ptr, size_t size)  {  	/* @@ -174,6 +184,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)  	int vsyscall_nr;  	int prev_sig_on_uaccess_error;  	long ret; +	int skip;  	/*  	 * No point in checking CS -- the only way to get here is a user mode @@ -205,9 +216,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)  	}  	tsk = current; -	if (seccomp_mode(&tsk->seccomp)) -		do_exit(SIGKILL); -  	/*  	 * With a real vsyscall, page faults cause SIGSEGV.  We want to  	 * preserve that behavior to make writing exploits harder. @@ -222,8 +230,13 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)  	 * address 0".  	 
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 7515cf0e180..8d141b30904 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -18,6 +18,8 @@
  *  use the vDSO.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/time.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -111,18 +113,13 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
 static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
 			      const char *message)
 {
-	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
-	struct task_struct *tsk;
-
-	if (!show_unhandled_signals || !__ratelimit(&rs))
+	if (!show_unhandled_signals)
 		return;
 
-	tsk = current;
-
-	printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
-	       level, tsk->comm, task_pid_nr(tsk),
-	       message, regs->ip, regs->cs,
-	       regs->sp, regs->ax, regs->si, regs->di);
+	pr_notice_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
+			      level, current->comm, task_pid_nr(current),
+			      message, regs->ip, regs->cs,
+			      regs->sp, regs->ax, regs->si, regs->di);
 }
 
 static int addr_to_vsyscall_nr(unsigned long addr)
@@ -139,6 +136,19 @@ static int addr_to_vsyscall_nr(unsigned long addr)
 	return nr;
 }
 
+#ifdef CONFIG_SECCOMP
+static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr)
+{
+	if (!seccomp_mode(&tsk->seccomp))
+		return 0;
+	task_pt_regs(tsk)->orig_ax = syscall_nr;
+	task_pt_regs(tsk)->ax = syscall_nr;
+	return __secure_computing(syscall_nr);
+}
+#else
+#define vsyscall_seccomp(_tsk, _nr) 0
+#endif
+
 static bool write_ok_or_segv(unsigned long ptr, size_t size)
 {
 	/*
@@ -174,6 +184,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 	int vsyscall_nr;
 	int prev_sig_on_uaccess_error;
 	long ret;
+	int skip;
 
 	/*
 	 * No point in checking CS -- the only way to get here is a user mode
@@ -205,9 +216,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 	}
 
 	tsk = current;
-	if (seccomp_mode(&tsk->seccomp))
-		do_exit(SIGKILL);
-
 	/*
 	 * With a real vsyscall, page faults cause SIGSEGV.  We want to
 	 * preserve that behavior to make writing exploits harder.
@@ -222,8 +230,13 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 	 * address 0".
 	 */
 	ret = -EFAULT;
+	skip = 0;
 	switch (vsyscall_nr) {
 	case 0:
+		skip = vsyscall_seccomp(tsk, __NR_gettimeofday);
+		if (skip)
+			break;
+
 		if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
 		    !write_ok_or_segv(regs->si, sizeof(struct timezone)))
 			break;
@@ -234,6 +247,10 @@
 		break;
 
 	case 1:
+		skip = vsyscall_seccomp(tsk, __NR_time);
+		if (skip)
+			break;
+
 		if (!write_ok_or_segv(regs->di, sizeof(time_t)))
 			break;
@@ -241,6 +258,10 @@
 		break;
 
 	case 2:
+		skip = vsyscall_seccomp(tsk, __NR_getcpu);
+		if (skip)
+			break;
+
 		if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
 		    !write_ok_or_segv(regs->si, sizeof(unsigned)))
 			break;
@@ -253,6 +274,12 @@
 
 	current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
 
+	if (skip) {
+		if ((long)regs->ax <= 0L) /* seccomp errno emulation */
+			goto do_ret;
+		goto done; /* seccomp trace/trap */
+	}
+
 	if (ret == -EFAULT) {
 		/* Bad news -- userspace fed a bad pointer to a vsyscall. */
 		warn_bad_vsyscall(KERN_INFO, regs,
@@ -271,10 +298,11 @@
 
 	regs->ax = ret;
 
+do_ret:
 	/* Emulate a ret instruction. */
 	regs->ip = caller;
 	regs->sp += 8;
-
+done:
 	return true;
 
 sigsegv:
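The vsyscall_64.c change above stops killing seccomp-confined tasks outright and instead routes each emulated vsyscall through the filter: vsyscall_seccomp() loads the equivalent syscall number into orig_ax/ax and asks __secure_computing() for a verdict, and a nonzero skip then either emulates the return (errno-style denial, ax <= 0) or leaves ip/sp untouched for a tracer. A standalone sketch of just that skip/do_ret/done decision is below; fake_regs and fake_seccomp() are invented stand-ins for the kernel types, and only the control flow mirrors the patch.

/* Sketch of the skip/do_ret/done flow added above. */
#include <stdio.h>

struct fake_regs { long ax, ip, sp; };

/* 0 = not filtered (run the emulation); nonzero = seccomp intervened */
static int fake_seccomp(struct fake_regs *regs, int syscall_nr)
{
	(void)syscall_nr;
	regs->ax = -1;			/* pretend an errno-style denial */
	return 1;
}

static void emulate_vsyscall_sketch(struct fake_regs *regs, long caller)
{
	int skip = fake_seccomp(regs, 96);	/* 96 == __NR_gettimeofday on x86-64 */

	if (skip) {
		if (regs->ax <= 0) {		/* errno emulation: fake the ret */
			regs->ip = caller;	/* "do_ret" path */
			regs->sp += 8;
		}
		return;				/* "done": trace/trap keeps ip/sp */
	}
	/* ...the normal gettimeofday emulation would run here... */
}

int main(void)
{
	struct fake_regs r = { .ax = 0, .ip = 0, .sp = 128 };

	emulate_vsyscall_sketch(&r, 0x400123);
	printf("ax=%ld ip=0x%lx sp=%ld\n", r.ax, (unsigned long)r.ip, r.sp);
	return 0;
}
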
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 9796c2f3d07..6020f6f5927 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -28,6 +28,7 @@ EXPORT_SYMBOL(__put_user_8);
 
 EXPORT_SYMBOL(copy_user_generic_string);
 EXPORT_SYMBOL(copy_user_generic_unrolled);
+EXPORT_SYMBOL(copy_user_enhanced_fast_string);
 EXPORT_SYMBOL(__copy_user_nocache);
 EXPORT_SYMBOL(_copy_from_user);
 EXPORT_SYMBOL(_copy_to_user);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 35c5e543f55..9f3167e891e 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -29,7 +29,6 @@ void __init x86_init_uint_noop(unsigned int unused) { }
 void __init x86_init_pgd_noop(pgd_t *unused) { }
 int __init iommu_init_noop(void) { return 0; }
 void iommu_shutdown_noop(void) { }
-void wallclock_init_noop(void) { }
 
 /*
  * The platform setup functions are preset with the default functions
@@ -101,7 +100,6 @@ static int default_i8042_detect(void) { return 1; };
 
 struct x86_platform_ops x86_platform = {
 	.calibrate_tsc			= native_calibrate_tsc,
-	.wallclock_init			= wallclock_init_noop,
 	.get_wallclock			= mach_get_cmos_time,
 	.set_wallclock			= mach_set_rtc_mmss,
 	.iommu_shutdown			= iommu_shutdown_noop,
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index bd18149b2b0..3d3e2070911 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -3,6 +3,9 @@
  *
  * Author: Suresh Siddha <suresh.b.siddha@intel.com>
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/bootmem.h>
 #include <linux/compat.h>
 #include <asm/i387.h>
@@ -162,7 +165,7 @@ int save_i387_xstate(void __user *buf)
 	BUG_ON(sig_xstate_size < xstate_size);
 
 	if ((unsigned long)buf % 64)
-		printk("save_i387_xstate: bad fpstate %p\n", buf);
+		pr_err("%s: bad fpstate %p\n", __func__, buf);
 
 	if (!used_math())
 		return 0;
@@ -422,7 +425,7 @@ static void __init xstate_enable_boot_cpu(void)
 	pcntxt_mask = eax + ((u64)edx << 32);
 
 	if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
-		printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n",
+		pr_err("FP/SSE not shown under xsave features 0x%llx\n",
 		       pcntxt_mask);
 		BUG();
 	}
@@ -445,9 +448,8 @@ static void __init xstate_enable_boot_cpu(void)
 
 	setup_xstate_init();
 
-	printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, "
-	       "cntxt size 0x%x\n",
-	       pcntxt_mask, xstate_size);
+	pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
+		pcntxt_mask, xstate_size);
 }
 
 /*
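The pcntxt_mask checked in xstate_enable_boot_cpu() above comes from CPUID leaf 0xD (sub-leaf 0), whose EAX/EDX pair reports which xstate features XCR0 may enable; FP and SSE (bits 0 and 1) must always be advertised, hence the BUG() when XSTATE_FPSSE is missing. A userspace sketch of the same query is below, assuming GCC/Clang on an XSAVE-capable x86 CPU (__cpuid_count comes from the compiler's <cpuid.h>).

/* Sketch of the CPUID(0xd, 0) query behind pcntxt_mask above. */
#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>

#define XSTATE_FP	0x1ULL
#define XSTATE_SSE	0x2ULL
#define XSTATE_FPSSE	(XSTATE_FP | XSTATE_SSE)

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	uint64_t pcntxt_mask;

	__cpuid_count(0xd, 0, eax, ebx, ecx, edx);
	pcntxt_mask = eax + ((uint64_t)edx << 32);

	printf("xsave features: 0x%llx\n", (unsigned long long)pcntxt_mask);
	if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE)
		printf("FP/SSE not shown under xsave features\n");
	return 0;
}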