Diffstat (limited to 'arch/sparc/kernel')
-rw-r--r--  arch/sparc/kernel/head_64.S    |  14
-rw-r--r--  arch/sparc/kernel/hvapi.c      |   1
-rw-r--r--  arch/sparc/kernel/hvcalls.S    |  16
-rw-r--r--  arch/sparc/kernel/ktlb.S       |  25
-rw-r--r--  arch/sparc/kernel/leon_pci.c   |   9
-rw-r--r--  arch/sparc/kernel/mdesc.c      |  24
-rw-r--r--  arch/sparc/kernel/nmi.c        |  21
-rw-r--r--  arch/sparc/kernel/pci.c        |   4
-rw-r--r--  arch/sparc/kernel/pci_sun4v.c  |   2
-rw-r--r--  arch/sparc/kernel/pcr.c        | 172
-rw-r--r--  arch/sparc/kernel/perf_event.c | 516
-rw-r--r--  arch/sparc/kernel/setup_64.c   |  67
12 files changed, 690 insertions, 181 deletions
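The core of this series is the pcr.c rework: SPARC-T4 (Niagara-4) exposes four independent PCR/PIC register pairs, where all earlier chips drive both counters through a single shared PCR, so counter access is routed through an extended pcr_ops vector and the old pcr_enable/picl_shift globals go away. The pcr_ops declaration itself lives in asm/pcr.h, which this diffstat (limited to arch/sparc/kernel) does not show; the sketch below reconstructs the interface implied by the hunks, with field names and signatures taken from the patch:

struct pcr_ops {
	/* Every accessor takes a register number; pre-T4 chips have
	 * only register 0 and their implementations
	 * WARN_ON_ONCE(reg_num != 0).
	 */
	u64	(*read_pcr)(unsigned long reg_num);
	void	(*write_pcr)(unsigned long reg_num, u64 val);
	u64	(*read_pic)(unsigned long reg_num);
	void	(*write_pic)(unsigned long reg_num, u64 val);

	/* PIC preload for the NMI watchdog, replacing picl_value(). */
	u64	(*nmi_picl_value)(unsigned int nmi_hz);

	/* Per-chip enable/disable masks, replacing the global
	 * pcr_enable and the PCR_SUN4U_ENABLE/PCR_N2_ENABLE macros.
	 */
	u64	pcr_nmi_enable;
	u64	pcr_nmi_disable;
};

Callers change to match: nmi.c's pcr_ops->write(PCR_PIC_PRIV) becomes pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable), and perf_event.c keeps one software PCR copy per counter (cpuc->pcr[MAX_HWEVENTS]) instead of a single u64.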
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index b42ddbf9651..ee5dcced249 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -559,10 +559,10 @@ niagara_tlb_fixup:  	be,pt	%xcc, niagara2_patch  	 nop  	cmp	%g1, SUN4V_CHIP_NIAGARA4 -	be,pt	%xcc, niagara2_patch +	be,pt	%xcc, niagara4_patch  	 nop  	cmp	%g1, SUN4V_CHIP_NIAGARA5 -	be,pt	%xcc, niagara2_patch +	be,pt	%xcc, niagara4_patch  	 nop  	call	generic_patch_copyops @@ -573,6 +573,16 @@ niagara_tlb_fixup:  	 nop  	ba,a,pt	%xcc, 80f +niagara4_patch: +	call	niagara4_patch_copyops +	 nop +	call	niagara_patch_bzero +	 nop +	call	niagara4_patch_pageops +	 nop + +	ba,a,pt	%xcc, 80f +  niagara2_patch:  	call	niagara2_patch_copyops  	 nop diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index 8593672838f..1032df43ec9 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -45,6 +45,7 @@ static struct api_info api_table[] = {  	{ .group = HV_GRP_NIU,					},  	{ .group = HV_GRP_VF_CPU,				},  	{ .group = HV_GRP_KT_CPU,				}, +	{ .group = HV_GRP_VT_CPU,				},  	{ .group = HV_GRP_DIAG,		.flags = FLAG_PRE_API	},  }; diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S index 58d60de4d65..f3ab509b76a 100644 --- a/arch/sparc/kernel/hvcalls.S +++ b/arch/sparc/kernel/hvcalls.S @@ -805,3 +805,19 @@ ENTRY(sun4v_reboot_data_set)  	retl  	 nop  ENDPROC(sun4v_reboot_data_set) + +ENTRY(sun4v_vt_get_perfreg) +	mov	%o1, %o4 +	mov	HV_FAST_VT_GET_PERFREG, %o5 +	ta	HV_FAST_TRAP +	stx	%o1, [%o4] +	retl +	 nop +ENDPROC(sun4v_vt_get_perfreg) + +ENTRY(sun4v_vt_set_perfreg) +	mov	HV_FAST_VT_SET_PERFREG, %o5 +	ta	HV_FAST_TRAP +	retl +	 nop +ENDPROC(sun4v_vt_set_perfreg) diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S index 79f31036484..0746e5e32b3 100644 --- a/arch/sparc/kernel/ktlb.S +++ b/arch/sparc/kernel/ktlb.S @@ -188,31 +188,26 @@ valid_addr_bitmap_patch:  	be,pn		%xcc, kvmap_dtlb_longpath  2:	 sethi		%hi(kpte_linear_bitmap), %g2 -	or		%g2, %lo(kpte_linear_bitmap), %g2  	/* Get the 256MB physical address index. */  	sllx		%g4, 21, %g5 -	mov		1, %g7 +	or		%g2, %lo(kpte_linear_bitmap), %g2  	srlx		%g5, 21 + 28, %g5 +	and		%g5, (32 - 1), %g7 -	/* Don't try this at home kids... this depends upon srlx -	 * only taking the low 6 bits of the shift count in %g5. -	 */ -	sllx		%g7, %g5, %g7 - -	/* Divide by 64 to get the offset into the bitmask.  */ -	srlx		%g5, 6, %g5 +	/* Divide by 32 to get the offset into the bitmask.  */ +	srlx		%g5, 5, %g5 +	add		%g7, %g7, %g7  	sllx		%g5, 3, %g5 -	/* kern_linear_pte_xor[((mask & bit) ? 
1 : 0)] */ +	/* kern_linear_pte_xor[(mask >> shift) & 3)] */  	ldx		[%g2 + %g5], %g2 -	andcc		%g2, %g7, %g0 +	srlx		%g2, %g7, %g7  	sethi		%hi(kern_linear_pte_xor), %g5 +	and		%g7, 3, %g7  	or		%g5, %lo(kern_linear_pte_xor), %g5 -	bne,a,pt	%xcc, 1f -	 add		%g5, 8, %g5 - -1:	ldx		[%g5], %g2 +	sllx		%g7, 3, %g7 +	ldx		[%g5 + %g7], %g2  	.globl		kvmap_linear_patch  kvmap_linear_patch: diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c index 21dcda75a52..fc052116156 100644 --- a/arch/sparc/kernel/leon_pci.c +++ b/arch/sparc/kernel/leon_pci.c @@ -102,15 +102,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)  	return pci_enable_resources(dev, mask);  } -void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) -{ -#ifdef CONFIG_PCI_DEBUG -	printk(KERN_DEBUG "LEONPCI: Assigning IRQ %02d to %s\n", irq, -		pci_name(dev)); -#endif -	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} -  /* in/out routines taken from pcic.c   *   * This probably belongs here rather than ioport.c because diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 6dc79628058..831c001604e 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -817,6 +817,30 @@ void __cpuinit mdesc_populate_present_mask(cpumask_t *mask)  	mdesc_iterate_over_cpus(record_one_cpu, NULL, mask);  } +static void * __init check_one_pgsz(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg) +{ +	const u64 *pgsz_prop = mdesc_get_property(hp, mp, "mmu-page-size-list", NULL); +	unsigned long *pgsz_mask = arg; +	u64 val; + +	val = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K | +	       HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB); +	if (pgsz_prop) +		val = *pgsz_prop; + +	if (!*pgsz_mask) +		*pgsz_mask = val; +	else +		*pgsz_mask &= val; +	return NULL; +} + +void __init mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask) +{ +	*pgsz_mask = 0; +	mdesc_iterate_over_cpus(check_one_pgsz, pgsz_mask, mask); +} +  static void * __cpuinit fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid, void *arg)  {  	const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL); diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index eb1c1f010a4..6479256fd5a 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -22,7 +22,6 @@  #include <asm/perf_event.h>  #include <asm/ptrace.h>  #include <asm/pcr.h> -#include <asm/perfctr.h>  #include "kstack.h" @@ -109,7 +108,7 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)  		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)  		touched = 1;  	else -		pcr_ops->write(PCR_PIC_PRIV); +		pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);  	sum = local_cpu_data().irq0_irqs;  	if (__get_cpu_var(nmi_touch)) { @@ -126,8 +125,8 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)  		__this_cpu_write(alert_counter, 0);  	}  	if (__get_cpu_var(wd_enabled)) { -		write_pic(picl_value(nmi_hz)); -		pcr_ops->write(pcr_enable); +		pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); +		pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);  	}  	restore_hardirq_stack(orig_sp); @@ -166,7 +165,7 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count)  void stop_nmi_watchdog(void *unused)  { -	pcr_ops->write(PCR_PIC_PRIV); +	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);  	__get_cpu_var(wd_enabled) = 0;  	atomic_dec(&nmi_active);  } @@ -223,10 +222,10 @@ void start_nmi_watchdog(void *unused)  	__get_cpu_var(wd_enabled) = 1;  	atomic_inc(&nmi_active); -	pcr_ops->write(PCR_PIC_PRIV); -	
write_pic(picl_value(nmi_hz)); +	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); +	pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); -	pcr_ops->write(pcr_enable); +	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);  }  static void nmi_adjust_hz_one(void *unused) @@ -234,10 +233,10 @@ static void nmi_adjust_hz_one(void *unused)  	if (!__get_cpu_var(wd_enabled))  		return; -	pcr_ops->write(PCR_PIC_PRIV); -	write_pic(picl_value(nmi_hz)); +	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable); +	pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz)); -	pcr_ops->write(pcr_enable); +	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);  }  void nmi_adjust_hz(unsigned int new_hz) diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 065b88c4f86..acc8c838ff7 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -622,10 +622,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *pbus)  {  } -void pcibios_update_irq(struct pci_dev *pdev, int irq) -{ -} -  resource_size_t pcibios_align_resource(void *data, const struct resource *res,  				resource_size_t size, resource_size_t align)  { diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 7661e84a05a..051b69caeff 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -594,7 +594,7 @@ static int __devinit pci_sun4v_iommu_init(struct pci_pbm_info *pbm)  		printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n",  		       vdma[0], vdma[1]);  		return -EINVAL; -	}; +	}  	dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);  	num_tsb_entries = vdma[1] / IO_PAGE_SIZE; diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 0ce0dd2332a..269af58497a 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -13,23 +13,14 @@  #include <asm/pil.h>  #include <asm/pcr.h>  #include <asm/nmi.h> +#include <asm/asi.h>  #include <asm/spitfire.h> -#include <asm/perfctr.h>  /* This code is shared between various users of the performance   * counters.  Users will be oprofile, pseudo-NMI watchdog, and the   * perf_event support layer.   */ -#define PCR_SUN4U_ENABLE	(PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE) -#define PCR_N2_ENABLE		(PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | \ -				 PCR_N2_TOE_OV1 | \ -				 (2 << PCR_N2_SL1_SHIFT) | \ -				 (0xff << PCR_N2_MASK1_SHIFT)) - -u64 pcr_enable; -unsigned int picl_shift; -  /* Performance counter interrupts run unmasked at PIL level 15.   * Therefore we can't do things like wakeups and other work   * that expects IRQ disabling to be adhered to in locking etc. @@ -60,39 +51,144 @@ void arch_irq_work_raise(void)  const struct pcr_ops *pcr_ops;  EXPORT_SYMBOL_GPL(pcr_ops); -static u64 direct_pcr_read(void) +static u64 direct_pcr_read(unsigned long reg_num)  {  	u64 val; -	read_pcr(val); +	WARN_ON_ONCE(reg_num != 0); +	__asm__ __volatile__("rd %%pcr, %0" : "=r" (val));  	return val;  } -static void direct_pcr_write(u64 val) +static void direct_pcr_write(unsigned long reg_num, u64 val) +{ +	WARN_ON_ONCE(reg_num != 0); +	__asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (val)); +} + +static u64 direct_pic_read(unsigned long reg_num) +{ +	u64 val; + +	WARN_ON_ONCE(reg_num != 0); +	__asm__ __volatile__("rd %%pic, %0" : "=r" (val)); +	return val; +} + +static void direct_pic_write(unsigned long reg_num, u64 val) +{ +	WARN_ON_ONCE(reg_num != 0); + +	/* Blackbird errata workaround.  See commentary in +	 * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt() +	 * for more information. 
+	 */ +	__asm__ __volatile__("ba,pt	%%xcc, 99f\n\t" +			     " nop\n\t" +			     ".align	64\n" +			  "99:wr	%0, 0x0, %%pic\n\t" +			     "rd	%%pic, %%g0" : : "r" (val)); +} + +static u64 direct_picl_value(unsigned int nmi_hz)  { -	write_pcr(val); +	u32 delta = local_cpu_data().clock_tick / nmi_hz; + +	return ((u64)((0 - delta) & 0xffffffff)) << 32;  }  static const struct pcr_ops direct_pcr_ops = { -	.read	= direct_pcr_read, -	.write	= direct_pcr_write, +	.read_pcr		= direct_pcr_read, +	.write_pcr		= direct_pcr_write, +	.read_pic		= direct_pic_read, +	.write_pic		= direct_pic_write, +	.nmi_picl_value		= direct_picl_value, +	.pcr_nmi_enable		= (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE), +	.pcr_nmi_disable	= PCR_PIC_PRIV,  }; -static void n2_pcr_write(u64 val) +static void n2_pcr_write(unsigned long reg_num, u64 val)  {  	unsigned long ret; +	WARN_ON_ONCE(reg_num != 0);  	if (val & PCR_N2_HTRACE) {  		ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);  		if (ret != HV_EOK) -			write_pcr(val); +			direct_pcr_write(reg_num, val);  	} else -		write_pcr(val); +		direct_pcr_write(reg_num, val); +} + +static u64 n2_picl_value(unsigned int nmi_hz) +{ +	u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2); + +	return ((u64)((0 - delta) & 0xffffffff)) << 32;  }  static const struct pcr_ops n2_pcr_ops = { -	.read	= direct_pcr_read, -	.write	= n2_pcr_write, +	.read_pcr		= direct_pcr_read, +	.write_pcr		= n2_pcr_write, +	.read_pic		= direct_pic_read, +	.write_pic		= direct_pic_write, +	.nmi_picl_value		= n2_picl_value, +	.pcr_nmi_enable		= (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE | +				   PCR_N2_TOE_OV1 | +				   (2 << PCR_N2_SL1_SHIFT) | +				   (0xff << PCR_N2_MASK1_SHIFT)), +	.pcr_nmi_disable	= PCR_PIC_PRIV, +}; + +static u64 n4_pcr_read(unsigned long reg_num) +{ +	unsigned long val; + +	(void) sun4v_vt_get_perfreg(reg_num, &val); + +	return val; +} + +static void n4_pcr_write(unsigned long reg_num, u64 val) +{ +	(void) sun4v_vt_set_perfreg(reg_num, val); +} + +static u64 n4_pic_read(unsigned long reg_num) +{ +	unsigned long val; + +	__asm__ __volatile__("ldxa [%1] %2, %0" +			     : "=r" (val) +			     : "r" (reg_num * 0x8UL), "i" (ASI_PIC)); + +	return val; +} + +static void n4_pic_write(unsigned long reg_num, u64 val) +{ +	__asm__ __volatile__("stxa %0, [%1] %2" +			     : /* no outputs */ +			     : "r" (val), "r" (reg_num * 0x8UL), "i" (ASI_PIC)); +} + +static u64 n4_picl_value(unsigned int nmi_hz) +{ +	u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2); + +	return ((u64)((0 - delta) & 0xffffffff)); +} + +static const struct pcr_ops n4_pcr_ops = { +	.read_pcr		= n4_pcr_read, +	.write_pcr		= n4_pcr_write, +	.read_pic		= n4_pic_read, +	.write_pic		= n4_pic_write, +	.nmi_picl_value		= n4_picl_value, +	.pcr_nmi_enable		= (PCR_N4_PICNPT | PCR_N4_STRACE | +				   PCR_N4_UTRACE | PCR_N4_TOE | +				   (26 << PCR_N4_SL_SHIFT)), +	.pcr_nmi_disable	= PCR_N4_PICNPT,  };  static unsigned long perf_hsvc_group; @@ -115,6 +211,10 @@ static int __init register_perf_hsvc(void)  			perf_hsvc_group = HV_GRP_KT_CPU;  			break; +		case SUN4V_CHIP_NIAGARA4: +			perf_hsvc_group = HV_GRP_VT_CPU; +			break; +  		default:  			return -ENODEV;  		} @@ -139,6 +239,29 @@ static void __init unregister_perf_hsvc(void)  	sun4v_hvapi_unregister(perf_hsvc_group);  } +static int __init setup_sun4v_pcr_ops(void) +{ +	int ret = 0; + +	switch (sun4v_chip_type) { +	case SUN4V_CHIP_NIAGARA1: +	case SUN4V_CHIP_NIAGARA2: +	case SUN4V_CHIP_NIAGARA3: +		pcr_ops = &n2_pcr_ops; +		break; + +	case SUN4V_CHIP_NIAGARA4: +		pcr_ops = 
&n4_pcr_ops; +		break; + +	default: +		ret = -ENODEV; +		break; +	} + +	return ret; +} +  int __init pcr_arch_init(void)  {  	int err = register_perf_hsvc(); @@ -148,15 +271,14 @@ int __init pcr_arch_init(void)  	switch (tlb_type) {  	case hypervisor: -		pcr_ops = &n2_pcr_ops; -		pcr_enable = PCR_N2_ENABLE; -		picl_shift = 2; +		err = setup_sun4v_pcr_ops(); +		if (err) +			goto out_unregister;  		break;  	case cheetah:  	case cheetah_plus:  		pcr_ops = &direct_pcr_ops; -		pcr_enable = PCR_SUN4U_ENABLE;  		break;  	case spitfire: diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 5713957dcb8..e48651dace1 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -25,36 +25,48 @@  #include <linux/atomic.h>  #include <asm/nmi.h>  #include <asm/pcr.h> -#include <asm/perfctr.h>  #include <asm/cacheflush.h>  #include "kernel.h"  #include "kstack.h" -/* Sparc64 chips have two performance counters, 32-bits each, with - * overflow interrupts generated on transition from 0xffffffff to 0. - * The counters are accessed in one go using a 64-bit register. +/* Two classes of sparc64 chips currently exist.  All of which have + * 32-bit counters which can generate overflow interrupts on the + * transition from 0xffffffff to 0.   * - * Both counters are controlled using a single control register.  The - * only way to stop all sampling is to clear all of the context (user, - * supervisor, hypervisor) sampling enable bits.  But these bits apply - * to both counters, thus the two counters can't be enabled/disabled - * individually. + * All chips upto and including SPARC-T3 have two performance + * counters.  The two 32-bit counters are accessed in one go using a + * single 64-bit register.   * - * The control register has two event fields, one for each of the two - * counters.  It's thus nearly impossible to have one counter going - * while keeping the other one stopped.  Therefore it is possible to - * get overflow interrupts for counters not currently "in use" and - * that condition must be checked in the overflow interrupt handler. + * On these older chips both counters are controlled using a single + * control register.  The only way to stop all sampling is to clear + * all of the context (user, supervisor, hypervisor) sampling enable + * bits.  But these bits apply to both counters, thus the two counters + * can't be enabled/disabled individually. + * + * Furthermore, the control register on these older chips have two + * event fields, one for each of the two counters.  It's thus nearly + * impossible to have one counter going while keeping the other one + * stopped.  Therefore it is possible to get overflow interrupts for + * counters not currently "in use" and that condition must be checked + * in the overflow interrupt handler.   *   * So we use a hack, in that we program inactive counters with the   * "sw_count0" and "sw_count1" events.  These count how many times   * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an   * unusual way to encode a NOP and therefore will not trigger in   * normal code. + * + * Starting with SPARC-T4 we have one control register per counter. + * And the counters are stored in individual registers.  The registers + * for the counters are 64-bit but only a 32-bit counter is + * implemented.  The event selections on SPARC-T4 lack any + * restrictions, therefore we can elide all of the complicated + * conflict resolution code we have for SPARC-T3 and earlier chips.   
*/ -#define MAX_HWEVENTS			2 +#define MAX_HWEVENTS			4 +#define MAX_PCRS			4  #define MAX_PERIOD			((1UL << 32) - 1)  #define PIC_UPPER_INDEX			0 @@ -90,8 +102,8 @@ struct cpu_hw_events {  	 */  	int			current_idx[MAX_HWEVENTS]; -	/* Software copy of %pcr register on this cpu.  */ -	u64			pcr; +	/* Software copy of %pcr register(s) on this cpu.  */ +	u64			pcr[MAX_HWEVENTS];  	/* Enabled/disable state.  */  	int			enabled; @@ -103,6 +115,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };  /* An event map describes the characteristics of a performance   * counter event.  In particular it gives the encoding as well as   * a mask telling which counters the event can be measured on. + * + * The mask is unused on SPARC-T4 and later.   */  struct perf_event_map {  	u16	encoding; @@ -142,15 +156,53 @@ struct sparc_pmu {  	const struct perf_event_map	*(*event_map)(int);  	const cache_map_t		*cache_map;  	int				max_events; +	u32				(*read_pmc)(int); +	void				(*write_pmc)(int, u64);  	int				upper_shift;  	int				lower_shift;  	int				event_mask; +	int				user_bit; +	int				priv_bit;  	int				hv_bit;  	int				irq_bit;  	int				upper_nop;  	int				lower_nop; +	unsigned int			flags; +#define SPARC_PMU_ALL_EXCLUDES_SAME	0x00000001 +#define SPARC_PMU_HAS_CONFLICTS		0x00000002 +	int				max_hw_events; +	int				num_pcrs; +	int				num_pic_regs;  }; +static u32 sparc_default_read_pmc(int idx) +{ +	u64 val; + +	val = pcr_ops->read_pic(0); +	if (idx == PIC_UPPER_INDEX) +		val >>= 32; + +	return val & 0xffffffff; +} + +static void sparc_default_write_pmc(int idx, u64 val) +{ +	u64 shift, mask, pic; + +	shift = 0; +	if (idx == PIC_UPPER_INDEX) +		shift = 32; + +	mask = ((u64) 0xffffffff) << shift; +	val <<= shift; + +	pic = pcr_ops->read_pic(0); +	pic &= ~mask; +	pic |= val; +	pcr_ops->write_pic(0, pic); +} +  static const struct perf_event_map ultra3_perfmon_event_map[] = {  	[PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },  	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER }, @@ -268,11 +320,20 @@ static const struct sparc_pmu ultra3_pmu = {  	.event_map	= ultra3_event_map,  	.cache_map	= &ultra3_cache_map,  	.max_events	= ARRAY_SIZE(ultra3_perfmon_event_map), +	.read_pmc	= sparc_default_read_pmc, +	.write_pmc	= sparc_default_write_pmc,  	.upper_shift	= 11,  	.lower_shift	= 4,  	.event_mask	= 0x3f, +	.user_bit	= PCR_UTRACE, +	.priv_bit	= PCR_STRACE,  	.upper_nop	= 0x1c,  	.lower_nop	= 0x14, +	.flags		= (SPARC_PMU_ALL_EXCLUDES_SAME | +			   SPARC_PMU_HAS_CONFLICTS), +	.max_hw_events	= 2, +	.num_pcrs	= 1, +	.num_pic_regs	= 1,  };  /* Niagara1 is very limited.  
The upper PIC is hard-locked to count @@ -397,11 +458,20 @@ static const struct sparc_pmu niagara1_pmu = {  	.event_map	= niagara1_event_map,  	.cache_map	= &niagara1_cache_map,  	.max_events	= ARRAY_SIZE(niagara1_perfmon_event_map), +	.read_pmc	= sparc_default_read_pmc, +	.write_pmc	= sparc_default_write_pmc,  	.upper_shift	= 0,  	.lower_shift	= 4,  	.event_mask	= 0x7, +	.user_bit	= PCR_UTRACE, +	.priv_bit	= PCR_STRACE,  	.upper_nop	= 0x0,  	.lower_nop	= 0x0, +	.flags		= (SPARC_PMU_ALL_EXCLUDES_SAME | +			   SPARC_PMU_HAS_CONFLICTS), +	.max_hw_events	= 2, +	.num_pcrs	= 1, +	.num_pic_regs	= 1,  };  static const struct perf_event_map niagara2_perfmon_event_map[] = { @@ -523,13 +593,203 @@ static const struct sparc_pmu niagara2_pmu = {  	.event_map	= niagara2_event_map,  	.cache_map	= &niagara2_cache_map,  	.max_events	= ARRAY_SIZE(niagara2_perfmon_event_map), +	.read_pmc	= sparc_default_read_pmc, +	.write_pmc	= sparc_default_write_pmc,  	.upper_shift	= 19,  	.lower_shift	= 6,  	.event_mask	= 0xfff, -	.hv_bit		= 0x8, +	.user_bit	= PCR_UTRACE, +	.priv_bit	= PCR_STRACE, +	.hv_bit		= PCR_N2_HTRACE,  	.irq_bit	= 0x30,  	.upper_nop	= 0x220,  	.lower_nop	= 0x220, +	.flags		= (SPARC_PMU_ALL_EXCLUDES_SAME | +			   SPARC_PMU_HAS_CONFLICTS), +	.max_hw_events	= 2, +	.num_pcrs	= 1, +	.num_pic_regs	= 1, +}; + +static const struct perf_event_map niagara4_perfmon_event_map[] = { +	[PERF_COUNT_HW_CPU_CYCLES] = { (26 << 6) }, +	[PERF_COUNT_HW_INSTRUCTIONS] = { (3 << 6) | 0x3f }, +	[PERF_COUNT_HW_CACHE_REFERENCES] = { (3 << 6) | 0x04 }, +	[PERF_COUNT_HW_CACHE_MISSES] = { (16 << 6) | 0x07 }, +	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { (4 << 6) | 0x01 }, +	[PERF_COUNT_HW_BRANCH_MISSES] = { (25 << 6) | 0x0f }, +}; + +static const struct perf_event_map *niagara4_event_map(int event_id) +{ +	return &niagara4_perfmon_event_map[event_id]; +} + +static const cache_map_t niagara4_cache_map = { +[C(L1D)] = { +	[C(OP_READ)] = { +		[C(RESULT_ACCESS)] = { (3 << 6) | 0x04 }, +		[C(RESULT_MISS)] = { (16 << 6) | 0x07 }, +	}, +	[C(OP_WRITE)] = { +		[C(RESULT_ACCESS)] = { (3 << 6) | 0x08 }, +		[C(RESULT_MISS)] = { (16 << 6) | 0x07 }, +	}, +	[C(OP_PREFETCH)] = { +		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, +		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, +	}, +}, +[C(L1I)] = { +	[C(OP_READ)] = { +		[C(RESULT_ACCESS)] = { (3 << 6) | 0x3f }, +		[C(RESULT_MISS)] = { (11 << 6) | 0x03 }, +	}, +	[ C(OP_WRITE) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE }, +	}, +	[ C(OP_PREFETCH) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED }, +	}, +}, +[C(LL)] = { +	[C(OP_READ)] = { +		[C(RESULT_ACCESS)] = { (3 << 6) | 0x04 }, +		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, +	}, +	[C(OP_WRITE)] = { +		[C(RESULT_ACCESS)] = { (3 << 6) | 0x08 }, +		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, +	}, +	[C(OP_PREFETCH)] = { +		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, +		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, +	}, +}, +[C(DTLB)] = { +	[C(OP_READ)] = { +		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, +		[C(RESULT_MISS)] = { (17 << 6) | 0x3f }, +	}, +	[ C(OP_WRITE) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED }, +	}, +	[ C(OP_PREFETCH) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED }, +	}, +}, +[C(ITLB)] = { +	[C(OP_READ)] = { +		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, +		[C(RESULT_MISS)] = { (6 << 6) 
| 0x3f }, +	}, +	[ C(OP_WRITE) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED }, +	}, +	[ C(OP_PREFETCH) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED }, +	}, +}, +[C(BPU)] = { +	[C(OP_READ)] = { +		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, +		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED }, +	}, +	[ C(OP_WRITE) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED }, +	}, +	[ C(OP_PREFETCH) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED }, +	}, +}, +[C(NODE)] = { +	[C(OP_READ)] = { +		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED }, +		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED }, +	}, +	[ C(OP_WRITE) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED }, +	}, +	[ C(OP_PREFETCH) ] = { +		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED }, +		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED }, +	}, +}, +}; + +static u32 sparc_vt_read_pmc(int idx) +{ +	u64 val = pcr_ops->read_pic(idx); + +	return val & 0xffffffff; +} + +static void sparc_vt_write_pmc(int idx, u64 val) +{ +	u64 pcr; + +	/* There seems to be an internal latch on the overflow event +	 * on SPARC-T4 that prevents it from triggering unless you +	 * update the PIC exactly as we do here.  The requirement +	 * seems to be that you have to turn off event counting in the +	 * PCR around the PIC update. +	 * +	 * For example, after the following sequence: +	 * +	 * 1) set PIC to -1 +	 * 2) enable event counting and overflow reporting in PCR +	 * 3) overflow triggers, softint 15 handler invoked +	 * 4) clear OV bit in PCR +	 * 5) write PIC to -1 +	 * +	 * a subsequent overflow event will not trigger.  This +	 * sequence works on SPARC-T3 and previous chips. +	 */ +	pcr = pcr_ops->read_pcr(idx); +	pcr_ops->write_pcr(idx, PCR_N4_PICNPT); + +	pcr_ops->write_pic(idx, val & 0xffffffff); + +	pcr_ops->write_pcr(idx, pcr); +} + +static const struct sparc_pmu niagara4_pmu = { +	.event_map	= niagara4_event_map, +	.cache_map	= &niagara4_cache_map, +	.max_events	= ARRAY_SIZE(niagara4_perfmon_event_map), +	.read_pmc	= sparc_vt_read_pmc, +	.write_pmc	= sparc_vt_write_pmc, +	.upper_shift	= 5, +	.lower_shift	= 5, +	.event_mask	= 0x7ff, +	.user_bit	= PCR_N4_UTRACE, +	.priv_bit	= PCR_N4_STRACE, + +	/* We explicitly don't support hypervisor tracing.  The T4 +	 * generates the overflow event for precise events via a trap +	 * which will not be generated (ie. it's completely lost) if +	 * we happen to be in the hypervisor when the event triggers. +	 * Essentially, the overflow event reporting is completely +	 * unusable when you have hypervisor mode tracing enabled. 
+	 */ +	.hv_bit		= 0, + +	.irq_bit	= PCR_N4_TOE, +	.upper_nop	= 0, +	.lower_nop	= 0, +	.flags		= 0, +	.max_hw_events	= 4, +	.num_pcrs	= 4, +	.num_pic_regs	= 4,  };  static const struct sparc_pmu *sparc_pmu __read_mostly; @@ -558,55 +818,35 @@ static u64 nop_for_index(int idx)  static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)  {  	u64 val, mask = mask_for_index(idx); +	int pcr_index = 0; + +	if (sparc_pmu->num_pcrs > 1) +		pcr_index = idx; -	val = cpuc->pcr; +	val = cpuc->pcr[pcr_index];  	val &= ~mask;  	val |= hwc->config; -	cpuc->pcr = val; +	cpuc->pcr[pcr_index] = val; -	pcr_ops->write(cpuc->pcr); +	pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);  }  static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)  {  	u64 mask = mask_for_index(idx);  	u64 nop = nop_for_index(idx); +	int pcr_index = 0;  	u64 val; -	val = cpuc->pcr; +	if (sparc_pmu->num_pcrs > 1) +		pcr_index = idx; + +	val = cpuc->pcr[pcr_index];  	val &= ~mask;  	val |= nop; -	cpuc->pcr = val; +	cpuc->pcr[pcr_index] = val; -	pcr_ops->write(cpuc->pcr); -} - -static u32 read_pmc(int idx) -{ -	u64 val; - -	read_pic(val); -	if (idx == PIC_UPPER_INDEX) -		val >>= 32; - -	return val & 0xffffffff; -} - -static void write_pmc(int idx, u64 val) -{ -	u64 shift, mask, pic; - -	shift = 0; -	if (idx == PIC_UPPER_INDEX) -		shift = 32; - -	mask = ((u64) 0xffffffff) << shift; -	val <<= shift; - -	read_pic(pic); -	pic &= ~mask; -	pic |= val; -	write_pic(pic); +	pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);  }  static u64 sparc_perf_event_update(struct perf_event *event, @@ -618,7 +858,7 @@ static u64 sparc_perf_event_update(struct perf_event *event,  again:  	prev_raw_count = local64_read(&hwc->prev_count); -	new_raw_count = read_pmc(idx); +	new_raw_count = sparc_pmu->read_pmc(idx);  	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,  			     new_raw_count) != prev_raw_count) @@ -658,25 +898,17 @@ static int sparc_perf_event_set_period(struct perf_event *event,  	local64_set(&hwc->prev_count, (u64)-left); -	write_pmc(idx, (u64)(-left) & 0xffffffff); +	sparc_pmu->write_pmc(idx, (u64)(-left) & 0xffffffff);  	perf_event_update_userpage(event);  	return ret;  } -/* If performance event entries have been added, move existing - * events around (if necessary) and then assign new entries to - * counters. - */ -static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) +static void read_in_all_counters(struct cpu_hw_events *cpuc)  {  	int i; -	if (!cpuc->n_added) -		goto out; - -	/* Read in the counters which are moving.  */  	for (i = 0; i < cpuc->n_events; i++) {  		struct perf_event *cp = cpuc->event[i]; @@ -687,6 +919,20 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)  			cpuc->current_idx[i] = PIC_NO_INDEX;  		}  	} +} + +/* On this PMU all PICs are programmed using a single PCR.  Calculate + * the combined control register value. + * + * For such chips we require that all of the events have the same + * configuration, so just fetch the settings from the first entry. + */ +static void calculate_single_pcr(struct cpu_hw_events *cpuc) +{ +	int i; + +	if (!cpuc->n_added) +		goto out;  	/* Assign to counters all unassigned events.  
*/  	for (i = 0; i < cpuc->n_events; i++) { @@ -702,20 +948,71 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)  		cpuc->current_idx[i] = idx;  		enc = perf_event_get_enc(cpuc->events[i]); -		pcr &= ~mask_for_index(idx); +		cpuc->pcr[0] &= ~mask_for_index(idx);  		if (hwc->state & PERF_HES_STOPPED) -			pcr |= nop_for_index(idx); +			cpuc->pcr[0] |= nop_for_index(idx);  		else -			pcr |= event_encoding(enc, idx); +			cpuc->pcr[0] |= event_encoding(enc, idx);  	}  out: -	return pcr; +	cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; +} + +/* On this PMU each PIC has it's own PCR control register.  */ +static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) +{ +	int i; + +	if (!cpuc->n_added) +		goto out; + +	for (i = 0; i < cpuc->n_events; i++) { +		struct perf_event *cp = cpuc->event[i]; +		struct hw_perf_event *hwc = &cp->hw; +		int idx = hwc->idx; +		u64 enc; + +		if (cpuc->current_idx[i] != PIC_NO_INDEX) +			continue; + +		sparc_perf_event_set_period(cp, hwc, idx); +		cpuc->current_idx[i] = idx; + +		enc = perf_event_get_enc(cpuc->events[i]); +		cpuc->pcr[idx] &= ~mask_for_index(idx); +		if (hwc->state & PERF_HES_STOPPED) +			cpuc->pcr[idx] |= nop_for_index(idx); +		else +			cpuc->pcr[idx] |= event_encoding(enc, idx); +	} +out: +	for (i = 0; i < cpuc->n_events; i++) { +		struct perf_event *cp = cpuc->event[i]; +		int idx = cp->hw.idx; + +		cpuc->pcr[idx] |= cp->hw.config_base; +	} +} + +/* If performance event entries have been added, move existing events + * around (if necessary) and then assign new entries to counters. + */ +static void update_pcrs_for_enable(struct cpu_hw_events *cpuc) +{ +	if (cpuc->n_added) +		read_in_all_counters(cpuc); + +	if (sparc_pmu->num_pcrs == 1) { +		calculate_single_pcr(cpuc); +	} else { +		calculate_multiple_pcrs(cpuc); +	}  }  static void sparc_pmu_enable(struct pmu *pmu)  {  	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); -	u64 pcr; +	int i;  	if (cpuc->enabled)  		return; @@ -723,26 +1020,17 @@ static void sparc_pmu_enable(struct pmu *pmu)  	cpuc->enabled = 1;  	barrier(); -	pcr = cpuc->pcr; -	if (!cpuc->n_events) { -		pcr = 0; -	} else { -		pcr = maybe_change_configuration(cpuc, pcr); - -		/* We require that all of the events have the same -		 * configuration, so just fetch the settings from the -		 * first entry. 
-		 */ -		cpuc->pcr = pcr | cpuc->event[0]->hw.config_base; -	} +	if (cpuc->n_events) +		update_pcrs_for_enable(cpuc); -	pcr_ops->write(cpuc->pcr); +	for (i = 0; i < sparc_pmu->num_pcrs; i++) +		pcr_ops->write_pcr(i, cpuc->pcr[i]);  }  static void sparc_pmu_disable(struct pmu *pmu)  {  	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); -	u64 val; +	int i;  	if (!cpuc->enabled)  		return; @@ -750,12 +1038,14 @@ static void sparc_pmu_disable(struct pmu *pmu)  	cpuc->enabled = 0;  	cpuc->n_added = 0; -	val = cpuc->pcr; -	val &= ~(PCR_UTRACE | PCR_STRACE | -		 sparc_pmu->hv_bit | sparc_pmu->irq_bit); -	cpuc->pcr = val; +	for (i = 0; i < sparc_pmu->num_pcrs; i++) { +		u64 val = cpuc->pcr[i]; -	pcr_ops->write(cpuc->pcr); +		val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit | +			 sparc_pmu->hv_bit | sparc_pmu->irq_bit); +		cpuc->pcr[i] = val; +		pcr_ops->write_pcr(i, cpuc->pcr[i]); +	}  }  static int active_event_index(struct cpu_hw_events *cpuc, @@ -854,9 +1144,11 @@ static DEFINE_MUTEX(pmc_grab_mutex);  static void perf_stop_nmi_watchdog(void *unused)  {  	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); +	int i;  	stop_nmi_watchdog(NULL); -	cpuc->pcr = pcr_ops->read(); +	for (i = 0; i < sparc_pmu->num_pcrs; i++) +		cpuc->pcr[i] = pcr_ops->read_pcr(i);  }  void perf_event_grab_pmc(void) @@ -942,9 +1234,17 @@ static int sparc_check_constraints(struct perf_event **evts,  	if (!n_ev)  		return 0; -	if (n_ev > MAX_HWEVENTS) +	if (n_ev > sparc_pmu->max_hw_events)  		return -1; +	if (!(sparc_pmu->flags & SPARC_PMU_HAS_CONFLICTS)) { +		int i; + +		for (i = 0; i < n_ev; i++) +			evts[i]->hw.idx = i; +		return 0; +	} +  	msk0 = perf_event_get_msk(events[0]);  	if (n_ev == 1) {  		if (msk0 & PIC_LOWER) @@ -1000,6 +1300,9 @@ static int check_excludes(struct perf_event **evts, int n_prev, int n_new)  	struct perf_event *event;  	int i, n, first; +	if (!(sparc_pmu->flags & SPARC_PMU_ALL_EXCLUDES_SAME)) +		return 0; +  	n = n_prev + n_new;  	if (n <= 1)  		return 0; @@ -1059,7 +1362,7 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)  	perf_pmu_disable(event->pmu);  	n0 = cpuc->n_events; -	if (n0 >= MAX_HWEVENTS) +	if (n0 >= sparc_pmu->max_hw_events)  		goto out;  	cpuc->event[n0] = event; @@ -1146,16 +1449,16 @@ static int sparc_pmu_event_init(struct perf_event *event)  	/* We save the enable bits in the config_base.  
*/  	hwc->config_base = sparc_pmu->irq_bit;  	if (!attr->exclude_user) -		hwc->config_base |= PCR_UTRACE; +		hwc->config_base |= sparc_pmu->user_bit;  	if (!attr->exclude_kernel) -		hwc->config_base |= PCR_STRACE; +		hwc->config_base |= sparc_pmu->priv_bit;  	if (!attr->exclude_hv)  		hwc->config_base |= sparc_pmu->hv_bit;  	n = 0;  	if (event->group_leader != event) {  		n = collect_events(event->group_leader, -				   MAX_HWEVENTS - 1, +				   sparc_pmu->max_hw_events - 1,  				   evts, events, current_idx_dmy);  		if (n < 0)  			return -EINVAL; @@ -1254,8 +1557,7 @@ static struct pmu pmu = {  void perf_event_print_debug(void)  {  	unsigned long flags; -	u64 pcr, pic; -	int cpu; +	int cpu, i;  	if (!sparc_pmu)  		return; @@ -1264,12 +1566,13 @@ void perf_event_print_debug(void)  	cpu = smp_processor_id(); -	pcr = pcr_ops->read(); -	read_pic(pic); -  	pr_info("\n"); -	pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n", -		cpu, pcr, pic); +	for (i = 0; i < sparc_pmu->num_pcrs; i++) +		pr_info("CPU#%d: PCR%d[%016llx]\n", +			cpu, i, pcr_ops->read_pcr(i)); +	for (i = 0; i < sparc_pmu->num_pic_regs; i++) +		pr_info("CPU#%d: PIC%d[%016llx]\n", +			cpu, i, pcr_ops->read_pic(i));  	local_irq_restore(flags);  } @@ -1305,8 +1608,9 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,  	 * Do this before we peek at the counters to determine  	 * overflow so we don't lose any events.  	 */ -	if (sparc_pmu->irq_bit) -		pcr_ops->write(cpuc->pcr); +	if (sparc_pmu->irq_bit && +	    sparc_pmu->num_pcrs == 1) +		pcr_ops->write_pcr(0, cpuc->pcr[0]);  	for (i = 0; i < cpuc->n_events; i++) {  		struct perf_event *event = cpuc->event[i]; @@ -1314,6 +1618,10 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,  		struct hw_perf_event *hwc;  		u64 val; +		if (sparc_pmu->irq_bit && +		    sparc_pmu->num_pcrs > 1) +			pcr_ops->write_pcr(idx, cpuc->pcr[idx]); +  		hwc = &event->hw;  		val = sparc_perf_event_update(event, hwc, idx);  		if (val & (1ULL << 31)) @@ -1352,6 +1660,10 @@ static bool __init supported_pmu(void)  		sparc_pmu = &niagara2_pmu;  		return true;  	} +	if (!strcmp(sparc_pmu_type, "niagara4")) { +		sparc_pmu = &niagara4_pmu; +		return true; +	}  	return false;  } diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 1414d16712b..0800e71d8a8 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -340,7 +340,12 @@ static const char *hwcaps[] = {  	 */  	"mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2",  	"ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau", -	"ima", "cspare", +	"ima", "cspare", "pause", "cbcond", +}; + +static const char *crypto_hwcaps[] = { +	"aes", "des", "kasumi", "camellia", "md5", "sha1", "sha256", +	"sha512", "mpmul", "montmul", "montsqr", "crc32c",  };  void cpucap_info(struct seq_file *m) @@ -357,27 +362,61 @@ void cpucap_info(struct seq_file *m)  			printed++;  		}  	} +	if (caps & HWCAP_SPARC_CRYPTO) { +		unsigned long cfr; + +		__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); +		for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) { +			unsigned long bit = 1UL << i; +			if (cfr & bit) { +				seq_printf(m, "%s%s", +					   printed ? "," : "", crypto_hwcaps[i]); +				printed++; +			} +		} +	}  	seq_putc(m, '\n');  } +static void __init report_one_hwcap(int *printed, const char *name) +{ +	if ((*printed) == 0) +		printk(KERN_INFO "CPU CAPS: ["); +	printk(KERN_CONT "%s%s", +	       (*printed) ? 
"," : "", name); +	if (++(*printed) == 8) { +		printk(KERN_CONT "]\n"); +		*printed = 0; +	} +} + +static void __init report_crypto_hwcaps(int *printed) +{ +	unsigned long cfr; +	int i; + +	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + +	for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) { +		unsigned long bit = 1UL << i; +		if (cfr & bit) +			report_one_hwcap(printed, crypto_hwcaps[i]); +	} +} +  static void __init report_hwcaps(unsigned long caps)  {  	int i, printed = 0; -	printk(KERN_INFO "CPU CAPS: [");  	for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {  		unsigned long bit = 1UL << i; -		if (caps & bit) { -			printk(KERN_CONT "%s%s", -			       printed ? "," : "", hwcaps[i]); -			if (++printed == 8) { -				printk(KERN_CONT "]\n"); -				printk(KERN_INFO "CPU CAPS: ["); -				printed = 0; -			} -		} +		if (caps & bit) +			report_one_hwcap(&printed, hwcaps[i]);  	} -	printk(KERN_CONT "]\n"); +	if (caps & HWCAP_SPARC_CRYPTO) +		report_crypto_hwcaps(&printed); +	if (printed != 0) +		printk(KERN_CONT "]\n");  }  static unsigned long __init mdesc_cpu_hwcap_list(void) @@ -411,6 +450,10 @@ static unsigned long __init mdesc_cpu_hwcap_list(void)  				break;  			}  		} +		for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) { +			if (!strcmp(prop, crypto_hwcaps[i])) +				caps |= HWCAP_SPARC_CRYPTO; +		}  		plen = strlen(prop) + 1;  		prop += plen;  |