Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel.c')

 -rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c | 971 ++++++++++++++++++++
 1 file changed, 971 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
new file mode 100644
index 00000000000..cf6590cf4a5
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -0,0 +1,971 @@
+#ifdef CONFIG_CPU_SUP_INTEL
+
+/*
+ * Intel PerfMon v3. Used on Core2 and later.
+ */
+static const u64 intel_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
+};
+
+static struct event_constraint intel_core_event_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+	INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_core2_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
+	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
+	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
+	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
+	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
+	INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
+	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
+	INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_nehalem_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
+	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
+	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
+	INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
+	INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
+	INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
+	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_westmere_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
+	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_gen_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	EVENT_CONSTRAINT_END
+};
+
+static u64 intel_pmu_event_map(int hw_event)
+{
+	return intel_perfmon_event_map[hw_event];
+}
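
[Editor's aside, not part of the patch: the second argument of these constraint macros is a counter bitmask in which the low 32 bits select general-purpose counters and bits 32 and up select fixed counters. So FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))) says INSTRUCTIONS_RETIRED may run on GP counters 0-1 or on fixed counter 0. A minimal stand-alone sketch of that decoding; print_idxmsk() is a hypothetical helper, not kernel code:]

	#include <stdint.h>
	#include <stdio.h>

	/* Hypothetical helper: list the counters a constraint mask permits. */
	static void print_idxmsk(uint64_t idxmsk)
	{
		int i;

		for (i = 0; i < 64; i++) {
			if (!(idxmsk & (1ULL << i)))
				continue;
			if (i < 32)
				printf("general-purpose counter %d\n", i);
			else
				printf("fixed counter %d\n", i - 32);
		}
	}

	int main(void)
	{
		/* Same encoding as FIXED_EVENT_CONSTRAINT(0xc0, 0x3 | (1ULL << 32)) */
		print_idxmsk(0x3ULL | (1ULL << 32));
		return 0;
	}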
+
+static __initconst u64 westmere_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
+		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
+		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
+		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
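
[Editor's aside, not part of the patch: these tables are indexed by cache level, operation, and result. A value of -1 marks an operation the hardware cannot count at all, 0 marks a combination with no suitable event, and anything else is an eventsel/umask pair. A hedged sketch of how such a table might be consulted; cache_event_config() is a hypothetical, simplified stand-in for the generic x86 perf lookup:]

	#include <errno.h>
	#include <stdint.h>

	typedef uint64_t u64;

	enum { OP_MAX = 3, RESULT_MAX = 2 };	/* mirrors the PERF_COUNT_HW_CACHE_* enums */

	/* Hypothetical lookup: 0 = no event available, -1 = op not supported. */
	int cache_event_config(const u64 (*tbl)[OP_MAX][RESULT_MAX],
			       int cache, int op, int result, u64 *config)
	{
		u64 val = tbl[cache][op][result];

		if (val == (u64)-1)
			return -EINVAL;		/* e.g. L1I writes: cannot be counted */
		if (val == 0)
			return -ENOENT;		/* no matching hardware event */

		*config = val;			/* umask in bits 8-15, event select in bits 0-7 */
		return 0;
	}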
+
+static __initconst u64 nehalem_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI            */
+		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI            */
+		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE         */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
+		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
+		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
+		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
+		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
+		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static __initconst u64 core2_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
+		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
+		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
+		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static __initconst u64 atom_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
+		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
+		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
+		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
+		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+static u64 intel_pmu_raw_event(u64 hw_event)
+{
+#define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
+#define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
+#define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
+#define CORE_EVNTSEL_INV_MASK		0x00800000ULL
+#define CORE_EVNTSEL_REG_MASK		0xFF000000ULL
+
+#define CORE_EVNTSEL_MASK		\
+	(INTEL_ARCH_EVTSEL_MASK |	\
+	 INTEL_ARCH_UNIT_MASK   |	\
+	 INTEL_ARCH_EDGE_MASK   |	\
+	 INTEL_ARCH_INV_MASK    |	\
+	 INTEL_ARCH_CNT_MASK)
+
+	return hw_event & CORE_EVNTSEL_MASK;
+}
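
[Editor's aside, not part of the patch: the CORE_EVNTSEL_* masks above spell out the architectural PERFEVTSEL layout that raw events must fit into: event select in bits 0-7, unit mask in bits 8-15, edge detect in bit 18, invert in bit 23, counter mask in bits 24-31. A small stand-alone sketch that composes a raw config from those fields; compose_raw_event() is a hypothetical helper:]

	#include <stdint.h>

	/* Field offsets per the CORE_EVNTSEL_* masks above. */
	uint64_t compose_raw_event(uint8_t event, uint8_t umask,
				   int edge, int inv, uint8_t cmask)
	{
		uint64_t config = 0;

		config |= (uint64_t)event;		/* bits  0-7  */
		config |= (uint64_t)umask << 8;		/* bits  8-15 */
		config |= (uint64_t)!!edge << 18;	/* bit  18    */
		config |= (uint64_t)!!inv  << 23;	/* bit  23    */
		config |= (uint64_t)cmask << 24;	/* bits 24-31 */

		return config;
	}

	/* Example: compose_raw_event(0x2e, 0x41, 0, 0, 0) yields 0x412e,
	 * the "LLC Misses" encoding used in the event map above. */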
+
+static void intel_pmu_enable_bts(u64 config)
+{
+	unsigned long debugctlmsr;
+
+	debugctlmsr = get_debugctlmsr();
+
+	debugctlmsr |= X86_DEBUGCTL_TR;
+	debugctlmsr |= X86_DEBUGCTL_BTS;
+	debugctlmsr |= X86_DEBUGCTL_BTINT;
+
+	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
+		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
+
+	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
+		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
+
+	update_debugctlmsr(debugctlmsr);
+}
+
+static void intel_pmu_disable_bts(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	unsigned long debugctlmsr;
+
+	if (!cpuc->ds)
+		return;
+
+	debugctlmsr = get_debugctlmsr();
+
+	debugctlmsr &=
+		~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
+		  X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
+
+	update_debugctlmsr(debugctlmsr);
+}
+
+static void intel_pmu_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+		intel_pmu_disable_bts();
+}
+
+static void intel_pmu_enable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+
+	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
+		struct perf_event *event =
+			cpuc->events[X86_PMC_IDX_FIXED_BTS];
+
+		if (WARN_ON_ONCE(!event))
+			return;
+
+		intel_pmu_enable_bts(event->hw.config);
+	}
+}
+
+static inline u64 intel_pmu_get_status(void)
+{
+	u64 status;
+
+	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
+
+	return status;
+}
+
+static inline void intel_pmu_ack_status(u64 ack)
+{
+	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
+}
+
+static inline void
+intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
+{
+	int idx = __idx - X86_PMC_IDX_FIXED;
+	u64 ctrl_val, mask;
+
+	mask = 0xfULL << (idx * 4);
+
+	rdmsrl(hwc->config_base, ctrl_val);
+	ctrl_val &= ~mask;
+	(void)checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void intel_pmu_drain_bts_buffer(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct bts_record {
+		u64	from;
+		u64	to;
+		u64	flags;
+	};
+	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
+	struct bts_record *at, *top;
+	struct perf_output_handle handle;
+	struct perf_event_header header;
+	struct perf_sample_data data;
+	struct pt_regs regs;
+
+	if (!event)
+		return;
+
+	if (!ds)
+		return;
+
+	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
+	top = (struct bts_record *)(unsigned long)ds->bts_index;
+
+	if (top <= at)
+		return;
+
+	ds->bts_index = ds->bts_buffer_base;
+
+	data.period	= event->hw.last_period;
+	data.addr	= 0;
+	data.raw	= NULL;
+	regs.ip		= 0;
+
+	/*
+	 * Prepare a generic sample, i.e. fill in the invariant fields.
+	 * We will overwrite the from and to address before we output
+	 * the sample.
+	 */
+	perf_prepare_sample(&header, &data, event, &regs);
+
+	if (perf_output_begin(&handle, event,
+			      header.size * (top - at), 1, 1))
+		return;
+
+	for (; at < top; at++) {
+		data.ip		= at->from;
+		data.addr	= at->to;
+
+		perf_output_sample(&handle, &header, &data, event);
+	}
+
+	perf_output_end(&handle);
+
+	/* There's new data available. */
+	event->hw.interrupts++;
+	event->pending_kill = POLL_IN;
+}
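
[Editor's aside, not part of the patch: in intel_pmu_enable_bts() the *_OFF_OS/*_OFF_USR bits are suppression bits, so they are set when the corresponding ring is NOT requested. A minimal sketch of the resulting IA32_DEBUGCTL value, assuming the architectural bit positions (TR=6, BTS=7, BTINT=8, BTS_OFF_OS=9, BTS_OFF_USR=10), which is what the kernel's X86_DEBUGCTL_* constants encode:]

	#include <stdio.h>

	#define DBG_TR			(1UL << 6)
	#define DBG_BTS			(1UL << 7)
	#define DBG_BTINT		(1UL << 8)
	#define DBG_BTS_OFF_OS		(1UL << 9)
	#define DBG_BTS_OFF_USR		(1UL << 10)

	/* Sketch of the value intel_pmu_enable_bts() composes. */
	static unsigned long bts_debugctl(int want_os, int want_usr)
	{
		unsigned long v = DBG_TR | DBG_BTS | DBG_BTINT;

		if (!want_os)
			v |= DBG_BTS_OFF_OS;	/* suppress ring-0 branches */
		if (!want_usr)
			v |= DBG_BTS_OFF_USR;	/* suppress ring-3 branches */
		return v;
	}

	int main(void)
	{
		/* User-only tracing: trace on, kernel branches suppressed. */
		printf("user-only BTS debugctl: %#lx\n", bts_debugctl(0, 1));
		return 0;
	}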
+
+static inline void
+intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+		intel_pmu_disable_bts();
+		intel_pmu_drain_bts_buffer();
+		return;
+	}
+
+	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+		intel_pmu_disable_fixed(hwc, idx);
+		return;
+	}
+
+	x86_pmu_disable_event(hwc, idx);
+}
+
+static inline void
+intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
+{
+	int idx = __idx - X86_PMC_IDX_FIXED;
+	u64 ctrl_val, bits, mask;
+	int err;
+
+	/*
+	 * Enable IRQ generation (0x8),
+	 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
+	 * if requested:
+	 */
+	bits = 0x8ULL;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
+		bits |= 0x2;
+	if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+		bits |= 0x1;
+
+	/*
+	 * ANY bit is supported in v3 and up
+	 */
+	if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
+		bits |= 0x4;
+
+	bits <<= (idx * 4);
+	mask = 0xfULL << (idx * 4);
+
+	rdmsrl(hwc->config_base, ctrl_val);
+	ctrl_val &= ~mask;
+	ctrl_val |= bits;
+	err = checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
+		if (!__get_cpu_var(cpu_hw_events).enabled)
+			return;
+
+		intel_pmu_enable_bts(hwc->config);
+		return;
+	}
+
+	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
+		intel_pmu_enable_fixed(hwc, idx);
+		return;
+	}
+
+	__x86_pmu_enable_event(hwc, idx);
+}
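
[Editor's aside, not part of the patch: each fixed counter owns a 4-bit field in MSR_ARCH_PERFMON_FIXED_CTR_CTRL at offset idx*4, with bit 0 = ring-0 counting, bit 1 = ring-3 counting, bit 2 = ANY-thread (v3+), bit 3 = PMI on overflow, exactly as intel_pmu_enable_fixed() composes them. A stand-alone sketch of the same field arithmetic; fixed_ctrl_set() is a hypothetical helper:]

	#include <stdint.h>
	#include <stdio.h>

	/* Sketch of the read-modify-write that intel_pmu_enable_fixed() does. */
	static uint64_t fixed_ctrl_set(uint64_t ctrl, int idx, int usr, int os)
	{
		uint64_t bits = 0x8;			/* bit 3: PMI on overflow */

		if (usr)
			bits |= 0x2;			/* bit 1: count ring 3 */
		if (os)
			bits |= 0x1;			/* bit 0: count ring 0 */

		ctrl &= ~(0xfULL << (idx * 4));		/* clear this counter's field */
		ctrl |= bits << (idx * 4);
		return ctrl;
	}

	int main(void)
	{
		/* Fixed counter 1 (UNHALTED_CORE_CYCLES), user+kernel: 0xb0. */
		printf("%#llx\n", (unsigned long long)fixed_ctrl_set(0, 1, 1, 1));
		return 0;
	}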
+
+/*
+ * Save and restart an expired event. Called by NMI contexts,
+ * so it has to be careful about preempting normal event ops:
+ */
+static int intel_pmu_save_and_restart(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	int ret;
+
+	x86_perf_event_update(event, hwc, idx);
+	ret = x86_perf_event_set_period(event, hwc, idx);
+
+	return ret;
+}
+
+static void intel_pmu_reset(void)
+{
+	struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
+	unsigned long flags;
+	int idx;
+
+	if (!x86_pmu.num_events)
+		return;
+
+	local_irq_save(flags);
+
+	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
+
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
+		checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
+	}
+	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
+	}
+	if (ds)
+		ds->bts_index = ds->bts_buffer_base;
+
+	local_irq_restore(flags);
+}
+
+/*
+ * This handler is triggered by the local APIC, so the APIC IRQ handling
+ * rules apply:
+ */
+static int intel_pmu_handle_irq(struct pt_regs *regs)
+{
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	int bit, loops;
+	u64 ack, status;
+
+	data.addr = 0;
+	data.raw = NULL;
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+
+	perf_disable();
+	intel_pmu_drain_bts_buffer();
+	status = intel_pmu_get_status();
+	if (!status) {
+		perf_enable();
+		return 0;
+	}
+
+	loops = 0;
+again:
+	if (++loops > 100) {
+		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
+		perf_event_print_debug();
+		intel_pmu_reset();
+		perf_enable();
+		return 1;
+	}
+
+	inc_irq_stat(apic_perf_irqs);
+	ack = status;
+	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+		struct perf_event *event = cpuc->events[bit];
+
+		clear_bit(bit, (unsigned long *) &status);
+		if (!test_bit(bit, cpuc->active_mask))
+			continue;
+
+		if (!intel_pmu_save_and_restart(event))
+			continue;
+
+		data.period = event->hw.last_period;
+
+		if (perf_event_overflow(event, 1, &data, regs))
+			intel_pmu_disable_event(&event->hw, bit);
+	}
+
+	intel_pmu_ack_status(ack);
+
+	/*
+	 * Repeat if there is more work to be done:
+	 */
+	status = intel_pmu_get_status();
+	if (status)
+		goto again;
+
+	perf_enable();
+
+	return 1;
+}
+
+static struct event_constraint bts_constraint =
+	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
+
+static struct event_constraint *
+intel_special_constraints(struct perf_event *event)
+{
+	unsigned int hw_event;
+
+	hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
+
+	if (unlikely((hw_event ==
+		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
+		     (event->hw.sample_period == 1))) {
+		return &bts_constraint;
+	}
+	return NULL;
+}
+
+static struct event_constraint *
+intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct event_constraint *c;
+
+	c = intel_special_constraints(event);
+	if (c)
+		return c;
+
+	return x86_get_event_constraints(cpuc, event);
+}
+
+static __initconst struct x86_pmu core_pmu = {
+	.name			= "core",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= x86_pmu_disable_all,
+	.enable_all		= x86_pmu_enable_all,
+	.enable			= x86_pmu_enable_event,
+	.disable		= x86_pmu_disable_event,
+	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
+	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.event_map		= intel_pmu_event_map,
+	.raw_event		= intel_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
+	.apic			= 1,
+	/*
+	 * Intel PMCs cannot be accessed sanely above 32 bit width,
+	 * so we install an artificial 1<<31 period regardless of
+	 * the generic event period:
+	 */
+	.max_period		= (1ULL << 31) - 1,
+	.get_event_constraints	= intel_get_event_constraints,
+	.event_constraints	= intel_core_event_constraints,
+};
+
+static __initconst struct x86_pmu intel_pmu = {
+	.name			= "Intel",
+	.handle_irq		= intel_pmu_handle_irq,
+	.disable_all		= intel_pmu_disable_all,
+	.enable_all		= intel_pmu_enable_all,
+	.enable			= intel_pmu_enable_event,
+	.disable		= intel_pmu_disable_event,
+	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
+	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
+	.event_map		= intel_pmu_event_map,
+	.raw_event		= intel_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
+	.apic			= 1,
+	/*
+	 * Intel PMCs cannot be accessed sanely above 32 bit width,
+	 * so we install an artificial 1<<31 period regardless of
+	 * the generic event period:
+	 */
+	.max_period		= (1ULL << 31) - 1,
+	.enable_bts		= intel_pmu_enable_bts,
+	.disable_bts		= intel_pmu_disable_bts,
+	.get_event_constraints	= intel_get_event_constraints,
+};
+
+static __init int intel_pmu_init(void)
+{
+	union cpuid10_edx edx;
+	union cpuid10_eax eax;
+	unsigned int unused;
+	unsigned int ebx;
+	int version;
+
+	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+		/* check for P6 processor family */
+		if (boot_cpu_data.x86 == 6)
+			return p6_pmu_init();
+		else
+			return -ENODEV;
+	}
+
+	/*
+	 * Check whether the Architectural PerfMon supports
+	 * the Branch Misses Retired hw_event:
+	 */
+	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
+	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
+		return -ENODEV;
+
+	version = eax.split.version_id;
+	if (version < 2)
+		x86_pmu = core_pmu;
+	else
+		x86_pmu = intel_pmu;
+
+	x86_pmu.version			= version;
+	x86_pmu.num_events		= eax.split.num_events;
+	x86_pmu.event_bits		= eax.split.bit_width;
+	x86_pmu.event_mask		= (1ULL << eax.split.bit_width) - 1;
+
+	/*
+	 * Quirk: v2 perfmon does not report fixed-purpose events, so
+	 * assume at least 3 events:
+	 */
+	if (version > 1)
+		x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
+
+	/*
+	 * Install the hw-cache-events table:
+	 */
+	switch (boot_cpu_data.x86_model) {
+	case 14: /* 65 nm core solo/duo, "Yonah" */
+		pr_cont("Core events, ");
+		break;
+
+	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
+	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
+	case 29: /* six-core 45 nm xeon "Dunnington" */
+		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		x86_pmu.event_constraints = intel_core2_event_constraints;
+		pr_cont("Core2 events, ");
+		break;
+
+	case 26: /* 45 nm nehalem, "Bloomfield" */
+	case 30: /* 45 nm nehalem, "Lynnfield" */
+		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		x86_pmu.event_constraints = intel_nehalem_event_constraints;
+		pr_cont("Nehalem/Corei7 events, ");
+		break;
+
+	case 28:
+		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		x86_pmu.event_constraints = intel_gen_event_constraints;
+		pr_cont("Atom events, ");
+		break;
+
+	case 37: /* 32 nm nehalem, "Clarkdale" */
+	case 44: /* 32 nm nehalem, "Gulftown" */
+		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		x86_pmu.event_constraints = intel_westmere_event_constraints;
+		pr_cont("Westmere events, ");
+		break;
+
+	default:
+		/*
+		 * default constraints for v2 and up
+		 */
+		x86_pmu.event_constraints = intel_gen_event_constraints;
+		pr_cont("generic architected perfmon, ");
+	}
+	return 0;
+}
+
+#else /* CONFIG_CPU_SUP_INTEL */
+
+static int intel_pmu_init(void)
+{
+	return 0;
+}
+
+#endif /* CONFIG_CPU_SUP_INTEL */
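
[Editor's aside, not part of the patch: intel_pmu_init() decodes CPUID leaf 0xA, which the cpuid10_eax/cpuid10_edx unions unpack: EAX holds version (bits 7:0), number of GP counters (15:8), counter bit width (23:16), and the event mask length (31:24); EDX holds the fixed counter count (4:0). A user-space sketch of the same decode using GCC/Clang's __get_cpuid() helper:]

	#include <cpuid.h>
	#include <stdio.h>

	/* Sketch of the CPUID leaf 0xA decode done in intel_pmu_init(). */
	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx))
			return 1;

		printf("version:        %u\n", eax & 0xff);		/* bits  7:0  */
		printf("num counters:   %u\n", (eax >> 8) & 0xff);	/* bits 15:8  */
		printf("counter width:  %u\n", (eax >> 16) & 0xff);	/* bits 23:16 */
		printf("mask length:    %u\n", (eax >> 24) & 0xff);	/* bits 31:24 */
		printf("fixed counters: %u\n", edx & 0x1f);		/* bits  4:0  */
		return 0;
	}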