Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 267
1 files changed, 165 insertions, 102 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b1fbdeecf6c..db5bdc8addf 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -21,6 +21,7 @@
 #include <linux/kdebug.h>
 #include <linux/sched.h>
 #include <linux/uaccess.h>
+#include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/cpu.h>
 #include <linux/bitops.h>
@@ -28,6 +29,7 @@
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
+#include <asm/compat.h>
 
 static u64 perf_event_mask __read_mostly;
 
@@ -73,10 +75,10 @@ struct debug_store {
 struct event_constraint {
 	union {
 		unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-		u64		idxmsk64[1];
+		u64		idxmsk64;
 	};
-	int	code;
-	int	cmask;
+	u64	code;
+	u64	cmask;
 	int	weight;
 };
 
@@ -103,7 +105,7 @@ struct cpu_hw_events {
 };
 
 #define __EVENT_CONSTRAINT(c, n, m, w) {\
-	{ .idxmsk64[0] = (n) },		\
+	{ .idxmsk64 = (n) },		\
 	.code = (c),			\
 	.cmask = (m),			\
 	.weight = (w),			\
@@ -116,7 +118,7 @@ struct cpu_hw_events {
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
 
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
 
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
@@ -133,8 +135,8 @@ struct x86_pmu {
 	int		(*handle_irq)(struct pt_regs *);
 	void		(*disable_all)(void);
 	void		(*enable_all)(void);
-	void		(*enable)(struct hw_perf_event *, int);
-	void		(*disable)(struct hw_perf_event *, int);
+	void		(*enable)(struct perf_event *);
+	void		(*disable)(struct perf_event *);
 	unsigned	eventsel;
 	unsigned	perfctr;
 	u64		(*event_map)(int);
@@ -157,6 +159,11 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
+
+	int		(*cpu_prepare)(int cpu);
+	void		(*cpu_starting)(int cpu);
+	void		(*cpu_dying)(int cpu);
+	void		(*cpu_dead)(int cpu);
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -165,8 +172,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };
 
-static int x86_perf_event_set_period(struct perf_event *event,
-			     struct hw_perf_event *hwc, int idx);
+static int x86_perf_event_set_period(struct perf_event *event);
 
 /*
  * Generalized hw caching related hw_event table, filled
@@ -189,11 +195,12 @@ static u64 __read_mostly hw_cache_event_ids
  * Returns the delta events processed.
  */
 static u64
-x86_perf_event_update(struct perf_event *event,
-			struct hw_perf_event *hwc, int idx)
+x86_perf_event_update(struct perf_event *event)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	int shift = 64 - x86_pmu.event_bits;
 	u64 prev_raw_count, new_raw_count;
+	int idx = hwc->idx;
 	s64 delta;
 
 	if (idx == X86_PMC_IDX_FIXED_BTS)
@@ -293,7 +300,7 @@ static inline bool bts_available(void)
 	return x86_pmu.enable_bts != NULL;
 }
 
-static inline void init_debug_store_on_cpu(int cpu)
+static void init_debug_store_on_cpu(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 
@@ -305,7 +312,7 @@ static inline void init_debug_store_on_cpu(int cpu)
 		     (u32)((u64)(unsigned long)ds >> 32));
 }
 
-static inline void fini_debug_store_on_cpu(int cpu)
+static void fini_debug_store_on_cpu(int cpu)
 {
 	if (!per_cpu(cpu_hw_events, cpu).ds)
 		return;
@@ -503,6 +510,9 @@ static int __hw_perf_event_init(struct perf_event *event)
 	 */
 	if (attr->type == PERF_TYPE_RAW) {
 		hwc->config |= x86_pmu.raw_event(attr->config);
+		if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
+		    perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+			return -EACCES;
 		return 0;
 	}
 
@@ -553,9 +563,9 @@ static void x86_pmu_disable_all(void)
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 		rdmsrl(x86_pmu.eventsel + idx, val);
-		if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
+		if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
 			continue;
-		val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
 		wrmsrl(x86_pmu.eventsel + idx, val);
 	}
 }
@@ -590,7 +600,7 @@ static void x86_pmu_enable_all(void)
 			continue;
 
 		val = event->hw.config;
-		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
 		wrmsrl(x86_pmu.eventsel + idx, val);
 	}
 }
@@ -612,8 +622,8 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
 	for (i = 0; i < n; i++) {
-		constraints[i] =
-		  x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
+		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
+		constraints[i] = c;
 	}
 
 	/*
@@ -635,7 +645,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		if (test_bit(hwc->idx, used_mask))
 			break;
 
-		set_bit(hwc->idx, used_mask);
+		__set_bit(hwc->idx, used_mask);
 		if (assign)
 			assign[i] = hwc->idx;
 	}
@@ -684,7 +694,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			if (j == X86_PMC_IDX_MAX)
 				break;
 
-			set_bit(j, used_mask);
+			__set_bit(j, used_mask);
 
 			if (assign)
 				assign[i] = j;
@@ -777,6 +787,7 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
 		hwc->last_tag == cpuc->tags[i];
 }
 
+static int x86_pmu_start(struct perf_event *event);
 static void x86_pmu_stop(struct perf_event *event);
 
 void hw_perf_enable(void)
@@ -793,6 +804,7 @@ void hw_perf_enable(void)
 		return;
 
 	if (cpuc->n_added) {
+		int n_running = cpuc->n_events - cpuc->n_added;
 		/*
 		 * apply assignment obtained either from
 		 * hw_perf_group_sched_in() or x86_pmu_enable()
@@ -800,8 +812,7 @@ void hw_perf_enable(void)
 		 * step1: save events moving to new counters
 		 * step2: reprogram moved events into new counters
 		 */
-		for (i = 0; i < cpuc->n_events; i++) {
-
+		for (i = 0; i < n_running; i++) {
 			event = cpuc->event_list[i];
 			hwc = &event->hw;
 
@@ -816,29 +827,18 @@ void hw_perf_enable(void)
 				continue;
 
 			x86_pmu_stop(event);
-
-			hwc->idx = -1;
 		}
 
 		for (i = 0; i < cpuc->n_events; i++) {
-
 			event = cpuc->event_list[i];
 			hwc = &event->hw;
 
-			if (hwc->idx == -1) {
+			if (!match_prev_assignment(hwc, cpuc, i))
 				x86_assign_hw_event(event, cpuc, i);
-				x86_perf_event_set_period(event, hwc, hwc->idx);
-			}
-			/*
-			 * need to mark as active because x86_pmu_disable()
-			 * clear active_mask and events[] yet it preserves
-			 * idx
-			 */
-			set_bit(hwc->idx, cpuc->active_mask);
-			cpuc->events[hwc->idx] = event;
+			else if (i < n_running)
+				continue;
 
-			x86_pmu.enable(hwc, hwc->idx);
-			perf_event_update_userpage(event);
+			x86_pmu_start(event);
 		}
 		cpuc->n_added = 0;
 		perf_events_lapic_init();
@@ -850,15 +850,16 @@ void hw_perf_enable(void)
 	x86_pmu.enable_all();
 }
 
-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
 {
-	(void)checking_wrmsrl(hwc->config_base + idx,
-			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
+	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
+			      hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
 }
 
-static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
+static inline void x86_pmu_disable_event(struct perf_event *event)
 {
-	(void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
+	struct hw_perf_event *hwc = &event->hw;
+	(void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config);
 }
 
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -868,12 +869,12 @@ static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
  * To be called with the event disabled in hw:
  */
 static int
-x86_perf_event_set_period(struct perf_event *event,
-			     struct hw_perf_event *hwc, int idx)
+x86_perf_event_set_period(struct perf_event *event)
 {
+	struct hw_perf_event *hwc = &event->hw;
 	s64 left = atomic64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
-	int err, ret = 0;
+	int err, ret = 0, idx = hwc->idx;
 
 	if (idx == X86_PMC_IDX_FIXED_BTS)
 		return 0;
@@ -919,11 +920,11 @@ x86_perf_event_set_period(struct perf_event *event,
 	return ret;
 }
 
-static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static void x86_pmu_enable_event(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	if (cpuc->enabled)
-		__x86_pmu_enable_event(hwc, idx);
+		__x86_pmu_enable_event(&event->hw);
 }
 
 /*
@@ -959,34 +960,32 @@ static int x86_pmu_enable(struct perf_event *event)
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 
 	cpuc->n_events = n;
-	cpuc->n_added  = n - n0;
+	cpuc->n_added += n - n0;
 
 	return 0;
 }
 
 static int x86_pmu_start(struct perf_event *event)
 {
-	struct hw_perf_event *hwc = &event->hw;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int idx = event->hw.idx;
 
-	if (hwc->idx == -1)
+	if (idx == -1)
 		return -EAGAIN;
 
-	x86_perf_event_set_period(event, hwc, hwc->idx);
-	x86_pmu.enable(hwc, hwc->idx);
+	x86_perf_event_set_period(event);
+	cpuc->events[idx] = event;
+	__set_bit(idx, cpuc->active_mask);
+	x86_pmu.enable(event);
+	perf_event_update_userpage(event);
 
 	return 0;
 }
 
 static void x86_pmu_unthrottle(struct perf_event *event)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct hw_perf_event *hwc = &event->hw;
-
-	if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
-				cpuc->events[hwc->idx] != event))
-		return;
-
-	x86_pmu.enable(hwc, hwc->idx);
+	int ret = x86_pmu_start(event);
+	WARN_ON_ONCE(ret);
 }
 
 void perf_event_print_debug(void)
@@ -1046,18 +1045,16 @@ static void x86_pmu_stop(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
-	/*
-	 * Must be done before we disable, otherwise the nmi handler
-	 * could reenable again:
-	 */
-	clear_bit(idx, cpuc->active_mask);
-	x86_pmu.disable(hwc, idx);
+	if (!__test_and_clear_bit(idx, cpuc->active_mask))
+		return;
+
+	x86_pmu.disable(event);
 
 	/*
 	 * Drain the remaining delta count out of a event
 	 * that we are disabling:
 	 */
-	x86_perf_event_update(event, hwc, idx);
+	x86_perf_event_update(event);
 
 	cpuc->events[idx] = NULL;
 }
@@ -1094,8 +1091,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 	int idx, handled = 0;
 	u64 val;
 
-	data.addr = 0;
-	data.raw = NULL;
+	perf_sample_data_init(&data, 0);
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -1106,7 +1102,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 		event = cpuc->events[idx];
 		hwc = &event->hw;
 
-		val = x86_perf_event_update(event, hwc, idx);
+		val = x86_perf_event_update(event);
 		if (val & (1ULL << (x86_pmu.event_bits - 1)))
 			continue;
 
@@ -1116,11 +1112,11 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 		handled		= 1;
 		data.period	= event->hw.last_period;
 
-		if (!x86_perf_event_set_period(event, hwc, idx))
+		if (!x86_perf_event_set_period(event))
 			continue;
 
 		if (perf_event_overflow(event, 1, &data, regs))
-			x86_pmu.disable(hwc, idx);
+			x86_pmu_stop(event);
 	}
 
 	if (handled)
@@ -1307,7 +1303,7 @@ int hw_perf_group_sched_in(struct perf_event *leader,
 	memcpy(cpuc->assign, assign, n0*sizeof(int));
 
 	cpuc->n_events  = n0;
-	cpuc->n_added   = n1;
+	cpuc->n_added  += n1;
 	ctx->nr_active += n1;
 
 	/*
@@ -1335,6 +1331,41 @@ undo:
 #include "perf_event_p6.c"
 #include "perf_event_intel.c"
 
+static int __cpuinit
+x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (long)hcpu;
+	int ret = NOTIFY_OK;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		if (x86_pmu.cpu_prepare)
+			ret = x86_pmu.cpu_prepare(cpu);
+		break;
+
+	case CPU_STARTING:
+		if (x86_pmu.cpu_starting)
+			x86_pmu.cpu_starting(cpu);
+		break;
+
+	case CPU_DYING:
+		if (x86_pmu.cpu_dying)
+			x86_pmu.cpu_dying(cpu);
+		break;
+
+	case CPU_UP_CANCELED:
+	case CPU_DEAD:
+		if (x86_pmu.cpu_dead)
+			x86_pmu.cpu_dead(cpu);
+		break;
+
+	default:
+		break;
+	}
+
+	return ret;
+}
+
 static void __init pmu_check_apic(void)
 {
 	if (cpu_has_apic)
@@ -1347,6 +1378,7 @@ static void __init pmu_check_apic(void)
 
 void __init init_hw_perf_events(void)
 {
+	struct event_constraint *c;
 	int err;
 
 	pr_info("Performance Events: ");
@@ -1395,6 +1427,16 @@ void __init init_hw_perf_events(void)
 		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
 				   0, x86_pmu.num_events);
 
+	if (x86_pmu.event_constraints) {
+		for_each_event_constraint(c, x86_pmu.event_constraints) {
+			if (c->cmask != INTEL_ARCH_FIXED_MASK)
+				continue;
+
+			c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1;
+			c->weight += x86_pmu.num_events;
+		}
+	}
+
 	pr_info("... version:                %d\n",     x86_pmu.version);
 	pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
 	pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
@@ -1402,11 +1444,13 @@ void __init init_hw_perf_events(void)
 	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
 	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
 	pr_info("... event mask:             %016Lx\n", perf_event_mask);
+
+	perf_cpu_notifier(x86_pmu_notifier);
 }
 
 static inline void x86_pmu_read(struct perf_event *event)
 {
-	x86_perf_event_update(event, &event->hw, event->hw.idx);
+	x86_perf_event_update(event);
 }
 
 static const struct pmu pmu = {
@@ -1588,14 +1632,42 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	return len;
 }
 
-static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+#ifdef CONFIG_COMPAT
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
-	unsigned long bytes;
+	/* 32-bit process in 64-bit kernel. */
+	struct stack_frame_ia32 frame;
+	const void __user *fp;
 
-	bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
+	if (!test_thread_flag(TIF_IA32))
+		return 0;
 
-	return bytes == sizeof(*frame);
+	fp = compat_ptr(regs->bp);
+	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+		unsigned long bytes;
+		frame.next_frame     = 0;
+		frame.return_address = 0;
+
+		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
+		if (bytes != sizeof(frame))
+			break;
+
+		if (fp < compat_ptr(regs->sp))
+			break;
+
+		callchain_store(entry, frame.return_address);
+		fp = compat_ptr(frame.next_frame);
+	}
+	return 1;
+}
+#else
+static inline int
+perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
+{
+    return 0;
 }
+#endif
 
 static void
 perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
@@ -1611,11 +1683,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 	callchain_store(entry, PERF_CONTEXT_USER);
 	callchain_store(entry, regs->ip);
 
+	if (perf_callchain_user32(regs, entry))
+		return;
+
 	while (entry->nr < PERF_MAX_STACK_DEPTH) {
+		unsigned long bytes;
 		frame.next_frame	     = NULL;
 		frame.return_address = 0;
 
-		if (!copy_stack_frame(fp, &frame))
+		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
+		if (bytes != sizeof(frame))
 			break;
 
 		if ((unsigned long)fp < regs->sp)
@@ -1662,28 +1739,14 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 	return entry;
 }
 
-void hw_perf_event_setup_online(int cpu)
-{
-	init_debug_store_on_cpu(cpu);
-
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		amd_pmu_cpu_online(cpu);
-		break;
-	default:
-		return;
-	}
-}
-
-void hw_perf_event_setup_offline(int cpu)
+void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
 {
-	init_debug_store_on_cpu(cpu);
-
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		amd_pmu_cpu_offline(cpu);
-		break;
-	default:
-		return;
-	}
+	regs->ip = ip;
+	/*
+	 * perf_arch_fetch_caller_regs adds another call, we need to increment
+	 * the skip level
+	 */
+	regs->bp = rewind_frame_pointer(skip + 1);
+	regs->cs = __KERNEL_CS;
+	local_save_flags(regs->flags);
 }
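
Note: the diff above adds per-CPU hotplug hooks (cpu_prepare, cpu_starting, cpu_dying, cpu_dead) to struct x86_pmu and dispatches them from x86_pmu_notifier(), registered via perf_cpu_notifier(). The sketch below is not part of this patch; it is a minimal illustration of how a vendor PMU description could fill in those hooks. The example_* names and the callback bodies are hypothetical; only the field names and the NOTIFY_OK return convention come from the code above.

/* Illustrative sketch only -- not from this commit. */
static int example_cpu_prepare(int cpu)
{
	/* CPU_UP_PREPARE: may sleep, allocate per-CPU state here. */
	return NOTIFY_OK;
}

static void example_cpu_starting(int cpu)
{
	/* CPU_STARTING: runs on the incoming CPU early in bringup. */
}

static void example_cpu_dead(int cpu)
{
	/* CPU_DEAD / CPU_UP_CANCELED: release what cpu_prepare() allocated. */
}

static __initconst struct x86_pmu example_pmu = {
	/* ... handle_irq, enable, disable and the other fields ... */
	.cpu_prepare	= example_cpu_prepare,
	.cpu_starting	= example_cpu_starting,
	.cpu_dead	= example_cpu_dead,
};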