Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 262
1 file changed, 205 insertions, 57 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bda212a001..5adce1040b1 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -484,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event)
 	return event->pmu == &pmu;
 }
 
+/*
+ * Event scheduler state:
+ *
+ * Assign events iterating over all events and counters, beginning
+ * with events with least weights first. Keep the current iterator
+ * state in struct sched_state.
+ */
+struct sched_state {
+	int	weight;
+	int	event;		/* event index */
+	int	counter;	/* counter index */
+	int	unassigned;	/* number of events to be assigned left */
+	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+};
+
+/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
+#define	SCHED_STATES_MAX	2
+
+struct perf_sched {
+	int			max_weight;
+	int			max_events;
+	struct event_constraint	**constraints;
+	struct sched_state	state;
+	int			saved_states;
+	struct sched_state	saved[SCHED_STATES_MAX];
+};
+
+/*
+ * Initialize iterator that runs through all events and counters.
+ */
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+			    int num, int wmin, int wmax)
+{
+	int idx;
+
+	memset(sched, 0, sizeof(*sched));
+	sched->max_events	= num;
+	sched->max_weight	= wmax;
+	sched->constraints	= c;
+
+	for (idx = 0; idx < num; idx++) {
+		if (c[idx]->weight == wmin)
+			break;
+	}
+
+	sched->state.event	= idx;		/* start with min weight */
+	sched->state.weight	= wmin;
+	sched->state.unassigned	= num;
+}
+
+static void perf_sched_save_state(struct perf_sched *sched)
+{
+	if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
+		return;
+
+	sched->saved[sched->saved_states] = sched->state;
+	sched->saved_states++;
+}
+
+static bool perf_sched_restore_state(struct perf_sched *sched)
+{
+	if (!sched->saved_states)
+		return false;
+
+	sched->saved_states--;
+	sched->state = sched->saved[sched->saved_states];
+
+	/* continue with next counter: */
+	clear_bit(sched->state.counter++, sched->state.used);
+
+	return true;
+}
+
+/*
+ * Select a counter for the current event to schedule. Return true on
+ * success.
+ */
+static bool __perf_sched_find_counter(struct perf_sched *sched)
+{
+	struct event_constraint *c;
+	int idx;
+
+	if (!sched->state.unassigned)
+		return false;
+
+	if (sched->state.event >= sched->max_events)
+		return false;
+
+	c = sched->constraints[sched->state.event];
+
+	/* Prefer fixed purpose counters */
+	if (x86_pmu.num_counters_fixed) {
+		idx = X86_PMC_IDX_FIXED;
+		for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+			if (!__test_and_set_bit(idx, sched->state.used))
+				goto done;
+		}
+	}
+	/* Grab the first unused counter starting with idx */
+	idx = sched->state.counter;
+	for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
+		if (!__test_and_set_bit(idx, sched->state.used))
+			goto done;
+	}
+
+	return false;
+
+done:
+	sched->state.counter = idx;
+
+	if (c->overlap)
+		perf_sched_save_state(sched);
+
+	return true;
+}
+
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+	while (!__perf_sched_find_counter(sched)) {
+		if (!perf_sched_restore_state(sched))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Go through all unassigned events and find the next one to schedule.
+ * Take events with the least weight first. Return true on success.
+ */
+static bool perf_sched_next_event(struct perf_sched *sched)
+{
+	struct event_constraint *c;
+
+	if (!sched->state.unassigned || !--sched->state.unassigned)
+		return false;
+
+	do {
+		/* next event */
+		sched->state.event++;
+		if (sched->state.event >= sched->max_events) {
+			/* next weight */
+			sched->state.event = 0;
+			sched->state.weight++;
+			if (sched->state.weight > sched->max_weight)
+				return false;
+		}
+		c = sched->constraints[sched->state.event];
+	} while (c->weight != sched->state.weight);
+
+	sched->state.counter = 0;	/* start with first counter */
+
+	return true;
+}
+
+/*
+ * Assign a counter for each event.
+ */
+static int perf_assign_events(struct event_constraint **constraints, int n,
+			      int wmin, int wmax, int *assign)
+{
+	struct perf_sched sched;
+
+	perf_sched_init(&sched, constraints, n, wmin, wmax);
+
+	do {
+		if (!perf_sched_find_counter(&sched))
+			break;	/* failed */
+		if (assign)
+			assign[sched.state.event] = sched.state.counter;
+	} while (perf_sched_next_event(&sched));
+
+	return sched.state.unassigned;
+}
+
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
 	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	int i, j, w, wmax, num = 0;
+	int i, wmin, wmax, num = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
-	for (i = 0; i < n; i++) {
+	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
 		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
 		constraints[i] = c;
+		wmin = min(wmin, c->weight);
+		wmax = max(wmax, c->weight);
 	}
 
 	/*
@@ -521,60 +698,12 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		if (assign)
 			assign[i] = hwc->idx;
 	}
-	if (i == n)
-		goto done;
-
-	/*
-	 * begin slow path
-	 */
-
-	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
-	/*
-	 * weight = number of possible counters
-	 *
-	 * 1    = most constrained, only works on one counter
-	 * wmax = least constrained, works on any counter
-	 *
-	 * assign events to counters starting with most
-	 * constrained events.
-	 */
-	wmax = x86_pmu.num_counters;
+	/* slow path */
+	if (i != n)
+		num = perf_assign_events(constraints, n, wmin, wmax, assign);
 
 	/*
-	 * when fixed event counters are present,
-	 * wmax is incremented by 1 to account
-	 * for one more choice
-	 */
-	if (x86_pmu.num_counters_fixed)
-		wmax++;
-
-	for (w = 1, num = n; num && w <= wmax; w++) {
-		/* for each event */
-		for (i = 0; num && i < n; i++) {
-			c = constraints[i];
-			hwc = &cpuc->event_list[i]->hw;
-
-			if (c->weight != w)
-				continue;
-
-			for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
-				if (!test_bit(j, used_mask))
-					break;
-			}
-
-			if (j == X86_PMC_IDX_MAX)
-				break;
-
-			__set_bit(j, used_mask);
-
-			if (assign)
-				assign[i] = j;
-			num--;
-		}
-	}
-done:
-	/*
 	 * scheduling failed or is just a simulation,
 	 * free resources if necessary
 	 */
@@ -1119,6 +1248,7 @@ static void __init pmu_check_apic(void)
 
 static int __init init_hw_perf_events(void)
 {
+	struct x86_pmu_quirk *quirk;
 	struct event_constraint *c;
 	int err;
 
@@ -1147,8 +1277,8 @@ static int __init init_hw_perf_events(void)
 
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
-	if (x86_pmu.quirks)
-		x86_pmu.quirks();
+	for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
+		quirk->func();
 
 	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
 		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
@@ -1171,12 +1301,18 @@ static int __init init_hw_perf_events(void)
 
 	unconstrained = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-				   0, x86_pmu.num_counters);
+				   0, x86_pmu.num_counters, 0);
 
 	if (x86_pmu.event_constraints) {
+		/*
+		 * event on fixed counter2 (REF_CYCLES) only works on this
+		 * counter, so do not extend mask to generic counters
+		 */
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if (c->cmask != X86_RAW_EVENT_MASK)
+			if (c->cmask != X86_RAW_EVENT_MASK
+			    || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
 				continue;
+			}
 
 			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
 			c->weight += x86_pmu.num_counters;
@@ -1566,3 +1702,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
 
 	return misc;
 }
+
+void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
+{
+	cap->version		= x86_pmu.version;
+	cap->num_counters_gp	= x86_pmu.num_counters;
+	cap->num_counters_fixed	= x86_pmu.num_counters_fixed;
+	cap->bit_width_gp	= x86_pmu.cntval_bits;
+	cap->bit_width_fixed	= x86_pmu.cntval_bits;
+	cap->events_mask	= (unsigned int)x86_pmu.events_maskl;
+	cap->events_mask_len	= x86_pmu.events_mask_len;
+}
+EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
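Note on the scheduling change: each event constraint carries a weight (the number of counters its idxmsk allows), and both the old and the new scheduler place events in order of ascending weight, so the most constrained events claim counters first; the new perf_sched code additionally prefers fixed-purpose counters and can backtrack over counters taken by constraints marked overlap. The standalone C program below is a minimal user-space sketch of just the weight-ordered core, under simplifying assumptions: a fixed pool of generic counters, no fixed-purpose counters, and no save/restore backtracking. The names (model_constraint, model_assign_events, MODEL_NUM_COUNTERS) are illustrative and are not the kernel API.

/*
 * Minimal user-space model of weight-ordered counter assignment.
 * Illustrative only; names and simplifications are not the kernel code.
 */
#include <stdio.h>

#define MODEL_NUM_COUNTERS	4	/* pretend hardware has 4 generic counters */

struct model_constraint {
	unsigned int idxmsk;	/* bit i set: counter i is allowed */
	int weight;		/* popcount(idxmsk): fewer bits = more constrained */
};

/* Try to place every event; return the number of events left unassigned. */
static int model_assign_events(const struct model_constraint *c, int n, int *assign)
{
	unsigned int used = 0;	/* bitmask of counters already taken */
	int unassigned = n;
	int w, i, idx;

	/* Outer loop over weights: most constrained events are placed first. */
	for (w = 1; w <= MODEL_NUM_COUNTERS && unassigned; w++) {
		for (i = 0; i < n; i++) {
			if (c[i].weight != w)
				continue;
			/* Greedily grab the first free counter the mask allows. */
			for (idx = 0; idx < MODEL_NUM_COUNTERS; idx++) {
				if ((c[i].idxmsk & (1u << idx)) && !(used & (1u << idx))) {
					used |= 1u << idx;
					assign[i] = idx;
					unassigned--;
					break;
				}
			}
		}
	}
	return unassigned;
}

int main(void)
{
	/*
	 * Event 0 may run on any counter (weight 4); event 1 only on
	 * counter 0 (weight 1). Ascending-weight order places event 1
	 * first, so both events fit.
	 */
	struct model_constraint c[2] = {
		{ .idxmsk = 0xf, .weight = 4 },
		{ .idxmsk = 0x1, .weight = 1 },
	};
	int assign[2], i;

	if (model_assign_events(c, 2, assign) == 0)
		for (i = 0; i < 2; i++)
			printf("event %d -> counter %d\n", i, assign[i]);
	return 0;
}

Built with a plain C compiler, this prints "event 0 -> counter 1" and "event 1 -> counter 0"; assigning the unconstrained event first instead could take counter 0 and leave the constrained event unschedulable, which is exactly the situation the ascending-weight order avoids.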
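The final hunk exports perf_get_x86_pmu_capability() so other kernel code can query the shape of the host PMU. Below is a minimal sketch of a possible consumer; it assumes the matching declaration of struct x86_pmu_capability is available from <asm/perf_event.h> (that header change is not part of this file's diff), and example_probe_host_pmu() is a hypothetical caller, not something added by this patch.

#include <linux/printk.h>
#include <asm/perf_event.h>

/* Hypothetical consumer: log the host PMU geometry. */
static void example_probe_host_pmu(void)
{
	struct x86_pmu_capability cap;

	perf_get_x86_pmu_capability(&cap);

	pr_info("PMU v%d: %d GP counters x %d bits, %d fixed counters x %d bits\n",
		cap.version, cap.num_counters_gp, cap.bit_width_gp,
		cap.num_counters_fixed, cap.bit_width_fixed);
}

This is the kind of query a virtual-PMU or other in-kernel user would need before deciding how many counters to expose.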