diff options
Diffstat (limited to 'tools/power/x86/turbostat/turbostat.c')
| -rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 1323 | 
1 files changed, 826 insertions, 497 deletions
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 16de7ad4850..861d7719020 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -67,92 +67,119 @@ double bclk;  unsigned int show_pkg;  unsigned int show_core;  unsigned int show_cpu; +unsigned int show_pkg_only; +unsigned int show_core_only; +char *output_buffer, *outp;  int aperf_mperf_unstable;  int backwards_count;  char *progname; -int num_cpus; -cpu_set_t *cpu_present_set, *cpu_mask; -size_t cpu_present_setsize, cpu_mask_size; +cpu_set_t *cpu_present_set, *cpu_affinity_set; +size_t cpu_present_setsize, cpu_affinity_setsize; -struct counters { -	unsigned long long tsc;		/* per thread */ -	unsigned long long aperf;	/* per thread */ -	unsigned long long mperf;	/* per thread */ -	unsigned long long c1;	/* per thread (calculated) */ -	unsigned long long c3;	/* per core */ -	unsigned long long c6;	/* per core */ -	unsigned long long c7;	/* per core */ -	unsigned long long pc2;	/* per package */ -	unsigned long long pc3;	/* per package */ -	unsigned long long pc6;	/* per package */ -	unsigned long long pc7;	/* per package */ -	unsigned long long extra_msr;	/* per thread */ -	int pkg; -	int core; -	int cpu; -	struct counters *next; -}; +struct thread_data { +	unsigned long long tsc; +	unsigned long long aperf; +	unsigned long long mperf; +	unsigned long long c1;	/* derived */ +	unsigned long long extra_msr; +	unsigned int cpu_id; +	unsigned int flags; +#define CPU_IS_FIRST_THREAD_IN_CORE	0x2 +#define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4 +} *thread_even, *thread_odd; + +struct core_data { +	unsigned long long c3; +	unsigned long long c6; +	unsigned long long c7; +	unsigned int core_id; +} *core_even, *core_odd; + +struct pkg_data { +	unsigned long long pc2; +	unsigned long long pc3; +	unsigned long long pc6; +	unsigned long long pc7; +	unsigned int package_id; +} *package_even, *package_odd; + +#define ODD_COUNTERS thread_odd, core_odd, package_odd +#define EVEN_COUNTERS thread_even, core_even, package_even + +#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ +	(thread_base + (pkg_no) * topo.num_cores_per_pkg * \ +		topo.num_threads_per_core + \ +		(core_no) * topo.num_threads_per_core + (thread_no)) +#define GET_CORE(core_base, core_no, pkg_no) \ +	(core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) +#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) -struct counters *cnt_even; -struct counters *cnt_odd; -struct counters *cnt_delta; -struct counters *cnt_average; -struct timeval tv_even; -struct timeval tv_odd; -struct timeval tv_delta; +struct system_summary { +	struct thread_data threads; +	struct core_data cores; +	struct pkg_data packages; +} sum, average; -int mark_cpu_present(int pkg, int core, int cpu) + +struct topo_params { +	int num_packages; +	int num_cpus; +	int num_cores; +	int max_cpu_num; +	int num_cores_per_pkg; +	int num_threads_per_core; +} topo; + +struct timeval tv_even, tv_odd, tv_delta; + +void setup_all_buffers(void); + +int cpu_is_not_present(int cpu)  { -	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); -	return 0; +	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);  } -  /* - * cpu_mask_init(ncpus) - * - * allocate and clear cpu_mask - * set cpu_mask_size + * run func(thread, core, package) in topology order + * skip non-present cpus   */ -void cpu_mask_init(int ncpus) + +int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), +	struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)  { -	cpu_mask = CPU_ALLOC(ncpus); -	if (cpu_mask == NULL) { -		perror("CPU_ALLOC"); -		exit(3); -	} -	cpu_mask_size = CPU_ALLOC_SIZE(ncpus); -	CPU_ZERO_S(cpu_mask_size, cpu_mask); +	int retval, pkg_no, core_no, thread_no; -	/* -	 * Allocate and initialize cpu_present_set -	 */ -	cpu_present_set = CPU_ALLOC(ncpus); -	if (cpu_present_set == NULL) { -		perror("CPU_ALLOC"); -		exit(3); -	} -	cpu_present_setsize = CPU_ALLOC_SIZE(ncpus); -	CPU_ZERO_S(cpu_present_setsize, cpu_present_set); -	for_all_cpus(mark_cpu_present); -} +	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { +		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { +			for (thread_no = 0; thread_no < +				topo.num_threads_per_core; ++thread_no) { +				struct thread_data *t; +				struct core_data *c; +				struct pkg_data *p; -void cpu_mask_uninit() -{ -	CPU_FREE(cpu_mask); -	cpu_mask = NULL; -	cpu_mask_size = 0; -	CPU_FREE(cpu_present_set); -	cpu_present_set = NULL; -	cpu_present_setsize = 0; +				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); + +				if (cpu_is_not_present(t->cpu_id)) +					continue; + +				c = GET_CORE(core_base, core_no, pkg_no); +				p = GET_PKG(pkg_base, pkg_no); + +				retval = func(t, c, p); +				if (retval) +					return retval; +			} +		} +	} +	return 0;  }  int cpu_migrate(int cpu)  { -	CPU_ZERO_S(cpu_mask_size, cpu_mask); -	CPU_SET_S(cpu, cpu_mask_size, cpu_mask); -	if (sched_setaffinity(0, cpu_mask_size, cpu_mask) == -1) +	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); +	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); +	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)  		return -1;  	else  		return 0; @@ -181,67 +208,72 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)  void print_header(void)  {  	if (show_pkg) -		fprintf(stderr, "pk"); +		outp += sprintf(outp, "pk");  	if (show_pkg) -		fprintf(stderr, " "); +		outp += sprintf(outp, " ");  	if (show_core) -		fprintf(stderr, "cor"); +		outp += sprintf(outp, "cor");  	if (show_cpu) -		fprintf(stderr, " CPU"); +		outp += sprintf(outp, " CPU");  	if (show_pkg || show_core || show_cpu) -		fprintf(stderr, " "); +		outp += sprintf(outp, " ");  	if (do_nhm_cstates) -		fprintf(stderr, "   %%c0"); +		outp += sprintf(outp, "   %%c0");  	if (has_aperf) -		fprintf(stderr, "  GHz"); -	fprintf(stderr, "  TSC"); +		outp += sprintf(outp, "  GHz"); +	outp += sprintf(outp, "  TSC");  	if (do_nhm_cstates) -		fprintf(stderr, "    %%c1"); +		outp += sprintf(outp, "    %%c1");  	if (do_nhm_cstates) -		fprintf(stderr, "    %%c3"); +		outp += sprintf(outp, "    %%c3");  	if (do_nhm_cstates) -		fprintf(stderr, "    %%c6"); +		outp += sprintf(outp, "    %%c6");  	if (do_snb_cstates) -		fprintf(stderr, "    %%c7"); +		outp += sprintf(outp, "    %%c7");  	if (do_snb_cstates) -		fprintf(stderr, "   %%pc2"); +		outp += sprintf(outp, "   %%pc2");  	if (do_nhm_cstates) -		fprintf(stderr, "   %%pc3"); +		outp += sprintf(outp, "   %%pc3");  	if (do_nhm_cstates) -		fprintf(stderr, "   %%pc6"); +		outp += sprintf(outp, "   %%pc6");  	if (do_snb_cstates) -		fprintf(stderr, "   %%pc7"); +		outp += sprintf(outp, "   %%pc7");  	if (extra_msr_offset) -		fprintf(stderr, "        MSR 0x%x ", extra_msr_offset); +		outp += sprintf(outp, "        MSR 0x%x ", extra_msr_offset); -	putc('\n', stderr); +	outp += sprintf(outp, "\n");  } -void dump_cnt(struct counters *cnt) +int dump_counters(struct thread_data *t, struct core_data *c, +	struct pkg_data *p)  { -	if (!cnt) -		return; -	if (cnt->pkg) fprintf(stderr, "package: %d ", cnt->pkg); -	if (cnt->core) fprintf(stderr, "core:: %d ", cnt->core); -	if (cnt->cpu) fprintf(stderr, "CPU: %d ", cnt->cpu); -	if (cnt->tsc) fprintf(stderr, "TSC: %016llX\n", cnt->tsc); -	if (cnt->c3) fprintf(stderr, "c3: %016llX\n", cnt->c3); -	if (cnt->c6) fprintf(stderr, "c6: %016llX\n", cnt->c6); -	if (cnt->c7) fprintf(stderr, "c7: %016llX\n", cnt->c7); -	if (cnt->aperf) fprintf(stderr, "aperf: %016llX\n", cnt->aperf); -	if (cnt->pc2) fprintf(stderr, "pc2: %016llX\n", cnt->pc2); -	if (cnt->pc3) fprintf(stderr, "pc3: %016llX\n", cnt->pc3); -	if (cnt->pc6) fprintf(stderr, "pc6: %016llX\n", cnt->pc6); -	if (cnt->pc7) fprintf(stderr, "pc7: %016llX\n", cnt->pc7); -	if (cnt->extra_msr) fprintf(stderr, "msr0x%x: %016llX\n", extra_msr_offset, cnt->extra_msr); -} +	fprintf(stderr, "t %p, c %p, p %p\n", t, c, p); -void dump_list(struct counters *cnt) -{ -	printf("dump_list 0x%p\n", cnt); +	if (t) { +		fprintf(stderr, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); +		fprintf(stderr, "TSC: %016llX\n", t->tsc); +		fprintf(stderr, "aperf: %016llX\n", t->aperf); +		fprintf(stderr, "mperf: %016llX\n", t->mperf); +		fprintf(stderr, "c1: %016llX\n", t->c1); +		fprintf(stderr, "msr0x%x: %016llX\n", +			extra_msr_offset, t->extra_msr); +	} + +	if (c) { +		fprintf(stderr, "core: %d\n", c->core_id); +		fprintf(stderr, "c3: %016llX\n", c->c3); +		fprintf(stderr, "c6: %016llX\n", c->c6); +		fprintf(stderr, "c7: %016llX\n", c->c7); +	} -	for (; cnt; cnt = cnt->next) -		dump_cnt(cnt); +	if (p) { +		fprintf(stderr, "package: %d\n", p->package_id); +		fprintf(stderr, "pc2: %016llX\n", p->pc2); +		fprintf(stderr, "pc3: %016llX\n", p->pc3); +		fprintf(stderr, "pc6: %016llX\n", p->pc6); +		fprintf(stderr, "pc7: %016llX\n", p->pc7); +	} +	return 0;  }  /* @@ -253,321 +285,389 @@ void dump_list(struct counters *cnt)   * TSC: "TSC" 3 columns %3.2   * percentage " %pc3" %6.2   */ -void print_cnt(struct counters *p) +int format_counters(struct thread_data *t, struct core_data *c, +	struct pkg_data *p)  {  	double interval_float; +	 /* if showing only 1st thread in core and this isn't one, bail out */ +	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) +		return 0; + +	 /* if showing only 1st thread in pkg and this isn't one, bail out */ +	if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) +		return 0; +  	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; -	/* topology columns, print blanks on 1st (average) line */ -	if (p == cnt_average) { +	/* topo columns, print blanks on 1st (average) line */ +	if (t == &average.threads) {  		if (show_pkg) -			fprintf(stderr, "  "); +			outp += sprintf(outp, "  ");  		if (show_pkg && show_core) -			fprintf(stderr, " "); +			outp += sprintf(outp, " ");  		if (show_core) -			fprintf(stderr, "   "); +			outp += sprintf(outp, "   ");  		if (show_cpu) -			fprintf(stderr, " " "   "); +			outp += sprintf(outp, " " "   ");  	} else { -		if (show_pkg) -			fprintf(stderr, "%2d", p->pkg); +		if (show_pkg) { +			if (p) +				outp += sprintf(outp, "%2d", p->package_id); +			else +				outp += sprintf(outp, "  "); +		}  		if (show_pkg && show_core) -			fprintf(stderr, " "); -		if (show_core) -			fprintf(stderr, "%3d", p->core); +			outp += sprintf(outp, " "); +		if (show_core) { +			if (c) +				outp += sprintf(outp, "%3d", c->core_id); +			else +				outp += sprintf(outp, "   "); +		}  		if (show_cpu) -			fprintf(stderr, " %3d", p->cpu); +			outp += sprintf(outp, " %3d", t->cpu_id);  	}  	/* %c0 */  	if (do_nhm_cstates) {  		if (show_pkg || show_core || show_cpu) -			fprintf(stderr, " "); +			outp += sprintf(outp, " ");  		if (!skip_c0) -			fprintf(stderr, "%6.2f", 100.0 * p->mperf/p->tsc); +			outp += sprintf(outp, "%6.2f", 100.0 * t->mperf/t->tsc);  		else -			fprintf(stderr, "  ****"); +			outp += sprintf(outp, "  ****");  	}  	/* GHz */  	if (has_aperf) {  		if (!aperf_mperf_unstable) { -			fprintf(stderr, " %3.2f", -				1.0 * p->tsc / units * p->aperf / -				p->mperf / interval_float); +			outp += sprintf(outp, " %3.2f", +				1.0 * t->tsc / units * t->aperf / +				t->mperf / interval_float);  		} else { -			if (p->aperf > p->tsc || p->mperf > p->tsc) { -				fprintf(stderr, " ***"); +			if (t->aperf > t->tsc || t->mperf > t->tsc) { +				outp += sprintf(outp, " ***");  			} else { -				fprintf(stderr, "%3.1f*", -					1.0 * p->tsc / -					units * p->aperf / -					p->mperf / interval_float); +				outp += sprintf(outp, "%3.1f*", +					1.0 * t->tsc / +					units * t->aperf / +					t->mperf / interval_float);  			}  		}  	}  	/* TSC */ -	fprintf(stderr, "%5.2f", 1.0 * p->tsc/units/interval_float); +	outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float);  	if (do_nhm_cstates) {  		if (!skip_c1) -			fprintf(stderr, " %6.2f", 100.0 * p->c1/p->tsc); +			outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc);  		else -			fprintf(stderr, "  ****"); +			outp += sprintf(outp, "  ****");  	} + +	/* print per-core data only for 1st thread in core */ +	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) +		goto done; +  	if (do_nhm_cstates) -		fprintf(stderr, " %6.2f", 100.0 * p->c3/p->tsc); +		outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc);  	if (do_nhm_cstates) -		fprintf(stderr, " %6.2f", 100.0 * p->c6/p->tsc); +		outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc);  	if (do_snb_cstates) -		fprintf(stderr, " %6.2f", 100.0 * p->c7/p->tsc); +		outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); + +	/* print per-package data only for 1st core in package */ +	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) +		goto done; +  	if (do_snb_cstates) -		fprintf(stderr, " %6.2f", 100.0 * p->pc2/p->tsc); +		outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc);  	if (do_nhm_cstates) -		fprintf(stderr, " %6.2f", 100.0 * p->pc3/p->tsc); +		outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc);  	if (do_nhm_cstates) -		fprintf(stderr, " %6.2f", 100.0 * p->pc6/p->tsc); +		outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc);  	if (do_snb_cstates) -		fprintf(stderr, " %6.2f", 100.0 * p->pc7/p->tsc); +		outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); +done:  	if (extra_msr_offset) -		fprintf(stderr, "  0x%016llx", p->extra_msr); -	putc('\n', stderr); +		outp += sprintf(outp, "  0x%016llx", t->extra_msr); +	outp += sprintf(outp, "\n"); + +	return 0;  } -void print_counters(struct counters *counters) +void flush_stdout() +{ +	fputs(output_buffer, stdout); +	outp = output_buffer; +} +void flush_stderr() +{ +	fputs(output_buffer, stderr); +	outp = output_buffer; +} +void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)  { -	struct counters *cnt;  	static int printed; -  	if (!printed || !summary_only)  		print_header(); -	if (num_cpus > 1) -		print_cnt(cnt_average); +	if (topo.num_cpus > 1) +		format_counters(&average.threads, &average.cores, +			&average.packages);  	printed = 1;  	if (summary_only)  		return; -	for (cnt = counters; cnt != NULL; cnt = cnt->next) -		print_cnt(cnt); +	for_all_cpus(format_counters, t, c, p); +} +void +delta_package(struct pkg_data *new, struct pkg_data *old) +{ +	old->pc2 = new->pc2 - old->pc2; +	old->pc3 = new->pc3 - old->pc3; +	old->pc6 = new->pc6 - old->pc6; +	old->pc7 = new->pc7 - old->pc7;  } -#define SUBTRACT_COUNTER(after, before, delta) (delta = (after - before), (before > after)) +void +delta_core(struct core_data *new, struct core_data *old) +{ +	old->c3 = new->c3 - old->c3; +	old->c6 = new->c6 - old->c6; +	old->c7 = new->c7 - old->c7; +} -int compute_delta(struct counters *after, -	struct counters *before, struct counters *delta) +/* + * old = new - old + */ +void +delta_thread(struct thread_data *new, struct thread_data *old, +	struct core_data *core_delta)  { -	int errors = 0; -	int perf_err = 0; +	old->tsc = new->tsc - old->tsc; -	skip_c0 = skip_c1 = 0; +	/* check for TSC < 1 Mcycles over interval */ +	if (old->tsc < (1000 * 1000)) { +		fprintf(stderr, "Insanely slow TSC rate, TSC stops in idle?\n"); +		fprintf(stderr, "You can disable all c-states by booting with \"idle=poll\"\n"); +		fprintf(stderr, "or just the deep ones with \"processor.max_cstate=1\"\n"); +		exit(-3); +	} -	for ( ; after && before && delta; -		after = after->next, before = before->next, delta = delta->next) { -		if (before->cpu != after->cpu) { -			printf("cpu configuration changed: %d != %d\n", -				before->cpu, after->cpu); -			return -1; -		} +	old->c1 = new->c1 - old->c1; -		if (SUBTRACT_COUNTER(after->tsc, before->tsc, delta->tsc)) { -			fprintf(stderr, "cpu%d TSC went backwards %llX to %llX\n", -				before->cpu, before->tsc, after->tsc); -			errors++; -		} -		/* check for TSC < 1 Mcycles over interval */ -		if (delta->tsc < (1000 * 1000)) { -			fprintf(stderr, "Insanely slow TSC rate," -				" TSC stops in idle?\n"); -			fprintf(stderr, "You can disable all c-states" -				" by booting with \"idle=poll\"\n"); -			fprintf(stderr, "or just the deep ones with" -				" \"processor.max_cstate=1\"\n"); -			exit(-3); -		} -		if (SUBTRACT_COUNTER(after->c3, before->c3, delta->c3)) { -			fprintf(stderr, "cpu%d c3 counter went backwards %llX to %llX\n", -				before->cpu, before->c3, after->c3); -			errors++; -		} -		if (SUBTRACT_COUNTER(after->c6, before->c6, delta->c6)) { -			fprintf(stderr, "cpu%d c6 counter went backwards %llX to %llX\n", -				before->cpu, before->c6, after->c6); -			errors++; -		} -		if (SUBTRACT_COUNTER(after->c7, before->c7, delta->c7)) { -			fprintf(stderr, "cpu%d c7 counter went backwards %llX to %llX\n", -				before->cpu, before->c7, after->c7); -			errors++; -		} -		if (SUBTRACT_COUNTER(after->pc2, before->pc2, delta->pc2)) { -			fprintf(stderr, "cpu%d pc2 counter went backwards %llX to %llX\n", -				before->cpu, before->pc2, after->pc2); -			errors++; -		} -		if (SUBTRACT_COUNTER(after->pc3, before->pc3, delta->pc3)) { -			fprintf(stderr, "cpu%d pc3 counter went backwards %llX to %llX\n", -				before->cpu, before->pc3, after->pc3); -			errors++; -		} -		if (SUBTRACT_COUNTER(after->pc6, before->pc6, delta->pc6)) { -			fprintf(stderr, "cpu%d pc6 counter went backwards %llX to %llX\n", -				before->cpu, before->pc6, after->pc6); -			errors++; -		} -		if (SUBTRACT_COUNTER(after->pc7, before->pc7, delta->pc7)) { -			fprintf(stderr, "cpu%d pc7 counter went backwards %llX to %llX\n", -				before->cpu, before->pc7, after->pc7); -			errors++; -		} +	if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { +		old->aperf = new->aperf - old->aperf; +		old->mperf = new->mperf - old->mperf; +	} else { -		perf_err = SUBTRACT_COUNTER(after->aperf, before->aperf, delta->aperf); -		if (perf_err) { -			fprintf(stderr, "cpu%d aperf counter went backwards %llX to %llX\n", -				before->cpu, before->aperf, after->aperf); -		} -		perf_err |= SUBTRACT_COUNTER(after->mperf, before->mperf, delta->mperf); -		if (perf_err) { -			fprintf(stderr, "cpu%d mperf counter went backwards %llX to %llX\n", -				before->cpu, before->mperf, after->mperf); -		} -		if (perf_err) { -			if (!aperf_mperf_unstable) { -				fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); -				fprintf(stderr, "* Frequency results do not cover entire interval *\n"); -				fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); +		if (!aperf_mperf_unstable) { +			fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); +			fprintf(stderr, "* Frequency results do not cover entire interval *\n"); +			fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); -				aperf_mperf_unstable = 1; -			} -			/* -			 * mperf delta is likely a huge "positive" number -			 * can not use it for calculating c0 time -			 */ -			skip_c0 = 1; -			skip_c1 = 1; +			aperf_mperf_unstable = 1;  		} -  		/* -		 * As mperf and tsc collection are not atomic, -		 * it is possible for mperf's non-halted cycles -		 * to exceed TSC's all cycles: show c1 = 0% in that case. +		 * mperf delta is likely a huge "positive" number +		 * can not use it for calculating c0 time  		 */ -		if (delta->mperf > delta->tsc) -			delta->c1 = 0; -		else /* normal case, derive c1 */ -			delta->c1 = delta->tsc - delta->mperf -				- delta->c3 - delta->c6 - delta->c7; +		skip_c0 = 1; +		skip_c1 = 1; +	} -		if (delta->mperf == 0) -			delta->mperf = 1;	/* divide by 0 protection */ -		/* -		 * for "extra msr", just copy the latest w/o subtracting -		 */ -		delta->extra_msr = after->extra_msr; -		if (errors) { -			fprintf(stderr, "ERROR cpu%d before:\n", before->cpu); -			dump_cnt(before); -			fprintf(stderr, "ERROR cpu%d after:\n", before->cpu); -			dump_cnt(after); -			errors = 0; -		} +	/* +	 * As counter collection is not atomic, +	 * it is possible for mperf's non-halted cycles + idle states +	 * to exceed TSC's all cycles: show c1 = 0% in that case. +	 */ +	if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc) +		old->c1 = 0; +	else { +		/* normal case, derive c1 */ +		old->c1 = old->tsc - old->mperf - core_delta->c3 +				- core_delta->c6 - core_delta->c7; +	} + +	if (old->mperf == 0) { +		if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); +		old->mperf = 1;	/* divide by 0 protection */  	} + +	/* +	 * for "extra msr", just copy the latest w/o subtracting +	 */ +	old->extra_msr = new->extra_msr; +} + +int delta_cpu(struct thread_data *t, struct core_data *c, +	struct pkg_data *p, struct thread_data *t2, +	struct core_data *c2, struct pkg_data *p2) +{ +	/* calculate core delta only for 1st thread in core */ +	if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) +		delta_core(c, c2); + +	/* always calculate thread delta */ +	delta_thread(t, t2, c2);	/* c2 is core delta */ + +	/* calculate package delta only for 1st core in package */ +	if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) +		delta_package(p, p2); +  	return 0;  } -void compute_average(struct counters *delta, struct counters *avg) +void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)  { -	struct counters *sum; +	t->tsc = 0; +	t->aperf = 0; +	t->mperf = 0; +	t->c1 = 0; -	sum = calloc(1, sizeof(struct counters)); -	if (sum == NULL) { -		perror("calloc sum"); -		exit(1); -	} +	/* tells format_counters to dump all fields from this set */ +	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; -	for (; delta; delta = delta->next) { -		sum->tsc += delta->tsc; -		sum->c1 += delta->c1; -		sum->c3 += delta->c3; -		sum->c6 += delta->c6; -		sum->c7 += delta->c7; -		sum->aperf += delta->aperf; -		sum->mperf += delta->mperf; -		sum->pc2 += delta->pc2; -		sum->pc3 += delta->pc3; -		sum->pc6 += delta->pc6; -		sum->pc7 += delta->pc7; -	} -	avg->tsc = sum->tsc/num_cpus; -	avg->c1 = sum->c1/num_cpus; -	avg->c3 = sum->c3/num_cpus; -	avg->c6 = sum->c6/num_cpus; -	avg->c7 = sum->c7/num_cpus; -	avg->aperf = sum->aperf/num_cpus; -	avg->mperf = sum->mperf/num_cpus; -	avg->pc2 = sum->pc2/num_cpus; -	avg->pc3 = sum->pc3/num_cpus; -	avg->pc6 = sum->pc6/num_cpus; -	avg->pc7 = sum->pc7/num_cpus; +	c->c3 = 0; +	c->c6 = 0; +	c->c7 = 0; -	free(sum); +	p->pc2 = 0; +	p->pc3 = 0; +	p->pc6 = 0; +	p->pc7 = 0;  } +int sum_counters(struct thread_data *t, struct core_data *c, +	struct pkg_data *p) +{ +	average.threads.tsc += t->tsc; +	average.threads.aperf += t->aperf; +	average.threads.mperf += t->mperf; +	average.threads.c1 += t->c1; -int get_counters(struct counters *cnt) +	/* sum per-core values only for 1st thread in core */ +	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) +		return 0; + +	average.cores.c3 += c->c3; +	average.cores.c6 += c->c6; +	average.cores.c7 += c->c7; + +	/* sum per-pkg values only for 1st core in pkg */ +	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) +		return 0; + +	average.packages.pc2 += p->pc2; +	average.packages.pc3 += p->pc3; +	average.packages.pc6 += p->pc6; +	average.packages.pc7 += p->pc7; + +	return 0; +} +/* + * sum the counters for all cpus in the system + * compute the weighted average + */ +void compute_average(struct thread_data *t, struct core_data *c, +	struct pkg_data *p)  { -	for ( ; cnt; cnt = cnt->next) { +	clear_counters(&average.threads, &average.cores, &average.packages); -		if (cpu_migrate(cnt->cpu)) -			return -1; +	for_all_cpus(sum_counters, t, c, p); -		if (get_msr(cnt->cpu, MSR_TSC, &cnt->tsc)) -			return -1; +	average.threads.tsc /= topo.num_cpus; +	average.threads.aperf /= topo.num_cpus; +	average.threads.mperf /= topo.num_cpus; +	average.threads.c1 /= topo.num_cpus; -		if (has_aperf) { -			if (get_msr(cnt->cpu, MSR_APERF, &cnt->aperf)) -				return -1; -			if (get_msr(cnt->cpu, MSR_MPERF, &cnt->mperf)) -				return -1; -		} +	average.cores.c3 /= topo.num_cores; +	average.cores.c6 /= topo.num_cores; +	average.cores.c7 /= topo.num_cores; -		if (do_nhm_cstates) { -			if (get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY, &cnt->c3)) -				return -1; -			if (get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY, &cnt->c6)) -				return -1; -		} +	average.packages.pc2 /= topo.num_packages; +	average.packages.pc3 /= topo.num_packages; +	average.packages.pc6 /= topo.num_packages; +	average.packages.pc7 /= topo.num_packages; +} -		if (do_snb_cstates) -			if (get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY, &cnt->c7)) -				return -1; +static unsigned long long rdtsc(void) +{ +	unsigned int low, high; -		if (do_nhm_cstates) { -			if (get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY, &cnt->pc3)) -				return -1; -			if (get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY, &cnt->pc6)) -				return -1; -		} -		if (do_snb_cstates) { -			if (get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY, &cnt->pc2)) -				return -1; -			if (get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY, &cnt->pc7)) -				return -1; -		} -		if (extra_msr_offset) -			if (get_msr(cnt->cpu, extra_msr_offset, &cnt->extra_msr)) -				return -1; +	asm volatile("rdtsc" : "=a" (low), "=d" (high)); + +	return low | ((unsigned long long)high) << 32; +} + + +/* + * get_counters(...) + * migrate to cpu + * acquire and record local counters for that cpu + */ +int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ +	int cpu = t->cpu_id; + +	if (cpu_migrate(cpu)) +		return -1; + +	t->tsc = rdtsc();	/* we are running on local CPU of interest */ + +	if (has_aperf) { +		if (get_msr(cpu, MSR_APERF, &t->aperf)) +			return -3; +		if (get_msr(cpu, MSR_MPERF, &t->mperf)) +			return -4; +	} + +	if (extra_msr_offset) +		if (get_msr(cpu, extra_msr_offset, &t->extra_msr)) +			return -5; + +	/* collect core counters only for 1st thread in core */ +	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) +		return 0; + +	if (do_nhm_cstates) { +		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) +			return -6; +		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) +			return -7; +	} + +	if (do_snb_cstates) +		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) +			return -8; + +	/* collect package counters only for 1st core in package */ +	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) +		return 0; + +	if (do_nhm_cstates) { +		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) +			return -9; +		if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) +			return -10; +	} +	if (do_snb_cstates) { +		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) +			return -11; +		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) +			return -12;  	}  	return 0;  } -void print_nehalem_info(void) +void print_verbose_header(void)  {  	unsigned long long msr;  	unsigned int ratio; @@ -615,143 +715,82 @@ void print_nehalem_info(void)  } -void free_counter_list(struct counters *list) +void free_all_buffers(void)  { -	struct counters *p; +	CPU_FREE(cpu_present_set); +	cpu_present_set = NULL; +	cpu_present_set = 0; -	for (p = list; p; ) { -		struct counters *free_me; +	CPU_FREE(cpu_affinity_set); +	cpu_affinity_set = NULL; +	cpu_affinity_setsize = 0; -		free_me = p; -		p = p->next; -		free(free_me); -	} -} +	free(thread_even); +	free(core_even); +	free(package_even); -void free_all_counters(void) -{ -	free_counter_list(cnt_even); -	cnt_even = NULL; +	thread_even = NULL; +	core_even = NULL; +	package_even = NULL; -	free_counter_list(cnt_odd); -	cnt_odd = NULL; +	free(thread_odd); +	free(core_odd); +	free(package_odd); -	free_counter_list(cnt_delta); -	cnt_delta = NULL; +	thread_odd = NULL; +	core_odd = NULL; +	package_odd = NULL; -	free_counter_list(cnt_average); -	cnt_average = NULL; +	free(output_buffer); +	output_buffer = NULL; +	outp = NULL;  } -void insert_counters(struct counters **list, -	struct counters *new) +/* + * cpu_is_first_sibling_in_core(cpu) + * return 1 if given CPU is 1st HT sibling in the core + */ +int cpu_is_first_sibling_in_core(int cpu)  { -	struct counters *prev; - -	/* -	 * list was empty -	 */ -	if (*list == NULL) { -		new->next = *list; -		*list = new; -		return; -	} - -	if (!summary_only) -		show_cpu = 1;	/* there is more than one CPU */ - -	/* -	 * insert on front of list. -	 * It is sorted by ascending package#, core#, cpu# -	 */ -	if (((*list)->pkg > new->pkg) || -	    (((*list)->pkg == new->pkg) && ((*list)->core > new->core)) || -	    (((*list)->pkg == new->pkg) && ((*list)->core == new->core) && ((*list)->cpu > new->cpu))) { -		new->next = *list; -		*list = new; -		return; -	} - -	prev = *list; - -	while (prev->next && (prev->next->pkg < new->pkg)) { -		prev = prev->next; -		if (!summary_only) -			show_pkg = 1;	/* there is more than 1 package */ -	} - -	while (prev->next && (prev->next->pkg == new->pkg) -		&& (prev->next->core < new->core)) { -		prev = prev->next; -		if (!summary_only) -			show_core = 1;	/* there is more than 1 core */ -	} +	char path[64]; +	FILE *filep; +	int first_cpu; -	while (prev->next && (prev->next->pkg == new->pkg) -		&& (prev->next->core == new->core) -		&& (prev->next->cpu < new->cpu)) { -		prev = prev->next; +	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); +	filep = fopen(path, "r"); +	if (filep == NULL) { +		perror(path); +		exit(1);  	} - -	/* -	 * insert after "prev" -	 */ -	new->next = prev->next; -	prev->next = new; +	fscanf(filep, "%d", &first_cpu); +	fclose(filep); +	return (cpu == first_cpu);  } -void alloc_new_counters(int pkg, int core, int cpu) +/* + * cpu_is_first_core_in_package(cpu) + * return 1 if given CPU is 1st core in package + */ +int cpu_is_first_core_in_package(int cpu)  { -	struct counters *new; - -	if (verbose > 1) -		printf("pkg%d core%d, cpu%d\n", pkg, core, cpu); - -	new = (struct counters *)calloc(1, sizeof(struct counters)); -	if (new == NULL) { -		perror("calloc"); -		exit(1); -	} -	new->pkg = pkg; -	new->core = core; -	new->cpu = cpu; -	insert_counters(&cnt_odd, new); - -	new = (struct counters *)calloc(1, -		sizeof(struct counters)); -	if (new == NULL) { -		perror("calloc"); -		exit(1); -	} -	new->pkg = pkg; -	new->core = core; -	new->cpu = cpu; -	insert_counters(&cnt_even, new); - -	new = (struct counters *)calloc(1, sizeof(struct counters)); -	if (new == NULL) { -		perror("calloc"); -		exit(1); -	} -	new->pkg = pkg; -	new->core = core; -	new->cpu = cpu; -	insert_counters(&cnt_delta, new); +	char path[64]; +	FILE *filep; +	int first_cpu; -	new = (struct counters *)calloc(1, sizeof(struct counters)); -	if (new == NULL) { -		perror("calloc"); +	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); +	filep = fopen(path, "r"); +	if (filep == NULL) { +		perror(path);  		exit(1);  	} -	new->pkg = pkg; -	new->core = core; -	new->cpu = cpu; -	cnt_average = new; +	fscanf(filep, "%d", &first_cpu); +	fclose(filep); +	return (cpu == first_cpu);  }  int get_physical_package_id(int cpu)  { -	char path[64]; +	char path[80];  	FILE *filep;  	int pkg; @@ -768,7 +807,7 @@ int get_physical_package_id(int cpu)  int get_core_id(int cpu)  { -	char path[64]; +	char path[80];  	FILE *filep;  	int core; @@ -783,14 +822,87 @@ int get_core_id(int cpu)  	return core;  } +int get_num_ht_siblings(int cpu) +{ +	char path[80]; +	FILE *filep; +	int sib1, sib2; +	int matches; +	char character; + +	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); +	filep = fopen(path, "r"); +	if (filep == NULL) { +		perror(path); +		exit(1); +	} +	/* +	 * file format: +	 * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4) +	 * otherwinse 1 sibling (self). +	 */ +	matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); + +	fclose(filep); + +	if (matches == 3) +		return 2; +	else +		return 1; +} +  /* - * run func(pkg, core, cpu) on every cpu in /proc/stat + * run func(thread, core, package) in topology order + * skip non-present cpus   */ -int for_all_cpus(void (func)(int, int, int)) +int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, +	struct pkg_data *, struct thread_data *, struct core_data *, +	struct pkg_data *), struct thread_data *thread_base, +	struct core_data *core_base, struct pkg_data *pkg_base, +	struct thread_data *thread_base2, struct core_data *core_base2, +	struct pkg_data *pkg_base2) +{ +	int retval, pkg_no, core_no, thread_no; + +	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { +		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { +			for (thread_no = 0; thread_no < +				topo.num_threads_per_core; ++thread_no) { +				struct thread_data *t, *t2; +				struct core_data *c, *c2; +				struct pkg_data *p, *p2; + +				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); + +				if (cpu_is_not_present(t->cpu_id)) +					continue; + +				t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no); + +				c = GET_CORE(core_base, core_no, pkg_no); +				c2 = GET_CORE(core_base2, core_no, pkg_no); + +				p = GET_PKG(pkg_base, pkg_no); +				p2 = GET_PKG(pkg_base2, pkg_no); + +				retval = func(t, c, p, t2, c2, p2); +				if (retval) +					return retval; +			} +		} +	} +	return 0; +} + +/* + * run func(cpu) on every cpu in /proc/stat + * return max_cpu number + */ +int for_all_proc_cpus(int (func)(int))  {  	FILE *fp; -	int cpu_count; +	int cpu_num;  	int retval;  	fp = fopen(proc_stat, "r"); @@ -805,78 +917,88 @@ int for_all_cpus(void (func)(int, int, int))  		exit(1);  	} -	for (cpu_count = 0; ; cpu_count++) { -		int cpu; - -		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu); +	while (1) { +		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);  		if (retval != 1)  			break; -		func(get_physical_package_id(cpu), get_core_id(cpu), cpu); +		retval = func(cpu_num); +		if (retval) { +			fclose(fp); +			return(retval); +		}  	}  	fclose(fp); -	return cpu_count; +	return 0;  }  void re_initialize(void)  { -	free_all_counters(); -	num_cpus = for_all_cpus(alloc_new_counters); -	cpu_mask_uninit(); -	cpu_mask_init(num_cpus); -	printf("turbostat: re-initialized with num_cpus %d\n", num_cpus); +	free_all_buffers(); +	setup_all_buffers(); +	printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);  } -void dummy(int pkg, int core, int cpu) { return; } +  /* - * check to see if a cpu came on-line + * count_cpus() + * remember the last one seen, it will be the max   */ -int verify_num_cpus(void) +int count_cpus(int cpu)  { -	int new_num_cpus; +	if (topo.max_cpu_num < cpu) +		topo.max_cpu_num = cpu; -	new_num_cpus = for_all_cpus(dummy); - -	if (new_num_cpus != num_cpus) { -		if (verbose) -			printf("num_cpus was %d, is now  %d\n", -				num_cpus, new_num_cpus); -		return -1; -	} +	topo.num_cpus += 1; +	return 0; +} +int mark_cpu_present(int cpu) +{ +	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);  	return 0;  }  void turbostat_loop()  { +	int retval; +  restart: -	get_counters(cnt_even); +	retval = for_all_cpus(get_counters, EVEN_COUNTERS); +	if (retval) { +		re_initialize(); +		goto restart; +	}  	gettimeofday(&tv_even, (struct timezone *)NULL);  	while (1) { -		if (verify_num_cpus()) { +		if (for_all_proc_cpus(cpu_is_not_present)) {  			re_initialize();  			goto restart;  		}  		sleep(interval_sec); -		if (get_counters(cnt_odd)) { +		retval = for_all_cpus(get_counters, ODD_COUNTERS); +		if (retval) {  			re_initialize();  			goto restart;  		}  		gettimeofday(&tv_odd, (struct timezone *)NULL); -		compute_delta(cnt_odd, cnt_even, cnt_delta);  		timersub(&tv_odd, &tv_even, &tv_delta); -		compute_average(cnt_delta, cnt_average); -		print_counters(cnt_delta); +		for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); +		compute_average(EVEN_COUNTERS); +		format_all_counters(EVEN_COUNTERS); +		flush_stdout();  		sleep(interval_sec); -		if (get_counters(cnt_even)) { +		retval = for_all_cpus(get_counters, EVEN_COUNTERS); +		if (retval) {  			re_initialize();  			goto restart;  		}  		gettimeofday(&tv_even, (struct timezone *)NULL); -		compute_delta(cnt_even, cnt_odd, cnt_delta);  		timersub(&tv_even, &tv_odd, &tv_delta); -		compute_average(cnt_delta, cnt_average); -		print_counters(cnt_delta); +		for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); +		compute_average(ODD_COUNTERS); +		format_all_counters(ODD_COUNTERS); +		flush_stdout();  	}  } @@ -1051,6 +1173,208 @@ int open_dev_cpu_msr(int dummy1)  	return 0;  } +void topology_probe() +{ +	int i; +	int max_core_id = 0; +	int max_package_id = 0; +	int max_siblings = 0; +	struct cpu_topology { +		int core_id; +		int physical_package_id; +	} *cpus; + +	/* Initialize num_cpus, max_cpu_num */ +	topo.num_cpus = 0; +	topo.max_cpu_num = 0; +	for_all_proc_cpus(count_cpus); +	if (!summary_only && topo.num_cpus > 1) +		show_cpu = 1; + +	if (verbose > 1) +		fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); + +	cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology)); +	if (cpus == NULL) { +		perror("calloc cpus"); +		exit(1); +	} + +	/* +	 * Allocate and initialize cpu_present_set +	 */ +	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); +	if (cpu_present_set == NULL) { +		perror("CPU_ALLOC"); +		exit(3); +	} +	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); +	CPU_ZERO_S(cpu_present_setsize, cpu_present_set); +	for_all_proc_cpus(mark_cpu_present); + +	/* +	 * Allocate and initialize cpu_affinity_set +	 */ +	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); +	if (cpu_affinity_set == NULL) { +		perror("CPU_ALLOC"); +		exit(3); +	} +	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); +	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); + + +	/* +	 * For online cpus +	 * find max_core_id, max_package_id +	 */ +	for (i = 0; i <= topo.max_cpu_num; ++i) { +		int siblings; + +		if (cpu_is_not_present(i)) { +			if (verbose > 1) +				fprintf(stderr, "cpu%d NOT PRESENT\n", i); +			continue; +		} +		cpus[i].core_id = get_core_id(i); +		if (cpus[i].core_id > max_core_id) +			max_core_id = cpus[i].core_id; + +		cpus[i].physical_package_id = get_physical_package_id(i); +		if (cpus[i].physical_package_id > max_package_id) +			max_package_id = cpus[i].physical_package_id; + +		siblings = get_num_ht_siblings(i); +		if (siblings > max_siblings) +			max_siblings = siblings; +		if (verbose > 1) +			fprintf(stderr, "cpu %d pkg %d core %d\n", +				i, cpus[i].physical_package_id, cpus[i].core_id); +	} +	topo.num_cores_per_pkg = max_core_id + 1; +	if (verbose > 1) +		fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", +			max_core_id, topo.num_cores_per_pkg); +	if (!summary_only && topo.num_cores_per_pkg > 1) +		show_core = 1; + +	topo.num_packages = max_package_id + 1; +	if (verbose > 1) +		fprintf(stderr, "max_package_id %d, sizing for %d packages\n", +			max_package_id, topo.num_packages); +	if (!summary_only && topo.num_packages > 1) +		show_pkg = 1; + +	topo.num_threads_per_core = max_siblings; +	if (verbose > 1) +		fprintf(stderr, "max_siblings %d\n", max_siblings); + +	free(cpus); +} + +void +allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) +{ +	int i; + +	*t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * +		topo.num_packages, sizeof(struct thread_data)); +	if (*t == NULL) +		goto error; + +	for (i = 0; i < topo.num_threads_per_core * +		topo.num_cores_per_pkg * topo.num_packages; i++) +		(*t)[i].cpu_id = -1; + +	*c = calloc(topo.num_cores_per_pkg * topo.num_packages, +		sizeof(struct core_data)); +	if (*c == NULL) +		goto error; + +	for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) +		(*c)[i].core_id = -1; + +	*p = calloc(topo.num_packages, sizeof(struct pkg_data)); +	if (*p == NULL) +		goto error; + +	for (i = 0; i < topo.num_packages; i++) +		(*p)[i].package_id = i; + +	return; +error: +	perror("calloc counters"); +	exit(1); +} +/* + * init_counter() + * + * set cpu_id, core_num, pkg_num + * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE + * + * increment topo.num_cores when 1st core in pkg seen + */ +void init_counter(struct thread_data *thread_base, struct core_data *core_base, +	struct pkg_data *pkg_base, int thread_num, int core_num, +	int pkg_num, int cpu_id) +{ +	struct thread_data *t; +	struct core_data *c; +	struct pkg_data *p; + +	t = GET_THREAD(thread_base, thread_num, core_num, pkg_num); +	c = GET_CORE(core_base, core_num, pkg_num); +	p = GET_PKG(pkg_base, pkg_num); + +	t->cpu_id = cpu_id; +	if (thread_num == 0) { +		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; +		if (cpu_is_first_core_in_package(cpu_id)) +			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; +	} + +	c->core_id = core_num; +	p->package_id = pkg_num; +} + + +int initialize_counters(int cpu_id) +{ +	int my_thread_id, my_core_id, my_package_id; + +	my_package_id = get_physical_package_id(cpu_id); +	my_core_id = get_core_id(cpu_id); + +	if (cpu_is_first_sibling_in_core(cpu_id)) { +		my_thread_id = 0; +		topo.num_cores++; +	} else { +		my_thread_id = 1; +	} + +	init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); +	init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); +	return 0; +} + +void allocate_output_buffer() +{ +	output_buffer = calloc(1, (1 + topo.num_cpus) * 128); +	outp = output_buffer; +	if (outp == NULL) { +		perror("calloc"); +		exit(-1); +	} +} + +void setup_all_buffers(void) +{ +	topology_probe(); +	allocate_counters(&thread_even, &core_even, &package_even); +	allocate_counters(&thread_odd, &core_odd, &package_odd); +	allocate_output_buffer(); +	for_all_proc_cpus(initialize_counters); +}  void turbostat_init()  {  	check_cpuid(); @@ -1058,21 +1382,19 @@ void turbostat_init()  	check_dev_msr();  	check_super_user(); -	num_cpus = for_all_cpus(alloc_new_counters); -	cpu_mask_init(num_cpus); +	setup_all_buffers();  	if (verbose) -		print_nehalem_info(); +		print_verbose_header();  }  int fork_it(char **argv)  { -	int retval;  	pid_t child_pid; -	get_counters(cnt_even); -        /* clear affinity side-effect of get_counters() */ -        sched_setaffinity(0, cpu_present_setsize, cpu_present_set); +	for_all_cpus(get_counters, EVEN_COUNTERS); +	/* clear affinity side-effect of get_counters() */ +	sched_setaffinity(0, cpu_present_setsize, cpu_present_set);  	gettimeofday(&tv_even, (struct timezone *)NULL);  	child_pid = fork(); @@ -1095,14 +1417,17 @@ int fork_it(char **argv)  			exit(1);  		}  	} -	get_counters(cnt_odd); +	/* +	 * n.b. fork_it() does not check for errors from for_all_cpus() +	 * because re-starting is problematic when forking +	 */ +	for_all_cpus(get_counters, ODD_COUNTERS);  	gettimeofday(&tv_odd, (struct timezone *)NULL); -	retval = compute_delta(cnt_odd, cnt_even, cnt_delta); -  	timersub(&tv_odd, &tv_even, &tv_delta); -	compute_average(cnt_delta, cnt_average); -	if (!retval) -		print_counters(cnt_delta); +	for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); +	compute_average(EVEN_COUNTERS); +	format_all_counters(EVEN_COUNTERS); +	flush_stderr();  	fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); @@ -1115,8 +1440,14 @@ void cmdline(int argc, char **argv)  	progname = argv[0]; -	while ((opt = getopt(argc, argv, "+svi:M:")) != -1) { +	while ((opt = getopt(argc, argv, "+cpsvi:M:")) != -1) {  		switch (opt) { +		case 'c': +			show_core_only++; +			break; +		case 'p': +			show_pkg_only++; +			break;  		case 's':  			summary_only++;  			break; @@ -1142,10 +1473,8 @@ int main(int argc, char **argv)  	cmdline(argc, argv);  	if (verbose > 1) -		fprintf(stderr, "turbostat Dec 6, 2010" +		fprintf(stderr, "turbostat v2.0 May 16, 2012"  			" - Len Brown <lenb@kernel.org>\n"); -	if (verbose > 1) -		fprintf(stderr, "http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/\n");  	turbostat_init();  |