diff options
| -rw-r--r-- | tools/perf/Documentation/perf-trace.txt | 53 | ||||
| -rw-r--r-- | tools/perf/Makefile | 3 | ||||
| -rw-r--r-- | tools/perf/builtin-trace.c | 300 | ||||
| -rw-r--r-- | tools/perf/builtin.h | 1 | ||||
| -rw-r--r-- | tools/perf/command-list.txt | 1 | ||||
| -rw-r--r-- | tools/perf/perf.c | 1 | 
6 files changed, 358 insertions, 1 deletions
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
new file mode 100644
index 00000000000..3a2ae37310a
--- /dev/null
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -0,0 +1,53 @@
+perf-trace(1)
+=============
+
+NAME
+----
+perf-trace - strace-inspired tool
+
+SYNOPSIS
+--------
+[verse]
+'perf trace'
+
+DESCRIPTION
+-----------
+This command shows the events associated with the target: initially only
+syscalls, but later also other system events such as page faults, task
+lifetime events, scheduling events, etc.
+
+Initially this is a live-mode-only tool, but eventually it will work with
+perf.data files like the other tools, allowing the 'record' phase to be
+detached from the analysis phase.
+
+OPTIONS
+-------
+
+--all-cpus::
+        System-wide collection from all CPUs.
+
+-p::
+--pid=::
+	Record events on existing process ID (comma-separated list).
+
+--tid=::
+        Record events on existing thread ID (comma-separated list).
+
+--uid=::
+        Record events in threads owned by uid. Name or number.
+
+--no-inherit::
+	Child tasks do not inherit counters.
+
+--mmap-pages=::
+	Number of mmap data pages. Must be a power of two.
+
+--cpu::
+Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+In per-thread mode with inheritance mode on (default), events are captured only when
+the thread executes on the designated CPUs. Default is to monitor all CPUs.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 251dcd7fb5a..6958ba4f5dc 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -102,7 +102,7 @@ ifdef PARSER_DEBUG
 endif
 
 CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS)
-EXTLIBS = -lpthread -lrt -lelf -lm
+EXTLIBS = -lpthread -lrt -lelf -lm -laudit
 ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
 ALL_LDFLAGS = $(LDFLAGS)
 STRIP ?= strip
@@ -442,6 +442,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
 BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
 BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
 BUILTIN_OBJS += $(OUTPUT)builtin-test.o
+BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
 BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
 
 PERFLIBS = $(LIB_FILE) $(LIBTRACEEVENT)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
new file mode 100644
index 00000000000..5fa1820cc96
--- /dev/null
+++ b/tools/perf/builtin-trace.c
@@ -0,0 +1,300 @@
+#include "builtin.h"
+#include "util/evlist.h"
+#include "util/parse-options.h"
+#include "util/thread_map.h"
+#include "event-parse.h"
+
+#include <libaudit.h>
+#include <stdlib.h>
+
+/* Keep sorted by name: syscall_fmt__find() looks entries up with bsearch(). */
+static struct syscall_fmt {
+	const char *name;
+	bool	   errmsg;
+	bool	   timeout;
+} syscall_fmts[] = {
+	{ .name	    = "futex",	  .errmsg = true, },
+	{ .name	    = "poll",	  .errmsg = true, .timeout = true, },
+	{ .name	    = "ppoll",	  .errmsg = true, .timeout = true, },
+	{ .name	    = "read",	  .errmsg = true, },
+	{ .name	    = "recvfrom", .errmsg = true, },
+	{ .name	    = "select",	  .errmsg = true, .timeout = true, },
+};
+
+static int syscall_fmt__cmp(const void *name, const void *fmtp)
+{
+	const struct syscall_fmt *fmt = fmtp;
+	return strcmp(name, fmt->name);
+}
+
+static struct syscall_fmt *syscall_fmt__find(const char *name)
+{
+	const int nmemb = ARRAY_SIZE(syscall_fmts);
+	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
+}
+
+struct syscall {
+	struct event_format *tp_format;
+	const char	    *name;
+	struct syscall_fmt  *fmt;
+};
+
+struct trace {
+	int			audit_machine;
+	struct {
+		int		max;
+		struct syscall  *table;
+	} syscalls;
+	struct perf_record_opts opts;
+};
+
+/* Fill in the table entry for 'id', growing the table if needed; returns 0 on success, else -1. */
+static int trace__read_syscall_info(struct trace *trace, int id)
+{
+	char tp_name[128];
+	struct syscall *sc;
+
+	if (id > trace->syscalls.max) {
+		struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
+
+		if (nsyscalls == NULL)
+			return -1;
+
+		/* Zero just the newly added slots; with max == -1 that is the whole table. */
+		memset(nsyscalls + trace->syscalls.max + 1, 0,
+		       (id - trace->syscalls.max) * sizeof(*sc));
+
+		trace->syscalls.table = nsyscalls;
+		trace->syscalls.max   = id;
+	}
+
+	sc = trace->syscalls.table + id;
+	sc->name = audit_syscall_to_name(id, trace->audit_machine);
+	if (sc->name == NULL)
+		return -1;
+
+	snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
+
+	sc->tp_format = event_format__new("syscalls", tp_name);
+	sc->fmt = syscall_fmt__find(sc->name);
+
+	return sc->tp_format != NULL ? 0 : -1;
+}
+
+/* Print "name: value" pairs, or "argN: value" without a tp_format; returns the chars printed. */
+static size_t syscall__fprintf_args(struct syscall *sc, unsigned long *args, FILE *fp)
+{
+	int i = 0;
+	size_t printed = 0;
+
+	if (sc->tp_format != NULL) {
+		struct format_field *field;
+
+		for (field = sc->tp_format->format.fields->next; field; field = field->next)
+			printed += fprintf(fp, "%s%s: %ld", printed ? ", " : "",
+					   field->name, args[i++]);
+	} else {
+		for (; i < 6; ++i)
+			printed += fprintf(fp, "%sarg%d: %ld", printed ? ", " : "", i, args[i]);
+	}
+
+	return printed;
+}
+
+/* Open raw_syscalls:sys_enter/sys_exit on the target and print their events in an endless loop. */
+static int trace__run(struct trace *trace)
+{
+	struct perf_evlist *evlist = perf_evlist__new(NULL, NULL);
+	struct perf_evsel *evsel, *evsel_enter, *evsel_exit;
+	int err = -1, i, nr_events = 0, before;
+
+	if (evlist == NULL) {
+		printf("Not enough memory to run!\n");
+		goto out;
+	}
+
+	evsel_enter = perf_evsel__newtp("raw_syscalls", "sys_enter", 0);
+	if (evsel_enter == NULL) {
+		printf("Couldn't read the raw_syscalls:sys_enter tracepoint information!\n");
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__add(evlist, evsel_enter);
+
+	evsel_exit = perf_evsel__newtp("raw_syscalls", "sys_exit", 1);
+	if (evsel_exit == NULL) {
+		printf("Couldn't read the raw_syscalls:sys_exit tracepoint information!\n");
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__add(evlist, evsel_exit);
+
+	err = perf_evlist__create_maps(evlist, &trace->opts.target);
+	if (err < 0) {
+		printf("Problems parsing the target to trace, check your options!\n");
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__config_attrs(evlist, &trace->opts);
+
+	err = perf_evlist__open(evlist);
+	if (err < 0) {
+		printf("Couldn't create the events: %s\n", strerror(errno));
+		goto out_delete_evlist;
+	}
+
+	err = perf_evlist__mmap(evlist, UINT_MAX, false);
+	if (err < 0) {
+		printf("Couldn't mmap the events: %s\n", strerror(errno));
+		goto out_delete_evlist;
+	}
+
+	perf_evlist__enable(evlist);
+again:
+	before = nr_events;
+
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		union perf_event *event;
+
+		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
+			const u32 type = event->header.type;
+			struct syscall *sc;
+			struct perf_sample sample;
+			int id;
+
+			++nr_events;
+
+			switch (type) {
+			case PERF_RECORD_SAMPLE:
+				break;
+			case PERF_RECORD_LOST:
+				printf("LOST %" PRIu64 " events!\n", event->lost.lost);
+				continue;
+			default:
+				printf("Unexpected %s event, skipping...\n",
+					perf_event__name(type));
+				continue;
+			}
+
+			err = perf_evlist__parse_sample(evlist, event, &sample);
+			if (err) {
+				printf("Can't parse sample, err = %d, skipping...\n", err);
+				continue;
+			}
+
+			evsel = perf_evlist__id2evsel(evlist, sample.id);
+			if (evsel == NULL) {
+				printf("Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
+				continue;
+			}
+
+			id = perf_evsel__intval(evsel, &sample, "id");
+			if (id < 0) {
+				printf("Invalid syscall %d id, skipping...\n", id);
+				continue;
+			}
+
+			/* Only the name decides: without a tp_format the args are printed as argN. */
+			if (id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)
+				trace__read_syscall_info(trace, id);
+
+			if (id > trace->syscalls.max || trace->syscalls.table[id].name == NULL)
+				continue;
+
+			sc = &trace->syscalls.table[id];
+
+			if (evlist->threads->map[0] == -1 || evlist->threads->nr > 1)
+				printf("%d ", sample.tid);
+
+			if (evsel == evsel_enter) {
+				void *args = perf_evsel__rawptr(evsel, &sample, "args");
+
+				printf("%s(", sc->name);
+				syscall__fprintf_args(sc, args, stdout);
+			} else if (evsel == evsel_exit) {
+				/* Keep the full width: ret may not fit in an int (e.g. an mmap() address). */
+				long ret = perf_evsel__intval(evsel, &sample, "ret");
+
+				if (ret < 0 && sc->fmt && sc->fmt->errmsg) {
+					char bf[256];
+					const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
+						   *e = audit_errno_to_name(-ret);
+
+					printf(") = -1 %s %s", e ? e : "?", emsg);
+				} else if (ret == 0 && sc->fmt && sc->fmt->timeout)
+					printf(") = 0 Timeout");
+				else
+					printf(") = %ld", ret);
+
+				putchar('\n');
+			}
+		}
+	}
+
+	if (nr_events == before)
+		poll(evlist->pollfd, evlist->nr_fds, -1);
+
+	goto again;
+
+out_delete_evlist:
+	perf_evlist__delete(evlist);
+out:
+	return err;
+}
+
+/* 'perf trace' entry point: parse the target options, resolve --uid, then run the tracer. */
+int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+	const char * const trace_usage[] = {
+		"perf trace [<options>]",
+		NULL
+	};
+	struct trace trace = {
+		.audit_machine = audit_detect_machine(),
+		.syscalls = {
+			.max = -1,
+		},
+		.opts = {
+			.target = {
+				.uid	   = UINT_MAX,
+				.uses_mmap = true,
+			},
+			.user_freq     = UINT_MAX,
+			.user_interval = ULLONG_MAX,
+			.no_delay      = true,
+			.mmap_pages    = 1024,
+		},
+	};
+	const struct option trace_options[] = {
+	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
+		    "trace events on existing process id"),
+	OPT_STRING(0, "tid", &trace.opts.target.tid, "tid",
+		    "trace events on existing thread id"),
+	OPT_BOOLEAN(0, "all-cpus", &trace.opts.target.system_wide,
+		    "system-wide collection from all CPUs"),
+	OPT_STRING(0, "cpu", &trace.opts.target.cpu_list, "cpu",
+		    "list of cpus to monitor"),
+	OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
+		    "child tasks do not inherit counters"),
+	OPT_UINTEGER(0, "mmap-pages", &trace.opts.mmap_pages,
+		     "number of mmap data pages"),
+	OPT_STRING(0, "uid", &trace.opts.target.uid_str, "user",
+		   "user to profile"),
+	OPT_END()
+	};
+	int err;
+
+	argc = parse_options(argc, argv, trace_options, trace_usage, 0);
+	if (argc)
+		usage_with_options(trace_usage, trace_options);
+
+	err = perf_target__parse_uid(&trace.opts.target);
+	if (err) {
+		char bf[BUFSIZ];
+		perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
+		printf("%s", bf);
+		return err;
+	}
+
+	return trace__run(&trace);
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 3ea74ed1b26..08143bd854c 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -34,6 +34,7 @@ extern int cmd_kmem(int argc, const char **argv, const char *prefix);
 extern int cmd_lock(int argc, const char **argv, const char *prefix);
 extern int cmd_kvm(int argc, const char **argv, const char *prefix);
 extern int cmd_test(int argc, const char **argv, const char *prefix);
+extern int cmd_trace(int argc, const char **argv, const char *prefix);
 extern int cmd_inject(int argc, const char **argv, const char *prefix);
 
 extern int find_scripts(char **scripts_array, char **scripts_path_array);
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 0303ec69227..3e86bbd8c2d 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -17,6 +17,7 @@ perf-report			mainporcelain common
 perf-stat			mainporcelain common
 perf-timechart			mainporcelain common
 perf-top			mainporcelain common
+perf-trace			mainporcelain common
 perf-script			mainporcelain common
 perf-probe			mainporcelain full
 perf-kmem			mainporcelain common
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index fb8578cfa03..3fb052c9a27 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -55,6 +55,7 @@ static struct cmd_struct commands[] = {
 	{ "lock",	cmd_lock,	0 },
 	{ "kvm",	cmd_kvm,	0 },
 	{ "test",	cmd_test,	0 },
+	{ "trace",	cmd_trace,	0 },
 	{ "inject",	cmd_inject,	0 },
 };
 