Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/Kconfig                 |  68
-rw-r--r-- | kernel/trace/Makefile                |   4
-rw-r--r-- | kernel/trace/ftrace.c                |   5
-rw-r--r-- | kernel/trace/kmemtrace.c             | 529
-rw-r--r-- | kernel/trace/ring_buffer.c           |  40
-rw-r--r-- | kernel/trace/trace.c                 | 127
-rw-r--r-- | kernel/trace/trace.h                 |  90
-rw-r--r-- | kernel/trace/trace_boot.c            | 185
-rw-r--r-- | kernel/trace/trace_clock.c           |   5
-rw-r--r-- | kernel/trace/trace_entries.h         |  94
-rw-r--r-- | kernel/trace/trace_event_perf.c      |  27
-rw-r--r-- | kernel/trace/trace_events.c          | 299
-rw-r--r-- | kernel/trace/trace_events_filter.c   |  27
-rw-r--r-- | kernel/trace/trace_export.c          |   8
-rw-r--r-- | kernel/trace/trace_functions.c       |   6
-rw-r--r-- | kernel/trace/trace_functions_graph.c |   3
-rw-r--r-- | kernel/trace/trace_irqsoff.c         |   3
-rw-r--r-- | kernel/trace/trace_kprobe.c          | 379
-rw-r--r-- | kernel/trace/trace_ksym.c            | 508
-rw-r--r-- | kernel/trace/trace_output.c          |  69
-rw-r--r-- | kernel/trace/trace_sched_wakeup.c    |   7
-rw-r--r-- | kernel/trace/trace_selftest.c        |  87
-rw-r--r-- | kernel/trace/trace_stack.c           |   6
-rw-r--r-- | kernel/trace/trace_syscalls.c        |   7
-rw-r--r-- | kernel/trace/trace_sysprof.c         | 329
25 files changed, 570 insertions(+), 2342 deletions(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8b1797c4545..c7683fd8a03 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -194,15 +194,6 @@ config PREEMPT_TRACER  	  enabled. This option and the irqs-off timing option can be  	  used together or separately.) -config SYSPROF_TRACER -	bool "Sysprof Tracer" -	depends on X86 -	select GENERIC_TRACER -	select CONTEXT_SWITCH_TRACER -	help -	  This tracer provides the trace needed by the 'Sysprof' userspace -	  tool. -  config SCHED_TRACER  	bool "Scheduling Latency Tracer"  	select GENERIC_TRACER @@ -229,23 +220,6 @@ config FTRACE_SYSCALLS  	help  	  Basic tracer to catch the syscall entry and exit events. -config BOOT_TRACER -	bool "Trace boot initcalls" -	select GENERIC_TRACER -	select CONTEXT_SWITCH_TRACER -	help -	  This tracer helps developers to optimize boot times: it records -	  the timings of the initcalls and traces key events and the identity -	  of tasks that can cause boot delays, such as context-switches. - -	  Its aim is to be parsed by the scripts/bootgraph.pl tool to -	  produce pretty graphics about boot inefficiencies, giving a visual -	  representation of the delays during initcalls - but the raw -	  /debug/tracing/trace text output is readable too. - -	  You must pass in initcall_debug and ftrace=initcall to the kernel -	  command line to enable this on bootup. -  config TRACE_BRANCH_PROFILING  	bool  	select GENERIC_TRACER @@ -325,28 +299,6 @@ config BRANCH_TRACER  	  Say N if unsure. -config KSYM_TRACER -	bool "Trace read and write access on kernel memory locations" -	depends on HAVE_HW_BREAKPOINT -	select TRACING -	help -	  This tracer helps find read and write operations on any given kernel -	  symbol i.e. /proc/kallsyms. - -config PROFILE_KSYM_TRACER -	bool "Profile all kernel memory accesses on 'watched' variables" -	depends on KSYM_TRACER -	help -	  This tracer profiles kernel accesses on variables watched through the -	  ksym tracer ftrace plugin. Depending upon the hardware, all read -	  and write operations on kernel variables can be monitored for -	  accesses. - -	  The results will be displayed in: -	  /debugfs/tracing/profile_ksym - -	  Say N if unsure. -  config STACK_TRACER  	bool "Trace max stack"  	depends on HAVE_FUNCTION_TRACER @@ -371,26 +323,6 @@ config STACK_TRACER  	  Say N if unsure. -config KMEMTRACE -	bool "Trace SLAB allocations" -	select GENERIC_TRACER -	help -	  kmemtrace provides tracing for slab allocator functions, such as -	  kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected -	  data is then fed to the userspace application in order to analyse -	  allocation hotspots, internal fragmentation and so on, making it -	  possible to see how well an allocator performs, as well as debug -	  and profile kernel code. - -	  This requires an userspace application to use. See -	  Documentation/trace/kmemtrace.txt for more information. - -	  Saying Y will make the kernel somewhat larger and slower. However, -	  if you disable kmemtrace at run-time or boot-time, the performance -	  impact is minimal (depending on the arch the kernel is built for). - -	  If unsure, say N. 
-  config WORKQUEUE_TRACER  	bool "Trace workqueues"  	select GENERIC_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 4215530b490..53f338190b2 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -30,7 +30,6 @@ obj-$(CONFIG_TRACING) += trace_output.o  obj-$(CONFIG_TRACING) += trace_stat.o  obj-$(CONFIG_TRACING) += trace_printk.o  obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o -obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o  obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o  obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o  obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o @@ -38,10 +37,8 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o  obj-$(CONFIG_NOP_TRACER) += trace_nop.o  obj-$(CONFIG_STACK_TRACER) += trace_stack.o  obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o -obj-$(CONFIG_BOOT_TRACER) += trace_boot.o  obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o  obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o -obj-$(CONFIG_KMEMTRACE) += kmemtrace.o  obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o  obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o  ifeq ($(CONFIG_BLOCK),y) @@ -55,7 +52,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o  endif  obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o  obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o -obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o  obj-$(CONFIG_EVENT_TRACING) += power-traces.o  ifeq ($(CONFIG_TRACING),y)  obj-$(CONFIG_KGDB_KDB) += trace_kdb.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6d2cb14f944..0d88ce9b9fb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1883,7 +1883,6 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)  	struct hlist_head *hhd;  	struct hlist_node *n;  	unsigned long key; -	int resched;  	key = hash_long(ip, FTRACE_HASH_BITS); @@ -1897,12 +1896,12 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)  	 * period. This syncs the hash iteration and freeing of items  	 * on the hash. rcu_read_lock is too dangerous here.  	 
*/ -	resched = ftrace_preempt_disable(); +	preempt_disable_notrace();  	hlist_for_each_entry_rcu(entry, n, hhd, node) {  		if (entry->ip == ip)  			entry->ops->func(ip, parent_ip, &entry->data);  	} -	ftrace_preempt_enable(resched); +	preempt_enable_notrace();  }  static struct ftrace_ops trace_probe_ops __read_mostly = diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c deleted file mode 100644 index bbfc1bb1660..00000000000 --- a/kernel/trace/kmemtrace.c +++ /dev/null @@ -1,529 +0,0 @@ -/* - * Memory allocator tracing - * - * Copyright (C) 2008 Eduard - Gabriel Munteanu - * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi> - * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com> - */ - -#include <linux/tracepoint.h> -#include <linux/seq_file.h> -#include <linux/debugfs.h> -#include <linux/dcache.h> -#include <linux/fs.h> - -#include <linux/kmemtrace.h> - -#include "trace_output.h" -#include "trace.h" - -/* Select an alternative, minimalistic output than the original one */ -#define TRACE_KMEM_OPT_MINIMAL	0x1 - -static struct tracer_opt kmem_opts[] = { -	/* Default disable the minimalistic output */ -	{ TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) }, -	{ } -}; - -static struct tracer_flags kmem_tracer_flags = { -	.val			= 0, -	.opts			= kmem_opts -}; - -static struct trace_array *kmemtrace_array; - -/* Trace allocations */ -static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id, -				   unsigned long call_site, -				   const void *ptr, -				   size_t bytes_req, -				   size_t bytes_alloc, -				   gfp_t gfp_flags, -				   int node) -{ -	struct ftrace_event_call *call = &event_kmem_alloc; -	struct trace_array *tr = kmemtrace_array; -	struct kmemtrace_alloc_entry *entry; -	struct ring_buffer_event *event; - -	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); -	if (!event) -		return; - -	entry = ring_buffer_event_data(event); -	tracing_generic_entry_update(&entry->ent, 0, 0); - -	entry->ent.type		= TRACE_KMEM_ALLOC; -	entry->type_id		= type_id; -	entry->call_site	= call_site; -	entry->ptr		= ptr; -	entry->bytes_req	= bytes_req; -	entry->bytes_alloc	= bytes_alloc; -	entry->gfp_flags	= gfp_flags; -	entry->node		= node; - -	if (!filter_check_discard(call, entry, tr->buffer, event)) -		ring_buffer_unlock_commit(tr->buffer, event); - -	trace_wake_up(); -} - -static inline void kmemtrace_free(enum kmemtrace_type_id type_id, -				  unsigned long call_site, -				  const void *ptr) -{ -	struct ftrace_event_call *call = &event_kmem_free; -	struct trace_array *tr = kmemtrace_array; -	struct kmemtrace_free_entry *entry; -	struct ring_buffer_event *event; - -	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); -	if (!event) -		return; -	entry	= ring_buffer_event_data(event); -	tracing_generic_entry_update(&entry->ent, 0, 0); - -	entry->ent.type		= TRACE_KMEM_FREE; -	entry->type_id		= type_id; -	entry->call_site	= call_site; -	entry->ptr		= ptr; - -	if (!filter_check_discard(call, entry, tr->buffer, event)) -		ring_buffer_unlock_commit(tr->buffer, event); - -	trace_wake_up(); -} - -static void kmemtrace_kmalloc(void *ignore, -			      unsigned long call_site, -			      const void *ptr, -			      size_t bytes_req, -			      size_t bytes_alloc, -			      gfp_t gfp_flags) -{ -	kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr, -			bytes_req, bytes_alloc, gfp_flags, -1); -} - -static void kmemtrace_kmem_cache_alloc(void *ignore, -				       unsigned long call_site, -				       const void *ptr, -				       size_t bytes_req, -				       
size_t bytes_alloc, -				       gfp_t gfp_flags) -{ -	kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr, -			bytes_req, bytes_alloc, gfp_flags, -1); -} - -static void kmemtrace_kmalloc_node(void *ignore, -				   unsigned long call_site, -				   const void *ptr, -				   size_t bytes_req, -				   size_t bytes_alloc, -				   gfp_t gfp_flags, -				   int node) -{ -	kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr, -			bytes_req, bytes_alloc, gfp_flags, node); -} - -static void kmemtrace_kmem_cache_alloc_node(void *ignore, -					    unsigned long call_site, -					    const void *ptr, -					    size_t bytes_req, -					    size_t bytes_alloc, -					    gfp_t gfp_flags, -					    int node) -{ -	kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr, -			bytes_req, bytes_alloc, gfp_flags, node); -} - -static void -kmemtrace_kfree(void *ignore, unsigned long call_site, const void *ptr) -{ -	kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr); -} - -static void kmemtrace_kmem_cache_free(void *ignore, -				      unsigned long call_site, const void *ptr) -{ -	kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr); -} - -static int kmemtrace_start_probes(void) -{ -	int err; - -	err = register_trace_kmalloc(kmemtrace_kmalloc, NULL); -	if (err) -		return err; -	err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL); -	if (err) -		return err; -	err = register_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL); -	if (err) -		return err; -	err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL); -	if (err) -		return err; -	err = register_trace_kfree(kmemtrace_kfree, NULL); -	if (err) -		return err; -	err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL); - -	return err; -} - -static void kmemtrace_stop_probes(void) -{ -	unregister_trace_kmalloc(kmemtrace_kmalloc, NULL); -	unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL); -	unregister_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL); -	unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL); -	unregister_trace_kfree(kmemtrace_kfree, NULL); -	unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL); -} - -static int kmem_trace_init(struct trace_array *tr) -{ -	kmemtrace_array = tr; - -	tracing_reset_online_cpus(tr); - -	kmemtrace_start_probes(); - -	return 0; -} - -static void kmem_trace_reset(struct trace_array *tr) -{ -	kmemtrace_stop_probes(); -} - -static void kmemtrace_headers(struct seq_file *s) -{ -	/* Don't need headers for the original kmemtrace output */ -	if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)) -		return; - -	seq_printf(s, "#\n"); -	seq_printf(s, "# ALLOC  TYPE  REQ   GIVEN  FLAGS     " -			"      POINTER         NODE    CALLER\n"); -	seq_printf(s, "# FREE   |      |     |       |       " -			"       |   |            |        |\n"); -	seq_printf(s, "# |\n\n"); -} - -/* - * The following functions give the original output from kmemtrace, - * plus the origin CPU, since reordering occurs in-kernel now. 
- */ - -#define KMEMTRACE_USER_ALLOC	0 -#define KMEMTRACE_USER_FREE	1 - -struct kmemtrace_user_event { -	u8			event_id; -	u8			type_id; -	u16			event_size; -	u32			cpu; -	u64			timestamp; -	unsigned long		call_site; -	unsigned long		ptr; -}; - -struct kmemtrace_user_event_alloc { -	size_t			bytes_req; -	size_t			bytes_alloc; -	unsigned		gfp_flags; -	int			node; -}; - -static enum print_line_t -kmemtrace_print_alloc(struct trace_iterator *iter, int flags, -		      struct trace_event *event) -{ -	struct trace_seq *s = &iter->seq; -	struct kmemtrace_alloc_entry *entry; -	int ret; - -	trace_assign_type(entry, iter->ent); - -	ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu " -	    "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n", -	    entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr, -	    (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc, -	    (unsigned long)entry->gfp_flags, entry->node); - -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; -	return TRACE_TYPE_HANDLED; -} - -static enum print_line_t -kmemtrace_print_free(struct trace_iterator *iter, int flags, -		     struct trace_event *event) -{ -	struct trace_seq *s = &iter->seq; -	struct kmemtrace_free_entry *entry; -	int ret; - -	trace_assign_type(entry, iter->ent); - -	ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n", -			       entry->type_id, (void *)entry->call_site, -			       (unsigned long)entry->ptr); - -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; -	return TRACE_TYPE_HANDLED; -} - -static enum print_line_t -kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags, -			   struct trace_event *event) -{ -	struct trace_seq *s = &iter->seq; -	struct kmemtrace_alloc_entry *entry; -	struct kmemtrace_user_event *ev; -	struct kmemtrace_user_event_alloc *ev_alloc; - -	trace_assign_type(entry, iter->ent); - -	ev = trace_seq_reserve(s, sizeof(*ev)); -	if (!ev) -		return TRACE_TYPE_PARTIAL_LINE; - -	ev->event_id		= KMEMTRACE_USER_ALLOC; -	ev->type_id		= entry->type_id; -	ev->event_size		= sizeof(*ev) + sizeof(*ev_alloc); -	ev->cpu			= iter->cpu; -	ev->timestamp		= iter->ts; -	ev->call_site		= entry->call_site; -	ev->ptr			= (unsigned long)entry->ptr; - -	ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc)); -	if (!ev_alloc) -		return TRACE_TYPE_PARTIAL_LINE; - -	ev_alloc->bytes_req	= entry->bytes_req; -	ev_alloc->bytes_alloc	= entry->bytes_alloc; -	ev_alloc->gfp_flags	= entry->gfp_flags; -	ev_alloc->node		= entry->node; - -	return TRACE_TYPE_HANDLED; -} - -static enum print_line_t -kmemtrace_print_free_user(struct trace_iterator *iter, int flags, -			  struct trace_event *event) -{ -	struct trace_seq *s = &iter->seq; -	struct kmemtrace_free_entry *entry; -	struct kmemtrace_user_event *ev; - -	trace_assign_type(entry, iter->ent); - -	ev = trace_seq_reserve(s, sizeof(*ev)); -	if (!ev) -		return TRACE_TYPE_PARTIAL_LINE; - -	ev->event_id		= KMEMTRACE_USER_FREE; -	ev->type_id		= entry->type_id; -	ev->event_size		= sizeof(*ev); -	ev->cpu			= iter->cpu; -	ev->timestamp		= iter->ts; -	ev->call_site		= entry->call_site; -	ev->ptr			= (unsigned long)entry->ptr; - -	return TRACE_TYPE_HANDLED; -} - -/* The two other following provide a more minimalistic output */ -static enum print_line_t -kmemtrace_print_alloc_compress(struct trace_iterator *iter) -{ -	struct kmemtrace_alloc_entry *entry; -	struct trace_seq *s = &iter->seq; -	int ret; - -	trace_assign_type(entry, iter->ent); - -	/* Alloc entry */ -	ret = trace_seq_printf(s, "  +      "); -	if (!ret) -		return 
TRACE_TYPE_PARTIAL_LINE; - -	/* Type */ -	switch (entry->type_id) { -	case KMEMTRACE_TYPE_KMALLOC: -		ret = trace_seq_printf(s, "K   "); -		break; -	case KMEMTRACE_TYPE_CACHE: -		ret = trace_seq_printf(s, "C   "); -		break; -	case KMEMTRACE_TYPE_PAGES: -		ret = trace_seq_printf(s, "P   "); -		break; -	default: -		ret = trace_seq_printf(s, "?   "); -	} - -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; - -	/* Requested */ -	ret = trace_seq_printf(s, "%4zu   ", entry->bytes_req); -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; - -	/* Allocated */ -	ret = trace_seq_printf(s, "%4zu   ", entry->bytes_alloc); -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; - -	/* Flags -	 * TODO: would be better to see the name of the GFP flag names -	 */ -	ret = trace_seq_printf(s, "%08x   ", entry->gfp_flags); -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; - -	/* Pointer to allocated */ -	ret = trace_seq_printf(s, "0x%tx   ", (ptrdiff_t)entry->ptr); -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; - -	/* Node and call site*/ -	ret = trace_seq_printf(s, "%4d   %pf\n", entry->node, -						 (void *)entry->call_site); -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; - -	return TRACE_TYPE_HANDLED; -} - -static enum print_line_t -kmemtrace_print_free_compress(struct trace_iterator *iter) -{ -	struct kmemtrace_free_entry *entry; -	struct trace_seq *s = &iter->seq; -	int ret; - -	trace_assign_type(entry, iter->ent); - -	/* Free entry */ -	ret = trace_seq_printf(s, "  -      "); -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; - -	/* Type */ -	switch (entry->type_id) { -	case KMEMTRACE_TYPE_KMALLOC: -		ret = trace_seq_printf(s, "K     "); -		break; -	case KMEMTRACE_TYPE_CACHE: -		ret = trace_seq_printf(s, "C     "); -		break; -	case KMEMTRACE_TYPE_PAGES: -		ret = trace_seq_printf(s, "P     "); -		break; -	default: -		ret = trace_seq_printf(s, "?     
"); -	} - -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; - -	/* Skip requested/allocated/flags */ -	ret = trace_seq_printf(s, "                       "); -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; - -	/* Pointer to allocated */ -	ret = trace_seq_printf(s, "0x%tx   ", (ptrdiff_t)entry->ptr); -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; - -	/* Skip node and print call site*/ -	ret = trace_seq_printf(s, "       %pf\n", (void *)entry->call_site); -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; - -	return TRACE_TYPE_HANDLED; -} - -static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter) -{ -	struct trace_entry *entry = iter->ent; - -	if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)) -		return TRACE_TYPE_UNHANDLED; - -	switch (entry->type) { -	case TRACE_KMEM_ALLOC: -		return kmemtrace_print_alloc_compress(iter); -	case TRACE_KMEM_FREE: -		return kmemtrace_print_free_compress(iter); -	default: -		return TRACE_TYPE_UNHANDLED; -	} -} - -static struct trace_event_functions kmem_trace_alloc_funcs = { -	.trace			= kmemtrace_print_alloc, -	.binary			= kmemtrace_print_alloc_user, -}; - -static struct trace_event kmem_trace_alloc = { -	.type			= TRACE_KMEM_ALLOC, -	.funcs			= &kmem_trace_alloc_funcs, -}; - -static struct trace_event_functions kmem_trace_free_funcs = { -	.trace			= kmemtrace_print_free, -	.binary			= kmemtrace_print_free_user, -}; - -static struct trace_event kmem_trace_free = { -	.type			= TRACE_KMEM_FREE, -	.funcs			= &kmem_trace_free_funcs, -}; - -static struct tracer kmem_tracer __read_mostly = { -	.name			= "kmemtrace", -	.init			= kmem_trace_init, -	.reset			= kmem_trace_reset, -	.print_line		= kmemtrace_print_line, -	.print_header		= kmemtrace_headers, -	.flags			= &kmem_tracer_flags -}; - -void kmemtrace_init(void) -{ -	/* earliest opportunity to start kmem tracing */ -} - -static int __init init_kmem_tracer(void) -{ -	if (!register_ftrace_event(&kmem_trace_alloc)) { -		pr_warning("Warning: could not register kmem events\n"); -		return 1; -	} - -	if (!register_ftrace_event(&kmem_trace_free)) { -		pr_warning("Warning: could not register kmem events\n"); -		return 1; -	} - -	if (register_tracer(&kmem_tracer) != 0) { -		pr_warning("Warning: could not register the kmem tracer\n"); -		return 1; -	} - -	return 0; -} -device_initcall(init_kmem_tracer); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1da7b6ea8b8..3632ce87674 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -443,6 +443,7 @@ int ring_buffer_print_page_header(struct trace_seq *s)   */  struct ring_buffer_per_cpu {  	int				cpu; +	atomic_t			record_disabled;  	struct ring_buffer		*buffer;  	spinlock_t			reader_lock;	/* serialize readers */  	arch_spinlock_t			lock; @@ -462,7 +463,6 @@ struct ring_buffer_per_cpu {  	unsigned long			read;  	u64				write_stamp;  	u64				read_stamp; -	atomic_t			record_disabled;  };  struct ring_buffer { @@ -2242,8 +2242,6 @@ static void trace_recursive_unlock(void)  #endif -static DEFINE_PER_CPU(int, rb_need_resched); -  /**   * ring_buffer_lock_reserve - reserve a part of the buffer   * @buffer: the ring buffer to reserve from @@ -2264,13 +2262,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)  {  	struct ring_buffer_per_cpu *cpu_buffer;  	struct ring_buffer_event *event; -	int cpu, resched; +	int cpu;  	if (ring_buffer_flags != RB_BUFFERS_ON)  		return NULL;  	/* If we are tracing schedule, we don't want to recurse */ -	resched = ftrace_preempt_disable(); +	
preempt_disable_notrace();  	if (atomic_read(&buffer->record_disabled))  		goto out_nocheck; @@ -2295,21 +2293,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)  	if (!event)  		goto out; -	/* -	 * Need to store resched state on this cpu. -	 * Only the first needs to. -	 */ - -	if (preempt_count() == 1) -		per_cpu(rb_need_resched, cpu) = resched; -  	return event;   out:  	trace_recursive_unlock();   out_nocheck: -	ftrace_preempt_enable(resched); +	preempt_enable_notrace();  	return NULL;  }  EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); @@ -2355,13 +2345,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,  	trace_recursive_unlock(); -	/* -	 * Only the last preempt count needs to restore preemption. -	 */ -	if (preempt_count() == 1) -		ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); -	else -		preempt_enable_no_resched_notrace(); +	preempt_enable_notrace();  	return 0;  } @@ -2469,13 +2453,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,  	trace_recursive_unlock(); -	/* -	 * Only the last preempt count needs to restore preemption. -	 */ -	if (preempt_count() == 1) -		ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); -	else -		preempt_enable_no_resched_notrace(); +	preempt_enable_notrace();  }  EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); @@ -2501,12 +2479,12 @@ int ring_buffer_write(struct ring_buffer *buffer,  	struct ring_buffer_event *event;  	void *body;  	int ret = -EBUSY; -	int cpu, resched; +	int cpu;  	if (ring_buffer_flags != RB_BUFFERS_ON)  		return -EBUSY; -	resched = ftrace_preempt_disable(); +	preempt_disable_notrace();  	if (atomic_read(&buffer->record_disabled))  		goto out; @@ -2536,7 +2514,7 @@ int ring_buffer_write(struct ring_buffer *buffer,  	ret = 0;   out: -	ftrace_preempt_enable(resched); +	preempt_enable_notrace();  	return ret;  } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d6736b93dc2..ed1032d6f81 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -341,7 +341,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);  /* trace_flags holds trace_options default values */  unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |  	TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | -	TRACE_ITER_GRAPH_TIME; +	TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD;  static int trace_stop_count;  static DEFINE_SPINLOCK(tracing_start_lock); @@ -425,6 +425,7 @@ static const char *trace_options[] = {  	"latency-format",  	"sleep-time",  	"graph-time", +	"record-cmd",  	NULL  }; @@ -656,6 +657,10 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)  		return;  	WARN_ON_ONCE(!irqs_disabled()); +	if (!current_trace->use_max_tr) { +		WARN_ON_ONCE(1); +		return; +	}  	arch_spin_lock(&ftrace_max_lock);  	tr->buffer = max_tr.buffer; @@ -682,6 +687,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)  		return;  	WARN_ON_ONCE(!irqs_disabled()); +	if (!current_trace->use_max_tr) { +		WARN_ON_ONCE(1); +		return; +	} +  	arch_spin_lock(&ftrace_max_lock);  	ftrace_disable_cpu(); @@ -726,7 +736,7 @@ __acquires(kernel_lock)  		return -1;  	} -	if (strlen(type->name) > MAX_TRACER_SIZE) { +	if (strlen(type->name) >= MAX_TRACER_SIZE) {  		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);  		return -1;  	} @@ -1328,61 +1338,6 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)  #endif /* CONFIG_STACKTRACE */ -static void -ftrace_trace_special(void *__tr, -		     unsigned long arg1, 
unsigned long arg2, unsigned long arg3, -		     int pc) -{ -	struct ftrace_event_call *call = &event_special; -	struct ring_buffer_event *event; -	struct trace_array *tr = __tr; -	struct ring_buffer *buffer = tr->buffer; -	struct special_entry *entry; - -	event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL, -					  sizeof(*entry), 0, pc); -	if (!event) -		return; -	entry	= ring_buffer_event_data(event); -	entry->arg1			= arg1; -	entry->arg2			= arg2; -	entry->arg3			= arg3; - -	if (!filter_check_discard(call, entry, buffer, event)) -		trace_buffer_unlock_commit(buffer, event, 0, pc); -} - -void -__trace_special(void *__tr, void *__data, -		unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -	ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count()); -} - -void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -	struct trace_array *tr = &global_trace; -	struct trace_array_cpu *data; -	unsigned long flags; -	int cpu; -	int pc; - -	if (tracing_disabled) -		return; - -	pc = preempt_count(); -	local_irq_save(flags); -	cpu = raw_smp_processor_id(); -	data = tr->data[cpu]; - -	if (likely(atomic_inc_return(&data->disabled) == 1)) -		ftrace_trace_special(tr, arg1, arg2, arg3, pc); - -	atomic_dec(&data->disabled); -	local_irq_restore(flags); -} -  /**   * trace_vbprintk - write binary msg to tracing buffer   * @@ -1401,7 +1356,6 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)  	struct bprint_entry *entry;  	unsigned long flags;  	int disable; -	int resched;  	int cpu, len = 0, size, pc;  	if (unlikely(tracing_selftest_running || tracing_disabled)) @@ -1411,7 +1365,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)  	pause_graph_tracing();  	pc = preempt_count(); -	resched = ftrace_preempt_disable(); +	preempt_disable_notrace();  	cpu = raw_smp_processor_id();  	data = tr->data[cpu]; @@ -1449,7 +1403,7 @@ out_unlock:  out:  	atomic_dec_return(&data->disabled); -	ftrace_preempt_enable(resched); +	preempt_enable_notrace();  	unpause_graph_tracing();  	return len; @@ -2386,6 +2340,7 @@ static const struct file_operations show_traces_fops = {  	.open		= show_traces_open,  	.read		= seq_read,  	.release	= seq_release, +	.llseek		= seq_lseek,  };  /* @@ -2479,6 +2434,7 @@ static const struct file_operations tracing_cpumask_fops = {  	.open		= tracing_open_generic,  	.read		= tracing_cpumask_read,  	.write		= tracing_cpumask_write, +	.llseek		= generic_file_llseek,  };  static int tracing_trace_options_show(struct seq_file *m, void *v) @@ -2554,6 +2510,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)  		trace_flags |= mask;  	else  		trace_flags &= ~mask; + +	if (mask == TRACE_ITER_RECORD_CMD) +		trace_event_enable_cmd_record(enabled);  }  static ssize_t @@ -2645,6 +2604,7 @@ tracing_readme_read(struct file *filp, char __user *ubuf,  static const struct file_operations tracing_readme_fops = {  	.open		= tracing_open_generic,  	.read		= tracing_readme_read, +	.llseek		= generic_file_llseek,  };  static ssize_t @@ -2695,6 +2655,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,  static const struct file_operations tracing_saved_cmdlines_fops = {      .open       = tracing_open_generic,      .read       = tracing_saved_cmdlines_read, +    .llseek	= generic_file_llseek,  };  static ssize_t @@ -2790,6 +2751,9 @@ static int tracing_resize_ring_buffer(unsigned long size)  	if (ret < 0)  		return ret; +	if (!current_trace->use_max_tr) +		goto out; +  	ret = 
ring_buffer_resize(max_tr.buffer, size);  	if (ret < 0) {  		int r; @@ -2817,11 +2781,14 @@ static int tracing_resize_ring_buffer(unsigned long size)  		return ret;  	} +	max_tr.entries = size; + out:  	global_trace.entries = size;  	return ret;  } +  /**   * tracing_update_buffers - used by tracing facility to expand ring buffers   * @@ -2882,12 +2849,26 @@ static int tracing_set_tracer(const char *buf)  	trace_branch_disable();  	if (current_trace && current_trace->reset)  		current_trace->reset(tr); - +	if (current_trace && current_trace->use_max_tr) { +		/* +		 * We don't free the ring buffer. instead, resize it because +		 * The max_tr ring buffer has some state (e.g. ring->clock) and +		 * we want preserve it. +		 */ +		ring_buffer_resize(max_tr.buffer, 1); +		max_tr.entries = 1; +	}  	destroy_trace_option_files(topts);  	current_trace = t;  	topts = create_trace_option_files(current_trace); +	if (current_trace->use_max_tr) { +		ret = ring_buffer_resize(max_tr.buffer, global_trace.entries); +		if (ret < 0) +			goto out; +		max_tr.entries = global_trace.entries; +	}  	if (t->init) {  		ret = tracer_init(t, tr); @@ -3024,6 +3005,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)  	if (iter->trace->pipe_open)  		iter->trace->pipe_open(iter); +	nonseekable_open(inode, filp);  out:  	mutex_unlock(&trace_types_lock);  	return ret; @@ -3469,7 +3451,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,  	}  	tracing_start(); -	max_tr.entries = global_trace.entries;  	mutex_unlock(&trace_types_lock);  	return cnt; @@ -3582,18 +3563,21 @@ static const struct file_operations tracing_max_lat_fops = {  	.open		= tracing_open_generic,  	.read		= tracing_max_lat_read,  	.write		= tracing_max_lat_write, +	.llseek		= generic_file_llseek,  };  static const struct file_operations tracing_ctrl_fops = {  	.open		= tracing_open_generic,  	.read		= tracing_ctrl_read,  	.write		= tracing_ctrl_write, +	.llseek		= generic_file_llseek,  };  static const struct file_operations set_tracer_fops = {  	.open		= tracing_open_generic,  	.read		= tracing_set_trace_read,  	.write		= tracing_set_trace_write, +	.llseek		= generic_file_llseek,  };  static const struct file_operations tracing_pipe_fops = { @@ -3602,17 +3586,20 @@ static const struct file_operations tracing_pipe_fops = {  	.read		= tracing_read_pipe,  	.splice_read	= tracing_splice_read_pipe,  	.release	= tracing_release_pipe, +	.llseek		= no_llseek,  };  static const struct file_operations tracing_entries_fops = {  	.open		= tracing_open_generic,  	.read		= tracing_entries_read,  	.write		= tracing_entries_write, +	.llseek		= generic_file_llseek,  };  static const struct file_operations tracing_mark_fops = {  	.open		= tracing_open_generic,  	.write		= tracing_mark_write, +	.llseek		= generic_file_llseek,  };  static const struct file_operations trace_clock_fops = { @@ -3918,6 +3905,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,  static const struct file_operations tracing_stats_fops = {  	.open		= tracing_open_generic,  	.read		= tracing_stats_read, +	.llseek		= generic_file_llseek,  };  #ifdef CONFIG_DYNAMIC_FTRACE @@ -3954,6 +3942,7 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf,  static const struct file_operations tracing_dyn_info_fops = {  	.open		= tracing_open_generic,  	.read		= tracing_read_dyn_info, +	.llseek		= generic_file_llseek,  };  #endif @@ -4107,6 +4096,7 @@ static const struct file_operations trace_options_fops = {  	.open = tracing_open_generic,  	.read = 
trace_options_read,  	.write = trace_options_write, +	.llseek	= generic_file_llseek,  };  static ssize_t @@ -4158,6 +4148,7 @@ static const struct file_operations trace_options_core_fops = {  	.open = tracing_open_generic,  	.read = trace_options_core_read,  	.write = trace_options_core_write, +	.llseek = generic_file_llseek,  };  struct dentry *trace_create_file(const char *name, @@ -4347,9 +4338,6 @@ static __init int tracer_init_debugfs(void)  	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,  			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);  #endif -#ifdef CONFIG_SYSPROF_TRACER -	init_tracer_sysprof_debugfs(d_tracer); -#endif  	create_trace_options_dir(); @@ -4576,16 +4564,14 @@ __init static int tracer_alloc_buffers(void)  #ifdef CONFIG_TRACER_MAX_TRACE -	max_tr.buffer = ring_buffer_alloc(ring_buf_size, -					     TRACE_BUFFER_FLAGS); +	max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS);  	if (!max_tr.buffer) {  		printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");  		WARN_ON(1);  		ring_buffer_free(global_trace.buffer);  		goto out_free_cpumask;  	} -	max_tr.entries = ring_buffer_size(max_tr.buffer); -	WARN_ON(max_tr.entries != global_trace.entries); +	max_tr.entries = 1;  #endif  	/* Allocate the first page for all buffers */ @@ -4598,9 +4584,6 @@ __init static int tracer_alloc_buffers(void)  	register_tracer(&nop_trace);  	current_trace = &nop_trace; -#ifdef CONFIG_BOOT_TRACER -	register_tracer(&boot_tracer); -#endif  	/* All seems OK, enable tracing */  	tracing_disabled = 0; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 0605fc00c17..d39b3c5454a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -9,10 +9,7 @@  #include <linux/mmiotrace.h>  #include <linux/tracepoint.h>  #include <linux/ftrace.h> -#include <trace/boot.h> -#include <linux/kmemtrace.h>  #include <linux/hw_breakpoint.h> -  #include <linux/trace_seq.h>  #include <linux/ftrace_event.h> @@ -25,30 +22,17 @@ enum trace_type {  	TRACE_STACK,  	TRACE_PRINT,  	TRACE_BPRINT, -	TRACE_SPECIAL,  	TRACE_MMIO_RW,  	TRACE_MMIO_MAP,  	TRACE_BRANCH, -	TRACE_BOOT_CALL, -	TRACE_BOOT_RET,  	TRACE_GRAPH_RET,  	TRACE_GRAPH_ENT,  	TRACE_USER_STACK, -	TRACE_KMEM_ALLOC, -	TRACE_KMEM_FREE,  	TRACE_BLK, -	TRACE_KSYM,  	__TRACE_LAST_TYPE,  }; -enum kmemtrace_type_id { -	KMEMTRACE_TYPE_KMALLOC = 0,	/* kmalloc() or kfree(). */ -	KMEMTRACE_TYPE_CACHE,		/* kmem_cache_*(). */ -	KMEMTRACE_TYPE_PAGES,		/* __get_free_pages() and friends. 
*/ -}; - -extern struct tracer boot_tracer;  #undef __field  #define __field(type, item)		type	item; @@ -204,23 +188,15 @@ extern void __ftrace_bad_type(void);  		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\  		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\  		IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT);	\ -		IF_ASSIGN(var, ent, struct special_entry, 0);		\  		IF_ASSIGN(var, ent, struct trace_mmiotrace_rw,		\  			  TRACE_MMIO_RW);				\  		IF_ASSIGN(var, ent, struct trace_mmiotrace_map,		\  			  TRACE_MMIO_MAP);				\ -		IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ -		IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\  		IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \  		IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry,	\  			  TRACE_GRAPH_ENT);		\  		IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry,	\  			  TRACE_GRAPH_RET);		\ -		IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,	\ -			  TRACE_KMEM_ALLOC);	\ -		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\ -			  TRACE_KMEM_FREE);	\ -		IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\  		__ftrace_bad_type();					\  	} while (0) @@ -298,6 +274,7 @@ struct tracer {  	struct tracer		*next;  	int			print_max;  	struct tracer_flags	*flags; +	int			use_max_tr;  }; @@ -318,7 +295,6 @@ struct dentry *trace_create_file(const char *name,  				 const struct file_operations *fops);  struct dentry *tracing_init_dentry(void); -void init_tracer_sysprof_debugfs(struct dentry *d_tracer);  struct ring_buffer_event; @@ -363,11 +339,6 @@ void tracing_sched_wakeup_trace(struct trace_array *tr,  				struct task_struct *wakee,  				struct task_struct *cur,  				unsigned long flags, int pc); -void trace_special(struct trace_array *tr, -		   struct trace_array_cpu *data, -		   unsigned long arg1, -		   unsigned long arg2, -		   unsigned long arg3, int pc);  void trace_function(struct trace_array *tr,  		    unsigned long ip,  		    unsigned long parent_ip, @@ -398,8 +369,6 @@ extern cpumask_var_t __read_mostly tracing_buffer_mask;  #define for_each_tracing_cpu(cpu)	\  	for_each_cpu(cpu, tracing_buffer_mask) -extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); -  extern unsigned long nsecs_to_usecs(unsigned long nsecs);  extern unsigned long tracing_thresh; @@ -469,12 +438,8 @@ extern int trace_selftest_startup_nop(struct tracer *trace,  					 struct trace_array *tr);  extern int trace_selftest_startup_sched_switch(struct tracer *trace,  					       struct trace_array *tr); -extern int trace_selftest_startup_sysprof(struct tracer *trace, -					       struct trace_array *tr);  extern int trace_selftest_startup_branch(struct tracer *trace,  					 struct trace_array *tr); -extern int trace_selftest_startup_ksym(struct tracer *trace, -					 struct trace_array *tr);  #endif /* CONFIG_FTRACE_STARTUP_TEST */  extern void *head_page(struct trace_array_cpu *data); @@ -636,6 +601,7 @@ enum trace_iterator_flags {  	TRACE_ITER_LATENCY_FMT		= 0x20000,  	TRACE_ITER_SLEEP_TIME		= 0x40000,  	TRACE_ITER_GRAPH_TIME		= 0x80000, +	TRACE_ITER_RECORD_CMD		= 0x100000,  };  /* @@ -647,54 +613,6 @@ enum trace_iterator_flags {  extern struct tracer nop_trace; -/** - * ftrace_preempt_disable - disable preemption scheduler safe - * - * When tracing can happen inside the scheduler, there exists - * cases that the tracing might happen before the need_resched - * flag is checked. 
If this happens and the tracer calls - * preempt_enable (after a disable), a schedule might take place - * causing an infinite recursion. - * - * To prevent this, we read the need_resched flag before - * disabling preemption. When we want to enable preemption we - * check the flag, if it is set, then we call preempt_enable_no_resched. - * Otherwise, we call preempt_enable. - * - * The rational for doing the above is that if need_resched is set - * and we have yet to reschedule, we are either in an atomic location - * (where we do not need to check for scheduling) or we are inside - * the scheduler and do not want to resched. - */ -static inline int ftrace_preempt_disable(void) -{ -	int resched; - -	resched = need_resched(); -	preempt_disable_notrace(); - -	return resched; -} - -/** - * ftrace_preempt_enable - enable preemption scheduler safe - * @resched: the return value from ftrace_preempt_disable - * - * This is a scheduler safe way to enable preemption and not miss - * any preemption checks. The disabled saved the state of preemption. - * If resched is set, then we are either inside an atomic or - * are inside the scheduler (we would have already scheduled - * otherwise). In this case, we do not want to call normal - * preempt_enable, but preempt_enable_no_resched instead. - */ -static inline void ftrace_preempt_enable(int resched) -{ -	if (resched) -		preempt_enable_no_resched_notrace(); -	else -		preempt_enable_notrace(); -} -  #ifdef CONFIG_BRANCH_TRACER  extern int enable_branch_tracing(struct trace_array *tr);  extern void disable_branch_tracing(void); @@ -785,6 +703,8 @@ struct filter_pred {  	int 			pop_n;  }; +extern struct list_head ftrace_common_fields; +  extern enum regex_type  filter_parse_regex(char *buff, int len, char **search, int *not);  extern void print_event_filter(struct ftrace_event_call *call, @@ -814,6 +734,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,  	return 0;  } +extern void trace_event_enable_cmd_record(bool enable); +  extern struct mutex event_mutex;  extern struct list_head ftrace_events; diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c deleted file mode 100644 index c21d5f3956a..00000000000 --- a/kernel/trace/trace_boot.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * ring buffer based initcalls tracer - * - * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com> - * - */ - -#include <linux/init.h> -#include <linux/debugfs.h> -#include <linux/ftrace.h> -#include <linux/kallsyms.h> -#include <linux/time.h> - -#include "trace.h" -#include "trace_output.h" - -static struct trace_array *boot_trace; -static bool pre_initcalls_finished; - -/* Tells the boot tracer that the pre_smp_initcalls are finished. - * So we are ready . - * It doesn't enable sched events tracing however. - * You have to call enable_boot_trace to do so. 
- */ -void start_boot_trace(void) -{ -	pre_initcalls_finished = true; -} - -void enable_boot_trace(void) -{ -	if (boot_trace && pre_initcalls_finished) -		tracing_start_sched_switch_record(); -} - -void disable_boot_trace(void) -{ -	if (boot_trace && pre_initcalls_finished) -		tracing_stop_sched_switch_record(); -} - -static int boot_trace_init(struct trace_array *tr) -{ -	boot_trace = tr; - -	if (!tr) -		return 0; - -	tracing_reset_online_cpus(tr); - -	tracing_sched_switch_assign_trace(tr); -	return 0; -} - -static enum print_line_t -initcall_call_print_line(struct trace_iterator *iter) -{ -	struct trace_entry *entry = iter->ent; -	struct trace_seq *s = &iter->seq; -	struct trace_boot_call *field; -	struct boot_trace_call *call; -	u64 ts; -	unsigned long nsec_rem; -	int ret; - -	trace_assign_type(field, entry); -	call = &field->boot_call; -	ts = iter->ts; -	nsec_rem = do_div(ts, NSEC_PER_SEC); - -	ret = trace_seq_printf(s, "[%5ld.%09ld] calling  %s @ %i\n", -			(unsigned long)ts, nsec_rem, call->func, call->caller); - -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; -	else -		return TRACE_TYPE_HANDLED; -} - -static enum print_line_t -initcall_ret_print_line(struct trace_iterator *iter) -{ -	struct trace_entry *entry = iter->ent; -	struct trace_seq *s = &iter->seq; -	struct trace_boot_ret *field; -	struct boot_trace_ret *init_ret; -	u64 ts; -	unsigned long nsec_rem; -	int ret; - -	trace_assign_type(field, entry); -	init_ret = &field->boot_ret; -	ts = iter->ts; -	nsec_rem = do_div(ts, NSEC_PER_SEC); - -	ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " -			"returned %d after %llu msecs\n", -			(unsigned long) ts, -			nsec_rem, -			init_ret->func, init_ret->result, init_ret->duration); - -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; -	else -		return TRACE_TYPE_HANDLED; -} - -static enum print_line_t initcall_print_line(struct trace_iterator *iter) -{ -	struct trace_entry *entry = iter->ent; - -	switch (entry->type) { -	case TRACE_BOOT_CALL: -		return initcall_call_print_line(iter); -	case TRACE_BOOT_RET: -		return initcall_ret_print_line(iter); -	default: -		return TRACE_TYPE_UNHANDLED; -	} -} - -struct tracer boot_tracer __read_mostly = -{ -	.name		= "initcall", -	.init		= boot_trace_init, -	.reset		= tracing_reset_online_cpus, -	.print_line	= initcall_print_line, -}; - -void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) -{ -	struct ftrace_event_call *call = &event_boot_call; -	struct ring_buffer_event *event; -	struct ring_buffer *buffer; -	struct trace_boot_call *entry; -	struct trace_array *tr = boot_trace; - -	if (!tr || !pre_initcalls_finished) -		return; - -	/* Get its name now since this function could -	 * disappear because it is in the .init section. 
-	 */ -	sprint_symbol(bt->func, (unsigned long)fn); -	preempt_disable(); - -	buffer = tr->buffer; -	event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL, -					  sizeof(*entry), 0, 0); -	if (!event) -		goto out; -	entry	= ring_buffer_event_data(event); -	entry->boot_call = *bt; -	if (!filter_check_discard(call, entry, buffer, event)) -		trace_buffer_unlock_commit(buffer, event, 0, 0); - out: -	preempt_enable(); -} - -void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) -{ -	struct ftrace_event_call *call = &event_boot_ret; -	struct ring_buffer_event *event; -	struct ring_buffer *buffer; -	struct trace_boot_ret *entry; -	struct trace_array *tr = boot_trace; - -	if (!tr || !pre_initcalls_finished) -		return; - -	sprint_symbol(bt->func, (unsigned long)fn); -	preempt_disable(); - -	buffer = tr->buffer; -	event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET, -					  sizeof(*entry), 0, 0); -	if (!event) -		goto out; -	entry	= ring_buffer_event_data(event); -	entry->boot_ret = *bt; -	if (!filter_check_discard(call, entry, buffer, event)) -		trace_buffer_unlock_commit(buffer, event, 0, 0); - out: -	preempt_enable(); -} diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 9d589d8dcd1..52fda6c04ac 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -32,16 +32,15 @@  u64 notrace trace_clock_local(void)  {  	u64 clock; -	int resched;  	/*  	 * sched_clock() is an architecture implemented, fast, scalable,  	 * lockless clock. It is not guaranteed to be coherent across  	 * CPUs, nor across CPU idle events.  	 */ -	resched = ftrace_preempt_disable(); +	preempt_disable_notrace();  	clock = sched_clock(); -	ftrace_preempt_enable(resched); +	preempt_enable_notrace();  	return clock;  } diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index dc008c1240d..e3dfecaf13e 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -151,23 +151,6 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,  );  /* - * Special (free-form) trace entry: - */ -FTRACE_ENTRY(special, special_entry, - -	TRACE_SPECIAL, - -	F_STRUCT( -		__field(	unsigned long,	arg1	) -		__field(	unsigned long,	arg2	) -		__field(	unsigned long,	arg3	) -	), - -	F_printk("(%08lx) (%08lx) (%08lx)", -		 __entry->arg1, __entry->arg2, __entry->arg3) -); - -/*   * Stack-trace entry:   */ @@ -271,33 +254,6 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,  		 __entry->map_id, __entry->opcode)  ); -FTRACE_ENTRY(boot_call, trace_boot_call, - -	TRACE_BOOT_CALL, - -	F_STRUCT( -		__field_struct(	struct boot_trace_call,	boot_call	) -		__field_desc(	pid_t,	boot_call,	caller		) -		__array_desc(	char,	boot_call,	func,	KSYM_SYMBOL_LEN) -	), - -	F_printk("%d  %s", __entry->caller, __entry->func) -); - -FTRACE_ENTRY(boot_ret, trace_boot_ret, - -	TRACE_BOOT_RET, - -	F_STRUCT( -		__field_struct(	struct boot_trace_ret,	boot_ret	) -		__array_desc(	char,	boot_ret,	func,	KSYM_SYMBOL_LEN) -		__field_desc(	int,	boot_ret,	result		) -		__field_desc(	unsigned long, boot_ret, duration	) -	), - -	F_printk("%s %d %lx", -		 __entry->func, __entry->result, __entry->duration) -);  #define TRACE_FUNC_SIZE 30  #define TRACE_FILE_SIZE 20 @@ -318,53 +274,3 @@ FTRACE_ENTRY(branch, trace_branch,  		 __entry->func, __entry->file, __entry->correct)  ); -FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry, - -	TRACE_KMEM_ALLOC, - -	F_STRUCT( -		__field(	enum kmemtrace_type_id,	type_id		) -		__field(	unsigned long,		call_site	) -		__field(	const void *,		ptr		) -		__field(	size_t,			bytes_req	) -		
__field(	size_t,			bytes_alloc	) -		__field(	gfp_t,			gfp_flags	) -		__field(	int,			node		) -	), - -	F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi" -		 " flags:%x node:%d", -		 __entry->type_id, __entry->call_site, __entry->ptr, -		 __entry->bytes_req, __entry->bytes_alloc, -		 __entry->gfp_flags, __entry->node) -); - -FTRACE_ENTRY(kmem_free, kmemtrace_free_entry, - -	TRACE_KMEM_FREE, - -	F_STRUCT( -		__field(	enum kmemtrace_type_id,	type_id		) -		__field(	unsigned long,		call_site	) -		__field(	const void *,		ptr		) -	), - -	F_printk("type:%u call_site:%lx ptr:%p", -		 __entry->type_id, __entry->call_site, __entry->ptr) -); - -FTRACE_ENTRY(ksym_trace, ksym_trace_entry, - -	TRACE_KSYM, - -	F_STRUCT( -		__field(	unsigned long,	ip			  ) -		__field(	unsigned char,	type			  ) -		__array(	char	     ,	cmd,	   TASK_COMM_LEN  ) -		__field(	unsigned long,  addr			  ) -	), - -	F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s", -		(void *)__entry->ip, (unsigned int)__entry->type, -		(void *)__entry->addr,  __entry->cmd) -); diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 8a2b73f7c06..000e6e85b44 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -9,8 +9,6 @@  #include <linux/kprobes.h>  #include "trace.h" -EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs); -  static char *perf_trace_buf[4];  /* @@ -56,13 +54,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,  		}  	} -	if (tp_event->class->reg) -		ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER); -	else -		ret = tracepoint_probe_register(tp_event->name, -						tp_event->class->perf_probe, -						tp_event); - +	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);  	if (ret)  		goto fail; @@ -96,9 +88,7 @@ int perf_trace_init(struct perf_event *p_event)  	mutex_lock(&event_mutex);  	list_for_each_entry(tp_event, &ftrace_events, list) {  		if (tp_event->event.type == event_id && -		    tp_event->class && -		    (tp_event->class->perf_probe || -		     tp_event->class->reg) && +		    tp_event->class && tp_event->class->reg &&  		    try_module_get(tp_event->mod)) {  			ret = perf_trace_event_init(tp_event, p_event);  			break; @@ -138,18 +128,13 @@ void perf_trace_destroy(struct perf_event *p_event)  	if (--tp_event->perf_refcount > 0)  		goto out; -	if (tp_event->class->reg) -		tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER); -	else -		tracepoint_probe_unregister(tp_event->name, -					    tp_event->class->perf_probe, -					    tp_event); +	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);  	/* -	 * Ensure our callback won't be called anymore. See -	 * tracepoint_probe_unregister() and __DO_TRACE(). +	 * Ensure our callback won't be called anymore. The buffers +	 * will be freed after that.  	 
*/ -	synchronize_sched(); +	tracepoint_synchronize_unregister();  	free_percpu(tp_event->perf_events);  	tp_event->perf_events = NULL; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 53cffc0b080..09b4fa6e4d3 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -28,6 +28,7 @@  DEFINE_MUTEX(event_mutex);  LIST_HEAD(ftrace_events); +LIST_HEAD(ftrace_common_fields);  struct list_head *  trace_get_fields(struct ftrace_event_call *event_call) @@ -37,15 +38,11 @@ trace_get_fields(struct ftrace_event_call *event_call)  	return event_call->class->get_fields(event_call);  } -int trace_define_field(struct ftrace_event_call *call, const char *type, -		       const char *name, int offset, int size, int is_signed, -		       int filter_type) +static int __trace_define_field(struct list_head *head, const char *type, +				const char *name, int offset, int size, +				int is_signed, int filter_type)  {  	struct ftrace_event_field *field; -	struct list_head *head; - -	if (WARN_ON(!call->class)) -		return 0;  	field = kzalloc(sizeof(*field), GFP_KERNEL);  	if (!field) @@ -68,7 +65,6 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,  	field->size = size;  	field->is_signed = is_signed; -	head = trace_get_fields(call);  	list_add(&field->link, head);  	return 0; @@ -80,17 +76,32 @@ err:  	return -ENOMEM;  } + +int trace_define_field(struct ftrace_event_call *call, const char *type, +		       const char *name, int offset, int size, int is_signed, +		       int filter_type) +{ +	struct list_head *head; + +	if (WARN_ON(!call->class)) +		return 0; + +	head = trace_get_fields(call); +	return __trace_define_field(head, type, name, offset, size, +				    is_signed, filter_type); +}  EXPORT_SYMBOL_GPL(trace_define_field);  #define __common_field(type, item)					\ -	ret = trace_define_field(call, #type, "common_" #item,		\ -				 offsetof(typeof(ent), item),		\ -				 sizeof(ent.item),			\ -				 is_signed_type(type), FILTER_OTHER);	\ +	ret = __trace_define_field(&ftrace_common_fields, #type,	\ +				   "common_" #item,			\ +				   offsetof(typeof(ent), item),		\ +				   sizeof(ent.item),			\ +				   is_signed_type(type), FILTER_OTHER);	\  	if (ret)							\  		return ret; -static int trace_define_common_fields(struct ftrace_event_call *call) +static int trace_define_common_fields(void)  {  	int ret;  	struct trace_entry ent; @@ -130,6 +141,55 @@ int trace_event_raw_init(struct ftrace_event_call *call)  }  EXPORT_SYMBOL_GPL(trace_event_raw_init); +int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type) +{ +	switch (type) { +	case TRACE_REG_REGISTER: +		return tracepoint_probe_register(call->name, +						 call->class->probe, +						 call); +	case TRACE_REG_UNREGISTER: +		tracepoint_probe_unregister(call->name, +					    call->class->probe, +					    call); +		return 0; + +#ifdef CONFIG_PERF_EVENTS +	case TRACE_REG_PERF_REGISTER: +		return tracepoint_probe_register(call->name, +						 call->class->perf_probe, +						 call); +	case TRACE_REG_PERF_UNREGISTER: +		tracepoint_probe_unregister(call->name, +					    call->class->perf_probe, +					    call); +		return 0; +#endif +	} +	return 0; +} +EXPORT_SYMBOL_GPL(ftrace_event_reg); + +void trace_event_enable_cmd_record(bool enable) +{ +	struct ftrace_event_call *call; + +	mutex_lock(&event_mutex); +	list_for_each_entry(call, &ftrace_events, list) { +		if (!(call->flags & TRACE_EVENT_FL_ENABLED)) +			continue; + +		if (enable) { +			tracing_start_cmdline_record(); +			call->flags |= 
TRACE_EVENT_FL_RECORDED_CMD; +		} else { +			tracing_stop_cmdline_record(); +			call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; +		} +	} +	mutex_unlock(&event_mutex); +} +  static int ftrace_event_enable_disable(struct ftrace_event_call *call,  					int enable)  { @@ -139,24 +199,20 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,  	case 0:  		if (call->flags & TRACE_EVENT_FL_ENABLED) {  			call->flags &= ~TRACE_EVENT_FL_ENABLED; -			tracing_stop_cmdline_record(); -			if (call->class->reg) -				call->class->reg(call, TRACE_REG_UNREGISTER); -			else -				tracepoint_probe_unregister(call->name, -							    call->class->probe, -							    call); +			if (call->flags & TRACE_EVENT_FL_RECORDED_CMD) { +				tracing_stop_cmdline_record(); +				call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD; +			} +			call->class->reg(call, TRACE_REG_UNREGISTER);  		}  		break;  	case 1:  		if (!(call->flags & TRACE_EVENT_FL_ENABLED)) { -			tracing_start_cmdline_record(); -			if (call->class->reg) -				ret = call->class->reg(call, TRACE_REG_REGISTER); -			else -				ret = tracepoint_probe_register(call->name, -								call->class->probe, -								call); +			if (trace_flags & TRACE_ITER_RECORD_CMD) { +				tracing_start_cmdline_record(); +				call->flags |= TRACE_EVENT_FL_RECORDED_CMD; +			} +			ret = call->class->reg(call, TRACE_REG_REGISTER);  			if (ret) {  				tracing_stop_cmdline_record();  				pr_info("event trace: Could not enable event " @@ -194,8 +250,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,  	mutex_lock(&event_mutex);  	list_for_each_entry(call, &ftrace_events, list) { -		if (!call->name || !call->class || -		    (!call->class->probe && !call->class->reg)) +		if (!call->name || !call->class || !call->class->reg)  			continue;  		if (match && @@ -321,7 +376,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)  		 * The ftrace subsystem is for showing formats only.  		 * They can not be enabled or disabled via the event files.  		 */ -		if (call->class && (call->class->probe || call->class->reg)) +		if (call->class && call->class->reg)  			return call;  	} @@ -474,8 +529,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,  	mutex_lock(&event_mutex);  	list_for_each_entry(call, &ftrace_events, list) { -		if (!call->name || !call->class || -		    (!call->class->probe && !call->class->reg)) +		if (!call->name || !call->class || !call->class->reg)  			continue;  		if (system && strcmp(call->class->system, system) != 0) @@ -544,32 +598,10 @@ out:  	return ret;  } -static ssize_t -event_format_read(struct file *filp, char __user *ubuf, size_t cnt, -		  loff_t *ppos) +static void print_event_fields(struct trace_seq *s, struct list_head *head)  { -	struct ftrace_event_call *call = filp->private_data;  	struct ftrace_event_field *field; -	struct list_head *head; -	struct trace_seq *s; -	int common_field_count = 5; -	char *buf; -	int r = 0; - -	if (*ppos) -		return 0; - -	s = kmalloc(sizeof(*s), GFP_KERNEL); -	if (!s) -		return -ENOMEM; - -	trace_seq_init(s); - -	trace_seq_printf(s, "name: %s\n", call->name); -	trace_seq_printf(s, "ID: %d\n", call->event.type); -	trace_seq_printf(s, "format:\n"); -	head = trace_get_fields(call);  	list_for_each_entry_reverse(field, head, link) {  		/*  		 * Smartly shows the array type(except dynamic array). 
@@ -584,29 +616,54 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,  			array_descriptor = NULL;  		if (!array_descriptor) { -			r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;" +			trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"  					"\tsize:%u;\tsigned:%d;\n",  					field->type, field->name, field->offset,  					field->size, !!field->is_signed);  		} else { -			r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;" +			trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"  					"\tsize:%u;\tsigned:%d;\n",  					(int)(array_descriptor - field->type),  					field->type, field->name,  					array_descriptor, field->offset,  					field->size, !!field->is_signed);  		} +	} +} -		if (--common_field_count == 0) -			r = trace_seq_printf(s, "\n"); +static ssize_t +event_format_read(struct file *filp, char __user *ubuf, size_t cnt, +		  loff_t *ppos) +{ +	struct ftrace_event_call *call = filp->private_data; +	struct list_head *head; +	struct trace_seq *s; +	char *buf; +	int r; -		if (!r) -			break; -	} +	if (*ppos) +		return 0; + +	s = kmalloc(sizeof(*s), GFP_KERNEL); +	if (!s) +		return -ENOMEM; + +	trace_seq_init(s); + +	trace_seq_printf(s, "name: %s\n", call->name); +	trace_seq_printf(s, "ID: %d\n", call->event.type); +	trace_seq_printf(s, "format:\n"); + +	/* print common fields */ +	print_event_fields(s, &ftrace_common_fields); -	if (r) -		r = trace_seq_printf(s, "\nprint fmt: %s\n", -				call->print_fmt); +	trace_seq_putc(s, '\n'); + +	/* print event specific fields */ +	head = trace_get_fields(call); +	print_event_fields(s, head); + +	r = trace_seq_printf(s, "\nprint fmt: %s\n", call->print_fmt);  	if (!r) {  		/* @@ -963,35 +1020,31 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,  		return -1;  	} -	if (call->class->probe || call->class->reg) +	if (call->class->reg)  		trace_create_file("enable", 0644, call->dir, call,  				  enable);  #ifdef CONFIG_PERF_EVENTS -	if (call->event.type && (call->class->perf_probe || call->class->reg)) +	if (call->event.type && call->class->reg)  		trace_create_file("id", 0444, call->dir, call,  		 		  id);  #endif -	if (call->class->define_fields) { -		/* -		 * Other events may have the same class. Only update -		 * the fields if they are not already defined. -		 */ -		head = trace_get_fields(call); -		if (list_empty(head)) { -			ret = trace_define_common_fields(call); -			if (!ret) -				ret = call->class->define_fields(call); -			if (ret < 0) { -				pr_warning("Could not initialize trace point" -					   " events/%s\n", call->name); -				return ret; -			} +	/* +	 * Other events may have the same class. Only update +	 * the fields if they are not already defined. 
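[Illustrative note, not part of the patch: the event_format_read() rewrite above prints the shared ftrace_common_fields list first, then a blank line, then the event's own fields, so each event's "format" pseudo-file keeps its previous layout even though the common fields are no longer stored per event. Assuming the common-field set of this era (type, flags, preempt_count, pid, lock_depth — defined by trace_define_common_fields(), whose body is outside the hunk context), the output reads roughly like the following; event name, ID and offsets are made up here:

	name: example_event
	ID: 42
	format:
		field:unsigned short common_type;	offset:0;	size:2;	signed:0;
		field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
		field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
		field:int common_pid;	offset:4;	size:4;	signed:1;
		field:int common_lock_depth;	offset:8;	size:4;	signed:1;

		field:... event-specific fields ...

	print fmt: ...

Filter predicates on the common_* fields keep working as well: the trace_events_filter.c change further down resolves field names against ftrace_common_fields before falling back to the per-event list.]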
+	 */ +	head = trace_get_fields(call); +	if (list_empty(head)) { +		ret = call->class->define_fields(call); +		if (ret < 0) { +			pr_warning("Could not initialize trace point" +				   " events/%s\n", call->name); +			return ret;  		} -		trace_create_file("filter", 0644, call->dir, call, -				  filter);  	} +	trace_create_file("filter", 0644, call->dir, call, +			  filter);  	trace_create_file("format", 0444, call->dir, call,  			  format); @@ -999,11 +1052,17 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,  	return 0;  } -static int __trace_add_event_call(struct ftrace_event_call *call) +static int +__trace_add_event_call(struct ftrace_event_call *call, struct module *mod, +		       const struct file_operations *id, +		       const struct file_operations *enable, +		       const struct file_operations *filter, +		       const struct file_operations *format)  {  	struct dentry *d_events;  	int ret; +	/* The linker may leave blanks */  	if (!call->name)  		return -EINVAL; @@ -1011,8 +1070,8 @@ static int __trace_add_event_call(struct ftrace_event_call *call)  		ret = call->class->raw_init(call);  		if (ret < 0) {  			if (ret != -ENOSYS) -				pr_warning("Could not initialize trace " -				"events/%s\n", call->name); +				pr_warning("Could not initialize trace events/%s\n", +					   call->name);  			return ret;  		}  	} @@ -1021,11 +1080,10 @@ static int __trace_add_event_call(struct ftrace_event_call *call)  	if (!d_events)  		return -ENOENT; -	ret = event_create_dir(call, d_events, &ftrace_event_id_fops, -				&ftrace_enable_fops, &ftrace_event_filter_fops, -				&ftrace_event_format_fops); +	ret = event_create_dir(call, d_events, id, enable, filter, format);  	if (!ret)  		list_add(&call->list, &ftrace_events); +	call->mod = mod;  	return ret;  } @@ -1035,7 +1093,10 @@ int trace_add_event_call(struct ftrace_event_call *call)  {  	int ret;  	mutex_lock(&event_mutex); -	ret = __trace_add_event_call(call); +	ret = __trace_add_event_call(call, NULL, &ftrace_event_id_fops, +				     &ftrace_enable_fops, +				     &ftrace_event_filter_fops, +				     &ftrace_event_format_fops);  	mutex_unlock(&event_mutex);  	return ret;  } @@ -1152,8 +1213,6 @@ static void trace_module_add_events(struct module *mod)  {  	struct ftrace_module_file_ops *file_ops = NULL;  	struct ftrace_event_call *call, *start, *end; -	struct dentry *d_events; -	int ret;  	start = mod->trace_events;  	end = mod->trace_events + mod->num_trace_events; @@ -1161,38 +1220,14 @@ static void trace_module_add_events(struct module *mod)  	if (start == end)  		return; -	d_events = event_trace_events_dir(); -	if (!d_events) +	file_ops = trace_create_file_ops(mod); +	if (!file_ops)  		return;  	for_each_event(call, start, end) { -		/* The linker may leave blanks */ -		if (!call->name) -			continue; -		if (call->class->raw_init) { -			ret = call->class->raw_init(call); -			if (ret < 0) { -				if (ret != -ENOSYS) -					pr_warning("Could not initialize trace " -					"point events/%s\n", call->name); -				continue; -			} -		} -		/* -		 * This module has events, create file ops for this module -		 * if not already done. 
-		 */ -		if (!file_ops) { -			file_ops = trace_create_file_ops(mod); -			if (!file_ops) -				return; -		} -		call->mod = mod; -		ret = event_create_dir(call, d_events, +		__trace_add_event_call(call, mod,  				       &file_ops->id, &file_ops->enable,  				       &file_ops->filter, &file_ops->format); -		if (!ret) -			list_add(&call->list, &ftrace_events);  	}  } @@ -1319,25 +1354,14 @@ static __init int event_trace_init(void)  	trace_create_file("enable", 0644, d_events,  			  NULL, &ftrace_system_enable_fops); +	if (trace_define_common_fields()) +		pr_warning("tracing: Failed to allocate common fields"); +  	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { -		/* The linker may leave blanks */ -		if (!call->name) -			continue; -		if (call->class->raw_init) { -			ret = call->class->raw_init(call); -			if (ret < 0) { -				if (ret != -ENOSYS) -					pr_warning("Could not initialize trace " -					"point events/%s\n", call->name); -				continue; -			} -		} -		ret = event_create_dir(call, d_events, &ftrace_event_id_fops, +		__trace_add_event_call(call, NULL, &ftrace_event_id_fops,  				       &ftrace_enable_fops,  				       &ftrace_event_filter_fops,  				       &ftrace_event_format_fops); -		if (!ret) -			list_add(&call->list, &ftrace_events);  	}  	while (true) { @@ -1524,12 +1548,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)  	struct ftrace_entry *entry;  	unsigned long flags;  	long disabled; -	int resched;  	int cpu;  	int pc;  	pc = preempt_count(); -	resched = ftrace_preempt_disable(); +	preempt_disable_notrace();  	cpu = raw_smp_processor_id();  	disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu)); @@ -1551,7 +1574,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)   out:  	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu)); -	ftrace_preempt_enable(resched); +	preempt_enable_notrace();  }  static struct ftrace_ops trace_ops __initdata  = diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 57bb1bb3299..36d40104b17 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -497,12 +497,10 @@ void print_subsystem_event_filter(struct event_subsystem *system,  }  static struct ftrace_event_field * -find_event_field(struct ftrace_event_call *call, char *name) +__find_event_field(struct list_head *head, char *name)  {  	struct ftrace_event_field *field; -	struct list_head *head; -	head = trace_get_fields(call);  	list_for_each_entry(field, head, link) {  		if (!strcmp(field->name, name))  			return field; @@ -511,6 +509,20 @@ find_event_field(struct ftrace_event_call *call, char *name)  	return NULL;  } +static struct ftrace_event_field * +find_event_field(struct ftrace_event_call *call, char *name) +{ +	struct ftrace_event_field *field; +	struct list_head *head; + +	field = __find_event_field(&ftrace_common_fields, name); +	if (field) +		return field; + +	head = trace_get_fields(call); +	return __find_event_field(head, name); +} +  static void filter_free_pred(struct filter_pred *pred)  {  	if (!pred) @@ -627,9 +639,6 @@ static int init_subsystem_preds(struct event_subsystem *system)  	int err;  	list_for_each_entry(call, &ftrace_events, list) { -		if (!call->class || !call->class->define_fields) -			continue; -  		if (strcmp(call->class->system, system->name) != 0)  			continue; @@ -646,9 +655,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)  	struct ftrace_event_call *call;  	list_for_each_entry(call, 
&ftrace_events, list) { -		if (!call->class || !call->class->define_fields) -			continue; -  		if (strcmp(call->class->system, system->name) != 0)  			continue; @@ -1251,9 +1257,6 @@ static int replace_system_preds(struct event_subsystem *system,  	list_for_each_entry(call, &ftrace_events, list) {  		struct event_filter *filter = call->filter; -		if (!call->class || !call->class->define_fields) -			continue; -  		if (strcmp(call->class->system, system->name) != 0)  			continue; diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 8536e2a6596..4ba44deaac2 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -125,12 +125,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call)	\  #include "trace_entries.h" -static int ftrace_raw_init_event(struct ftrace_event_call *call) -{ -	INIT_LIST_HEAD(&call->class->fields); -	return 0; -} -  #undef __entry  #define __entry REC @@ -158,7 +152,7 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)  struct ftrace_event_class event_class_ftrace_##call = {			\  	.system			= __stringify(TRACE_SYSTEM),		\  	.define_fields		= ftrace_define_fields_##call,		\ -	.raw_init		= ftrace_raw_init_event,		\ +	.fields			= LIST_HEAD_INIT(event_class_ftrace_##call.fields),\  };									\  									\  struct ftrace_event_call __used						\ diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index b3f3776b0cd..16aee4d44e8 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -54,14 +54,14 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)  	struct trace_array_cpu *data;  	unsigned long flags;  	long disabled; -	int cpu, resched; +	int cpu;  	int pc;  	if (unlikely(!ftrace_function_enabled))  		return;  	pc = preempt_count(); -	resched = ftrace_preempt_disable(); +	preempt_disable_notrace();  	local_save_flags(flags);  	cpu = raw_smp_processor_id();  	data = tr->data[cpu]; @@ -71,7 +71,7 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)  		trace_function(tr, ip, parent_ip, flags, pc);  	atomic_dec(&data->disabled); -	ftrace_preempt_enable(resched); +	preempt_enable_notrace();  }  static void diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 79f4bac99a9..6bff2362578 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -641,7 +641,8 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)  	/* Print nsecs (we don't want to exceed 7 numbers) */  	if (len < 7) { -		snprintf(nsecs_str, 8 - len, "%03lu", nsecs_rem); +		snprintf(nsecs_str, min(sizeof(nsecs_str), 8UL - len), "%03lu", +			 nsecs_rem);  		ret = trace_seq_printf(s, ".%s", nsecs_str);  		if (!ret)  			return TRACE_TYPE_PARTIAL_LINE; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 6fd486e0cef..73a6b0601f2 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -649,6 +649,7 @@ static struct tracer irqsoff_tracer __read_mostly =  #endif  	.open           = irqsoff_trace_open,  	.close          = irqsoff_trace_close, +	.use_max_tr	= 1,  };  # define register_irqsoff(trace) register_tracer(&trace)  #else @@ -681,6 +682,7 @@ static struct tracer preemptoff_tracer __read_mostly =  #endif  	.open		= irqsoff_trace_open,  	.close		= irqsoff_trace_close, +	.use_max_tr	= 1,  };  # define register_preemptoff(trace) register_tracer(&trace)  #else @@ -715,6 +717,7 @@ static struct 
tracer preemptirqsoff_tracer __read_mostly =  #endif  	.open		= irqsoff_trace_open,  	.close		= irqsoff_trace_close, +	.use_max_tr	= 1,  };  # define register_preemptirqsoff(trace) register_tracer(&trace) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f52b5f50299..8b27c9849b4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -30,6 +30,8 @@  #include <linux/ptrace.h>  #include <linux/perf_event.h>  #include <linux/stringify.h> +#include <linux/limits.h> +#include <linux/uaccess.h>  #include <asm/bitsperlong.h>  #include "trace.h" @@ -38,6 +40,7 @@  #define MAX_TRACE_ARGS 128  #define MAX_ARGSTR_LEN 63  #define MAX_EVENT_NAME_LEN 64 +#define MAX_STRING_SIZE PATH_MAX  #define KPROBE_EVENT_SYSTEM "kprobes"  /* Reserved field names */ @@ -58,14 +61,16 @@ const char *reserved_field_names[] = {  };  /* Printing function type */ -typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *); +typedef int (*print_type_func_t)(struct trace_seq *, const char *, void *, +				 void *);  #define PRINT_TYPE_FUNC_NAME(type)	print_type_##type  #define PRINT_TYPE_FMT_NAME(type)	print_type_format_##type  /* Printing  in basic type function template */  #define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt, cast)			\  static __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s,	\ -						const char *name, void *data)\ +						const char *name,	\ +						void *data, void *ent)\  {									\  	return trace_seq_printf(s, " %s=" fmt, name, (cast)*(type *)data);\  }									\ @@ -80,6 +85,49 @@ DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d", int)  DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%ld", long)  DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%lld", long long) +/* data_rloc: data relative location, compatible with u32 */ +#define make_data_rloc(len, roffs)	\ +	(((u32)(len) << 16) | ((u32)(roffs) & 0xffff)) +#define get_rloc_len(dl)	((u32)(dl) >> 16) +#define get_rloc_offs(dl)	((u32)(dl) & 0xffff) + +static inline void *get_rloc_data(u32 *dl) +{ +	return (u8 *)dl + get_rloc_offs(*dl); +} + +/* For data_loc conversion */ +static inline void *get_loc_data(u32 *dl, void *ent) +{ +	return (u8 *)ent + get_rloc_offs(*dl); +} + +/* + * Convert data_rloc to data_loc: + *  data_rloc stores the offset from data_rloc itself, but data_loc + *  stores the offset from event entry. + */ +#define convert_rloc_to_loc(dl, offs)	((u32)(dl) + (offs)) + +/* For defining macros, define string/string_size types */ +typedef u32 string; +typedef u32 string_size; + +/* Print type function for string type */ +static __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, +						  const char *name, +						  void *data, void *ent) +{ +	int len = *(u32 *)data >> 16; + +	if (!len) +		return trace_seq_printf(s, " %s=(fault)", name); +	else +		return trace_seq_printf(s, " %s=\"%s\"", name, +					(const char *)get_loc_data(data, ent)); +} +static const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; +  /* Data fetch function type */  typedef	void (*fetch_func_t)(struct pt_regs *, void *, void *); @@ -94,32 +142,38 @@ static __kprobes void call_fetch(struct fetch_param *fprm,  	return fprm->fn(regs, fprm->data, dest);  } -#define FETCH_FUNC_NAME(kind, type)	fetch_##kind##_##type +#define FETCH_FUNC_NAME(method, type)	fetch_##method##_##type  /*   * Define macro for basic types - we don't need to define s* types, because   * we have to care only about bitwidth at recording time.   
*/ -#define DEFINE_BASIC_FETCH_FUNCS(kind)  \ -DEFINE_FETCH_##kind(u8)			\ -DEFINE_FETCH_##kind(u16)		\ -DEFINE_FETCH_##kind(u32)		\ -DEFINE_FETCH_##kind(u64) +#define DEFINE_BASIC_FETCH_FUNCS(method) \ +DEFINE_FETCH_##method(u8)		\ +DEFINE_FETCH_##method(u16)		\ +DEFINE_FETCH_##method(u32)		\ +DEFINE_FETCH_##method(u64) -#define CHECK_BASIC_FETCH_FUNCS(kind, fn)	\ -	((FETCH_FUNC_NAME(kind, u8) == fn) ||	\ -	 (FETCH_FUNC_NAME(kind, u16) == fn) ||	\ -	 (FETCH_FUNC_NAME(kind, u32) == fn) ||	\ -	 (FETCH_FUNC_NAME(kind, u64) == fn)) +#define CHECK_FETCH_FUNCS(method, fn)			\ +	(((FETCH_FUNC_NAME(method, u8) == fn) ||	\ +	  (FETCH_FUNC_NAME(method, u16) == fn) ||	\ +	  (FETCH_FUNC_NAME(method, u32) == fn) ||	\ +	  (FETCH_FUNC_NAME(method, u64) == fn) ||	\ +	  (FETCH_FUNC_NAME(method, string) == fn) ||	\ +	  (FETCH_FUNC_NAME(method, string_size) == fn)) \ +	 && (fn != NULL))  /* Data fetch function templates */  #define DEFINE_FETCH_reg(type)						\  static __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs,	\ -					  void *offset, void *dest)	\ +					void *offset, void *dest)	\  {									\  	*(type *)dest = (type)regs_get_register(regs,			\  				(unsigned int)((unsigned long)offset));	\  }  DEFINE_BASIC_FETCH_FUNCS(reg) +/* No string on the register */ +#define fetch_reg_string NULL +#define fetch_reg_string_size NULL  #define DEFINE_FETCH_stack(type)					\  static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ @@ -129,6 +183,9 @@ static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\  				(unsigned int)((unsigned long)offset));	\  }  DEFINE_BASIC_FETCH_FUNCS(stack) +/* No string on the stack entry */ +#define fetch_stack_string NULL +#define fetch_stack_string_size NULL  #define DEFINE_FETCH_retval(type)					\  static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\ @@ -137,6 +194,9 @@ static __kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs,\  	*(type *)dest = (type)regs_return_value(regs);			\  }  DEFINE_BASIC_FETCH_FUNCS(retval) +/* No string on the retval */ +#define fetch_retval_string NULL +#define fetch_retval_string_size NULL  #define DEFINE_FETCH_memory(type)					\  static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ @@ -149,6 +209,62 @@ static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\  		*(type *)dest = retval;					\  }  DEFINE_BASIC_FETCH_FUNCS(memory) +/* + * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max + * length and relative data location. + */ +static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, +						      void *addr, void *dest) +{ +	long ret; +	int maxlen = get_rloc_len(*(u32 *)dest); +	u8 *dst = get_rloc_data(dest); +	u8 *src = addr; +	mm_segment_t old_fs = get_fs(); +	if (!maxlen) +		return; +	/* +	 * Try to get string again, since the string can be changed while +	 * probing. 
+	 */ +	set_fs(KERNEL_DS); +	pagefault_disable(); +	do +		ret = __copy_from_user_inatomic(dst++, src++, 1); +	while (dst[-1] && ret == 0 && src - (u8 *)addr < maxlen); +	dst[-1] = '\0'; +	pagefault_enable(); +	set_fs(old_fs); + +	if (ret < 0) {	/* Failed to fetch string */ +		((u8 *)get_rloc_data(dest))[0] = '\0'; +		*(u32 *)dest = make_data_rloc(0, get_rloc_offs(*(u32 *)dest)); +	} else +		*(u32 *)dest = make_data_rloc(src - (u8 *)addr, +					      get_rloc_offs(*(u32 *)dest)); +} +/* Return the length of string -- including null terminal byte */ +static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, +							void *addr, void *dest) +{ +	int ret, len = 0; +	u8 c; +	mm_segment_t old_fs = get_fs(); + +	set_fs(KERNEL_DS); +	pagefault_disable(); +	do { +		ret = __copy_from_user_inatomic(&c, (u8 *)addr + len, 1); +		len++; +	} while (c && ret == 0 && len < MAX_STRING_SIZE); +	pagefault_enable(); +	set_fs(old_fs); + +	if (ret < 0)	/* Failed to check the length */ +		*(u32 *)dest = 0; +	else +		*(u32 *)dest = len; +}  /* Memory fetching by symbol */  struct symbol_cache { @@ -203,6 +319,8 @@ static __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs,\  		*(type *)dest = 0;					\  }  DEFINE_BASIC_FETCH_FUNCS(symbol) +DEFINE_FETCH_symbol(string) +DEFINE_FETCH_symbol(string_size)  /* Dereference memory access function */  struct deref_fetch_param { @@ -224,12 +342,14 @@ static __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs,\  		*(type *)dest = 0;					\  }  DEFINE_BASIC_FETCH_FUNCS(deref) +DEFINE_FETCH_deref(string) +DEFINE_FETCH_deref(string_size)  static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)  { -	if (CHECK_BASIC_FETCH_FUNCS(deref, data->orig.fn)) +	if (CHECK_FETCH_FUNCS(deref, data->orig.fn))  		free_deref_fetch_param(data->orig.data); -	else if (CHECK_BASIC_FETCH_FUNCS(symbol, data->orig.fn)) +	else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))  		free_symbol_cache(data->orig.data);  	kfree(data);  } @@ -240,23 +360,43 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)  #define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)  #define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE) -#define ASSIGN_FETCH_FUNC(kind, type)	\ -	.kind = FETCH_FUNC_NAME(kind, type) +/* Fetch types */ +enum { +	FETCH_MTD_reg = 0, +	FETCH_MTD_stack, +	FETCH_MTD_retval, +	FETCH_MTD_memory, +	FETCH_MTD_symbol, +	FETCH_MTD_deref, +	FETCH_MTD_END, +}; -#define ASSIGN_FETCH_TYPE(ptype, ftype, sign)	\ -	{.name = #ptype,			\ -	 .size = sizeof(ftype),			\ -	 .is_signed = sign,			\ -	 .print = PRINT_TYPE_FUNC_NAME(ptype),	\ -	 .fmt = PRINT_TYPE_FMT_NAME(ptype),	\ -ASSIGN_FETCH_FUNC(reg, ftype),			\ -ASSIGN_FETCH_FUNC(stack, ftype),		\ -ASSIGN_FETCH_FUNC(retval, ftype),		\ -ASSIGN_FETCH_FUNC(memory, ftype),		\ -ASSIGN_FETCH_FUNC(symbol, ftype),		\ -ASSIGN_FETCH_FUNC(deref, ftype),		\ +#define ASSIGN_FETCH_FUNC(method, type)	\ +	[FETCH_MTD_##method] = FETCH_FUNC_NAME(method, type) + +#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype)	\ +	{.name = _name,				\ +	 .size = _size,					\ +	 .is_signed = sign,				\ +	 .print = PRINT_TYPE_FUNC_NAME(ptype),		\ +	 .fmt = PRINT_TYPE_FMT_NAME(ptype),		\ +	 .fmttype = _fmttype,				\ +	 .fetch = {					\ +ASSIGN_FETCH_FUNC(reg, ftype),				\ +ASSIGN_FETCH_FUNC(stack, ftype),			\ +ASSIGN_FETCH_FUNC(retval, ftype),			\ +ASSIGN_FETCH_FUNC(memory, ftype),			\ +ASSIGN_FETCH_FUNC(symbol, ftype),			\ +ASSIGN_FETCH_FUNC(deref, ftype),			\ +	  }						\  	} 
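[Illustrative note, not part of the patch: with the hunk above, struct fetch_type no longer carries one named function pointer per fetch method (reg, stack, retval, memory, symbol, deref); the methods are indexed by the new FETCH_MTD_* enum, so the string and string_size variants slot into the same table and unsupported combinations can simply stay NULL (e.g. fetch_reg_string), which CHECK_FETCH_FUNCS() then rejects. A simplified, self-contained sketch of the same table pattern — not the kernel's actual definitions:

	typedef void (*fetch_fn_t)(void *regs, void *data, void *dest);

	enum { MTD_reg, MTD_stack, MTD_retval, MTD_memory, MTD_symbol, MTD_deref, MTD_END };

	struct fetch_type_sketch {
		const char	*name;
		fetch_fn_t	fetch[MTD_END];	/* one slot per fetch method, NULL if unsupported */
	};

	/* A parser picks the fetch function by method index instead of by member name: */
	static fetch_fn_t pick_fetch(const struct fetch_type_sketch *t, int method)
	{
		return t->fetch[method];	/* e.g. t->fetch[MTD_retval] */
	}
]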
+#define ASSIGN_FETCH_TYPE(ptype, ftype, sign)			\ +	__ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype) + +#define FETCH_TYPE_STRING 0 +#define FETCH_TYPE_STRSIZE 1 +  /* Fetch type information table */  static const struct fetch_type {  	const char	*name;		/* Name of type */ @@ -264,14 +404,16 @@ static const struct fetch_type {  	int		is_signed;	/* Signed flag */  	print_type_func_t	print;	/* Print functions */  	const char	*fmt;		/* Fromat string */ +	const char	*fmttype;	/* Name in format file */  	/* Fetch functions */ -	fetch_func_t	reg; -	fetch_func_t	stack; -	fetch_func_t	retval; -	fetch_func_t	memory; -	fetch_func_t	symbol; -	fetch_func_t	deref; +	fetch_func_t	fetch[FETCH_MTD_END];  } fetch_type_table[] = { +	/* Special types */ +	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string, +					sizeof(u32), 1, "__data_loc char[]"), +	[FETCH_TYPE_STRSIZE] = __ASSIGN_FETCH_TYPE("string_size", u32, +					string_size, sizeof(u32), 0, "u32"), +	/* Basic types */  	ASSIGN_FETCH_TYPE(u8,  u8,  0),  	ASSIGN_FETCH_TYPE(u16, u16, 0),  	ASSIGN_FETCH_TYPE(u32, u32, 0), @@ -302,12 +444,28 @@ static __kprobes void fetch_stack_address(struct pt_regs *regs,  	*(unsigned long *)dest = kernel_stack_pointer(regs);  } +static fetch_func_t get_fetch_size_function(const struct fetch_type *type, +					    fetch_func_t orig_fn) +{ +	int i; + +	if (type != &fetch_type_table[FETCH_TYPE_STRING]) +		return NULL;	/* Only string type needs size function */ +	for (i = 0; i < FETCH_MTD_END; i++) +		if (type->fetch[i] == orig_fn) +			return fetch_type_table[FETCH_TYPE_STRSIZE].fetch[i]; + +	WARN_ON(1);	/* This should not happen */ +	return NULL; +} +  /**   * Kprobe event core functions   */  struct probe_arg {  	struct fetch_param	fetch; +	struct fetch_param	fetch_size;  	unsigned int		offset;	/* Offset from argument entry */  	const char		*name;	/* Name of this argument */  	const char		*comm;	/* Command of this argument */ @@ -429,9 +587,9 @@ error:  static void free_probe_arg(struct probe_arg *arg)  { -	if (CHECK_BASIC_FETCH_FUNCS(deref, arg->fetch.fn)) +	if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))  		free_deref_fetch_param(arg->fetch.data); -	else if (CHECK_BASIC_FETCH_FUNCS(symbol, arg->fetch.fn)) +	else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))  		free_symbol_cache(arg->fetch.data);  	kfree(arg->name);  	kfree(arg->comm); @@ -548,7 +706,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,  	if (strcmp(arg, "retval") == 0) {  		if (is_return) -			f->fn = t->retval; +			f->fn = t->fetch[FETCH_MTD_retval];  		else  			ret = -EINVAL;  	} else if (strncmp(arg, "stack", 5) == 0) { @@ -562,7 +720,7 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,  			if (ret || param > PARAM_MAX_STACK)  				ret = -EINVAL;  			else { -				f->fn = t->stack; +				f->fn = t->fetch[FETCH_MTD_stack];  				f->data = (void *)param;  			}  		} else @@ -588,7 +746,7 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,  	case '%':	/* named register */  		ret = regs_query_register_offset(arg + 1);  		if (ret >= 0) { -			f->fn = t->reg; +			f->fn = t->fetch[FETCH_MTD_reg];  			f->data = (void *)(unsigned long)ret;  			ret = 0;  		} @@ -598,7 +756,7 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,  			ret = strict_strtoul(arg + 1, 0, ¶m);  			if (ret)  				break; -			f->fn = t->memory; +			f->fn = t->fetch[FETCH_MTD_memory];  			f->data = (void *)param;  		} else {  			ret = split_symbol_offset(arg + 1, &offset); @@ -606,7 +764,7 
@@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,  				break;  			f->data = alloc_symbol_cache(arg + 1, offset);  			if (f->data) -				f->fn = t->symbol; +				f->fn = t->fetch[FETCH_MTD_symbol];  		}  		break;  	case '+':	/* deref memory */ @@ -636,14 +794,17 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,  			if (ret)  				kfree(dprm);  			else { -				f->fn = t->deref; +				f->fn = t->fetch[FETCH_MTD_deref];  				f->data = (void *)dprm;  			}  		}  		break;  	} -	if (!ret && !f->fn) +	if (!ret && !f->fn) {	/* Parsed, but do not find fetch method */ +		pr_info("%s type has no corresponding fetch method.\n", +			t->name);  		ret = -EINVAL; +	}  	return ret;  } @@ -652,6 +813,7 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp,  			   struct probe_arg *parg, int is_return)  {  	const char *t; +	int ret;  	if (strlen(arg) > MAX_ARGSTR_LEN) {  		pr_info("Argument is too long.: %s\n",  arg); @@ -674,7 +836,13 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp,  	}  	parg->offset = tp->size;  	tp->size += parg->type->size; -	return __parse_probe_arg(arg, parg->type, &parg->fetch, is_return); +	ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return); +	if (ret >= 0) { +		parg->fetch_size.fn = get_fetch_size_function(parg->type, +							      parg->fetch.fn); +		parg->fetch_size.data = parg->fetch.data; +	} +	return ret;  }  /* Return 1 if name is reserved or already used by another argument */ @@ -757,14 +925,17 @@ static int create_trace_probe(int argc, char **argv)  			pr_info("Delete command needs an event name.\n");  			return -EINVAL;  		} +		mutex_lock(&probe_lock);  		tp = find_probe_event(event, group);  		if (!tp) { +			mutex_unlock(&probe_lock);  			pr_info("Event %s/%s doesn't exist.\n", group, event);  			return -ENOENT;  		}  		/* delete an event */  		unregister_trace_probe(tp);  		free_trace_probe(tp); +		mutex_unlock(&probe_lock);  		return 0;  	} @@ -1043,6 +1214,54 @@ static const struct file_operations kprobe_profile_ops = {  	.release        = seq_release,  }; +/* Sum up total data length for dynamic arraies (strings) */ +static __kprobes int __get_data_size(struct trace_probe *tp, +				     struct pt_regs *regs) +{ +	int i, ret = 0; +	u32 len; + +	for (i = 0; i < tp->nr_args; i++) +		if (unlikely(tp->args[i].fetch_size.fn)) { +			call_fetch(&tp->args[i].fetch_size, regs, &len); +			ret += len; +		} + +	return ret; +} + +/* Store the value of each argument */ +static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp, +				       struct pt_regs *regs, +				       u8 *data, int maxlen) +{ +	int i; +	u32 end = tp->size; +	u32 *dl;	/* Data (relative) location */ + +	for (i = 0; i < tp->nr_args; i++) { +		if (unlikely(tp->args[i].fetch_size.fn)) { +			/* +			 * First, we set the relative location and +			 * maximum data length to *dl +			 */ +			dl = (u32 *)(data + tp->args[i].offset); +			*dl = make_data_rloc(maxlen, end - tp->args[i].offset); +			/* Then try to fetch string or dynamic array data */ +			call_fetch(&tp->args[i].fetch, regs, dl); +			/* Reduce maximum length */ +			end += get_rloc_len(*dl); +			maxlen -= get_rloc_len(*dl); +			/* Trick here, convert data_rloc to data_loc */ +			*dl = convert_rloc_to_loc(*dl, +				 ent_size + tp->args[i].offset); +		} else +			/* Just fetching data normally */ +			call_fetch(&tp->args[i].fetch, regs, +				   data + tp->args[i].offset); +	} +} +  /* Kprobe handler */  static __kprobes void kprobe_trace_func(struct kprobe *kp, struct 
pt_regs *regs)  { @@ -1050,8 +1269,7 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)  	struct kprobe_trace_entry_head *entry;  	struct ring_buffer_event *event;  	struct ring_buffer *buffer; -	u8 *data; -	int size, i, pc; +	int size, dsize, pc;  	unsigned long irq_flags;  	struct ftrace_event_call *call = &tp->call; @@ -1060,7 +1278,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)  	local_save_flags(irq_flags);  	pc = preempt_count(); -	size = sizeof(*entry) + tp->size; +	dsize = __get_data_size(tp, regs); +	size = sizeof(*entry) + tp->size + dsize;  	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,  						  size, irq_flags, pc); @@ -1069,9 +1288,7 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)  	entry = ring_buffer_event_data(event);  	entry->ip = (unsigned long)kp->addr; -	data = (u8 *)&entry[1]; -	for (i = 0; i < tp->nr_args; i++) -		call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); +	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);  	if (!filter_current_check_discard(buffer, call, entry, event))  		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); @@ -1085,15 +1302,15 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,  	struct kretprobe_trace_entry_head *entry;  	struct ring_buffer_event *event;  	struct ring_buffer *buffer; -	u8 *data; -	int size, i, pc; +	int size, pc, dsize;  	unsigned long irq_flags;  	struct ftrace_event_call *call = &tp->call;  	local_save_flags(irq_flags);  	pc = preempt_count(); -	size = sizeof(*entry) + tp->size; +	dsize = __get_data_size(tp, regs); +	size = sizeof(*entry) + tp->size + dsize;  	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,  						  size, irq_flags, pc); @@ -1103,9 +1320,7 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,  	entry = ring_buffer_event_data(event);  	entry->func = (unsigned long)tp->rp.kp.addr;  	entry->ret_ip = (unsigned long)ri->ret_addr; -	data = (u8 *)&entry[1]; -	for (i = 0; i < tp->nr_args; i++) -		call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); +	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);  	if (!filter_current_check_discard(buffer, call, entry, event))  		trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); @@ -1137,7 +1352,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags,  	data = (u8 *)&field[1];  	for (i = 0; i < tp->nr_args; i++)  		if (!tp->args[i].type->print(s, tp->args[i].name, -					     data + tp->args[i].offset)) +					     data + tp->args[i].offset, field))  			goto partial;  	if (!trace_seq_puts(s, "\n")) @@ -1179,7 +1394,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,  	data = (u8 *)&field[1];  	for (i = 0; i < tp->nr_args; i++)  		if (!tp->args[i].type->print(s, tp->args[i].name, -					     data + tp->args[i].offset)) +					     data + tp->args[i].offset, field))  			goto partial;  	if (!trace_seq_puts(s, "\n")) @@ -1214,11 +1429,6 @@ static void probe_event_disable(struct ftrace_event_call *call)  	}  } -static int probe_event_raw_init(struct ftrace_event_call *event_call) -{ -	return 0; -} -  #undef DEFINE_FIELD  #define DEFINE_FIELD(type, item, name, is_signed)			\  	do {								\ @@ -1239,7 +1449,7 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)  	DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);  	/* Set argument names as fields */  	
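[Illustrative note, not part of the patch: kprobe_trace_func() and kretprobe_trace_func() above now reserve sizeof(*entry) + tp->size + dsize bytes, where __get_data_size() sums the lengths of all string arguments and store_trace_args() fills the fixed-size slots first, appending the variable-length string data behind them. Each string slot is a 32-bit "data location" packing length and offset; convert_rloc_to_loc() merely rebases that offset from the slot itself to the start of the event entry, and via the new fmttype string ("__data_loc char[]") the slot shows up like any other dynamic array in the event's format and print fmt (__get_str() in the latter). A tiny standalone sketch of the packing arithmetic, mirroring the make_data_rloc()/get_rloc_len()/get_rloc_offs() macros earlier in this file:

	#include <stdint.h>
	#include <stdio.h>

	/* Same 16/16 bit split as the kernel's make_data_rloc()/get_rloc_*() macros. */
	static inline uint32_t make_rloc(uint32_t len, uint32_t offs)
	{
		return (len << 16) | (offs & 0xffff);
	}

	int main(void)
	{
		/* e.g. a 6-byte string ("hello" plus NUL) stored 24 bytes past its slot */
		uint32_t dl = make_rloc(6, 24);

		printf("len=%u offs=%u\n",
		       (unsigned)(dl >> 16), (unsigned)(dl & 0xffff));	/* len=6 offs=24 */
		return 0;
	}
]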
for (i = 0; i < tp->nr_args; i++) { -		ret = trace_define_field(event_call, tp->args[i].type->name, +		ret = trace_define_field(event_call, tp->args[i].type->fmttype,  					 tp->args[i].name,  					 sizeof(field) + tp->args[i].offset,  					 tp->args[i].type->size, @@ -1261,7 +1471,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)  	DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);  	/* Set argument names as fields */  	for (i = 0; i < tp->nr_args; i++) { -		ret = trace_define_field(event_call, tp->args[i].type->name, +		ret = trace_define_field(event_call, tp->args[i].type->fmttype,  					 tp->args[i].name,  					 sizeof(field) + tp->args[i].offset,  					 tp->args[i].type->size, @@ -1301,8 +1511,13 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)  	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);  	for (i = 0; i < tp->nr_args; i++) { -		pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", -				tp->args[i].name); +		if (strcmp(tp->args[i].type->name, "string") == 0) +			pos += snprintf(buf + pos, LEN_OR_ZERO, +					", __get_str(%s)", +					tp->args[i].name); +		else +			pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", +					tp->args[i].name);  	}  #undef LEN_OR_ZERO @@ -1339,11 +1554,11 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,  	struct ftrace_event_call *call = &tp->call;  	struct kprobe_trace_entry_head *entry;  	struct hlist_head *head; -	u8 *data; -	int size, __size, i; +	int size, __size, dsize;  	int rctx; -	__size = sizeof(*entry) + tp->size; +	dsize = __get_data_size(tp, regs); +	__size = sizeof(*entry) + tp->size + dsize;  	size = ALIGN(__size + sizeof(u32), sizeof(u64));  	size -= sizeof(u32);  	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, @@ -1355,9 +1570,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,  		return;  	entry->ip = (unsigned long)kp->addr; -	data = (u8 *)&entry[1]; -	for (i = 0; i < tp->nr_args; i++) -		call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); +	memset(&entry[1], 0, dsize); +	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);  	head = this_cpu_ptr(call->perf_events);  	perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); @@ -1371,11 +1585,11 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,  	struct ftrace_event_call *call = &tp->call;  	struct kretprobe_trace_entry_head *entry;  	struct hlist_head *head; -	u8 *data; -	int size, __size, i; +	int size, __size, dsize;  	int rctx; -	__size = sizeof(*entry) + tp->size; +	dsize = __get_data_size(tp, regs); +	__size = sizeof(*entry) + tp->size + dsize;  	size = ALIGN(__size + sizeof(u32), sizeof(u64));  	size -= sizeof(u32);  	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, @@ -1388,9 +1602,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,  	entry->func = (unsigned long)tp->rp.kp.addr;  	entry->ret_ip = (unsigned long)ri->ret_addr; -	data = (u8 *)&entry[1]; -	for (i = 0; i < tp->nr_args; i++) -		call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); +	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);  	head = this_cpu_ptr(call->perf_events);  	perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); @@ -1486,15 +1698,12 @@ static int register_probe_event(struct trace_probe *tp)  	int ret;  	/* Initialize ftrace_event_call */ +	INIT_LIST_HEAD(&call->class->fields);  	if (probe_is_return(tp)) { -		INIT_LIST_HEAD(&call->class->fields);  		call->event.funcs = 
&kretprobe_funcs; -		call->class->raw_init = probe_event_raw_init;  		call->class->define_fields = kretprobe_event_define_fields;  	} else { -		INIT_LIST_HEAD(&call->class->fields);  		call->event.funcs = &kprobe_funcs; -		call->class->raw_init = probe_event_raw_init;  		call->class->define_fields = kprobe_event_define_fields;  	}  	if (set_print_fmt(tp) < 0) diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c deleted file mode 100644 index 8eaf00749b6..00000000000 --- a/kernel/trace/trace_ksym.c +++ /dev/null @@ -1,508 +0,0 @@ -/* - * trace_ksym.c - Kernel Symbol Tracer - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2009 - */ - -#include <linux/kallsyms.h> -#include <linux/uaccess.h> -#include <linux/debugfs.h> -#include <linux/ftrace.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/fs.h> - -#include "trace_output.h" -#include "trace.h" - -#include <linux/hw_breakpoint.h> -#include <asm/hw_breakpoint.h> - -#include <asm/atomic.h> - -#define KSYM_TRACER_OP_LEN 3 /* rw- */ - -struct trace_ksym { -	struct perf_event	**ksym_hbp; -	struct perf_event_attr	attr; -#ifdef CONFIG_PROFILE_KSYM_TRACER -	atomic64_t		counter; -#endif -	struct hlist_node	ksym_hlist; -}; - -static struct trace_array *ksym_trace_array; - -static unsigned int ksym_tracing_enabled; - -static HLIST_HEAD(ksym_filter_head); - -static DEFINE_MUTEX(ksym_tracer_mutex); - -#ifdef CONFIG_PROFILE_KSYM_TRACER - -#define MAX_UL_INT 0xffffffff - -void ksym_collect_stats(unsigned long hbp_hit_addr) -{ -	struct hlist_node *node; -	struct trace_ksym *entry; - -	rcu_read_lock(); -	hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { -		if (entry->attr.bp_addr == hbp_hit_addr) { -			atomic64_inc(&entry->counter); -			break; -		} -	} -	rcu_read_unlock(); -} -#endif /* CONFIG_PROFILE_KSYM_TRACER */ - -void ksym_hbp_handler(struct perf_event *hbp, int nmi, -		      struct perf_sample_data *data, -		      struct pt_regs *regs) -{ -	struct ring_buffer_event *event; -	struct ksym_trace_entry *entry; -	struct ring_buffer *buffer; -	int pc; - -	if (!ksym_tracing_enabled) -		return; - -	buffer = ksym_trace_array->buffer; - -	pc = preempt_count(); - -	event = trace_buffer_lock_reserve(buffer, TRACE_KSYM, -							sizeof(*entry), 0, pc); -	if (!event) -		return; - -	entry		= ring_buffer_event_data(event); -	entry->ip	= instruction_pointer(regs); -	entry->type	= hw_breakpoint_type(hbp); -	entry->addr	= hw_breakpoint_addr(hbp); -	strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); - -#ifdef CONFIG_PROFILE_KSYM_TRACER -	ksym_collect_stats(hw_breakpoint_addr(hbp)); -#endif /* CONFIG_PROFILE_KSYM_TRACER */ - -	trace_buffer_unlock_commit(buffer, event, 0, pc); -} - -/* Valid access types are represented as - * - * rw- : Set Read/Write Access Breakpoint - * -w- : Set Write Access Breakpoint - * 
--- : Clear Breakpoints - * --x : Set Execution Break points (Not available yet) - * - */ -static int ksym_trace_get_access_type(char *str) -{ -	int access = 0; - -	if (str[0] == 'r') -		access |= HW_BREAKPOINT_R; - -	if (str[1] == 'w') -		access |= HW_BREAKPOINT_W; - -	if (str[2] == 'x') -		access |= HW_BREAKPOINT_X; - -	switch (access) { -	case HW_BREAKPOINT_R: -	case HW_BREAKPOINT_W: -	case HW_BREAKPOINT_W | HW_BREAKPOINT_R: -		return access; -	default: -		return -EINVAL; -	} -} - -/* - * There can be several possible malformed requests and we attempt to capture - * all of them. We enumerate some of the rules - * 1. We will not allow kernel symbols with ':' since it is used as a delimiter. - *    i.e. multiple ':' symbols disallowed. Possible uses are of the form - *    <module>:<ksym_name>:<op>. - * 2. No delimiter symbol ':' in the input string - * 3. Spurious operator symbols or symbols not in their respective positions - * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file - * 5. Kernel symbol not a part of /proc/kallsyms - * 6. Duplicate requests - */ -static int parse_ksym_trace_str(char *input_string, char **ksymname, -							unsigned long *addr) -{ -	int ret; - -	*ksymname = strsep(&input_string, ":"); -	*addr = kallsyms_lookup_name(*ksymname); - -	/* Check for malformed request: (2), (1) and (5) */ -	if ((!input_string) || -	    (strlen(input_string) != KSYM_TRACER_OP_LEN) || -	    (*addr == 0)) -		return -EINVAL;; - -	ret = ksym_trace_get_access_type(input_string); - -	return ret; -} - -int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) -{ -	struct trace_ksym *entry; -	int ret = -ENOMEM; - -	entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL); -	if (!entry) -		return -ENOMEM; - -	hw_breakpoint_init(&entry->attr); - -	entry->attr.bp_type = op; -	entry->attr.bp_addr = addr; -	entry->attr.bp_len = HW_BREAKPOINT_LEN_4; - -	entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr, -					ksym_hbp_handler); - -	if (IS_ERR(entry->ksym_hbp)) { -		ret = PTR_ERR(entry->ksym_hbp); -		if (ret == -ENOSPC) { -			printk(KERN_ERR "ksym_tracer: Maximum limit reached." -			" No new requests for tracing can be accepted now.\n"); -		} else { -			printk(KERN_INFO "ksym_tracer request failed. 
Try again" -					 " later!!\n"); -		} -		goto err; -	} - -	hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); - -	return 0; - -err: -	kfree(entry); - -	return ret; -} - -static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, -						size_t count, loff_t *ppos) -{ -	struct trace_ksym *entry; -	struct hlist_node *node; -	struct trace_seq *s; -	ssize_t cnt = 0; -	int ret; - -	s = kmalloc(sizeof(*s), GFP_KERNEL); -	if (!s) -		return -ENOMEM; -	trace_seq_init(s); - -	mutex_lock(&ksym_tracer_mutex); - -	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { -		ret = trace_seq_printf(s, "%pS:", -				(void *)(unsigned long)entry->attr.bp_addr); -		if (entry->attr.bp_type == HW_BREAKPOINT_R) -			ret = trace_seq_puts(s, "r--\n"); -		else if (entry->attr.bp_type == HW_BREAKPOINT_W) -			ret = trace_seq_puts(s, "-w-\n"); -		else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) -			ret = trace_seq_puts(s, "rw-\n"); -		WARN_ON_ONCE(!ret); -	} - -	cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); - -	mutex_unlock(&ksym_tracer_mutex); - -	kfree(s); - -	return cnt; -} - -static void __ksym_trace_reset(void) -{ -	struct trace_ksym *entry; -	struct hlist_node *node, *node1; - -	mutex_lock(&ksym_tracer_mutex); -	hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, -								ksym_hlist) { -		unregister_wide_hw_breakpoint(entry->ksym_hbp); -		hlist_del_rcu(&(entry->ksym_hlist)); -		synchronize_rcu(); -		kfree(entry); -	} -	mutex_unlock(&ksym_tracer_mutex); -} - -static ssize_t ksym_trace_filter_write(struct file *file, -					const char __user *buffer, -						size_t count, loff_t *ppos) -{ -	struct trace_ksym *entry; -	struct hlist_node *node; -	char *buf, *input_string, *ksymname = NULL; -	unsigned long ksym_addr = 0; -	int ret, op, changed = 0; - -	buf = kzalloc(count + 1, GFP_KERNEL); -	if (!buf) -		return -ENOMEM; - -	ret = -EFAULT; -	if (copy_from_user(buf, buffer, count)) -		goto out; - -	buf[count] = '\0'; -	input_string = strstrip(buf); - -	/* -	 * Clear all breakpoints if: -	 * 1: echo > ksym_trace_filter -	 * 2: echo 0 > ksym_trace_filter -	 * 3: echo "*:---" > ksym_trace_filter -	 */ -	if (!input_string[0] || !strcmp(input_string, "0") || -	    !strcmp(input_string, "*:---")) { -		__ksym_trace_reset(); -		ret = 0; -		goto out; -	} - -	ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); -	if (ret < 0) -		goto out; - -	mutex_lock(&ksym_tracer_mutex); - -	ret = -EINVAL; -	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { -		if (entry->attr.bp_addr == ksym_addr) { -			/* Check for malformed request: (6) */ -			if (entry->attr.bp_type != op) -				changed = 1; -			else -				goto out_unlock; -			break; -		} -	} -	if (changed) { -		unregister_wide_hw_breakpoint(entry->ksym_hbp); -		entry->attr.bp_type = op; -		ret = 0; -		if (op > 0) { -			entry->ksym_hbp = -				register_wide_hw_breakpoint(&entry->attr, -					ksym_hbp_handler); -			if (IS_ERR(entry->ksym_hbp)) -				ret = PTR_ERR(entry->ksym_hbp); -			else -				goto out_unlock; -		} -		/* Error or "symbol:---" case: drop it */ -		hlist_del_rcu(&(entry->ksym_hlist)); -		synchronize_rcu(); -		kfree(entry); -		goto out_unlock; -	} else { -		/* Check for malformed request: (4) */ -		if (op) -			ret = process_new_ksym_entry(ksymname, op, ksym_addr); -	} -out_unlock: -	mutex_unlock(&ksym_tracer_mutex); -out: -	kfree(buf); -	return !ret ? 
count : ret; -} - -static const struct file_operations ksym_tracing_fops = { -	.open		= tracing_open_generic, -	.read		= ksym_trace_filter_read, -	.write		= ksym_trace_filter_write, -}; - -static void ksym_trace_reset(struct trace_array *tr) -{ -	ksym_tracing_enabled = 0; -	__ksym_trace_reset(); -} - -static int ksym_trace_init(struct trace_array *tr) -{ -	int cpu, ret = 0; - -	for_each_online_cpu(cpu) -		tracing_reset(tr, cpu); -	ksym_tracing_enabled = 1; -	ksym_trace_array = tr; - -	return ret; -} - -static void ksym_trace_print_header(struct seq_file *m) -{ -	seq_puts(m, -		 "#       TASK-PID   CPU#      Symbol                    " -		 "Type    Function\n"); -	seq_puts(m, -		 "#          |        |          |                       " -		 " |         |\n"); -} - -static enum print_line_t ksym_trace_output(struct trace_iterator *iter) -{ -	struct trace_entry *entry = iter->ent; -	struct trace_seq *s = &iter->seq; -	struct ksym_trace_entry *field; -	char str[KSYM_SYMBOL_LEN]; -	int ret; - -	if (entry->type != TRACE_KSYM) -		return TRACE_TYPE_UNHANDLED; - -	trace_assign_type(field, entry); - -	ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd, -				entry->pid, iter->cpu, (char *)field->addr); -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; - -	switch (field->type) { -	case HW_BREAKPOINT_R: -		ret = trace_seq_printf(s, " R  "); -		break; -	case HW_BREAKPOINT_W: -		ret = trace_seq_printf(s, " W  "); -		break; -	case HW_BREAKPOINT_R | HW_BREAKPOINT_W: -		ret = trace_seq_printf(s, " RW "); -		break; -	default: -		return TRACE_TYPE_PARTIAL_LINE; -	} - -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; - -	sprint_symbol(str, field->ip); -	ret = trace_seq_printf(s, "%s\n", str); -	if (!ret) -		return TRACE_TYPE_PARTIAL_LINE; - -	return TRACE_TYPE_HANDLED; -} - -struct tracer ksym_tracer __read_mostly = -{ -	.name		= "ksym_tracer", -	.init		= ksym_trace_init, -	.reset		= ksym_trace_reset, -#ifdef CONFIG_FTRACE_SELFTEST -	.selftest	= trace_selftest_startup_ksym, -#endif -	.print_header   = ksym_trace_print_header, -	.print_line	= ksym_trace_output -}; - -#ifdef CONFIG_PROFILE_KSYM_TRACER -static int ksym_profile_show(struct seq_file *m, void *v) -{ -	struct hlist_node *node; -	struct trace_ksym *entry; -	int access_type = 0; -	char fn_name[KSYM_NAME_LEN]; - -	seq_puts(m, "  Access Type "); -	seq_puts(m, "  Symbol                                       Counter\n"); -	seq_puts(m, "  ----------- "); -	seq_puts(m, "  ------                                       -------\n"); - -	rcu_read_lock(); -	hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { - -		access_type = entry->attr.bp_type; - -		switch (access_type) { -		case HW_BREAKPOINT_R: -			seq_puts(m, "  R           "); -			break; -		case HW_BREAKPOINT_W: -			seq_puts(m, "  W           "); -			break; -		case HW_BREAKPOINT_R | HW_BREAKPOINT_W: -			seq_puts(m, "  RW          "); -			break; -		default: -			seq_puts(m, "  NA          "); -		} - -		if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0) -			seq_printf(m, "  %-36s", fn_name); -		else -			seq_printf(m, "  %-36s", "<NA>"); -		seq_printf(m, " %15llu\n", -			   (unsigned long long)atomic64_read(&entry->counter)); -	} -	rcu_read_unlock(); - -	return 0; -} - -static int ksym_profile_open(struct inode *node, struct file *file) -{ -	return single_open(file, ksym_profile_show, NULL); -} - -static const struct file_operations ksym_profile_fops = { -	.open		= ksym_profile_open, -	.read		= seq_read, -	.llseek		= seq_lseek, -	.release	= single_release, -}; -#endif /* 
CONFIG_PROFILE_KSYM_TRACER */ - -__init static int init_ksym_trace(void) -{ -	struct dentry *d_tracer; - -	d_tracer = tracing_init_dentry(); - -	trace_create_file("ksym_trace_filter", 0644, d_tracer, -			  NULL, &ksym_tracing_fops); - -#ifdef CONFIG_PROFILE_KSYM_TRACER -	trace_create_file("ksym_profile", 0444, d_tracer, -			  NULL, &ksym_profile_fops); -#endif - -	return register_tracer(&ksym_tracer); -} -device_initcall(init_ksym_trace); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 57c1b459647..02272baa220 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -16,9 +16,6 @@  DECLARE_RWSEM(trace_event_mutex); -DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq); -EXPORT_PER_CPU_SYMBOL(ftrace_event_seq); -  static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;  static int next_event_type = __TRACE_LAST_TYPE + 1; @@ -1069,65 +1066,6 @@ static struct trace_event trace_wake_event = {  	.funcs		= &trace_wake_funcs,  }; -/* TRACE_SPECIAL */ -static enum print_line_t trace_special_print(struct trace_iterator *iter, -					     int flags, struct trace_event *event) -{ -	struct special_entry *field; - -	trace_assign_type(field, iter->ent); - -	if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n", -			      field->arg1, -			      field->arg2, -			      field->arg3)) -		return TRACE_TYPE_PARTIAL_LINE; - -	return TRACE_TYPE_HANDLED; -} - -static enum print_line_t trace_special_hex(struct trace_iterator *iter, -					   int flags, struct trace_event *event) -{ -	struct special_entry *field; -	struct trace_seq *s = &iter->seq; - -	trace_assign_type(field, iter->ent); - -	SEQ_PUT_HEX_FIELD_RET(s, field->arg1); -	SEQ_PUT_HEX_FIELD_RET(s, field->arg2); -	SEQ_PUT_HEX_FIELD_RET(s, field->arg3); - -	return TRACE_TYPE_HANDLED; -} - -static enum print_line_t trace_special_bin(struct trace_iterator *iter, -					   int flags, struct trace_event *event) -{ -	struct special_entry *field; -	struct trace_seq *s = &iter->seq; - -	trace_assign_type(field, iter->ent); - -	SEQ_PUT_FIELD_RET(s, field->arg1); -	SEQ_PUT_FIELD_RET(s, field->arg2); -	SEQ_PUT_FIELD_RET(s, field->arg3); - -	return TRACE_TYPE_HANDLED; -} - -static struct trace_event_functions trace_special_funcs = { -	.trace		= trace_special_print, -	.raw		= trace_special_print, -	.hex		= trace_special_hex, -	.binary		= trace_special_bin, -}; - -static struct trace_event trace_special_event = { -	.type		= TRACE_SPECIAL, -	.funcs		= &trace_special_funcs, -}; -  /* TRACE_STACK */  static enum print_line_t trace_stack_print(struct trace_iterator *iter, @@ -1161,9 +1099,6 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,  static struct trace_event_functions trace_stack_funcs = {  	.trace		= trace_stack_print, -	.raw		= trace_special_print, -	.hex		= trace_special_hex, -	.binary		= trace_special_bin,  };  static struct trace_event trace_stack_event = { @@ -1194,9 +1129,6 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,  static struct trace_event_functions trace_user_stack_funcs = {  	.trace		= trace_user_stack_print, -	.raw		= trace_special_print, -	.hex		= trace_special_hex, -	.binary		= trace_special_bin,  };  static struct trace_event trace_user_stack_event = { @@ -1314,7 +1246,6 @@ static struct trace_event *events[] __initdata = {  	&trace_fn_event,  	&trace_ctx_event,  	&trace_wake_event, -	&trace_special_event,  	&trace_stack_event,  	&trace_user_stack_event,  	&trace_bprint_event, diff --git a/kernel/trace/trace_sched_wakeup.c 
b/kernel/trace/trace_sched_wakeup.c index 0e73bc2ef8c..4086eae6e81 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -46,7 +46,6 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)  	struct trace_array_cpu *data;  	unsigned long flags;  	long disabled; -	int resched;  	int cpu;  	int pc; @@ -54,7 +53,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)  		return;  	pc = preempt_count(); -	resched = ftrace_preempt_disable(); +	preempt_disable_notrace();  	cpu = raw_smp_processor_id();  	if (cpu != wakeup_current_cpu) @@ -74,7 +73,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)   out:  	atomic_dec(&data->disabled);   out_enable: -	ftrace_preempt_enable(resched); +	preempt_enable_notrace();  }  static struct ftrace_ops trace_ops __read_mostly = @@ -383,6 +382,7 @@ static struct tracer wakeup_tracer __read_mostly =  #ifdef CONFIG_FTRACE_SELFTEST  	.selftest    = trace_selftest_startup_wakeup,  #endif +	.use_max_tr	= 1,  };  static struct tracer wakeup_rt_tracer __read_mostly = @@ -397,6 +397,7 @@ static struct tracer wakeup_rt_tracer __read_mostly =  #ifdef CONFIG_FTRACE_SELFTEST  	.selftest    = trace_selftest_startup_wakeup,  #endif +	.use_max_tr	= 1,  };  __init static int init_wakeup_tracer(void) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 250e7f9bd2f..155a415b320 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -13,11 +13,9 @@ static inline int trace_valid_entry(struct trace_entry *entry)  	case TRACE_WAKE:  	case TRACE_STACK:  	case TRACE_PRINT: -	case TRACE_SPECIAL:  	case TRACE_BRANCH:  	case TRACE_GRAPH_ENT:  	case TRACE_GRAPH_RET: -	case TRACE_KSYM:  		return 1;  	}  	return 0; @@ -691,38 +689,6 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr  }  #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ -#ifdef CONFIG_SYSPROF_TRACER -int -trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) -{ -	unsigned long count; -	int ret; - -	/* start the tracing */ -	ret = tracer_init(trace, tr); -	if (ret) { -		warn_failed_init_tracer(trace, ret); -		return ret; -	} - -	/* Sleep for a 1/10 of a second */ -	msleep(100); -	/* stop the tracing. */ -	tracing_stop(); -	/* check the trace buffer */ -	ret = trace_test_buffer(tr, &count); -	trace->reset(tr); -	tracing_start(); - -	if (!ret && !count) { -		printk(KERN_CONT ".. 
no entries found .."); -		ret = -1; -	} - -	return ret; -} -#endif /* CONFIG_SYSPROF_TRACER */ -  #ifdef CONFIG_BRANCH_TRACER  int  trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) @@ -755,56 +721,3 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)  }  #endif /* CONFIG_BRANCH_TRACER */ -#ifdef CONFIG_KSYM_TRACER -static int ksym_selftest_dummy; - -int -trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) -{ -	unsigned long count; -	int ret; - -	/* start the tracing */ -	ret = tracer_init(trace, tr); -	if (ret) { -		warn_failed_init_tracer(trace, ret); -		return ret; -	} - -	ksym_selftest_dummy = 0; -	/* Register the read-write tracing request */ - -	ret = process_new_ksym_entry("ksym_selftest_dummy", -				     HW_BREAKPOINT_R | HW_BREAKPOINT_W, -					(unsigned long)(&ksym_selftest_dummy)); - -	if (ret < 0) { -		printk(KERN_CONT "ksym_trace read-write startup test failed\n"); -		goto ret_path; -	} -	/* Perform a read and a write operation over the dummy variable to -	 * trigger the tracer -	 */ -	if (ksym_selftest_dummy == 0) -		ksym_selftest_dummy++; - -	/* stop the tracing. */ -	tracing_stop(); -	/* check the trace buffer */ -	ret = trace_test_buffer(tr, &count); -	trace->reset(tr); -	tracing_start(); - -	/* read & write operations - one each is performed on the dummy variable -	 * triggering two entries in the trace buffer -	 */ -	if (!ret && count != 2) { -		printk(KERN_CONT "Ksym tracer startup test failed"); -		ret = -1; -	} - -ret_path: -	return ret; -} -#endif /* CONFIG_KSYM_TRACER */ - diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index f4bc9b27de5..056468eae7c 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -110,12 +110,12 @@ static inline void check_stack(void)  static void  stack_trace_call(unsigned long ip, unsigned long parent_ip)  { -	int cpu, resched; +	int cpu;  	if (unlikely(!ftrace_enabled || stack_trace_disabled))  		return; -	resched = ftrace_preempt_disable(); +	preempt_disable_notrace();  	cpu = raw_smp_processor_id();  	/* no atomic needed, we only modify this variable by this cpu */ @@ -127,7 +127,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)   out:  	per_cpu(trace_active, cpu)--;  	/* prevent recursion in schedule */ -	ftrace_preempt_enable(resched); +	preempt_enable_notrace();  }  static struct ftrace_ops trace_ops __read_mostly = diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 34e35804304..bac752f0cfb 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -23,6 +23,9 @@ static int syscall_exit_register(struct ftrace_event_call *event,  static int syscall_enter_define_fields(struct ftrace_event_call *call);  static int syscall_exit_define_fields(struct ftrace_event_call *call); +/* All syscall exit events have the same fields */ +static LIST_HEAD(syscall_exit_fields); +  static struct list_head *  syscall_get_enter_fields(struct ftrace_event_call *call)  { @@ -34,9 +37,7 @@ syscall_get_enter_fields(struct ftrace_event_call *call)  static struct list_head *  syscall_get_exit_fields(struct ftrace_event_call *call)  { -	struct syscall_metadata *entry = call->data; - -	return &entry->exit_fields; +	return &syscall_exit_fields;  }  struct trace_event_functions enter_syscall_print_funcs = { diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c deleted file mode 100644 index a7974a552ca..00000000000 --- a/kernel/trace/trace_sysprof.c +++ /dev/null 
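[Illustrative note, not part of the patch: several tracer callbacks in this series (the function, wakeup and stack tracers above, plus the event self-test earlier) drop the ftrace_preempt_disable()/ftrace_preempt_enable() pair and its saved "resched" flag in favour of plain preempt_disable_notrace()/preempt_enable_notrace(). A minimal sketch of the resulting callback shape; my_trace_call() is hypothetical, the preemption helpers are the real kernel ones:

	static void my_trace_call(unsigned long ip, unsigned long parent_ip)
	{
		int cpu;

		preempt_disable_notrace();	/* no resched bookkeeping to carry around */
		cpu = raw_smp_processor_id();

		/* ... record something for this cpu ... */

		preempt_enable_notrace();
	}
]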
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
deleted file mode 100644
index a7974a552ca..00000000000
--- a/kernel/trace/trace_sysprof.c
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- * trace stack traces
- *
- * Copyright (C) 2004-2008, Soeren Sandmann
- * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
- * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
- */
-#include <linux/kallsyms.h>
-#include <linux/debugfs.h>
-#include <linux/hrtimer.h>
-#include <linux/uaccess.h>
-#include <linux/ftrace.h>
-#include <linux/module.h>
-#include <linux/irq.h>
-#include <linux/fs.h>
-
-#include <asm/stacktrace.h>
-
-#include "trace.h"
-
-static struct trace_array	*sysprof_trace;
-static int __read_mostly	tracer_enabled;
-
-/*
- * 1 msec sample interval by default:
- */
-static unsigned long sample_period = 1000000;
-static const unsigned int sample_max_depth = 512;
-
-static DEFINE_MUTEX(sample_timer_lock);
-/*
- * Per CPU hrtimers that do the profiling:
- */
-static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
-
-struct stack_frame {
-	const void __user	*next_fp;
-	unsigned long		return_address;
-};
-
-static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
-{
-	int ret;
-
-	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
-		return 0;
-
-	ret = 1;
-	pagefault_disable();
-	if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
-		ret = 0;
-	pagefault_enable();
-
-	return ret;
-}
-
-struct backtrace_info {
-	struct trace_array_cpu	*data;
-	struct trace_array	*tr;
-	int			pos;
-};
-
-static void
-backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
-{
-	/* Ignore warnings */
-}
-
-static void backtrace_warning(void *data, char *msg)
-{
-	/* Ignore warnings */
-}
-
-static int backtrace_stack(void *data, char *name)
-{
-	/* Don't bother with IRQ stacks for now */
-	return -1;
-}
-
-static void backtrace_address(void *data, unsigned long addr, int reliable)
-{
-	struct backtrace_info *info = data;
-
-	if (info->pos < sample_max_depth && reliable) {
-		__trace_special(info->tr, info->data, 1, addr, 0);
-
-		info->pos++;
-	}
-}
-
-static const struct stacktrace_ops backtrace_ops = {
-	.warning		= backtrace_warning,
-	.warning_symbol		= backtrace_warning_symbol,
-	.stack			= backtrace_stack,
-	.address		= backtrace_address,
-	.walk_stack		= print_context_stack,
-};
-
-static int
-trace_kernel(struct pt_regs *regs, struct trace_array *tr,
-	     struct trace_array_cpu *data)
-{
-	struct backtrace_info info;
-	unsigned long bp;
-	char *stack;
-
-	info.tr = tr;
-	info.data = data;
-	info.pos = 1;
-
-	__trace_special(info.tr, info.data, 1, regs->ip, 0);
-
-	stack = ((char *)regs + sizeof(struct pt_regs));
-#ifdef CONFIG_FRAME_POINTER
-	bp = regs->bp;
-#else
-	bp = 0;
-#endif
-
-	dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, &info);
-
-	return info.pos;
-}
-
-static void timer_notify(struct pt_regs *regs, int cpu)
-{
-	struct trace_array_cpu *data;
-	struct stack_frame frame;
-	struct trace_array *tr;
-	const void __user *fp;
-	int is_user;
-	int i;
-
-	if (!regs)
-		return;
-
-	tr = sysprof_trace;
-	data = tr->data[cpu];
-	is_user = user_mode(regs);
-
-	if (!current || current->pid == 0)
-		return;
-
-	if (is_user && current->state != TASK_RUNNING)
-		return;
-
-	__trace_special(tr, data, 0, 0, current->pid);
-
-	if (!is_user)
-		i = trace_kernel(regs, tr, data);
-	else
-		i = 0;
-
-	/*
-	 * Trace user stack if we are not a kernel thread
-	 */
-	if (current->mm && i < sample_max_depth) {
-		regs = (struct pt_regs *)current->thread.sp0 - 1;
-
-		fp = (void __user *)regs->bp;
-
-		__trace_special(tr, data, 2, regs->ip, 0);
-
-		while (i < sample_max_depth) {
-			frame.next_fp = NULL;
-			frame.return_address = 0;
-			if (!copy_stack_frame(fp, &frame))
-				break;
-			if ((unsigned long)fp < regs->sp)
-				break;
-
-			__trace_special(tr, data, 2, frame.return_address,
-					(unsigned long)fp);
-			fp = frame.next_fp;
-
-			i++;
-		}
-
-	}
-
-	/*
-	 * Special trace entry if we overflow the max depth:
-	 */
-	if (i == sample_max_depth)
-		__trace_special(tr, data, -1, -1, -1);
-
-	__trace_special(tr, data, 3, current->pid, i);
-}
-
-static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
-{
-	/* trace here */
-	timer_notify(get_irq_regs(), smp_processor_id());
-
-	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
-
-	return HRTIMER_RESTART;
-}
-
-static void start_stack_timer(void *unused)
-{
-	struct hrtimer *hrtimer = &__get_cpu_var(stack_trace_hrtimer);
-
-	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	hrtimer->function = stack_trace_timer_fn;
-
-	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
-		      HRTIMER_MODE_REL_PINNED);
-}
-
-static void start_stack_timers(void)
-{
-	on_each_cpu(start_stack_timer, NULL, 1);
-}
-
-static void stop_stack_timer(int cpu)
-{
-	struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
-
-	hrtimer_cancel(hrtimer);
-}
-
-static void stop_stack_timers(void)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu)
-		stop_stack_timer(cpu);
-}
-
-static void stop_stack_trace(struct trace_array *tr)
-{
-	mutex_lock(&sample_timer_lock);
-	stop_stack_timers();
-	tracer_enabled = 0;
-	mutex_unlock(&sample_timer_lock);
-}
-
-static int stack_trace_init(struct trace_array *tr)
-{
-	sysprof_trace = tr;
-
-	tracing_start_cmdline_record();
-
-	mutex_lock(&sample_timer_lock);
-	start_stack_timers();
-	tracer_enabled = 1;
-	mutex_unlock(&sample_timer_lock);
-	return 0;
-}
-
-static void stack_trace_reset(struct trace_array *tr)
-{
-	tracing_stop_cmdline_record();
-	stop_stack_trace(tr);
-}
-
-static struct tracer stack_trace __read_mostly =
-{
-	.name		= "sysprof",
-	.init		= stack_trace_init,
-	.reset		= stack_trace_reset,
-#ifdef CONFIG_FTRACE_SELFTEST
-	.selftest    = trace_selftest_startup_sysprof,
-#endif
-};
-
-__init static int init_stack_trace(void)
-{
-	return register_tracer(&stack_trace);
-}
-device_initcall(init_stack_trace);
-
-#define MAX_LONG_DIGITS 22
-
-static ssize_t
-sysprof_sample_read(struct file *filp, char __user *ubuf,
-		    size_t cnt, loff_t *ppos)
-{
-	char buf[MAX_LONG_DIGITS];
-	int r;
-
-	r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period));
-
-	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-}
-
-static ssize_t
-sysprof_sample_write(struct file *filp, const char __user *ubuf,
-		     size_t cnt, loff_t *ppos)
-{
-	char buf[MAX_LONG_DIGITS];
-	unsigned long val;
-
-	if (cnt > MAX_LONG_DIGITS-1)
-		cnt = MAX_LONG_DIGITS-1;
-
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-
-	buf[cnt] = 0;
-
-	val = simple_strtoul(buf, NULL, 10);
-	/*
-	 * Enforce a minimum sample period of 100 usecs:
-	 */
-	if (val < 100)
-		val = 100;
-
-	mutex_lock(&sample_timer_lock);
-	stop_stack_timers();
-	sample_period = val * 1000;
-	start_stack_timers();
-	mutex_unlock(&sample_timer_lock);
-
-	return cnt;
-}
-
-static const struct file_operations sysprof_sample_fops = {
-	.read		= sysprof_sample_read,
-	.write		= sysprof_sample_write,
-};
-
-void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
-{
-
-	trace_create_file("sysprof_sample_period", 0644,
-			d_tracer, NULL, &sysprof_sample_fops);
-}
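For reference, the removed sysprof tracer drove its sampling from one hrtimer per CPU. A condensed sketch of that pattern, distilled from the deleted file above; the names sample_hrtimer and take_sample are illustrative stand-ins rather than anything retained in the tree.

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/percpu.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(struct hrtimer, sample_hrtimer);
static unsigned long sample_period_ns = 1000000;	/* 1 msec, as in the removed file */

static enum hrtimer_restart sample_timer_fn(struct hrtimer *hrtimer)
{
	/* take_sample(get_irq_regs(), smp_processor_id()); -- hypothetical hook */
	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period_ns));
	return HRTIMER_RESTART;
}

static void start_sample_timer(void *unused)
{
	struct hrtimer *hrtimer = &__get_cpu_var(sample_hrtimer);

	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = sample_timer_fn;
	hrtimer_start(hrtimer, ns_to_ktime(sample_period_ns),
		      HRTIMER_MODE_REL_PINNED);
}

/* arm one timer per cpu with: on_each_cpu(start_sample_timer, NULL, 1); */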