Diffstat (limited to 'kernel')
 kernel/events/core.c              |  8
 kernel/fork.c                     |  5
 kernel/futex.c                    | 46
 kernel/printk.c                   | 80
 kernel/signal.c                   |  5
 kernel/sys.c                      | 57
 kernel/trace/Kconfig              | 24
 kernel/trace/ftrace.c             |  4
 kernel/trace/trace.c              | 86
 kernel/trace/trace.h              |  6
 kernel/trace/trace_irqsoff.c      | 19
 kernel/trace/trace_sched_wakeup.c | 18
 kernel/user_namespace.c           |  4
 kernel/workqueue.c                | 51
 14 files changed, 261 insertions(+), 152 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c index b0cd86501c3..59412d037ee 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4434,12 +4434,15 @@ static void perf_event_task_event(struct perf_task_event *task_event)  			if (ctxn < 0)  				goto next;  			ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); +			if (ctx) +				perf_event_task_ctx(ctx, task_event);  		} -		if (ctx) -			perf_event_task_ctx(ctx, task_event);  next:  		put_cpu_ptr(pmu->pmu_cpu_context);  	} +	if (task_event->task_ctx) +		perf_event_task_ctx(task_event->task_ctx, task_event); +  	rcu_read_unlock();  } @@ -5647,6 +5650,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)  		event->attr.sample_period = NSEC_PER_SEC / freq;  		hwc->sample_period = event->attr.sample_period;  		local64_set(&hwc->period_left, hwc->sample_period); +		hwc->last_period = hwc->sample_period;  		event->attr.freq = 0;  	}  } diff --git a/kernel/fork.c b/kernel/fork.c index 8d932b1c905..1766d324d5e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1141,6 +1141,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,  	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))  		return ERR_PTR(-EINVAL); +	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) +		return ERR_PTR(-EINVAL); +  	/*  	 * Thread groups must share signals as well, and detached threads  	 * can only be started up within the thread group. @@ -1807,7 +1810,7 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)  	 * If unsharing a user namespace must also unshare the thread.  	 */  	if (unshare_flags & CLONE_NEWUSER) -		unshare_flags |= CLONE_THREAD; +		unshare_flags |= CLONE_THREAD | CLONE_FS;  	/*  	 * If unsharing a pid namespace must also unshare the thread.  	 */ diff --git a/kernel/futex.c b/kernel/futex.c index f0090a993da..b26dcfc02c9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -223,7 +223,8 @@ static void drop_futex_key_refs(union futex_key *key)   * @rw:		mapping needs to be read/write (values: VERIFY_READ,   *              VERIFY_WRITE)   * - * Returns a negative error code or 0 + * Return: a negative error code or 0 + *   * The key words are stored in *key on success.   *   * For shared mappings, it's (page->index, file_inode(vma->vm_file), @@ -705,9 +706,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,   *			be "current" except in the case of requeue pi.   * @set_waiters:	force setting the FUTEX_WAITERS bit (1) or not (0)   * - * Returns: - *  0 - ready to wait - *  1 - acquired the lock + * Return: + *  0 - ready to wait; + *  1 - acquired the lock;   * <0 - error   *   * The hb->lock and futex_key refs shall be held by the caller. @@ -1191,9 +1192,9 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,   * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.   * hb1 and hb2 must be held by the caller.   * - * Returns: - *  0 - failed to acquire the lock atomicly - *  1 - acquired the lock + * Return: + *  0 - failed to acquire the lock atomically; + *  1 - acquired the lock;   * <0 - error   */  static int futex_proxy_trylock_atomic(u32 __user *pifutex, @@ -1254,8 +1255,8 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,   * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire   * uaddr2 atomically on behalf of the top waiter.   
* - * Returns: - * >=0 - on success, the number of tasks requeued or woken + * Return: + * >=0 - on success, the number of tasks requeued or woken;   *  <0 - on error   */  static int futex_requeue(u32 __user *uaddr1, unsigned int flags, @@ -1536,8 +1537,8 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)   * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must   * be paired with exactly one earlier call to queue_me().   * - * Returns: - *   1 - if the futex_q was still queued (and we removed unqueued it) + * Return: + *   1 - if the futex_q was still queued (and we removed unqueued it);   *   0 - if the futex_q was already removed by the waking thread   */  static int unqueue_me(struct futex_q *q) @@ -1707,9 +1708,9 @@ static long futex_wait_restart(struct restart_block *restart);   * the pi_state owner as well as handle race conditions that may allow us to   * acquire the lock. Must be called with the hb lock held.   * - * Returns: - *  1 - success, lock taken - *  0 - success, lock not taken + * Return: + *  1 - success, lock taken; + *  0 - success, lock not taken;   * <0 - on error (-EFAULT)   */  static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) @@ -1824,8 +1825,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,   * Return with the hb lock held and a q.key reference on success, and unlocked   * with no q.key reference on failure.   * - * Returns: - *  0 - uaddr contains val and hb has been locked + * Return: + *  0 - uaddr contains val and hb has been locked;   * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked   */  static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, @@ -2203,9 +2204,9 @@ pi_faulted:   * the wakeup and return the appropriate error code to the caller.  Must be   * called with the hb lock held.   * - * Returns - *  0 - no early wakeup detected - * <0 - -ETIMEDOUT or -ERESTARTNOINTR + * Return: + *  0 = no early wakeup detected; + * <0 = -ETIMEDOUT or -ERESTARTNOINTR   */  static inline  int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, @@ -2247,7 +2248,6 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,   * @val:	the expected value of uaddr   * @abs_time:	absolute timeout   * @bitset:	32 bit wakeup bitset set by userspace, defaults to all - * @clockrt:	whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)   * @uaddr2:	the pi futex we will take prior to returning to user-space   *   * The caller will wait on uaddr and will be requeued by futex_requeue() to @@ -2258,7 +2258,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,   * there was a need to.   *   * We call schedule in futex_wait_queue_me() when we enqueue and return there - * via the following: + * via the following--   * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()   * 2) wakeup on uaddr2 after a requeue   * 3) signal @@ -2276,8 +2276,8 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,   *   * If 4 or 7, we cleanup and return with -ETIMEDOUT.   * - * Returns: - *  0 - On success + * Return: + *  0 - On success;   * <0 - On error   */  static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, diff --git a/kernel/printk.c b/kernel/printk.c index 0b31715f335..abbdd9e2ac8 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -63,8 +63,6 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)  
#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */  #define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */ -DECLARE_WAIT_QUEUE_HEAD(log_wait); -  int console_printk[4] = {  	DEFAULT_CONSOLE_LOGLEVEL,	/* console_loglevel */  	DEFAULT_MESSAGE_LOGLEVEL,	/* default_message_loglevel */ @@ -224,6 +222,7 @@ struct log {  static DEFINE_RAW_SPINLOCK(logbuf_lock);  #ifdef CONFIG_PRINTK +DECLARE_WAIT_QUEUE_HEAD(log_wait);  /* the next printk record to read by syslog(READ) or /proc/kmsg */  static u64 syslog_seq;  static u32 syslog_idx; @@ -1957,45 +1956,6 @@ int is_console_locked(void)  	return console_locked;  } -/* - * Delayed printk version, for scheduler-internal messages: - */ -#define PRINTK_BUF_SIZE		512 - -#define PRINTK_PENDING_WAKEUP	0x01 -#define PRINTK_PENDING_SCHED	0x02 - -static DEFINE_PER_CPU(int, printk_pending); -static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf); - -static void wake_up_klogd_work_func(struct irq_work *irq_work) -{ -	int pending = __this_cpu_xchg(printk_pending, 0); - -	if (pending & PRINTK_PENDING_SCHED) { -		char *buf = __get_cpu_var(printk_sched_buf); -		printk(KERN_WARNING "[sched_delayed] %s", buf); -	} - -	if (pending & PRINTK_PENDING_WAKEUP) -		wake_up_interruptible(&log_wait); -} - -static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { -	.func = wake_up_klogd_work_func, -	.flags = IRQ_WORK_LAZY, -}; - -void wake_up_klogd(void) -{ -	preempt_disable(); -	if (waitqueue_active(&log_wait)) { -		this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); -		irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); -	} -	preempt_enable(); -} -  static void console_cont_flush(char *text, size_t size)  {  	unsigned long flags; @@ -2458,6 +2418,44 @@ static int __init printk_late_init(void)  late_initcall(printk_late_init);  #if defined CONFIG_PRINTK +/* + * Delayed printk version, for scheduler-internal messages: + */ +#define PRINTK_BUF_SIZE		512 + +#define PRINTK_PENDING_WAKEUP	0x01 +#define PRINTK_PENDING_SCHED	0x02 + +static DEFINE_PER_CPU(int, printk_pending); +static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf); + +static void wake_up_klogd_work_func(struct irq_work *irq_work) +{ +	int pending = __this_cpu_xchg(printk_pending, 0); + +	if (pending & PRINTK_PENDING_SCHED) { +		char *buf = __get_cpu_var(printk_sched_buf); +		printk(KERN_WARNING "[sched_delayed] %s", buf); +	} + +	if (pending & PRINTK_PENDING_WAKEUP) +		wake_up_interruptible(&log_wait); +} + +static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { +	.func = wake_up_klogd_work_func, +	.flags = IRQ_WORK_LAZY, +}; + +void wake_up_klogd(void) +{ +	preempt_disable(); +	if (waitqueue_active(&log_wait)) { +		this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); +		irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); +	} +	preempt_enable(); +}  int printk_sched(const char *fmt, ...)  
{ diff --git a/kernel/signal.c b/kernel/signal.c index 2ec870a4c3c..dd72567767d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -485,6 +485,9 @@ flush_signal_handlers(struct task_struct *t, int force_default)  		if (force_default || ka->sa.sa_handler != SIG_IGN)  			ka->sa.sa_handler = SIG_DFL;  		ka->sa.sa_flags = 0; +#ifdef __ARCH_HAS_SA_RESTORER +		ka->sa.sa_restorer = NULL; +#endif  		sigemptyset(&ka->sa.sa_mask);  		ka++;  	} @@ -2682,7 +2685,7 @@ static int do_sigpending(void *set, unsigned long sigsetsize)  /**   *  sys_rt_sigpending - examine a pending signal that has been raised   *			while blocked - *  @set: stores pending signals + *  @uset: stores pending signals   *  @sigsetsize: size of sigset_t type or larger   */  SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize) diff --git a/kernel/sys.c b/kernel/sys.c index 81f56445fba..39c9c4a2949 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2185,9 +2185,8 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,  char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; -static int __orderly_poweroff(void) +static int __orderly_poweroff(bool force)  { -	int argc;  	char **argv;  	static char *envp[] = {  		"HOME=/", @@ -2196,20 +2195,40 @@ static int __orderly_poweroff(void)  	};  	int ret; -	argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); -	if (argv == NULL) { +	argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL); +	if (argv) { +		ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); +		argv_free(argv); +	} else {  		printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", -		       __func__, poweroff_cmd); -		return -ENOMEM; +					 __func__, poweroff_cmd); +		ret = -ENOMEM;  	} -	ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC, -				      NULL, NULL, NULL); -	argv_free(argv); +	if (ret && force) { +		printk(KERN_WARNING "Failed to start orderly shutdown: " +					"forcing the issue\n"); +		/* +		 * I guess this should try to kick off some daemon to sync and +		 * poweroff asap.  Or not even bother syncing if we're doing an +		 * emergency shutdown? +		 */ +		emergency_sync(); +		kernel_power_off(); +	}  	return ret;  } +static bool poweroff_force; + +static void poweroff_work_func(struct work_struct *work) +{ +	__orderly_poweroff(poweroff_force); +} + +static DECLARE_WORK(poweroff_work, poweroff_work_func); +  /**   * orderly_poweroff - Trigger an orderly system poweroff   * @force: force poweroff if command execution fails @@ -2219,21 +2238,9 @@ static int __orderly_poweroff(void)   */  int orderly_poweroff(bool force)  { -	int ret = __orderly_poweroff(); - -	if (ret && force) { -		printk(KERN_WARNING "Failed to start orderly shutdown: " -		       "forcing the issue\n"); - -		/* -		 * I guess this should try to kick off some daemon to sync and -		 * poweroff asap.  Or not even bother syncing if we're doing an -		 * emergency shutdown? 
-		 */ -		emergency_sync(); -		kernel_power_off(); -	} - -	return ret; +	if (force) /* do not override the pending "true" */ +		poweroff_force = true; +	schedule_work(&poweroff_work); +	return 0;  }  EXPORT_SYMBOL_GPL(orderly_poweroff); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 192473b2279..fc382d6e276 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -414,24 +414,28 @@ config PROBE_EVENTS  	def_bool n  config DYNAMIC_FTRACE -	bool "enable/disable ftrace tracepoints dynamically" +	bool "enable/disable function tracing dynamically"  	depends on FUNCTION_TRACER  	depends on HAVE_DYNAMIC_FTRACE  	default y  	help -          This option will modify all the calls to ftrace dynamically -	  (will patch them out of the binary image and replace them -	  with a No-Op instruction) as they are called. A table is -	  created to dynamically enable them again. +	  This option will modify all the calls to function tracing +	  dynamically (will patch them out of the binary image and +	  replace them with a No-Op instruction) on boot up. During +	  compile time, a table is made of all the locations that ftrace +	  can function trace, and this table is linked into the kernel +	  image. When this is enabled, functions can be individually +	  enabled, and the functions not enabled will not affect +	  performance of the system. + +	  See the files in /sys/kernel/debug/tracing: +	    available_filter_functions +	    set_ftrace_filter +	    set_ftrace_notrace  	  This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but  	  otherwise has native performance as long as no tracing is active. -	  The changes to the code are done by a kernel thread that -	  wakes up once a second and checks to see if any ftrace calls -	  were made. If so, it runs stop_machine (stops all CPUS) -	  and modifies the code to jump over the call to ftrace. 
-  config DYNAMIC_FTRACE_WITH_REGS  	def_bool y  	depends on DYNAMIC_FTRACE diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ab25b88aae5..6893d5a2bf0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3104,8 +3104,8 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,  					continue;  			} -			hlist_del(&entry->node); -			call_rcu(&entry->rcu, ftrace_free_entry_rcu); +			hlist_del_rcu(&entry->node); +			call_rcu_sched(&entry->rcu, ftrace_free_entry_rcu);  		}  	}  	__disable_ftrace_function_probe(); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c2e2c231037..4f1dade5698 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -704,7 +704,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)  void  update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)  { -	struct ring_buffer *buf = tr->buffer; +	struct ring_buffer *buf;  	if (trace_stop_count)  		return; @@ -719,6 +719,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)  	arch_spin_lock(&ftrace_max_lock); +	buf = tr->buffer;  	tr->buffer = max_tr.buffer;  	max_tr.buffer = buf; @@ -2400,6 +2401,27 @@ static void test_ftrace_alive(struct seq_file *m)  	seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");  } +#ifdef CONFIG_TRACER_MAX_TRACE +static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) +{ +	if (iter->trace->allocated_snapshot) +		seq_printf(m, "#\n# * Snapshot is allocated *\n#\n"); +	else +		seq_printf(m, "#\n# * Snapshot is freed *\n#\n"); + +	seq_printf(m, "# Snapshot commands:\n"); +	seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"); +	seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"); +	seq_printf(m, "#                      Takes a snapshot of the main buffer.\n"); +	seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate)\n"); +	seq_printf(m, "#                      (Doesn't have to be '2' works with any number that\n"); +	seq_printf(m, "#                       is not a '0' or '1')\n"); +} +#else +/* Should never be called */ +static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { } +#endif +  static int s_show(struct seq_file *m, void *v)  {  	struct trace_iterator *iter = v; @@ -2411,7 +2433,9 @@ static int s_show(struct seq_file *m, void *v)  			seq_puts(m, "#\n");  			test_ftrace_alive(m);  		} -		if (iter->trace && iter->trace->print_header) +		if (iter->snapshot && trace_empty(iter)) +			print_snapshot_help(m, iter); +		else if (iter->trace && iter->trace->print_header)  			iter->trace->print_header(m);  		else  			trace_default_header(m); @@ -2857,11 +2881,25 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)  	return -EINVAL;  } -static void set_tracer_flags(unsigned int mask, int enabled) +/* Some tracers require overwrite to stay enabled */ +int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set) +{ +	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set) +		return -1; + +	return 0; +} + +int set_tracer_flag(unsigned int mask, int enabled)  {  	/* do nothing if flag is already set */  	if (!!(trace_flags & mask) == !!enabled) -		return; +		return 0; + +	/* Give the tracer a chance to approve the change */ +	if (current_trace->flag_changed) +		if (current_trace->flag_changed(current_trace, mask, !!enabled)) +			return -EINVAL;  	if (enabled)  		trace_flags |= mask; @@ 
-2871,18 +2909,24 @@ static void set_tracer_flags(unsigned int mask, int enabled)  	if (mask == TRACE_ITER_RECORD_CMD)  		trace_event_enable_cmd_record(enabled); -	if (mask == TRACE_ITER_OVERWRITE) +	if (mask == TRACE_ITER_OVERWRITE) {  		ring_buffer_change_overwrite(global_trace.buffer, enabled); +#ifdef CONFIG_TRACER_MAX_TRACE +		ring_buffer_change_overwrite(max_tr.buffer, enabled); +#endif +	}  	if (mask == TRACE_ITER_PRINTK)  		trace_printk_start_stop_comm(enabled); + +	return 0;  }  static int trace_set_options(char *option)  {  	char *cmp;  	int neg = 0; -	int ret = 0; +	int ret = -ENODEV;  	int i;  	cmp = strstrip(option); @@ -2892,19 +2936,20 @@ static int trace_set_options(char *option)  		cmp += 2;  	} +	mutex_lock(&trace_types_lock); +  	for (i = 0; trace_options[i]; i++) {  		if (strcmp(cmp, trace_options[i]) == 0) { -			set_tracer_flags(1 << i, !neg); +			ret = set_tracer_flag(1 << i, !neg);  			break;  		}  	}  	/* If no option could be set, test the specific tracer options */ -	if (!trace_options[i]) { -		mutex_lock(&trace_types_lock); +	if (!trace_options[i])  		ret = set_tracer_option(current_trace, cmp, neg); -		mutex_unlock(&trace_types_lock); -	} + +	mutex_unlock(&trace_types_lock);  	return ret;  } @@ -2914,6 +2959,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,  			size_t cnt, loff_t *ppos)  {  	char buf[64]; +	int ret;  	if (cnt >= sizeof(buf))  		return -EINVAL; @@ -2923,7 +2969,9 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,  	buf[cnt] = 0; -	trace_set_options(buf); +	ret = trace_set_options(buf); +	if (ret < 0) +		return ret;  	*ppos += cnt; @@ -3227,6 +3275,9 @@ static int tracing_set_tracer(const char *buf)  		goto out;  	trace_branch_disable(); + +	current_trace->enabled = false; +  	if (current_trace->reset)  		current_trace->reset(tr); @@ -3271,6 +3322,7 @@ static int tracing_set_tracer(const char *buf)  	}  	current_trace = t; +	current_trace->enabled = true;  	trace_branch_enable(tr);   out:  	mutex_unlock(&trace_types_lock); @@ -4144,8 +4196,6 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,  	default:  		if (current_trace->allocated_snapshot)  			tracing_reset_online_cpus(&max_tr); -		else -			ret = -EINVAL;  		break;  	} @@ -4759,7 +4809,13 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,  	if (val != 0 && val != 1)  		return -EINVAL; -	set_tracer_flags(1 << index, val); + +	mutex_lock(&trace_types_lock); +	ret = set_tracer_flag(1 << index, val); +	mutex_unlock(&trace_types_lock); + +	if (ret < 0) +		return ret;  	*ppos += cnt; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 57d7e5397d5..2081971367e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -283,11 +283,15 @@ struct tracer {  	enum print_line_t	(*print_line)(struct trace_iterator *iter);  	/* If you handled the flag setting, return 0 */  	int			(*set_flag)(u32 old_flags, u32 bit, int set); +	/* Return 0 if OK with change, else return non-zero */ +	int			(*flag_changed)(struct tracer *tracer, +						u32 mask, int set);  	struct tracer		*next;  	struct tracer_flags	*flags;  	bool			print_max;  	bool			use_max_tr;  	bool			allocated_snapshot; +	bool			enabled;  }; @@ -943,6 +947,8 @@ extern const char *__stop___trace_bprintk_fmt[];  void trace_printk_init_buffers(void);  void trace_printk_start_comm(void); +int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); +int set_tracer_flag(unsigned int mask, int enabled);  #undef FTRACE_ENTRY 
 #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter)	\ diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 713a2cac488..443b25b43b4 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -32,7 +32,7 @@ enum {  static int trace_type __read_mostly; -static int save_lat_flag; +static int save_flags;  static void stop_irqsoff_tracer(struct trace_array *tr, int graph);  static int start_irqsoff_tracer(struct trace_array *tr, int graph); @@ -558,8 +558,11 @@ static void stop_irqsoff_tracer(struct trace_array *tr, int graph)  static void __irqsoff_tracer_init(struct trace_array *tr)  { -	save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; -	trace_flags |= TRACE_ITER_LATENCY_FMT; +	save_flags = trace_flags; + +	/* non overwrite screws up the latency tracers */ +	set_tracer_flag(TRACE_ITER_OVERWRITE, 1); +	set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1);  	tracing_max_latency = 0;  	irqsoff_trace = tr; @@ -573,10 +576,13 @@ static void __irqsoff_tracer_init(struct trace_array *tr)  static void irqsoff_tracer_reset(struct trace_array *tr)  { +	int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT; +	int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE; +  	stop_irqsoff_tracer(tr, is_graph()); -	if (!save_lat_flag) -		trace_flags &= ~TRACE_ITER_LATENCY_FMT; +	set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag); +	set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag);  }  static void irqsoff_tracer_start(struct trace_array *tr) @@ -609,6 +615,7 @@ static struct tracer irqsoff_tracer __read_mostly =  	.print_line     = irqsoff_print_line,  	.flags		= &tracer_flags,  	.set_flag	= irqsoff_set_flag, +	.flag_changed	= trace_keep_overwrite,  #ifdef CONFIG_FTRACE_SELFTEST  	.selftest    = trace_selftest_startup_irqsoff,  #endif @@ -642,6 +649,7 @@ static struct tracer preemptoff_tracer __read_mostly =  	.print_line     = irqsoff_print_line,  	.flags		= &tracer_flags,  	.set_flag	= irqsoff_set_flag, +	.flag_changed	= trace_keep_overwrite,  #ifdef CONFIG_FTRACE_SELFTEST  	.selftest    = trace_selftest_startup_preemptoff,  #endif @@ -677,6 +685,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly =  	.print_line     = irqsoff_print_line,  	.flags		= &tracer_flags,  	.set_flag	= irqsoff_set_flag, +	.flag_changed	= trace_keep_overwrite,  #ifdef CONFIG_FTRACE_SELFTEST  	.selftest    = trace_selftest_startup_preemptirqsoff,  #endif diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 75aa97fbe1a..fde652c9a51 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -36,7 +36,7 @@ static void __wakeup_reset(struct trace_array *tr);  static int wakeup_graph_entry(struct ftrace_graph_ent *trace);  static void wakeup_graph_return(struct ftrace_graph_ret *trace); -static int save_lat_flag; +static int save_flags;  #define TRACE_DISPLAY_GRAPH     1 @@ -540,8 +540,11 @@ static void stop_wakeup_tracer(struct trace_array *tr)  static int __wakeup_tracer_init(struct trace_array *tr)  { -	save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; -	trace_flags |= TRACE_ITER_LATENCY_FMT; +	save_flags = trace_flags; + +	/* non overwrite screws up the latency tracers */ +	set_tracer_flag(TRACE_ITER_OVERWRITE, 1); +	set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1);  	tracing_max_latency = 0;  	wakeup_trace = tr; @@ -563,12 +566,15 @@ static int wakeup_rt_tracer_init(struct trace_array *tr)  static void wakeup_tracer_reset(struct trace_array *tr)  { +	int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT; +	
int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE; +  	stop_wakeup_tracer(tr);  	/* make sure we put back any tasks we are tracing */  	wakeup_reset(tr); -	if (!save_lat_flag) -		trace_flags &= ~TRACE_ITER_LATENCY_FMT; +	set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag); +	set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag);  }  static void wakeup_tracer_start(struct trace_array *tr) @@ -594,6 +600,7 @@ static struct tracer wakeup_tracer __read_mostly =  	.print_line	= wakeup_print_line,  	.flags		= &tracer_flags,  	.set_flag	= wakeup_set_flag, +	.flag_changed	= trace_keep_overwrite,  #ifdef CONFIG_FTRACE_SELFTEST  	.selftest    = trace_selftest_startup_wakeup,  #endif @@ -615,6 +622,7 @@ static struct tracer wakeup_rt_tracer __read_mostly =  	.print_line	= wakeup_print_line,  	.flags		= &tracer_flags,  	.set_flag	= wakeup_set_flag, +	.flag_changed	= trace_keep_overwrite,  #ifdef CONFIG_FTRACE_SELFTEST  	.selftest    = trace_selftest_startup_wakeup,  #endif diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 8b650837083..b14f4d34204 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -21,6 +21,7 @@  #include <linux/uaccess.h>  #include <linux/ctype.h>  #include <linux/projid.h> +#include <linux/fs_struct.h>  static struct kmem_cache *user_ns_cachep __read_mostly; @@ -837,6 +838,9 @@ static int userns_install(struct nsproxy *nsproxy, void *ns)  	if (atomic_read(¤t->mm->mm_users) > 1)  		return -EINVAL; +	if (current->fs->users != 1) +		return -EINVAL; +  	if (!ns_capable(user_ns, CAP_SYS_ADMIN))  		return -EPERM; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 81f2457811e..b48cd597145 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -457,11 +457,12 @@ static int worker_pool_assign_id(struct worker_pool *pool)  	int ret;  	mutex_lock(&worker_pool_idr_mutex); -	idr_pre_get(&worker_pool_idr, GFP_KERNEL); -	ret = idr_get_new(&worker_pool_idr, pool, &pool->id); +	ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL); +	if (ret >= 0) +		pool->id = ret;  	mutex_unlock(&worker_pool_idr_mutex); -	return ret; +	return ret < 0 ? ret : 0;  }  /* @@ -3446,28 +3447,34 @@ static void wq_unbind_fn(struct work_struct *work)  		spin_unlock_irq(&pool->lock);  		mutex_unlock(&pool->assoc_mutex); -	} -	/* -	 * Call schedule() so that we cross rq->lock and thus can guarantee -	 * sched callbacks see the %WORKER_UNBOUND flag.  This is necessary -	 * as scheduler callbacks may be invoked from other cpus. -	 */ -	schedule(); +		/* +		 * Call schedule() so that we cross rq->lock and thus can +		 * guarantee sched callbacks see the %WORKER_UNBOUND flag. +		 * This is necessary as scheduler callbacks may be invoked +		 * from other cpus. +		 */ +		schedule(); -	/* -	 * Sched callbacks are disabled now.  Zap nr_running.  After this, -	 * nr_running stays zero and need_more_worker() and keep_working() -	 * are always true as long as the worklist is not empty.  Pools on -	 * @cpu now behave as unbound (in terms of concurrency management) -	 * pools which are served by workers tied to the CPU. -	 * -	 * On return from this function, the current worker would trigger -	 * unbound chain execution of pending work items if other workers -	 * didn't already. -	 */ -	for_each_std_worker_pool(pool, cpu) +		/* +		 * Sched callbacks are disabled now.  Zap nr_running. +		 * After this, nr_running stays zero and need_more_worker() +		 * and keep_working() are always true as long as the +		 * worklist is not empty.  
This pool now behaves as an +		 * unbound (in terms of concurrency management) pool which +		 * are served by workers tied to the pool. +		 */  		atomic_set(&pool->nr_running, 0); + +		/* +		 * With concurrency management just turned off, a busy +		 * worker blocking could lead to lengthy stalls.  Kick off +		 * unbound chain execution of currently pending work items. +		 */ +		spin_lock_irq(&pool->lock); +		wake_up_worker(pool); +		spin_unlock_irq(&pool->lock); +	}  }  /*  |