Diffstat (limited to 'kernel')
-rw-r--r--  kernel/capability.c          |  24
-rw-r--r--  kernel/events/core.c         |   6
-rw-r--r--  kernel/events/internal.h     |   2
-rw-r--r--  kernel/events/ring_buffer.c  |  22
-rw-r--r--  kernel/hrtimer.c             |   3
-rw-r--r--  kernel/kexec.c               | 118
-rw-r--r--  kernel/kprobes.c             |  19
-rw-r--r--  kernel/kthread.c             |  52
-rw-r--r--  kernel/sched/clock.c         |  26
-rw-r--r--  kernel/sched/core.c          |   8
-rw-r--r--  kernel/sched/cputime.c       |   2
-rw-r--r--  kernel/signal.c              |   2
-rw-r--r--  kernel/smpboot.c             |  14
-rw-r--r--  kernel/sys.c                 |   3
-rw-r--r--  kernel/trace/blktrace.c      |  26
-rw-r--r--  kernel/trace/ftrace.c        |  54
-rw-r--r--  kernel/trace/trace.c         |   9
-rw-r--r--  kernel/trace/trace_stack.c   |   2
-rw-r--r--  kernel/user_namespace.c      |  22
19 files changed, 289 insertions, 125 deletions
diff --git a/kernel/capability.c b/kernel/capability.c
index 493d9725948..f6c2ce5701e 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -393,6 +393,30 @@ bool ns_capable(struct user_namespace *ns, int cap)
 EXPORT_SYMBOL(ns_capable);
 
 /**
+ * file_ns_capable - Determine if the file's opener had a capability in effect
+ * @file:  The file we want to check
+ * @ns:  The usernamespace we want the capability in
+ * @cap: The capability to be tested for
+ *
+ * Return true if task that opened the file had a capability in effect
+ * when the file was opened.
+ *
+ * This does not set PF_SUPERPRIV because the caller may not
+ * actually be privileged.
+ */
+bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap)
+{
+	if (WARN_ON_ONCE(!cap_valid(cap)))
+		return false;
+
+	if (security_capable(file->f_cred, ns, cap) == 0)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(file_ns_capable);
+
+/**
  * capable - Determine if the current task has a superior capability in effect
  * @cap: The capability to be tested for
  *
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 59412d037ee..4d3124b3927 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4737,7 +4737,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	} else {
 		if (arch_vma_name(mmap_event->vma)) {
 			name = strncpy(tmp, arch_vma_name(mmap_event->vma),
-				       sizeof(tmp));
+				       sizeof(tmp) - 1);
+			tmp[sizeof(tmp) - 1] = '\0';
 			goto got_name;
 		}
 
@@ -5330,7 +5331,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
 
 static int perf_swevent_init(struct perf_event *event)
 {
-	int event_id = event->attr.config;
+	u64 event_id = event->attr.config;
 
 	if (event->attr.type != PERF_TYPE_SOFTWARE)
 		return -ENOENT;
@@ -5986,6 +5987,7 @@ skip_type:
 	if (pmu->pmu_cpu_context)
 		goto got_cpu_context;
 
+	ret = -ENOMEM;
 	pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
 	if (!pmu->pmu_cpu_context)
 		goto free_dev;
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index d56a64c99a8..eb675c4d59d 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -16,7 +16,7 @@ struct ring_buffer {
 	int				page_order;	/* allocation order  */
 #endif
 	int				nr_pages;	/* nr of data pages  */
-	int				writable;	/* are we writable   */
+	int				overwrite;	/* can overwrite itself */
 
 	atomic_t			poll;		/* POLL_ for wakeups */
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 23cb34ff397..97fddb09762 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -18,12 +18,24 @@
 static bool perf_output_space(struct ring_buffer *rb, unsigned long tail,
 			      unsigned long offset, unsigned long head)
 {
-	unsigned long mask;
+	unsigned long sz = perf_data_size(rb);
+	unsigned long mask = sz - 1;
 
-	if (!rb->writable)
+	/*
+	 * check if user-writable
+	 * overwrite : over-write its own tail
+	 * !overwrite: buffer possibly drops events.
+	 */
+	if (rb->overwrite)
 		return true;
 
-	mask = perf_data_size(rb) - 1;
+	/*
+	 * verify that payload is not bigger than buffer
+	 * otherwise masking logic may fail to detect
+	 * the "not enough space" condition
+	 */
+	if ((head - offset) > sz)
+		return false;
 
 	offset = (offset - tail) & mask;
 	head   = (head   - tail) & mask;
@@ -212,7 +224,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 		rb->watermark = max_size / 2;
 
 	if (flags & RING_BUFFER_WRITABLE)
-		rb->writable = 1;
+		rb->overwrite = 0;
+	else
+		rb->overwrite = 1;
 
 	atomic_set(&rb->refcount, 1);
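The perf_output_space() rework above is easy to misread: with a power-of-two buffer, offsets are reduced modulo the size, so a record larger than the whole buffer can wrap and appear to fit; the masked arithmetic alone never catches that. A toy model of the fixed check (illustrative userspace C, not kernel code; all names invented):

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy model of the fixed check; sz must be a power of two. */
    static bool output_space(unsigned long tail, unsigned long offset,
                             unsigned long head, unsigned long sz)
    {
        unsigned long mask = sz - 1;

        /* New guard: a payload bigger than the whole buffer can never
         * fit. Without it, the masked arithmetic below wraps and would
         * report success for the second call in main(). */
        if ((head - offset) > sz)
            return false;

        offset = (offset - tail) & mask;
        head   = (head   - tail) & mask;

        return (long)(head - offset) >= 0;
    }

    int main(void)
    {
        /* 4 KiB buffer: a 3 KiB record fits, a 5 KiB record must not. */
        printf("%d\n", output_space(0, 100, 100 + 3072, 4096)); /* 1 */
        printf("%d\n", output_space(0, 100, 100 + 5120, 4096)); /* 0 */
        return 0;
    }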
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index cc47812d3fe..14be27feda4 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -63,6 +63,7 @@
 
 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 {
+	.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
 	.clock_base =
 	{
 		{
@@ -1642,8 +1643,6 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 	struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
 	int i;
 
-	raw_spin_lock_init(&cpu_base->lock);
-
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		cpu_base->clock_base[i].cpu_base = cpu_base;
 		timerqueue_init_head(&cpu_base->clock_base[i].active);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index bddd3d7a74b..ffd4e111fd6 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -55,7 +55,7 @@ struct resource crashk_res = {
 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
 };
 struct resource crashk_low_res = {
-	.name  = "Crash kernel low",
+	.name  = "Crash kernel",
 	.start = 0,
 	.end   = 0,
 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
@@ -1368,35 +1368,114 @@ static int __init parse_crashkernel_simple(char 		*cmdline,
 	return 0;
 }
 
+#define SUFFIX_HIGH 0
+#define SUFFIX_LOW  1
+#define SUFFIX_NULL 2
+static __initdata char *suffix_tbl[] = {
+	[SUFFIX_HIGH] = ",high",
+	[SUFFIX_LOW]  = ",low",
+	[SUFFIX_NULL] = NULL,
+};
+
 /*
- * That function is the entry point for command line parsing and should be
- * called from the arch-specific code.
+ * That function parses "suffix"  crashkernel command lines like
+ *
+ *	crashkernel=size,[high|low]
+ *
+ * It returns 0 on success and -EINVAL on failure.
  */
+static int __init parse_crashkernel_suffix(char *cmdline,
+					   unsigned long long	*crash_size,
+					   unsigned long long	*crash_base,
+					   const char *suffix)
+{
+	char *cur = cmdline;
+
+	*crash_size = memparse(cmdline, &cur);
+	if (cmdline == cur) {
+		pr_warn("crashkernel: memory value expected\n");
+		return -EINVAL;
+	}
+
+	/* check with suffix */
+	if (strncmp(cur, suffix, strlen(suffix))) {
+		pr_warn("crashkernel: unrecognized char\n");
+		return -EINVAL;
+	}
+	cur += strlen(suffix);
+	if (*cur != ' ' && *cur != '\0') {
+		pr_warn("crashkernel: unrecognized char\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static __init char *get_last_crashkernel(char *cmdline,
+			     const char *name,
+			     const char *suffix)
+{
+	char *p = cmdline, *ck_cmdline = NULL;
+
+	/* find crashkernel and use the last one if there are more */
+	p = strstr(p, name);
+	while (p) {
+		char *end_p = strchr(p, ' ');
+		char *q;
+
+		if (!end_p)
+			end_p = p + strlen(p);
+
+		if (!suffix) {
+			int i;
+
+			/* skip the one with any known suffix */
+			for (i = 0; suffix_tbl[i]; i++) {
+				q = end_p - strlen(suffix_tbl[i]);
+				if (!strncmp(q, suffix_tbl[i],
+					     strlen(suffix_tbl[i])))
+					goto next;
+			}
+			ck_cmdline = p;
+		} else {
+			q = end_p - strlen(suffix);
+			if (!strncmp(q, suffix, strlen(suffix)))
+				ck_cmdline = p;
+		}
+next:
+		p = strstr(p+1, name);
+	}
+
+	if (!ck_cmdline)
+		return NULL;
+
+	return ck_cmdline;
+}
+
 static int __init __parse_crashkernel(char *cmdline,
 			     unsigned long long system_ram,
 			     unsigned long long *crash_size,
 			     unsigned long long *crash_base,
-				const char *name)
+			     const char *name,
+			     const char *suffix)
 {
-	char 	*p = cmdline, *ck_cmdline = NULL;
 	char	*first_colon, *first_space;
+	char	*ck_cmdline;
 
 	BUG_ON(!crash_size || !crash_base);
 	*crash_size = 0;
 	*crash_base = 0;
 
-	/* find crashkernel and use the last one if there are more */
-	p = strstr(p, name);
-	while (p) {
-		ck_cmdline = p;
-		p = strstr(p+1, name);
-	}
+	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
 
 	if (!ck_cmdline)
 		return -EINVAL;
 
 	ck_cmdline += strlen(name);
 
+	if (suffix)
+		return parse_crashkernel_suffix(ck_cmdline, crash_size,
+				crash_base, suffix);
 	/*
 	 * if the commandline contains a ':', then that's the extended
 	 * syntax -- if not, it must be the classic syntax
@@ -1413,13 +1492,26 @@ static int __init __parse_crashkernel(char *cmdline,
 	return 0;
 }
 
+/*
+ * That function is the entry point for command line parsing and should be
+ * called from the arch-specific code.
+ */
 int __init parse_crashkernel(char *cmdline,
 			     unsigned long long system_ram,
 			     unsigned long long *crash_size,
 			     unsigned long long *crash_base)
 {
 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
-					"crashkernel=");
+					"crashkernel=", NULL);
+}
+
+int __init parse_crashkernel_high(char *cmdline,
+			     unsigned long long system_ram,
+			     unsigned long long *crash_size,
+			     unsigned long long *crash_base)
+{
+	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
+				"crashkernel=", suffix_tbl[SUFFIX_HIGH]);
 }
 
 int __init parse_crashkernel_low(char *cmdline,
@@ -1428,7 +1520,7 @@ int __init parse_crashkernel_low(char *cmdline,
 			     unsigned long long *crash_base)
 {
 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
-					"crashkernel_low=");
+				"crashkernel=", suffix_tbl[SUFFIX_LOW]);
 }
 
 static void update_vmcoreinfo_note(void)
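The new crashkernel grammar is size-then-suffix, with the last matching option on the command line winning. A rough userspace equivalent of the size-plus-suffix parse (memparse() is kernel-only, so strtoull() plus an explicit multiplier switch stands in; names are invented for the sketch):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Parse "<size>[KMG]<suffix>" the way parse_crashkernel_suffix()
     * does: number first, then an exact suffix match that must be
     * followed by a space or the end of the string. */
    static int parse_size_suffix(const char *arg, const char *suffix,
                                 unsigned long long *size)
    {
        char *cur;

        *size = strtoull(arg, &cur, 0);
        if (cur == arg)
            return -1;                    /* no number at all */
        switch (*cur) {                   /* memparse-style multipliers */
        case 'G': *size <<= 10;           /* fall through */
        case 'M': *size <<= 10;           /* fall through */
        case 'K': *size <<= 10; cur++;
        }
        if (strncmp(cur, suffix, strlen(suffix)))
            return -1;                    /* wrong or missing suffix */
        cur += strlen(suffix);
        return (*cur == ' ' || *cur == '\0') ? 0 : -1;
    }

    int main(void)
    {
        unsigned long long size;

        if (parse_size_suffix("512M,high", ",high", &size) == 0)
            printf("%llu\n", size);       /* 536870912 */
        return 0;
    }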
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e35be53f661..3fed7f0cbcd 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -794,16 +794,16 @@ out:
 }
 
 #ifdef CONFIG_SYSCTL
-/* This should be called with kprobe_mutex locked */
 static void __kprobes optimize_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
 	unsigned int i;
 
+	mutex_lock(&kprobe_mutex);
 	/* If optimization is already allowed, just return */
 	if (kprobes_allow_optimization)
-		return;
+		goto out;
 
 	kprobes_allow_optimization = true;
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
@@ -813,18 +813,22 @@ static void __kprobes optimize_all_kprobes(void)
 				optimize_kprobe(p);
 	}
 	printk(KERN_INFO "Kprobes globally optimized\n");
+out:
+	mutex_unlock(&kprobe_mutex);
 }
 
-/* This should be called with kprobe_mutex locked */
 static void __kprobes unoptimize_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
 	unsigned int i;
 
+	mutex_lock(&kprobe_mutex);
 	/* If optimization is already prohibited, just return */
-	if (!kprobes_allow_optimization)
+	if (!kprobes_allow_optimization) {
+		mutex_unlock(&kprobe_mutex);
 		return;
+	}
 
 	kprobes_allow_optimization = false;
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
@@ -834,11 +838,14 @@ static void __kprobes unoptimize_all_kprobes(void)
 				unoptimize_kprobe(p, false);
 		}
 	}
+	mutex_unlock(&kprobe_mutex);
+
 	/* Wait for unoptimizing completion */
 	wait_for_kprobe_optimizer();
 	printk(KERN_INFO "Kprobes globally unoptimized\n");
 }
 
+static DEFINE_MUTEX(kprobe_sysctl_mutex);
 int sysctl_kprobes_optimization;
 int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 				      void __user *buffer, size_t *length,
@@ -846,7 +853,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 {
 	int ret;
 
-	mutex_lock(&kprobe_mutex);
+	mutex_lock(&kprobe_sysctl_mutex);
 	sysctl_kprobes_optimization = kprobes_allow_optimization ?
 					1 : 0;
 	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
 
@@ -854,7 +861,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 		optimize_all_kprobes();
 	else
 		unoptimize_all_kprobes();
-	mutex_unlock(&kprobe_mutex);
+	mutex_unlock(&kprobe_sysctl_mutex);
 
 	return ret;
 }
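The kprobes hunks are a lock-scope refactor: rather than documenting that callers must hold kprobe_mutex, optimize_all_kprobes() and unoptimize_all_kprobes() now take the mutex themselves, and early exits funnel through the unlock. The shape of the pattern, sketched with pthreads (illustrative only, invented names):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool enabled;

    /* Before: callers had to hold state_lock. After: the function owns
     * its locking, so every caller is correct by construction, and the
     * early exit must go through the unlock instead of returning. */
    static void enable_all(void)
    {
        pthread_mutex_lock(&state_lock);
        if (enabled)
            goto out;        /* a bare return would leak the lock */
        enabled = true;
        /* ... walk the table and enable each entry ... */
    out:
        pthread_mutex_unlock(&state_lock);
    }

    int main(void)
    {
        enable_all();
        enable_all();        /* second call takes the early-out path */
        return 0;
    }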
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 691dc2ef9ba..9eb7fed0bba 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -124,12 +124,12 @@ void *kthread_data(struct task_struct *task)
 
 static void __kthread_parkme(struct kthread *self)
 {
-	__set_current_state(TASK_INTERRUPTIBLE);
+	__set_current_state(TASK_PARKED);
 	while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
 		if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
 			complete(&self->parked);
 		schedule();
-		__set_current_state(TASK_INTERRUPTIBLE);
+		__set_current_state(TASK_PARKED);
 	}
 	clear_bit(KTHREAD_IS_PARKED, &self->flags);
 	__set_current_state(TASK_RUNNING);
@@ -256,8 +256,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 }
 EXPORT_SYMBOL(kthread_create_on_node);
 
-static void __kthread_bind(struct task_struct *p, unsigned int cpu)
+static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
 {
+	/* Must have done schedule() in kthread() before we set_task_cpu */
+	if (!wait_task_inactive(p, state)) {
+		WARN_ON(1);
+		return;
+	}
 	/* It's safe because the task is inactive. */
 	do_set_cpus_allowed(p, cpumask_of(cpu));
 	p->flags |= PF_THREAD_BOUND;
@@ -274,12 +279,7 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu)
  */
 void kthread_bind(struct task_struct *p, unsigned int cpu)
 {
-	/* Must have done schedule() in kthread() before we set_task_cpu */
-	if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
-		WARN_ON(1);
-		return;
-	}
-	__kthread_bind(p, cpu);
+	__kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(kthread_bind);
 
@@ -324,6 +324,22 @@ static struct kthread *task_get_live_kthread(struct task_struct *k)
 	return NULL;
 }
 
+static void __kthread_unpark(struct task_struct *k, struct kthread *kthread)
+{
+	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+	/*
+	 * We clear the IS_PARKED bit here as we don't wait
+	 * until the task has left the park code. So if we'd
+	 * park before that happens we'd see the IS_PARKED bit
+	 * which might be about to be cleared.
+	 */
+	if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
+		if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
+			__kthread_bind(k, kthread->cpu, TASK_PARKED);
+		wake_up_state(k, TASK_PARKED);
+	}
+}
+
 /**
  * kthread_unpark - unpark a thread created by kthread_create().
  * @k:		thread created by kthread_create().
@@ -336,20 +352,8 @@ void kthread_unpark(struct task_struct *k)
 {
 	struct kthread *kthread = task_get_live_kthread(k);
 
-	if (kthread) {
-		clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
-		/*
-		 * We clear the IS_PARKED bit here as we don't wait
-		 * until the task has left the park code. So if we'd
-		 * park before that happens we'd see the IS_PARKED bit
-		 * which might be about to be cleared.
-		 */
-		if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
-			if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
-				__kthread_bind(k, kthread->cpu);
-			wake_up_process(k);
-		}
-	}
+	if (kthread)
+		__kthread_unpark(k, kthread);
 	put_task_struct(k);
 }
 
@@ -407,7 +411,7 @@ int kthread_stop(struct task_struct *k)
 	trace_sched_kthread_stop(k);
 	if (kthread) {
 		set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
-		clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+		__kthread_unpark(k, kthread);
 		wake_up_process(k);
 		wait_for_completion(&kthread->exited);
 	}
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index c685e31492d..c3ae1446461 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -176,10 +176,36 @@ static u64 sched_clock_remote(struct sched_clock_data *scd)
 	u64 this_clock, remote_clock;
 	u64 *ptr, old_val, val;
 
+#if BITS_PER_LONG != 64
+again:
+	/*
+	 * Careful here: The local and the remote clock values need to
+	 * be read out atomic as we need to compare the values and
+	 * then update either the local or the remote side. So the
+	 * cmpxchg64 below only protects one readout.
+	 *
+	 * We must reread via sched_clock_local() in the retry case on
+	 * 32bit as an NMI could use sched_clock_local() via the
+	 * tracer and hit between the readout of
+	 * the low32bit and the high 32bit portion.
+	 */
+	this_clock = sched_clock_local(my_scd);
+	/*
+	 * We must enforce atomic readout on 32bit, otherwise the
+	 * update on the remote cpu can hit inbetween the readout of
+	 * the low32bit and the high 32bit portion.
+	 */
+	remote_clock = cmpxchg64(&scd->clock, 0, 0);
+#else
+	/*
+	 * On 64bit the read of [my]scd->clock is atomic versus the
+	 * update, so we can avoid the above 32bit dance.
+	 */
 	sched_clock_local(my_scd);
 again:
 	this_clock = my_scd->clock;
 	remote_clock = scd->clock;
+#endif
 
 	/*
 	 * Use the opportunity that we have both locks
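The cmpxchg64(&scd->clock, 0, 0) above is the standard trick for an untorn 64-bit read on a 32-bit machine: a compare-and-swap of 0 for 0 never modifies the value, but it reads both halves atomically and hands back what it observed. A userspace analogue using C11 atomics (a sketch; portable code would simply use atomic_load()):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Untorn 64-bit read, even where a plain load would compile to two
     * 32-bit loads: a CAS with expected == desired == 0 never changes
     * the value, but on failure it stores the current value into
     * 'expected', and it is atomic across both halves. */
    static uint64_t atomic_read64(_Atomic uint64_t *p)
    {
        uint64_t expected = 0;

        atomic_compare_exchange_strong(p, &expected, 0);
        return expected;    /* holds the value observed by the CAS */
    }

    int main(void)
    {
        static _Atomic uint64_t clock_val;

        atomic_store(&clock_val, 0x1122334455667788ULL);
        printf("%llx\n", (unsigned long long)atomic_read64(&clock_val));
        return 0;
    }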
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7f12624a393..67d04651f44 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1498,8 +1498,10 @@ static void try_to_wake_up_local(struct task_struct *p)
 {
 	struct rq *rq = task_rq(p);
 
-	BUG_ON(rq != this_rq());
-	BUG_ON(p == current);
+	if (WARN_ON_ONCE(rq != this_rq()) ||
+	    WARN_ON_ONCE(p == current))
+		return;
+
 	lockdep_assert_held(&rq->lock);
 
 	if (!raw_spin_trylock(&p->pi_lock)) {
@@ -4999,7 +5001,7 @@ static void sd_free_ctl_entry(struct ctl_table **tablep)
 }
 
 static int min_load_idx = 0;
-static int max_load_idx = CPU_LOAD_IDX_MAX;
+static int max_load_idx = CPU_LOAD_IDX_MAX-1;
 
 static void
 set_table_entry(struct ctl_table *entry,
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index ed12cbb135f..e93cca92f38 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -310,7 +310,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 
 	t = tsk;
 	do {
-		task_cputime(tsk, &utime, &stime);
+		task_cputime(t, &utime, &stime);
 		times->utime += utime;
 		times->stime += stime;
 		times->sum_exec_runtime += task_sched_runtime(t);
diff --git a/kernel/signal.c b/kernel/signal.c
index 497330ec2ae..06ff7764ab7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2950,7 +2950,7 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
 
 static int do_tkill(pid_t tgid, pid_t pid, int sig)
 {
-	struct siginfo info;
+	struct siginfo info = {};
 
 	info.si_signo = sig;
 	info.si_errno = 0;
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 8eaed9aa9cf..02fc5c93367 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -185,8 +185,18 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
 	}
 	get_task_struct(tsk);
 	*per_cpu_ptr(ht->store, cpu) = tsk;
-	if (ht->create)
-		ht->create(cpu);
+	if (ht->create) {
+		/*
+		 * Make sure that the task has actually scheduled out
+		 * into park position, before calling the create
+		 * callback. At least the migration thread callback
+		 * requires that the task is off the runqueue.
+		 */
+		if (!wait_task_inactive(tsk, TASK_PARKED))
+			WARN_ON(1);
+		else
+			ht->create(cpu);
+	}
 	return 0;
 }
diff --git a/kernel/sys.c b/kernel/sys.c
index 39c9c4a2949..0da73cf73e6 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -324,7 +324,6 @@ void kernel_restart_prepare(char *cmd)
 	system_state = SYSTEM_RESTART;
 	usermodehelper_disable();
 	device_shutdown();
-	syscore_shutdown();
 }
 
 /**
@@ -370,6 +369,7 @@ void kernel_restart(char *cmd)
 {
 	kernel_restart_prepare(cmd);
 	disable_nonboot_cpus();
+	syscore_shutdown();
 	if (!cmd)
 		printk(KERN_EMERG "Restarting system.\n");
 	else
@@ -395,6 +395,7 @@ static void kernel_shutdown_prepare(enum system_states state)
 void kernel_halt(void)
 {
 	kernel_shutdown_prepare(SYSTEM_HALT);
+	disable_nonboot_cpus();
 	syscore_shutdown();
 	printk(KERN_EMERG "System halted.\n");
 	kmsg_dump(KMSG_DUMP_HALT);
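The one-character signal.c change above (struct siginfo info = {};) matters because do_tkill() fills in only a few fields; without the initializer the remaining bytes are stale kernel stack and would reach the signal's recipient. A minimal userspace illustration of the same idiom (invented struct):

    #include <stdio.h>

    struct info {
        int  signo;
        int  error;
        char pad[16];        /* fields the sender never sets */
    };

    int main(void)
    {
        struct info leaky;          /* pad[] holds stale stack bytes */
        struct info clean = { 0 };  /* every member zeroed, like = {} */

        leaky.signo = 1;
        clean.signo = 1;
        /* Copying 'leaky' out verbatim would disclose whatever the
         * stack held before; 'clean' is safe to hand to an untrusted
         * reader. */
        printf("%d %d\n", leaky.signo, clean.signo);
        return 0;
    }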
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 9e5b8c272ee..5a0f781cd72 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -739,12 +739,6 @@ static void blk_add_trace_rq_complete(void *ignore,
 				      struct request_queue *q,
 				      struct request *rq)
 {
-	struct blk_trace *bt = q->blk_trace;
-
-	/* if control ever passes through here, it's a request based driver */
-	if (unlikely(bt && !bt->rq_based))
-		bt->rq_based = true;
-
 	blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
 }
 
@@ -780,24 +774,10 @@ static void blk_add_trace_bio_bounce(void *ignore,
 	blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
 }
 
-static void blk_add_trace_bio_complete(void *ignore, struct bio *bio, int error)
+static void blk_add_trace_bio_complete(void *ignore,
+				       struct request_queue *q, struct bio *bio,
+				       int error)
 {
-	struct request_queue *q;
-	struct blk_trace *bt;
-
-	if (!bio->bi_bdev)
-		return;
-
-	q = bdev_get_queue(bio->bi_bdev);
-	bt = q->blk_trace;
-
-	/*
-	 * Request based drivers will generate both rq and bio completions.
-	 * Ignore bio ones.
-	 */
-	if (likely(!bt) || bt->rq_based)
-		return;
-
 	blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
 }
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6893d5a2bf0..b3fde6d7b7f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -66,7 +66,7 @@
 
 static struct ftrace_ops ftrace_list_end __read_mostly = {
 	.func		= ftrace_stub,
-	.flags		= FTRACE_OPS_FL_RECURSION_SAFE,
+	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
 };
 
 /* ftrace_enabled is a method to turn ftrace on or off */
@@ -694,7 +694,6 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
 		free_page(tmp);
 	}
 
-	free_page((unsigned long)stat->pages);
 	stat->pages = NULL;
 	stat->start = NULL;
 
@@ -1053,6 +1052,19 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
 
 static struct pid * const ftrace_swapper_pid = &init_struct_pid;
 
+loff_t
+ftrace_filter_lseek(struct file *file, loff_t offset, int whence)
+{
+	loff_t ret;
+
+	if (file->f_mode & FMODE_READ)
+		ret = seq_lseek(file, offset, whence);
+	else
+		file->f_pos = ret = 1;
+
+	return ret;
+}
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 #ifndef CONFIG_FTRACE_MCOUNT_RECORD
@@ -2613,7 +2625,7 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
  * routine, you can use ftrace_filter_write() for the write
 * routine if @flag has FTRACE_ITER_FILTER set, or
 * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
- * ftrace_regex_lseek() should be used as the lseek routine, and
+ * ftrace_filter_lseek() should be used as the lseek routine, and
 * release must call ftrace_regex_release().
 */
 int
@@ -2697,19 +2709,6 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
 				 inode, file);
 }
 
-loff_t
-ftrace_regex_lseek(struct file *file, loff_t offset, int whence)
-{
-	loff_t ret;
-
-	if (file->f_mode & FMODE_READ)
-		ret = seq_lseek(file, offset, whence);
-	else
-		file->f_pos = ret = 1;
-
-	return ret;
-}
-
 static int ftrace_match(char *str, char *regex, int len, int type)
 {
 	int matched = 0;
@@ -3441,14 +3440,14 @@ static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
 
 static int __init set_ftrace_notrace(char *str)
 {
-	strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
+	strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
 	return 1;
 }
 __setup("ftrace_notrace=", set_ftrace_notrace);
 
 static int __init set_ftrace_filter(char *str)
 {
-	strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
+	strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
 	return 1;
 }
 __setup("ftrace_filter=", set_ftrace_filter);
@@ -3571,7 +3570,7 @@ static const struct file_operations ftrace_filter_fops = {
 	.open = ftrace_filter_open,
 	.read = seq_read,
 	.write = ftrace_filter_write,
-	.llseek = ftrace_regex_lseek,
+	.llseek = ftrace_filter_lseek,
 	.release = ftrace_regex_release,
 };
 
@@ -3579,7 +3578,7 @@ static const struct file_operations ftrace_notrace_fops = {
 	.open = ftrace_notrace_open,
 	.read = seq_read,
 	.write = ftrace_notrace_write,
-	.llseek = ftrace_regex_lseek,
+	.llseek = ftrace_filter_lseek,
 	.release = ftrace_regex_release,
 };
 
@@ -3784,8 +3783,8 @@ static const struct file_operations ftrace_graph_fops = {
 	.open		= ftrace_graph_open,
 	.read		= seq_read,
 	.write		= ftrace_graph_write,
+	.llseek		= ftrace_filter_lseek,
 	.release	= ftrace_graph_release,
-	.llseek		= seq_lseek,
 };
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
@@ -4131,7 +4130,8 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
 	preempt_disable_notrace();
 	trace_recursion_set(TRACE_CONTROL_BIT);
 	do_for_each_ftrace_op(op, ftrace_control_list) {
-		if (!ftrace_function_local_disabled(op) &&
+		if (!(op->flags & FTRACE_OPS_FL_STUB) &&
+		    !ftrace_function_local_disabled(op) &&
 		    ftrace_ops_test(op, ip))
 			op->func(ip, parent_ip, op, regs);
 	} while_for_each_ftrace_op(op);
@@ -4439,7 +4439,7 @@ static const struct file_operations ftrace_pid_fops = {
 	.open		= ftrace_pid_open,
 	.write		= ftrace_pid_write,
 	.read		= seq_read,
-	.llseek		= seq_lseek,
+	.llseek		= ftrace_filter_lseek,
 	.release	= ftrace_pid_release,
 };
 
@@ -4555,12 +4555,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 		ftrace_startup_sysctl();
 
 		/* we are starting ftrace again */
-		if (ftrace_ops_list != &ftrace_list_end) {
-			if (ftrace_ops_list->next == &ftrace_list_end)
-				ftrace_trace_function = ftrace_ops_list->func;
-			else
-				ftrace_trace_function = ftrace_ops_list_func;
-		}
+		if (ftrace_ops_list != &ftrace_list_end)
+			update_ftrace_function();
 
 	} else {
 		/* stopping ftrace calls (just send to ftrace_stub) */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4f1dade5698..66338c4f7f4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -132,7 +132,7 @@ static char *default_bootup_tracer;
 
 static int __init set_cmdline_ftrace(char *str)
 {
-	strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
+	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
 	default_bootup_tracer = bootup_tracer_buf;
 	/* We are using ftrace early, expand it */
 	ring_buffer_expanded = 1;
@@ -162,7 +162,7 @@ static char *trace_boot_options __initdata;
 
 static int __init set_trace_boot_options(char *str)
 {
-	strncpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
+	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
 	trace_boot_options = trace_boot_options_buf;
 	return 0;
 }
@@ -744,8 +744,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 		return;
 
 	WARN_ON_ONCE(!irqs_disabled());
-	if (WARN_ON_ONCE(!current_trace->allocated_snapshot))
+	if (!current_trace->allocated_snapshot) {
+		/* Only the nop tracer should hit this when disabling */
+		WARN_ON_ONCE(current_trace != &nop_trace);
 		return;
+	}
 
 	arch_spin_lock(&ftrace_max_lock);
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 42ca822fc70..83a8b5b7bd3 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -322,7 +322,7 @@ static const struct file_operations stack_trace_filter_fops = {
 	.open = stack_trace_filter_open,
 	.read = seq_read,
 	.write = ftrace_filter_write,
-	.llseek = ftrace_regex_lseek,
+	.llseek = ftrace_filter_lseek,
 	.release = ftrace_regex_release,
 };
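Several hunks above swap strncpy() for strlcpy() or add manual termination, because strncpy() does not NUL-terminate when the source is at least as long as the destination. A quick userspace demonstration (strlcpy() is a BSD/kernel API, so the fix is shown as explicit termination, matching the perf_event_mmap_event() change):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char tmp[8];

        /* Source longer than the buffer: strncpy() copies 8 bytes and
         * never writes a terminating NUL, so printing tmp here would
         * read past the end of the array. */
        strncpy(tmp, "0123456789", sizeof(tmp));

        /* The pattern from the perf_event_mmap_event() fix: copy one
         * byte less and terminate explicitly. */
        strncpy(tmp, "0123456789", sizeof(tmp) - 1);
        tmp[sizeof(tmp) - 1] = '\0';
        printf("%s\n", tmp);        /* prints "0123456" */
        return 0;
    }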
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index a54f26f82eb..e134d8f365d 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -25,7 +25,8 @@
 
 static struct kmem_cache *user_ns_cachep __read_mostly;
 
-static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
+static bool new_idmap_permitted(const struct file *file,
+				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *map);
 
 static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
@@ -612,10 +613,10 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	if (map->nr_extents != 0)
 		goto out;
 
-	/* Require the appropriate privilege CAP_SETUID or CAP_SETGID
-	 * over the user namespace in order to set the id mapping.
+	/*
+	 * Adjusting namespace settings requires capabilities on the target.
 	 */
-	if (cap_valid(cap_setid) && !ns_capable(ns, cap_setid))
+	if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
 		goto out;
 
 	/* Get a buffer */
@@ -700,7 +701,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 
 	ret = -EPERM;
 	/* Validate the user is allowed to use user id's mapped to. */
-	if (!new_idmap_permitted(ns, cap_setid, &new_map))
+	if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
 		goto out;
 
 	/* Map the lower ids from the parent user namespace to the
@@ -787,7 +788,8 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t
 			 &ns->projid_map, &ns->parent->projid_map);
 }
 
-static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
+static bool new_idmap_permitted(const struct file *file,
+				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *new_map)
 {
 	/* Allow mapping to your own filesystem ids */
@@ -795,12 +797,12 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
 		u32 id = new_map->extent[0].lower_first;
 		if (cap_setid == CAP_SETUID) {
 			kuid_t uid = make_kuid(ns->parent, id);
-			if (uid_eq(uid, current_fsuid()))
+			if (uid_eq(uid, file->f_cred->fsuid))
 				return true;
 		}
 		else if (cap_setid == CAP_SETGID) {
 			kgid_t gid = make_kgid(ns->parent, id);
-			if (gid_eq(gid, current_fsgid()))
+			if (gid_eq(gid, file->f_cred->fsgid))
 				return true;
 		}
 	}
@@ -811,8 +813,10 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
 
 	/* Allow the specified ids if we have the appropriate capability
 	 * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
+	 * And the opener of the id file also had the approprpiate capability.
 	 */
-	if (ns_capable(ns->parent, cap_setid))
+	if (ns_capable(ns->parent, cap_setid) &&
+	    file_ns_capable(file, ns->parent, cap_setid))
 		return true;
 
 	return false;