| author | Jiri Kosina <jkosina@suse.cz> | 2010-12-22 18:57:02 +0100 |
|---|---|---|
| committer | Jiri Kosina <jkosina@suse.cz> | 2010-12-22 18:57:02 +0100 |
| commit | 4b7bd364700d9ac8372eff48832062b936d0793b | (patch) |
| tree | 0dbf78c95456a0b02d07fcd473281f04a87e266d | /kernel |
| parent | c0d8768af260e2cbb4bf659ae6094a262c86b085 | (diff) |
| parent | 90a8a73c06cc32b609a880d48449d7083327e11a | (diff) |
| download | olio-linux-3.10-4b7bd364700d9ac8372eff48832062b936d0793b.tar.xz | olio-linux-3.10-4b7bd364700d9ac8372eff48832062b936d0793b.zip |
Merge branch 'master' into for-next
Conflicts:
	MAINTAINERS
	arch/arm/mach-omap2/pm24xx.c
	drivers/scsi/bfa/bfa_fcpim.c
Needed to update to apply fixes for which the old branch was too
outdated.
Diffstat (limited to 'kernel')
33 files changed, 658 insertions, 291 deletions
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 7242cc71bb7..bd3e8e29caa 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -82,7 +82,7 @@ static kdbtab_t kdb_base_commands[50];  #define for_each_kdbcmd(cmd, num)					\  	for ((cmd) = kdb_base_commands, (num) = 0;			\  	     num < kdb_max_commands;					\ -	     num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++, num++) +	     num++, num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++)  typedef struct _kdbmsg {  	int	km_diag;	/* kdb diagnostic */ @@ -646,7 +646,7 @@ static int kdb_defcmd2(const char *cmdstr, const char *argv0)  	}  	if (!s->usable)  		return KDB_NOTIMP; -	s->command = kmalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB); +	s->command = kzalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB);  	if (!s->command) {  		kdb_printf("Could not allocate new kdb_defcmd table for %s\n",  			   cmdstr); @@ -2361,7 +2361,7 @@ static int kdb_pid(int argc, const char **argv)   */  static int kdb_ll(int argc, const char **argv)  { -	int diag; +	int diag = 0;  	unsigned long addr;  	long offset = 0;  	unsigned long va; @@ -2400,20 +2400,21 @@ static int kdb_ll(int argc, const char **argv)  		char buf[80];  		if (KDB_FLAG(CMD_INTERRUPT)) -			return 0; +			goto out;  		sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va);  		diag = kdb_parse(buf);  		if (diag) -			return diag; +			goto out;  		addr = va + linkoffset;  		if (kdb_getword(&va, addr, sizeof(va))) -			return 0; +			goto out;  	} -	kfree(command); -	return 0; +out: +	kfree(command); +	return diag;  }  static int kdb_kgdb(int argc, const char **argv) @@ -2739,13 +2740,13 @@ int kdb_register_repeat(char *cmd,  		}  		if (kdb_commands) {  			memcpy(new, kdb_commands, -			       kdb_max_commands * sizeof(*new)); +			  (kdb_max_commands - KDB_BASE_CMD_MAX) * sizeof(*new));  			kfree(kdb_commands);  		}  		memset(new + kdb_max_commands, 0,  		       kdb_command_extend * sizeof(*new));  		kdb_commands = new; -		kp = kdb_commands + kdb_max_commands; +		kp = kdb_commands + kdb_max_commands - KDB_BASE_CMD_MAX;  		kdb_max_commands += kdb_command_extend;  	} diff --git a/kernel/exit.c b/kernel/exit.c index b194febf579..676149a4ac5 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -96,6 +96,14 @@ static void __exit_signal(struct task_struct *tsk)  		sig->tty = NULL;  	} else {  		/* +		 * This can only happen if the caller is de_thread(). +		 * FIXME: this is the temporary hack, we should teach +		 * posix-cpu-timers to handle this case correctly. +		 */ +		if (unlikely(has_group_leader_pid(tsk))) +			posix_cpu_timers_exit_group(tsk); + +		/*  		 * If there is any task waiting for the group exit  		 * then notify it:  		 */ @@ -906,6 +914,15 @@ NORET_TYPE void do_exit(long code)  	if (unlikely(!tsk->pid))  		panic("Attempted to kill the idle task!"); +	/* +	 * If do_exit is called because this processes oopsed, it's possible +	 * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before +	 * continuing. Amongst other possible reasons, this is to prevent +	 * mm_release()->clear_child_tid() from writing to a user-controlled +	 * kernel address. 
+	 */ +	set_fs(USER_DS); +  	tracehook_report_exit(&code);  	validate_creds_for_do_exit(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 3b159c5991b..5447dc7defa 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -273,6 +273,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)  	setup_thread_stack(tsk, orig);  	clear_user_return_notifier(tsk); +	clear_tsk_need_resched(tsk);  	stackend = end_of_stack(tsk);  	*stackend = STACK_END_MAGIC;	/* for overflow detection */ diff --git a/kernel/futex.c b/kernel/futex.c index 6c683b37f2c..40a8777a27d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2489,7 +2489,8 @@ void exit_robust_list(struct task_struct *curr)  {  	struct robust_list_head __user *head = curr->robust_list;  	struct robust_list __user *entry, *next_entry, *pending; -	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip; +	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; +	unsigned int uninitialized_var(next_pi);  	unsigned long futex_offset;  	int rc; diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 06da4dfc339..a7934ac75e5 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -49,7 +49,8 @@ void compat_exit_robust_list(struct task_struct *curr)  {  	struct compat_robust_list_head __user *head = curr->compat_robust_list;  	struct robust_list __user *entry, *next_entry, *pending; -	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip; +	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; +	unsigned int uninitialized_var(next_pi);  	compat_uptr_t uentry, next_uentry, upending;  	compat_long_t futex_offset;  	int rc; diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 2c9120f0afc..e5325825aeb 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = {  	.read		= hw_breakpoint_pmu_read,  }; -static int __init init_hw_breakpoint(void) +int __init init_hw_breakpoint(void)  {  	unsigned int **task_bp_pinned;  	int cpu, err_cpu; @@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void)  	return -ENOMEM;  } -core_initcall(init_hw_breakpoint); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 644e8d5fa36..5f92acc5f95 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -324,6 +324,10 @@ void enable_irq(unsigned int irq)  	if (!desc)  		return; +	if (WARN(!desc->irq_data.chip || !desc->irq_data.chip->irq_enable, +	    KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq)) +		return; +  	chip_bus_lock(desc);  	raw_spin_lock_irqsave(&desc->lock, flags);  	__enable_irq(desc, irq, false); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 01b1d3a8898..6c8a2a9f8a7 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -214,7 +214,7 @@ static int irq_spurious_proc_show(struct seq_file *m, void *v)  static int irq_spurious_proc_open(struct inode *inode, struct file *file)  { -	return single_open(file, irq_spurious_proc_show, NULL); +	return single_open(file, irq_spurious_proc_show, PDE(inode)->data);  }  static const struct file_operations irq_spurious_proc_fops = { diff --git a/kernel/irq_work.c b/kernel/irq_work.c index f16763ff848..90f881904bb 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -145,7 +145,9 @@ void irq_work_run(void)  		 * Clear the BUSY bit and return to the free state if  		 * no-one else claimed it meanwhile.  		 
*/ -		cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL); +		(void)cmpxchg(&entry->next, +			      next_flags(NULL, IRQ_WORK_BUSY), +			      NULL);  	}  }  EXPORT_SYMBOL_GPL(irq_work_run); diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 877fb306d41..17110a4a4fc 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -194,14 +194,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)  	account_global_scheduler_latency(tsk, &lat); -	/* -	 * short term hack; if we're > 32 we stop; future we recycle: -	 */ -	tsk->latency_record_count++; -	if (tsk->latency_record_count >= LT_SAVECOUNT) -		goto out_unlock; - -	for (i = 0; i < LT_SAVECOUNT; i++) { +	for (i = 0; i < tsk->latency_record_count; i++) {  		struct latency_record *mylat;  		int same = 1; @@ -227,8 +220,14 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)  		}  	} +	/* +	 * short term hack; if we're > 32 we stop; future we recycle: +	 */ +	if (tsk->latency_record_count >= LT_SAVECOUNT) +		goto out_unlock; +  	/* Allocated a new one: */ -	i = tsk->latency_record_count; +	i = tsk->latency_record_count++;  	memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));  out_unlock: diff --git a/kernel/module.c b/kernel/module.c index 437a74a7524..d190664f25f 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info)  	kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *  			   mod->num_trace_events, GFP_KERNEL);  #endif +#ifdef CONFIG_TRACING +	mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", +					 sizeof(*mod->trace_bprintk_fmt_start), +					 &mod->num_trace_bprintk_fmt); +	/* +	 * This section contains pointers to allocated objects in the trace +	 * code and not scanning it leads to false positives. +	 */ +	kmemleak_scan_area(mod->trace_bprintk_fmt_start, +			   sizeof(*mod->trace_bprintk_fmt_start) * +			   mod->num_trace_bprintk_fmt, GFP_KERNEL); +#endif  #ifdef CONFIG_FTRACE_MCOUNT_RECORD  	/* sechdrs[0].sh_size is always zero */  	mod->ftrace_callsites = section_objs(info, "__mcount_loc", diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 06682e7b12e..83d8fd991c8 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -31,6 +31,7 @@  #include <linux/kernel_stat.h>  #include <linux/perf_event.h>  #include <linux/ftrace_event.h> +#include <linux/hw_breakpoint.h>  #include <asm/irq_regs.h> @@ -674,6 +675,8 @@ event_sched_in(struct perf_event *event,  	event->tstamp_running += ctx->time - event->tstamp_stopped; +	event->shadow_ctx_time = ctx->time - ctx->timestamp; +  	if (!is_software_event(event))  		cpuctx->active_oncpu++;  	ctx->nr_active++; @@ -1284,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task,  {  	int ctxn; -	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); -  	for_each_task_context_nr(ctxn)  		perf_event_context_sched_out(task, ctxn, next);  } @@ -1619,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx)  {  	raw_spin_lock(&ctx->lock); -	/* Rotate the first entry last of non-pinned groups */ -	list_rotate_left(&ctx->flexible_groups); +	/* +	 * Rotate the first entry last of non-pinned groups. Rotation might be +	 * disabled by the inheritance code. 
+	 */ +	if (!ctx->rotate_disable) +		list_rotate_left(&ctx->flexible_groups);  	raw_spin_unlock(&ctx->lock);  } @@ -2232,11 +2237,6 @@ int perf_event_release_kernel(struct perf_event *event)  	raw_spin_unlock_irq(&ctx->lock);  	mutex_unlock(&ctx->mutex); -	mutex_lock(&event->owner->perf_event_mutex); -	list_del_init(&event->owner_entry); -	mutex_unlock(&event->owner->perf_event_mutex); -	put_task_struct(event->owner); -  	free_event(event);  	return 0; @@ -2249,9 +2249,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);  static int perf_release(struct inode *inode, struct file *file)  {  	struct perf_event *event = file->private_data; +	struct task_struct *owner;  	file->private_data = NULL; +	rcu_read_lock(); +	owner = ACCESS_ONCE(event->owner); +	/* +	 * Matches the smp_wmb() in perf_event_exit_task(). If we observe +	 * !owner it means the list deletion is complete and we can indeed +	 * free this event, otherwise we need to serialize on +	 * owner->perf_event_mutex. +	 */ +	smp_read_barrier_depends(); +	if (owner) { +		/* +		 * Since delayed_put_task_struct() also drops the last +		 * task reference we can safely take a new reference +		 * while holding the rcu_read_lock(). +		 */ +		get_task_struct(owner); +	} +	rcu_read_unlock(); + +	if (owner) { +		mutex_lock(&owner->perf_event_mutex); +		/* +		 * We have to re-check the event->owner field, if it is cleared +		 * we raced with perf_event_exit_task(), acquiring the mutex +		 * ensured they're done, and we can proceed with freeing the +		 * event. +		 */ +		if (event->owner) +			list_del_init(&event->owner_entry); +		mutex_unlock(&owner->perf_event_mutex); +		put_task_struct(owner); +	} +  	return perf_event_release_kernel(event);  } @@ -3396,7 +3430,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)  }  static void perf_output_read_one(struct perf_output_handle *handle, -				 struct perf_event *event) +				 struct perf_event *event, +				 u64 enabled, u64 running)  {  	u64 read_format = event->attr.read_format;  	u64 values[4]; @@ -3404,11 +3439,11 @@ static void perf_output_read_one(struct perf_output_handle *handle,  	values[n++] = perf_event_count(event);  	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { -		values[n++] = event->total_time_enabled + +		values[n++] = enabled +  			atomic64_read(&event->child_total_time_enabled);  	}  	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { -		values[n++] = event->total_time_running + +		values[n++] = running +  			atomic64_read(&event->child_total_time_running);  	}  	if (read_format & PERF_FORMAT_ID) @@ -3421,7 +3456,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,   * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.   
*/  static void perf_output_read_group(struct perf_output_handle *handle, -			    struct perf_event *event) +			    struct perf_event *event, +			    u64 enabled, u64 running)  {  	struct perf_event *leader = event->group_leader, *sub;  	u64 read_format = event->attr.read_format; @@ -3431,10 +3467,10 @@ static void perf_output_read_group(struct perf_output_handle *handle,  	values[n++] = 1 + leader->nr_siblings;  	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) -		values[n++] = leader->total_time_enabled; +		values[n++] = enabled;  	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) -		values[n++] = leader->total_time_running; +		values[n++] = running;  	if (leader != event)  		leader->pmu->read(leader); @@ -3459,13 +3495,35 @@ static void perf_output_read_group(struct perf_output_handle *handle,  	}  } +#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ +				 PERF_FORMAT_TOTAL_TIME_RUNNING) +  static void perf_output_read(struct perf_output_handle *handle,  			     struct perf_event *event)  { +	u64 enabled = 0, running = 0, now, ctx_time; +	u64 read_format = event->attr.read_format; + +	/* +	 * compute total_time_enabled, total_time_running +	 * based on snapshot values taken when the event +	 * was last scheduled in. +	 * +	 * we cannot simply called update_context_time() +	 * because of locking issue as we are called in +	 * NMI context +	 */ +	if (read_format & PERF_FORMAT_TOTAL_TIMES) { +		now = perf_clock(); +		ctx_time = event->shadow_ctx_time + now; +		enabled = ctx_time - event->tstamp_enabled; +		running = ctx_time - event->tstamp_running; +	} +  	if (event->attr.read_format & PERF_FORMAT_GROUP) -		perf_output_read_group(handle, event); +		perf_output_read_group(handle, event, enabled, running);  	else -		perf_output_read_one(handle, event); +		perf_output_read_one(handle, event, enabled, running);  }  void perf_output_sample(struct perf_output_handle *handle, @@ -3766,6 +3824,8 @@ static void perf_event_task_event(struct perf_task_event *task_event)  	rcu_read_lock();  	list_for_each_entry_rcu(pmu, &pmus, entry) {  		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); +		if (cpuctx->active_pmu != pmu) +			goto next;  		perf_event_task_ctx(&cpuctx->ctx, task_event);  		ctx = task_event->task_ctx; @@ -3901,6 +3961,8 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)  	rcu_read_lock();  	list_for_each_entry_rcu(pmu, &pmus, entry) {  		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); +		if (cpuctx->active_pmu != pmu) +			goto next;  		perf_event_comm_ctx(&cpuctx->ctx, comm_event);  		ctxn = pmu->task_ctx_nr; @@ -4086,6 +4148,8 @@ got_name:  	rcu_read_lock();  	list_for_each_entry_rcu(pmu, &pmus, entry) {  		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); +		if (cpuctx->active_pmu != pmu) +			goto next;  		perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,  					vma->vm_flags & VM_EXEC); @@ -4655,7 +4719,7 @@ static int perf_swevent_init(struct perf_event *event)  		break;  	} -	if (event_id > PERF_COUNT_SW_MAX) +	if (event_id >= PERF_COUNT_SW_MAX)  		return -ENOENT;  	if (!event->parent) { @@ -5087,20 +5151,36 @@ static void *find_pmu_context(int ctxn)  	return NULL;  } -static void free_pmu_context(void * __percpu cpu_context) +static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)  { -	struct pmu *pmu; +	int cpu; + +	for_each_possible_cpu(cpu) { +		struct perf_cpu_context *cpuctx; + +		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); + +		if (cpuctx->active_pmu == old_pmu) +			cpuctx->active_pmu = pmu; +	} +} + +static void free_pmu_context(struct 
pmu *pmu) +{ +	struct pmu *i;  	mutex_lock(&pmus_lock);  	/*  	 * Like a real lame refcount.  	 */ -	list_for_each_entry(pmu, &pmus, entry) { -		if (pmu->pmu_cpu_context == cpu_context) +	list_for_each_entry(i, &pmus, entry) { +		if (i->pmu_cpu_context == pmu->pmu_cpu_context) { +			update_pmu_context(i, pmu);  			goto out; +		}  	} -	free_percpu(cpu_context); +	free_percpu(pmu->pmu_cpu_context);  out:  	mutex_unlock(&pmus_lock);  } @@ -5132,6 +5212,7 @@ int perf_pmu_register(struct pmu *pmu)  		cpuctx->ctx.pmu = pmu;  		cpuctx->jiffies_interval = 1;  		INIT_LIST_HEAD(&cpuctx->rotation_list); +		cpuctx->active_pmu = pmu;  	}  got_cpu_context: @@ -5183,7 +5264,7 @@ void perf_pmu_unregister(struct pmu *pmu)  	synchronize_rcu();  	free_percpu(pmu->pmu_disable_count); -	free_pmu_context(pmu->pmu_cpu_context); +	free_pmu_context(pmu);  }  struct pmu *perf_init_event(struct perf_event *event) @@ -5651,7 +5732,7 @@ SYSCALL_DEFINE5(perf_event_open,  	mutex_unlock(&ctx->mutex);  	event->owner = current; -	get_task_struct(current); +  	mutex_lock(¤t->perf_event_mutex);  	list_add_tail(&event->owner_entry, ¤t->perf_event_list);  	mutex_unlock(¤t->perf_event_mutex); @@ -5719,12 +5800,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,  	++ctx->generation;  	mutex_unlock(&ctx->mutex); -	event->owner = current; -	get_task_struct(current); -	mutex_lock(¤t->perf_event_mutex); -	list_add_tail(&event->owner_entry, ¤t->perf_event_list); -	mutex_unlock(¤t->perf_event_mutex); -  	return event;  err_free: @@ -5875,8 +5950,24 @@ again:   */  void perf_event_exit_task(struct task_struct *child)  { +	struct perf_event *event, *tmp;  	int ctxn; +	mutex_lock(&child->perf_event_mutex); +	list_for_each_entry_safe(event, tmp, &child->perf_event_list, +				 owner_entry) { +		list_del_init(&event->owner_entry); + +		/* +		 * Ensure the list deletion is visible before we clear +		 * the owner, closes a race against perf_release() where +		 * we need to serialize on the owner->perf_event_mutex. +		 */ +		smp_wmb(); +		event->owner = NULL; +	} +	mutex_unlock(&child->perf_event_mutex); +  	for_each_task_context_nr(ctxn)  		perf_event_exit_task_context(child, ctxn);  } @@ -6096,6 +6187,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)  	struct perf_event *event;  	struct task_struct *parent = current;  	int inherited_all = 1; +	unsigned long flags;  	int ret = 0;  	child->perf_event_ctxp[ctxn] = NULL; @@ -6136,6 +6228,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn)  			break;  	} +	/* +	 * We can't hold ctx->lock when iterating the ->flexible_group list due +	 * to allocations, but we need to prevent rotation because +	 * rotate_ctx() will change the list from interrupt context. 
+	 */ +	raw_spin_lock_irqsave(&parent_ctx->lock, flags); +	parent_ctx->rotate_disable = 1; +	raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); +  	list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {  		ret = inherit_task_group(event, parent, parent_ctx,  					 child, ctxn, &inherited_all); @@ -6143,6 +6244,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn)  			break;  	} +	raw_spin_lock_irqsave(&parent_ctx->lock, flags); +	parent_ctx->rotate_disable = 0; +	raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); +  	child_ctx = child->perf_event_ctxp[ctxn];  	if (child_ctx && inherited_all) { @@ -6295,6 +6400,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)  void __init perf_event_init(void)  { +	int ret; +  	perf_event_init_all_cpus();  	init_srcu_struct(&pmus_srcu);  	perf_pmu_register(&perf_swevent); @@ -6302,4 +6409,7 @@ void __init perf_event_init(void)  	perf_pmu_register(&perf_task_clock);  	perf_tp_register();  	perf_cpu_notifier(perf_cpu_notify); + +	ret = init_hw_breakpoint(); +	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);  } diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index c7a8f453919..aeaa7f84682 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -121,10 +121,10 @@ static inline int pm_qos_get_value(struct pm_qos_object *o)  	switch (o->type) {  	case PM_QOS_MIN: -		return plist_last(&o->requests)->prio; +		return plist_first(&o->requests)->prio;  	case PM_QOS_MAX: -		return plist_first(&o->requests)->prio; +		return plist_last(&o->requests)->prio;  	default:  		/* runtime check for not using enum */ diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 6842eeba587..05bb7173850 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -37,13 +37,13 @@ static int check_clock(const clockid_t which_clock)  	if (pid == 0)  		return 0; -	read_lock(&tasklist_lock); +	rcu_read_lock();  	p = find_task_by_vpid(pid);  	if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? -		   same_thread_group(p, current) : thread_group_leader(p))) { +		   same_thread_group(p, current) : has_group_leader_pid(p))) {  		error = -EINVAL;  	} -	read_unlock(&tasklist_lock); +	rcu_read_unlock();  	return error;  } @@ -390,7 +390,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)  	INIT_LIST_HEAD(&new_timer->it.cpu.entry); -	read_lock(&tasklist_lock); +	rcu_read_lock();  	if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {  		if (pid == 0) {  			p = current; @@ -404,7 +404,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)  			p = current->group_leader;  		} else {  			p = find_task_by_vpid(pid); -			if (p && !thread_group_leader(p)) +			if (p && !has_group_leader_pid(p))  				p = NULL;  		}  	} @@ -414,7 +414,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)  	} else {  		ret = -EINVAL;  	} -	read_unlock(&tasklist_lock); +	rcu_read_unlock();  	return ret;  } diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 29bff6117ab..a5aff3ebad3 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -246,9 +246,13 @@ config PM_OPS  	depends on PM_SLEEP || PM_RUNTIME  	default y +config ARCH_HAS_OPP +	bool +  config PM_OPP  	bool "Operating Performance Point (OPP) Layer library"  	depends on PM +	depends on ARCH_HAS_OPP  	---help---  	  SOCs have a standard set of tuples consisting of frequency and  	  voltage pairs that the device will support per voltage domain. 
This diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 491b81a2711..b6279be691a 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -327,7 +327,6 @@ static int create_image(int platform_mode)  int hibernation_snapshot(int platform_mode)  {  	int error; -	gfp_t saved_mask;  	error = platform_begin(platform_mode);  	if (error) @@ -339,7 +338,7 @@ int hibernation_snapshot(int platform_mode)  		goto Close;  	suspend_console(); -	saved_mask = clear_gfp_allowed_mask(GFP_IOFS); +	pm_restrict_gfp_mask();  	error = dpm_suspend_start(PMSG_FREEZE);  	if (error)  		goto Recover_platform; @@ -348,7 +347,10 @@ int hibernation_snapshot(int platform_mode)  		goto Recover_platform;  	error = create_image(platform_mode); -	/* Control returns here after successful restore */ +	/* +	 * Control returns here (1) after the image has been created or the +	 * image creation has failed and (2) after a successful restore. +	 */   Resume_devices:  	/* We may need to release the preallocated image pages here. */ @@ -357,7 +359,10 @@ int hibernation_snapshot(int platform_mode)  	dpm_resume_end(in_suspend ?  		(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); -	set_gfp_allowed_mask(saved_mask); + +	if (error || !in_suspend) +		pm_restore_gfp_mask(); +  	resume_console();   Close:  	platform_end(platform_mode); @@ -452,17 +457,16 @@ static int resume_target_kernel(bool platform_mode)  int hibernation_restore(int platform_mode)  {  	int error; -	gfp_t saved_mask;  	pm_prepare_console();  	suspend_console(); -	saved_mask = clear_gfp_allowed_mask(GFP_IOFS); +	pm_restrict_gfp_mask();  	error = dpm_suspend_start(PMSG_QUIESCE);  	if (!error) {  		error = resume_target_kernel(platform_mode);  		dpm_resume_end(PMSG_RECOVER);  	} -	set_gfp_allowed_mask(saved_mask); +	pm_restore_gfp_mask();  	resume_console();  	pm_restore_console();  	return error; @@ -476,7 +480,6 @@ int hibernation_restore(int platform_mode)  int hibernation_platform_enter(void)  {  	int error; -	gfp_t saved_mask;  	if (!hibernation_ops)  		return -ENOSYS; @@ -492,7 +495,6 @@ int hibernation_platform_enter(void)  	entering_platform_hibernation = true;  	suspend_console(); -	saved_mask = clear_gfp_allowed_mask(GFP_IOFS);  	error = dpm_suspend_start(PMSG_HIBERNATE);  	if (error) {  		if (hibernation_ops->recover) @@ -536,7 +538,6 @@ int hibernation_platform_enter(void)   Resume_devices:  	entering_platform_hibernation = false;  	dpm_resume_end(PMSG_RESTORE); -	set_gfp_allowed_mask(saved_mask);  	resume_console();   Close: @@ -646,6 +647,7 @@ int hibernate(void)  		swsusp_free();  		if (!error)  			power_down(); +		pm_restore_gfp_mask();  	} else {  		pr_debug("PM: Image restored successfully.\n");  	} diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 80051bdde6f..ea64ece26d9 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -197,7 +197,6 @@ static int suspend_enter(suspend_state_t state)  int suspend_devices_and_enter(suspend_state_t state)  {  	int error; -	gfp_t saved_mask;  	if (!suspend_ops)  		return -ENOSYS; @@ -208,7 +207,7 @@ int suspend_devices_and_enter(suspend_state_t state)  			goto Close;  	}  	suspend_console(); -	saved_mask = clear_gfp_allowed_mask(GFP_IOFS); +	pm_restrict_gfp_mask();  	suspend_test_start();  	error = dpm_suspend_start(PMSG_SUSPEND);  	if (error) { @@ -225,7 +224,7 @@ int suspend_devices_and_enter(suspend_state_t state)  	suspend_test_start();  	dpm_resume_end(PMSG_RESUME);  	suspend_test_finish("resume devices"); -	set_gfp_allowed_mask(saved_mask); +	
pm_restore_gfp_mask();  	resume_console();   Close:  	if (suspend_ops->end) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index cd09c22de03..69425889bd4 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -6,6 +6,7 @@   *   * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>   * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * Copyright (C) 2010 Bojan Smojver <bojan@rexursive.com>   *   * This file is released under the GPLv2.   * @@ -29,7 +30,7 @@  #include "power.h" -#define HIBERNATE_SIG	"LINHIB0001" +#define HIBERNATE_SIG	"S1SUSPEND"  /*   *	The swap map is a data structure used for keeping track of each page @@ -753,30 +754,43 @@ static int load_image_lzo(struct swap_map_handle *handle,  {  	unsigned int m;  	int error = 0; +	struct bio *bio;  	struct timeval start;  	struct timeval stop;  	unsigned nr_pages; -	size_t off, unc_len, cmp_len; -	unsigned char *unc, *cmp, *page; +	size_t i, off, unc_len, cmp_len; +	unsigned char *unc, *cmp, *page[LZO_CMP_PAGES]; -	page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); -	if (!page) { -		printk(KERN_ERR "PM: Failed to allocate LZO page\n"); -		return -ENOMEM; +	for (i = 0; i < LZO_CMP_PAGES; i++) { +		page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); +		if (!page[i]) { +			printk(KERN_ERR "PM: Failed to allocate LZO page\n"); + +			while (i) +				free_page((unsigned long)page[--i]); + +			return -ENOMEM; +		}  	}  	unc = vmalloc(LZO_UNC_SIZE);  	if (!unc) {  		printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); -		free_page((unsigned long)page); + +		for (i = 0; i < LZO_CMP_PAGES; i++) +			free_page((unsigned long)page[i]); +  		return -ENOMEM;  	}  	cmp = vmalloc(LZO_CMP_SIZE);  	if (!cmp) {  		printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); +  		vfree(unc); -		free_page((unsigned long)page); +		for (i = 0; i < LZO_CMP_PAGES; i++) +			free_page((unsigned long)page[i]); +  		return -ENOMEM;  	} @@ -787,6 +801,7 @@ static int load_image_lzo(struct swap_map_handle *handle,  	if (!m)  		m = 1;  	nr_pages = 0; +	bio = NULL;  	do_gettimeofday(&start);  	error = snapshot_write_next(snapshot); @@ -794,11 +809,11 @@ static int load_image_lzo(struct swap_map_handle *handle,  		goto out_finish;  	for (;;) { -		error = swap_read_page(handle, page, NULL); /* sync */ +		error = swap_read_page(handle, page[0], NULL); /* sync */  		if (error)  			break; -		cmp_len = *(size_t *)page; +		cmp_len = *(size_t *)page[0];  		if (unlikely(!cmp_len ||  		             cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) {  			printk(KERN_ERR "PM: Invalid LZO compressed length\n"); @@ -806,13 +821,20 @@ static int load_image_lzo(struct swap_map_handle *handle,  			break;  		} -		memcpy(cmp, page, PAGE_SIZE); -		for (off = PAGE_SIZE; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) { -			error = swap_read_page(handle, page, NULL); /* sync */ +		for (off = PAGE_SIZE, i = 1; +		     off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { +			error = swap_read_page(handle, page[i], &bio);  			if (error)  				goto out_finish; +		} -			memcpy(cmp + off, page, PAGE_SIZE); +		error = hib_wait_on_bio_chain(&bio); /* need all data now */ +		if (error) +			goto out_finish; + +		for (off = 0, i = 0; +		     off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { +			memcpy(cmp + off, page[i], PAGE_SIZE);  		}  		unc_len = LZO_UNC_SIZE; @@ -857,7 +879,8 @@ out_finish:  	vfree(cmp);  	vfree(unc); -	free_page((unsigned long)page); +	for (i = 0; i < LZO_CMP_PAGES; i++) +		free_page((unsigned long)page[i]);  	return error;  } diff 
--git a/kernel/power/user.c b/kernel/power/user.c index e819e17877c..c36c3b9e8a8 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -137,7 +137,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)  	free_all_swap_pages(data->swap);  	if (data->frozen)  		thaw_processes(); -	pm_notifier_call_chain(data->mode == O_WRONLY ? +	pm_notifier_call_chain(data->mode == O_RDONLY ?  			PM_POST_HIBERNATION : PM_POST_RESTORE);  	atomic_inc(&snapshot_device_available); @@ -263,6 +263,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,  	case SNAPSHOT_UNFREEZE:  		if (!data->frozen || data->ready)  			break; +		pm_restore_gfp_mask();  		thaw_processes();  		usermodehelper_enable();  		data->frozen = 0; @@ -275,6 +276,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,  			error = -EPERM;  			break;  		} +		pm_restore_gfp_mask();  		error = hibernation_snapshot(data->platform_support);  		if (!error)  			error = put_user(in_suspend, (int __user *)arg); diff --git a/kernel/printk.c b/kernel/printk.c index b2ebaee8c37..a23315dc449 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -261,6 +261,12 @@ static inline void boot_delay_msec(void)  }  #endif +#ifdef CONFIG_SECURITY_DMESG_RESTRICT +int dmesg_restrict = 1; +#else +int dmesg_restrict; +#endif +  int do_syslog(int type, char __user *buf, int len, bool from_file)  {  	unsigned i, j, limit, count; @@ -268,7 +274,20 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)  	char c;  	int error = 0; -	error = security_syslog(type, from_file); +	/* +	 * If this is from /proc/kmsg we only do the capabilities checks +	 * at open time. +	 */ +	if (type == SYSLOG_ACTION_OPEN || !from_file) { +		if (dmesg_restrict && !capable(CAP_SYS_ADMIN)) +			return -EPERM; +		if ((type != SYSLOG_ACTION_READ_ALL && +		     type != SYSLOG_ACTION_SIZE_BUFFER) && +		    !capable(CAP_SYS_ADMIN)) +			return -EPERM; +	} + +	error = security_syslog(type);  	if (error)  		return error; @@ -1063,13 +1082,15 @@ void printk_tick(void)  int printk_needs_cpu(int cpu)  { +	if (unlikely(cpu_is_offline(cpu))) +		printk_tick();  	return per_cpu(printk_pending, cpu);  }  void wake_up_klogd(void)  {  	if (waitqueue_active(&log_wait)) -		__raw_get_cpu_var(printk_pending) = 1; +		this_cpu_write(printk_pending, 1);  }  /** diff --git a/kernel/range.c b/kernel/range.c index 471b66acabb..37fa9b99ad5 100644 --- a/kernel/range.c +++ b/kernel/range.c @@ -119,7 +119,7 @@ static int cmp_range(const void *x1, const void *x2)  int clean_sort_range(struct range *range, int az)  { -	int i, j, k = az - 1, nr_range = 0; +	int i, j, k = az - 1, nr_range = az;  	for (i = 0; i < k; i++) {  		if (range[i].end) diff --git a/kernel/relay.c b/kernel/relay.c index c7cf397fb92..859ea5a9605 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -70,17 +70,10 @@ static const struct vm_operations_struct relay_file_mmap_ops = {   */  static struct page **relay_alloc_page_array(unsigned int n_pages)  { -	struct page **array; -	size_t pa_size = n_pages * sizeof(struct page *); - -	if (pa_size > PAGE_SIZE) { -		array = vmalloc(pa_size); -		if (array) -			memset(array, 0, pa_size); -	} else { -		array = kzalloc(pa_size, GFP_KERNEL); -	} -	return array; +	const size_t pa_size = n_pages * sizeof(struct page *); +	if (pa_size > PAGE_SIZE) +		return vzalloc(pa_size); +	return kzalloc(pa_size, GFP_KERNEL);  }  /* diff --git a/kernel/resource.c b/kernel/resource.c index 9fad33efd0d..798e2fae2a0 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -40,23 
+40,6 @@ EXPORT_SYMBOL(iomem_resource);  static DEFINE_RWLOCK(resource_lock); -/* - * By default, we allocate free space bottom-up.  The architecture can request - * top-down by clearing this flag.  The user can override the architecture's - * choice with the "resource_alloc_from_bottom" kernel boot option, but that - * should only be a debugging tool. - */ -int resource_alloc_from_bottom = 1; - -static __init int setup_alloc_from_bottom(char *s) -{ -	printk(KERN_INFO -	       "resource: allocating from bottom-up; please report a bug\n"); -	resource_alloc_from_bottom = 1; -	return 0; -} -early_param("resource_alloc_from_bottom", setup_alloc_from_bottom); -  static void *r_next(struct seq_file *m, void *v, loff_t *pos)  {  	struct resource *p = v; @@ -374,6 +357,10 @@ int __weak page_is_ram(unsigned long pfn)  	return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1;  } +void __weak arch_remove_reservations(struct resource *avail) +{ +} +  static resource_size_t simple_align_resource(void *data,  					     const struct resource *avail,  					     resource_size_t size, @@ -397,74 +384,7 @@ static bool resource_contains(struct resource *res1, struct resource *res2)  }  /* - * Find the resource before "child" in the sibling list of "root" children. - */ -static struct resource *find_sibling_prev(struct resource *root, struct resource *child) -{ -	struct resource *this; - -	for (this = root->child; this; this = this->sibling) -		if (this->sibling == child) -			return this; - -	return NULL; -} - -/*   * Find empty slot in the resource tree given range and alignment. - * This version allocates from the end of the root resource first. - */ -static int find_resource_from_top(struct resource *root, struct resource *new, -				  resource_size_t size, resource_size_t min, -				  resource_size_t max, resource_size_t align, -				  resource_size_t (*alignf)(void *, -						   const struct resource *, -						   resource_size_t, -						   resource_size_t), -				  void *alignf_data) -{ -	struct resource *this; -	struct resource tmp, avail, alloc; - -	tmp.start = root->end; -	tmp.end = root->end; - -	this = find_sibling_prev(root, NULL); -	for (;;) { -		if (this) { -			if (this->end < root->end) -				tmp.start = this->end + 1; -		} else -			tmp.start = root->start; - -		resource_clip(&tmp, min, max); - -		/* Check for overflow after ALIGN() */ -		avail = *new; -		avail.start = ALIGN(tmp.start, align); -		avail.end = tmp.end; -		if (avail.start >= tmp.start) { -			alloc.start = alignf(alignf_data, &avail, size, align); -			alloc.end = alloc.start + size - 1; -			if (resource_contains(&avail, &alloc)) { -				new->start = alloc.start; -				new->end = alloc.end; -				return 0; -			} -		} - -		if (!this || this->start == root->start) -			break; - -		tmp.end = this->start - 1; -		this = find_sibling_prev(root, this); -	} -	return -EBUSY; -} - -/* - * Find empty slot in the resource tree given range and alignment. - * This version allocates from the beginning of the root resource first.   */  static int find_resource(struct resource *root, struct resource *new,  			 resource_size_t size, resource_size_t min, @@ -478,23 +398,24 @@ static int find_resource(struct resource *root, struct resource *new,  	struct resource *this = root->child;  	struct resource tmp = *new, avail, alloc; +	tmp.flags = new->flags;  	tmp.start = root->start;  	/* -	 * Skip past an allocated resource that starts at 0, since the -	 * assignment of this->start - 1 to tmp->end below would cause an -	 * underflow. 
+	 * Skip past an allocated resource that starts at 0, since the assignment +	 * of this->start - 1 to tmp->end below would cause an underflow.  	 */  	if (this && this->start == 0) {  		tmp.start = this->end + 1;  		this = this->sibling;  	} -	for (;;) { +	for(;;) {  		if (this)  			tmp.end = this->start - 1;  		else  			tmp.end = root->end;  		resource_clip(&tmp, min, max); +		arch_remove_reservations(&tmp);  		/* Check for overflow after ALIGN() */  		avail = *new; @@ -509,10 +430,8 @@ static int find_resource(struct resource *root, struct resource *new,  				return 0;  			}  		} -  		if (!this)  			break; -  		tmp.start = this->end + 1;  		this = this->sibling;  	} @@ -545,10 +464,7 @@ int allocate_resource(struct resource *root, struct resource *new,  		alignf = simple_align_resource;  	write_lock(&resource_lock); -	if (resource_alloc_from_bottom) -		err = find_resource(root, new, size, min, max, align, alignf, alignf_data); -	else -		err = find_resource_from_top(root, new, size, min, max, align, alignf, alignf_data); +	err = find_resource(root, new, size, min, max, align, alignf, alignf_data);  	if (err >= 0 && __request_resource(root, new))  		err = -EBUSY;  	write_unlock(&resource_lock); diff --git a/kernel/sched.c b/kernel/sched.c index 554c0d6c489..f89fb67818d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -560,18 +560,8 @@ struct rq {  static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -static inline -void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) -{ -	rq->curr->sched_class->check_preempt_curr(rq, p, flags); -	/* -	 * A queue event has occurred, and we're going to schedule.  In -	 * this case, we can save a useless back to back clock update. -	 */ -	if (test_tsk_need_resched(p)) -		rq->skip_clock_update = 1; -} +static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);  static inline int cpu_of(struct rq *rq)  { @@ -646,22 +636,18 @@ static inline struct task_group *task_group(struct task_struct *p)  #endif /* CONFIG_CGROUP_SCHED */ -static u64 irq_time_cpu(int cpu); -static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time); +static void update_rq_clock_task(struct rq *rq, s64 delta); -inline void update_rq_clock(struct rq *rq) +static void update_rq_clock(struct rq *rq)  { -	if (!rq->skip_clock_update) { -		int cpu = cpu_of(rq); -		u64 irq_time; +	s64 delta; -		rq->clock = sched_clock_cpu(cpu); -		irq_time = irq_time_cpu(cpu); -		if (rq->clock - irq_time > rq->clock_task) -			rq->clock_task = rq->clock - irq_time; +	if (rq->skip_clock_update) +		return; -		sched_irq_time_avg_update(rq, irq_time); -	} +	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; +	rq->clock += delta; +	update_rq_clock_task(rq, delta);  }  /* @@ -1934,10 +1920,9 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)   * They are read and saved off onto struct rq in update_rq_clock().   * This may result in other CPU reading this CPU's irq time and can   * race with irq/account_system_vtime on this CPU. We would either get old - * or new value (or semi updated value on 32 bit) with a side effect of - * accounting a slice of irq time to wrong task when irq is in progress - * while we read rq->clock. That is a worthy compromise in place of having - * locks on each irq in account_system_time. + * or new value with a side effect of accounting a slice of irq time to wrong + * task when irq is in progress while we read rq->clock. 
That is a worthy + * compromise in place of having locks on each irq in account_system_time.   */  static DEFINE_PER_CPU(u64, cpu_hardirq_time);  static DEFINE_PER_CPU(u64, cpu_softirq_time); @@ -1955,19 +1940,58 @@ void disable_sched_clock_irqtime(void)  	sched_clock_irqtime = 0;  } -static u64 irq_time_cpu(int cpu) +#ifndef CONFIG_64BIT +static DEFINE_PER_CPU(seqcount_t, irq_time_seq); + +static inline void irq_time_write_begin(void)  { -	if (!sched_clock_irqtime) -		return 0; +	__this_cpu_inc(irq_time_seq.sequence); +	smp_wmb(); +} +static inline void irq_time_write_end(void) +{ +	smp_wmb(); +	__this_cpu_inc(irq_time_seq.sequence); +} + +static inline u64 irq_time_read(int cpu) +{ +	u64 irq_time; +	unsigned seq; + +	do { +		seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu)); +		irq_time = per_cpu(cpu_softirq_time, cpu) + +			   per_cpu(cpu_hardirq_time, cpu); +	} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq)); + +	return irq_time; +} +#else /* CONFIG_64BIT */ +static inline void irq_time_write_begin(void) +{ +} + +static inline void irq_time_write_end(void) +{ +} + +static inline u64 irq_time_read(int cpu) +{  	return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);  } +#endif /* CONFIG_64BIT */ +/* + * Called before incrementing preempt_count on {soft,}irq_enter + * and before decrementing preempt_count on {soft,}irq_exit. + */  void account_system_vtime(struct task_struct *curr)  {  	unsigned long flags; +	s64 delta;  	int cpu; -	u64 now, delta;  	if (!sched_clock_irqtime)  		return; @@ -1975,9 +1999,10 @@ void account_system_vtime(struct task_struct *curr)  	local_irq_save(flags);  	cpu = smp_processor_id(); -	now = sched_clock_cpu(cpu); -	delta = now - per_cpu(irq_start_time, cpu); -	per_cpu(irq_start_time, cpu) = now; +	delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); +	__this_cpu_add(irq_start_time, delta); + +	irq_time_write_begin();  	/*  	 * We do not account for softirq time from ksoftirqd here.  	 * We want to continue accounting softirq time to ksoftirqd thread @@ -1985,33 +2010,55 @@ void account_system_vtime(struct task_struct *curr)  	 * that do not consume any time, but still wants to run.  	 */  	if (hardirq_count()) -		per_cpu(cpu_hardirq_time, cpu) += delta; +		__this_cpu_add(cpu_hardirq_time, delta);  	else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) -		per_cpu(cpu_softirq_time, cpu) += delta; +		__this_cpu_add(cpu_softirq_time, delta); +	irq_time_write_end();  	local_irq_restore(flags);  }  EXPORT_SYMBOL_GPL(account_system_vtime); -static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) +static void update_rq_clock_task(struct rq *rq, s64 delta)  { -	if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) { -		u64 delta_irq = curr_irq_time - rq->prev_irq_time; -		rq->prev_irq_time = curr_irq_time; -		sched_rt_avg_update(rq, delta_irq); -	} +	s64 irq_delta; + +	irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; + +	/* +	 * Since irq_time is only updated on {soft,}irq_exit, we might run into +	 * this case when a previous update_rq_clock() happened inside a +	 * {soft,}irq region. +	 * +	 * When this happens, we stop ->clock_task and only update the +	 * prev_irq_time stamp to account for the part that fit, so that a next +	 * update will consume the rest. This ensures ->clock_task is +	 * monotonic. 
+	 * +	 * It does however cause some slight miss-attribution of {soft,}irq +	 * time, a more accurate solution would be to update the irq_time using +	 * the current rq->clock timestamp, except that would require using +	 * atomic ops. +	 */ +	if (irq_delta > delta) +		irq_delta = delta; + +	rq->prev_irq_time += irq_delta; +	delta -= irq_delta; +	rq->clock_task += delta; + +	if (irq_delta && sched_feat(NONIRQ_POWER)) +		sched_rt_avg_update(rq, irq_delta);  } -#else +#else /* CONFIG_IRQ_TIME_ACCOUNTING */ -static u64 irq_time_cpu(int cpu) +static void update_rq_clock_task(struct rq *rq, s64 delta)  { -	return 0; +	rq->clock_task += delta;  } -static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { } - -#endif +#endif /* CONFIG_IRQ_TIME_ACCOUNTING */  #include "sched_idletask.c"  #include "sched_fair.c" @@ -2118,6 +2165,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,  		p->sched_class->prio_changed(rq, p, oldprio, running);  } +static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) +{ +	const struct sched_class *class; + +	if (p->sched_class == rq->curr->sched_class) { +		rq->curr->sched_class->check_preempt_curr(rq, p, flags); +	} else { +		for_each_class(class) { +			if (class == rq->curr->sched_class) +				break; +			if (class == p->sched_class) { +				resched_task(rq->curr); +				break; +			} +		} +	} + +	/* +	 * A queue event has occurred, and we're going to schedule.  In +	 * this case, we can save a useless back to back clock update. +	 */ +	if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr)) +		rq->skip_clock_update = 1; +} +  #ifdef CONFIG_SMP  /*   * Is this task likely cache-hot: @@ -3104,6 +3176,15 @@ static long calc_load_fold_active(struct rq *this_rq)  	return delta;  } +static unsigned long +calc_load(unsigned long load, unsigned long exp, unsigned long active) +{ +	load *= exp; +	load += active * (FIXED_1 - exp); +	load += 1UL << (FSHIFT - 1); +	return load >> FSHIFT; +} +  #ifdef CONFIG_NO_HZ  /*   * For NO_HZ we delay the active fold to the next LOAD_FREQ update. @@ -3133,6 +3214,128 @@ static long calc_load_fold_idle(void)  	return delta;  } + +/** + * fixed_power_int - compute: x^n, in O(log n) time + * + * @x:         base of the power + * @frac_bits: fractional bits of @x + * @n:         power to raise @x to. + * + * By exploiting the relation between the definition of the natural power + * function: x^n := x*x*...*x (x multiplied by itself for n times), and + * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i, + * (where: n_i \elem {0, 1}, the binary vector representing n), + * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is + * of course trivially computable in O(log_2 n), the length of our binary + * vector. + */ +static unsigned long +fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n) +{ +	unsigned long result = 1UL << frac_bits; + +	if (n) for (;;) { +		if (n & 1) { +			result *= x; +			result += 1UL << (frac_bits - 1); +			result >>= frac_bits; +		} +		n >>= 1; +		if (!n) +			break; +		x *= x; +		x += 1UL << (frac_bits - 1); +		x >>= frac_bits; +	} + +	return result; +} + +/* + * a1 = a0 * e + a * (1 - e) + * + * a2 = a1 * e + a * (1 - e) + *    = (a0 * e + a * (1 - e)) * e + a * (1 - e) + *    = a0 * e^2 + a * (1 - e) * (1 + e) + * + * a3 = a2 * e + a * (1 - e) + *    = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e) + *    = a0 * e^3 + a * (1 - e) * (1 + e + e^2) + * + *  ... 
+ * + * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1] + *    = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e) + *    = a0 * e^n + a * (1 - e^n) + * + * [1] application of the geometric series: + * + *              n         1 - x^(n+1) + *     S_n := \Sum x^i = ------------- + *             i=0          1 - x + */ +static unsigned long +calc_load_n(unsigned long load, unsigned long exp, +	    unsigned long active, unsigned int n) +{ + +	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); +} + +/* + * NO_HZ can leave us missing all per-cpu ticks calling + * calc_load_account_active(), but since an idle CPU folds its delta into + * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold + * in the pending idle delta if our idle period crossed a load cycle boundary. + * + * Once we've updated the global active value, we need to apply the exponential + * weights adjusted to the number of cycles missed. + */ +static void calc_global_nohz(unsigned long ticks) +{ +	long delta, active, n; + +	if (time_before(jiffies, calc_load_update)) +		return; + +	/* +	 * If we crossed a calc_load_update boundary, make sure to fold +	 * any pending idle changes, the respective CPUs might have +	 * missed the tick driven calc_load_account_active() update +	 * due to NO_HZ. +	 */ +	delta = calc_load_fold_idle(); +	if (delta) +		atomic_long_add(delta, &calc_load_tasks); + +	/* +	 * If we were idle for multiple load cycles, apply them. +	 */ +	if (ticks >= LOAD_FREQ) { +		n = ticks / LOAD_FREQ; + +		active = atomic_long_read(&calc_load_tasks); +		active = active > 0 ? active * FIXED_1 : 0; + +		avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); +		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); +		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); + +		calc_load_update += n * LOAD_FREQ; +	} + +	/* +	 * Its possible the remainder of the above division also crosses +	 * a LOAD_FREQ period, the regular check in calc_global_load() +	 * which comes after this will take care of that. +	 * +	 * Consider us being 11 ticks before a cycle completion, and us +	 * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will +	 * age us 4 cycles, and the test in calc_global_load() will +	 * pick up the final one. +	 */ +}  #else  static void calc_load_account_idle(struct rq *this_rq)  { @@ -3142,6 +3345,10 @@ static inline long calc_load_fold_idle(void)  {  	return 0;  } + +static void calc_global_nohz(unsigned long ticks) +{ +}  #endif  /** @@ -3159,24 +3366,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)  	loads[2] = (avenrun[2] + offset) << shift;  } -static unsigned long -calc_load(unsigned long load, unsigned long exp, unsigned long active) -{ -	load *= exp; -	load += active * (FIXED_1 - exp); -	return load >> FSHIFT; -} -  /*   * calc_load - update the avenrun load estimates 10 ticks after the   * CPUs have updated calc_load_tasks.   
*/ -void calc_global_load(void) +void calc_global_load(unsigned long ticks)  { -	unsigned long upd = calc_load_update + 10;  	long active; -	if (time_before(jiffies, upd)) +	calc_global_nohz(ticks); + +	if (time_before(jiffies, calc_load_update + 10))  		return;  	active = atomic_long_read(&calc_load_tasks); @@ -3830,7 +4030,6 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev)  {  	if (prev->se.on_rq)  		update_rq_clock(rq); -	rq->skip_clock_update = 0;  	prev->sched_class->put_prev_task(rq, prev);  } @@ -3888,7 +4087,6 @@ need_resched_nonpreemptible:  		hrtick_clear(rq);  	raw_spin_lock_irq(&rq->lock); -	clear_tsk_need_resched(prev);  	switch_count = &prev->nivcsw;  	if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { @@ -3920,6 +4118,8 @@ need_resched_nonpreemptible:  	put_prev_task(rq, prev);  	next = pick_next_task(rq); +	clear_tsk_need_resched(prev); +	rq->skip_clock_update = 0;  	if (likely(prev != next)) {  		sched_info_switch(prev, next); @@ -6960,6 +7160,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)  	if (cpu != group_first_cpu(sd->groups))  		return; +	sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups)); +  	child = sd->child;  	sd->groups->cpu_power = 0; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f4f6a8326dd..00ebd768667 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1654,12 +1654,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_  	struct cfs_rq *cfs_rq = task_cfs_rq(curr);  	int scale = cfs_rq->nr_running >= sched_nr_latency; -	if (unlikely(rt_prio(p->prio))) -		goto preempt; - -	if (unlikely(p->sched_class != &fair_sched_class)) -		return; -  	if (unlikely(se == pse))  		return; @@ -1764,10 +1758,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,  	set_task_cpu(p, this_cpu);  	activate_task(this_rq, p, 0);  	check_preempt_curr(this_rq, p, 0); - -	/* re-arm NEWIDLE balancing when moving tasks */ -	src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost; -	this_rq->idle_stamp = 0;  }  /* @@ -2035,13 +2025,16 @@ struct sd_lb_stats {  	unsigned long this_load_per_task;  	unsigned long this_nr_running;  	unsigned long this_has_capacity; +	unsigned int  this_idle_cpus;  	/* Statistics of the busiest group */ +	unsigned int  busiest_idle_cpus;  	unsigned long max_load;  	unsigned long busiest_load_per_task;  	unsigned long busiest_nr_running;  	unsigned long busiest_group_capacity;  	unsigned long busiest_has_capacity; +	unsigned int  busiest_group_weight;  	int group_imb; /* Is there imbalance in this sd */  #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) @@ -2063,6 +2056,8 @@ struct sg_lb_stats {  	unsigned long sum_nr_running; /* Nr tasks running in the group */  	unsigned long sum_weighted_load; /* Weighted load of group's tasks */  	unsigned long group_capacity; +	unsigned long idle_cpus; +	unsigned long group_weight;  	int group_imb; /* Is there an imbalance in the group ? */  	int group_has_capacity; /* Is there extra capacity in the group? 
*/  }; @@ -2431,7 +2426,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,  		sgs->group_load += load;  		sgs->sum_nr_running += rq->nr_running;  		sgs->sum_weighted_load += weighted_cpuload(i); - +		if (idle_cpu(i)) +			sgs->idle_cpus++;  	}  	/* @@ -2469,6 +2465,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,  	sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);  	if (!sgs->group_capacity)  		sgs->group_capacity = fix_small_capacity(sd, group); +	sgs->group_weight = group->group_weight;  	if (sgs->group_capacity > sgs->sum_nr_running)  		sgs->group_has_capacity = 1; @@ -2576,13 +2573,16 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,  			sds->this_nr_running = sgs.sum_nr_running;  			sds->this_load_per_task = sgs.sum_weighted_load;  			sds->this_has_capacity = sgs.group_has_capacity; +			sds->this_idle_cpus = sgs.idle_cpus;  		} else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {  			sds->max_load = sgs.avg_load;  			sds->busiest = sg;  			sds->busiest_nr_running = sgs.sum_nr_running; +			sds->busiest_idle_cpus = sgs.idle_cpus;  			sds->busiest_group_capacity = sgs.group_capacity;  			sds->busiest_load_per_task = sgs.sum_weighted_load;  			sds->busiest_has_capacity = sgs.group_has_capacity; +			sds->busiest_group_weight = sgs.group_weight;  			sds->group_imb = sgs.group_imb;  		} @@ -2860,8 +2860,26 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,  	if (sds.this_load >= sds.avg_load)  		goto out_balanced; -	if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) -		goto out_balanced; +	/* +	 * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative. +	 * And to check for busy balance use !idle_cpu instead of +	 * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE +	 * even when they are idle. +	 */ +	if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) { +		if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) +			goto out_balanced; +	} else { +		/* +		 * This cpu is idle. If the busiest group load doesn't +		 * have more tasks than the number of available cpu's and +		 * there is no imbalance between this and busiest group +		 * wrt to idle cpu's, it is balanced. +		 */ +		if ((sds.this_idle_cpus  <= sds.busiest_idle_cpus + 1) && +		    sds.busiest_nr_running <= sds.busiest_group_weight) +			goto out_balanced; +	}  force_balance:  	/* Looks like there is an imbalance. 
Compute it */ @@ -3197,8 +3215,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq)  		interval = msecs_to_jiffies(sd->balance_interval);  		if (time_after(next_balance, sd->last_balance + interval))  			next_balance = sd->last_balance + interval; -		if (pulled_task) +		if (pulled_task) { +			this_rq->idle_stamp = 0;  			break; +		}  	}  	raw_spin_lock(&this_rq->lock); diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 45bddc0c104..2bf6b47058c 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c @@ -19,14 +19,14 @@ select_task_rq_stop(struct rq *rq, struct task_struct *p,  static void  check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)  { -	resched_task(rq->curr); /* we preempt everything */ +	/* we're never preempted */  }  static struct task_struct *pick_next_task_stop(struct rq *rq)  {  	struct task_struct *stop = rq->stop; -	if (stop && stop->state == TASK_RUNNING) +	if (stop && stop->se.on_rq)  		return stop;  	return NULL; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c33a1edb799..5abfa151855 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -702,6 +702,15 @@ static struct ctl_table kern_table[] = {  		.extra1		= &zero,  		.extra2		= &ten_thousand,  	}, +	{ +		.procname	= "dmesg_restrict", +		.data		= &dmesg_restrict, +		.maxlen		= sizeof(int), +		.mode		= 0644, +		.proc_handler	= proc_dointvec_minmax, +		.extra1		= &zero, +		.extra2		= &one, +	},  #endif  	{  		.procname	= "ngroups_max", diff --git a/kernel/timer.c b/kernel/timer.c index 68a9ae7679b..353b9227c2e 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1252,6 +1252,12 @@ unsigned long get_next_timer_interrupt(unsigned long now)  	struct tvec_base *base = __get_cpu_var(tvec_bases);  	unsigned long expires; +	/* +	 * Pretend that there is no timer pending if the cpu is offline. +	 * Possible pending timers will be migrated later to an active cpu. 
+	 */ +	if (cpu_is_offline(smp_processor_id())) +		return now + NEXT_TIMER_MAX_DELTA;  	spin_lock(&base->lock);  	if (time_before_eq(base->next_timer, base->timer_jiffies))  		base->next_timer = __next_timer_interrupt(base); @@ -1319,7 +1325,7 @@ void do_timer(unsigned long ticks)  {  	jiffies_64 += ticks;  	update_wall_time(); -	calc_global_load(); +	calc_global_load(ticks);  }  #ifdef __ARCH_WANT_SYS_ALARM diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e04b8bcdef8..ea37e2ff416 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -126,7 +126,7 @@ if FTRACE  config FUNCTION_TRACER  	bool "Kernel Function Tracer"  	depends on HAVE_FUNCTION_TRACER -	select FRAME_POINTER if (!ARM_UNWIND) +	select FRAME_POINTER if !ARM_UNWIND && !S390  	select KALLSYMS  	select GENERIC_TRACER  	select CONTEXT_SWITCH_TRACER diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index bc251ed6672..7b8ec028154 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -168,7 +168,6 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,  static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),  				 BLK_TC_ACT(BLK_TC_WRITE) }; -#define BLK_TC_HARDBARRIER	BLK_TC_BARRIER  #define BLK_TC_RAHEAD		BLK_TC_AHEAD  /* The ilog2() calls fall out because they're constant */ @@ -196,7 +195,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,  		return;  	what |= ddir_act[rw & WRITE]; -	what |= MASK_TC_BIT(rw, HARDBARRIER);  	what |= MASK_TC_BIT(rw, SYNC);  	what |= MASK_TC_BIT(rw, RAHEAD);  	what |= MASK_TC_BIT(rw, META); @@ -1807,8 +1805,6 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)  	if (rw & REQ_RAHEAD)  		rwbs[i++] = 'A'; -	if (rw & REQ_HARDBARRIER) -		rwbs[i++] = 'B';  	if (rw & REQ_SYNC)  		rwbs[i++] = 'S';  	if (rw & REQ_META) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 82d9b8106cd..f8cf959bad4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -17,7 +17,6 @@  #include <linux/writeback.h>  #include <linux/kallsyms.h>  #include <linux/seq_file.h> -#include <linux/smp_lock.h>  #include <linux/notifier.h>  #include <linux/irqflags.h>  #include <linux/debugfs.h> @@ -1284,6 +1283,8 @@ void trace_dump_stack(void)  	__ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());  } +static DEFINE_PER_CPU(int, user_stack_count); +  void  ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)  { @@ -1302,6 +1303,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)  	if (unlikely(in_nmi()))  		return; +	/* +	 * prevent recursion, since the user stack tracing may +	 * trigger other kernel events. 
+	 */ +	preempt_disable(); +	if (__this_cpu_read(user_stack_count)) +		goto out; + +	__this_cpu_inc(user_stack_count); + + +  	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,  					  sizeof(*entry), flags, pc);  	if (!event) @@ -1319,6 +1332,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)  	save_stack_trace_user(&trace);  	if (!filter_check_discard(call, entry, buffer, event))  		ring_buffer_unlock_commit(buffer, event); + +	__this_cpu_dec(user_stack_count); + + out: +	preempt_enable();  }  #ifdef UNUSED @@ -2320,11 +2338,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf,  	return count;  } +static loff_t tracing_seek(struct file *file, loff_t offset, int origin) +{ +	if (file->f_mode & FMODE_READ) +		return seq_lseek(file, offset, origin); +	else +		return 0; +} +  static const struct file_operations tracing_fops = {  	.open		= tracing_open,  	.read		= seq_read,  	.write		= tracing_write_stub, -	.llseek		= seq_lseek, +	.llseek		= tracing_seek,  	.release	= tracing_release,  }; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index bafba687a6d..6e3c41a4024 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -43,7 +43,7 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);  static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);  #endif -static int __initdata no_watchdog; +static int no_watchdog;  /* boot commands */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 90db1bd1a97..e785b0f2aea 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -661,7 +661,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)  {  	struct worker *worker = kthread_data(task); -	if (likely(!(worker->flags & WORKER_NOT_RUNNING))) +	if (!(worker->flags & WORKER_NOT_RUNNING))  		atomic_inc(get_gcwq_nr_running(cpu));  } @@ -687,7 +687,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,  	struct global_cwq *gcwq = get_gcwq(cpu);  	atomic_t *nr_running = get_gcwq_nr_running(cpu); -	if (unlikely(worker->flags & WORKER_NOT_RUNNING)) +	if (worker->flags & WORKER_NOT_RUNNING)  		return NULL;  	/* this can only happen on the local cpu */ @@ -3692,7 +3692,8 @@ static int __init init_workqueues(void)  	system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);  	system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,  					    WQ_UNBOUND_MAX_ACTIVE); -	BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq); +	BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || +	       !system_unbound_wq);  	return 0;  }  early_initcall(init_workqueues);  |
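For readers who want to poke at the scheduler change above without building a kernel: the kernel/sched.c hunks replace per-tick load-average updates with calc_load(), fixed_power_int() and calc_load_n(), so that an idle NO_HZ stretch of n missed LOAD_FREQ cycles can be folded in with one O(log n) fixed-point exponentiation. The sketch below lifts that arithmetic into a standalone userspace program; the three functions are copied from the hunk, the FSHIFT/FIXED_1/EXP_1 constants mirror include/linux/sched.h, and the main() driver with its pretend task count is illustrative only.

```c
#include <stdio.h>

#define FSHIFT  11                      /* nr of bits of precision */
#define FIXED_1 (1UL << FSHIFT)         /* 1.0 as fixed point */
#define EXP_1   1884                    /* 1/exp(5sec/1min) as fixed point */

/* One LOAD_FREQ step of the exponentially weighted moving average. */
static unsigned long
calc_load(unsigned long load, unsigned long exp, unsigned long active)
{
	load *= exp;
	load += active * (FIXED_1 - exp);
	load += 1UL << (FSHIFT - 1);    /* round to nearest */
	return load >> FSHIFT;
}

/* x^n in fixed point, computed in O(log n) by binary exponentiation. */
static unsigned long
fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
{
	unsigned long result = 1UL << frac_bits;

	if (n) for (;;) {
		if (n & 1) {
			result *= x;
			result += 1UL << (frac_bits - 1);
			result >>= frac_bits;
		}
		n >>= 1;
		if (!n)
			break;
		x *= x;
		x += 1UL << (frac_bits - 1);
		x >>= frac_bits;
	}

	return result;
}

/* Fold n missed periods at once: a_n = a_0 * e^n + a * (1 - e^n). */
static unsigned long
calc_load_n(unsigned long load, unsigned long exp,
	    unsigned long active, unsigned int n)
{
	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
}

int main(void)
{
	unsigned long avg = 0;                  /* 1-minute average, fixed point */
	unsigned long active = 2 * FIXED_1;     /* pretend 2 runnable tasks */
	unsigned int i;

	/* Ten ordinary 5-second updates, one at a time... */
	for (i = 0; i < 10; i++)
		avg = calc_load(avg, EXP_1, active);
	printf("stepwise: %lu.%02lu\n",
	       avg >> FSHIFT, ((avg & (FIXED_1 - 1)) * 100) >> FSHIFT);

	/* ...agree (up to rounding) with one folded update over 10 periods. */
	avg = calc_load_n(0, EXP_1, active, 10);
	printf("folded:   %lu.%02lu\n",
	       avg >> FSHIFT, ((avg & (FIXED_1 - 1)) * 100) >> FSHIFT);

	return 0;
}
```

Both printouts land on roughly the same value, which is the point of the new calc_global_nohz(): the missed cycles are applied in one shot instead of replaying every tick after waking from a long idle period.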