Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r--	kernel/rcutree_plugin.h	597
1 file changed, 231 insertions, 366 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 7f3244c0df0..f9211548818 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -25,6 +25,8 @@
  */
 
 #include <linux/delay.h>
+#include <linux/oom.h>
+#include <linux/smpboot.h>
 
 #define RCU_KTHREAD_PRIO 1
 
@@ -118,7 +120,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
  */
 void rcu_force_quiescent_state(void)
 {
-	force_quiescent_state(&rcu_preempt_state, 0);
+	force_quiescent_state(&rcu_preempt_state);
 }
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
@@ -136,8 +138,6 @@ static void rcu_preempt_qs(int cpu)
 {
 	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
 
-	rdp->passed_quiesce_gpnum = rdp->gpnum;
-	barrier();
 	if (rdp->passed_quiesce == 0)
 		trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs");
 	rdp->passed_quiesce = 1;
@@ -422,9 +422,11 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
 	unsigned long flags;
 	struct task_struct *t;
 
-	if (!rcu_preempt_blocked_readers_cgp(rnp))
-		return;
 	raw_spin_lock_irqsave(&rnp->lock, flags);
+	if (!rcu_preempt_blocked_readers_cgp(rnp)) {
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
 	t = list_entry(rnp->gp_tasks,
 		       struct task_struct, rcu_node_entry);
 	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
@@ -584,17 +586,23 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 		raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
 	}
 
+	rnp->gp_tasks = NULL;
+	rnp->exp_tasks = NULL;
 #ifdef CONFIG_RCU_BOOST
-	/* In case root is being boosted and leaf is not. */
+	rnp->boost_tasks = NULL;
+	/*
+	 * In case root is being boosted and leaf was not.  Make sure
+	 * that we boost the tasks blocking the current grace period
+	 * in this case.
+	 */
 	raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
 	if (rnp_root->boost_tasks != NULL &&
-	    rnp_root->boost_tasks != rnp_root->gp_tasks)
+	    rnp_root->boost_tasks != rnp_root->gp_tasks &&
+	    rnp_root->boost_tasks != rnp_root->exp_tasks)
 		rnp_root->boost_tasks = rnp_root->gp_tasks;
 	raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
 #endif /* #ifdef CONFIG_RCU_BOOST */
 
-	rnp->gp_tasks = NULL;
-	rnp->exp_tasks = NULL;
 	return retval;
 }
 
@@ -676,7 +684,7 @@ void synchronize_rcu(void)
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
-static long sync_rcu_preempt_exp_count;
+static unsigned long sync_rcu_preempt_exp_count;
 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
 
 /*
@@ -791,7 +799,7 @@ void synchronize_rcu_expedited(void)
 	unsigned long flags;
 	struct rcu_node *rnp;
 	struct rcu_state *rsp = &rcu_preempt_state;
-	long snap;
+	unsigned long snap;
 	int trycount = 0;
 
 	smp_mb(); /* Caller's modifications seen first by other CPUs. */
@@ -799,33 +807,47 @@ void synchronize_rcu_expedited(void)
 	smp_mb(); /* Above access cannot bleed into critical section. */
 
 	/*
+	 * Block CPU-hotplug operations.  This means that any CPU-hotplug
+	 * operation that finds an rcu_node structure with tasks in the
+	 * process of being boosted will know that all tasks blocking
+	 * this expedited grace period will already be in the process of
+	 * being boosted.  This simplifies the process of moving tasks
+	 * from leaf to root rcu_node structures.
+	 */
+	get_online_cpus();
+
+	/*
 	 * Acquire lock, falling back to synchronize_rcu() if too many
 	 * lock-acquisition failures.  Of course, if someone does the
 	 * expedited grace period for us, just leave.
 	 */
 	while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
+		if (ULONG_CMP_LT(snap,
+		    ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
+			put_online_cpus();
+			goto mb_ret; /* Others did our work for us. */
+		}
 		if (trycount++ < 10) {
 			udelay(trycount * num_online_cpus());
 		} else {
+			put_online_cpus();
 			synchronize_rcu();
 			return;
 		}
-		if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
-			goto mb_ret; /* Others did our work for us. */
 	}
-	if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
+	if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
+		put_online_cpus();
 		goto unlock_mb_ret; /* Others did our work for us. */
+	}
 
 	/* force all RCU readers onto ->blkd_tasks lists. */
 	synchronize_sched_expedited();
 
-	raw_spin_lock_irqsave(&rsp->onofflock, flags);
-
 	/* Initialize ->expmask for all non-leaf rcu_node structures. */
 	rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
-		raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+		raw_spin_lock_irqsave(&rnp->lock, flags);
 		rnp->expmask = rnp->qsmaskinit;
-		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 
 	/* Snapshot current state of ->blkd_tasks lists. */
@@ -834,7 +856,7 @@ void synchronize_rcu_expedited(void)
 	if (NUM_RCU_NODES > 1)
 		sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
 
-	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
+	put_online_cpus();
 
 	/* Wait for snapshotted ->blkd_tasks lists to drain. */
 	rnp = rcu_get_root(rsp);
@@ -1069,6 +1091,16 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp)
 
 #endif /* #else #ifdef CONFIG_RCU_TRACE */
 
+static void rcu_wake_cond(struct task_struct *t, int status)
+{
+	/*
+	 * If the thread is yielding, only wake it when this
+	 * is invoked from idle
+	 */
+	if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
+		wake_up_process(t);
+}
+
 /*
  * Carry out RCU priority boosting on the task indicated by ->exp_tasks
  * or ->boost_tasks, advancing the pointer to the next task in the
@@ -1141,17 +1173,6 @@ static int rcu_boost(struct rcu_node *rnp)
 }
 
 /*
- * Timer handler to initiate waking up of boost kthreads that
- * have yielded the CPU due to excessive numbers of tasks to
- * boost.  We wake up the per-rcu_node kthread, which in turn
- * will wake up the booster kthread.
- */
-static void rcu_boost_kthread_timer(unsigned long arg)
-{
-	invoke_rcu_node_kthread((struct rcu_node *)arg);
-}
-
-/*
  * Priority-boosting kthread.  One per leaf rcu_node and one for the
  * root rcu_node.
  */
@@ -1174,8 +1195,9 @@ static int rcu_boost_kthread(void *arg)
 		else
 			spincnt = 0;
 		if (spincnt > 10) {
+			rnp->boost_kthread_status = RCU_KTHREAD_YIELDING;
 			trace_rcu_utilization("End boost kthread@rcu_yield");
-			rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp);
+			schedule_timeout_interruptible(2);
 			trace_rcu_utilization("Start boost kthread@rcu_yield");
 			spincnt = 0;
 		}
@@ -1191,9 +1213,9 @@ static int rcu_boost_kthread(void *arg)
  * kthread to start boosting them.  If there is an expedited grace
  * period in progress, it is always time to boost.
  *
- * The caller must hold rnp->lock, which this function releases,
- * but irqs remain disabled.  The ->boost_kthread_task is immortal,
- * so we don't need to worry about it going away.
+ * The caller must hold rnp->lock, which this function releases.
+ * The ->boost_kthread_task is immortal, so we don't need to worry
+ * about it going away.
  */
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 {
@@ -1213,8 +1235,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 			rnp->boost_tasks = rnp->gp_tasks;
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		t = rnp->boost_kthread_task;
-		if (t != NULL)
-			wake_up_process(t);
+		if (t)
+			rcu_wake_cond(t, rnp->boost_kthread_status);
 	} else {
 		rcu_initiate_boost_trace(rnp);
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1231,8 +1253,10 @@ static void invoke_rcu_callbacks_kthread(void)
 	local_irq_save(flags);
 	__this_cpu_write(rcu_cpu_has_work, 1);
 	if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
-	    current != __this_cpu_read(rcu_cpu_kthread_task))
-		wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
+	    current != __this_cpu_read(rcu_cpu_kthread_task)) {
+		rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
+			      __this_cpu_read(rcu_cpu_kthread_status));
+	}
 	local_irq_restore(flags);
 }
 
@@ -1245,21 +1269,6 @@ static bool rcu_is_callbacks_kthread(void)
 	return __get_cpu_var(rcu_cpu_kthread_task) == current;
 }
 
-/*
- * Set the affinity of the boost kthread.  The CPU-hotplug locks are
- * held, so no one should be messing with the existence of the boost
- * kthread.
- */
-static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp,
-					  cpumask_var_t cm)
-{
-	struct task_struct *t;
-
-	t = rnp->boost_kthread_task;
-	if (t != NULL)
-		set_cpus_allowed_ptr(rnp->boost_kthread_task, cm);
-}
-
 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
 
 /*
@@ -1276,15 +1285,19 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
  * Returns zero if all is well, a negated errno otherwise.
  */
 static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
-						 struct rcu_node *rnp,
-						 int rnp_index)
+						 struct rcu_node *rnp)
 {
+	int rnp_index = rnp - &rsp->node[0];
 	unsigned long flags;
 	struct sched_param sp;
 	struct task_struct *t;
 
 	if (&rcu_preempt_state != rsp)
 		return 0;
+
+	if (!rcu_scheduler_fully_active || rnp->qsmaskinit == 0)
+		return 0;
+
 	rsp->boost = 1;
 	if (rnp->boost_kthread_task != NULL)
 		return 0;
@@ -1301,25 +1314,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 	return 0;
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-
-/*
- * Stop the RCU's per-CPU kthread when its CPU goes offline,.
- */
-static void rcu_stop_cpu_kthread(int cpu)
-{
-	struct task_struct *t;
-
-	/* Stop the CPU's kthread. */
-	t = per_cpu(rcu_cpu_kthread_task, cpu);
-	if (t != NULL) {
-		per_cpu(rcu_cpu_kthread_task, cpu) = NULL;
-		kthread_stop(t);
-	}
-}
-
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
 static void rcu_kthread_do_work(void)
 {
 	rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
@@ -1327,112 +1321,22 @@ static void rcu_kthread_do_work(void)
 	rcu_preempt_do_callbacks();
 }
 
-/*
- * Wake up the specified per-rcu_node-structure kthread.
- * Because the per-rcu_node kthreads are immortal, we don't need
- * to do anything to keep them alive.
- */
-static void invoke_rcu_node_kthread(struct rcu_node *rnp)
-{
-	struct task_struct *t;
-
-	t = rnp->node_kthread_task;
-	if (t != NULL)
-		wake_up_process(t);
-}
-
-/*
- * Set the specified CPU's kthread to run RT or not, as specified by
- * the to_rt argument.  The CPU-hotplug locks are held, so the task
- * is not going away.
- */
-static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
+static void rcu_cpu_kthread_setup(unsigned int cpu)
 {
-	int policy;
 	struct sched_param sp;
-	struct task_struct *t;
-
-	t = per_cpu(rcu_cpu_kthread_task, cpu);
-	if (t == NULL)
-		return;
-	if (to_rt) {
-		policy = SCHED_FIFO;
-		sp.sched_priority = RCU_KTHREAD_PRIO;
-	} else {
-		policy = SCHED_NORMAL;
-		sp.sched_priority = 0;
-	}
-	sched_setscheduler_nocheck(t, policy, &sp);
-}
-
-/*
- * Timer handler to initiate the waking up of per-CPU kthreads that
- * have yielded the CPU due to excess numbers of RCU callbacks.
- * We wake up the per-rcu_node kthread, which in turn will wake up
- * the booster kthread.
- */
-static void rcu_cpu_kthread_timer(unsigned long arg)
-{
-	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg);
-	struct rcu_node *rnp = rdp->mynode;
 
-	atomic_or(rdp->grpmask, &rnp->wakemask);
-	invoke_rcu_node_kthread(rnp);
+	sp.sched_priority = RCU_KTHREAD_PRIO;
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
 }
 
-/*
- * Drop to non-real-time priority and yield, but only after posting a
- * timer that will cause us to regain our real-time priority if we
- * remain preempted.  Either way, we restore our real-time priority
- * before returning.
- */
-static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
+static void rcu_cpu_kthread_park(unsigned int cpu)
 {
-	struct sched_param sp;
-	struct timer_list yield_timer;
-	int prio = current->rt_priority;
-
-	setup_timer_on_stack(&yield_timer, f, arg);
-	mod_timer(&yield_timer, jiffies + 2);
-	sp.sched_priority = 0;
-	sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp);
-	set_user_nice(current, 19);
-	schedule();
-	set_user_nice(current, 0);
-	sp.sched_priority = prio;
-	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
-	del_timer(&yield_timer);
+	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
 }
 
-/*
- * Handle cases where the rcu_cpu_kthread() ends up on the wrong CPU.
- * This can happen while the corresponding CPU is either coming online
- * or going offline.  We cannot wait until the CPU is fully online
- * before starting the kthread, because the various notifier functions
- * can wait for RCU grace periods.  So we park rcu_cpu_kthread() until
- * the corresponding CPU is online.
- *
- * Return 1 if the kthread needs to stop, 0 otherwise.
- *
- * Caller must disable bh.  This function can momentarily enable it.
- */
-static int rcu_cpu_kthread_should_stop(int cpu)
+static int rcu_cpu_kthread_should_run(unsigned int cpu)
 {
-	while (cpu_is_offline(cpu) ||
-	       !cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)) ||
-	       smp_processor_id() != cpu) {
-		if (kthread_should_stop())
-			return 1;
-		per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
-		per_cpu(rcu_cpu_kthread_cpu, cpu) = raw_smp_processor_id();
-		local_bh_enable();
-		schedule_timeout_uninterruptible(1);
-		if (!cpumask_equal(&current->cpus_allowed, cpumask_of(cpu)))
-			set_cpus_allowed_ptr(current, cpumask_of(cpu));
-		local_bh_disable();
-	}
-	per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
-	return 0;
+	return __get_cpu_var(rcu_cpu_has_work);
 }
 
 /*
@@ -1440,138 +1344,35 @@ static int rcu_cpu_kthread_should_stop(int cpu)
  * RCU softirq used in flavors and configurations of RCU that do not
  * support RCU priority boosting.
  */
-static int rcu_cpu_kthread(void *arg)
+static void rcu_cpu_kthread(unsigned int cpu)
 {
-	int cpu = (int)(long)arg;
-	unsigned long flags;
-	int spincnt = 0;
-	unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu);
-	char work;
-	char *workp = &per_cpu(rcu_cpu_has_work, cpu);
+	unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
+	char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
+	int spincnt;
 
-	trace_rcu_utilization("Start CPU kthread@init");
-	for (;;) {
-		*statusp = RCU_KTHREAD_WAITING;
-		trace_rcu_utilization("End CPU kthread@rcu_wait");
-		rcu_wait(*workp != 0 || kthread_should_stop());
+	for (spincnt = 0; spincnt < 10; spincnt++) {
 		trace_rcu_utilization("Start CPU kthread@rcu_wait");
 		local_bh_disable();
-		if (rcu_cpu_kthread_should_stop(cpu)) {
-			local_bh_enable();
-			break;
-		}
 		*statusp = RCU_KTHREAD_RUNNING;
-		per_cpu(rcu_cpu_kthread_loops, cpu)++;
-		local_irq_save(flags);
+		this_cpu_inc(rcu_cpu_kthread_loops);
+		local_irq_disable();
 		work = *workp;
 		*workp = 0;
-		local_irq_restore(flags);
+		local_irq_enable();
 		if (work)
 			rcu_kthread_do_work();
 		local_bh_enable();
-		if (*workp != 0)
-			spincnt++;
-		else
-			spincnt = 0;
-		if (spincnt > 10) {
-			*statusp = RCU_KTHREAD_YIELDING;
-			trace_rcu_utilization("End CPU kthread@rcu_yield");
-			rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu);
-			trace_rcu_utilization("Start CPU kthread@rcu_yield");
-			spincnt = 0;
-		}
-	}
-	*statusp = RCU_KTHREAD_STOPPED;
-	trace_rcu_utilization("End CPU kthread@term");
-	return 0;
-}
-
-/*
- * Spawn a per-CPU kthread, setting up affinity and priority.
- * Because the CPU hotplug lock is held, no other CPU will be attempting
- * to manipulate rcu_cpu_kthread_task.  There might be another CPU
- * attempting to access it during boot, but the locking in kthread_bind()
- * will enforce sufficient ordering.
- *
- * Please note that we cannot simply refuse to wake up the per-CPU
- * kthread because kthreads are created in TASK_UNINTERRUPTIBLE state,
- * which can result in softlockup complaints if the task ends up being
- * idle for more than a couple of minutes.
- *
- * However, please note also that we cannot bind the per-CPU kthread to its
- * CPU until that CPU is fully online.  We also cannot wait until the
- * CPU is fully online before we create its per-CPU kthread, as this would
- * deadlock the system when CPU notifiers tried waiting for grace
- * periods.  So we bind the per-CPU kthread to its CPU only if the CPU
- * is online.  If its CPU is not yet fully online, then the code in
- * rcu_cpu_kthread() will wait until it is fully online, and then do
- * the binding.
- */
-static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
-{
-	struct sched_param sp;
-	struct task_struct *t;
-
-	if (!rcu_scheduler_fully_active ||
-	    per_cpu(rcu_cpu_kthread_task, cpu) != NULL)
-		return 0;
-	t = kthread_create_on_node(rcu_cpu_kthread,
-				   (void *)(long)cpu,
-				   cpu_to_node(cpu),
-				   "rcuc/%d", cpu);
-	if (IS_ERR(t))
-		return PTR_ERR(t);
-	if (cpu_online(cpu))
-		kthread_bind(t, cpu);
-	per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
-	WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
-	sp.sched_priority = RCU_KTHREAD_PRIO;
-	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
-	per_cpu(rcu_cpu_kthread_task, cpu) = t;
-	wake_up_process(t); /* Get to TASK_INTERRUPTIBLE quickly. */
-	return 0;
-}
-
-/*
- * Per-rcu_node kthread, which is in charge of waking up the per-CPU
- * kthreads when needed.  We ignore requests to wake up kthreads
- * for offline CPUs, which is OK because force_quiescent_state()
- * takes care of this case.
- */
-static int rcu_node_kthread(void *arg)
-{
-	int cpu;
-	unsigned long flags;
-	unsigned long mask;
-	struct rcu_node *rnp = (struct rcu_node *)arg;
-	struct sched_param sp;
-	struct task_struct *t;
-
-	for (;;) {
-		rnp->node_kthread_status = RCU_KTHREAD_WAITING;
-		rcu_wait(atomic_read(&rnp->wakemask) != 0);
-		rnp->node_kthread_status = RCU_KTHREAD_RUNNING;
-		raw_spin_lock_irqsave(&rnp->lock, flags);
-		mask = atomic_xchg(&rnp->wakemask, 0);
-		rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
-		for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) {
-			if ((mask & 0x1) == 0)
-				continue;
-			preempt_disable();
-			t = per_cpu(rcu_cpu_kthread_task, cpu);
-			if (!cpu_online(cpu) || t == NULL) {
-				preempt_enable();
-				continue;
-			}
-			per_cpu(rcu_cpu_has_work, cpu) = 1;
-			sp.sched_priority = RCU_KTHREAD_PRIO;
-			sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
-			preempt_enable();
+		if (*workp == 0) {
+			trace_rcu_utilization("End CPU kthread@rcu_wait");
+			*statusp = RCU_KTHREAD_WAITING;
+			return;
 		}
 	}
-	/* NOTREACHED */
-	rnp->node_kthread_status = RCU_KTHREAD_STOPPED;
-	return 0;
+	*statusp = RCU_KTHREAD_YIELDING;
+	trace_rcu_utilization("Start CPU kthread@rcu_yield");
+	schedule_timeout_interruptible(2);
+	trace_rcu_utilization("End CPU kthread@rcu_yield");
+	*statusp = RCU_KTHREAD_WAITING;
 }
 
 /*
@@ -1583,17 +1384,17 @@ static int rcu_node_kthread(void *arg)
  * no outgoing CPU.  If there are no CPUs left in the affinity set,
  * this function allows the kthread to execute on any CPU.
  */
-static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
+static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 {
+	struct task_struct *t = rnp->boost_kthread_task;
+	unsigned long mask = rnp->qsmaskinit;
 	cpumask_var_t cm;
 	int cpu;
-	unsigned long mask = rnp->qsmaskinit;
 
-	if (rnp->node_kthread_task == NULL)
+	if (!t)
 		return;
-	if (!alloc_cpumask_var(&cm, GFP_KERNEL))
+	if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
 		return;
-	cpumask_clear(cm);
 	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
 		if ((mask & 0x1) && cpu != outgoingcpu)
 			cpumask_set_cpu(cpu, cm);
@@ -1603,62 +1404,36 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 			cpumask_clear_cpu(cpu, cm);
 		WARN_ON_ONCE(cpumask_weight(cm) == 0);
 	}
-	set_cpus_allowed_ptr(rnp->node_kthread_task, cm);
-	rcu_boost_kthread_setaffinity(rnp, cm);
+	set_cpus_allowed_ptr(t, cm);
 	free_cpumask_var(cm);
 }
 
-/*
- * Spawn a per-rcu_node kthread, setting priority and affinity.
- * Called during boot before online/offline can happen, or, if
- * during runtime, with the main CPU-hotplug locks held.  So only
- * one of these can be executing at a time.
- */
-static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
-						struct rcu_node *rnp)
-{
-	unsigned long flags;
-	int rnp_index = rnp - &rsp->node[0];
-	struct sched_param sp;
-	struct task_struct *t;
-
-	if (!rcu_scheduler_fully_active ||
-	    rnp->qsmaskinit == 0)
-		return 0;
-	if (rnp->node_kthread_task == NULL) {
-		t = kthread_create(rcu_node_kthread, (void *)rnp,
-				   "rcun/%d", rnp_index);
-		if (IS_ERR(t))
-			return PTR_ERR(t);
-		raw_spin_lock_irqsave(&rnp->lock, flags);
-		rnp->node_kthread_task = t;
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-		sp.sched_priority = 99;
-		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
-		wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
-	}
-	return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
-}
+static struct smp_hotplug_thread rcu_cpu_thread_spec = {
+	.store			= &rcu_cpu_kthread_task,
+	.thread_should_run	= rcu_cpu_kthread_should_run,
+	.thread_fn		= rcu_cpu_kthread,
+	.thread_comm		= "rcuc/%u",
+	.setup			= rcu_cpu_kthread_setup,
+	.park			= rcu_cpu_kthread_park,
+};
 
 /*
  * Spawn all kthreads -- called as soon as the scheduler is running.
  */
 static int __init rcu_spawn_kthreads(void)
 {
-	int cpu;
 	struct rcu_node *rnp;
+	int cpu;
 
 	rcu_scheduler_fully_active = 1;
-	for_each_possible_cpu(cpu) {
+	for_each_possible_cpu(cpu)
 		per_cpu(rcu_cpu_has_work, cpu) = 0;
-		if (cpu_online(cpu))
-			(void)rcu_spawn_one_cpu_kthread(cpu);
-	}
+	BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
 	rnp = rcu_get_root(rcu_state);
-	(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+	(void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
 	if (NUM_RCU_NODES > 1) {
 		rcu_for_each_leaf_node(rcu_state, rnp)
-			(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
+			(void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
 	}
 	return 0;
 }
@@ -1670,11 +1445,8 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
 	struct rcu_node *rnp = rdp->mynode;
 
 	/* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
-	if (rcu_scheduler_fully_active) {
-		(void)rcu_spawn_one_cpu_kthread(cpu);
-		if (rnp->node_kthread_task == NULL)
-			(void)rcu_spawn_one_node_kthread(rcu_state, rnp);
-	}
+	if (rcu_scheduler_fully_active)
+		(void)rcu_spawn_one_boost_kthread(rcu_state, rnp);
 }
 
 #else /* #ifdef CONFIG_RCU_BOOST */
@@ -1698,19 +1470,7 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
 {
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-
-static void rcu_stop_cpu_kthread(int cpu)
-{
-}
-
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
-static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
-{
-}
-
-static void rcu_cpu_kthread_setrt(int cpu, int to_rt)
+static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 {
 }
 
@@ -1997,6 +1757,26 @@ static void rcu_prepare_for_idle(int cpu)
 	if (!tne)
 		return;
 
+	/* Adaptive-tick mode, where usermode execution is idle to RCU. */
+	if (!is_idle_task(current)) {
+		rdtp->dyntick_holdoff = jiffies - 1;
+		if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
+			trace_rcu_prep_idle("User dyntick with callbacks");
+			rdtp->idle_gp_timer_expires =
+				round_up(jiffies + RCU_IDLE_GP_DELAY,
+					 RCU_IDLE_GP_DELAY);
+		} else if (rcu_cpu_has_callbacks(cpu)) {
+			rdtp->idle_gp_timer_expires =
+				round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
+			trace_rcu_prep_idle("User dyntick with lazy callbacks");
+		} else {
+			return;
+		}
+		tp = &rdtp->idle_gp_timer;
+		mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
+		return;
+	}
+
 	/*
 	 * If this is an idle re-entry, for example, due to use of
 	 * RCU_NONIDLE() or the new idle-loop tracing API within the idle
@@ -2075,16 +1855,16 @@ static void rcu_prepare_for_idle(int cpu)
 #ifdef CONFIG_TREE_PREEMPT_RCU
 	if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
 		rcu_preempt_qs(cpu);
-		force_quiescent_state(&rcu_preempt_state, 0);
+		force_quiescent_state(&rcu_preempt_state);
 	}
 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 	if (per_cpu(rcu_sched_data, cpu).nxtlist) {
 		rcu_sched_qs(cpu);
-		force_quiescent_state(&rcu_sched_state, 0);
+		force_quiescent_state(&rcu_sched_state);
 	}
 	if (per_cpu(rcu_bh_data, cpu).nxtlist) {
 		rcu_bh_qs(cpu);
-		force_quiescent_state(&rcu_bh_state, 0);
+		force_quiescent_state(&rcu_bh_state);
 	}
 
 	/*
@@ -2112,6 +1892,88 @@ static void rcu_idle_count_callbacks_posted(void)
 	__this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
 }
 
+/*
+ * Data for flushing lazy RCU callbacks at OOM time.
+ */
+static atomic_t oom_callback_count;
+static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
+
+/*
+ * RCU OOM callback -- decrement the outstanding count and deliver the
+ * wake-up if we are the last one.
+ */
+static void rcu_oom_callback(struct rcu_head *rhp)
+{
+	if (atomic_dec_and_test(&oom_callback_count))
+		wake_up(&oom_callback_wq);
+}
+
+/*
+ * Post an rcu_oom_notify callback on the current CPU if it has at
+ * least one lazy callback.  This will unnecessarily post callbacks
+ * to CPUs that already have a non-lazy callback at the end of their
+ * callback list, but this is an infrequent operation, so accept some
+ * extra overhead to keep things simple.
+ */
+static void rcu_oom_notify_cpu(void *unused)
+{
+	struct rcu_state *rsp;
+	struct rcu_data *rdp;
+
+	for_each_rcu_flavor(rsp) {
+		rdp = __this_cpu_ptr(rsp->rda);
+		if (rdp->qlen_lazy != 0) {
+			atomic_inc(&oom_callback_count);
+			rsp->call(&rdp->oom_head, rcu_oom_callback);
+		}
+	}
+}
+
+/*
+ * If low on memory, ensure that each CPU has a non-lazy callback.
+ * This will wake up CPUs that have only lazy callbacks, in turn
+ * ensuring that they free up the corresponding memory in a timely manner.
+ * Because an uncertain amount of memory will be freed in some uncertain
+ * timeframe, we do not claim to have freed anything.
+ */
+static int rcu_oom_notify(struct notifier_block *self,
+			  unsigned long notused, void *nfreed)
+{
+	int cpu;
+
+	/* Wait for callbacks from earlier instance to complete. */
+	wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
+
+	/*
+	 * Prevent premature wakeup: ensure that all increments happen
+	 * before there is a chance of the counter reaching zero.
+	 */
+	atomic_set(&oom_callback_count, 1);
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
+		cond_resched();
+	}
+	put_online_cpus();
+
+	/* Unconditionally decrement: no need to wake ourselves up. */
+	atomic_dec(&oom_callback_count);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block rcu_oom_nb = {
+	.notifier_call = rcu_oom_notify
+};
+
+static int __init rcu_register_oom_notifier(void)
+{
+	register_oom_notifier(&rcu_oom_nb);
+	return 0;
+}
+early_initcall(rcu_register_oom_notifier);
+
 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
 
 #ifdef CONFIG_RCU_CPU_STALL_INFO
@@ -2122,11 +1984,15 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
 {
 	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 	struct timer_list *tltp = &rdtp->idle_gp_timer;
+	char c;
 
-	sprintf(cp, "drain=%d %c timer=%lu",
-		rdtp->dyntick_drain,
-		rdtp->dyntick_holdoff == jiffies ? 'H' : '.',
-		timer_pending(tltp) ? tltp->expires - jiffies : -1);
+	c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.';
+	if (timer_pending(tltp))
+		sprintf(cp, "drain=%d %c timer=%lu",
+			rdtp->dyntick_drain, c, tltp->expires - jiffies);
+	else
+		sprintf(cp, "drain=%d %c timer not pending",
+			rdtp->dyntick_drain, c);
 }
 
 #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
@@ -2194,11 +2060,10 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp)
 /* Increment ->ticks_this_gp for all flavors of RCU. */
 static void increment_cpu_stall_ticks(void)
 {
-	__get_cpu_var(rcu_sched_data).ticks_this_gp++;
-	__get_cpu_var(rcu_bh_data).ticks_this_gp++;
-#ifdef CONFIG_TREE_PREEMPT_RCU
-	__get_cpu_var(rcu_preempt_data).ticks_this_gp++;
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		__this_cpu_ptr(rsp->rda)->ticks_this_gp++;
 }
 
 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */