Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r--	kernel/rcutree_plugin.h	174
1 file changed, 140 insertions(+), 34 deletions(-)
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c1961aed121..9c71c1b18e0 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -25,6 +25,7 @@
  */
 
 #include <linux/delay.h>
+#include <linux/oom.h>
 #include <linux/smpboot.h>
 
 #define RCU_KTHREAD_PRIO 1
@@ -119,7 +120,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
  */
 void rcu_force_quiescent_state(void)
 {
-	force_quiescent_state(&rcu_preempt_state, 0);
+	force_quiescent_state(&rcu_preempt_state);
 }
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
@@ -137,8 +138,6 @@ static void rcu_preempt_qs(int cpu)
 {
 	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
 
-	rdp->passed_quiesce_gpnum = rdp->gpnum;
-	barrier();
 	if (rdp->passed_quiesce == 0)
 		trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs");
 	rdp->passed_quiesce = 1;
@@ -423,9 +422,11 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
 	unsigned long flags;
 	struct task_struct *t;
 
-	if (!rcu_preempt_blocked_readers_cgp(rnp))
-		return;
 	raw_spin_lock_irqsave(&rnp->lock, flags);
+	if (!rcu_preempt_blocked_readers_cgp(rnp)) {
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		return;
+	}
 	t = list_entry(rnp->gp_tasks,
 		       struct task_struct, rcu_node_entry);
 	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
@@ -585,17 +586,23 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 		raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
 	}
 
+	rnp->gp_tasks = NULL;
+	rnp->exp_tasks = NULL;
 #ifdef CONFIG_RCU_BOOST
-	/* In case root is being boosted and leaf is not. */
+	rnp->boost_tasks = NULL;
+	/*
+	 * In case root is being boosted and leaf was not.  Make sure
+	 * that we boost the tasks blocking the current grace period
+	 * in this case.
+	 */
 	raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
 	if (rnp_root->boost_tasks != NULL &&
-	    rnp_root->boost_tasks != rnp_root->gp_tasks)
+	    rnp_root->boost_tasks != rnp_root->gp_tasks &&
+	    rnp_root->boost_tasks != rnp_root->exp_tasks)
 		rnp_root->boost_tasks = rnp_root->gp_tasks;
 	raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
 #endif /* #ifdef CONFIG_RCU_BOOST */
 
-	rnp->gp_tasks = NULL;
-	rnp->exp_tasks = NULL;
 	return retval;
 }
 
@@ -677,7 +684,7 @@ void synchronize_rcu(void)
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
 static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
-static long sync_rcu_preempt_exp_count;
+static unsigned long sync_rcu_preempt_exp_count;
 static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
 
 /*
@@ -792,7 +799,7 @@ void synchronize_rcu_expedited(void)
 	unsigned long flags;
 	struct rcu_node *rnp;
 	struct rcu_state *rsp = &rcu_preempt_state;
-	long snap;
+	unsigned long snap;
 	int trycount = 0;
 
 	smp_mb(); /* Caller's modifications seen first by other CPUs. */
@@ -800,33 +807,47 @@ void synchronize_rcu_expedited(void)
 	smp_mb(); /* Above access cannot bleed into critical section. */
 
 	/*
+	 * Block CPU-hotplug operations.  This means that any CPU-hotplug
+	 * operation that finds an rcu_node structure with tasks in the
+	 * process of being boosted will know that all tasks blocking
+	 * this expedited grace period will already be in the process of
+	 * being boosted.  This simplifies the process of moving tasks
+	 * from leaf to root rcu_node structures.
+	 */
+	get_online_cpus();
+
+	/*
 	 * Acquire lock, falling back to synchronize_rcu() if too many
 	 * lock-acquisition failures.  Of course, if someone does the
 	 * expedited grace period for us, just leave.
 	 */
 	while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
+		if (ULONG_CMP_LT(snap,
+		    ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
+			put_online_cpus();
+			goto mb_ret; /* Others did our work for us. */
+		}
 		if (trycount++ < 10) {
 			udelay(trycount * num_online_cpus());
 		} else {
+			put_online_cpus();
 			synchronize_rcu();
 			return;
 		}
-		if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
-			goto mb_ret; /* Others did our work for us. */
 	}
-	if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
+	if (ULONG_CMP_LT(snap, ACCESS_ONCE(sync_rcu_preempt_exp_count))) {
+		put_online_cpus();
 		goto unlock_mb_ret; /* Others did our work for us. */
+	}
 
 	/* force all RCU readers onto ->blkd_tasks lists. */
 	synchronize_sched_expedited();
 
-	raw_spin_lock_irqsave(&rsp->onofflock, flags);
-
 	/* Initialize ->expmask for all non-leaf rcu_node structures. */
 	rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
-		raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+		raw_spin_lock_irqsave(&rnp->lock, flags);
 		rnp->expmask = rnp->qsmaskinit;
-		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	}
 
 	/* Snapshot current state of ->blkd_tasks lists. */
@@ -835,7 +856,7 @@ void synchronize_rcu_expedited(void)
 	if (NUM_RCU_NODES > 1)
 		sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
 
-	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
+	put_online_cpus();
 
 	/* Wait for snapshotted ->blkd_tasks lists to drain. */
 	rnp = rcu_get_root(rsp);
@@ -1192,9 +1213,9 @@ static int rcu_boost_kthread(void *arg)
  * kthread to start boosting them.  If there is an expedited grace
  * period in progress, it is always time to boost.
  *
- * The caller must hold rnp->lock, which this function releases,
- * but irqs remain disabled.  The ->boost_kthread_task is immortal,
- * so we don't need to worry about it going away.
+ * The caller must hold rnp->lock, which this function releases.
+ * The ->boost_kthread_task is immortal, so we don't need to worry
+ * about it going away.
  */
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
 {
@@ -1814,16 +1835,16 @@ static void rcu_prepare_for_idle(int cpu)
 #ifdef CONFIG_TREE_PREEMPT_RCU
 	if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
 		rcu_preempt_qs(cpu);
-		force_quiescent_state(&rcu_preempt_state, 0);
+		force_quiescent_state(&rcu_preempt_state);
 	}
 #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
 	if (per_cpu(rcu_sched_data, cpu).nxtlist) {
 		rcu_sched_qs(cpu);
-		force_quiescent_state(&rcu_sched_state, 0);
+		force_quiescent_state(&rcu_sched_state);
 	}
 	if (per_cpu(rcu_bh_data, cpu).nxtlist) {
 		rcu_bh_qs(cpu);
-		force_quiescent_state(&rcu_bh_state, 0);
+		force_quiescent_state(&rcu_bh_state);
 	}
 
 	/*
@@ -1851,6 +1872,88 @@ static void rcu_idle_count_callbacks_posted(void)
 	__this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
 }
 
+/*
+ * Data for flushing lazy RCU callbacks at OOM time.
+ */
+static atomic_t oom_callback_count;
+static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
+
+/*
+ * RCU OOM callback -- decrement the outstanding count and deliver the
+ * wake-up if we are the last one.
+ */
+static void rcu_oom_callback(struct rcu_head *rhp)
+{
+	if (atomic_dec_and_test(&oom_callback_count))
+		wake_up(&oom_callback_wq);
+}
+
+/*
+ * Post an rcu_oom_notify callback on the current CPU if it has at
+ * least one lazy callback.  This will unnecessarily post callbacks
+ * to CPUs that already have a non-lazy callback at the end of their
+ * callback list, but this is an infrequent operation, so accept some
+ * extra overhead to keep things simple.
+ */
+static void rcu_oom_notify_cpu(void *unused)
+{
+	struct rcu_state *rsp;
+	struct rcu_data *rdp;
+
+	for_each_rcu_flavor(rsp) {
+		rdp = __this_cpu_ptr(rsp->rda);
+		if (rdp->qlen_lazy != 0) {
+			atomic_inc(&oom_callback_count);
+			rsp->call(&rdp->oom_head, rcu_oom_callback);
+		}
+	}
+}
+
+/*
+ * If low on memory, ensure that each CPU has a non-lazy callback.
+ * This will wake up CPUs that have only lazy callbacks, in turn
+ * ensuring that they free up the corresponding memory in a timely manner.
+ * Because an uncertain amount of memory will be freed in some uncertain
+ * timeframe, we do not claim to have freed anything.
+ */
+static int rcu_oom_notify(struct notifier_block *self,
+			  unsigned long notused, void *nfreed)
+{
+	int cpu;
+
+	/* Wait for callbacks from earlier instance to complete. */
+	wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
+
+	/*
+	 * Prevent premature wakeup: ensure that all increments happen
+	 * before there is a chance of the counter reaching zero.
+	 */
+	atomic_set(&oom_callback_count, 1);
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
+		cond_resched();
+	}
+	put_online_cpus();
+
+	/* Unconditionally decrement: no need to wake ourselves up. */
+	atomic_dec(&oom_callback_count);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block rcu_oom_nb = {
+	.notifier_call = rcu_oom_notify
+};
+
+static int __init rcu_register_oom_notifier(void)
+{
+	register_oom_notifier(&rcu_oom_nb);
+	return 0;
+}
+early_initcall(rcu_register_oom_notifier);
+
 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
 
 #ifdef CONFIG_RCU_CPU_STALL_INFO
@@ -1861,11 +1964,15 @@ static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
 {
 	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 	struct timer_list *tltp = &rdtp->idle_gp_timer;
+	char c;
 
-	sprintf(cp, "drain=%d %c timer=%lu",
-		rdtp->dyntick_drain,
-		rdtp->dyntick_holdoff == jiffies ? 'H' : '.',
-		timer_pending(tltp) ? tltp->expires - jiffies : -1);
+	c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.';
+	if (timer_pending(tltp))
+		sprintf(cp, "drain=%d %c timer=%lu",
+			rdtp->dyntick_drain, c, tltp->expires - jiffies);
+	else
+		sprintf(cp, "drain=%d %c timer not pending",
+			rdtp->dyntick_drain, c);
 }
 
 #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
@@ -1933,11 +2040,10 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp)
 /* Increment ->ticks_this_gp for all flavors of RCU. */
 static void increment_cpu_stall_ticks(void)
 {
-	__get_cpu_var(rcu_sched_data).ticks_this_gp++;
-	__get_cpu_var(rcu_bh_data).ticks_this_gp++;
-#ifdef CONFIG_TREE_PREEMPT_RCU
-	__get_cpu_var(rcu_preempt_data).ticks_this_gp++;
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		__this_cpu_ptr(rsp->rda)->ticks_this_gp++;
 }
 
 #else /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
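A note on the counter-type change in the expedited-grace-period hunks above: sync_rcu_preempt_exp_count and the local snapshot move from long to unsigned long, and the old "(count - snap) > 0" test becomes ULONG_CMP_LT(snap, count). Signed subtraction can overflow, which is undefined behavior, whereas unsigned arithmetic wraps modulo 2^BITS_PER_LONG, so the comparison stays well defined across counter wrap. The sketch below illustrates the wrap-safe comparison in plain user-space C; ulong_cmp_lt() here is an illustrative stand-in, not the kernel's actual macro (which lives in include/linux/rcupdate.h), and main() exists only to demonstrate the wrap case.

#include <limits.h>
#include <stdio.h>

/* Illustrative reimplementation of a wrap-safe "a is before b" test
 * for free-running unsigned counters. */
static int ulong_cmp_lt(unsigned long a, unsigned long b)
{
	/* Unsigned subtraction is modular, so this stays well defined
	 * even after the counter wraps past ULONG_MAX. */
	return ULONG_MAX / 2 < a - b;
}

int main(void)
{
	unsigned long snap = ULONG_MAX;	/* snapshot taken just before wrap */
	unsigned long count = snap + 2;	/* counter has since wrapped to 1 */

	/* A signed (count - snap) > 0 test could overflow here; the
	 * modular test still sees the counter as ahead of the snapshot. */
	printf("counter advanced past snapshot: %d\n",
	       ulong_cmp_lt(snap, count));	/* prints 1 */
	return 0;
}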
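The rcu_oom_notify()/rcu_oom_callback() pair added above relies on a counter-biasing idiom: the count starts at 1 so it cannot reach zero while rcu_oom_notify_cpu() is still posting callbacks, each posted callback adds one unit, rcu_oom_callback() drops one unit and delivers the wake-up only when the count reaches zero, and the poster finally drops the bias without a wake-up because any waiter re-checks the count before sleeping. Below is a minimal user-space sketch of the same idea using C11 atomics and pthreads; every name in it (pending, worker, all_done, NWORKERS) is illustrative and none of this is kernel code.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NWORKERS 4

static atomic_int pending;		/* outstanding work items plus bias */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t all_done = PTHREAD_COND_INITIALIZER;

static void *worker(void *arg)
{
	/* ... the per-item work would go here ... */
	if (atomic_fetch_sub(&pending, 1) == 1) {	/* we were the last */
		pthread_mutex_lock(&lock);
		pthread_cond_broadcast(&all_done);
		pthread_mutex_unlock(&lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t tid[NWORKERS];
	int i;

	/* Start at 1 so the count cannot hit zero while items are still
	 * being posted -- same role as atomic_set(&oom_callback_count, 1). */
	atomic_store(&pending, 1);

	for (i = 0; i < NWORKERS; i++) {
		atomic_fetch_add(&pending, 1);	/* one unit per posted item */
		pthread_create(&tid[i], NULL, worker, NULL);
	}

	/* Drop the bias.  Like the kernel's unconditional atomic_dec(),
	 * no wake-up is needed: the waiter re-checks before sleeping. */
	atomic_fetch_sub(&pending, 1);

	pthread_mutex_lock(&lock);
	while (atomic_load(&pending) != 0)
		pthread_cond_wait(&all_done, &lock);
	pthread_mutex_unlock(&lock);

	for (i = 0; i < NWORKERS; i++)
		pthread_join(tid[i], NULL);
	puts("all posted work has completed");
	return 0;
}

Starting the count at 1 rather than 0 is the key design point: it prevents an early completion from driving the count to zero, and waking the waiter, before all of the work has been posted.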