diff options
Diffstat (limited to 'kernel/rcutree.c')
| -rw-r--r-- | kernel/rcutree.c | 124 | 
1 files changed, 103 insertions, 21 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 8ed9c481db0..5ffadcc3bb2 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -212,13 +212,13 @@ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {  #endif  }; -static int blimit = 10;		/* Maximum callbacks per rcu_do_batch. */ -static int qhimark = 10000;	/* If this many pending, ignore blimit. */ -static int qlowmark = 100;	/* Once only this many pending, use blimit. */ +static long blimit = 10;	/* Maximum callbacks per rcu_do_batch. */ +static long qhimark = 10000;	/* If this many pending, ignore blimit. */ +static long qlowmark = 100;	/* Once only this many pending, use blimit. */ -module_param(blimit, int, 0444); -module_param(qhimark, int, 0444); -module_param(qlowmark, int, 0444); +module_param(blimit, long, 0444); +module_param(qhimark, long, 0444); +module_param(qlowmark, long, 0444);  int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */  int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; @@ -313,7 +313,7 @@ static int  cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)  {  	return *rdp->nxttail[RCU_DONE_TAIL + -			     ACCESS_ONCE(rsp->completed) != rdp->completed] && +			     (ACCESS_ONCE(rsp->completed) != rdp->completed)] &&  	       !rcu_gp_in_progress(rsp);  } @@ -873,6 +873,29 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)  	rsp->jiffies_stall = jiffies + jiffies_till_stall_check();  } +/* + * Dump stacks of all tasks running on stalled CPUs.  This is a fallback + * for architectures that do not implement trigger_all_cpu_backtrace(). + * The NMI-triggered stack traces are more accurate because they are + * printed by the target CPU. + */ +static void rcu_dump_cpu_stacks(struct rcu_state *rsp) +{ +	int cpu; +	unsigned long flags; +	struct rcu_node *rnp; + +	rcu_for_each_leaf_node(rsp, rnp) { +		raw_spin_lock_irqsave(&rnp->lock, flags); +		if (rnp->qsmask != 0) { +			for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) +				if (rnp->qsmask & (1UL << cpu)) +					dump_cpu_task(rnp->grplo + cpu); +		} +		raw_spin_unlock_irqrestore(&rnp->lock, flags); +	} +} +  static void print_other_cpu_stall(struct rcu_state *rsp)  {  	int cpu; @@ -880,6 +903,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)  	unsigned long flags;  	int ndetected = 0;  	struct rcu_node *rnp = rcu_get_root(rsp); +	long totqlen = 0;  	/* Only let one CPU complain about others per time interval. */ @@ -924,12 +948,15 @@ static void print_other_cpu_stall(struct rcu_state *rsp)  	raw_spin_unlock_irqrestore(&rnp->lock, flags);  	print_cpu_stall_info_end(); -	printk(KERN_CONT "(detected by %d, t=%ld jiffies)\n", -	       smp_processor_id(), (long)(jiffies - rsp->gp_start)); +	for_each_possible_cpu(cpu) +		totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; +	pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n", +	       smp_processor_id(), (long)(jiffies - rsp->gp_start), +	       rsp->gpnum, rsp->completed, totqlen);  	if (ndetected == 0)  		printk(KERN_ERR "INFO: Stall ended before state dump start\n");  	else if (!trigger_all_cpu_backtrace()) -		dump_stack(); +		rcu_dump_cpu_stacks(rsp);  	/* Complain about tasks blocking the grace period. */ @@ -940,8 +967,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp)  static void print_cpu_stall(struct rcu_state *rsp)  { +	int cpu;  	unsigned long flags;  	struct rcu_node *rnp = rcu_get_root(rsp); +	long totqlen = 0;  	/*  	 * OK, time to rat on ourselves... @@ -952,7 +981,10 @@ static void print_cpu_stall(struct rcu_state *rsp)  	print_cpu_stall_info_begin();  	print_cpu_stall_info(rsp, smp_processor_id());  	print_cpu_stall_info_end(); -	printk(KERN_CONT " (t=%lu jiffies)\n", jiffies - rsp->gp_start); +	for_each_possible_cpu(cpu) +		totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen; +	pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n", +		jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen);  	if (!trigger_all_cpu_backtrace())  		dump_stack(); @@ -1404,15 +1436,37 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)  	    !cpu_needs_another_gp(rsp, rdp)) {  		/*  		 * Either we have not yet spawned the grace-period -		 * task or this CPU does not need another grace period. +		 * task, this CPU does not need another grace period, +		 * or a grace period is already in progress.  		 * Either way, don't start a new grace period.  		 */  		raw_spin_unlock_irqrestore(&rnp->lock, flags);  		return;  	} +	/* +	 * Because there is no grace period in progress right now, +	 * any callbacks we have up to this point will be satisfied +	 * by the next grace period.  So promote all callbacks to be +	 * handled after the end of the next grace period.  If the +	 * CPU is not yet aware of the end of the previous grace period, +	 * we need to allow for the callback advancement that will +	 * occur when it does become aware.  Deadlock prevents us from +	 * making it aware at this point: We cannot acquire a leaf +	 * rcu_node ->lock while holding the root rcu_node ->lock. +	 */ +	rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; +	if (rdp->completed == rsp->completed) +		rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; +  	rsp->gp_flags = RCU_GP_FLAG_INIT; -	raw_spin_unlock_irqrestore(&rnp->lock, flags); +	raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */ + +	/* Ensure that CPU is aware of completion of last grace period. */ +	rcu_process_gp_end(rsp, rdp); +	local_irq_restore(flags); + +	/* Wake up rcu_gp_kthread() to start the grace period. */  	wake_up(&rsp->gp_wq);  } @@ -1769,7 +1823,8 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)  {  	unsigned long flags;  	struct rcu_head *next, *list, **tail; -	int bl, count, count_lazy, i; +	long bl, count, count_lazy; +	int i;  	/* If no callbacks are ready, just return.*/  	if (!cpu_has_callbacks_ready_to_invoke(rdp)) { @@ -2205,10 +2260,28 @@ static inline int rcu_blocking_is_gp(void)   * rcu_read_lock_sched().   *   * This means that all preempt_disable code sequences, including NMI and - * hardware-interrupt handlers, in progress on entry will have completed - * before this primitive returns.  However, this does not guarantee that - * softirq handlers will have completed, since in some kernels, these - * handlers can run in process context, and can block. + * non-threaded hardware-interrupt handlers, in progress on entry will + * have completed before this primitive returns.  However, this does not + * guarantee that softirq handlers will have completed, since in some + * kernels, these handlers can run in process context, and can block. + * + * Note that this guarantee implies further memory-ordering guarantees. + * On systems with more than one CPU, when synchronize_sched() returns, + * each CPU is guaranteed to have executed a full memory barrier since the + * end of its last RCU-sched read-side critical section whose beginning + * preceded the call to synchronize_sched().  In addition, each CPU having + * an RCU read-side critical section that extends beyond the return from + * synchronize_sched() is guaranteed to have executed a full memory barrier + * after the beginning of synchronize_sched() and before the beginning of + * that RCU read-side critical section.  Note that these guarantees include + * CPUs that are offline, idle, or executing in user mode, as well as CPUs + * that are executing in the kernel. + * + * Furthermore, if CPU A invoked synchronize_sched(), which returned + * to its caller on CPU B, then both CPU A and CPU B are guaranteed + * to have executed a full memory barrier during the execution of + * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but + * again only if the system has more than one CPU).   *   * This primitive provides the guarantees made by the (now removed)   * synchronize_kernel() API.  In contrast, synchronize_rcu() only @@ -2224,7 +2297,10 @@ void synchronize_sched(void)  			   "Illegal synchronize_sched() in RCU-sched read-side critical section");  	if (rcu_blocking_is_gp())  		return; -	wait_rcu_gp(call_rcu_sched); +	if (rcu_expedited) +		synchronize_sched_expedited(); +	else +		wait_rcu_gp(call_rcu_sched);  }  EXPORT_SYMBOL_GPL(synchronize_sched); @@ -2236,6 +2312,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched);   * read-side critical sections have completed.  RCU read-side critical   * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),   * and may be nested. + * + * See the description of synchronize_sched() for more detailed information + * on memory ordering guarantees.   */  void synchronize_rcu_bh(void)  { @@ -2245,7 +2324,10 @@ void synchronize_rcu_bh(void)  			   "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");  	if (rcu_blocking_is_gp())  		return; -	wait_rcu_gp(call_rcu_bh); +	if (rcu_expedited) +		synchronize_rcu_bh_expedited(); +	else +		wait_rcu_gp(call_rcu_bh);  }  EXPORT_SYMBOL_GPL(synchronize_rcu_bh); @@ -2357,7 +2439,7 @@ void synchronize_sched_expedited(void)  		if (trycount++ < 10) {  			udelay(trycount * num_online_cpus());  		} else { -			synchronize_sched(); +			wait_rcu_gp(call_rcu_sched);  			atomic_long_inc(&rsp->expedited_normal);  			return;  		}  |