diff options
Diffstat (limited to 'arch/x86/kernel/nmi.c')
| -rw-r--r-- | arch/x86/kernel/nmi.c | 47 | 
1 file changed, 33 insertions, 14 deletions
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index a0b2f84457b..f84f5c57de3 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -365,8 +365,9 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)  #ifdef CONFIG_X86_32  /*   * For i386, NMIs use the same stack as the kernel, and we can - * add a workaround to the iret problem in C. Simply have 3 states - * the NMI can be in. + * add a workaround to the iret problem in C (preventing nested + * NMIs if an NMI takes a trap). Simply have 3 states the NMI + * can be in:   *   *  1) not running   *  2) executing @@ -383,32 +384,50 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)   * If an NMI hits a breakpoint that executes an iret, another   * NMI can preempt it. We do not want to allow this new NMI   * to run, but we want to execute it when the first one finishes. - * We set the state to "latched", and the first NMI will perform - * an cmpxchg on the state, and if it doesn't successfully - * reset the state to "not running" it will restart the next - * NMI. + * We set the state to "latched", and the exit of the first NMI will + * perform a dec_return, if the result is zero (NOT_RUNNING), then + * it will simply exit the NMI handler. If not, the dec_return + * would have set the state to NMI_EXECUTING (what we want it to + * be when we are running). In this case, we simply jump back + * to rerun the NMI handler again, and restart the 'latched' NMI. + * + * No trap (breakpoint or page fault) should be hit before nmi_restart, + * thus there is no race between the first check of state for NOT_RUNNING + * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs + * at this point. + * + * In case the NMI takes a page fault, we need to save off the CR2 + * because the NMI could have preempted another page fault and corrupt + * the CR2 that is about to be read. 
As nested NMIs must be restarted + * and they can not take breakpoints or page faults, the update of the + * CR2 must be done before converting the nmi state back to NOT_RUNNING. + * Otherwise, there would be a race of another nested NMI coming in + * after setting state to NOT_RUNNING but before updating the nmi_cr2.   */  enum nmi_states { -	NMI_NOT_RUNNING, +	NMI_NOT_RUNNING = 0,  	NMI_EXECUTING,  	NMI_LATCHED,  };  static DEFINE_PER_CPU(enum nmi_states, nmi_state); +static DEFINE_PER_CPU(unsigned long, nmi_cr2);  #define nmi_nesting_preprocess(regs)					\  	do {								\ -		if (__get_cpu_var(nmi_state) != NMI_NOT_RUNNING) {	\ -			__get_cpu_var(nmi_state) = NMI_LATCHED;		\ +		if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {	\ +			this_cpu_write(nmi_state, NMI_LATCHED);		\  			return;						\  		}							\ -	nmi_restart:							\ -		__get_cpu_var(nmi_state) = NMI_EXECUTING;		\ -	} while (0) +		this_cpu_write(nmi_state, NMI_EXECUTING);		\ +		this_cpu_write(nmi_cr2, read_cr2());			\ +	} while (0);							\ +	nmi_restart:  #define nmi_nesting_postprocess()					\  	do {								\ -		if (cmpxchg(&__get_cpu_var(nmi_state),			\ -		    NMI_EXECUTING, NMI_NOT_RUNNING) != NMI_EXECUTING)	\ +		if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))	\ +			write_cr2(this_cpu_read(nmi_cr2));		\ +		if (this_cpu_dec_return(nmi_state))			\  			goto nmi_restart;				\  	} while (0)  #else /* x86_64 */  |