diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-22 09:42:04 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-22 09:42:04 -0700 | 
| commit | 754b9800779402924fffe456b49d557e15260cbf (patch) | |
| tree | 0e0441eca766616fccd8fc37a3885397efc6063a /mm/memory-failure.c | |
| parent | 35cb8d9e18c0bb33b90d7e574abadbe23b65427d (diff) | |
| parent | ea281a9ebaba3287130dbe15bb0aad6f798bb06b (diff) | |
| download | olio-linux-3.10-754b9800779402924fffe456b49d557e15260cbf.tar.xz olio-linux-3.10-754b9800779402924fffe456b49d557e15260cbf.zip  | |
Merge branch 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull MCE changes from Ingo Molnar.
* 'x86-mce-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Fix return value of mce_chrdev_read() when erst is disabled
  x86/mce: Convert static array of pointers to per-cpu variables
  x86/mce: Replace hard coded hex constants with symbolic defines
  x86/mce: Recognise machine check bank signature for data path error
  x86/mce: Handle "action required" errors
  x86/mce: Add mechanism to safely save information in MCE handler
  x86/mce: Create helper function to save addr/misc when needed
  HWPOISON: Add code to handle "action required" errors.
  HWPOISON: Clean up memory_failure() vs. __memory_failure()
Diffstat (limited to 'mm/memory-failure.c')
| -rw-r--r-- | mm/memory-failure.c | 96 | 
1 files changed, 50 insertions, 46 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c22076ffdd4..97cc2733551 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -187,33 +187,40 @@ int hwpoison_filter(struct page *p)  EXPORT_SYMBOL_GPL(hwpoison_filter);  /* - * Send all the processes who have the page mapped an ``action optional'' - * signal. + * Send all the processes who have the page mapped a signal. + * ``action optional'' if they are not immediately affected by the error + * ``action required'' if error happened in current execution context   */ -static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno, -			unsigned long pfn, struct page *page) +static int kill_proc(struct task_struct *t, unsigned long addr, int trapno, +			unsigned long pfn, struct page *page, int flags)  {  	struct siginfo si;  	int ret;  	printk(KERN_ERR -		"MCE %#lx: Killing %s:%d early due to hardware memory corruption\n", +		"MCE %#lx: Killing %s:%d due to hardware memory corruption\n",  		pfn, t->comm, t->pid);  	si.si_signo = SIGBUS;  	si.si_errno = 0; -	si.si_code = BUS_MCEERR_AO;  	si.si_addr = (void *)addr;  #ifdef __ARCH_SI_TRAPNO  	si.si_trapno = trapno;  #endif  	si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT; -	/* -	 * Don't use force here, it's convenient if the signal -	 * can be temporarily blocked. -	 * This could cause a loop when the user sets SIGBUS -	 * to SIG_IGN, but hopefully no one will do that? -	 */ -	ret = send_sig_info(SIGBUS, &si, t);  /* synchronous? */ + +	if ((flags & MF_ACTION_REQUIRED) && t == current) { +		si.si_code = BUS_MCEERR_AR; +		ret = force_sig_info(SIGBUS, &si, t); +	} else { +		/* +		 * Don't use force here, it's convenient if the signal +		 * can be temporarily blocked. +		 * This could cause a loop when the user sets SIGBUS +		 * to SIG_IGN, but hopefully no one will do that? +		 */ +		si.si_code = BUS_MCEERR_AO; +		ret = send_sig_info(SIGBUS, &si, t);  /* synchronous? */ +	}  	if (ret < 0)  		printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n",  		       t->comm, t->pid, ret); @@ -338,8 +345,9 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,   * Also when FAIL is set do a force kill because something went   * wrong earlier.   */ -static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, -			  int fail, struct page *page, unsigned long pfn) +static void kill_procs(struct list_head *to_kill, int doit, int trapno, +			  int fail, struct page *page, unsigned long pfn, +			  int flags)  {  	struct to_kill *tk, *next; @@ -363,8 +371,8 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,  			 * check for that, but we need to tell the  			 * process anyways.  			 */ -			else if (kill_proc_ao(tk->tsk, tk->addr, trapno, -					      pfn, page) < 0) +			else if (kill_proc(tk->tsk, tk->addr, trapno, +					      pfn, page, flags) < 0)  				printk(KERN_ERR  		"MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",  					pfn, tk->tsk->comm, tk->tsk->pid); @@ -844,7 +852,7 @@ static int page_action(struct page_state *ps, struct page *p,   * the pages and send SIGBUS to the processes if the data was dirty.   */  static int hwpoison_user_mappings(struct page *p, unsigned long pfn, -				  int trapno) +				  int trapno, int flags)  {  	enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;  	struct address_space *mapping; @@ -962,8 +970,8 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,  	 * use a more force-full uncatchable kill to prevent  	 * any accesses to the poisoned memory.  	 */ -	kill_procs_ao(&tokill, !!PageDirty(ppage), trapno, -		      ret != SWAP_SUCCESS, p, pfn); +	kill_procs(&tokill, !!PageDirty(ppage), trapno, +		      ret != SWAP_SUCCESS, p, pfn, flags);  	return ret;  } @@ -984,7 +992,25 @@ static void clear_page_hwpoison_huge_page(struct page *hpage)  		ClearPageHWPoison(hpage + i);  } -int __memory_failure(unsigned long pfn, int trapno, int flags) +/** + * memory_failure - Handle memory failure of a page. + * @pfn: Page Number of the corrupted page + * @trapno: Trap number reported in the signal to user space. + * @flags: fine tune action taken + * + * This function is called by the low level machine check code + * of an architecture when it detects hardware memory corruption + * of a page. It tries its best to recover, which includes + * dropping pages, killing processes etc. + * + * The function is primarily of use for corruptions that + * happen outside the current execution context (e.g. when + * detected by a background scrubber) + * + * Must run in process context (e.g. a work queue) with interrupts + * enabled and no spinlocks hold. + */ +int memory_failure(unsigned long pfn, int trapno, int flags)  {  	struct page_state *ps;  	struct page *p; @@ -1130,7 +1156,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)  	 * Now take care of user space mappings.  	 * Abort on fail: __delete_from_page_cache() assumes unmapped page.  	 */ -	if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) { +	if (hwpoison_user_mappings(p, pfn, trapno, flags) != SWAP_SUCCESS) {  		printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);  		res = -EBUSY;  		goto out; @@ -1156,29 +1182,7 @@ out:  	unlock_page(hpage);  	return res;  } -EXPORT_SYMBOL_GPL(__memory_failure); - -/** - * memory_failure - Handle memory failure of a page. - * @pfn: Page Number of the corrupted page - * @trapno: Trap number reported in the signal to user space. - * - * This function is called by the low level machine check code - * of an architecture when it detects hardware memory corruption - * of a page. It tries its best to recover, which includes - * dropping pages, killing processes etc. - * - * The function is primarily of use for corruptions that - * happen outside the current execution context (e.g. when - * detected by a background scrubber) - * - * Must run in process context (e.g. a work queue) with interrupts - * enabled and no spinlocks hold. - */ -void memory_failure(unsigned long pfn, int trapno) -{ -	__memory_failure(pfn, trapno, 0); -} +EXPORT_SYMBOL_GPL(memory_failure);  #define MEMORY_FAILURE_FIFO_ORDER	4  #define MEMORY_FAILURE_FIFO_SIZE	(1 << MEMORY_FAILURE_FIFO_ORDER) @@ -1251,7 +1255,7 @@ static void memory_failure_work_func(struct work_struct *work)  		spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);  		if (!gotten)  			break; -		__memory_failure(entry.pfn, entry.trapno, entry.flags); +		memory_failure(entry.pfn, entry.trapno, entry.flags);  	}  }  |