diff options
Diffstat (limited to 'arch/arm64/mm/fault.c')
| -rw-r--r-- | arch/arm64/mm/fault.c | 534 | 
1 files changed, 534 insertions, 0 deletions
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c new file mode 100644 index 00000000000..1909a69983c --- /dev/null +++ b/arch/arm64/mm/fault.c @@ -0,0 +1,534 @@ +/* + * Based on arch/arm/mm/fault.c + * + * Copyright (C) 1995  Linus Torvalds + * Copyright (C) 1995-2004 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program.  If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/module.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/hardirq.h> +#include <linux/init.h> +#include <linux/kprobes.h> +#include <linux/uaccess.h> +#include <linux/page-flags.h> +#include <linux/sched.h> +#include <linux/highmem.h> +#include <linux/perf_event.h> + +#include <asm/exception.h> +#include <asm/debug-monitors.h> +#include <asm/system_misc.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> + +/* + * Dump out the page tables associated with 'addr' in mm 'mm'. + */ +void show_pte(struct mm_struct *mm, unsigned long addr) +{ +	pgd_t *pgd; + +	if (!mm) +		mm = &init_mm; + +	pr_alert("pgd = %p\n", mm->pgd); +	pgd = pgd_offset(mm, addr); +	pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd)); + +	do { +		pud_t *pud; +		pmd_t *pmd; +		pte_t *pte; + +		if (pgd_none_or_clear_bad(pgd)) +			break; + +		pud = pud_offset(pgd, addr); +		if (pud_none_or_clear_bad(pud)) +			break; + +		pmd = pmd_offset(pud, addr); +		printk(", *pmd=%016llx", pmd_val(*pmd)); +		if (pmd_none_or_clear_bad(pmd)) +			break; + +		pte = pte_offset_map(pmd, addr); +		printk(", *pte=%016llx", pte_val(*pte)); +		pte_unmap(pte); +	} while(0); + +	printk("\n"); +} + +/* + * The kernel tried to access some page that wasn't present. + */ +static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, +			      unsigned int esr, struct pt_regs *regs) +{ +	/* +	 * Are we prepared to handle this kernel fault? +	 */ +	if (fixup_exception(regs)) +		return; + +	/* +	 * No handler, we'll have to terminate things with extreme prejudice. +	 */ +	bust_spinlocks(1); +	pr_alert("Unable to handle kernel %s at virtual address %08lx\n", +		 (addr < PAGE_SIZE) ? "NULL pointer dereference" : +		 "paging request", addr); + +	show_pte(mm, addr); +	die("Oops", regs, esr); +	bust_spinlocks(0); +	do_exit(SIGKILL); +} + +/* + * Something tried to access memory that isn't in our memory map. User mode + * accesses just cause a SIGSEGV + */ +static void __do_user_fault(struct task_struct *tsk, unsigned long addr, +			    unsigned int esr, unsigned int sig, int code, +			    struct pt_regs *regs) +{ +	struct siginfo si; + +	if (show_unhandled_signals) { +		pr_info("%s[%d]: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n", +			tsk->comm, task_pid_nr(tsk), sig, addr, esr); +		show_pte(tsk->mm, addr); +		show_regs(regs); +	} + +	tsk->thread.fault_address = addr; +	si.si_signo = sig; +	si.si_errno = 0; +	si.si_code = code; +	si.si_addr = (void __user *)addr; +	force_sig_info(sig, &si, tsk); +} + +void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) +{ +	struct task_struct *tsk = current; +	struct mm_struct *mm = tsk->active_mm; + +	/* +	 * If we are in kernel mode at this point, we have no context to +	 * handle this fault with. +	 */ +	if (user_mode(regs)) +		__do_user_fault(tsk, addr, esr, SIGSEGV, SEGV_MAPERR, regs); +	else +		__do_kernel_fault(mm, addr, esr, regs); +} + +#define VM_FAULT_BADMAP		0x010000 +#define VM_FAULT_BADACCESS	0x020000 + +#define ESR_WRITE		(1 << 6) +#define ESR_LNX_EXEC		(1 << 24) + +/* + * Check that the permissions on the VMA allow for the fault which occurred. + * If we encountered a write fault, we must have write permission, otherwise + * we allow any permission. + */ +static inline bool access_error(unsigned int esr, struct vm_area_struct *vma) +{ +	unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; + +	if (esr & ESR_WRITE) +		mask = VM_WRITE; +	if (esr & ESR_LNX_EXEC) +		mask = VM_EXEC; + +	return vma->vm_flags & mask ? false : true; +} + +static int __do_page_fault(struct mm_struct *mm, unsigned long addr, +			   unsigned int esr, unsigned int flags, +			   struct task_struct *tsk) +{ +	struct vm_area_struct *vma; +	int fault; + +	vma = find_vma(mm, addr); +	fault = VM_FAULT_BADMAP; +	if (unlikely(!vma)) +		goto out; +	if (unlikely(vma->vm_start > addr)) +		goto check_stack; + +	/* +	 * Ok, we have a good vm_area for this memory access, so we can handle +	 * it. +	 */ +good_area: +	if (access_error(esr, vma)) { +		fault = VM_FAULT_BADACCESS; +		goto out; +	} + +	return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags); + +check_stack: +	if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) +		goto good_area; +out: +	return fault; +} + +static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, +				   struct pt_regs *regs) +{ +	struct task_struct *tsk; +	struct mm_struct *mm; +	int fault, sig, code; +	int write = esr & ESR_WRITE; +	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | +		(write ? FAULT_FLAG_WRITE : 0); + +	tsk = current; +	mm  = tsk->mm; + +	/* Enable interrupts if they were enabled in the parent context. */ +	if (interrupts_enabled(regs)) +		local_irq_enable(); + +	/* +	 * If we're in an interrupt or have no user context, we must not take +	 * the fault. +	 */ +	if (in_atomic() || !mm) +		goto no_context; + +	/* +	 * As per x86, we may deadlock here. However, since the kernel only +	 * validly references user space from well defined areas of the code, +	 * we can bug out early if this is from code which shouldn't. +	 */ +	if (!down_read_trylock(&mm->mmap_sem)) { +		if (!user_mode(regs) && !search_exception_tables(regs->pc)) +			goto no_context; +retry: +		down_read(&mm->mmap_sem); +	} else { +		/* +		 * The above down_read_trylock() might have succeeded in which +		 * case, we'll have missed the might_sleep() from down_read(). +		 */ +		might_sleep(); +#ifdef CONFIG_DEBUG_VM +		if (!user_mode(regs) && !search_exception_tables(regs->pc)) +			goto no_context; +#endif +	} + +	fault = __do_page_fault(mm, addr, esr, flags, tsk); + +	/* +	 * If we need to retry but a fatal signal is pending, handle the +	 * signal first. We do not need to release the mmap_sem because it +	 * would already be released in __lock_page_or_retry in mm/filemap.c. +	 */ +	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) +		return 0; + +	/* +	 * Major/minor page fault accounting is only done on the initial +	 * attempt. If we go through a retry, it is extremely likely that the +	 * page will be found in page cache at that point. +	 */ + +	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); +	if (flags & FAULT_FLAG_ALLOW_RETRY) { +		if (fault & VM_FAULT_MAJOR) { +			tsk->maj_flt++; +			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, +				      addr); +		} else { +			tsk->min_flt++; +			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, +				      addr); +		} +		if (fault & VM_FAULT_RETRY) { +			/* +			 * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of +			 * starvation. +			 */ +			flags &= ~FAULT_FLAG_ALLOW_RETRY; +			goto retry; +		} +	} + +	up_read(&mm->mmap_sem); + +	/* +	 * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR +	 */ +	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | +			      VM_FAULT_BADACCESS)))) +		return 0; + +	if (fault & VM_FAULT_OOM) { +		/* +		 * We ran out of memory, call the OOM killer, and return to +		 * userspace (which will retry the fault, or kill us if we got +		 * oom-killed). +		 */ +		pagefault_out_of_memory(); +		return 0; +	} + +	/* +	 * If we are in kernel mode at this point, we have no context to +	 * handle this fault with. +	 */ +	if (!user_mode(regs)) +		goto no_context; + +	if (fault & VM_FAULT_SIGBUS) { +		/* +		 * We had some memory, but were unable to successfully fix up +		 * this page fault. +		 */ +		sig = SIGBUS; +		code = BUS_ADRERR; +	} else { +		/* +		 * Something tried to access memory that isn't in our memory +		 * map. +		 */ +		sig = SIGSEGV; +		code = fault == VM_FAULT_BADACCESS ? +			SEGV_ACCERR : SEGV_MAPERR; +	} + +	__do_user_fault(tsk, addr, esr, sig, code, regs); +	return 0; + +no_context: +	__do_kernel_fault(mm, addr, esr, regs); +	return 0; +} + +/* + * First Level Translation Fault Handler + * + * We enter here because the first level page table doesn't contain a valid + * entry for the address. + * + * If the address is in kernel space (>= TASK_SIZE), then we are probably + * faulting in the vmalloc() area. + * + * If the init_task's first level page tables contains the relevant entry, we + * copy the it to this task.  If not, we send the process a signal, fixup the + * exception, or oops the kernel. + * + * NOTE! We MUST NOT take any locks for this case. We may be in an interrupt + * or a critical region, and should only copy the information from the master + * page table, nothing more. + */ +static int __kprobes do_translation_fault(unsigned long addr, +					  unsigned int esr, +					  struct pt_regs *regs) +{ +	if (addr < TASK_SIZE) +		return do_page_fault(addr, esr, regs); + +	do_bad_area(addr, esr, regs); +	return 0; +} + +/* + * Some section permission faults need to be handled gracefully.  They can + * happen due to a __{get,put}_user during an oops. + */ +static int do_sect_fault(unsigned long addr, unsigned int esr, +			 struct pt_regs *regs) +{ +	do_bad_area(addr, esr, regs); +	return 0; +} + +/* + * This abort handler always returns "fault". + */ +static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) +{ +	return 1; +} + +static struct fault_info { +	int	(*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); +	int	sig; +	int	code; +	const char *name; +} fault_info[] = { +	{ do_bad,		SIGBUS,  0,		"ttbr address size fault"	}, +	{ do_bad,		SIGBUS,  0,		"level 1 address size fault"	}, +	{ do_bad,		SIGBUS,  0,		"level 2 address size fault"	}, +	{ do_bad,		SIGBUS,  0,		"level 3 address size fault"	}, +	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"input address range fault"	}, +	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 1 translation fault"	}, +	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 2 translation fault"	}, +	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	}, +	{ do_bad,		SIGBUS,  0,		"reserved access flag fault"	}, +	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	}, +	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	}, +	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 access flag fault"	}, +	{ do_bad,		SIGBUS,  0,		"reserved permission fault"	}, +	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 1 permission fault"	}, +	{ do_sect_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	}, +	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 permission fault"	}, +	{ do_bad,		SIGBUS,  0,		"synchronous external abort"	}, +	{ do_bad,		SIGBUS,  0,		"asynchronous external abort"	}, +	{ do_bad,		SIGBUS,  0,		"unknown 18"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 19"			}, +	{ do_bad,		SIGBUS,  0,		"synchronous abort (translation table walk)" }, +	{ do_bad,		SIGBUS,  0,		"synchronous abort (translation table walk)" }, +	{ do_bad,		SIGBUS,  0,		"synchronous abort (translation table walk)" }, +	{ do_bad,		SIGBUS,  0,		"synchronous abort (translation table walk)" }, +	{ do_bad,		SIGBUS,  0,		"synchronous parity error"	}, +	{ do_bad,		SIGBUS,  0,		"asynchronous parity error"	}, +	{ do_bad,		SIGBUS,  0,		"unknown 26"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 27"			}, +	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk" }, +	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk" }, +	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk" }, +	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk" }, +	{ do_bad,		SIGBUS,  0,		"unknown 32"			}, +	{ do_bad,		SIGBUS,  BUS_ADRALN,	"alignment fault"		}, +	{ do_bad,		SIGBUS,  0,		"debug event"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 35"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 36"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 37"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 38"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 39"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 40"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 41"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 42"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 43"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 44"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 45"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 46"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 47"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 48"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 49"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 50"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 51"			}, +	{ do_bad,		SIGBUS,  0,		"implementation fault (lockdown abort)" }, +	{ do_bad,		SIGBUS,  0,		"unknown 53"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 54"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 55"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 56"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 57"			}, +	{ do_bad,		SIGBUS,  0,		"implementation fault (coprocessor abort)" }, +	{ do_bad,		SIGBUS,  0,		"unknown 59"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 60"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 61"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 62"			}, +	{ do_bad,		SIGBUS,  0,		"unknown 63"			}, +}; + +/* + * Dispatch a data abort to the relevant handler. + */ +asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, +					 struct pt_regs *regs) +{ +	const struct fault_info *inf = fault_info + (esr & 63); +	struct siginfo info; + +	if (!inf->fn(addr, esr, regs)) +		return; + +	pr_alert("Unhandled fault: %s (0x%08x) at 0x%016lx\n", +		 inf->name, esr, addr); + +	info.si_signo = inf->sig; +	info.si_errno = 0; +	info.si_code  = inf->code; +	info.si_addr  = (void __user *)addr; +	arm64_notify_die("", regs, &info, esr); +} + +/* + * Handle stack alignment exceptions. + */ +asmlinkage void __exception do_sp_pc_abort(unsigned long addr, +					   unsigned int esr, +					   struct pt_regs *regs) +{ +	struct siginfo info; + +	info.si_signo = SIGBUS; +	info.si_errno = 0; +	info.si_code  = BUS_ADRALN; +	info.si_addr  = (void __user *)addr; +	arm64_notify_die("", regs, &info, esr); +} + +static struct fault_info debug_fault_info[] = { +	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware breakpoint"	}, +	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware single-step"	}, +	{ do_bad,	SIGTRAP,	TRAP_HWBKPT,	"hardware watchpoint"	}, +	{ do_bad,	SIGBUS,		0,		"unknown 3"		}, +	{ do_bad,	SIGTRAP,	TRAP_BRKPT,	"aarch32 BKPT"		}, +	{ do_bad,	SIGTRAP,	0,		"aarch32 vector catch"	}, +	{ do_bad,	SIGTRAP,	TRAP_BRKPT,	"aarch64 BRK"		}, +	{ do_bad,	SIGBUS,		0,		"unknown 7"		}, +}; + +void __init hook_debug_fault_code(int nr, +				  int (*fn)(unsigned long, unsigned int, struct pt_regs *), +				  int sig, int code, const char *name) +{ +	BUG_ON(nr < 0 || nr >= ARRAY_SIZE(debug_fault_info)); + +	debug_fault_info[nr].fn		= fn; +	debug_fault_info[nr].sig	= sig; +	debug_fault_info[nr].code	= code; +	debug_fault_info[nr].name	= name; +} + +asmlinkage int __exception do_debug_exception(unsigned long addr, +					      unsigned int esr, +					      struct pt_regs *regs) +{ +	const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr); +	struct siginfo info; + +	if (!inf->fn(addr, esr, regs)) +		return 1; + +	pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n", +		 inf->name, esr, addr); + +	info.si_signo = inf->sig; +	info.si_errno = 0; +	info.si_code  = inf->code; +	info.si_addr  = (void __user *)addr; +	arm64_notify_die("", regs, &info, esr); + +	return 0; +}  |