diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/Makefile | 6 | ||||
| -rw-r--r-- | kernel/cgroup.c | 41 | ||||
| -rw-r--r-- | kernel/events/uprobes.c | 345 | ||||
| -rw-r--r-- | kernel/futex.c | 41 | ||||
| -rw-r--r-- | kernel/module-internal.h | 3 | ||||
| -rw-r--r-- | kernel/module.c | 51 | ||||
| -rw-r--r-- | kernel/module_signing.c | 24 | ||||
| -rw-r--r-- | kernel/pid_namespace.c | 33 | ||||
| -rw-r--r-- | kernel/printk.c | 1 | ||||
| -rw-r--r-- | kernel/sys.c | 12 | ||||
| -rw-r--r-- | kernel/trace/ring_buffer.c | 4 | ||||
| -rw-r--r-- | kernel/workqueue.c | 2 | 
12 files changed, 273 insertions, 290 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 0dfeca4324e..86e3285ae7e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -174,10 +174,8 @@ signing_key.priv signing_key.x509: x509.genkey  	@echo "###"  	@echo "### If this takes a long time, you might wish to run rngd in the"  	@echo "### background to keep the supply of entropy topped up.  It" -	@echo "### needs to be run as root, and should use a hardware random" -	@echo "### number generator if one is available, eg:" -	@echo "###" -	@echo "###     rngd -r /dev/hwrandom" +	@echo "### needs to be run as root, and uses a hardware random" +	@echo "### number generator if one is available."  	@echo "###"  	openssl req -new -nodes -utf8 $(sign_key_with_hash) -days 36500 -batch \  		-x509 -config x509.genkey \ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 13774b3b39a..f24f724620d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1962,9 +1962,8 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,  	 * trading it for newcg is protected by cgroup_mutex, we're safe to drop  	 * it here; it will be freed under RCU.  	 */ -	put_css_set(oldcg); -  	set_bit(CGRP_RELEASABLE, &oldcgrp->flags); +	put_css_set(oldcg);  }  /** @@ -4815,31 +4814,20 @@ static const struct file_operations proc_cgroupstats_operations = {   *   * A pointer to the shared css_set was automatically copied in   * fork.c by dup_task_struct().  However, we ignore that copy, since - * it was not made under the protection of RCU, cgroup_mutex or - * threadgroup_change_begin(), so it might no longer be a valid - * cgroup pointer.  cgroup_attach_task() might have already changed - * current->cgroups, allowing the previously referenced cgroup - * group to be removed and freed. 
- * - * Outside the pointer validity we also need to process the css_set - * inheritance between threadgoup_change_begin() and - * threadgoup_change_end(), this way there is no leak in any process - * wide migration performed by cgroup_attach_proc() that could otherwise - * miss a thread because it is too early or too late in the fork stage. + * it was not made under the protection of RCU or cgroup_mutex, so + * might no longer be a valid cgroup pointer.  cgroup_attach_task() might + * have already changed current->cgroups, allowing the previously + * referenced cgroup group to be removed and freed.   *   * At the point that cgroup_fork() is called, 'current' is the parent   * task, and the passed argument 'child' points to the child task.   */  void cgroup_fork(struct task_struct *child)  { -	/* -	 * We don't need to task_lock() current because current->cgroups -	 * can't be changed concurrently here. The parent obviously hasn't -	 * exited and called cgroup_exit(), and we are synchronized against -	 * cgroup migration through threadgroup_change_begin(). -	 */ +	task_lock(current);  	child->cgroups = current->cgroups;  	get_css_set(child->cgroups); +	task_unlock(current);  	INIT_LIST_HEAD(&child->cg_list);  } @@ -4895,19 +4883,10 @@ void cgroup_post_fork(struct task_struct *child)  	 */  	if (use_task_css_set_links) {  		write_lock(&css_set_lock); -		if (list_empty(&child->cg_list)) { -			/* -			 * It's safe to use child->cgroups without task_lock() -			 * here because we are protected through -			 * threadgroup_change_begin() against concurrent -			 * css_set change in cgroup_task_migrate(). Also -			 * the task can't exit at that point until -			 * wake_up_new_task() is called, so we are protected -			 * against cgroup_exit() setting child->cgroup to -			 * init_css_set. 
-			 */ +		task_lock(child); +		if (list_empty(&child->cg_list))  			list_add(&child->cg_list, &child->cgroups->tasks); -		} +		task_unlock(child);  		write_unlock(&css_set_lock);  	}  } diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 98256bc71ee..5cc4e7e42e6 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -78,15 +78,23 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];   */  static atomic_t uprobe_events = ATOMIC_INIT(0); +/* Have a copy of original instruction */ +#define UPROBE_COPY_INSN	0 +/* Dont run handlers when first register/ last unregister in progress*/ +#define UPROBE_RUN_HANDLER	1 +/* Can skip singlestep */ +#define UPROBE_SKIP_SSTEP	2 +  struct uprobe {  	struct rb_node		rb_node;	/* node in the rb tree */  	atomic_t		ref;  	struct rw_semaphore	consumer_rwsem; +	struct mutex		copy_mutex;	/* TODO: kill me and UPROBE_COPY_INSN */  	struct list_head	pending_list;  	struct uprobe_consumer	*consumers;  	struct inode		*inode;		/* Also hold a ref to inode */  	loff_t			offset; -	int			flags; +	unsigned long		flags;  	struct arch_uprobe	arch;  }; @@ -100,17 +108,12 @@ struct uprobe {   */  static bool valid_vma(struct vm_area_struct *vma, bool is_register)  { -	if (!vma->vm_file) -		return false; - -	if (!is_register) -		return true; +	vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED; -	if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) -				== (VM_READ|VM_EXEC)) -		return true; +	if (is_register) +		flags |= VM_WRITE; -	return false; +	return vma->vm_file && (vma->vm_flags & flags) == VM_MAYEXEC;  }  static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset) @@ -193,19 +196,44 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn)  	return *insn == UPROBE_SWBP_INSN;  } +static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode) +{ +	void *kaddr = kmap_atomic(page); +	memcpy(opcode, kaddr + (vaddr & ~PAGE_MASK), UPROBE_SWBP_INSN_SIZE); +	
kunmap_atomic(kaddr); +} + +static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode) +{ +	uprobe_opcode_t old_opcode; +	bool is_swbp; + +	copy_opcode(page, vaddr, &old_opcode); +	is_swbp = is_swbp_insn(&old_opcode); + +	if (is_swbp_insn(new_opcode)) { +		if (is_swbp)		/* register: already installed? */ +			return 0; +	} else { +		if (!is_swbp)		/* unregister: was it changed by us? */ +			return 0; +	} + +	return 1; +} +  /*   * NOTE:   * Expect the breakpoint instruction to be the smallest size instruction for   * the architecture. If an arch has variable length instruction and the   * breakpoint instruction is not of the smallest length instruction - * supported by that architecture then we need to modify read_opcode / + * supported by that architecture then we need to modify is_swbp_at_addr and   * write_opcode accordingly. This would never be a problem for archs that   * have fixed length instructions.   */  /*   * write_opcode - write the opcode at a given virtual address. - * @auprobe: arch breakpointing information.   * @mm: the probed process address space.   * @vaddr: the virtual address to store the opcode.   * @opcode: opcode to be written at @vaddr. @@ -216,8 +244,8 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn)   * For mm @mm, write the opcode at @vaddr.   * Return 0 (success) or a negative errno.   
*/ -static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, -			unsigned long vaddr, uprobe_opcode_t opcode) +static int write_opcode(struct mm_struct *mm, unsigned long vaddr, +			uprobe_opcode_t opcode)  {  	struct page *old_page, *new_page;  	void *vaddr_old, *vaddr_new; @@ -226,10 +254,14 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,  retry:  	/* Read the page with vaddr into memory */ -	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma); +	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma);  	if (ret <= 0)  		return ret; +	ret = verify_opcode(old_page, vaddr, &opcode); +	if (ret <= 0) +		goto put_old; +  	ret = -ENOMEM;  	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);  	if (!new_page) @@ -264,63 +296,6 @@ put_old:  }  /** - * read_opcode - read the opcode at a given virtual address. - * @mm: the probed process address space. - * @vaddr: the virtual address to read the opcode. - * @opcode: location to store the read opcode. - * - * Called with mm->mmap_sem held (for read and with a reference to - * mm. - * - * For mm @mm, read the opcode at @vaddr and store it in @opcode. - * Return 0 (success) or a negative errno. 
- */ -static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t *opcode) -{ -	struct page *page; -	void *vaddr_new; -	int ret; - -	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL); -	if (ret <= 0) -		return ret; - -	vaddr_new = kmap_atomic(page); -	vaddr &= ~PAGE_MASK; -	memcpy(opcode, vaddr_new + vaddr, UPROBE_SWBP_INSN_SIZE); -	kunmap_atomic(vaddr_new); - -	put_page(page); - -	return 0; -} - -static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) -{ -	uprobe_opcode_t opcode; -	int result; - -	if (current->mm == mm) { -		pagefault_disable(); -		result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr, -								sizeof(opcode)); -		pagefault_enable(); - -		if (likely(result == 0)) -			goto out; -	} - -	result = read_opcode(mm, vaddr, &opcode); -	if (result) -		return result; -out: -	if (is_swbp_insn(&opcode)) -		return 1; - -	return 0; -} - -/**   * set_swbp - store breakpoint at a given address.   * @auprobe: arch specific probepoint information.   * @mm: the probed process address space. @@ -331,18 +306,7 @@ out:   */  int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)  { -	int result; -	/* -	 * See the comment near uprobes_hash(). 
-	 */ -	result = is_swbp_at_addr(mm, vaddr); -	if (result == 1) -		return 0; - -	if (result) -		return result; - -	return write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN); +	return write_opcode(mm, vaddr, UPROBE_SWBP_INSN);  }  /** @@ -357,16 +321,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned  int __weak  set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)  { -	int result; - -	result = is_swbp_at_addr(mm, vaddr); -	if (!result) -		return -EINVAL; - -	if (result != 1) -		return result; - -	return write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); +	return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);  }  static int match_uprobe(struct uprobe *l, struct uprobe *r) @@ -473,7 +428,7 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)  	spin_unlock(&uprobes_treelock);  	/* For now assume that the instruction need not be single-stepped */ -	uprobe->flags |= UPROBE_SKIP_SSTEP; +	__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);  	return u;  } @@ -495,6 +450,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)  	uprobe->inode = igrab(inode);  	uprobe->offset = offset;  	init_rwsem(&uprobe->consumer_rwsem); +	mutex_init(&uprobe->copy_mutex);  	/* add to uprobes_tree, sorted on inode:offset */  	cur_uprobe = insert_uprobe(uprobe); @@ -515,7 +471,7 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)  {  	struct uprobe_consumer *uc; -	if (!(uprobe->flags & UPROBE_RUN_HANDLER)) +	if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags))  		return;  	down_read(&uprobe->consumer_rwsem); @@ -621,29 +577,43 @@ static int copy_insn(struct uprobe *uprobe, struct file *filp)  	return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);  } -/* - * How mm->uprobes_state.count gets updated - * uprobe_mmap() increments the count if - * 	- it successfully adds a breakpoint. 
- * 	- it cannot add a breakpoint, but sees that there is a underlying - * 	  breakpoint (via a is_swbp_at_addr()). - * - * uprobe_munmap() decrements the count if - * 	- it sees a underlying breakpoint, (via is_swbp_at_addr) - * 	  (Subsequent uprobe_unregister wouldnt find the breakpoint - * 	  unless a uprobe_mmap kicks in, since the old vma would be - * 	  dropped just after uprobe_munmap.) - * - * uprobe_register increments the count if: - * 	- it successfully adds a breakpoint. - * - * uprobe_unregister decrements the count if: - * 	- it sees a underlying breakpoint and removes successfully. - * 	  (via is_swbp_at_addr) - * 	  (Subsequent uprobe_munmap wouldnt find the breakpoint - * 	  since there is no underlying breakpoint after the - * 	  breakpoint removal.) - */ +static int prepare_uprobe(struct uprobe *uprobe, struct file *file, +				struct mm_struct *mm, unsigned long vaddr) +{ +	int ret = 0; + +	if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) +		return ret; + +	mutex_lock(&uprobe->copy_mutex); +	if (test_bit(UPROBE_COPY_INSN, &uprobe->flags)) +		goto out; + +	ret = copy_insn(uprobe, file); +	if (ret) +		goto out; + +	ret = -ENOTSUPP; +	if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) +		goto out; + +	ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); +	if (ret) +		goto out; + +	/* write_opcode() assumes we don't cross page boundary */ +	BUG_ON((uprobe->offset & ~PAGE_MASK) + +			UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); + +	smp_wmb(); /* pairs with rmb() in find_active_uprobe() */ +	set_bit(UPROBE_COPY_INSN, &uprobe->flags); + + out: +	mutex_unlock(&uprobe->copy_mutex); + +	return ret; +} +  static int  install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,  			struct vm_area_struct *vma, unsigned long vaddr) @@ -661,24 +631,9 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,  	if (!uprobe->consumers)  		return 0; -	if (!(uprobe->flags & UPROBE_COPY_INSN)) { -		ret = copy_insn(uprobe, vma->vm_file); -		if (ret) -			
return ret; - -		if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn)) -			return -ENOTSUPP; - -		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr); -		if (ret) -			return ret; - -		/* write_opcode() assumes we don't cross page boundary */ -		BUG_ON((uprobe->offset & ~PAGE_MASK) + -				UPROBE_SWBP_INSN_SIZE > PAGE_SIZE); - -		uprobe->flags |= UPROBE_COPY_INSN; -	} +	ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr); +	if (ret) +		return ret;  	/*  	 * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(), @@ -697,15 +652,15 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,  	return ret;  } -static void +static int  remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)  {  	/* can happen if uprobe_register() fails */  	if (!test_bit(MMF_HAS_UPROBES, &mm->flags)) -		return; +		return 0;  	set_bit(MMF_RECALC_UPROBES, &mm->flags); -	set_orig_insn(&uprobe->arch, mm, vaddr); +	return set_orig_insn(&uprobe->arch, mm, vaddr);  }  /* @@ -820,7 +775,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)  		struct mm_struct *mm = info->mm;  		struct vm_area_struct *vma; -		if (err) +		if (err && is_register)  			goto free;  		down_write(&mm->mmap_sem); @@ -836,7 +791,7 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)  		if (is_register)  			err = install_breakpoint(uprobe, mm, vma, info->vaddr);  		else -			remove_breakpoint(uprobe, mm, info->vaddr); +			err |= remove_breakpoint(uprobe, mm, info->vaddr);   unlock:  		up_write(&mm->mmap_sem); @@ -893,13 +848,15 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *  	mutex_lock(uprobes_hash(inode));  	uprobe = alloc_uprobe(inode, offset); -	if (uprobe && !consumer_add(uprobe, uc)) { +	if (!uprobe) { +		ret = -ENOMEM; +	} else if (!consumer_add(uprobe, uc)) {  		ret = __uprobe_register(uprobe);  		if (ret) {  			uprobe->consumers = NULL;  			__uprobe_unregister(uprobe);  		} else 
{ -			uprobe->flags |= UPROBE_RUN_HANDLER; +			set_bit(UPROBE_RUN_HANDLER, &uprobe->flags);  		}  	} @@ -932,7 +889,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume  	if (consumer_del(uprobe, uc)) {  		if (!uprobe->consumers) {  			__uprobe_unregister(uprobe); -			uprobe->flags &= ~UPROBE_RUN_HANDLER; +			clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags);  		}  	} @@ -1393,10 +1350,11 @@ bool uprobe_deny_signal(void)   */  static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)  { -	if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) -		return true; - -	uprobe->flags &= ~UPROBE_SKIP_SSTEP; +	if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) { +		if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) +			return true; +		clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags); +	}  	return false;  } @@ -1419,6 +1377,30 @@ static void mmf_recalc_uprobes(struct mm_struct *mm)  	clear_bit(MMF_HAS_UPROBES, &mm->flags);  } +static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr) +{ +	struct page *page; +	uprobe_opcode_t opcode; +	int result; + +	pagefault_disable(); +	result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr, +							sizeof(opcode)); +	pagefault_enable(); + +	if (likely(result == 0)) +		goto out; + +	result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL); +	if (result < 0) +		return result; + +	copy_opcode(page, vaddr, &opcode); +	put_page(page); + out: +	return is_swbp_insn(&opcode); +} +  static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)  {  	struct mm_struct *mm = current->mm; @@ -1489,38 +1471,41 @@ static void handle_swbp(struct pt_regs *regs)  		}  		return;  	} +	/* +	 * TODO: move copy_insn/etc into _register and remove this hack. +	 * After we hit the bp, _unregister + _register can install the +	 * new and not-yet-analyzed uprobe at the same address, restart. 
+	 */ +	smp_rmb(); /* pairs with wmb() in install_breakpoint() */ +	if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags))) +		goto restart;  	utask = current->utask;  	if (!utask) {  		utask = add_utask();  		/* Cannot allocate; re-execute the instruction. */  		if (!utask) -			goto cleanup_ret; +			goto restart;  	} -	utask->active_uprobe = uprobe; +  	handler_chain(uprobe, regs); -	if (uprobe->flags & UPROBE_SKIP_SSTEP && can_skip_sstep(uprobe, regs)) -		goto cleanup_ret; +	if (can_skip_sstep(uprobe, regs)) +		goto out; -	utask->state = UTASK_SSTEP;  	if (!pre_ssout(uprobe, regs, bp_vaddr)) {  		arch_uprobe_enable_step(&uprobe->arch); +		utask->active_uprobe = uprobe; +		utask->state = UTASK_SSTEP;  		return;  	} -cleanup_ret: -	if (utask) { -		utask->active_uprobe = NULL; -		utask->state = UTASK_RUNNING; -	} -	if (!(uprobe->flags & UPROBE_SKIP_SSTEP)) - -		/* -		 * cannot singlestep; cannot skip instruction; -		 * re-execute the instruction. -		 */ -		instruction_pointer_set(regs, bp_vaddr); - +restart: +	/* +	 * cannot singlestep; cannot skip instruction; +	 * re-execute the instruction. +	 */ +	instruction_pointer_set(regs, bp_vaddr); +out:  	put_uprobe(uprobe);  } @@ -1552,13 +1537,12 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)  }  /* - * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag.  (and on - * subsequent probe hits on the thread sets the state to UTASK_BP_HIT) and - * allows the thread to return from interrupt. + * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag and + * allows the thread to return from interrupt. After that handle_swbp() + * sets utask->active_uprobe.   * - * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag and - * also sets the state to UTASK_SSTEP_ACK and allows the thread to return from - * interrupt. + * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag + * and allows the thread to return from interrupt.   
*   * While returning to userspace, thread notices the TIF_UPROBE flag and calls   * uprobe_notify_resume(). @@ -1567,11 +1551,13 @@ void uprobe_notify_resume(struct pt_regs *regs)  {  	struct uprobe_task *utask; +	clear_thread_flag(TIF_UPROBE); +  	utask = current->utask; -	if (!utask || utask->state == UTASK_BP_HIT) -		handle_swbp(regs); -	else +	if (utask && utask->active_uprobe)  		handle_singlestep(utask, regs); +	else +		handle_swbp(regs);  }  /* @@ -1580,17 +1566,10 @@ void uprobe_notify_resume(struct pt_regs *regs)   */  int uprobe_pre_sstep_notifier(struct pt_regs *regs)  { -	struct uprobe_task *utask; -  	if (!current->mm || !test_bit(MMF_HAS_UPROBES, &current->mm->flags))  		return 0; -	utask = current->utask; -	if (utask) -		utask->state = UTASK_BP_HIT; -  	set_thread_flag(TIF_UPROBE); -  	return 1;  } diff --git a/kernel/futex.c b/kernel/futex.c index 3717e7b306e..20ef219bbe9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -716,7 +716,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,  				struct futex_pi_state **ps,  				struct task_struct *task, int set_waiters)  { -	int lock_taken, ret, ownerdied = 0; +	int lock_taken, ret, force_take = 0;  	u32 uval, newval, curval, vpid = task_pid_vnr(task);  retry: @@ -755,17 +755,15 @@ retry:  	newval = curval | FUTEX_WAITERS;  	/* -	 * There are two cases, where a futex might have no owner (the -	 * owner TID is 0): OWNER_DIED. We take over the futex in this -	 * case. We also do an unconditional take over, when the owner -	 * of the futex died. -	 * -	 * This is safe as we are protected by the hash bucket lock ! +	 * Should we force take the futex? See below.  	 */ -	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { -		/* Keep the OWNER_DIED bit */ +	if (unlikely(force_take)) { +		/* +		 * Keep the OWNER_DIED and the WAITERS bit and set the +		 * new TID value. 
+		 */  		newval = (curval & ~FUTEX_TID_MASK) | vpid; -		ownerdied = 0; +		force_take = 0;  		lock_taken = 1;  	} @@ -775,7 +773,7 @@ retry:  		goto retry;  	/* -	 * We took the lock due to owner died take over. +	 * We took the lock due to forced take over.  	 */  	if (unlikely(lock_taken))  		return 1; @@ -790,20 +788,25 @@ retry:  		switch (ret) {  		case -ESRCH:  			/* -			 * No owner found for this futex. Check if the -			 * OWNER_DIED bit is set to figure out whether -			 * this is a robust futex or not. +			 * We failed to find an owner for this +			 * futex. So we have no pi_state to block +			 * on. This can happen in two cases: +			 * +			 * 1) The owner died +			 * 2) A stale FUTEX_WAITERS bit +			 * +			 * Re-read the futex value.  			 */  			if (get_futex_value_locked(&curval, uaddr))  				return -EFAULT;  			/* -			 * We simply start over in case of a robust -			 * futex. The code above will take the futex -			 * and return happy. +			 * If the owner died or we have a stale +			 * WAITERS bit the owner TID in the user space +			 * futex is 0.  			 
*/ -			if (curval & FUTEX_OWNER_DIED) { -				ownerdied = 1; +			if (!(curval & FUTEX_TID_MASK)) { +				force_take = 1;  				goto retry;  			}  		default: diff --git a/kernel/module-internal.h b/kernel/module-internal.h index 6114a13419b..24f9247b7d0 100644 --- a/kernel/module-internal.h +++ b/kernel/module-internal.h @@ -11,5 +11,4 @@  extern struct key *modsign_keyring; -extern int mod_verify_sig(const void *mod, unsigned long modlen, -			  const void *sig, unsigned long siglen); +extern int mod_verify_sig(const void *mod, unsigned long *_modlen); diff --git a/kernel/module.c b/kernel/module.c index 0e2da8695f8..6e48c3a4359 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2293,12 +2293,17 @@ static void layout_symtab(struct module *mod, struct load_info *info)  	src = (void *)info->hdr + symsect->sh_offset;  	nsrc = symsect->sh_size / sizeof(*src); +	/* strtab always starts with a nul, so offset 0 is the empty string. */ +	strtab_size = 1; +  	/* Compute total space required for the core symbols' strtab. */ -	for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src) -		if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) { -			strtab_size += strlen(&info->strtab[src->st_name]) + 1; +	for (ndst = i = 0; i < nsrc; i++) { +		if (i == 0 || +		    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { +			strtab_size += strlen(&info->strtab[src[i].st_name])+1;  			ndst++;  		} +	}  	/* Append room for core symbols at end of core part. 
*/  	info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); @@ -2332,15 +2337,15 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)  	mod->core_symtab = dst = mod->module_core + info->symoffs;  	mod->core_strtab = s = mod->module_core + info->stroffs;  	src = mod->symtab; -	*dst = *src;  	*s++ = 0; -	for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { -		if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) -			continue; - -		dst[ndst] = *src; -		dst[ndst++].st_name = s - mod->core_strtab; -		s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1; +	for (ndst = i = 0; i < mod->num_symtab; i++) { +		if (i == 0 || +		    is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { +			dst[ndst] = src[i]; +			dst[ndst++].st_name = s - mod->core_strtab; +			s += strlcpy(s, &mod->strtab[src[i].st_name], +				     KSYM_NAME_LEN) + 1; +		}  	}  	mod->core_num_syms = ndst;  } @@ -2421,25 +2426,17 @@ static inline void kmemleak_load_module(const struct module *mod,  #ifdef CONFIG_MODULE_SIG  static int module_sig_check(struct load_info *info, -			    const void *mod, unsigned long *len) +			    const void *mod, unsigned long *_len)  {  	int err = -ENOKEY; -	const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; -	const void *p = mod, *end = mod + *len; +	unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; +	unsigned long len = *_len; -	/* Poor man's memmem. */ -	while ((p = memchr(p, MODULE_SIG_STRING[0], end - p))) { -		if (p + markerlen > end) -			break; - -		if (memcmp(p, MODULE_SIG_STRING, markerlen) == 0) { -			const void *sig = p + markerlen; -			/* Truncate module up to signature. 
*/ -			*len = p - mod; -			err = mod_verify_sig(mod, *len, sig, end - sig); -			break; -		} -		p++; +	if (len > markerlen && +	    memcmp(mod + len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { +		/* We truncate the module to discard the signature */ +		*_len -= markerlen; +		err = mod_verify_sig(mod, _len);  	}  	if (!err) { diff --git a/kernel/module_signing.c b/kernel/module_signing.c index 6b09f6983ac..ea1b1df5dbb 100644 --- a/kernel/module_signing.c +++ b/kernel/module_signing.c @@ -183,27 +183,33 @@ static struct key *request_asymmetric_key(const char *signer, size_t signer_len,  /*   * Verify the signature on a module.   */ -int mod_verify_sig(const void *mod, unsigned long modlen, -		   const void *sig, unsigned long siglen) +int mod_verify_sig(const void *mod, unsigned long *_modlen)  {  	struct public_key_signature *pks;  	struct module_signature ms;  	struct key *key; -	size_t sig_len; +	const void *sig; +	size_t modlen = *_modlen, sig_len;  	int ret; -	pr_devel("==>%s(,%lu,,%lu,)\n", __func__, modlen, siglen); +	pr_devel("==>%s(,%zu)\n", __func__, modlen); -	if (siglen <= sizeof(ms)) +	if (modlen <= sizeof(ms))  		return -EBADMSG; -	memcpy(&ms, sig + (siglen - sizeof(ms)), sizeof(ms)); -	siglen -= sizeof(ms); +	memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms)); +	modlen -= sizeof(ms);  	sig_len = be32_to_cpu(ms.sig_len); -	if (sig_len >= siglen || -	    siglen - sig_len != (size_t)ms.signer_len + ms.key_id_len) +	if (sig_len >= modlen)  		return -EBADMSG; +	modlen -= sig_len; +	if ((size_t)ms.signer_len + ms.key_id_len >= modlen) +		return -EBADMSG; +	modlen -= (size_t)ms.signer_len + ms.key_id_len; + +	*_modlen = modlen; +	sig = mod + modlen;  	/* For the moment, only support RSA and X.509 identifiers */  	if (ms.algo != PKEY_ALGO_RSA || diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 478bad2745e..7b07cc0dfb7 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -71,12 +71,22 @@ err_alloc:  	return NULL;  } +/* 
MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */ +#define MAX_PID_NS_LEVEL 32 +  static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_pid_ns)  {  	struct pid_namespace *ns;  	unsigned int level = parent_pid_ns->level + 1; -	int i, err = -ENOMEM; +	int i; +	int err; + +	if (level > MAX_PID_NS_LEVEL) { +		err = -EINVAL; +		goto out; +	} +	err = -ENOMEM;  	ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);  	if (ns == NULL)  		goto out; @@ -133,19 +143,26 @@ struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old  	return create_pid_namespace(old_ns);  } -void free_pid_ns(struct kref *kref) +static void free_pid_ns(struct kref *kref)  { -	struct pid_namespace *ns, *parent; +	struct pid_namespace *ns;  	ns = container_of(kref, struct pid_namespace, kref); - -	parent = ns->parent;  	destroy_pid_namespace(ns); +} + +void put_pid_ns(struct pid_namespace *ns) +{ +	struct pid_namespace *parent; -	if (parent != NULL) -		put_pid_ns(parent); +	while (ns != &init_pid_ns) { +		parent = ns->parent; +		if (!kref_put(&ns->kref, free_pid_ns)) +			break; +		ns = parent; +	}  } -EXPORT_SYMBOL_GPL(free_pid_ns); +EXPORT_SYMBOL_GPL(put_pid_ns);  void zap_pid_ns_processes(struct pid_namespace *pid_ns)  { diff --git a/kernel/printk.c b/kernel/printk.c index 66a2ea37b57..2d607f4d179 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1890,7 +1890,6 @@ static int __cpuinit console_cpu_notify(struct notifier_block *self,  	switch (action) {  	case CPU_ONLINE:  	case CPU_DEAD: -	case CPU_DYING:  	case CPU_DOWN_FAILED:  	case CPU_UP_CANCELED:  		console_lock(); diff --git a/kernel/sys.c b/kernel/sys.c index c5cb5b99cb8..e6e0ece5f6a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1265,15 +1265,16 @@ DECLARE_RWSEM(uts_sem);   * Work around broken programs that cannot handle "Linux 3.0".   * Instead we map 3.x to 2.6.40+x, so e.g. 
3.0 would be 2.6.40   */ -static int override_release(char __user *release, int len) +static int override_release(char __user *release, size_t len)  {  	int ret = 0; -	char buf[65];  	if (current->personality & UNAME26) { -		char *rest = UTS_RELEASE; +		const char *rest = UTS_RELEASE; +		char buf[65] = { 0 };  		int ndots = 0;  		unsigned v; +		size_t copy;  		while (*rest) {  			if (*rest == '.' && ++ndots >= 3) @@ -1283,8 +1284,9 @@ static int override_release(char __user *release, int len)  			rest++;  		}  		v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; -		snprintf(buf, len, "2.6.%u%s", v, rest); -		ret = copy_to_user(release, buf, len); +		copy = clamp_t(size_t, len, 1, sizeof(buf)); +		copy = scnprintf(buf, copy, "2.6.%u%s", v, rest); +		ret = copy_to_user(release, buf, copy + 1);  	}  	return ret;  } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b32ed0e385a..b979426d16c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1567,6 +1567,10 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,  		put_online_cpus();  	} else { +		/* Make sure this CPU has been intitialized */ +		if (!cpumask_test_cpu(cpu_id, buffer->cpumask)) +			goto out; +  		cpu_buffer = buffer->buffers[cpu_id];  		if (nr_pages == cpu_buffer->nr_pages) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d951daa0ca9..042d221d33c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2982,7 +2982,7 @@ bool cancel_delayed_work(struct delayed_work *dwork)  	set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work));  	local_irq_restore(flags); -	return true; +	return ret;  }  EXPORT_SYMBOL(cancel_delayed_work);  |