diff options
| author | Cyrill Gorcunov <gorcunov@openvz.org> | 2012-05-31 16:26:46 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-31 17:49:32 -0700 | 
| commit | b32dfe377102ce668775f8b6b1461f7ad428f8b6 (patch) | |
| tree | f89be6bd34eb757c471f3ca506e0ce92224f9bc5 /kernel/sys.c | |
| parent | fe8c7f5cbf91124987106faa3bdf0c8b955c4cf7 (diff) | |
| download | olio-linux-3.10-b32dfe377102ce668775f8b6b1461f7ad428f8b6.tar.xz olio-linux-3.10-b32dfe377102ce668775f8b6b1461f7ad428f8b6.zip  | |
c/r: prctl: add ability to set new mm_struct::exe_file
When we do a restore we would like to have a way to set up the former
mm_struct::exe_file so that /proc/pid/exe points to the original
executable file the process had at checkpoint time.
For this the PR_SET_MM_EXE_FILE code is introduced.  This option takes a
file descriptor which will be set as a source for new /proc/$pid/exe
symlink.
Note that /proc/$pid/exe can be changed only if there are no VM_EXECUTABLE
vmas present for the current process, simply because this feature is specific
to C/R and mm::num_exe_file_vmas becomes meaningless after that.
To minimize the number of transitions the /proc/pid/exe symlink might have,
this feature is implemented in a one-shot manner.  Thus, once changed, the
symlink can't be changed again.  This should help sysadmins monitor the
symlinks of all processes running in a system.
In particular, one could take a snapshot of the processes and raise an alarm
if there are unexpected changes to any /proc/pid/exe in the system.
Note -- this feature is available only if CONFIG_CHECKPOINT_RESTORE is set,
and the caller must have the CAP_SYS_RESOURCE capability; otherwise the
request to change the symlink will be rejected.
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/sys.c')
| -rw-r--r-- | kernel/sys.c | 56 | 
1 files changed, 56 insertions, 0 deletions
diff --git a/kernel/sys.c b/kernel/sys.c index 8b544972e46..9ff89cb9657 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -36,6 +36,8 @@  #include <linux/personality.h>  #include <linux/ptrace.h>  #include <linux/fs_struct.h> +#include <linux/file.h> +#include <linux/mount.h>  #include <linux/gfp.h>  #include <linux/syscore_ops.h>  #include <linux/version.h> @@ -1792,6 +1794,57 @@ static bool vma_flags_mismatch(struct vm_area_struct *vma,  		(vma->vm_flags & banned);  } +static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) +{ +	struct file *exe_file; +	struct dentry *dentry; +	int err; + +	/* +	 * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's +	 * remain. So perform a quick test first. +	 */ +	if (mm->num_exe_file_vmas) +		return -EBUSY; + +	exe_file = fget(fd); +	if (!exe_file) +		return -EBADF; + +	dentry = exe_file->f_path.dentry; + +	/* +	 * Because the original mm->exe_file points to executable file, make +	 * sure that this one is executable as well, to avoid breaking an +	 * overall picture. +	 */ +	err = -EACCES; +	if (!S_ISREG(dentry->d_inode->i_mode)	|| +	    exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC) +		goto exit; + +	err = inode_permission(dentry->d_inode, MAY_EXEC); +	if (err) +		goto exit; + +	/* +	 * The symlink can be changed only once, just to disallow arbitrary +	 * transitions malicious software might bring in. This means one +	 * could make a snapshot over all processes running and monitor +	 * /proc/pid/exe changes to notice unusual activity if needed. 
+	 */ +	down_write(&mm->mmap_sem); +	if (likely(!mm->exe_file)) +		set_mm_exe_file(mm, exe_file); +	else +		err = -EBUSY; +	up_write(&mm->mmap_sem); + +exit: +	fput(exe_file); +	return err; +} +  static int prctl_set_mm(int opt, unsigned long addr,  			unsigned long arg4, unsigned long arg5)  { @@ -1806,6 +1859,9 @@ static int prctl_set_mm(int opt, unsigned long addr,  	if (!capable(CAP_SYS_RESOURCE))  		return -EPERM; +	if (opt == PR_SET_MM_EXE_FILE) +		return prctl_set_mm_exe_file(mm, (unsigned int)addr); +  	if (addr >= TASK_SIZE)  		return -EINVAL;  |