diff options
Diffstat (limited to 'fs/namespace.c')
| -rw-r--r-- | fs/namespace.c | 211 | 
1 files changed, 175 insertions, 36 deletions
diff --git a/fs/namespace.c b/fs/namespace.c index 24960626bb6..c1bbe86f492 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -12,6 +12,7 @@  #include <linux/export.h>  #include <linux/capability.h>  #include <linux/mnt_namespace.h> +#include <linux/user_namespace.h>  #include <linux/namei.h>  #include <linux/security.h>  #include <linux/idr.h> @@ -20,6 +21,7 @@  #include <linux/fs_struct.h>	/* get_fs_root et.al. */  #include <linux/fsnotify.h>	/* fsnotify_vfsmount_delete */  #include <linux/uaccess.h> +#include <linux/proc_fs.h>  #include "pnode.h"  #include "internal.h" @@ -784,7 +786,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,  	if (!mnt)  		return ERR_PTR(-ENOMEM); -	if (flag & (CL_SLAVE | CL_PRIVATE)) +	if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))  		mnt->mnt_group_id = 0; /* not a peer of original */  	else  		mnt->mnt_group_id = old->mnt_group_id; @@ -805,7 +807,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,  	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);  	br_write_unlock(&vfsmount_lock); -	if (flag & CL_SLAVE) { +	if ((flag & CL_SLAVE) || +	    ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {  		list_add(&mnt->mnt_slave, &old->mnt_slave_list);  		mnt->mnt_master = old;  		CLEAR_MNT_SHARED(mnt); @@ -1266,7 +1269,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)  		goto dput_and_out;  	retval = -EPERM; -	if (!capable(CAP_SYS_ADMIN)) +	if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))  		goto dput_and_out;  	retval = do_umount(mnt, flags); @@ -1292,7 +1295,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)  static int mount_is_safe(struct path *path)  { -	if (capable(CAP_SYS_ADMIN)) +	if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))  		return 0;  	return -EPERM;  #ifdef notyet @@ -1308,6 +1311,26 @@ static int mount_is_safe(struct path *path)  #endif  } +static bool mnt_ns_loop(struct path *path) +{ +	/* Could bind mounting the mount namespace inode cause a +	 * mount namespace loop? +	 */ +	struct inode *inode = path->dentry->d_inode; +	struct proc_inode *ei; +	struct mnt_namespace *mnt_ns; + +	if (!proc_ns_inode(inode)) +		return false; + +	ei = PROC_I(inode); +	if (ei->ns_ops != &mntns_operations) +		return false; + +	mnt_ns = ei->ns; +	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; +} +  struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,  					int flag)  { @@ -1610,7 +1633,7 @@ static int do_change_type(struct path *path, int flag)  	int type;  	int err = 0; -	if (!capable(CAP_SYS_ADMIN)) +	if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))  		return -EPERM;  	if (path->dentry != path->mnt->mnt_root) @@ -1655,6 +1678,10 @@ static int do_loopback(struct path *path, const char *old_name,  	if (err)  		return err; +	err = -EINVAL; +	if (mnt_ns_loop(&old_path)) +		goto out;  +  	err = lock_mount(path);  	if (err)  		goto out; @@ -1770,7 +1797,7 @@ static int do_move_mount(struct path *path, const char *old_name)  	struct mount *p;  	struct mount *old;  	int err = 0; -	if (!capable(CAP_SYS_ADMIN)) +	if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN))  		return -EPERM;  	if (!old_name || !*old_name)  		return -EINVAL; @@ -1857,21 +1884,6 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)  	return ERR_PTR(err);  } -static struct vfsmount * -do_kern_mount(const char *fstype, int flags, const char *name, void *data) -{ -	struct file_system_type *type = get_fs_type(fstype); -	struct vfsmount *mnt; -	if (!type) -		return ERR_PTR(-ENODEV); -	mnt = vfs_kern_mount(type, flags, name, data); -	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && -	    !mnt->mnt_sb->s_subtype) -		mnt = fs_set_subtype(mnt, fstype); -	put_filesystem(type); -	return mnt; -} -  /*   * add a mount into a namespace's mount tree   */ @@ -1917,20 +1929,46 @@ unlock:   * create a new mount for userspace and request it to be added into the   * namespace's tree   */ -static int do_new_mount(struct path *path, const char *type, int flags, +static int do_new_mount(struct path *path, const char *fstype, int flags,  			int mnt_flags, const char *name, void *data)  { +	struct file_system_type *type; +	struct user_namespace *user_ns;  	struct vfsmount *mnt;  	int err; -	if (!type) +	if (!fstype)  		return -EINVAL;  	/* we need capabilities... */ -	if (!capable(CAP_SYS_ADMIN)) +	user_ns = real_mount(path->mnt)->mnt_ns->user_ns; +	if (!ns_capable(user_ns, CAP_SYS_ADMIN))  		return -EPERM; -	mnt = do_kern_mount(type, flags, name, data); +	type = get_fs_type(fstype); +	if (!type) +		return -ENODEV; + +	if (user_ns != &init_user_ns) { +		if (!(type->fs_flags & FS_USERNS_MOUNT)) { +			put_filesystem(type); +			return -EPERM; +		} +		/* Only in special cases allow devices from mounts +		 * created outside the initial user namespace. +		 */ +		if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) { +			flags |= MS_NODEV; +			mnt_flags |= MNT_NODEV; +		} +	} + +	mnt = vfs_kern_mount(type, flags, name, data); +	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && +	    !mnt->mnt_sb->s_subtype) +		mnt = fs_set_subtype(mnt, fstype); + +	put_filesystem(type);  	if (IS_ERR(mnt))  		return PTR_ERR(mnt); @@ -2261,18 +2299,42 @@ dput_out:  	return retval;  } -static struct mnt_namespace *alloc_mnt_ns(void) +static void free_mnt_ns(struct mnt_namespace *ns) +{ +	proc_free_inum(ns->proc_inum); +	put_user_ns(ns->user_ns); +	kfree(ns); +} + +/* + * Assign a sequence number so we can detect when we attempt to bind + * mount a reference to an older mount namespace into the current + * mount namespace, preventing reference counting loops.  A 64bit + * number incrementing at 10Ghz will take 12,427 years to wrap which + * is effectively never, so we can ignore the possibility. + */ +static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); + +static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)  {  	struct mnt_namespace *new_ns; +	int ret;  	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);  	if (!new_ns)  		return ERR_PTR(-ENOMEM); +	ret = proc_alloc_inum(&new_ns->proc_inum); +	if (ret) { +		kfree(new_ns); +		return ERR_PTR(ret); +	} +	new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);  	atomic_set(&new_ns->count, 1);  	new_ns->root = NULL;  	INIT_LIST_HEAD(&new_ns->list);  	init_waitqueue_head(&new_ns->poll);  	new_ns->event = 0; +	new_ns->user_ns = get_user_ns(user_ns);  	return new_ns;  } @@ -2281,24 +2343,28 @@ static struct mnt_namespace *alloc_mnt_ns(void)   * copied from the namespace of the passed in task structure.   */  static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, -		struct fs_struct *fs) +		struct user_namespace *user_ns, struct fs_struct *fs)  {  	struct mnt_namespace *new_ns;  	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;  	struct mount *p, *q;  	struct mount *old = mnt_ns->root;  	struct mount *new; +	int copy_flags; -	new_ns = alloc_mnt_ns(); +	new_ns = alloc_mnt_ns(user_ns);  	if (IS_ERR(new_ns))  		return new_ns;  	down_write(&namespace_sem);  	/* First pass: copy the tree topology */ -	new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE); +	copy_flags = CL_COPY_ALL | CL_EXPIRE; +	if (user_ns != mnt_ns->user_ns) +		copy_flags |= CL_SHARED_TO_SLAVE; +	new = copy_tree(old, old->mnt.mnt_root, copy_flags);  	if (IS_ERR(new)) {  		up_write(&namespace_sem); -		kfree(new_ns); +		free_mnt_ns(new_ns);  		return ERR_CAST(new);  	}  	new_ns->root = new; @@ -2339,7 +2405,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,  }  struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, -		struct fs_struct *new_fs) +		struct user_namespace *user_ns, struct fs_struct *new_fs)  {  	struct mnt_namespace *new_ns; @@ -2349,7 +2415,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,  	if (!(flags & CLONE_NEWNS))  		return ns; -	new_ns = dup_mnt_ns(ns, new_fs); +	new_ns = dup_mnt_ns(ns, user_ns, new_fs);  	put_mnt_ns(ns);  	return new_ns; @@ -2361,7 +2427,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,   */  static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)  { -	struct mnt_namespace *new_ns = alloc_mnt_ns(); +	struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns);  	if (!IS_ERR(new_ns)) {  		struct mount *mnt = real_mount(m);  		mnt->mnt_ns = new_ns; @@ -2501,7 +2567,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,  	struct mount *new_mnt, *root_mnt;  	int error; -	if (!capable(CAP_SYS_ADMIN)) +	if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))  		return -EPERM;  	error = user_path_dir(new_root, &new); @@ -2583,8 +2649,13 @@ static void __init init_mount_tree(void)  	struct vfsmount *mnt;  	struct mnt_namespace *ns;  	struct path root; +	struct file_system_type *type; -	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); +	type = get_fs_type("rootfs"); +	if (!type) +		panic("Can't find rootfs type"); +	mnt = vfs_kern_mount(type, 0, "rootfs", NULL); +	put_filesystem(type);  	if (IS_ERR(mnt))  		panic("Can't create rootfs"); @@ -2647,7 +2718,7 @@ void put_mnt_ns(struct mnt_namespace *ns)  	br_write_unlock(&vfsmount_lock);  	up_write(&namespace_sem);  	release_mounts(&umount_list); -	kfree(ns); +	free_mnt_ns(ns);  }  struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) @@ -2681,3 +2752,71 @@ bool our_mnt(struct vfsmount *mnt)  {  	return check_mnt(real_mount(mnt));  } + +static void *mntns_get(struct task_struct *task) +{ +	struct mnt_namespace *ns = NULL; +	struct nsproxy *nsproxy; + +	rcu_read_lock(); +	nsproxy = task_nsproxy(task); +	if (nsproxy) { +		ns = nsproxy->mnt_ns; +		get_mnt_ns(ns); +	} +	rcu_read_unlock(); + +	return ns; +} + +static void mntns_put(void *ns) +{ +	put_mnt_ns(ns); +} + +static int mntns_install(struct nsproxy *nsproxy, void *ns) +{ +	struct fs_struct *fs = current->fs; +	struct mnt_namespace *mnt_ns = ns; +	struct path root; + +	if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || +	    !nsown_capable(CAP_SYS_CHROOT)) +		return -EPERM; + +	if (fs->users != 1) +		return -EINVAL; + +	get_mnt_ns(mnt_ns); +	put_mnt_ns(nsproxy->mnt_ns); +	nsproxy->mnt_ns = mnt_ns; + +	/* Find the root */ +	root.mnt    = &mnt_ns->root->mnt; +	root.dentry = mnt_ns->root->mnt.mnt_root; +	path_get(&root); +	while(d_mountpoint(root.dentry) && follow_down_one(&root)) +		; + +	/* Update the pwd and root */ +	set_fs_pwd(fs, &root); +	set_fs_root(fs, &root); + +	path_put(&root); +	return 0; +} + +static unsigned int mntns_inum(void *ns) +{ +	struct mnt_namespace *mnt_ns = ns; +	return mnt_ns->proc_inum; +} + +const struct proc_ns_operations mntns_operations = { +	.name		= "mnt", +	.type		= CLONE_NEWNS, +	.get		= mntns_get, +	.put		= mntns_put, +	.install	= mntns_install, +	.inum		= mntns_inum, +};  |