Diffstat (limited to 'fs/file.c')
-rw-r--r-- | fs/file.c | 175
1 file changed, 171 insertions, 4 deletions
diff --git a/fs/file.c b/fs/file.c
index 5110acb1c9e..7b3887e054d 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/file.h>
+#include <linux/fdtable.h>
 #include <linux/bitops.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
@@ -25,6 +26,8 @@ struct fdtable_defer {
 };
 
 int sysctl_nr_open __read_mostly = 1024*1024;
+int sysctl_nr_open_min = BITS_PER_LONG;
+int sysctl_nr_open_max = 1024 * 1024; /* raised later */
 
 /*
  * We use this list to defer free fdtables that have vmalloced
@@ -118,8 +121,6 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
 	unsigned int cpy, set;
 
 	BUG_ON(nfdt->max_fds < ofdt->max_fds);
-	if (ofdt->max_fds == 0)
-		return;
 
 	cpy = ofdt->max_fds * sizeof(struct file *);
 	set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
@@ -149,8 +150,16 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
 	nr /= (1024 / sizeof(struct file *));
 	nr = roundup_pow_of_two(nr + 1);
 	nr *= (1024 / sizeof(struct file *));
-	if (nr > sysctl_nr_open)
-		nr = sysctl_nr_open;
+	/*
+	 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
+	 * had been set lower between the check in expand_files() and here.  Deal
+	 * with that in caller, it's cheaper that way.
+	 *
+	 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
+	 * bitmaps handling below becomes unpleasant, to put it mildly...
+	 */
+	if (unlikely(nr > sysctl_nr_open))
+		nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
 
 	fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
 	if (!fdt)
@@ -199,6 +208,16 @@ static int expand_fdtable(struct files_struct *files, int nr)
 	if (!new_fdt)
 		return -ENOMEM;
 	/*
+	 * extremely unlikely race - sysctl_nr_open decreased between the check in
+	 * caller and alloc_fdtable().  Cheaper to catch it here...
+	 */
+	if (unlikely(new_fdt->max_fds <= nr)) {
+		free_fdarr(new_fdt);
+		free_fdset(new_fdt);
+		kfree(new_fdt);
+		return -EMFILE;
+	}
+	/*
 	 * Check again since another task may have expanded the fd table while
 	 * we dropped the lock
 	 */
@@ -242,6 +261,139 @@ int expand_files(struct files_struct *files, int nr)
 	return expand_fdtable(files, nr);
 }
 
+static int count_open_files(struct fdtable *fdt)
+{
+	int size = fdt->max_fds;
+	int i;
+
+	/* Find the last open fd */
+	for (i = size/(8*sizeof(long)); i > 0; ) {
+		if (fdt->open_fds->fds_bits[--i])
+			break;
+	}
+	i = (i+1) * 8 * sizeof(long);
+	return i;
+}
+
+/*
+ * Allocate a new files structure and copy contents from the
+ * passed in files structure.
+ * errorp will be valid only when the returned files_struct is NULL.
+ */
+struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
+{
+	struct files_struct *newf;
+	struct file **old_fds, **new_fds;
+	int open_files, size, i;
+	struct fdtable *old_fdt, *new_fdt;
+
+	*errorp = -ENOMEM;
+	newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
+	if (!newf)
+		goto out;
+
+	atomic_set(&newf->count, 1);
+
+	spin_lock_init(&newf->file_lock);
+	newf->next_fd = 0;
+	new_fdt = &newf->fdtab;
+	new_fdt->max_fds = NR_OPEN_DEFAULT;
+	new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
+	new_fdt->open_fds = (fd_set *)&newf->open_fds_init;
+	new_fdt->fd = &newf->fd_array[0];
+	INIT_RCU_HEAD(&new_fdt->rcu);
+	new_fdt->next = NULL;
+
+	spin_lock(&oldf->file_lock);
+	old_fdt = files_fdtable(oldf);
+	open_files = count_open_files(old_fdt);
+
+	/*
+	 * Check whether we need to allocate a larger fd array and fd set.
+	 */
+	while (unlikely(open_files > new_fdt->max_fds)) {
+		spin_unlock(&oldf->file_lock);
+
+		if (new_fdt != &newf->fdtab) {
+			free_fdarr(new_fdt);
+			free_fdset(new_fdt);
+			kfree(new_fdt);
+		}
+
+		new_fdt = alloc_fdtable(open_files - 1);
+		if (!new_fdt) {
+			*errorp = -ENOMEM;
+			goto out_release;
+		}
+
+		/* beyond sysctl_nr_open; nothing to do */
+		if (unlikely(new_fdt->max_fds < open_files)) {
+			free_fdarr(new_fdt);
+			free_fdset(new_fdt);
+			kfree(new_fdt);
+			*errorp = -EMFILE;
+			goto out_release;
+		}
+
+		/*
+		 * Reacquire the oldf lock and a pointer to its fd table
+		 * who knows it may have a new bigger fd table. We need
+		 * the latest pointer.
+		 */
+		spin_lock(&oldf->file_lock);
+		old_fdt = files_fdtable(oldf);
+		open_files = count_open_files(old_fdt);
+	}
+
+	old_fds = old_fdt->fd;
+	new_fds = new_fdt->fd;
+
+	memcpy(new_fdt->open_fds->fds_bits,
+		old_fdt->open_fds->fds_bits, open_files/8);
+	memcpy(new_fdt->close_on_exec->fds_bits,
+		old_fdt->close_on_exec->fds_bits, open_files/8);
+
+	for (i = open_files; i != 0; i--) {
+		struct file *f = *old_fds++;
+		if (f) {
+			get_file(f);
+		} else {
+			/*
+			 * The fd may be claimed in the fd bitmap but not yet
+			 * instantiated in the files array if a sibling thread
+			 * is partway through open().  So make sure that this
+			 * fd is available to the new process.
+			 */
+			FD_CLR(open_files - i, new_fdt->open_fds);
+		}
+		rcu_assign_pointer(*new_fds++, f);
+	}
+	spin_unlock(&oldf->file_lock);
+
+	/* compute the remainder to be cleared */
+	size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
+
+	/* This is long word aligned thus could use a optimized version */
+	memset(new_fds, 0, size);
+
+	if (new_fdt->max_fds > open_files) {
+		int left = (new_fdt->max_fds-open_files)/8;
+		int start = open_files / (8 * sizeof(unsigned long));
+
+		memset(&new_fdt->open_fds->fds_bits[start], 0, left);
+		memset(&new_fdt->close_on_exec->fds_bits[start], 0, left);
+	}
+
+	rcu_assign_pointer(newf->fdt, new_fdt);
+
+	return newf;
+
+out_release:
+	kmem_cache_free(files_cachep, newf);
+out:
+	return NULL;
+}
+
 static void __devinit fdtable_defer_list_init(int cpu)
 {
 	struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
@@ -255,4 +407,19 @@ void __init files_defer_init(void)
 	int i;
 	for_each_possible_cpu(i)
 		fdtable_defer_list_init(i);
+	sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) &
+			     -BITS_PER_LONG;
 }
+
+struct files_struct init_files = {
+	.count		= ATOMIC_INIT(1),
+	.fdt		= &init_files.fdtab,
+	.fdtab		= {
+		.max_fds	= NR_OPEN_DEFAULT,
+		.fd		= &init_files.fd_array[0],
+		.close_on_exec	= (fd_set *)&init_files.close_on_exec_init,
+		.open_fds	= (fd_set *)&init_files.open_fds_init,
+		.rcu		= RCU_HEAD_INIT,
+	},
+	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock),
+};
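Note on the sizing arithmetic: the clamp added to alloc_fdtable() keeps the fd count a multiple of BITS_PER_LONG so the open_fds/close_on_exec bitmaps remain whole longs. Below is a minimal standalone userspace sketch of that arithmetic, not the kernel code itself: sizeof(void *) stands in for sizeof(struct file *), nr_open for sysctl_nr_open, roundup_pow_of_two() is an illustrative reimplementation of the kernel helper, and the values in the comments assume a 64-bit build.

    #include <stdio.h>

    #define BITS_PER_LONG ((unsigned int)(8 * sizeof(long)))

    /* illustrative stand-in for the kernel's roundup_pow_of_two() */
    static unsigned int roundup_pow_of_two(unsigned int x)
    {
        unsigned int r = 1;
        while (r < x)
            r <<= 1;
        return r;
    }

    /* how many fd slots an fdtable sized for "at least nr" ends up with */
    static unsigned int fdtable_size(unsigned int nr, unsigned int nr_open)
    {
        /* keep the fd array a power-of-two-sized allocation of pointers */
        nr /= (1024 / sizeof(void *));
        nr = roundup_pow_of_two(nr + 1);
        nr *= (1024 / sizeof(void *));
        /*
         * Clamp against nr_open, rounding the clamp *up* to a multiple of
         * BITS_PER_LONG: ((x - 1) | (BITS_PER_LONG - 1)) + 1 is "round x
         * up to the next multiple of BITS_PER_LONG".
         */
        if (nr > nr_open)
            nr = ((nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
        return nr;
    }

    int main(void)
    {
        printf("%u\n", fdtable_size(300, 1024 * 1024)); /* 512 on a 64-bit build */
        printf("%u\n", fdtable_size(300, 100));         /* clamp rounds 100 up to 128 */
        return 0;
    }

The line added to files_defer_init() goes the other way: it rounds the absolute ceiling for sysctl_nr_open *down* to a multiple of BITS_PER_LONG (& -BITS_PER_LONG), after capping it so that max_fds fits in an int and max_fds * sizeof(struct file *) cannot overflow a size_t.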
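Similarly, count_open_files() and the bitmap copying in dup_fd() rely on the reported fd count always being a multiple of a bitmap word: the scan walks fds_bits from the top and counts everything up to and including the last non-zero word. Here is a standalone sketch of that scan (userspace, hypothetical names), with a plain unsigned long array standing in for fdt->open_fds->fds_bits.

    #include <stdio.h>

    #define BITS_PER_LONG ((int)(8 * sizeof(long)))

    /* report the in-use size of an fd bitmap, rounded up to a whole word */
    static int count_open_fds(const unsigned long *words, int max_fds)
    {
        int i;

        /* walk the bitmap top-down; stop at the last word with any bit set */
        for (i = max_fds / BITS_PER_LONG; i > 0; ) {
            if (words[--i])
                break;
        }
        /* everything up to and including that word counts as potentially open */
        return (i + 1) * BITS_PER_LONG;
    }

    int main(void)
    {
        unsigned long open_fds[4] = { 0 };  /* room for 4 * BITS_PER_LONG fds */

        open_fds[0] |= 1UL << 0;            /* fd 0 in use */
        open_fds[1] |= 1UL << 5;            /* fd BITS_PER_LONG + 5 in use */
        /* prints 2 * BITS_PER_LONG (128 on a 64-bit build) */
        printf("%d\n", count_open_fds(open_fds, 4 * BITS_PER_LONG));
        return 0;
    }

Because the result is word-aligned, dup_fd() can memcpy open_files/8 bytes of each bitmap and memset the remainder without having to handle partial bytes or partial longs.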