Diffstat (limited to 'fs/file_table.c')
-rw-r--r--  fs/file_table.c  96
1 file changed, 78 insertions, 18 deletions
diff --git a/fs/file_table.c b/fs/file_table.c
index 70f2a0fd6ae..b3fc4d67a26 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -23,6 +23,8 @@
 #include <linux/lglock.h>
 #include <linux/percpu_counter.h>
 #include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/task_work.h>
 #include <linux/ima.h>
 
 #include <linux/atomic.h>
@@ -34,7 +36,6 @@ struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-DECLARE_LGLOCK(files_lglock);
 DEFINE_LGLOCK(files_lglock);
 
 /* SLAB cache for file structures */
@@ -252,7 +253,6 @@ static void __fput(struct file *file)
 	}
 	fops_put(file->f_op);
 	put_pid(file->f_owner.pid);
-	file_sb_list_del(file);
 	if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_dec(inode);
 	if (file->f_mode & FMODE_WRITE)
@@ -264,10 +264,77 @@ static void __fput(struct file *file)
 	mntput(mnt);
 }
 
+static DEFINE_SPINLOCK(delayed_fput_lock);
+static LIST_HEAD(delayed_fput_list);
+static void delayed_fput(struct work_struct *unused)
+{
+	LIST_HEAD(head);
+	spin_lock_irq(&delayed_fput_lock);
+	list_splice_init(&delayed_fput_list, &head);
+	spin_unlock_irq(&delayed_fput_lock);
+	while (!list_empty(&head)) {
+		struct file *f = list_first_entry(&head, struct file, f_u.fu_list);
+		list_del_init(&f->f_u.fu_list);
+		__fput(f);
+	}
+}
+
+static void ____fput(struct callback_head *work)
+{
+	__fput(container_of(work, struct file, f_u.fu_rcuhead));
+}
+
+/*
+ * If kernel thread really needs to have the final fput() it has done
+ * to complete, call this.  The only user right now is the boot - we
+ * *do* need to make sure our writes to binaries on initramfs has
+ * not left us with opened struct file waiting for __fput() - execve()
+ * won't work without that.  Please, don't add more callers without
+ * very good reasons; in particular, never call that with locks
+ * held and never call that from a thread that might need to do
+ * some work on any kind of umount.
+ */
+void flush_delayed_fput(void)
+{
+	delayed_fput(NULL);
+}
+
+static DECLARE_WORK(delayed_fput_work, delayed_fput);
+
 void fput(struct file *file)
 {
-	if (atomic_long_dec_and_test(&file->f_count))
+	if (atomic_long_dec_and_test(&file->f_count)) {
+		struct task_struct *task = current;
+		file_sb_list_del(file);
+		if (unlikely(in_interrupt() || task->flags & PF_KTHREAD)) {
+			unsigned long flags;
+			spin_lock_irqsave(&delayed_fput_lock, flags);
+			list_add(&file->f_u.fu_list, &delayed_fput_list);
+			schedule_work(&delayed_fput_work);
+			spin_unlock_irqrestore(&delayed_fput_lock, flags);
+			return;
+		}
+		init_task_work(&file->f_u.fu_rcuhead, ____fput);
+		task_work_add(task, &file->f_u.fu_rcuhead, true);
+	}
+}
+
+/*
+ * synchronous analog of fput(); for kernel threads that might be needed
+ * in some umount() (and thus can't use flush_delayed_fput() without
+ * risking deadlocks), need to wait for completion of __fput() and know
+ * for this specific struct file it won't involve anything that would
+ * need them.  Use only if you really need it - at the very least,
+ * don't blindly convert fput() by kernel thread to that.
+ */
+void __fput_sync(struct file *file)
+{
+	if (atomic_long_dec_and_test(&file->f_count)) {
+		struct task_struct *task = current;
+		file_sb_list_del(file);
+		BUG_ON(!(task->flags & PF_KTHREAD));
 		__fput(file);
+	}
 }
 
 EXPORT_SYMBOL(fput);
@@ -421,9 +488,9 @@ static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
  */
 void file_sb_list_add(struct file *file, struct super_block *sb)
 {
-	lg_local_lock(files_lglock);
+	lg_local_lock(&files_lglock);
 	__file_sb_list_add(file, sb);
-	lg_local_unlock(files_lglock);
+	lg_local_unlock(&files_lglock);
 }
 
 /**
@@ -436,9 +503,9 @@ void file_sb_list_add(struct file *file, struct super_block *sb)
 void file_sb_list_del(struct file *file)
 {
 	if (!list_empty(&file->f_u.fu_list)) {
-		lg_local_lock_cpu(files_lglock, file_list_cpu(file));
+		lg_local_lock_cpu(&files_lglock, file_list_cpu(file));
 		list_del_init(&file->f_u.fu_list);
-		lg_local_unlock_cpu(files_lglock, file_list_cpu(file));
+		lg_local_unlock_cpu(&files_lglock, file_list_cpu(file));
 	}
 }
 
@@ -484,10 +551,8 @@ void mark_files_ro(struct super_block *sb)
 {
 	struct file *f;
 
-retry:
-	lg_global_lock(files_lglock);
+	lg_global_lock(&files_lglock);
 	do_file_list_for_each_entry(sb, f) {
-		struct vfsmount *mnt;
 		if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
 		       continue;
 		if (!file_count(f))
@@ -500,14 +565,9 @@ retry:
 		if (file_check_writeable(f) != 0)
 			continue;
 		file_release_write(f);
-		mnt = mntget(f->f_path.mnt);
-		/* This can sleep, so we can't hold the spinlock. */
-		lg_global_unlock(files_lglock);
-		mnt_drop_write(mnt);
-		mntput(mnt);
-		goto retry;
+		mnt_drop_write_file(f);
 	} while_file_list_for_each_entry;
-	lg_global_unlock(files_lglock);
+	lg_global_unlock(&files_lglock);
 }
 
 void __init files_init(unsigned long mempages)
@@ -525,6 +585,6 @@ void __init files_init(unsigned long mempages)
 	n = (mempages * (PAGE_SIZE / 1024)) / 10;
 	files_stat.max_files = max_t(unsigned long, n, NR_FILE);
 	files_defer_init();
-	lg_lock_init(files_lglock);
+	lg_lock_init(&files_lglock, "files_lglock");
 	percpu_counter_init(&nr_files, 0);
 }
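
What the new code buys: when a kernel thread or interrupt context drops the last reference to a file, fput() no longer runs __fput() inline; the file is parked on delayed_fput_list and a workqueue item finishes the job. The comment in the patch names the boot path as the only intended caller of flush_delayed_fput(). A minimal sketch of that pattern, assuming 3.6-era kernel_execve(), argv_init and envp_init from init/main.c; boot_exec_init() itself is a hypothetical stand-in, not code from this commit:

/* Illustration only: boot_exec_init() is a hypothetical stand-in for
 * the real caller, the boot code in init/main.c. */
static int boot_exec_init(void)
{
	/*
	 * Binaries unpacked onto initramfs were closed with fput() from
	 * a kernel thread, so their final __fput() is still parked on
	 * delayed_fput_list instead of being done.  Drain the list, or
	 * the execve() of init can fail (e.g. -ETXTBSY) because the
	 * binary is still open for write.
	 */
	flush_delayed_fput();

	return kernel_execve("/sbin/init", argv_init, envp_init);
}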
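For the common case of a normal user task, fput() now hands the final __fput() to task_work_add(), so it runs in the task's own context on the way back to userspace, with no locks held. A self-contained sketch of that 3.6-era API (struct my_cleanup and both functions are hypothetical; note that task_work_add() then took a bool, where later kernels take a TWA_* mode):

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/task_work.h>

struct my_cleanup {
	struct callback_head twork;	/* same trick as file->f_u.fu_rcuhead */
	void *payload;
};

/* Runs in the owning task's context, just before return to userspace. */
static void my_cleanup_fn(struct callback_head *head)
{
	struct my_cleanup *c = container_of(head, struct my_cleanup, twork);

	kfree(c->payload);
	kfree(c);
}

static int my_defer_cleanup(struct my_cleanup *c)
{
	init_task_work(&c->twork, my_cleanup_fn);
	/* true = set TIF_NOTIFY_RESUME so the callback runs promptly */
	return task_work_add(current, &c->twork, true);
}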
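__fput_sync() is the escape hatch for kernel threads that cannot afford to defer: the BUG_ON enforces the PF_KTHREAD-only contract, and __fput() runs synchronously. A hypothetical caller, just to make the contract concrete:

/* Hypothetical helper, not from this commit. */
static void kthread_close_file(struct file *filp)
{
	/*
	 * Legal only because we are a kernel thread (PF_KTHREAD) and we
	 * know this particular file's __fput() cannot feed back into a
	 * umount that this thread itself would have to service.
	 */
	__fput_sync(filp);
}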
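The remaining hunks are lglock API churn rather than fput() logic: DECLARE_LGLOCK() is gone (DEFINE_LGLOCK() now defines a real variable), lg_lock_init() takes a pointer plus a name, and the lock/unlock helpers take pointers. mark_files_ro() also loses its unlock-and-retry dance, since mnt_drop_write_file() can be called without dropping the lock. Side by side, for a hypothetical my_lglock:

/* before this commit */
DECLARE_LGLOCK(my_lglock);
DEFINE_LGLOCK(my_lglock);
	lg_lock_init(my_lglock);
	lg_local_lock(my_lglock);
	lg_local_unlock(my_lglock);

/* after this commit */
DEFINE_LGLOCK(my_lglock);
	lg_lock_init(&my_lglock, "my_lglock");
	lg_local_lock(&my_lglock);
	lg_local_unlock(&my_lglock);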