| author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 | 
|---|---|---|
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 | 
| commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
| tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/proc | |
Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/proc')
| mode | file | lines added |
|---|---|---|
| -rw-r--r-- | fs/proc/Makefile | 14 |
| -rw-r--r-- | fs/proc/array.c | 484 |
| -rw-r--r-- | fs/proc/base.c | 2056 |
| -rw-r--r-- | fs/proc/generic.c | 705 |
| -rw-r--r-- | fs/proc/inode-alloc.txt | 14 |
| -rw-r--r-- | fs/proc/inode.c | 218 |
| -rw-r--r-- | fs/proc/internal.h | 48 |
| -rw-r--r-- | fs/proc/kcore.c | 404 |
| -rw-r--r-- | fs/proc/kmsg.c | 55 |
| -rw-r--r-- | fs/proc/mmu.c | 67 |
| -rw-r--r-- | fs/proc/nommu.c | 135 |
| -rw-r--r-- | fs/proc/proc_devtree.c | 165 |
| -rw-r--r-- | fs/proc/proc_misc.c | 615 |
| -rw-r--r-- | fs/proc/proc_tty.c | 242 |
| -rw-r--r-- | fs/proc/root.c | 161 |
| -rw-r--r-- | fs/proc/task_mmu.c | 235 |
| -rw-r--r-- | fs/proc/task_nommu.c | 164 |
17 files changed, 5782 insertions, 0 deletions
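The same per-directory stat and the full patch below can be regenerated with git from a clone of the mainline kernel repository (a local checkout is assumed here); any unambiguous abbreviation of the commit id above also works:

    git show --stat 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 -- fs/proc
    git show 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 -- fs/proc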
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
new file mode 100644
index 00000000000..738b9b60293
--- /dev/null
+++ b/fs/proc/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for the Linux proc filesystem routines.
+#
+
+obj-$(CONFIG_PROC_FS) += proc.o
+
+proc-y			:= nommu.o task_nommu.o
+proc-$(CONFIG_MMU)	:= mmu.o task_mmu.o
+
+proc-y       += inode.o root.o base.o generic.o array.o \
+		kmsg.o proc_tty.o proc_misc.o
+
+proc-$(CONFIG_PROC_KCORE)	+= kcore.o
+proc-$(CONFIG_PROC_DEVICETREE)	+= proc_devtree.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
new file mode 100644
index 00000000000..37668fe998a
--- /dev/null
+++ b/fs/proc/array.c
@@ -0,0 +1,484 @@
+/*
+ *  linux/fs/proc/array.c
+ *
+ *  Copyright (C) 1992  by Linus Torvalds
+ *  based on ideas by Darren Senn
+ *
+ * Fixes:
+ * Michael. K. Johnson: stat,statm extensions.
+ *                      <johnsonm@stolaf.edu>
+ *
+ * Pauline Middelink :  Made cmdline,envline only break at '\0's, to
+ *                      make sure SET_PROCTITLE works. Also removed
+ *                      bad '!' which forced address recalculation for
+ *                      EVERY character on the current page.
+ *                      <middelin@polyware.iaf.nl>
+ *
+ * Danny ter Haar    :	added cpuinfo
+ *			<dth@cistron.nl>
+ *
+ * Alessandro Rubini :  profile extension.
+ *                      <rubini@ipvvis.unipv.it>
+ *
+ * Jeff Tranter      :  added BogoMips field to cpuinfo
+ *                      <Jeff_Tranter@Mitel.COM>
+ *
+ * Bruno Haible      :  remove 4K limit for the maps file
+ *			<haible@ma2s2.mathematik.uni-karlsruhe.de>
+ *
+ * Yves Arrouye      :  remove removal of trailing spaces in get_array.
+ *			<Yves.Arrouye@marin.fdn.fr>
+ *
+ * Jerome Forissier  :  added per-CPU time information to /proc/stat
+ *                      and /proc/<pid>/cpu extension
+ *                      <forissier@isia.cma.fr>
+ *			- Incorporation and non-SMP safe operation
+ *			of forissier patch in 2.1.78 by
+ *			Hans Marcus <crowbar@concepts.nl>
+ *
+ * aeb@cwi.nl        :  /proc/partitions
+ *
+ *
+ * Alan Cox	     :  security fixes.
+ *			<Alan.Cox@linux.org>
+ *
+ * Al Viro           :  safe handling of mm_struct
+ *
+ * Gerhard Wichert   :  added BIGMEM support
+ * Siemens AG           <Gerhard.Wichert@pdb.siemens.de>
+ *
+ * Al Viro & Jeff Garzik :  moved most of the thing into base.c and
+ *			 :  proc_misc.c. The rest may eventually go into
+ *			 :  base.c too.
+ */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/tty.h> +#include <linux/string.h> +#include <linux/mman.h> +#include <linux/proc_fs.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/signal.h> +#include <linux/highmem.h> +#include <linux/file.h> +#include <linux/times.h> +#include <linux/cpuset.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/processor.h> +#include "internal.h" + +/* Gcc optimizes away "strlen(x)" for constant x */ +#define ADDBUF(buffer, string) \ +do { memcpy(buffer, string, strlen(string)); \ +     buffer += strlen(string); } while (0) + +static inline char * task_name(struct task_struct *p, char * buf) +{ +	int i; +	char * name; +	char tcomm[sizeof(p->comm)]; + +	get_task_comm(tcomm, p); + +	ADDBUF(buf, "Name:\t"); +	name = tcomm; +	i = sizeof(tcomm); +	do { +		unsigned char c = *name; +		name++; +		i--; +		*buf = c; +		if (!c) +			break; +		if (c == '\\') { +			buf[1] = c; +			buf += 2; +			continue; +		} +		if (c == '\n') { +			buf[0] = '\\'; +			buf[1] = 'n'; +			buf += 2; +			continue; +		} +		buf++; +	} while (i); +	*buf = '\n'; +	return buf+1; +} + +/* + * The task state array is a strange "bitmap" of + * reasons to sleep. Thus "running" is zero, and + * you can test for combinations of others with + * simple bit tests. + */ +static const char *task_state_array[] = { +	"R (running)",		/*  0 */ +	"S (sleeping)",		/*  1 */ +	"D (disk sleep)",	/*  2 */ +	"T (stopped)",		/*  4 */ +	"T (tracing stop)",	/*  8 */ +	"Z (zombie)",		/* 16 */ +	"X (dead)"		/* 32 */ +}; + +static inline const char * get_task_state(struct task_struct *tsk) +{ +	unsigned int state = (tsk->state & (TASK_RUNNING | +					    TASK_INTERRUPTIBLE | +					    TASK_UNINTERRUPTIBLE | +					    TASK_STOPPED | +					    TASK_TRACED)) | +			(tsk->exit_state & (EXIT_ZOMBIE | +					    EXIT_DEAD)); +	const char **p = &task_state_array[0]; + +	while (state) { +		p++; +		state >>= 1; +	} +	return *p; +} + +static inline char * task_state(struct task_struct *p, char *buffer) +{ +	struct group_info *group_info; +	int g; + +	read_lock(&tasklist_lock); +	buffer += sprintf(buffer, +		"State:\t%s\n" +		"SleepAVG:\t%lu%%\n" +		"Tgid:\t%d\n" +		"Pid:\t%d\n" +		"PPid:\t%d\n" +		"TracerPid:\t%d\n" +		"Uid:\t%d\t%d\t%d\t%d\n" +		"Gid:\t%d\t%d\t%d\t%d\n", +		get_task_state(p), +		(p->sleep_avg/1024)*100/(1020000000/1024), +	       	p->tgid, +		p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0, +		pid_alive(p) && p->ptrace ? p->parent->pid : 0, +		p->uid, p->euid, p->suid, p->fsuid, +		p->gid, p->egid, p->sgid, p->fsgid); +	read_unlock(&tasklist_lock); +	task_lock(p); +	buffer += sprintf(buffer, +		"FDSize:\t%d\n" +		"Groups:\t", +		p->files ? 
p->files->max_fds : 0); + +	group_info = p->group_info; +	get_group_info(group_info); +	task_unlock(p); + +	for (g = 0; g < min(group_info->ngroups,NGROUPS_SMALL); g++) +		buffer += sprintf(buffer, "%d ", GROUP_AT(group_info,g)); +	put_group_info(group_info); + +	buffer += sprintf(buffer, "\n"); +	return buffer; +} + +static char * render_sigset_t(const char *header, sigset_t *set, char *buffer) +{ +	int i, len; + +	len = strlen(header); +	memcpy(buffer, header, len); +	buffer += len; + +	i = _NSIG; +	do { +		int x = 0; + +		i -= 4; +		if (sigismember(set, i+1)) x |= 1; +		if (sigismember(set, i+2)) x |= 2; +		if (sigismember(set, i+3)) x |= 4; +		if (sigismember(set, i+4)) x |= 8; +		*buffer++ = (x < 10 ? '0' : 'a' - 10) + x; +	} while (i >= 4); + +	*buffer++ = '\n'; +	*buffer = 0; +	return buffer; +} + +static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, +				    sigset_t *catch) +{ +	struct k_sigaction *k; +	int i; + +	k = p->sighand->action; +	for (i = 1; i <= _NSIG; ++i, ++k) { +		if (k->sa.sa_handler == SIG_IGN) +			sigaddset(ign, i); +		else if (k->sa.sa_handler != SIG_DFL) +			sigaddset(catch, i); +	} +} + +static inline char * task_sig(struct task_struct *p, char *buffer) +{ +	sigset_t pending, shpending, blocked, ignored, caught; +	int num_threads = 0; +	unsigned long qsize = 0; +	unsigned long qlim = 0; + +	sigemptyset(&pending); +	sigemptyset(&shpending); +	sigemptyset(&blocked); +	sigemptyset(&ignored); +	sigemptyset(&caught); + +	/* Gather all the data with the appropriate locks held */ +	read_lock(&tasklist_lock); +	if (p->sighand) { +		spin_lock_irq(&p->sighand->siglock); +		pending = p->pending.signal; +		shpending = p->signal->shared_pending.signal; +		blocked = p->blocked; +		collect_sigign_sigcatch(p, &ignored, &caught); +		num_threads = atomic_read(&p->signal->count); +		qsize = atomic_read(&p->user->sigpending); +		qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; +		spin_unlock_irq(&p->sighand->siglock); +	} +	read_unlock(&tasklist_lock); + +	buffer += sprintf(buffer, "Threads:\t%d\n", num_threads); +	buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim); + +	/* render them all */ +	buffer = render_sigset_t("SigPnd:\t", &pending, buffer); +	buffer = render_sigset_t("ShdPnd:\t", &shpending, buffer); +	buffer = render_sigset_t("SigBlk:\t", &blocked, buffer); +	buffer = render_sigset_t("SigIgn:\t", &ignored, buffer); +	buffer = render_sigset_t("SigCgt:\t", &caught, buffer); + +	return buffer; +} + +static inline char *task_cap(struct task_struct *p, char *buffer) +{ +    return buffer + sprintf(buffer, "CapInh:\t%016x\n" +			    "CapPrm:\t%016x\n" +			    "CapEff:\t%016x\n", +			    cap_t(p->cap_inheritable), +			    cap_t(p->cap_permitted), +			    cap_t(p->cap_effective)); +} + +int proc_pid_status(struct task_struct *task, char * buffer) +{ +	char * orig = buffer; +	struct mm_struct *mm = get_task_mm(task); + +	buffer = task_name(task, buffer); +	buffer = task_state(task, buffer); +  +	if (mm) { +		buffer = task_mem(mm, buffer); +		mmput(mm); +	} +	buffer = task_sig(task, buffer); +	buffer = task_cap(task, buffer); +	buffer = cpuset_task_status_allowed(task, buffer); +#if defined(CONFIG_ARCH_S390) +	buffer = task_show_regs(task, buffer); +#endif +	return buffer - orig; +} + +static int do_task_stat(struct task_struct *task, char * buffer, int whole) +{ +	unsigned long vsize, eip, esp, wchan = ~0UL; +	long priority, nice; +	int tty_pgrp = -1, tty_nr = 0; +	sigset_t sigign, sigcatch; +	char state; +	int res; + 	pid_t ppid, pgid = -1, sid = 
-1; +	int num_threads = 0; +	struct mm_struct *mm; +	unsigned long long start_time; +	unsigned long cmin_flt = 0, cmaj_flt = 0; +	unsigned long  min_flt = 0,  maj_flt = 0; +	cputime_t cutime, cstime, utime, stime; +	unsigned long rsslim = 0; +	unsigned long it_real_value = 0; +	struct task_struct *t; +	char tcomm[sizeof(task->comm)]; + +	state = *get_task_state(task); +	vsize = eip = esp = 0; +	mm = get_task_mm(task); +	if (mm) { +		vsize = task_vsize(mm); +		eip = KSTK_EIP(task); +		esp = KSTK_ESP(task); +	} + +	get_task_comm(tcomm, task); + +	sigemptyset(&sigign); +	sigemptyset(&sigcatch); +	cutime = cstime = utime = stime = cputime_zero; +	read_lock(&tasklist_lock); +	if (task->sighand) { +		spin_lock_irq(&task->sighand->siglock); +		num_threads = atomic_read(&task->signal->count); +		collect_sigign_sigcatch(task, &sigign, &sigcatch); + +		/* add up live thread stats at the group level */ +		if (whole) { +			t = task; +			do { +				min_flt += t->min_flt; +				maj_flt += t->maj_flt; +				utime = cputime_add(utime, t->utime); +				stime = cputime_add(stime, t->stime); +				t = next_thread(t); +			} while (t != task); +		} + +		spin_unlock_irq(&task->sighand->siglock); +	} +	if (task->signal) { +		if (task->signal->tty) { +			tty_pgrp = task->signal->tty->pgrp; +			tty_nr = new_encode_dev(tty_devnum(task->signal->tty)); +		} +		pgid = process_group(task); +		sid = task->signal->session; +		cmin_flt = task->signal->cmin_flt; +		cmaj_flt = task->signal->cmaj_flt; +		cutime = task->signal->cutime; +		cstime = task->signal->cstime; +		rsslim = task->signal->rlim[RLIMIT_RSS].rlim_cur; +		if (whole) { +			min_flt += task->signal->min_flt; +			maj_flt += task->signal->maj_flt; +			utime = cputime_add(utime, task->signal->utime); +			stime = cputime_add(stime, task->signal->stime); +		} +		it_real_value = task->signal->it_real_value; +	} +	ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0; +	read_unlock(&tasklist_lock); + +	if (!whole || num_threads<2) +		wchan = get_wchan(task); +	if (!whole) { +		min_flt = task->min_flt; +		maj_flt = task->maj_flt; +		utime = task->utime; +		stime = task->stime; +	} + +	/* scale priority and nice values from timeslices to -20..20 */ +	/* to make it look like a "normal" Unix priority/nice value  */ +	priority = task_prio(task); +	nice = task_nice(task); + +	/* Temporary variable needed for gcc-2.96 */ +	/* convert timespec -> nsec*/ +	start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC +				+ task->start_time.tv_nsec; +	/* convert nsec -> ticks */ +	start_time = nsec_to_clock_t(start_time); + +	res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ +%lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \ +%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n", +		task->pid, +		tcomm, +		state, +		ppid, +		pgid, +		sid, +		tty_nr, +		tty_pgrp, +		task->flags, +		min_flt, +		cmin_flt, +		maj_flt, +		cmaj_flt, +		cputime_to_clock_t(utime), +		cputime_to_clock_t(stime), +		cputime_to_clock_t(cutime), +		cputime_to_clock_t(cstime), +		priority, +		nice, +		num_threads, +		jiffies_to_clock_t(it_real_value), +		start_time, +		vsize, +		mm ? get_mm_counter(mm, rss) : 0, /* you might want to shift this left 3 */ +	        rsslim, +		mm ? mm->start_code : 0, +		mm ? mm->end_code : 0, +		mm ? mm->start_stack : 0, +		esp, +		eip, +		/* The signal information here is obsolete. +		 * It must be decimal for Linux 2.0 compatibility. +		 * Use /proc/#/status for real-time signals. 
+		 */ +		task->pending.signal.sig[0] & 0x7fffffffUL, +		task->blocked.sig[0] & 0x7fffffffUL, +		sigign      .sig[0] & 0x7fffffffUL, +		sigcatch    .sig[0] & 0x7fffffffUL, +		wchan, +		0UL, +		0UL, +		task->exit_signal, +		task_cpu(task), +		task->rt_priority, +		task->policy); +	if(mm) +		mmput(mm); +	return res; +} + +int proc_tid_stat(struct task_struct *task, char * buffer) +{ +	return do_task_stat(task, buffer, 0); +} + +int proc_tgid_stat(struct task_struct *task, char * buffer) +{ +	return do_task_stat(task, buffer, 1); +} + +int proc_pid_statm(struct task_struct *task, char *buffer) +{ +	int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0; +	struct mm_struct *mm = get_task_mm(task); +	 +	if (mm) { +		size = task_statm(mm, &shared, &text, &data, &resident); +		mmput(mm); +	} + +	return sprintf(buffer,"%d %d %d %d %d %d %d\n", +		       size, resident, shared, text, lib, data, 0); +} diff --git a/fs/proc/base.c b/fs/proc/base.c new file mode 100644 index 00000000000..dad8ea4e00a --- /dev/null +++ b/fs/proc/base.c @@ -0,0 +1,2056 @@ +/* + *  linux/fs/proc/base.c + * + *  Copyright (C) 1991, 1992 Linus Torvalds + * + *  proc base directory handling functions + * + *  1999, Al Viro. Rewritten. Now it covers the whole per-process part. + *  Instead of using magical inumbers to determine the kind of object + *  we allocate and fill in-core inodes upon lookup. They don't even + *  go into icache. We cache the reference to task_struct upon lookup too. + *  Eventually it should become a filesystem in its own. We don't use the + *  rest of procfs anymore. + */ + +#include <asm/uaccess.h> + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/init.h> +#include <linux/file.h> +#include <linux/string.h> +#include <linux/seq_file.h> +#include <linux/namei.h> +#include <linux/namespace.h> +#include <linux/mm.h> +#include <linux/smp_lock.h> +#include <linux/kallsyms.h> +#include <linux/mount.h> +#include <linux/security.h> +#include <linux/ptrace.h> +#include <linux/seccomp.h> +#include <linux/cpuset.h> +#include <linux/audit.h> +#include "internal.h" + +/* + * For hysterical raisins we keep the same inumbers as in the old procfs. + * Feel free to change the macro below - just keep the range distinct from + * inumbers of the rest of procfs (currently those are in 0x0000--0xffff). + * As soon as we'll get a separate superblock we will be able to forget + * about magical ranges too. 
+ */ + +#define fake_ino(pid,ino) (((pid)<<16)|(ino)) + +enum pid_directory_inos { +	PROC_TGID_INO = 2, +	PROC_TGID_TASK, +	PROC_TGID_STATUS, +	PROC_TGID_MEM, +#ifdef CONFIG_SECCOMP +	PROC_TGID_SECCOMP, +#endif +	PROC_TGID_CWD, +	PROC_TGID_ROOT, +	PROC_TGID_EXE, +	PROC_TGID_FD, +	PROC_TGID_ENVIRON, +	PROC_TGID_AUXV, +	PROC_TGID_CMDLINE, +	PROC_TGID_STAT, +	PROC_TGID_STATM, +	PROC_TGID_MAPS, +	PROC_TGID_MOUNTS, +	PROC_TGID_WCHAN, +#ifdef CONFIG_SCHEDSTATS +	PROC_TGID_SCHEDSTAT, +#endif +#ifdef CONFIG_CPUSETS +	PROC_TGID_CPUSET, +#endif +#ifdef CONFIG_SECURITY +	PROC_TGID_ATTR, +	PROC_TGID_ATTR_CURRENT, +	PROC_TGID_ATTR_PREV, +	PROC_TGID_ATTR_EXEC, +	PROC_TGID_ATTR_FSCREATE, +#endif +#ifdef CONFIG_AUDITSYSCALL +	PROC_TGID_LOGINUID, +#endif +	PROC_TGID_FD_DIR, +	PROC_TGID_OOM_SCORE, +	PROC_TGID_OOM_ADJUST, +	PROC_TID_INO, +	PROC_TID_STATUS, +	PROC_TID_MEM, +#ifdef CONFIG_SECCOMP +	PROC_TID_SECCOMP, +#endif +	PROC_TID_CWD, +	PROC_TID_ROOT, +	PROC_TID_EXE, +	PROC_TID_FD, +	PROC_TID_ENVIRON, +	PROC_TID_AUXV, +	PROC_TID_CMDLINE, +	PROC_TID_STAT, +	PROC_TID_STATM, +	PROC_TID_MAPS, +	PROC_TID_MOUNTS, +	PROC_TID_WCHAN, +#ifdef CONFIG_SCHEDSTATS +	PROC_TID_SCHEDSTAT, +#endif +#ifdef CONFIG_CPUSETS +	PROC_TID_CPUSET, +#endif +#ifdef CONFIG_SECURITY +	PROC_TID_ATTR, +	PROC_TID_ATTR_CURRENT, +	PROC_TID_ATTR_PREV, +	PROC_TID_ATTR_EXEC, +	PROC_TID_ATTR_FSCREATE, +#endif +#ifdef CONFIG_AUDITSYSCALL +	PROC_TID_LOGINUID, +#endif +	PROC_TID_FD_DIR = 0x8000,	/* 0x8000-0xffff */ +	PROC_TID_OOM_SCORE, +	PROC_TID_OOM_ADJUST, +}; + +struct pid_entry { +	int type; +	int len; +	char *name; +	mode_t mode; +}; + +#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} + +static struct pid_entry tgid_base_stuff[] = { +	E(PROC_TGID_TASK,      "task",    S_IFDIR|S_IRUGO|S_IXUGO), +	E(PROC_TGID_FD,        "fd",      S_IFDIR|S_IRUSR|S_IXUSR), +	E(PROC_TGID_ENVIRON,   "environ", S_IFREG|S_IRUSR), +	E(PROC_TGID_AUXV,      "auxv",	  S_IFREG|S_IRUSR), +	E(PROC_TGID_STATUS,    "status",  S_IFREG|S_IRUGO), +	E(PROC_TGID_CMDLINE,   "cmdline", S_IFREG|S_IRUGO), +	E(PROC_TGID_STAT,      "stat",    S_IFREG|S_IRUGO), +	E(PROC_TGID_STATM,     "statm",   S_IFREG|S_IRUGO), +	E(PROC_TGID_MAPS,      "maps",    S_IFREG|S_IRUGO), +	E(PROC_TGID_MEM,       "mem",     S_IFREG|S_IRUSR|S_IWUSR), +#ifdef CONFIG_SECCOMP +	E(PROC_TGID_SECCOMP,   "seccomp", S_IFREG|S_IRUSR|S_IWUSR), +#endif +	E(PROC_TGID_CWD,       "cwd",     S_IFLNK|S_IRWXUGO), +	E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO), +	E(PROC_TGID_EXE,       "exe",     S_IFLNK|S_IRWXUGO), +	E(PROC_TGID_MOUNTS,    "mounts",  S_IFREG|S_IRUGO), +#ifdef CONFIG_SECURITY +	E(PROC_TGID_ATTR,      "attr",    S_IFDIR|S_IRUGO|S_IXUGO), +#endif +#ifdef CONFIG_KALLSYMS +	E(PROC_TGID_WCHAN,     "wchan",   S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_SCHEDSTATS +	E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_CPUSETS +	E(PROC_TGID_CPUSET,    "cpuset",  S_IFREG|S_IRUGO), +#endif +	E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), +	E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR), +#ifdef CONFIG_AUDITSYSCALL +	E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), +#endif +	{0,0,NULL,0} +}; +static struct pid_entry tid_base_stuff[] = { +	E(PROC_TID_FD,         "fd",      S_IFDIR|S_IRUSR|S_IXUSR), +	E(PROC_TID_ENVIRON,    "environ", S_IFREG|S_IRUSR), +	E(PROC_TID_AUXV,       "auxv",	  S_IFREG|S_IRUSR), +	E(PROC_TID_STATUS,     "status",  S_IFREG|S_IRUGO), +	E(PROC_TID_CMDLINE,    "cmdline", S_IFREG|S_IRUGO), +	E(PROC_TID_STAT,       "stat",    
S_IFREG|S_IRUGO), +	E(PROC_TID_STATM,      "statm",   S_IFREG|S_IRUGO), +	E(PROC_TID_MAPS,       "maps",    S_IFREG|S_IRUGO), +	E(PROC_TID_MEM,        "mem",     S_IFREG|S_IRUSR|S_IWUSR), +#ifdef CONFIG_SECCOMP +	E(PROC_TID_SECCOMP,    "seccomp", S_IFREG|S_IRUSR|S_IWUSR), +#endif +	E(PROC_TID_CWD,        "cwd",     S_IFLNK|S_IRWXUGO), +	E(PROC_TID_ROOT,       "root",    S_IFLNK|S_IRWXUGO), +	E(PROC_TID_EXE,        "exe",     S_IFLNK|S_IRWXUGO), +	E(PROC_TID_MOUNTS,     "mounts",  S_IFREG|S_IRUGO), +#ifdef CONFIG_SECURITY +	E(PROC_TID_ATTR,       "attr",    S_IFDIR|S_IRUGO|S_IXUGO), +#endif +#ifdef CONFIG_KALLSYMS +	E(PROC_TID_WCHAN,      "wchan",   S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_SCHEDSTATS +	E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_CPUSETS +	E(PROC_TID_CPUSET,     "cpuset",  S_IFREG|S_IRUGO), +#endif +	E(PROC_TID_OOM_SCORE,  "oom_score",S_IFREG|S_IRUGO), +	E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR), +#ifdef CONFIG_AUDITSYSCALL +	E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), +#endif +	{0,0,NULL,0} +}; + +#ifdef CONFIG_SECURITY +static struct pid_entry tgid_attr_stuff[] = { +	E(PROC_TGID_ATTR_CURRENT,  "current",  S_IFREG|S_IRUGO|S_IWUGO), +	E(PROC_TGID_ATTR_PREV,     "prev",     S_IFREG|S_IRUGO), +	E(PROC_TGID_ATTR_EXEC,     "exec",     S_IFREG|S_IRUGO|S_IWUGO), +	E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), +	{0,0,NULL,0} +}; +static struct pid_entry tid_attr_stuff[] = { +	E(PROC_TID_ATTR_CURRENT,   "current",  S_IFREG|S_IRUGO|S_IWUGO), +	E(PROC_TID_ATTR_PREV,      "prev",     S_IFREG|S_IRUGO), +	E(PROC_TID_ATTR_EXEC,      "exec",     S_IFREG|S_IRUGO|S_IWUGO), +	E(PROC_TID_ATTR_FSCREATE,  "fscreate", S_IFREG|S_IRUGO|S_IWUGO), +	{0,0,NULL,0} +}; +#endif + +#undef E + +static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ +	struct task_struct *task = proc_task(inode); +	struct files_struct *files; +	struct file *file; +	int fd = proc_type(inode) - PROC_TID_FD_DIR; + +	files = get_files_struct(task); +	if (files) { +		spin_lock(&files->file_lock); +		file = fcheck_files(files, fd); +		if (file) { +			*mnt = mntget(file->f_vfsmnt); +			*dentry = dget(file->f_dentry); +			spin_unlock(&files->file_lock); +			put_files_struct(files); +			return 0; +		} +		spin_unlock(&files->file_lock); +		put_files_struct(files); +	} +	return -ENOENT; +} + +static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ +	struct fs_struct *fs; +	int result = -ENOENT; +	task_lock(proc_task(inode)); +	fs = proc_task(inode)->fs; +	if(fs) +		atomic_inc(&fs->count); +	task_unlock(proc_task(inode)); +	if (fs) { +		read_lock(&fs->lock); +		*mnt = mntget(fs->pwdmnt); +		*dentry = dget(fs->pwd); +		read_unlock(&fs->lock); +		result = 0; +		put_fs_struct(fs); +	} +	return result; +} + +static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ +	struct fs_struct *fs; +	int result = -ENOENT; +	task_lock(proc_task(inode)); +	fs = proc_task(inode)->fs; +	if(fs) +		atomic_inc(&fs->count); +	task_unlock(proc_task(inode)); +	if (fs) { +		read_lock(&fs->lock); +		*mnt = mntget(fs->rootmnt); +		*dentry = dget(fs->root); +		read_unlock(&fs->lock); +		result = 0; +		put_fs_struct(fs); +	} +	return result; +} + +#define MAY_PTRACE(task) \ +	(task == current || \ +	(task->parent == current && \ +	(task->ptrace & PT_PTRACED) && \ +	 (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ +	 security_ptrace(current,task) == 
0)) + +static int may_ptrace_attach(struct task_struct *task) +{ +	int retval = 0; + +	task_lock(task); + +	if (!task->mm) +		goto out; +	if (((current->uid != task->euid) || +	     (current->uid != task->suid) || +	     (current->uid != task->uid) || +	     (current->gid != task->egid) || +	     (current->gid != task->sgid) || +	     (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) +		goto out; +	rmb(); +	if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) +		goto out; +	if (security_ptrace(current, task)) +		goto out; + +	retval = 1; +out: +	task_unlock(task); +	return retval; +} + +static int proc_pid_environ(struct task_struct *task, char * buffer) +{ +	int res = 0; +	struct mm_struct *mm = get_task_mm(task); +	if (mm) { +		unsigned int len = mm->env_end - mm->env_start; +		if (len > PAGE_SIZE) +			len = PAGE_SIZE; +		res = access_process_vm(task, mm->env_start, buffer, len, 0); +		if (!may_ptrace_attach(task)) +			res = -ESRCH; +		mmput(mm); +	} +	return res; +} + +static int proc_pid_cmdline(struct task_struct *task, char * buffer) +{ +	int res = 0; +	unsigned int len; +	struct mm_struct *mm = get_task_mm(task); +	if (!mm) +		goto out; +	if (!mm->arg_end) +		goto out_mm;	/* Shh! No looking before we're done */ + + 	len = mm->arg_end - mm->arg_start; +  +	if (len > PAGE_SIZE) +		len = PAGE_SIZE; +  +	res = access_process_vm(task, mm->arg_start, buffer, len, 0); + +	// If the nul at the end of args has been overwritten, then +	// assume application is using setproctitle(3). +	if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { +		len = strnlen(buffer, res); +		if (len < res) { +		    res = len; +		} else { +			len = mm->env_end - mm->env_start; +			if (len > PAGE_SIZE - res) +				len = PAGE_SIZE - res; +			res += access_process_vm(task, mm->env_start, buffer+res, len, 0); +			res = strnlen(buffer, res); +		} +	} +out_mm: +	mmput(mm); +out: +	return res; +} + +static int proc_pid_auxv(struct task_struct *task, char *buffer) +{ +	int res = 0; +	struct mm_struct *mm = get_task_mm(task); +	if (mm) { +		unsigned int nwords = 0; +		do +			nwords += 2; +		while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ +		res = nwords * sizeof(mm->saved_auxv[0]); +		if (res > PAGE_SIZE) +			res = PAGE_SIZE; +		memcpy(buffer, mm->saved_auxv, res); +		mmput(mm); +	} +	return res; +} + + +#ifdef CONFIG_KALLSYMS +/* + * Provides a wchan file via kallsyms in a proper one-value-per-file format. + * Returns the resolved symbol.  If that fails, simply return the address. 
+ */ +static int proc_pid_wchan(struct task_struct *task, char *buffer) +{ +	char *modname; +	const char *sym_name; +	unsigned long wchan, size, offset; +	char namebuf[KSYM_NAME_LEN+1]; + +	wchan = get_wchan(task); + +	sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf); +	if (sym_name) +		return sprintf(buffer, "%s", sym_name); +	return sprintf(buffer, "%lu", wchan); +} +#endif /* CONFIG_KALLSYMS */ + +#ifdef CONFIG_SCHEDSTATS +/* + * Provides /proc/PID/schedstat + */ +static int proc_pid_schedstat(struct task_struct *task, char *buffer) +{ +	return sprintf(buffer, "%lu %lu %lu\n", +			task->sched_info.cpu_time, +			task->sched_info.run_delay, +			task->sched_info.pcnt); +} +#endif + +/* The badness from the OOM killer */ +unsigned long badness(struct task_struct *p, unsigned long uptime); +static int proc_oom_score(struct task_struct *task, char *buffer) +{ +	unsigned long points; +	struct timespec uptime; + +	do_posix_clock_monotonic_gettime(&uptime); +	points = badness(task, uptime.tv_sec); +	return sprintf(buffer, "%lu\n", points); +} + +/************************************************************************/ +/*                       Here the fs part begins                        */ +/************************************************************************/ + +/* permission checks */ + +static int proc_check_root(struct inode *inode) +{ +	struct dentry *de, *base, *root; +	struct vfsmount *our_vfsmnt, *vfsmnt, *mnt; +	int res = 0; + +	if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ +		return -ENOENT; +	read_lock(¤t->fs->lock); +	our_vfsmnt = mntget(current->fs->rootmnt); +	base = dget(current->fs->root); +	read_unlock(¤t->fs->lock); + +	spin_lock(&vfsmount_lock); +	de = root; +	mnt = vfsmnt; + +	while (vfsmnt != our_vfsmnt) { +		if (vfsmnt == vfsmnt->mnt_parent) +			goto out; +		de = vfsmnt->mnt_mountpoint; +		vfsmnt = vfsmnt->mnt_parent; +	} + +	if (!is_subdir(de, base)) +		goto out; +	spin_unlock(&vfsmount_lock); + +exit: +	dput(base); +	mntput(our_vfsmnt); +	dput(root); +	mntput(mnt); +	return res; +out: +	spin_unlock(&vfsmount_lock); +	res = -EACCES; +	goto exit; +} + +static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) +{ +	if (generic_permission(inode, mask, NULL) != 0) +		return -EACCES; +	return proc_check_root(inode); +} + +extern struct seq_operations proc_pid_maps_op; +static int maps_open(struct inode *inode, struct file *file) +{ +	struct task_struct *task = proc_task(inode); +	int ret = seq_open(file, &proc_pid_maps_op); +	if (!ret) { +		struct seq_file *m = file->private_data; +		m->private = task; +	} +	return ret; +} + +static struct file_operations proc_maps_operations = { +	.open		= maps_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release, +}; + +extern struct seq_operations mounts_op; +static int mounts_open(struct inode *inode, struct file *file) +{ +	struct task_struct *task = proc_task(inode); +	int ret = seq_open(file, &mounts_op); + +	if (!ret) { +		struct seq_file *m = file->private_data; +		struct namespace *namespace; +		task_lock(task); +		namespace = task->namespace; +		if (namespace) +			get_namespace(namespace); +		task_unlock(task); + +		if (namespace) +			m->private = namespace; +		else { +			seq_release(inode, file); +			ret = -EINVAL; +		} +	} +	return ret; +} + +static int mounts_release(struct inode *inode, struct file *file) +{ +	struct seq_file *m = file->private_data; +	struct namespace *namespace = m->private; +	put_namespace(namespace); +	return 
seq_release(inode, file); +} + +static struct file_operations proc_mounts_operations = { +	.open		= mounts_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= mounts_release, +}; + +#define PROC_BLOCK_SIZE	(3*1024)		/* 4K page size but our output routines use some slack for overruns */ + +static ssize_t proc_info_read(struct file * file, char __user * buf, +			  size_t count, loff_t *ppos) +{ +	struct inode * inode = file->f_dentry->d_inode; +	unsigned long page; +	ssize_t length; +	struct task_struct *task = proc_task(inode); + +	if (count > PROC_BLOCK_SIZE) +		count = PROC_BLOCK_SIZE; +	if (!(page = __get_free_page(GFP_KERNEL))) +		return -ENOMEM; + +	length = PROC_I(inode)->op.proc_read(task, (char*)page); + +	if (length >= 0) +		length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); +	free_page(page); +	return length; +} + +static struct file_operations proc_info_file_operations = { +	.read		= proc_info_read, +}; + +static int mem_open(struct inode* inode, struct file* file) +{ +	file->private_data = (void*)((long)current->self_exec_id); +	return 0; +} + +static ssize_t mem_read(struct file * file, char __user * buf, +			size_t count, loff_t *ppos) +{ +	struct task_struct *task = proc_task(file->f_dentry->d_inode); +	char *page; +	unsigned long src = *ppos; +	int ret = -ESRCH; +	struct mm_struct *mm; + +	if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) +		goto out; + +	ret = -ENOMEM; +	page = (char *)__get_free_page(GFP_USER); +	if (!page) +		goto out; + +	ret = 0; +  +	mm = get_task_mm(task); +	if (!mm) +		goto out_free; + +	ret = -EIO; +  +	if (file->private_data != (void*)((long)current->self_exec_id)) +		goto out_put; + +	ret = 0; +  +	while (count > 0) { +		int this_len, retval; + +		this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; +		retval = access_process_vm(task, src, page, this_len, 0); +		if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) { +			if (!ret) +				ret = -EIO; +			break; +		} + +		if (copy_to_user(buf, page, retval)) { +			ret = -EFAULT; +			break; +		} +  +		ret += retval; +		src += retval; +		buf += retval; +		count -= retval; +	} +	*ppos = src; + +out_put: +	mmput(mm); +out_free: +	free_page((unsigned long) page); +out: +	return ret; +} + +#define mem_write NULL + +#ifndef mem_write +/* This is a security hazard */ +static ssize_t mem_write(struct file * file, const char * buf, +			 size_t count, loff_t *ppos) +{ +	int copied = 0; +	char *page; +	struct task_struct *task = proc_task(file->f_dentry->d_inode); +	unsigned long dst = *ppos; + +	if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) +		return -ESRCH; + +	page = (char *)__get_free_page(GFP_USER); +	if (!page) +		return -ENOMEM; + +	while (count > 0) { +		int this_len, retval; + +		this_len = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; +		if (copy_from_user(page, buf, this_len)) { +			copied = -EFAULT; +			break; +		} +		retval = access_process_vm(task, dst, page, this_len, 1); +		if (!retval) { +			if (!copied) +				copied = -EIO; +			break; +		} +		copied += retval; +		buf += retval; +		dst += retval; +		count -= retval;			 +	} +	*ppos = dst; +	free_page((unsigned long) page); +	return copied; +} +#endif + +static loff_t mem_lseek(struct file * file, loff_t offset, int orig) +{ +	switch (orig) { +	case 0: +		file->f_pos = offset; +		break; +	case 1: +		file->f_pos += offset; +		break; +	default: +		return -EINVAL; +	} +	force_successful_syscall_return(); +	return file->f_pos; +} + +static struct file_operations proc_mem_operations = { +	.llseek		= mem_lseek, +	.read		= mem_read, +	.write		= mem_write, +	.open		= mem_open, +}; + +static ssize_t oom_adjust_read(struct file *file, char __user *buf, +				size_t count, loff_t *ppos) +{ +	struct task_struct *task = proc_task(file->f_dentry->d_inode); +	char buffer[8]; +	size_t len; +	int oom_adjust = task->oomkilladj; +	loff_t __ppos = *ppos; + +	len = sprintf(buffer, "%i\n", oom_adjust); +	if (__ppos >= len) +		return 0; +	if (count > len-__ppos) +		count = len-__ppos; +	if (copy_to_user(buf, buffer + __ppos, count)) +		return -EFAULT; +	*ppos = __ppos + count; +	return count; +} + +static ssize_t oom_adjust_write(struct file *file, const char __user *buf, +				size_t count, loff_t *ppos) +{ +	struct task_struct *task = proc_task(file->f_dentry->d_inode); +	char buffer[8], *end; +	int oom_adjust; + +	if (!capable(CAP_SYS_RESOURCE)) +		return -EPERM; +	memset(buffer, 0, 8); +	if (count > 6) +		count = 6; +	if (copy_from_user(buffer, buf, count)) +		return -EFAULT; +	oom_adjust = simple_strtol(buffer, &end, 0); +	if (oom_adjust < -16 || oom_adjust > 15) +		return -EINVAL; +	if (*end == '\n') +		end++; +	task->oomkilladj = oom_adjust; +	if (end - buffer == 0) +		return -EIO; +	return end - buffer; +} + +static struct file_operations proc_oom_adjust_operations = { +	.read		= oom_adjust_read, +	.write		= oom_adjust_write, +}; + +static struct inode_operations proc_mem_inode_operations = { +	.permission	= proc_permission, +}; + +#ifdef CONFIG_AUDITSYSCALL +#define TMPBUFLEN 21 +static ssize_t proc_loginuid_read(struct file * file, char __user * buf, +				  size_t count, loff_t *ppos) +{ +	struct inode * inode = file->f_dentry->d_inode; +	struct task_struct *task = proc_task(inode); +	ssize_t length; +	char tmpbuf[TMPBUFLEN]; + +	length = scnprintf(tmpbuf, TMPBUFLEN, "%u", +				audit_get_loginuid(task->audit_context)); +	return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); +} + +static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, +				   size_t count, loff_t *ppos) +{ +	struct inode * inode = file->f_dentry->d_inode; +	char *page, *tmp; +	ssize_t length; +	struct task_struct *task = proc_task(inode); +	uid_t loginuid; + +	if (!capable(CAP_AUDIT_CONTROL)) +		return -EPERM; + +	if (current != task) +		return -EPERM; + +	if (count > PAGE_SIZE) +		count = PAGE_SIZE; + +	if (*ppos != 0) { +		/* No partial writes. 
*/ +		return -EINVAL; +	} +	page = (char*)__get_free_page(GFP_USER); +	if (!page) +		return -ENOMEM; +	length = -EFAULT; +	if (copy_from_user(page, buf, count)) +		goto out_free_page; + +	loginuid = simple_strtoul(page, &tmp, 10); +	if (tmp == page) { +		length = -EINVAL; +		goto out_free_page; + +	} +	length = audit_set_loginuid(task->audit_context, loginuid); +	if (likely(length == 0)) +		length = count; + +out_free_page: +	free_page((unsigned long) page); +	return length; +} + +static struct file_operations proc_loginuid_operations = { +	.read		= proc_loginuid_read, +	.write		= proc_loginuid_write, +}; +#endif + +#ifdef CONFIG_SECCOMP +static ssize_t seccomp_read(struct file *file, char __user *buf, +			    size_t count, loff_t *ppos) +{ +	struct task_struct *tsk = proc_task(file->f_dentry->d_inode); +	char __buf[20]; +	loff_t __ppos = *ppos; +	size_t len; + +	/* no need to print the trailing zero, so use only len */ +	len = sprintf(__buf, "%u\n", tsk->seccomp.mode); +	if (__ppos >= len) +		return 0; +	if (count > len - __ppos) +		count = len - __ppos; +	if (copy_to_user(buf, __buf + __ppos, count)) +		return -EFAULT; +	*ppos = __ppos + count; +	return count; +} + +static ssize_t seccomp_write(struct file *file, const char __user *buf, +			     size_t count, loff_t *ppos) +{ +	struct task_struct *tsk = proc_task(file->f_dentry->d_inode); +	char __buf[20], *end; +	unsigned int seccomp_mode; + +	/* can set it only once to be even more secure */ +	if (unlikely(tsk->seccomp.mode)) +		return -EPERM; + +	memset(__buf, 0, sizeof(__buf)); +	count = min(count, sizeof(__buf) - 1); +	if (copy_from_user(__buf, buf, count)) +		return -EFAULT; +	seccomp_mode = simple_strtoul(__buf, &end, 0); +	if (*end == '\n') +		end++; +	if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { +		tsk->seccomp.mode = seccomp_mode; +		set_tsk_thread_flag(tsk, TIF_SECCOMP); +	} else +		return -EINVAL; +	if (unlikely(!(end - __buf))) +		return -EIO; +	return end - __buf; +} + +static struct file_operations proc_seccomp_operations = { +	.read		= seccomp_read, +	.write		= seccomp_write, +}; +#endif /* CONFIG_SECCOMP */ + +static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) +{ +	struct inode *inode = dentry->d_inode; +	int error = -EACCES; + +	/* We don't need a base pointer in the /proc filesystem */ +	path_release(nd); + +	if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) +		goto out; +	error = proc_check_root(inode); +	if (error) +		goto out; + +	error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); +	nd->last_type = LAST_BIND; +out: +	return error; +} + +static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, +			    char __user *buffer, int buflen) +{ +	struct inode * inode; +	char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; +	int len; + +	if (!tmp) +		return -ENOMEM; +		 +	inode = dentry->d_inode; +	path = d_path(dentry, mnt, tmp, PAGE_SIZE); +	len = PTR_ERR(path); +	if (IS_ERR(path)) +		goto out; +	len = tmp + PAGE_SIZE - 1 - path; + +	if (len > buflen) +		len = buflen; +	if (copy_to_user(buffer, path, len)) +		len = -EFAULT; + out: +	free_page((unsigned long)tmp); +	return len; +} + +static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) +{ +	int error = -EACCES; +	struct inode *inode = dentry->d_inode; +	struct dentry *de; +	struct vfsmount *mnt = NULL; + +	lock_kernel(); + +	if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) +		goto out; +	error = proc_check_root(inode); +	if (error) +		
goto out; + +	error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); +	if (error) +		goto out; + +	error = do_proc_readlink(de, mnt, buffer, buflen); +	dput(de); +	mntput(mnt); +out: +	unlock_kernel(); +	return error; +} + +static struct inode_operations proc_pid_link_inode_operations = { +	.readlink	= proc_pid_readlink, +	.follow_link	= proc_pid_follow_link +}; + +#define NUMBUF 10 + +static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) +{ +	struct inode *inode = filp->f_dentry->d_inode; +	struct task_struct *p = proc_task(inode); +	unsigned int fd, tid, ino; +	int retval; +	char buf[NUMBUF]; +	struct files_struct * files; + +	retval = -ENOENT; +	if (!pid_alive(p)) +		goto out; +	retval = 0; +	tid = p->pid; + +	fd = filp->f_pos; +	switch (fd) { +		case 0: +			if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) +				goto out; +			filp->f_pos++; +		case 1: +			ino = fake_ino(tid, PROC_TID_INO); +			if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) +				goto out; +			filp->f_pos++; +		default: +			files = get_files_struct(p); +			if (!files) +				goto out; +			spin_lock(&files->file_lock); +			for (fd = filp->f_pos-2; +			     fd < files->max_fds; +			     fd++, filp->f_pos++) { +				unsigned int i,j; + +				if (!fcheck_files(files, fd)) +					continue; +				spin_unlock(&files->file_lock); + +				j = NUMBUF; +				i = fd; +				do { +					j--; +					buf[j] = '0' + (i % 10); +					i /= 10; +				} while (i); + +				ino = fake_ino(tid, PROC_TID_FD_DIR + fd); +				if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { +					spin_lock(&files->file_lock); +					break; +				} +				spin_lock(&files->file_lock); +			} +			spin_unlock(&files->file_lock); +			put_files_struct(files); +	} +out: +	return retval; +} + +static int proc_pident_readdir(struct file *filp, +		void *dirent, filldir_t filldir, +		struct pid_entry *ents, unsigned int nents) +{ +	int i; +	int pid; +	struct dentry *dentry = filp->f_dentry; +	struct inode *inode = dentry->d_inode; +	struct pid_entry *p; +	ino_t ino; +	int ret; + +	ret = -ENOENT; +	if (!pid_alive(proc_task(inode))) +		goto out; + +	ret = 0; +	pid = proc_task(inode)->pid; +	i = filp->f_pos; +	switch (i) { +	case 0: +		ino = inode->i_ino; +		if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) +			goto out; +		i++; +		filp->f_pos++; +		/* fall through */ +	case 1: +		ino = parent_ino(dentry); +		if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) +			goto out; +		i++; +		filp->f_pos++; +		/* fall through */ +	default: +		i -= 2; +		if (i >= nents) { +			ret = 1; +			goto out; +		} +		p = ents + i; +		while (p->name) { +			if (filldir(dirent, p->name, p->len, filp->f_pos, +				    fake_ino(pid, p->type), p->mode >> 12) < 0) +				goto out; +			filp->f_pos++; +			p++; +		} +	} + +	ret = 1; +out: +	return ret; +} + +static int proc_tgid_base_readdir(struct file * filp, +			     void * dirent, filldir_t filldir) +{ +	return proc_pident_readdir(filp,dirent,filldir, +				   tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); +} + +static int proc_tid_base_readdir(struct file * filp, +			     void * dirent, filldir_t filldir) +{ +	return proc_pident_readdir(filp,dirent,filldir, +				   tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); +} + +/* building an inode */ + +static int task_dumpable(struct task_struct *task) +{ +	int dumpable = 0; +	struct mm_struct *mm; + +	task_lock(task); +	mm = task->mm; +	if (mm) +		dumpable = mm->dumpable; +	task_unlock(task); +	return dumpable; +} + + +static struct inode *proc_pid_make_inode(struct super_block * sb, 
struct task_struct *task, int ino) +{ +	struct inode * inode; +	struct proc_inode *ei; + +	/* We need a new inode */ +	 +	inode = new_inode(sb); +	if (!inode) +		goto out; + +	/* Common stuff */ +	ei = PROC_I(inode); +	ei->task = NULL; +	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; +	inode->i_ino = fake_ino(task->pid, ino); + +	if (!pid_alive(task)) +		goto out_unlock; + +	/* +	 * grab the reference to task. +	 */ +	get_task_struct(task); +	ei->task = task; +	ei->type = ino; +	inode->i_uid = 0; +	inode->i_gid = 0; +	if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) { +		inode->i_uid = task->euid; +		inode->i_gid = task->egid; +	} +	security_task_to_inode(task, inode); + +out: +	return inode; + +out_unlock: +	ei->pde = NULL; +	iput(inode); +	return NULL; +} + +/* dentry stuff */ + +/* + *	Exceptional case: normally we are not allowed to unhash a busy + * directory. In this case, however, we can do it - no aliasing problems + * due to the way we treat inodes. + * + * Rewrite the inode's ownerships here because the owning task may have + * performed a setuid(), etc. + */ +static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) +{ +	struct inode *inode = dentry->d_inode; +	struct task_struct *task = proc_task(inode); +	if (pid_alive(task)) { +		if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { +			inode->i_uid = task->euid; +			inode->i_gid = task->egid; +		} else { +			inode->i_uid = 0; +			inode->i_gid = 0; +		} +		security_task_to_inode(task, inode); +		return 1; +	} +	d_drop(dentry); +	return 0; +} + +static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) +{ +	struct inode *inode = dentry->d_inode; +	struct task_struct *task = proc_task(inode); +	int fd = proc_type(inode) - PROC_TID_FD_DIR; +	struct files_struct *files; + +	files = get_files_struct(task); +	if (files) { +		spin_lock(&files->file_lock); +		if (fcheck_files(files, fd)) { +			spin_unlock(&files->file_lock); +			put_files_struct(files); +			if (task_dumpable(task)) { +				inode->i_uid = task->euid; +				inode->i_gid = task->egid; +			} else { +				inode->i_uid = 0; +				inode->i_gid = 0; +			} +			security_task_to_inode(task, inode); +			return 1; +		} +		spin_unlock(&files->file_lock); +		put_files_struct(files); +	} +	d_drop(dentry); +	return 0; +} + +static void pid_base_iput(struct dentry *dentry, struct inode *inode) +{ +	struct task_struct *task = proc_task(inode); +	spin_lock(&task->proc_lock); +	if (task->proc_dentry == dentry) +		task->proc_dentry = NULL; +	spin_unlock(&task->proc_lock); +	iput(inode); +} + +static int pid_delete_dentry(struct dentry * dentry) +{ +	/* Is the task we represent dead? +	 * If so, then don't put the dentry on the lru list, +	 * kill it immediately. 
+	 */ +	return !pid_alive(proc_task(dentry->d_inode)); +} + +static struct dentry_operations tid_fd_dentry_operations = +{ +	.d_revalidate	= tid_fd_revalidate, +	.d_delete	= pid_delete_dentry, +}; + +static struct dentry_operations pid_dentry_operations = +{ +	.d_revalidate	= pid_revalidate, +	.d_delete	= pid_delete_dentry, +}; + +static struct dentry_operations pid_base_dentry_operations = +{ +	.d_revalidate	= pid_revalidate, +	.d_iput		= pid_base_iput, +	.d_delete	= pid_delete_dentry, +}; + +/* Lookups */ + +static unsigned name_to_int(struct dentry *dentry) +{ +	const char *name = dentry->d_name.name; +	int len = dentry->d_name.len; +	unsigned n = 0; + +	if (len > 1 && *name == '0') +		goto out; +	while (len-- > 0) { +		unsigned c = *name++ - '0'; +		if (c > 9) +			goto out; +		if (n >= (~0U-9)/10) +			goto out; +		n *= 10; +		n += c; +	} +	return n; +out: +	return ~0U; +} + +/* SMP-safe */ +static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) +{ +	struct task_struct *task = proc_task(dir); +	unsigned fd = name_to_int(dentry); +	struct file * file; +	struct files_struct * files; +	struct inode *inode; +	struct proc_inode *ei; + +	if (fd == ~0U) +		goto out; +	if (!pid_alive(task)) +		goto out; + +	inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); +	if (!inode) +		goto out; +	ei = PROC_I(inode); +	files = get_files_struct(task); +	if (!files) +		goto out_unlock; +	inode->i_mode = S_IFLNK; +	spin_lock(&files->file_lock); +	file = fcheck_files(files, fd); +	if (!file) +		goto out_unlock2; +	if (file->f_mode & 1) +		inode->i_mode |= S_IRUSR | S_IXUSR; +	if (file->f_mode & 2) +		inode->i_mode |= S_IWUSR | S_IXUSR; +	spin_unlock(&files->file_lock); +	put_files_struct(files); +	inode->i_op = &proc_pid_link_inode_operations; +	inode->i_size = 64; +	ei->op.proc_get_link = proc_fd_link; +	dentry->d_op = &tid_fd_dentry_operations; +	d_add(dentry, inode); +	return NULL; + +out_unlock2: +	spin_unlock(&files->file_lock); +	put_files_struct(files); +out_unlock: +	iput(inode); +out: +	return ERR_PTR(-ENOENT); +} + +static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); +static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); + +static struct file_operations proc_fd_operations = { +	.read		= generic_read_dir, +	.readdir	= proc_readfd, +}; + +static struct file_operations proc_task_operations = { +	.read		= generic_read_dir, +	.readdir	= proc_task_readdir, +}; + +/* + * proc directories can do almost nothing.. 
+ */ +static struct inode_operations proc_fd_inode_operations = { +	.lookup		= proc_lookupfd, +	.permission	= proc_permission, +}; + +static struct inode_operations proc_task_inode_operations = { +	.lookup		= proc_task_lookup, +	.permission	= proc_permission, +}; + +#ifdef CONFIG_SECURITY +static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, +				  size_t count, loff_t *ppos) +{ +	struct inode * inode = file->f_dentry->d_inode; +	unsigned long page; +	ssize_t length; +	struct task_struct *task = proc_task(inode); + +	if (count > PAGE_SIZE) +		count = PAGE_SIZE; +	if (!(page = __get_free_page(GFP_KERNEL))) +		return -ENOMEM; + +	length = security_getprocattr(task,  +				      (char*)file->f_dentry->d_name.name,  +				      (void*)page, count); +	if (length >= 0) +		length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); +	free_page(page); +	return length; +} + +static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, +				   size_t count, loff_t *ppos) +{  +	struct inode * inode = file->f_dentry->d_inode; +	char *page;  +	ssize_t length;  +	struct task_struct *task = proc_task(inode);  + +	if (count > PAGE_SIZE)  +		count = PAGE_SIZE;  +	if (*ppos != 0) { +		/* No partial writes. */ +		return -EINVAL; +	} +	page = (char*)__get_free_page(GFP_USER);  +	if (!page)  +		return -ENOMEM; +	length = -EFAULT;  +	if (copy_from_user(page, buf, count))  +		goto out; + +	length = security_setprocattr(task,  +				      (char*)file->f_dentry->d_name.name,  +				      (void*)page, count); +out: +	free_page((unsigned long) page); +	return length; +}  + +static struct file_operations proc_pid_attr_operations = { +	.read		= proc_pid_attr_read, +	.write		= proc_pid_attr_write, +}; + +static struct file_operations proc_tid_attr_operations; +static struct inode_operations proc_tid_attr_inode_operations; +static struct file_operations proc_tgid_attr_operations; +static struct inode_operations proc_tgid_attr_inode_operations; +#endif + +/* SMP-safe */ +static struct dentry *proc_pident_lookup(struct inode *dir,  +					 struct dentry *dentry, +					 struct pid_entry *ents) +{ +	struct inode *inode; +	int error; +	struct task_struct *task = proc_task(dir); +	struct pid_entry *p; +	struct proc_inode *ei; + +	error = -ENOENT; +	inode = NULL; + +	if (!pid_alive(task)) +		goto out; + +	for (p = ents; p->name; p++) { +		if (p->len != dentry->d_name.len) +			continue; +		if (!memcmp(dentry->d_name.name, p->name, p->len)) +			break; +	} +	if (!p->name) +		goto out; + +	error = -EINVAL; +	inode = proc_pid_make_inode(dir->i_sb, task, p->type); +	if (!inode) +		goto out; + +	ei = PROC_I(inode); +	inode->i_mode = p->mode; +	/* +	 * Yes, it does not scale. And it should not. Don't add +	 * new entries into /proc/<tgid>/ without very good reasons. 
+	 */ +	switch(p->type) { +		case PROC_TGID_TASK: +			inode->i_nlink = 3; +			inode->i_op = &proc_task_inode_operations; +			inode->i_fop = &proc_task_operations; +			break; +		case PROC_TID_FD: +		case PROC_TGID_FD: +			inode->i_nlink = 2; +			inode->i_op = &proc_fd_inode_operations; +			inode->i_fop = &proc_fd_operations; +			break; +		case PROC_TID_EXE: +		case PROC_TGID_EXE: +			inode->i_op = &proc_pid_link_inode_operations; +			ei->op.proc_get_link = proc_exe_link; +			break; +		case PROC_TID_CWD: +		case PROC_TGID_CWD: +			inode->i_op = &proc_pid_link_inode_operations; +			ei->op.proc_get_link = proc_cwd_link; +			break; +		case PROC_TID_ROOT: +		case PROC_TGID_ROOT: +			inode->i_op = &proc_pid_link_inode_operations; +			ei->op.proc_get_link = proc_root_link; +			break; +		case PROC_TID_ENVIRON: +		case PROC_TGID_ENVIRON: +			inode->i_fop = &proc_info_file_operations; +			ei->op.proc_read = proc_pid_environ; +			break; +		case PROC_TID_AUXV: +		case PROC_TGID_AUXV: +			inode->i_fop = &proc_info_file_operations; +			ei->op.proc_read = proc_pid_auxv; +			break; +		case PROC_TID_STATUS: +		case PROC_TGID_STATUS: +			inode->i_fop = &proc_info_file_operations; +			ei->op.proc_read = proc_pid_status; +			break; +		case PROC_TID_STAT: +			inode->i_fop = &proc_info_file_operations; +			ei->op.proc_read = proc_tid_stat; +			break; +		case PROC_TGID_STAT: +			inode->i_fop = &proc_info_file_operations; +			ei->op.proc_read = proc_tgid_stat; +			break; +		case PROC_TID_CMDLINE: +		case PROC_TGID_CMDLINE: +			inode->i_fop = &proc_info_file_operations; +			ei->op.proc_read = proc_pid_cmdline; +			break; +		case PROC_TID_STATM: +		case PROC_TGID_STATM: +			inode->i_fop = &proc_info_file_operations; +			ei->op.proc_read = proc_pid_statm; +			break; +		case PROC_TID_MAPS: +		case PROC_TGID_MAPS: +			inode->i_fop = &proc_maps_operations; +			break; +		case PROC_TID_MEM: +		case PROC_TGID_MEM: +			inode->i_op = &proc_mem_inode_operations; +			inode->i_fop = &proc_mem_operations; +			break; +#ifdef CONFIG_SECCOMP +		case PROC_TID_SECCOMP: +		case PROC_TGID_SECCOMP: +			inode->i_fop = &proc_seccomp_operations; +			break; +#endif /* CONFIG_SECCOMP */ +		case PROC_TID_MOUNTS: +		case PROC_TGID_MOUNTS: +			inode->i_fop = &proc_mounts_operations; +			break; +#ifdef CONFIG_SECURITY +		case PROC_TID_ATTR: +			inode->i_nlink = 2; +			inode->i_op = &proc_tid_attr_inode_operations; +			inode->i_fop = &proc_tid_attr_operations; +			break; +		case PROC_TGID_ATTR: +			inode->i_nlink = 2; +			inode->i_op = &proc_tgid_attr_inode_operations; +			inode->i_fop = &proc_tgid_attr_operations; +			break; +		case PROC_TID_ATTR_CURRENT: +		case PROC_TGID_ATTR_CURRENT: +		case PROC_TID_ATTR_PREV: +		case PROC_TGID_ATTR_PREV: +		case PROC_TID_ATTR_EXEC: +		case PROC_TGID_ATTR_EXEC: +		case PROC_TID_ATTR_FSCREATE: +		case PROC_TGID_ATTR_FSCREATE: +			inode->i_fop = &proc_pid_attr_operations; +			break; +#endif +#ifdef CONFIG_KALLSYMS +		case PROC_TID_WCHAN: +		case PROC_TGID_WCHAN: +			inode->i_fop = &proc_info_file_operations; +			ei->op.proc_read = proc_pid_wchan; +			break; +#endif +#ifdef CONFIG_SCHEDSTATS +		case PROC_TID_SCHEDSTAT: +		case PROC_TGID_SCHEDSTAT: +			inode->i_fop = &proc_info_file_operations; +			ei->op.proc_read = proc_pid_schedstat; +			break; +#endif +#ifdef CONFIG_CPUSETS +		case PROC_TID_CPUSET: +		case PROC_TGID_CPUSET: +			inode->i_fop = &proc_cpuset_operations; +			break; +#endif +		case PROC_TID_OOM_SCORE: +		case PROC_TGID_OOM_SCORE: +			inode->i_fop = &proc_info_file_operations; +			ei->op.proc_read 
= proc_oom_score; +			break; +		case PROC_TID_OOM_ADJUST: +		case PROC_TGID_OOM_ADJUST: +			inode->i_fop = &proc_oom_adjust_operations; +			break; +#ifdef CONFIG_AUDITSYSCALL +		case PROC_TID_LOGINUID: +		case PROC_TGID_LOGINUID: +			inode->i_fop = &proc_loginuid_operations; +			break; +#endif +		default: +			printk("procfs: impossible type (%d)",p->type); +			iput(inode); +			return ERR_PTR(-EINVAL); +	} +	dentry->d_op = &pid_dentry_operations; +	d_add(dentry, inode); +	return NULL; + +out: +	return ERR_PTR(error); +} + +static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ +	return proc_pident_lookup(dir, dentry, tgid_base_stuff); +} + +static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ +	return proc_pident_lookup(dir, dentry, tid_base_stuff); +} + +static struct file_operations proc_tgid_base_operations = { +	.read		= generic_read_dir, +	.readdir	= proc_tgid_base_readdir, +}; + +static struct file_operations proc_tid_base_operations = { +	.read		= generic_read_dir, +	.readdir	= proc_tid_base_readdir, +}; + +static struct inode_operations proc_tgid_base_inode_operations = { +	.lookup		= proc_tgid_base_lookup, +}; + +static struct inode_operations proc_tid_base_inode_operations = { +	.lookup		= proc_tid_base_lookup, +}; + +#ifdef CONFIG_SECURITY +static int proc_tgid_attr_readdir(struct file * filp, +			     void * dirent, filldir_t filldir) +{ +	return proc_pident_readdir(filp,dirent,filldir, +				   tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff)); +} + +static int proc_tid_attr_readdir(struct file * filp, +			     void * dirent, filldir_t filldir) +{ +	return proc_pident_readdir(filp,dirent,filldir, +				   tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff)); +} + +static struct file_operations proc_tgid_attr_operations = { +	.read		= generic_read_dir, +	.readdir	= proc_tgid_attr_readdir, +}; + +static struct file_operations proc_tid_attr_operations = { +	.read		= generic_read_dir, +	.readdir	= proc_tid_attr_readdir, +}; + +static struct dentry *proc_tgid_attr_lookup(struct inode *dir, +				struct dentry *dentry, struct nameidata *nd) +{ +	return proc_pident_lookup(dir, dentry, tgid_attr_stuff); +} + +static struct dentry *proc_tid_attr_lookup(struct inode *dir, +				struct dentry *dentry, struct nameidata *nd) +{ +	return proc_pident_lookup(dir, dentry, tid_attr_stuff); +} + +static struct inode_operations proc_tgid_attr_inode_operations = { +	.lookup		= proc_tgid_attr_lookup, +}; + +static struct inode_operations proc_tid_attr_inode_operations = { +	.lookup		= proc_tid_attr_lookup, +}; +#endif + +/* + * /proc/self: + */ +static int proc_self_readlink(struct dentry *dentry, char __user *buffer, +			      int buflen) +{ +	char tmp[30]; +	sprintf(tmp, "%d", current->tgid); +	return vfs_readlink(dentry,buffer,buflen,tmp); +} + +static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) +{ +	char tmp[30]; +	sprintf(tmp, "%d", current->tgid); +	return vfs_follow_link(nd,tmp); +}	 + +static struct inode_operations proc_self_inode_operations = { +	.readlink	= proc_self_readlink, +	.follow_link	= proc_self_follow_link, +}; + +/** + * proc_pid_unhash -  Unhash /proc/<pid> entry from the dcache. + * @p: task that should be flushed. + * + * Drops the /proc/<pid> dcache entry from the hash chains. + * + * Dropping /proc/<pid> entries and detach_pid must be synchroneous, + * otherwise e.g. 
/proc/<pid>/exe might point to the wrong executable, + * if the pid value is immediately reused. This is enforced by + * - caller must acquire spin_lock(p->proc_lock) + * - must be called before detach_pid() + * - proc_pid_lookup acquires proc_lock, and checks that + *   the target is not dead by looking at the attach count + *   of PIDTYPE_PID. + */ + +struct dentry *proc_pid_unhash(struct task_struct *p) +{ +	struct dentry *proc_dentry; + +	proc_dentry = p->proc_dentry; +	if (proc_dentry != NULL) { + +		spin_lock(&dcache_lock); +		spin_lock(&proc_dentry->d_lock); +		if (!d_unhashed(proc_dentry)) { +			dget_locked(proc_dentry); +			__d_drop(proc_dentry); +			spin_unlock(&proc_dentry->d_lock); +		} else { +			spin_unlock(&proc_dentry->d_lock); +			proc_dentry = NULL; +		} +		spin_unlock(&dcache_lock); +	} +	return proc_dentry; +} + +/** + * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries + * @proc_entry: directoy to prune. + * + * Shrink the /proc directory that was used by the just killed thread. + */ +	 +void proc_pid_flush(struct dentry *proc_dentry) +{ +	might_sleep(); +	if(proc_dentry != NULL) { +		shrink_dcache_parent(proc_dentry); +		dput(proc_dentry); +	} +} + +/* SMP-safe */ +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +{ +	struct task_struct *task; +	struct inode *inode; +	struct proc_inode *ei; +	unsigned tgid; +	int died; + +	if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { +		inode = new_inode(dir->i_sb); +		if (!inode) +			return ERR_PTR(-ENOMEM); +		ei = PROC_I(inode); +		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; +		inode->i_ino = fake_ino(0, PROC_TGID_INO); +		ei->pde = NULL; +		inode->i_mode = S_IFLNK|S_IRWXUGO; +		inode->i_uid = inode->i_gid = 0; +		inode->i_size = 64; +		inode->i_op = &proc_self_inode_operations; +		d_add(dentry, inode); +		return NULL; +	} +	tgid = name_to_int(dentry); +	if (tgid == ~0U) +		goto out; + +	read_lock(&tasklist_lock); +	task = find_task_by_pid(tgid); +	if (task) +		get_task_struct(task); +	read_unlock(&tasklist_lock); +	if (!task) +		goto out; + +	inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); + + +	if (!inode) { +		put_task_struct(task); +		goto out; +	} +	inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; +	inode->i_op = &proc_tgid_base_inode_operations; +	inode->i_fop = &proc_tgid_base_operations; +	inode->i_nlink = 3; +	inode->i_flags|=S_IMMUTABLE; + +	dentry->d_op = &pid_base_dentry_operations; + +	died = 0; +	d_add(dentry, inode); +	spin_lock(&task->proc_lock); +	task->proc_dentry = dentry; +	if (!pid_alive(task)) { +		dentry = proc_pid_unhash(task); +		died = 1; +	} +	spin_unlock(&task->proc_lock); + +	put_task_struct(task); +	if (died) { +		proc_pid_flush(dentry); +		goto out; +	} +	return NULL; +out: +	return ERR_PTR(-ENOENT); +} + +/* SMP-safe */ +static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +{ +	struct task_struct *task; +	struct task_struct *leader = proc_task(dir); +	struct inode *inode; +	unsigned tid; + +	tid = name_to_int(dentry); +	if (tid == ~0U) +		goto out; + +	read_lock(&tasklist_lock); +	task = find_task_by_pid(tid); +	if (task) +		get_task_struct(task); +	read_unlock(&tasklist_lock); +	if (!task) +		goto out; +	if (leader->tgid != task->tgid) +		goto out_drop_task; + +	inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); + + +	if (!inode) +		goto out_drop_task; +	inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; +	inode->i_op = 
&proc_tid_base_inode_operations; +	inode->i_fop = &proc_tid_base_operations; +	inode->i_nlink = 3; +	inode->i_flags|=S_IMMUTABLE; + +	dentry->d_op = &pid_base_dentry_operations; + +	d_add(dentry, inode); + +	put_task_struct(task); +	return NULL; +out_drop_task: +	put_task_struct(task); +out: +	return ERR_PTR(-ENOENT); +} + +#define PROC_NUMBUF 10 +#define PROC_MAXPIDS 20 + +/* + * Get a few tgid's to return for filldir - we need to hold the + * tasklist lock while doing this, and we must release it before + * we actually do the filldir itself, so we use a temp buffer.. + */ +static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) +{ +	struct task_struct *p; +	int nr_tgids = 0; + +	index--; +	read_lock(&tasklist_lock); +	p = NULL; +	if (version) { +		p = find_task_by_pid(version); +		if (p && !thread_group_leader(p)) +			p = NULL; +	} + +	if (p) +		index = 0; +	else +		p = next_task(&init_task); + +	for ( ; p != &init_task; p = next_task(p)) { +		int tgid = p->pid; +		if (!pid_alive(p)) +			continue; +		if (--index >= 0) +			continue; +		tgids[nr_tgids] = tgid; +		nr_tgids++; +		if (nr_tgids >= PROC_MAXPIDS) +			break; +	} +	read_unlock(&tasklist_lock); +	return nr_tgids; +} + +/* + * Get a few tid's to return for filldir - we need to hold the + * tasklist lock while doing this, and we must release it before + * we actually do the filldir itself, so we use a temp buffer.. + */ +static int get_tid_list(int index, unsigned int *tids, struct inode *dir) +{ +	struct task_struct *leader_task = proc_task(dir); +	struct task_struct *task = leader_task; +	int nr_tids = 0; + +	index -= 2; +	read_lock(&tasklist_lock); +	/* +	 * The starting point task (leader_task) might be an already +	 * unlinked task, which cannot be used to access the task-list +	 * via next_thread(). +	 */ +	if (pid_alive(task)) do { +		int tid = task->pid; + +		if (--index >= 0) +			continue; +		tids[nr_tids] = tid; +		nr_tids++; +		if (nr_tids >= PROC_MAXPIDS) +			break; +	} while ((task = next_thread(task)) != leader_task); +	read_unlock(&tasklist_lock); +	return nr_tids; +} + +/* for the /proc/ directory itself, after non-process stuff has been done */ +int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ +	unsigned int tgid_array[PROC_MAXPIDS]; +	char buf[PROC_NUMBUF]; +	unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; +	unsigned int nr_tgids, i; +	int next_tgid; + +	if (!nr) { +		ino_t ino = fake_ino(0,PROC_TGID_INO); +		if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0) +			return 0; +		filp->f_pos++; +		nr++; +	} + +	/* f_version caches the tgid value that the last readdir call couldn't +	 * return. lseek aka telldir automagically resets f_version to 0. +	 */ +	next_tgid = filp->f_version; +	filp->f_version = 0; +	for (;;) { +		nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); +		if (!nr_tgids) { +			/* no more entries ! 
*/ +			break; +		} +		next_tgid = 0; + +		/* do not use the last found pid, reserve it for next_tgid */ +		if (nr_tgids == PROC_MAXPIDS) { +			nr_tgids--; +			next_tgid = tgid_array[nr_tgids]; +		} + +		for (i=0;i<nr_tgids;i++) { +			int tgid = tgid_array[i]; +			ino_t ino = fake_ino(tgid,PROC_TGID_INO); +			unsigned long j = PROC_NUMBUF; + +			do +				buf[--j] = '0' + (tgid % 10); +			while ((tgid /= 10) != 0); + +			if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) { +				/* returning this tgid failed, save it as the first +				 * pid for the next readir call */ +				filp->f_version = tgid_array[i]; +				goto out; +			} +			filp->f_pos++; +			nr++; +		} +	} +out: +	return 0; +} + +/* for the /proc/TGID/task/ directories */ +static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ +	unsigned int tid_array[PROC_MAXPIDS]; +	char buf[PROC_NUMBUF]; +	unsigned int nr_tids, i; +	struct dentry *dentry = filp->f_dentry; +	struct inode *inode = dentry->d_inode; +	int retval = -ENOENT; +	ino_t ino; +	unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */ + +	if (!pid_alive(proc_task(inode))) +		goto out; +	retval = 0; + +	switch (pos) { +	case 0: +		ino = inode->i_ino; +		if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) +			goto out; +		pos++; +		/* fall through */ +	case 1: +		ino = parent_ino(dentry); +		if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) +			goto out; +		pos++; +		/* fall through */ +	} + +	nr_tids = get_tid_list(pos, tid_array, inode); + +	for (i = 0; i < nr_tids; i++) { +		unsigned long j = PROC_NUMBUF; +		int tid = tid_array[i]; + +		ino = fake_ino(tid,PROC_TID_INO); + +		do +			buf[--j] = '0' + (tid % 10); +		while ((tid /= 10) != 0); + +		if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) +			break; +		pos++; +	} +out: +	filp->f_pos = pos; +	return retval; +} diff --git a/fs/proc/generic.c b/fs/proc/generic.c new file mode 100644 index 00000000000..6c6315d0402 --- /dev/null +++ b/fs/proc/generic.c @@ -0,0 +1,705 @@ +/* + * proc/fs/generic.c --- generic routines for the proc-fs + * + * This file contains generic proc-fs routines for handling + * directories and files. + *  + * Copyright (C) 1991, 1992 Linus Torvalds. 
+ * Copyright (C) 1997 Theodore Ts'o + */ + +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/module.h> +#include <linux/mount.h> +#include <linux/smp_lock.h> +#include <linux/init.h> +#include <linux/idr.h> +#include <linux/namei.h> +#include <linux/bitops.h> +#include <asm/uaccess.h> + +static ssize_t proc_file_read(struct file *file, char __user *buf, +			      size_t nbytes, loff_t *ppos); +static ssize_t proc_file_write(struct file *file, const char __user *buffer, +			       size_t count, loff_t *ppos); +static loff_t proc_file_lseek(struct file *, loff_t, int); + +int proc_match(int len, const char *name, struct proc_dir_entry *de) +{ +	if (de->namelen != len) +		return 0; +	return !memcmp(name, de->name, len); +} + +static struct file_operations proc_file_operations = { +	.llseek		= proc_file_lseek, +	.read		= proc_file_read, +	.write		= proc_file_write, +}; + +/* buffer size is one page but our output routines use some slack for overruns */ +#define PROC_BLOCK_SIZE	(PAGE_SIZE - 1024) + +static ssize_t +proc_file_read(struct file *file, char __user *buf, size_t nbytes, +	       loff_t *ppos) +{ +	struct inode * inode = file->f_dentry->d_inode; +	char 	*page; +	ssize_t	retval=0; +	int	eof=0; +	ssize_t	n, count; +	char	*start; +	struct proc_dir_entry * dp; + +	dp = PDE(inode); +	if (!(page = (char*) __get_free_page(GFP_KERNEL))) +		return -ENOMEM; + +	while ((nbytes > 0) && !eof) { +		count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); + +		start = NULL; +		if (dp->get_info) { +			/* Handle old net routines */ +			n = dp->get_info(page, &start, *ppos, count); +			if (n < count) +				eof = 1; +		} else if (dp->read_proc) { +			/* +			 * How to be a proc read function +			 * ------------------------------ +			 * Prototype: +			 *    int f(char *buffer, char **start, off_t offset, +			 *          int count, int *peof, void *dat) +			 * +			 * Assume that the buffer is "count" bytes in size. +			 * +			 * If you know you have supplied all the data you +			 * have, set *peof. +			 * +			 * You have three ways to return data: +			 * 0) Leave *start = NULL.  (This is the default.) +			 *    Put the data of the requested offset at that +			 *    offset within the buffer.  Return the number (n) +			 *    of bytes there are from the beginning of the +			 *    buffer up to the last byte of data.  If the +			 *    number of supplied bytes (= n - offset) is  +			 *    greater than zero and you didn't signal eof +			 *    and the reader is prepared to take more data +			 *    you will be called again with the requested +			 *    offset advanced by the number of bytes  +			 *    absorbed.  This interface is useful for files +			 *    no larger than the buffer. +			 * 1) Set *start = an unsigned long value less than +			 *    the buffer address but greater than zero. +			 *    Put the data of the requested offset at the +			 *    beginning of the buffer.  Return the number of +			 *    bytes of data placed there.  If this number is +			 *    greater than zero and you didn't signal eof +			 *    and the reader is prepared to take more data +			 *    you will be called again with the requested +			 *    offset advanced by *start.  This interface is +			 *    useful when you have a large file consisting +			 *    of a series of blocks which you want to count +			 *    and return as wholes. +			 *    (Hack by Paul.Russell@rustcorp.com.au) +			 * 2) Set *start = an address within the buffer. 
+			 *    Put the data of the requested offset at *start. +			 *    Return the number of bytes of data placed there. +			 *    If this number is greater than zero and you +			 *    didn't signal eof and the reader is prepared to +			 *    take more data you will be called again with the +			 *    requested offset advanced by the number of bytes +			 *    absorbed. +			 */ +			n = dp->read_proc(page, &start, *ppos, +					  count, &eof, dp->data); +		} else +			break; + +		if (n == 0)   /* end of file */ +			break; +		if (n < 0) {  /* error */ +			if (retval == 0) +				retval = n; +			break; +		} + +		if (start == NULL) { +			if (n > PAGE_SIZE) { +				printk(KERN_ERR +				       "proc_file_read: Apparent buffer overflow!\n"); +				n = PAGE_SIZE; +			} +			n -= *ppos; +			if (n <= 0) +				break; +			if (n > count) +				n = count; +			start = page + *ppos; +		} else if (start < page) { +			if (n > PAGE_SIZE) { +				printk(KERN_ERR +				       "proc_file_read: Apparent buffer overflow!\n"); +				n = PAGE_SIZE; +			} +			if (n > count) { +				/* +				 * Don't reduce n because doing so might +				 * cut off part of a data block. +				 */ +				printk(KERN_WARNING +				       "proc_file_read: Read count exceeded\n"); +			} +		} else /* start >= page */ { +			unsigned long startoff = (unsigned long)(start - page); +			if (n > (PAGE_SIZE - startoff)) { +				printk(KERN_ERR +				       "proc_file_read: Apparent buffer overflow!\n"); +				n = PAGE_SIZE - startoff; +			} +			if (n > count) +				n = count; +		} +		 + 		n -= copy_to_user(buf, start < page ? page : start, n); +		if (n == 0) { +			if (retval == 0) +				retval = -EFAULT; +			break; +		} + +		*ppos += start < page ? (unsigned long)start : n; +		nbytes -= n; +		buf += n; +		retval += n; +	} +	free_page((unsigned long) page); +	return retval; +} + +static ssize_t +proc_file_write(struct file *file, const char __user *buffer, +		size_t count, loff_t *ppos) +{ +	struct inode *inode = file->f_dentry->d_inode; +	struct proc_dir_entry * dp; +	 +	dp = PDE(inode); + +	if (!dp->write_proc) +		return -EIO; + +	/* FIXME: does this routine need ppos?  probably... */ +	return dp->write_proc(file, buffer, count, dp->data); +} + + +static loff_t +proc_file_lseek(struct file *file, loff_t offset, int orig) +{ +    lock_kernel(); + +    switch (orig) { +    case 0: +	if (offset < 0) +	    goto out; +	file->f_pos = offset; +	unlock_kernel(); +	return(file->f_pos); +    case 1: +	if (offset + file->f_pos < 0) +	    goto out; +	file->f_pos += offset; +	unlock_kernel(); +	return(file->f_pos); +    case 2: +	goto out; +    default: +	goto out; +    } + +out: +    unlock_kernel(); +    return -EINVAL; +} + +static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) +{ +	struct inode *inode = dentry->d_inode; +	struct proc_dir_entry *de = PDE(inode); +	int error; + +	error = inode_change_ok(inode, iattr); +	if (error) +		goto out; + +	error = inode_setattr(inode, iattr); +	if (error) +		goto out; +	 +	de->uid = inode->i_uid; +	de->gid = inode->i_gid; +	de->mode = inode->i_mode; +out: +	return error; +} + +static struct inode_operations proc_file_inode_operations = { +	.setattr	= proc_notify_change, +}; + +/* + * This function parses a name such as "tty/driver/serial", and + * returns the struct proc_dir_entry for "/proc/tty/driver", and + * returns "serial" in residual. 
+ */ +static int xlate_proc_name(const char *name, +			   struct proc_dir_entry **ret, const char **residual) +{ +	const char     		*cp = name, *next; +	struct proc_dir_entry	*de; +	int			len; + +	de = &proc_root; +	while (1) { +		next = strchr(cp, '/'); +		if (!next) +			break; + +		len = next - cp; +		for (de = de->subdir; de ; de = de->next) { +			if (proc_match(len, cp, de)) +				break; +		} +		if (!de) +			return -ENOENT; +		cp += len + 1; +	} +	*residual = cp; +	*ret = de; +	return 0; +} + +static DEFINE_IDR(proc_inum_idr); +static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ + +#define PROC_DYNAMIC_FIRST 0xF0000000UL + +/* + * Return an inode number between PROC_DYNAMIC_FIRST and + * 0xffffffff, or zero on failure. + */ +static unsigned int get_inode_number(void) +{ +	int i, inum = 0; +	int error; + +retry: +	if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) +		return 0; + +	spin_lock(&proc_inum_lock); +	error = idr_get_new(&proc_inum_idr, NULL, &i); +	spin_unlock(&proc_inum_lock); +	if (error == -EAGAIN) +		goto retry; +	else if (error) +		return 0; + +	inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; + +	/* inum will never be more than 0xf0ffffff, so no check +	 * for overflow. +	 */ + +	return inum; +} + +static void release_inode_number(unsigned int inum) +{ +	int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; + +	spin_lock(&proc_inum_lock); +	idr_remove(&proc_inum_idr, id); +	spin_unlock(&proc_inum_lock); +} + +static int proc_follow_link(struct dentry *dentry, struct nameidata *nd) +{ +	nd_set_link(nd, PDE(dentry->d_inode)->data); +	return 0; +} + +static struct inode_operations proc_link_inode_operations = { +	.readlink	= generic_readlink, +	.follow_link	= proc_follow_link, +}; + +/* + * As some entries in /proc are volatile, we want to  + * get rid of unused dentries.  This could be made  + * smarter: we could keep a "volatile" flag in the  + * inode to indicate which ones to keep. + */ +static int proc_delete_dentry(struct dentry * dentry) +{ +	return 1; +} + +static struct dentry_operations proc_dentry_operations = +{ +	.d_delete	= proc_delete_dentry, +}; + +/* + * Don't create negative dentries here, return -ENOENT by hand + * instead. + */ +struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +{ +	struct inode *inode = NULL; +	struct proc_dir_entry * de; +	int error = -ENOENT; + +	lock_kernel(); +	de = PDE(dir); +	if (de) { +		for (de = de->subdir; de ; de = de->next) { +			if (de->namelen != dentry->d_name.len) +				continue; +			if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { +				unsigned int ino = de->low_ino; + +				error = -EINVAL; +				inode = proc_get_inode(dir->i_sb, ino, de); +				break; +			} +		} +	} +	unlock_kernel(); + +	if (inode) { +		dentry->d_op = &proc_dentry_operations; +		d_add(dentry, inode); +		return NULL; +	} +	return ERR_PTR(error); +} + +/* + * This returns non-zero if at EOF, so that the /proc + * root directory can use this and check if it should + * continue with the <pid> entries.. + * + * Note that the VFS-layer doesn't care about the return + * value of the readdir() call, as long as it's non-negative + * for success.. 
+ */ +int proc_readdir(struct file * filp, +	void * dirent, filldir_t filldir) +{ +	struct proc_dir_entry * de; +	unsigned int ino; +	int i; +	struct inode *inode = filp->f_dentry->d_inode; +	int ret = 0; + +	lock_kernel(); + +	ino = inode->i_ino; +	de = PDE(inode); +	if (!de) { +		ret = -EINVAL; +		goto out; +	} +	i = filp->f_pos; +	switch (i) { +		case 0: +			if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) +				goto out; +			i++; +			filp->f_pos++; +			/* fall through */ +		case 1: +			if (filldir(dirent, "..", 2, i, +				    parent_ino(filp->f_dentry), +				    DT_DIR) < 0) +				goto out; +			i++; +			filp->f_pos++; +			/* fall through */ +		default: +			de = de->subdir; +			i -= 2; +			for (;;) { +				if (!de) { +					ret = 1; +					goto out; +				} +				if (!i) +					break; +				de = de->next; +				i--; +			} + +			do { +				if (filldir(dirent, de->name, de->namelen, filp->f_pos, +					    de->low_ino, de->mode >> 12) < 0) +					goto out; +				filp->f_pos++; +				de = de->next; +			} while (de); +	} +	ret = 1; +out:	unlock_kernel(); +	return ret;	 +} + +/* + * These are the generic /proc directory operations. They + * use the in-memory "struct proc_dir_entry" tree to parse + * the /proc directory. + */ +static struct file_operations proc_dir_operations = { +	.read			= generic_read_dir, +	.readdir		= proc_readdir, +}; + +/* + * proc directories can do almost nothing.. + */ +static struct inode_operations proc_dir_inode_operations = { +	.lookup		= proc_lookup, +	.setattr	= proc_notify_change, +}; + +static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) +{ +	unsigned int i; +	 +	i = get_inode_number(); +	if (i == 0) +		return -EAGAIN; +	dp->low_ino = i; +	dp->next = dir->subdir; +	dp->parent = dir; +	dir->subdir = dp; +	if (S_ISDIR(dp->mode)) { +		if (dp->proc_iops == NULL) { +			dp->proc_fops = &proc_dir_operations; +			dp->proc_iops = &proc_dir_inode_operations; +		} +		dir->nlink++; +	} else if (S_ISLNK(dp->mode)) { +		if (dp->proc_iops == NULL) +			dp->proc_iops = &proc_link_inode_operations; +	} else if (S_ISREG(dp->mode)) { +		if (dp->proc_fops == NULL) +			dp->proc_fops = &proc_file_operations; +		if (dp->proc_iops == NULL) +			dp->proc_iops = &proc_file_inode_operations; +	} +	return 0; +} + +/* + * Kill an inode that got unregistered.. + */ +static void proc_kill_inodes(struct proc_dir_entry *de) +{ +	struct list_head *p; +	struct super_block *sb = proc_mnt->mnt_sb; + +	/* +	 * Actually it's a partial revoke(). 
+	 */ +	file_list_lock(); +	list_for_each(p, &sb->s_files) { +		struct file * filp = list_entry(p, struct file, f_list); +		struct dentry * dentry = filp->f_dentry; +		struct inode * inode; +		struct file_operations *fops; + +		if (dentry->d_op != &proc_dentry_operations) +			continue; +		inode = dentry->d_inode; +		if (PDE(inode) != de) +			continue; +		fops = filp->f_op; +		filp->f_op = NULL; +		fops_put(fops); +	} +	file_list_unlock(); +} + +static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, +					  const char *name, +					  mode_t mode, +					  nlink_t nlink) +{ +	struct proc_dir_entry *ent = NULL; +	const char *fn = name; +	int len; + +	/* make sure name is valid */ +	if (!name || !strlen(name)) goto out; + +	if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) +		goto out; + +	/* At this point there must not be any '/' characters beyond *fn */ +	if (strchr(fn, '/')) +		goto out; + +	len = strlen(fn); + +	ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); +	if (!ent) goto out; + +	memset(ent, 0, sizeof(struct proc_dir_entry)); +	memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); +	ent->name = ((char *) ent) + sizeof(*ent); +	ent->namelen = len; +	ent->mode = mode; +	ent->nlink = nlink; + out: +	return ent; +} + +struct proc_dir_entry *proc_symlink(const char *name, +		struct proc_dir_entry *parent, const char *dest) +{ +	struct proc_dir_entry *ent; + +	ent = proc_create(&parent,name, +			  (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); + +	if (ent) { +		ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); +		if (ent->data) { +			strcpy((char*)ent->data,dest); +			if (proc_register(parent, ent) < 0) { +				kfree(ent->data); +				kfree(ent); +				ent = NULL; +			} +		} else { +			kfree(ent); +			ent = NULL; +		} +	} +	return ent; +} + +struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, +		struct proc_dir_entry *parent) +{ +	struct proc_dir_entry *ent; + +	ent = proc_create(&parent, name, S_IFDIR | mode, 2); +	if (ent) { +		ent->proc_fops = &proc_dir_operations; +		ent->proc_iops = &proc_dir_inode_operations; + +		if (proc_register(parent, ent) < 0) { +			kfree(ent); +			ent = NULL; +		} +	} +	return ent; +} + +struct proc_dir_entry *proc_mkdir(const char *name, +		struct proc_dir_entry *parent) +{ +	return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); +} + +struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, +					 struct proc_dir_entry *parent) +{ +	struct proc_dir_entry *ent; +	nlink_t nlink; + +	if (S_ISDIR(mode)) { +		if ((mode & S_IALLUGO) == 0) +			mode |= S_IRUGO | S_IXUGO; +		nlink = 2; +	} else { +		if ((mode & S_IFMT) == 0) +			mode |= S_IFREG; +		if ((mode & S_IALLUGO) == 0) +			mode |= S_IRUGO; +		nlink = 1; +	} + +	ent = proc_create(&parent,name,mode,nlink); +	if (ent) { +		if (S_ISDIR(mode)) { +			ent->proc_fops = &proc_dir_operations; +			ent->proc_iops = &proc_dir_inode_operations; +		} +		if (proc_register(parent, ent) < 0) { +			kfree(ent); +			ent = NULL; +		} +	} +	return ent; +} + +void free_proc_entry(struct proc_dir_entry *de) +{ +	unsigned int ino = de->low_ino; + +	if (ino < PROC_DYNAMIC_FIRST) +		return; + +	release_inode_number(ino); + +	if (S_ISLNK(de->mode) && de->data) +		kfree(de->data); +	kfree(de); +} + +/* + * Remove a /proc entry and free it if it's not currently in use. + * If it is in use, we set the 'deleted' flag. 
+ */ +void remove_proc_entry(const char *name, struct proc_dir_entry *parent) +{ +	struct proc_dir_entry **p; +	struct proc_dir_entry *de; +	const char *fn = name; +	int len; + +	if (!parent && xlate_proc_name(name, &parent, &fn) != 0) +		goto out; +	len = strlen(fn); +	for (p = &parent->subdir; *p; p=&(*p)->next ) { +		if (!proc_match(len, fn, *p)) +			continue; +		de = *p; +		*p = de->next; +		de->next = NULL; +		if (S_ISDIR(de->mode)) +			parent->nlink--; +		proc_kill_inodes(de); +		de->nlink = 0; +		WARN_ON(de->subdir); +		if (!atomic_read(&de->count)) +			free_proc_entry(de); +		else { +			de->deleted = 1; +			printk("remove_proc_entry: %s/%s busy, count=%d\n", +				parent->name, de->name, atomic_read(&de->count)); +		} +		break; +	} +out: +	return; +} diff --git a/fs/proc/inode-alloc.txt b/fs/proc/inode-alloc.txt new file mode 100644 index 00000000000..77212f938c2 --- /dev/null +++ b/fs/proc/inode-alloc.txt @@ -0,0 +1,14 @@ +Current inode allocations in the proc-fs (hex-numbers): + +  00000000		reserved +  00000001-00000fff	static entries	(goners) +       001		root-ino + +  00001000-00001fff	unused +  0001xxxx-7fffxxxx	pid-dir entries for pid 1-7fff +  80000000-efffffff	unused +  f0000000-ffffffff	dynamic entries + +Goal: +	a) once we'll split the thing into several virtual filesystems we +	will get rid of magical ranges (and this file, BTW). diff --git a/fs/proc/inode.c b/fs/proc/inode.c new file mode 100644 index 00000000000..133c2868510 --- /dev/null +++ b/fs/proc/inode.c @@ -0,0 +1,218 @@ +/* + *  linux/fs/proc/inode.c + * + *  Copyright (C) 1991, 1992  Linus Torvalds + */ + +#include <linux/time.h> +#include <linux/proc_fs.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/file.h> +#include <linux/limits.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/smp_lock.h> + +#include <asm/system.h> +#include <asm/uaccess.h> + +extern void free_proc_entry(struct proc_dir_entry *); + +static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de) +{ +	if (de) +		atomic_inc(&de->count); +	return de; +} + +/* + * Decrements the use count and checks for deferred deletion. + */ +static void de_put(struct proc_dir_entry *de) +{ +	if (de) {	 +		lock_kernel();		 +		if (!atomic_read(&de->count)) { +			printk("de_put: entry %s already free!\n", de->name); +			unlock_kernel(); +			return; +		} + +		if (atomic_dec_and_test(&de->count)) { +			if (de->deleted) { +				printk("de_put: deferred delete of %s\n", +					de->name); +				free_proc_entry(de); +			} +		}		 +		unlock_kernel(); +	} +} + +/* + * Decrement the use count of the proc_dir_entry. 
+ */ +static void proc_delete_inode(struct inode *inode) +{ +	struct proc_dir_entry *de; +	struct task_struct *tsk; + +	/* Let go of any associated process */ +	tsk = PROC_I(inode)->task; +	if (tsk) +		put_task_struct(tsk); + +	/* Let go of any associated proc directory entry */ +	de = PROC_I(inode)->pde; +	if (de) { +		if (de->owner) +			module_put(de->owner); +		de_put(de); +	} +	clear_inode(inode); +} + +struct vfsmount *proc_mnt; + +static void proc_read_inode(struct inode * inode) +{ +	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; +} + +static kmem_cache_t * proc_inode_cachep; + +static struct inode *proc_alloc_inode(struct super_block *sb) +{ +	struct proc_inode *ei; +	struct inode *inode; + +	ei = (struct proc_inode *)kmem_cache_alloc(proc_inode_cachep, SLAB_KERNEL); +	if (!ei) +		return NULL; +	ei->task = NULL; +	ei->type = 0; +	ei->op.proc_get_link = NULL; +	ei->pde = NULL; +	inode = &ei->vfs_inode; +	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; +	return inode; +} + +static void proc_destroy_inode(struct inode *inode) +{ +	kmem_cache_free(proc_inode_cachep, PROC_I(inode)); +} + +static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +{ +	struct proc_inode *ei = (struct proc_inode *) foo; + +	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == +	    SLAB_CTOR_CONSTRUCTOR) +		inode_init_once(&ei->vfs_inode); +} +  +int __init proc_init_inodecache(void) +{ +	proc_inode_cachep = kmem_cache_create("proc_inode_cache", +					     sizeof(struct proc_inode), +					     0, SLAB_RECLAIM_ACCOUNT, +					     init_once, NULL); +	if (proc_inode_cachep == NULL) +		return -ENOMEM; +	return 0; +} + +static int proc_remount(struct super_block *sb, int *flags, char *data) +{ +	*flags |= MS_NODIRATIME; +	return 0; +} + +static struct super_operations proc_sops = {  +	.alloc_inode	= proc_alloc_inode, +	.destroy_inode	= proc_destroy_inode, +	.read_inode	= proc_read_inode, +	.drop_inode	= generic_delete_inode, +	.delete_inode	= proc_delete_inode, +	.statfs		= simple_statfs, +	.remount_fs	= proc_remount, +}; + +struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, +				struct proc_dir_entry *de) +{ +	struct inode * inode; + +	/* +	 * Increment the use count so the dir entry can't disappear. 
+	 */ +	de_get(de); + +	WARN_ON(de && de->deleted); + +	inode = iget(sb, ino); +	if (!inode) +		goto out_fail; +	 +	PROC_I(inode)->pde = de; +	if (de) { +		if (de->mode) { +			inode->i_mode = de->mode; +			inode->i_uid = de->uid; +			inode->i_gid = de->gid; +		} +		if (de->size) +			inode->i_size = de->size; +		if (de->nlink) +			inode->i_nlink = de->nlink; +		if (!try_module_get(de->owner)) +			goto out_fail; +		if (de->proc_iops) +			inode->i_op = de->proc_iops; +		if (de->proc_fops) +			inode->i_fop = de->proc_fops; +	} + +out: +	return inode; + +out_fail: +	de_put(de); +	goto out; +}			 + +int proc_fill_super(struct super_block *s, void *data, int silent) +{ +	struct inode * root_inode; + +	s->s_flags |= MS_NODIRATIME; +	s->s_blocksize = 1024; +	s->s_blocksize_bits = 10; +	s->s_magic = PROC_SUPER_MAGIC; +	s->s_op = &proc_sops; +	s->s_time_gran = 1; +	 +	root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root); +	if (!root_inode) +		goto out_no_root; +	/* +	 * Fixup the root inode's nlink value +	 */ +	root_inode->i_nlink += nr_processes(); +	root_inode->i_uid = 0; +	root_inode->i_gid = 0; +	s->s_root = d_alloc_root(root_inode); +	if (!s->s_root) +		goto out_no_root; +	return 0; + +out_no_root: +	printk("proc_read_super: get root inode failed\n"); +	iput(root_inode); +	return -ENOMEM; +} +MODULE_LICENSE("GPL"); diff --git a/fs/proc/internal.h b/fs/proc/internal.h new file mode 100644 index 00000000000..3e55198f980 --- /dev/null +++ b/fs/proc/internal.h @@ -0,0 +1,48 @@ +/* internal.h: internal procfs definitions + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/proc_fs.h> + +struct vmalloc_info { +	unsigned long	used; +	unsigned long	largest_chunk; +}; + +#ifdef CONFIG_MMU +#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) +extern void get_vmalloc_info(struct vmalloc_info *vmi); +#else + +#define VMALLOC_TOTAL 0UL +#define get_vmalloc_info(vmi)			\ +do {						\ +	(vmi)->used = 0;			\ +	(vmi)->largest_chunk = 0;		\ +} while(0) + +#endif + +extern void create_seq_entry(char *name, mode_t mode, struct file_operations *f); +extern int proc_exe_link(struct inode *, struct dentry **, struct vfsmount **); +extern int proc_tid_stat(struct task_struct *,  char *); +extern int proc_tgid_stat(struct task_struct *, char *); +extern int proc_pid_status(struct task_struct *, char *); +extern int proc_pid_statm(struct task_struct *, char *); + +static inline struct task_struct *proc_task(struct inode *inode) +{ +	return PROC_I(inode)->task; +} + +static inline int proc_type(struct inode *inode) +{ +	return PROC_I(inode)->type; +} diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c new file mode 100644 index 00000000000..1c7da988fcc --- /dev/null +++ b/fs/proc/kcore.c @@ -0,0 +1,404 @@ +/* + *	fs/proc/kcore.c kernel ELF core dumper + * + *	Modelled on fs/exec.c:aout_core_dump() + *	Jeremy Fitzhardinge <jeremy@sw.oz.au> + *	ELF version written by David Howells <David.Howells@nexor.co.uk> + *	Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com> + *	Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com> + *	Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com> + */ + +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/user.h> +#include <linux/a.out.h> +#include <linux/elf.h> +#include <linux/elfcore.h> +#include <linux/vmalloc.h> +#include <linux/highmem.h> +#include <linux/init.h> +#include <asm/uaccess.h> +#include <asm/io.h> + + +static int open_kcore(struct inode * inode, struct file * filp) +{ +	return capable(CAP_SYS_RAWIO) ? 
0 : -EPERM; +} + +static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *); + +struct file_operations proc_kcore_operations = { +	.read		= read_kcore, +	.open		= open_kcore, +}; + +#ifndef kc_vaddr_to_offset +#define	kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET) +#endif +#ifndef	kc_offset_to_vaddr +#define	kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET) +#endif + +#define roundup(x, y)  ((((x)+((y)-1))/(y))*(y)) + +/* An ELF note in memory */ +struct memelfnote +{ +	const char *name; +	int type; +	unsigned int datasz; +	void *data; +}; + +static struct kcore_list *kclist; +static DEFINE_RWLOCK(kclist_lock); + +void +kclist_add(struct kcore_list *new, void *addr, size_t size) +{ +	new->addr = (unsigned long)addr; +	new->size = size; + +	write_lock(&kclist_lock); +	new->next = kclist; +	kclist = new; +	write_unlock(&kclist_lock); +} + +static size_t get_kcore_size(int *nphdr, size_t *elf_buflen) +{ +	size_t try, size; +	struct kcore_list *m; + +	*nphdr = 1; /* PT_NOTE */ +	size = 0; + +	for (m=kclist; m; m=m->next) { +		try = kc_vaddr_to_offset((size_t)m->addr + m->size); +		if (try > size) +			size = try; +		*nphdr = *nphdr + 1; +	} +	*elf_buflen =	sizeof(struct elfhdr) +  +			(*nphdr + 2)*sizeof(struct elf_phdr) +  +			3 * (sizeof(struct elf_note) + 4) + +			sizeof(struct elf_prstatus) + +			sizeof(struct elf_prpsinfo) + +			sizeof(struct task_struct); +	*elf_buflen = PAGE_ALIGN(*elf_buflen); +	return size + *elf_buflen; +} + + +/*****************************************************************************/ +/* + * determine size of ELF note + */ +static int notesize(struct memelfnote *en) +{ +	int sz; + +	sz = sizeof(struct elf_note); +	sz += roundup(strlen(en->name), 4); +	sz += roundup(en->datasz, 4); + +	return sz; +} /* end notesize() */ + +/*****************************************************************************/ +/* + * store a note in the header buffer + */ +static char *storenote(struct memelfnote *men, char *bufp) +{ +	struct elf_note en; + +#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0) + +	en.n_namesz = strlen(men->name); +	en.n_descsz = men->datasz; +	en.n_type = men->type; + +	DUMP_WRITE(&en, sizeof(en)); +	DUMP_WRITE(men->name, en.n_namesz); + +	/* XXX - cast from long long to long to avoid need for libgcc.a */ +	bufp = (char*) roundup((unsigned long)bufp,4); +	DUMP_WRITE(men->data, men->datasz); +	bufp = (char*) roundup((unsigned long)bufp,4); + +#undef DUMP_WRITE + +	return bufp; +} /* end storenote() */ + +/* + * store an ELF coredump header in the supplied buffer + * nphdr is the number of elf_phdr to insert + */ +static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) +{ +	struct elf_prstatus prstatus;	/* NT_PRSTATUS */ +	struct elf_prpsinfo prpsinfo;	/* NT_PRPSINFO */ +	struct elf_phdr *nhdr, *phdr; +	struct elfhdr *elf; +	struct memelfnote notes[3]; +	off_t offset = 0; +	struct kcore_list *m; + +	/* setup ELF header */ +	elf = (struct elfhdr *) bufp; +	bufp += sizeof(struct elfhdr); +	offset += sizeof(struct elfhdr); +	memcpy(elf->e_ident, ELFMAG, SELFMAG); +	elf->e_ident[EI_CLASS]	= ELF_CLASS; +	elf->e_ident[EI_DATA]	= ELF_DATA; +	elf->e_ident[EI_VERSION]= EV_CURRENT; +	elf->e_ident[EI_OSABI] = ELF_OSABI; +	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); +	elf->e_type	= ET_CORE; +	elf->e_machine	= ELF_ARCH; +	elf->e_version	= EV_CURRENT; +	elf->e_entry	= 0; +	elf->e_phoff	= sizeof(struct elfhdr); +	elf->e_shoff	= 0; +#if defined(CONFIG_H8300) +	elf->e_flags	= ELF_FLAGS; +#else +	elf->e_flags	= 0; +#endif +	
elf->e_ehsize	= sizeof(struct elfhdr); +	elf->e_phentsize= sizeof(struct elf_phdr); +	elf->e_phnum	= nphdr; +	elf->e_shentsize= 0; +	elf->e_shnum	= 0; +	elf->e_shstrndx	= 0; + +	/* setup ELF PT_NOTE program header */ +	nhdr = (struct elf_phdr *) bufp; +	bufp += sizeof(struct elf_phdr); +	offset += sizeof(struct elf_phdr); +	nhdr->p_type	= PT_NOTE; +	nhdr->p_offset	= 0; +	nhdr->p_vaddr	= 0; +	nhdr->p_paddr	= 0; +	nhdr->p_filesz	= 0; +	nhdr->p_memsz	= 0; +	nhdr->p_flags	= 0; +	nhdr->p_align	= 0; + +	/* setup ELF PT_LOAD program header for every area */ +	for (m=kclist; m; m=m->next) { +		phdr = (struct elf_phdr *) bufp; +		bufp += sizeof(struct elf_phdr); +		offset += sizeof(struct elf_phdr); + +		phdr->p_type	= PT_LOAD; +		phdr->p_flags	= PF_R|PF_W|PF_X; +		phdr->p_offset	= kc_vaddr_to_offset(m->addr) + dataoff; +		phdr->p_vaddr	= (size_t)m->addr; +		phdr->p_paddr	= 0; +		phdr->p_filesz	= phdr->p_memsz	= m->size; +		phdr->p_align	= PAGE_SIZE; +	} + +	/* +	 * Set up the notes in similar form to SVR4 core dumps made +	 * with info from their /proc. +	 */ +	nhdr->p_offset	= offset; + +	/* set up the process status */ +	notes[0].name = "CORE"; +	notes[0].type = NT_PRSTATUS; +	notes[0].datasz = sizeof(struct elf_prstatus); +	notes[0].data = &prstatus; + +	memset(&prstatus, 0, sizeof(struct elf_prstatus)); + +	nhdr->p_filesz	= notesize(&notes[0]); +	bufp = storenote(&notes[0], bufp); + +	/* set up the process info */ +	notes[1].name	= "CORE"; +	notes[1].type	= NT_PRPSINFO; +	notes[1].datasz	= sizeof(struct elf_prpsinfo); +	notes[1].data	= &prpsinfo; + +	memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo)); +	prpsinfo.pr_state	= 0; +	prpsinfo.pr_sname	= 'R'; +	prpsinfo.pr_zomb	= 0; + +	strcpy(prpsinfo.pr_fname, "vmlinux"); +	strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ); + +	nhdr->p_filesz	+= notesize(&notes[1]); +	bufp = storenote(&notes[1], bufp); + +	/* set up the task structure */ +	notes[2].name	= "CORE"; +	notes[2].type	= NT_TASKSTRUCT; +	notes[2].datasz	= sizeof(struct task_struct); +	notes[2].data	= current; + +	nhdr->p_filesz	+= notesize(&notes[2]); +	bufp = storenote(&notes[2], bufp); + +} /* end elf_kcore_store_hdr() */ + +/*****************************************************************************/ +/* + * read from the ELF header and then kernel memory + */ +static ssize_t +read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) +{ +	ssize_t acc = 0; +	size_t size, tsz; +	size_t elf_buflen; +	int nphdr; +	unsigned long start; + +	read_lock(&kclist_lock); +	proc_root_kcore->size = size = get_kcore_size(&nphdr, &elf_buflen); +	if (buflen == 0 || *fpos >= size) { +		read_unlock(&kclist_lock); +		return 0; +	} + +	/* trim buflen to not go beyond EOF */ +	if (buflen > size - *fpos) +		buflen = size - *fpos; + +	/* construct an ELF core header if we'll need some of it */ +	if (*fpos < elf_buflen) { +		char * elf_buf; + +		tsz = elf_buflen - *fpos; +		if (buflen < tsz) +			tsz = buflen; +		elf_buf = kmalloc(elf_buflen, GFP_ATOMIC); +		if (!elf_buf) { +			read_unlock(&kclist_lock); +			return -ENOMEM; +		} +		memset(elf_buf, 0, elf_buflen); +		elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen); +		read_unlock(&kclist_lock); +		if (copy_to_user(buffer, elf_buf + *fpos, tsz)) { +			kfree(elf_buf); +			return -EFAULT; +		} +		kfree(elf_buf); +		buflen -= tsz; +		*fpos += tsz; +		buffer += tsz; +		acc += tsz; + +		/* leave now if filled buffer already */ +		if (buflen == 0) +			return acc; +	} else +		read_unlock(&kclist_lock); + +	/* +	 * Check to see if our file offset matches 
with any of +	 * the addresses in the elf_phdr on our list. +	 */ +	start = kc_offset_to_vaddr(*fpos - elf_buflen); +	if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen) +		tsz = buflen; +		 +	while (buflen) { +		struct kcore_list *m; + +		read_lock(&kclist_lock); +		for (m=kclist; m; m=m->next) { +			if (start >= m->addr && start < (m->addr+m->size)) +				break; +		} +		read_unlock(&kclist_lock); + +		if (m == NULL) { +			if (clear_user(buffer, tsz)) +				return -EFAULT; +		} else if ((start >= VMALLOC_START) && (start < VMALLOC_END)) { +			char * elf_buf; +			struct vm_struct *m; +			unsigned long curstart = start; +			unsigned long cursize = tsz; + +			elf_buf = kmalloc(tsz, GFP_KERNEL); +			if (!elf_buf) +				return -ENOMEM; +			memset(elf_buf, 0, tsz); + +			read_lock(&vmlist_lock); +			for (m=vmlist; m && cursize; m=m->next) { +				unsigned long vmstart; +				unsigned long vmsize; +				unsigned long msize = m->size - PAGE_SIZE; + +				if (((unsigned long)m->addr + msize) <  +								curstart) +					continue; +				if ((unsigned long)m->addr > (curstart +  +								cursize)) +					break; +				vmstart = (curstart < (unsigned long)m->addr ?  +					(unsigned long)m->addr : curstart); +				if (((unsigned long)m->addr + msize) >  +							(curstart + cursize)) +					vmsize = curstart + cursize - vmstart; +				else +					vmsize = (unsigned long)m->addr +  +							msize - vmstart; +				curstart = vmstart + vmsize; +				cursize -= vmsize; +				/* don't dump ioremap'd stuff! (TA) */ +				if (m->flags & VM_IOREMAP) +					continue; +				memcpy(elf_buf + (vmstart - start), +					(char *)vmstart, vmsize); +			} +			read_unlock(&vmlist_lock); +			if (copy_to_user(buffer, elf_buf, tsz)) { +				kfree(elf_buf); +				return -EFAULT; +			} +			kfree(elf_buf); +		} else { +			if (kern_addr_valid(start)) { +				unsigned long n; + +				n = copy_to_user(buffer, (char *)start, tsz); +				/* +				 * We cannot distingush between fault on source +				 * and fault on destination. When this happens +				 * we clear too and hope it will trigger the +				 * EFAULT again. +				 */ +				if (n) {  +					if (clear_user(buffer + tsz - n, +								tsz - n)) +						return -EFAULT; +				} +			} else { +				if (clear_user(buffer, tsz)) +					return -EFAULT; +			} +		} +		buflen -= tsz; +		*fpos += tsz; +		buffer += tsz; +		acc += tsz; +		start += tsz; +		tsz = (buflen > PAGE_SIZE ? 
PAGE_SIZE : buflen); +	} + +	return acc; +} diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c new file mode 100644 index 00000000000..10d37bf2520 --- /dev/null +++ b/fs/proc/kmsg.c @@ -0,0 +1,55 @@ +/* + *  linux/fs/proc/kmsg.c + * + *  Copyright (C) 1992  by Linus Torvalds + * + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/poll.h> +#include <linux/fs.h> + +#include <asm/uaccess.h> +#include <asm/io.h> + +extern wait_queue_head_t log_wait; + +extern int do_syslog(int type, char __user *bug, int count); + +static int kmsg_open(struct inode * inode, struct file * file) +{ +	return do_syslog(1,NULL,0); +} + +static int kmsg_release(struct inode * inode, struct file * file) +{ +	(void) do_syslog(0,NULL,0); +	return 0; +} + +static ssize_t kmsg_read(struct file *file, char __user *buf, +			 size_t count, loff_t *ppos) +{ +	if ((file->f_flags & O_NONBLOCK) && !do_syslog(9, NULL, 0)) +		return -EAGAIN; +	return do_syslog(2, buf, count); +} + +static unsigned int kmsg_poll(struct file *file, poll_table *wait) +{ +	poll_wait(file, &log_wait, wait); +	if (do_syslog(9, NULL, 0)) +		return POLLIN | POLLRDNORM; +	return 0; +} + + +struct file_operations proc_kmsg_operations = { +	.read		= kmsg_read, +	.poll		= kmsg_poll, +	.open		= kmsg_open, +	.release	= kmsg_release, +}; diff --git a/fs/proc/mmu.c b/fs/proc/mmu.c new file mode 100644 index 00000000000..a7041038ad5 --- /dev/null +++ b/fs/proc/mmu.c @@ -0,0 +1,67 @@ +/* mmu.c: mmu memory info files + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/mman.h> +#include <linux/proc_fs.h> +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/seq_file.h> +#include <linux/hugetlb.h> +#include <linux/vmalloc.h> +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/tlb.h> +#include <asm/div64.h> +#include "internal.h" + +void get_vmalloc_info(struct vmalloc_info *vmi) +{ +	struct vm_struct *vma; +	unsigned long free_area_size; +	unsigned long prev_end; + +	vmi->used = 0; + +	if (!vmlist) { +		vmi->largest_chunk = VMALLOC_TOTAL; +	} +	else { +		vmi->largest_chunk = 0; + +		prev_end = VMALLOC_START; + +		read_lock(&vmlist_lock); + +		for (vma = vmlist; vma; vma = vma->next) { +			vmi->used += vma->size; + +			free_area_size = (unsigned long) vma->addr - prev_end; +			if (vmi->largest_chunk < free_area_size) +				vmi->largest_chunk = free_area_size; + +			prev_end = vma->size + (unsigned long) vma->addr; +		} + +		if (VMALLOC_END - prev_end > vmi->largest_chunk) +			vmi->largest_chunk = VMALLOC_END - prev_end; + +		read_unlock(&vmlist_lock); +	} +} diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c new file mode 100644 index 00000000000..f3bf016d5ee --- /dev/null +++ b/fs/proc/nommu.c @@ -0,0 +1,135 @@ +/* nommu.c: mmu-less memory info files + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/mman.h> +#include <linux/proc_fs.h> +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/seq_file.h> +#include <linux/hugetlb.h> +#include <linux/vmalloc.h> +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/tlb.h> +#include <asm/div64.h> +#include "internal.h" + +/* + * display a list of all the VMAs the kernel knows about + * - nommu kernals have a single flat list + */ +static int nommu_vma_list_show(struct seq_file *m, void *v) +{ +	struct vm_area_struct *vma; +	unsigned long ino = 0; +	struct file *file; +	dev_t dev = 0; +	int flags, len; + +	vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb); + +	flags = vma->vm_flags; +	file = vma->vm_file; + +	if (file) { +		struct inode *inode = vma->vm_file->f_dentry->d_inode; +		dev = inode->i_sb->s_dev; +		ino = inode->i_ino; +	} + +	seq_printf(m, +		   "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", +		   vma->vm_start, +		   vma->vm_end, +		   flags & VM_READ ? 'r' : '-', +		   flags & VM_WRITE ? 'w' : '-', +		   flags & VM_EXEC ? 'x' : '-', +		   flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', +		   vma->vm_pgoff << PAGE_SHIFT, +		   MAJOR(dev), MINOR(dev), ino, &len); + +	if (file) { +		len = 25 + sizeof(void *) * 6 - len; +		if (len < 1) +			len = 1; +		seq_printf(m, "%*c", len, ' '); +		seq_path(m, file->f_vfsmnt, file->f_dentry, ""); +	} + +	seq_putc(m, '\n'); +	return 0; +} + +static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos) +{ +	struct rb_node *_rb; +	loff_t pos = *_pos; +	void *next = NULL; + +	down_read(&nommu_vma_sem); + +	for (_rb = rb_first(&nommu_vma_tree); _rb; _rb = rb_next(_rb)) { +		if (pos == 0) { +			next = _rb; +			break; +		} +	} + +	return next; +} + +static void nommu_vma_list_stop(struct seq_file *m, void *v) +{ +	up_read(&nommu_vma_sem); +} + +static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos) +{ +	(*pos)++; +	return rb_next((struct rb_node *) v); +} + +static struct seq_operations proc_nommu_vma_list_seqop = { +	.start	= nommu_vma_list_start, +	.next	= nommu_vma_list_next, +	.stop	= nommu_vma_list_stop, +	.show	= nommu_vma_list_show +}; + +static int proc_nommu_vma_list_open(struct inode *inode, struct file *file) +{ +	return seq_open(file, &proc_nommu_vma_list_seqop); +} + +static struct file_operations proc_nommu_vma_list_operations = { +	.open    = proc_nommu_vma_list_open, +	.read    = seq_read, +	.llseek  = seq_lseek, +	.release = seq_release, +}; + +static int __init proc_nommu_init(void) +{ +	create_seq_entry("maps", S_IRUGO, &proc_nommu_vma_list_operations); +	return 0; +} + +module_init(proc_nommu_init); diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c new file mode 100644 index 00000000000..67423c696c0 --- /dev/null +++ b/fs/proc/proc_devtree.c @@ -0,0 +1,165 @@ +/* + * proc_devtree.c - handles /proc/device-tree + * + * Copyright 1997 Paul Mackerras + */ +#include <linux/errno.h> +#include <linux/time.h> +#include 
<linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <asm/prom.h> +#include <asm/uaccess.h> + +#ifndef HAVE_ARCH_DEVTREE_FIXUPS +static inline void set_node_proc_entry(struct device_node *np, struct proc_dir_entry *de) +{ +} + +static void inline set_node_name_link(struct device_node *np, struct proc_dir_entry *de) +{ +} + +static void inline set_node_addr_link(struct device_node *np, struct proc_dir_entry *de) +{ +} +#endif + +static struct proc_dir_entry *proc_device_tree; + +/* + * Supply data on a read from /proc/device-tree/node/property. + */ +static int property_read_proc(char *page, char **start, off_t off, +			      int count, int *eof, void *data) +{ +	struct property *pp = data; +	int n; + +	if (off >= pp->length) { +		*eof = 1; +		return 0; +	} +	n = pp->length - off; +	if (n > count) +		n = count; +	else +		*eof = 1; +	memcpy(page, pp->value + off, n); +	*start = page; +	return n; +} + +/* + * For a node with a name like "gc@10", we make symlinks called "gc" + * and "@10" to it. + */ + +/* + * Process a node, adding entries for its children and its properties. + */ +void proc_device_tree_add_node(struct device_node *np, struct proc_dir_entry *de) +{ +	struct property *pp; +	struct proc_dir_entry *ent; +	struct device_node *child, *sib; +	const char *p, *at; +	int l; +	struct proc_dir_entry *list, **lastp, *al; + +	set_node_proc_entry(np, de); +	lastp = &list; +	for (pp = np->properties; pp != 0; pp = pp->next) { +		/* +		 * Unfortunately proc_register puts each new entry +		 * at the beginning of the list.  So we rearrange them. +		 */ +		ent = create_proc_read_entry(pp->name, strncmp(pp->name, "security-", 9) ? +					     S_IRUGO : S_IRUSR, de, property_read_proc, pp); +		if (ent == 0) +			break; +		if (!strncmp(pp->name, "security-", 9)) +		     ent->size = 0; /* don't leak number of password chars */ +		else +		     ent->size = pp->length; +		*lastp = ent; +		lastp = &ent->next; +	} +	child = NULL; +	while ((child = of_get_next_child(np, child))) { +		p = strrchr(child->full_name, '/'); +		if (!p) +			p = child->full_name; +		else +			++p; +		/* chop off '@0' if the name ends with that */ +		l = strlen(p); +		if (l > 2 && p[l-2] == '@' && p[l-1] == '0') +			l -= 2; +		ent = proc_mkdir(p, de); +		if (ent == 0) +			break; +		*lastp = ent; +		lastp = &ent->next; +		proc_device_tree_add_node(child, ent); + +		/* +		 * If we left the address part on the name, consider +		 * adding symlinks from the name and address parts. +		 */ +		if (p[l] != 0 || (at = strchr(p, '@')) == 0) +			continue; + +		/* +		 * If this is the first node with a given name property, +		 * add a symlink with the name property as its name. +		 */ +		sib = NULL; +		while ((sib = of_get_next_child(np, sib)) && sib != child) +			if (sib->name && strcmp(sib->name, child->name) == 0) +				break; +		if (sib == child && strncmp(p, child->name, l) != 0) { +			al = proc_symlink(child->name, de, ent->name); +			if (al == 0) { +				of_node_put(sib); +				break; +			} +			set_node_name_link(child, al); +			*lastp = al; +			lastp = &al->next; +		} +		of_node_put(sib); +		/* +		 * Add another directory with the @address part as its name. 
+		 */ +		al = proc_symlink(at, de, ent->name); +		if (al == 0) +			break; +		set_node_addr_link(child, al); +		*lastp = al; +		lastp = &al->next; +	} +	of_node_put(child); +	*lastp = NULL; +	de->subdir = list; +} + +/* + * Called on initialization to set up the /proc/device-tree subtree + */ +void proc_device_tree_init(void) +{ +	struct device_node *root; +	if ( !have_of ) +		return; +	proc_device_tree = proc_mkdir("device-tree", NULL); +	if (proc_device_tree == 0) +		return; +	root = of_find_node_by_path("/"); +	if (root == 0) { +		printk(KERN_ERR "/proc/device-tree: can't find root\n"); +		return; +	} +	proc_device_tree_add_node(root, proc_device_tree); +	of_node_put(root); +} diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c new file mode 100644 index 00000000000..1d75d6ab689 --- /dev/null +++ b/fs/proc/proc_misc.c @@ -0,0 +1,615 @@ +/* + *  linux/fs/proc/proc_misc.c + * + *  linux/fs/proc/array.c + *  Copyright (C) 1992  by Linus Torvalds + *  based on ideas by Darren Senn + * + *  This used to be the part of array.c. See the rest of history and credits + *  there. I took this into a separate file and switched the thing to generic + *  proc_file_inode_operations, leaving in array.c only per-process stuff. + *  Inumbers allocation made dynamic (via create_proc_entry()).  AV, May 1999. + * + * Changes: + * Fulton Green      :  Encapsulated position metric calculations. + *			<kernel@FultonGreen.com> + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/tty.h> +#include <linux/string.h> +#include <linux/mman.h> +#include <linux/proc_fs.h> +#include <linux/ioport.h> +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/signal.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/smp_lock.h> +#include <linux/seq_file.h> +#include <linux/times.h> +#include <linux/profile.h> +#include <linux/blkdev.h> +#include <linux/hugetlb.h> +#include <linux/jiffies.h> +#include <linux/sysrq.h> +#include <linux/vmalloc.h> +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/tlb.h> +#include <asm/div64.h> +#include "internal.h" + +#define LOAD_INT(x) ((x) >> FSHIFT) +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) +/* + * Warning: stuff below (imported functions) assumes that its output will fit + * into one page. For some of those functions it may be wrong. Moreover, we + * have a way to deal with that gracefully. Right now I used straightforward + * wrappers, but this needs further analysis wrt potential overflows. 
+ */ +extern int get_hardware_list(char *); +extern int get_stram_list(char *); +extern int get_chrdev_list(char *); +extern int get_filesystem_list(char *); +extern int get_exec_domain_list(char *); +extern int get_dma_list(char *); +extern int get_locks_status (char *, char **, off_t, int); + +static int proc_calc_metrics(char *page, char **start, off_t off, +				 int count, int *eof, int len) +{ +	if (len <= off+count) *eof = 1; +	*start = page + off; +	len -= off; +	if (len>count) len = count; +	if (len<0) len = 0; +	return len; +} + +static int loadavg_read_proc(char *page, char **start, off_t off, +				 int count, int *eof, void *data) +{ +	int a, b, c; +	int len; + +	a = avenrun[0] + (FIXED_1/200); +	b = avenrun[1] + (FIXED_1/200); +	c = avenrun[2] + (FIXED_1/200); +	len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", +		LOAD_INT(a), LOAD_FRAC(a), +		LOAD_INT(b), LOAD_FRAC(b), +		LOAD_INT(c), LOAD_FRAC(c), +		nr_running(), nr_threads, last_pid); +	return proc_calc_metrics(page, start, off, count, eof, len); +} + +static int uptime_read_proc(char *page, char **start, off_t off, +				 int count, int *eof, void *data) +{ +	struct timespec uptime; +	struct timespec idle; +	int len; +	cputime_t idletime = cputime_add(init_task.utime, init_task.stime); + +	do_posix_clock_monotonic_gettime(&uptime); +	cputime_to_timespec(idletime, &idle); +	len = sprintf(page,"%lu.%02lu %lu.%02lu\n", +			(unsigned long) uptime.tv_sec, +			(uptime.tv_nsec / (NSEC_PER_SEC / 100)), +			(unsigned long) idle.tv_sec, +			(idle.tv_nsec / (NSEC_PER_SEC / 100))); + +	return proc_calc_metrics(page, start, off, count, eof, len); +} + +static int meminfo_read_proc(char *page, char **start, off_t off, +				 int count, int *eof, void *data) +{ +	struct sysinfo i; +	int len; +	struct page_state ps; +	unsigned long inactive; +	unsigned long active; +	unsigned long free; +	unsigned long committed; +	unsigned long allowed; +	struct vmalloc_info vmi; + +	get_page_state(&ps); +	get_zone_counts(&active, &inactive, &free); + +/* + * display in kilobytes. + */ +#define K(x) ((x) << (PAGE_SHIFT - 10)) +	si_meminfo(&i); +	si_swapinfo(&i); +	committed = atomic_read(&vm_committed_space); +	allowed = ((totalram_pages - hugetlb_total_pages()) +		* sysctl_overcommit_ratio / 100) + total_swap_pages; + +	get_vmalloc_info(&vmi); + +	/* +	 * Tagged format, for easy grepping and expansion. 
+	 */ +	len = sprintf(page, +		"MemTotal:     %8lu kB\n" +		"MemFree:      %8lu kB\n" +		"Buffers:      %8lu kB\n" +		"Cached:       %8lu kB\n" +		"SwapCached:   %8lu kB\n" +		"Active:       %8lu kB\n" +		"Inactive:     %8lu kB\n" +		"HighTotal:    %8lu kB\n" +		"HighFree:     %8lu kB\n" +		"LowTotal:     %8lu kB\n" +		"LowFree:      %8lu kB\n" +		"SwapTotal:    %8lu kB\n" +		"SwapFree:     %8lu kB\n" +		"Dirty:        %8lu kB\n" +		"Writeback:    %8lu kB\n" +		"Mapped:       %8lu kB\n" +		"Slab:         %8lu kB\n" +		"CommitLimit:  %8lu kB\n" +		"Committed_AS: %8lu kB\n" +		"PageTables:   %8lu kB\n" +		"VmallocTotal: %8lu kB\n" +		"VmallocUsed:  %8lu kB\n" +		"VmallocChunk: %8lu kB\n", +		K(i.totalram), +		K(i.freeram), +		K(i.bufferram), +		K(get_page_cache_size()-total_swapcache_pages-i.bufferram), +		K(total_swapcache_pages), +		K(active), +		K(inactive), +		K(i.totalhigh), +		K(i.freehigh), +		K(i.totalram-i.totalhigh), +		K(i.freeram-i.freehigh), +		K(i.totalswap), +		K(i.freeswap), +		K(ps.nr_dirty), +		K(ps.nr_writeback), +		K(ps.nr_mapped), +		K(ps.nr_slab), +		K(allowed), +		K(committed), +		K(ps.nr_page_table_pages), +		(unsigned long)VMALLOC_TOTAL >> 10, +		vmi.used >> 10, +		vmi.largest_chunk >> 10 +		); + +		len += hugetlb_report_meminfo(page + len); + +	return proc_calc_metrics(page, start, off, count, eof, len); +#undef K +} + +extern struct seq_operations fragmentation_op; +static int fragmentation_open(struct inode *inode, struct file *file) +{ +	(void)inode; +	return seq_open(file, &fragmentation_op); +} + +static struct file_operations fragmentation_file_operations = { +	.open		= fragmentation_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release, +}; + +static int version_read_proc(char *page, char **start, off_t off, +				 int count, int *eof, void *data) +{ +	int len; + +	strcpy(page, linux_banner); +	len = strlen(page); +	return proc_calc_metrics(page, start, off, count, eof, len); +} + +extern struct seq_operations cpuinfo_op; +static int cpuinfo_open(struct inode *inode, struct file *file) +{ +	return seq_open(file, &cpuinfo_op); +} +static struct file_operations proc_cpuinfo_operations = { +	.open		= cpuinfo_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release, +}; + +extern struct seq_operations vmstat_op; +static int vmstat_open(struct inode *inode, struct file *file) +{ +	return seq_open(file, &vmstat_op); +} +static struct file_operations proc_vmstat_file_operations = { +	.open		= vmstat_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release, +}; + +#ifdef CONFIG_PROC_HARDWARE +static int hardware_read_proc(char *page, char **start, off_t off, +				 int count, int *eof, void *data) +{ +	int len = get_hardware_list(page); +	return proc_calc_metrics(page, start, off, count, eof, len); +} +#endif + +#ifdef CONFIG_STRAM_PROC +static int stram_read_proc(char *page, char **start, off_t off, +				 int count, int *eof, void *data) +{ +	int len = get_stram_list(page); +	return proc_calc_metrics(page, start, off, count, eof, len); +} +#endif + +extern struct seq_operations partitions_op; +static int partitions_open(struct inode *inode, struct file *file) +{ +	return seq_open(file, &partitions_op); +} +static struct file_operations proc_partitions_operations = { +	.open		= partitions_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release, +}; + +extern struct seq_operations diskstats_op; +static int diskstats_open(struct inode *inode, struct file *file) +{ +	return seq_open(file, 
&diskstats_op); +} +static struct file_operations proc_diskstats_operations = { +	.open		= diskstats_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release, +}; + +#ifdef CONFIG_MODULES +extern struct seq_operations modules_op; +static int modules_open(struct inode *inode, struct file *file) +{ +	return seq_open(file, &modules_op); +} +static struct file_operations proc_modules_operations = { +	.open		= modules_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release, +}; +#endif + +extern struct seq_operations slabinfo_op; +extern ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *); +static int slabinfo_open(struct inode *inode, struct file *file) +{ +	return seq_open(file, &slabinfo_op); +} +static struct file_operations proc_slabinfo_operations = { +	.open		= slabinfo_open, +	.read		= seq_read, +	.write		= slabinfo_write, +	.llseek		= seq_lseek, +	.release	= seq_release, +}; + +static int show_stat(struct seq_file *p, void *v) +{ +	int i; +	unsigned long jif; +	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; +	u64 sum = 0; + +	user = nice = system = idle = iowait = +		irq = softirq = steal = cputime64_zero; +	jif = - wall_to_monotonic.tv_sec; +	if (wall_to_monotonic.tv_nsec) +		--jif; + +	for_each_cpu(i) { +		int j; + +		user = cputime64_add(user, kstat_cpu(i).cpustat.user); +		nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); +		system = cputime64_add(system, kstat_cpu(i).cpustat.system); +		idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); +		iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); +		irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); +		softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); +		steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); +		for (j = 0 ; j < NR_IRQS ; j++) +			sum += kstat_cpu(i).irqs[j]; +	} + +	seq_printf(p, "cpu  %llu %llu %llu %llu %llu %llu %llu %llu\n", +		(unsigned long long)cputime64_to_clock_t(user), +		(unsigned long long)cputime64_to_clock_t(nice), +		(unsigned long long)cputime64_to_clock_t(system), +		(unsigned long long)cputime64_to_clock_t(idle), +		(unsigned long long)cputime64_to_clock_t(iowait), +		(unsigned long long)cputime64_to_clock_t(irq), +		(unsigned long long)cputime64_to_clock_t(softirq), +		(unsigned long long)cputime64_to_clock_t(steal)); +	for_each_online_cpu(i) { + +		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */ +		user = kstat_cpu(i).cpustat.user; +		nice = kstat_cpu(i).cpustat.nice; +		system = kstat_cpu(i).cpustat.system; +		idle = kstat_cpu(i).cpustat.idle; +		iowait = kstat_cpu(i).cpustat.iowait; +		irq = kstat_cpu(i).cpustat.irq; +		softirq = kstat_cpu(i).cpustat.softirq; +		steal = kstat_cpu(i).cpustat.steal; +		seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu\n", +			i, +			(unsigned long long)cputime64_to_clock_t(user), +			(unsigned long long)cputime64_to_clock_t(nice), +			(unsigned long long)cputime64_to_clock_t(system), +			(unsigned long long)cputime64_to_clock_t(idle), +			(unsigned long long)cputime64_to_clock_t(iowait), +			(unsigned long long)cputime64_to_clock_t(irq), +			(unsigned long long)cputime64_to_clock_t(softirq), +			(unsigned long long)cputime64_to_clock_t(steal)); +	} +	seq_printf(p, "intr %llu", (unsigned long long)sum); + +#if !defined(CONFIG_PPC64) && !defined(CONFIG_ALPHA) +	for (i = 0; i < NR_IRQS; i++) +		seq_printf(p, " %u", kstat_irqs(i)); +#endif + +	seq_printf(p, +		"\nctxt %llu\n" +		"btime %lu\n" +		"processes %lu\n" +		"procs_running 
%lu\n" +		"procs_blocked %lu\n", +		nr_context_switches(), +		(unsigned long)jif, +		total_forks, +		nr_running(), +		nr_iowait()); + +	return 0; +} + +static int stat_open(struct inode *inode, struct file *file) +{ +	unsigned size = 4096 * (1 + num_possible_cpus() / 32); +	char *buf; +	struct seq_file *m; +	int res; + +	/* don't ask for more than the kmalloc() max size, currently 128 KB */ +	if (size > 128 * 1024) +		size = 128 * 1024; +	buf = kmalloc(size, GFP_KERNEL); +	if (!buf) +		return -ENOMEM; + +	res = single_open(file, show_stat, NULL); +	if (!res) { +		m = file->private_data; +		m->buf = buf; +		m->size = size; +	} else +		kfree(buf); +	return res; +} +static struct file_operations proc_stat_operations = { +	.open		= stat_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= single_release, +}; + +static int devices_read_proc(char *page, char **start, off_t off, +				 int count, int *eof, void *data) +{ +	int len = get_chrdev_list(page); +	len += get_blkdev_list(page+len); +	return proc_calc_metrics(page, start, off, count, eof, len); +} + +/* + * /proc/interrupts + */ +static void *int_seq_start(struct seq_file *f, loff_t *pos) +{ +	return (*pos <= NR_IRQS) ? pos : NULL; +} + +static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos) +{ +	(*pos)++; +	if (*pos > NR_IRQS) +		return NULL; +	return pos; +} + +static void int_seq_stop(struct seq_file *f, void *v) +{ +	/* Nothing to do */ +} + + +extern int show_interrupts(struct seq_file *f, void *v); /* In arch code */ +static struct seq_operations int_seq_ops = { +	.start = int_seq_start, +	.next  = int_seq_next, +	.stop  = int_seq_stop, +	.show  = show_interrupts +}; + +static int interrupts_open(struct inode *inode, struct file *filp) +{ +	return seq_open(filp, &int_seq_ops); +} + +static struct file_operations proc_interrupts_operations = { +	.open		= interrupts_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release, +}; + +static int filesystems_read_proc(char *page, char **start, off_t off, +				 int count, int *eof, void *data) +{ +	int len = get_filesystem_list(page); +	return proc_calc_metrics(page, start, off, count, eof, len); +} + +static int cmdline_read_proc(char *page, char **start, off_t off, +				 int count, int *eof, void *data) +{ +	int len; + +	len = sprintf(page, "%s\n", saved_command_line); +	return proc_calc_metrics(page, start, off, count, eof, len); +} + +static int locks_read_proc(char *page, char **start, off_t off, +				 int count, int *eof, void *data) +{ +	int len = get_locks_status(page, start, off, count); + +	if (len < count) +		*eof = 1; +	return len; +} + +static int execdomains_read_proc(char *page, char **start, off_t off, +				 int count, int *eof, void *data) +{ +	int len = get_exec_domain_list(page); +	return proc_calc_metrics(page, start, off, count, eof, len); +} + +#ifdef CONFIG_MAGIC_SYSRQ +/* + * writing 'C' to /proc/sysrq-trigger is like sysrq-C + */ +static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf, +				   size_t count, loff_t *ppos) +{ +	if (count) { +		char c; + +		if (get_user(c, buf)) +			return -EFAULT; +		__handle_sysrq(c, NULL, NULL, 0); +	} +	return count; +} + +static struct file_operations proc_sysrq_trigger_operations = { +	.write		= write_sysrq_trigger, +}; +#endif + +struct proc_dir_entry *proc_root_kcore; + +void create_seq_entry(char *name, mode_t mode, struct file_operations *f) +{ +	struct proc_dir_entry *entry; +	entry = create_proc_entry(name, mode, NULL); +	if (entry) +		entry->proc_fops = f; +} + 
+void __init proc_misc_init(void) +{ +	struct proc_dir_entry *entry; +	static struct { +		char *name; +		int (*read_proc)(char*,char**,off_t,int,int*,void*); +	} *p, simple_ones[] = { +		{"loadavg",     loadavg_read_proc}, +		{"uptime",	uptime_read_proc}, +		{"meminfo",	meminfo_read_proc}, +		{"version",	version_read_proc}, +#ifdef CONFIG_PROC_HARDWARE +		{"hardware",	hardware_read_proc}, +#endif +#ifdef CONFIG_STRAM_PROC +		{"stram",	stram_read_proc}, +#endif +		{"devices",	devices_read_proc}, +		{"filesystems",	filesystems_read_proc}, +		{"cmdline",	cmdline_read_proc}, +		{"locks",	locks_read_proc}, +		{"execdomains",	execdomains_read_proc}, +		{NULL,} +	}; +	for (p = simple_ones; p->name; p++) +		create_proc_read_entry(p->name, 0, NULL, p->read_proc, NULL); + +	proc_symlink("mounts", NULL, "self/mounts"); + +	/* And now for trickier ones */ +	entry = create_proc_entry("kmsg", S_IRUSR, &proc_root); +	if (entry) +		entry->proc_fops = &proc_kmsg_operations; +	create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations); +	create_seq_entry("partitions", 0, &proc_partitions_operations); +	create_seq_entry("stat", 0, &proc_stat_operations); +	create_seq_entry("interrupts", 0, &proc_interrupts_operations); +	create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations); +	create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations); +	create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations); +	create_seq_entry("diskstats", 0, &proc_diskstats_operations); +#ifdef CONFIG_MODULES +	create_seq_entry("modules", 0, &proc_modules_operations); +#endif +#ifdef CONFIG_SCHEDSTATS +	create_seq_entry("schedstat", 0, &proc_schedstat_operations); +#endif +#ifdef CONFIG_PROC_KCORE +	proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); +	if (proc_root_kcore) { +		proc_root_kcore->proc_fops = &proc_kcore_operations; +		proc_root_kcore->size = +				(size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; +	} +#endif +#ifdef CONFIG_MAGIC_SYSRQ +	entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL); +	if (entry) +		entry->proc_fops = &proc_sysrq_trigger_operations; +#endif +#ifdef CONFIG_PPC32 +	{ +		extern struct file_operations ppc_htab_operations; +		entry = create_proc_entry("ppc_htab", S_IRUGO|S_IWUSR, NULL); +		if (entry) +			entry->proc_fops = &ppc_htab_operations; +	} +#endif +} diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c new file mode 100644 index 00000000000..15c4455b09e --- /dev/null +++ b/fs/proc/proc_tty.c @@ -0,0 +1,242 @@ +/* + * proc_tty.c -- handles /proc/tty + * + * Copyright 1997, Theodore Ts'o + */ + +#include <asm/uaccess.h> + +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/tty.h> +#include <linux/seq_file.h> +#include <linux/bitops.h> + +static int tty_ldiscs_read_proc(char *page, char **start, off_t off, +				int count, int *eof, void *data); + +/* + * The /proc/tty directory inodes... + */ +static struct proc_dir_entry *proc_tty_ldisc, *proc_tty_driver; + +/* + * This is the handler for /proc/tty/drivers + */ +static void show_tty_range(struct seq_file *m, struct tty_driver *p, +	dev_t from, int num) +{ +	seq_printf(m, "%-20s ", p->driver_name ? 
p->driver_name : "unknown"); +	seq_printf(m, "/dev/%-8s ", p->name); +	if (p->num > 1) { +		seq_printf(m, "%3d %d-%d ", MAJOR(from), MINOR(from), +			MINOR(from) + num - 1); +	} else { +		seq_printf(m, "%3d %7d ", MAJOR(from), MINOR(from)); +	} +	switch (p->type) { +	case TTY_DRIVER_TYPE_SYSTEM: +		seq_printf(m, "system"); +		if (p->subtype == SYSTEM_TYPE_TTY) +			seq_printf(m, ":/dev/tty"); +		else if (p->subtype == SYSTEM_TYPE_SYSCONS) +			seq_printf(m, ":console"); +		else if (p->subtype == SYSTEM_TYPE_CONSOLE) +			seq_printf(m, ":vtmaster"); +		break; +	case TTY_DRIVER_TYPE_CONSOLE: +		seq_printf(m, "console"); +		break; +	case TTY_DRIVER_TYPE_SERIAL: +		seq_printf(m, "serial"); +		break; +	case TTY_DRIVER_TYPE_PTY: +		if (p->subtype == PTY_TYPE_MASTER) +			seq_printf(m, "pty:master"); +		else if (p->subtype == PTY_TYPE_SLAVE) +			seq_printf(m, "pty:slave"); +		else +			seq_printf(m, "pty"); +		break; +	default: +		seq_printf(m, "type:%d.%d", p->type, p->subtype); +	} +	seq_putc(m, '\n'); +} + +static int show_tty_driver(struct seq_file *m, void *v) +{ +	struct tty_driver *p = v; +	dev_t from = MKDEV(p->major, p->minor_start); +	dev_t to = from + p->num; + +	if (&p->tty_drivers == tty_drivers.next) { +		/* pseudo-drivers first */ +		seq_printf(m, "%-20s /dev/%-8s ", "/dev/tty", "tty"); +		seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 0); +		seq_printf(m, "system:/dev/tty\n"); +		seq_printf(m, "%-20s /dev/%-8s ", "/dev/console", "console"); +		seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 1); +		seq_printf(m, "system:console\n"); +#ifdef CONFIG_UNIX98_PTYS +		seq_printf(m, "%-20s /dev/%-8s ", "/dev/ptmx", "ptmx"); +		seq_printf(m, "%3d %7d ", TTYAUX_MAJOR, 2); +		seq_printf(m, "system\n"); +#endif +#ifdef CONFIG_VT +		seq_printf(m, "%-20s /dev/%-8s ", "/dev/vc/0", "vc/0"); +		seq_printf(m, "%3d %7d ", TTY_MAJOR, 0); +		seq_printf(m, "system:vtmaster\n"); +#endif +	} + +	while (MAJOR(from) < MAJOR(to)) { +		dev_t next = MKDEV(MAJOR(from)+1, 0); +		show_tty_range(m, p, from, next - from); +		from = next; +	} +	if (from != to) +		show_tty_range(m, p, from, to - from); +	return 0; +} + +/* iterator */ +static void *t_start(struct seq_file *m, loff_t *pos) +{ +	struct list_head *p; +	loff_t l = *pos; +	list_for_each(p, &tty_drivers) +		if (!l--) +			return list_entry(p, struct tty_driver, tty_drivers); +	return NULL; +} + +static void *t_next(struct seq_file *m, void *v, loff_t *pos) +{ +	struct list_head *p = ((struct tty_driver *)v)->tty_drivers.next; +	(*pos)++; +	return p==&tty_drivers ? NULL : +			list_entry(p, struct tty_driver, tty_drivers); +} + +static void t_stop(struct seq_file *m, void *v) +{ +} + +static struct seq_operations tty_drivers_op = { +	.start	= t_start, +	.next	= t_next, +	.stop	= t_stop, +	.show	= show_tty_driver +}; + +static int tty_drivers_open(struct inode *inode, struct file *file) +{ +	return seq_open(file, &tty_drivers_op); +} + +static struct file_operations proc_tty_drivers_operations = { +	.open		= tty_drivers_open, +	.read		= seq_read, +	.llseek		= seq_lseek, +	.release	= seq_release, +}; + +/* + * This is the handler for /proc/tty/ldiscs + */ +static int tty_ldiscs_read_proc(char *page, char **start, off_t off, +				int count, int *eof, void *data) +{ +	int	i; +	int	len = 0; +	off_t	begin = 0; +	struct tty_ldisc *ld; +	 +	for (i=0; i < NR_LDISCS; i++) { +		ld = tty_ldisc_get(i); +		if (ld == NULL) +			continue; +		len += sprintf(page+len, "%-10s %2d\n", +			       ld->name ? 
ld->name : "???", i); +		tty_ldisc_put(i); +		if (len+begin > off+count) +			break; +		if (len+begin < off) { +			begin += len; +			len = 0; +		} +	} +	if (i >= NR_LDISCS) +		*eof = 1; +	if (off >= len+begin) +		return 0; +	*start = page + (off-begin); +	return ((count < begin+len-off) ? count : begin+len-off); +} + +/* + * This function is called by tty_register_driver() to handle + * registering the driver's /proc handler into /proc/tty/driver/<foo> + */ +void proc_tty_register_driver(struct tty_driver *driver) +{ +	struct proc_dir_entry *ent; +		 +	if ((!driver->read_proc && !driver->write_proc) || +	    !driver->driver_name || +	    driver->proc_entry) +		return; + +	ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); +	if (!ent) +		return; +	ent->read_proc = driver->read_proc; +	ent->write_proc = driver->write_proc; +	ent->owner = driver->owner; +	ent->data = driver; + +	driver->proc_entry = ent; +} + +/* + * This function is called by tty_unregister_driver() + */ +void proc_tty_unregister_driver(struct tty_driver *driver) +{ +	struct proc_dir_entry *ent; + +	ent = driver->proc_entry; +	if (!ent) +		return; +		 +	remove_proc_entry(driver->driver_name, proc_tty_driver); +	 +	driver->proc_entry = NULL; +} + +/* + * Called by proc_root_init() to initialize the /proc/tty subtree + */ +void __init proc_tty_init(void) +{ +	struct proc_dir_entry *entry; +	if (!proc_mkdir("tty", NULL)) +		return; +	proc_tty_ldisc = proc_mkdir("tty/ldisc", NULL); +	/* +	 * /proc/tty/driver/serial reveals the exact character counts for +	 * serial links which is just too easy to abuse for inferring +	 * password lengths and inter-keystroke timings during password +	 * entry. +	 */ +	proc_tty_driver = proc_mkdir_mode("tty/driver", S_IRUSR | S_IXUSR, NULL); + +	create_proc_read_entry("tty/ldiscs", 0, NULL, tty_ldiscs_read_proc, NULL); +	entry = create_proc_entry("tty/drivers", 0, NULL); +	if (entry) +		entry->proc_fops = &proc_tty_drivers_operations; +} diff --git a/fs/proc/root.c b/fs/proc/root.c new file mode 100644 index 00000000000..aef148f099a --- /dev/null +++ b/fs/proc/root.c @@ -0,0 +1,161 @@ +/* + *  linux/fs/proc/root.c + * + *  Copyright (C) 1991, 1992 Linus Torvalds + * + *  proc root directory handling functions + */ + +#include <asm/uaccess.h> + +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/config.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/bitops.h> +#include <linux/smp_lock.h> + +struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; + +#ifdef CONFIG_SYSCTL +struct proc_dir_entry *proc_sys_root; +#endif + +static struct super_block *proc_get_sb(struct file_system_type *fs_type, +	int flags, const char *dev_name, void *data) +{ +	return get_sb_single(fs_type, flags, data, proc_fill_super); +} + +static struct file_system_type proc_fs_type = { +	.name		= "proc", +	.get_sb		= proc_get_sb, +	.kill_sb	= kill_anon_super, +}; + +extern int __init proc_init_inodecache(void); +void __init proc_root_init(void) +{ +	int err = proc_init_inodecache(); +	if (err) +		return; +	err = register_filesystem(&proc_fs_type); +	if (err) +		return; +	proc_mnt = kern_mount(&proc_fs_type); +	err = PTR_ERR(proc_mnt); +	if (IS_ERR(proc_mnt)) { +		unregister_filesystem(&proc_fs_type); +		return; +	} +	proc_misc_init(); +	proc_net = proc_mkdir("net", NULL); +	proc_net_stat = proc_mkdir("net/stat", NULL); + +#ifdef CONFIG_SYSVIPC +	proc_mkdir("sysvipc", NULL); +#endif 
+#ifdef CONFIG_SYSCTL +	proc_sys_root = proc_mkdir("sys", NULL); +#endif +#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) +	proc_mkdir("sys/fs", NULL); +	proc_mkdir("sys/fs/binfmt_misc", NULL); +#endif +	proc_root_fs = proc_mkdir("fs", NULL); +	proc_root_driver = proc_mkdir("driver", NULL); +	proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ +#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) +	/* just give it a mountpoint */ +	proc_mkdir("openprom", NULL); +#endif +	proc_tty_init(); +#ifdef CONFIG_PROC_DEVICETREE +	proc_device_tree_init(); +#endif +	proc_bus = proc_mkdir("bus", NULL); +} + +static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) +{ +	/* +	 * nr_threads is actually protected by the tasklist_lock; +	 * however, it's conventional to do reads, especially for +	 * reporting, without any locking whatsoever. +	 */ +	if (dir->i_ino == PROC_ROOT_INO) /* check for safety... */ +		dir->i_nlink = proc_root.nlink + nr_threads; + +	if (!proc_lookup(dir, dentry, nd)) { +		return NULL; +	} +	 +	return proc_pid_lookup(dir, dentry, nd); +} + +static int proc_root_readdir(struct file * filp, +	void * dirent, filldir_t filldir) +{ +	unsigned int nr = filp->f_pos; +	int ret; + +	lock_kernel(); + +	if (nr < FIRST_PROCESS_ENTRY) { +		int error = proc_readdir(filp, dirent, filldir); +		if (error <= 0) { +			unlock_kernel(); +			return error; +		} +		filp->f_pos = FIRST_PROCESS_ENTRY; +	} +	unlock_kernel(); + +	ret = proc_pid_readdir(filp, dirent, filldir); +	return ret; +} + +/* + * The root /proc directory is special, as it has the + * <pid> directories. Thus we don't use the generic + * directory handling functions for that.. + */ +static struct file_operations proc_root_operations = { +	.read		 = generic_read_dir, +	.readdir	 = proc_root_readdir, +}; + +/* + * proc root can do almost nothing.. + */ +static struct inode_operations proc_root_inode_operations = { +	.lookup		= proc_root_lookup, +}; + +/* + * This is the root "inode" in the /proc tree.. 
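+ * (a compile-time proc_dir_entry rather than an inode proper; the real
+ * root inode is instantiated from it at mount time, see proc_fill_super())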
+ */ +struct proc_dir_entry proc_root = { +	.low_ino	= PROC_ROOT_INO,  +	.namelen	= 5,  +	.name		= "/proc", +	.mode		= S_IFDIR | S_IRUGO | S_IXUGO,  +	.nlink		= 2,  +	.proc_iops	= &proc_root_inode_operations,  +	.proc_fops	= &proc_root_operations, +	.parent		= &proc_root, +}; + +EXPORT_SYMBOL(proc_symlink); +EXPORT_SYMBOL(proc_mkdir); +EXPORT_SYMBOL(create_proc_entry); +EXPORT_SYMBOL(remove_proc_entry); +EXPORT_SYMBOL(proc_root); +EXPORT_SYMBOL(proc_root_fs); +EXPORT_SYMBOL(proc_net); +EXPORT_SYMBOL(proc_net_stat); +EXPORT_SYMBOL(proc_bus); +EXPORT_SYMBOL(proc_root_driver); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c new file mode 100644 index 00000000000..28b4a0253a9 --- /dev/null +++ b/fs/proc/task_mmu.c @@ -0,0 +1,235 @@ +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/mount.h> +#include <linux/seq_file.h> +#include <asm/elf.h> +#include <asm/uaccess.h> +#include "internal.h" + +char *task_mem(struct mm_struct *mm, char *buffer) +{ +	unsigned long data, text, lib; + +	data = mm->total_vm - mm->shared_vm - mm->stack_vm; +	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; +	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; +	buffer += sprintf(buffer, +		"VmSize:\t%8lu kB\n" +		"VmLck:\t%8lu kB\n" +		"VmRSS:\t%8lu kB\n" +		"VmData:\t%8lu kB\n" +		"VmStk:\t%8lu kB\n" +		"VmExe:\t%8lu kB\n" +		"VmLib:\t%8lu kB\n" +		"VmPTE:\t%8lu kB\n", +		(mm->total_vm - mm->reserved_vm) << (PAGE_SHIFT-10), +		mm->locked_vm << (PAGE_SHIFT-10), +		get_mm_counter(mm, rss) << (PAGE_SHIFT-10), +		data << (PAGE_SHIFT-10), +		mm->stack_vm << (PAGE_SHIFT-10), text, lib, +		(PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10); +	return buffer; +} + +unsigned long task_vsize(struct mm_struct *mm) +{ +	return PAGE_SIZE * mm->total_vm; +} + +int task_statm(struct mm_struct *mm, int *shared, int *text, +	       int *data, int *resident) +{ +	int rss = get_mm_counter(mm, rss); + +	*shared = rss - get_mm_counter(mm, anon_rss); +	*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) +								>> PAGE_SHIFT; +	*data = mm->total_vm - mm->shared_vm; +	*resident = rss; +	return mm->total_vm; +} + +int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ +	struct vm_area_struct * vma; +	int result = -ENOENT; +	struct task_struct *task = proc_task(inode); +	struct mm_struct * mm = get_task_mm(task); + +	if (!mm) +		goto out; +	down_read(&mm->mmap_sem); + +	vma = mm->mmap; +	while (vma) { +		if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) +			break; +		vma = vma->vm_next; +	} + +	if (vma) { +		*mnt = mntget(vma->vm_file->f_vfsmnt); +		*dentry = dget(vma->vm_file->f_dentry); +		result = 0; +	} + +	up_read(&mm->mmap_sem); +	mmput(mm); +out: +	return result; +} + +static void pad_len_spaces(struct seq_file *m, int len) +{ +	len = 25 + sizeof(void*) * 6 - len; +	if (len < 1) +		len = 1; +	seq_printf(m, "%*c", len, ' '); +} + +static int show_map(struct seq_file *m, void *v) +{ +	struct task_struct *task = m->private; +	struct vm_area_struct *map = v; +	struct mm_struct *mm = map->vm_mm; +	struct file *file = map->vm_file; +	int flags = map->vm_flags; +	unsigned long ino = 0; +	dev_t dev = 0; +	int len; + +	if (file) { +		struct inode *inode = map->vm_file->f_dentry->d_inode; +		dev = inode->i_sb->s_dev; +		ino = inode->i_ino; +	} + +	seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", +			map->vm_start, +			map->vm_end, +			flags & VM_READ ? 'r' : '-', +			flags & VM_WRITE ? 'w' : '-', +			flags & VM_EXEC ? 
'x' : '-', +			flags & VM_MAYSHARE ? 's' : 'p', +			map->vm_pgoff << PAGE_SHIFT, +			MAJOR(dev), MINOR(dev), ino, &len); + +	/* +	 * Print the dentry name for named mappings, and a +	 * special [heap] marker for the heap: +	 */ +	if (map->vm_file) { +		pad_len_spaces(m, len); +		seq_path(m, file->f_vfsmnt, file->f_dentry, ""); +	} else { +		if (mm) { +			if (map->vm_start <= mm->start_brk && +						map->vm_end >= mm->brk) { +				pad_len_spaces(m, len); +				seq_puts(m, "[heap]"); +			} else { +				if (map->vm_start <= mm->start_stack && +					map->vm_end >= mm->start_stack) { + +					pad_len_spaces(m, len); +					seq_puts(m, "[stack]"); +				} +			} +		} else { +			pad_len_spaces(m, len); +			seq_puts(m, "[vdso]"); +		} +	} +	seq_putc(m, '\n'); +	if (m->count < m->size)  /* map is copied successfully */ +		m->version = (map != get_gate_vma(task))? map->vm_start: 0; +	return 0; +} + +static void *m_start(struct seq_file *m, loff_t *pos) +{ +	struct task_struct *task = m->private; +	unsigned long last_addr = m->version; +	struct mm_struct *mm; +	struct vm_area_struct *map, *tail_map; +	loff_t l = *pos; + +	/* +	 * We remember last_addr rather than next_addr to hit with +	 * mmap_cache most of the time. We have zero last_addr at +	 * the begining and also after lseek. We will have -1 last_addr +	 * after the end of the maps. +	 */ + +	if (last_addr == -1UL) +		return NULL; + +	mm = get_task_mm(task); +	if (!mm) +		return NULL; + +	tail_map = get_gate_vma(task); +	down_read(&mm->mmap_sem); + +	/* Start with last addr hint */ +	if (last_addr && (map = find_vma(mm, last_addr))) { +		map = map->vm_next; +		goto out; +	} + +	/* +	 * Check the map index is within the range and do +	 * sequential scan until m_index. +	 */ +	map = NULL; +	if ((unsigned long)l < mm->map_count) { +		map = mm->mmap; +		while (l-- && map) +			map = map->vm_next; +		goto out; +	} + +	if (l != mm->map_count) +		tail_map = NULL; /* After gate map */ + +out: +	if (map) +		return map; + +	/* End of maps has reached */ +	m->version = (tail_map != NULL)? 0: -1UL; +	up_read(&mm->mmap_sem); +	mmput(mm); +	return tail_map; +} + +static void m_stop(struct seq_file *m, void *v) +{ +	struct task_struct *task = m->private; +	struct vm_area_struct *map = v; +	if (map && map != get_gate_vma(task)) { +		struct mm_struct *mm = map->vm_mm; +		up_read(&mm->mmap_sem); +		mmput(mm); +	} +} + +static void *m_next(struct seq_file *m, void *v, loff_t *pos) +{ +	struct task_struct *task = m->private; +	struct vm_area_struct *map = v; +	struct vm_area_struct *tail_map = get_gate_vma(task); + +	(*pos)++; +	if (map && (map != tail_map) && map->vm_next) +		return map->vm_next; +	m_stop(m, v); +	return (map != tail_map)? tail_map: NULL; +} + +struct seq_operations proc_pid_maps_op = { +	.start	= m_start, +	.next	= m_next, +	.stop	= m_stop, +	.show	= show_map +}; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c new file mode 100644 index 00000000000..8f68827ed10 --- /dev/null +++ b/fs/proc/task_nommu.c @@ -0,0 +1,164 @@ + +#include <linux/mm.h> +#include <linux/file.h> +#include <linux/mount.h> +#include <linux/seq_file.h> +#include "internal.h" + +/* + * Logic: we've got two memory sums for each process, "shared", and + * "non-shared". Shared memory may get counted more then once, for + * each process that owns it. Non-shared memory is counted + * accurately. 
+ */ +char *task_mem(struct mm_struct *mm, char *buffer) +{ +	struct vm_list_struct *vml; +	unsigned long bytes = 0, sbytes = 0, slack = 0; +         +	down_read(&mm->mmap_sem); +	for (vml = mm->context.vmlist; vml; vml = vml->next) { +		if (!vml->vma) +			continue; + +		bytes += kobjsize(vml); +		if (atomic_read(&mm->mm_count) > 1 || +		    atomic_read(&vml->vma->vm_usage) > 1 +		    ) { +			sbytes += kobjsize((void *) vml->vma->vm_start); +			sbytes += kobjsize(vml->vma); +		} else { +			bytes += kobjsize((void *) vml->vma->vm_start); +			bytes += kobjsize(vml->vma); +			slack += kobjsize((void *) vml->vma->vm_start) - +				(vml->vma->vm_end - vml->vma->vm_start); +		} +	} + +	if (atomic_read(&mm->mm_count) > 1) +		sbytes += kobjsize(mm); +	else +		bytes += kobjsize(mm); +	 +	if (current->fs && atomic_read(¤t->fs->count) > 1) +		sbytes += kobjsize(current->fs); +	else +		bytes += kobjsize(current->fs); + +	if (current->files && atomic_read(¤t->files->count) > 1) +		sbytes += kobjsize(current->files); +	else +		bytes += kobjsize(current->files); + +	if (current->sighand && atomic_read(¤t->sighand->count) > 1) +		sbytes += kobjsize(current->sighand); +	else +		bytes += kobjsize(current->sighand); + +	bytes += kobjsize(current); /* includes kernel stack */ + +	buffer += sprintf(buffer, +		"Mem:\t%8lu bytes\n" +		"Slack:\t%8lu bytes\n" +		"Shared:\t%8lu bytes\n", +		bytes, slack, sbytes); + +	up_read(&mm->mmap_sem); +	return buffer; +} + +unsigned long task_vsize(struct mm_struct *mm) +{ +	struct vm_list_struct *tbp; +	unsigned long vsize = 0; + +	down_read(&mm->mmap_sem); +	for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) { +		if (tbp->vma) +			vsize += kobjsize((void *) tbp->vma->vm_start); +	} +	up_read(&mm->mmap_sem); +	return vsize; +} + +int task_statm(struct mm_struct *mm, int *shared, int *text, +	       int *data, int *resident) +{ +	struct vm_list_struct *tbp; +	int size = kobjsize(mm); + +	down_read(&mm->mmap_sem); +	for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) { +		size += kobjsize(tbp); +		if (tbp->vma) { +			size += kobjsize(tbp->vma); +			size += kobjsize((void *) tbp->vma->vm_start); +		} +	} + +	size += (*text = mm->end_code - mm->start_code); +	size += (*data = mm->start_stack - mm->start_data); +	up_read(&mm->mmap_sem); +	*resident = size; +	return size; +} + +int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ +	struct vm_list_struct *vml; +	struct vm_area_struct *vma; +	struct task_struct *task = proc_task(inode); +	struct mm_struct *mm = get_task_mm(task); +	int result = -ENOENT; + +	if (!mm) +		goto out; +	down_read(&mm->mmap_sem); + +	vml = mm->context.vmlist; +	vma = NULL; +	while (vml) { +		if ((vml->vma->vm_flags & VM_EXECUTABLE) && vml->vma->vm_file) { +			vma = vml->vma; +			break; +		} +		vml = vml->next; +	} + +	if (vma) { +		*mnt = mntget(vma->vm_file->f_vfsmnt); +		*dentry = dget(vma->vm_file->f_dentry); +		result = 0; +	} + +	up_read(&mm->mmap_sem); +	mmput(mm); +out: +	return result; +} + +/* + * Albert D. Cahalan suggested to fake entries for the traditional + * sections here.  This might be worth investigating. 
+ */ +static int show_map(struct seq_file *m, void *v) +{ +	return 0; +} +static void *m_start(struct seq_file *m, loff_t *pos) +{ +	return NULL; +} +static void m_stop(struct seq_file *m, void *v) +{ +} +static void *m_next(struct seq_file *m, void *v, loff_t *pos) +{ +	return NULL; +} +struct seq_operations proc_pid_maps_op = { +	.start	= m_start, +	.next	= m_next, +	.stop	= m_stop, +	.show	= show_map +};  |
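
(Illustration, not part of the patch: every seq_file user in this commit - cpuinfo, partitions, diskstats, the tty driver list, the per-pid maps files above - follows the same four-callback pattern. The sketch below uses made-up names; only seq_open(), seq_read(), seq_lseek() and seq_release() are the real interfaces, used exactly as in the files above.)

static void *example_start(struct seq_file *m, loff_t *pos)
{
	static int the_only_record;
	/* return a cursor for position *pos, or NULL when past the end */
	return *pos == 0 ? &the_only_record : NULL;
}

static void *example_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;
	return NULL;			/* a one-record sequence */
}

static void example_stop(struct seq_file *m, void *v)
{
	/* release any locks taken in example_start() */
}

static int example_show(struct seq_file *m, void *v)
{
	seq_printf(m, "example record\n");
	return 0;
}

static struct seq_operations example_op = {
	.start	= example_start,
	.next	= example_next,
	.stop	= example_stop,
	.show	= example_show,
};

static int example_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &example_op);
}

static struct file_operations proc_example_operations = {
	.open		= example_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

/* hooked up the same way as the entries in proc_misc_init():
 *	create_seq_entry("example", 0, &proc_example_operations);
 */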