diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/proc |
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/proc')
-rw-r--r-- | fs/proc/Makefile | 14 | ||||
-rw-r--r-- | fs/proc/array.c | 484 | ||||
-rw-r--r-- | fs/proc/base.c | 2056 | ||||
-rw-r--r-- | fs/proc/generic.c | 705 | ||||
-rw-r--r-- | fs/proc/inode-alloc.txt | 14 | ||||
-rw-r--r-- | fs/proc/inode.c | 218 | ||||
-rw-r--r-- | fs/proc/internal.h | 48 | ||||
-rw-r--r-- | fs/proc/kcore.c | 404 | ||||
-rw-r--r-- | fs/proc/kmsg.c | 55 | ||||
-rw-r--r-- | fs/proc/mmu.c | 67 | ||||
-rw-r--r-- | fs/proc/nommu.c | 135 | ||||
-rw-r--r-- | fs/proc/proc_devtree.c | 165 | ||||
-rw-r--r-- | fs/proc/proc_misc.c | 615 | ||||
-rw-r--r-- | fs/proc/proc_tty.c | 242 | ||||
-rw-r--r-- | fs/proc/root.c | 161 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 235 | ||||
-rw-r--r-- | fs/proc/task_nommu.c | 164 |
17 files changed, 5782 insertions, 0 deletions
diff --git a/fs/proc/Makefile b/fs/proc/Makefile new file mode 100644 index 00000000000..738b9b60293 --- /dev/null +++ b/fs/proc/Makefile @@ -0,0 +1,14 @@ +# +# Makefile for the Linux proc filesystem routines. +# + +obj-$(CONFIG_PROC_FS) += proc.o + +proc-y := nommu.o task_nommu.o +proc-$(CONFIG_MMU) := mmu.o task_mmu.o + +proc-y += inode.o root.o base.o generic.o array.o \ + kmsg.o proc_tty.o proc_misc.o + +proc-$(CONFIG_PROC_KCORE) += kcore.o +proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o diff --git a/fs/proc/array.c b/fs/proc/array.c new file mode 100644 index 00000000000..37668fe998a --- /dev/null +++ b/fs/proc/array.c @@ -0,0 +1,484 @@ +/* + * linux/fs/proc/array.c + * + * Copyright (C) 1992 by Linus Torvalds + * based on ideas by Darren Senn + * + * Fixes: + * Michael. K. Johnson: stat,statm extensions. + * <johnsonm@stolaf.edu> + * + * Pauline Middelink : Made cmdline,envline only break at '\0's, to + * make sure SET_PROCTITLE works. Also removed + * bad '!' which forced address recalculation for + * EVERY character on the current page. + * <middelin@polyware.iaf.nl> + * + * Danny ter Haar : added cpuinfo + * <dth@cistron.nl> + * + * Alessandro Rubini : profile extension. + * <rubini@ipvvis.unipv.it> + * + * Jeff Tranter : added BogoMips field to cpuinfo + * <Jeff_Tranter@Mitel.COM> + * + * Bruno Haible : remove 4K limit for the maps file + * <haible@ma2s2.mathematik.uni-karlsruhe.de> + * + * Yves Arrouye : remove removal of trailing spaces in get_array. + * <Yves.Arrouye@marin.fdn.fr> + * + * Jerome Forissier : added per-CPU time information to /proc/stat + * and /proc/<pid>/cpu extension + * <forissier@isia.cma.fr> + * - Incorporation and non-SMP safe operation + * of forissier patch in 2.1.78 by + * Hans Marcus <crowbar@concepts.nl> + * + * aeb@cwi.nl : /proc/partitions + * + * + * Alan Cox : security fixes. 
+ * <Alan.Cox@linux.org> + * + * Al Viro : safe handling of mm_struct + * + * Gerhard Wichert : added BIGMEM support + * Siemens AG <Gerhard.Wichert@pdb.siemens.de> + * + * Al Viro & Jeff Garzik : moved most of the thing into base.c and + * : proc_misc.c. The rest may eventually go into + * : base.c too. + */ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/tty.h> +#include <linux/string.h> +#include <linux/mman.h> +#include <linux/proc_fs.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/signal.h> +#include <linux/highmem.h> +#include <linux/file.h> +#include <linux/times.h> +#include <linux/cpuset.h> + +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/processor.h> +#include "internal.h" + +/* Gcc optimizes away "strlen(x)" for constant x */ +#define ADDBUF(buffer, string) \ +do { memcpy(buffer, string, strlen(string)); \ + buffer += strlen(string); } while (0) + +static inline char * task_name(struct task_struct *p, char * buf) +{ + int i; + char * name; + char tcomm[sizeof(p->comm)]; + + get_task_comm(tcomm, p); + + ADDBUF(buf, "Name:\t"); + name = tcomm; + i = sizeof(tcomm); + do { + unsigned char c = *name; + name++; + i--; + *buf = c; + if (!c) + break; + if (c == '\\') { + buf[1] = c; + buf += 2; + continue; + } + if (c == '\n') { + buf[0] = '\\'; + buf[1] = 'n'; + buf += 2; + continue; + } + buf++; + } while (i); + *buf = '\n'; + return buf+1; +} + +/* + * The task state array is a strange "bitmap" of + * reasons to sleep. Thus "running" is zero, and + * you can test for combinations of others with + * simple bit tests. 
+ */ +static const char *task_state_array[] = { + "R (running)", /* 0 */ + "S (sleeping)", /* 1 */ + "D (disk sleep)", /* 2 */ + "T (stopped)", /* 4 */ + "T (tracing stop)", /* 8 */ + "Z (zombie)", /* 16 */ + "X (dead)" /* 32 */ +}; + +static inline const char * get_task_state(struct task_struct *tsk) +{ + unsigned int state = (tsk->state & (TASK_RUNNING | + TASK_INTERRUPTIBLE | + TASK_UNINTERRUPTIBLE | + TASK_STOPPED | + TASK_TRACED)) | + (tsk->exit_state & (EXIT_ZOMBIE | + EXIT_DEAD)); + const char **p = &task_state_array[0]; + + while (state) { + p++; + state >>= 1; + } + return *p; +} + +static inline char * task_state(struct task_struct *p, char *buffer) +{ + struct group_info *group_info; + int g; + + read_lock(&tasklist_lock); + buffer += sprintf(buffer, + "State:\t%s\n" + "SleepAVG:\t%lu%%\n" + "Tgid:\t%d\n" + "Pid:\t%d\n" + "PPid:\t%d\n" + "TracerPid:\t%d\n" + "Uid:\t%d\t%d\t%d\t%d\n" + "Gid:\t%d\t%d\t%d\t%d\n", + get_task_state(p), + (p->sleep_avg/1024)*100/(1020000000/1024), + p->tgid, + p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0, + pid_alive(p) && p->ptrace ? p->parent->pid : 0, + p->uid, p->euid, p->suid, p->fsuid, + p->gid, p->egid, p->sgid, p->fsgid); + read_unlock(&tasklist_lock); + task_lock(p); + buffer += sprintf(buffer, + "FDSize:\t%d\n" + "Groups:\t", + p->files ? 
p->files->max_fds : 0); + + group_info = p->group_info; + get_group_info(group_info); + task_unlock(p); + + for (g = 0; g < min(group_info->ngroups,NGROUPS_SMALL); g++) + buffer += sprintf(buffer, "%d ", GROUP_AT(group_info,g)); + put_group_info(group_info); + + buffer += sprintf(buffer, "\n"); + return buffer; +} + +static char * render_sigset_t(const char *header, sigset_t *set, char *buffer) +{ + int i, len; + + len = strlen(header); + memcpy(buffer, header, len); + buffer += len; + + i = _NSIG; + do { + int x = 0; + + i -= 4; + if (sigismember(set, i+1)) x |= 1; + if (sigismember(set, i+2)) x |= 2; + if (sigismember(set, i+3)) x |= 4; + if (sigismember(set, i+4)) x |= 8; + *buffer++ = (x < 10 ? '0' : 'a' - 10) + x; + } while (i >= 4); + + *buffer++ = '\n'; + *buffer = 0; + return buffer; +} + +static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, + sigset_t *catch) +{ + struct k_sigaction *k; + int i; + + k = p->sighand->action; + for (i = 1; i <= _NSIG; ++i, ++k) { + if (k->sa.sa_handler == SIG_IGN) + sigaddset(ign, i); + else if (k->sa.sa_handler != SIG_DFL) + sigaddset(catch, i); + } +} + +static inline char * task_sig(struct task_struct *p, char *buffer) +{ + sigset_t pending, shpending, blocked, ignored, caught; + int num_threads = 0; + unsigned long qsize = 0; + unsigned long qlim = 0; + + sigemptyset(&pending); + sigemptyset(&shpending); + sigemptyset(&blocked); + sigemptyset(&ignored); + sigemptyset(&caught); + + /* Gather all the data with the appropriate locks held */ + read_lock(&tasklist_lock); + if (p->sighand) { + spin_lock_irq(&p->sighand->siglock); + pending = p->pending.signal; + shpending = p->signal->shared_pending.signal; + blocked = p->blocked; + collect_sigign_sigcatch(p, &ignored, &caught); + num_threads = atomic_read(&p->signal->count); + qsize = atomic_read(&p->user->sigpending); + qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; + spin_unlock_irq(&p->sighand->siglock); + } + read_unlock(&tasklist_lock); + + 
buffer += sprintf(buffer, "Threads:\t%d\n", num_threads); + buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim); + + /* render them all */ + buffer = render_sigset_t("SigPnd:\t", &pending, buffer); + buffer = render_sigset_t("ShdPnd:\t", &shpending, buffer); + buffer = render_sigset_t("SigBlk:\t", &blocked, buffer); + buffer = render_sigset_t("SigIgn:\t", &ignored, buffer); + buffer = render_sigset_t("SigCgt:\t", &caught, buffer); + + return buffer; +} + +static inline char *task_cap(struct task_struct *p, char *buffer) +{ + return buffer + sprintf(buffer, "CapInh:\t%016x\n" + "CapPrm:\t%016x\n" + "CapEff:\t%016x\n", + cap_t(p->cap_inheritable), + cap_t(p->cap_permitted), + cap_t(p->cap_effective)); +} + +int proc_pid_status(struct task_struct *task, char * buffer) +{ + char * orig = buffer; + struct mm_struct *mm = get_task_mm(task); + + buffer = task_name(task, buffer); + buffer = task_state(task, buffer); + + if (mm) { + buffer = task_mem(mm, buffer); + mmput(mm); + } + buffer = task_sig(task, buffer); + buffer = task_cap(task, buffer); + buffer = cpuset_task_status_allowed(task, buffer); +#if defined(CONFIG_ARCH_S390) + buffer = task_show_regs(task, buffer); +#endif + return buffer - orig; +} + +static int do_task_stat(struct task_struct *task, char * buffer, int whole) +{ + unsigned long vsize, eip, esp, wchan = ~0UL; + long priority, nice; + int tty_pgrp = -1, tty_nr = 0; + sigset_t sigign, sigcatch; + char state; + int res; + pid_t ppid, pgid = -1, sid = -1; + int num_threads = 0; + struct mm_struct *mm; + unsigned long long start_time; + unsigned long cmin_flt = 0, cmaj_flt = 0; + unsigned long min_flt = 0, maj_flt = 0; + cputime_t cutime, cstime, utime, stime; + unsigned long rsslim = 0; + unsigned long it_real_value = 0; + struct task_struct *t; + char tcomm[sizeof(task->comm)]; + + state = *get_task_state(task); + vsize = eip = esp = 0; + mm = get_task_mm(task); + if (mm) { + vsize = task_vsize(mm); + eip = KSTK_EIP(task); + esp = KSTK_ESP(task); 
+ } + + get_task_comm(tcomm, task); + + sigemptyset(&sigign); + sigemptyset(&sigcatch); + cutime = cstime = utime = stime = cputime_zero; + read_lock(&tasklist_lock); + if (task->sighand) { + spin_lock_irq(&task->sighand->siglock); + num_threads = atomic_read(&task->signal->count); + collect_sigign_sigcatch(task, &sigign, &sigcatch); + + /* add up live thread stats at the group level */ + if (whole) { + t = task; + do { + min_flt += t->min_flt; + maj_flt += t->maj_flt; + utime = cputime_add(utime, t->utime); + stime = cputime_add(stime, t->stime); + t = next_thread(t); + } while (t != task); + } + + spin_unlock_irq(&task->sighand->siglock); + } + if (task->signal) { + if (task->signal->tty) { + tty_pgrp = task->signal->tty->pgrp; + tty_nr = new_encode_dev(tty_devnum(task->signal->tty)); + } + pgid = process_group(task); + sid = task->signal->session; + cmin_flt = task->signal->cmin_flt; + cmaj_flt = task->signal->cmaj_flt; + cutime = task->signal->cutime; + cstime = task->signal->cstime; + rsslim = task->signal->rlim[RLIMIT_RSS].rlim_cur; + if (whole) { + min_flt += task->signal->min_flt; + maj_flt += task->signal->maj_flt; + utime = cputime_add(utime, task->signal->utime); + stime = cputime_add(stime, task->signal->stime); + } + it_real_value = task->signal->it_real_value; + } + ppid = pid_alive(task) ? 
task->group_leader->real_parent->tgid : 0; + read_unlock(&tasklist_lock); + + if (!whole || num_threads<2) + wchan = get_wchan(task); + if (!whole) { + min_flt = task->min_flt; + maj_flt = task->maj_flt; + utime = task->utime; + stime = task->stime; + } + + /* scale priority and nice values from timeslices to -20..20 */ + /* to make it look like a "normal" Unix priority/nice value */ + priority = task_prio(task); + nice = task_nice(task); + + /* Temporary variable needed for gcc-2.96 */ + /* convert timespec -> nsec*/ + start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC + + task->start_time.tv_nsec; + /* convert nsec -> ticks */ + start_time = nsec_to_clock_t(start_time); + + res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \ +%lu %lu %lu %lu %lu %ld %ld %ld %ld %d %ld %llu %lu %ld %lu %lu %lu %lu %lu \ +%lu %lu %lu %lu %lu %lu %lu %lu %d %d %lu %lu\n", + task->pid, + tcomm, + state, + ppid, + pgid, + sid, + tty_nr, + tty_pgrp, + task->flags, + min_flt, + cmin_flt, + maj_flt, + cmaj_flt, + cputime_to_clock_t(utime), + cputime_to_clock_t(stime), + cputime_to_clock_t(cutime), + cputime_to_clock_t(cstime), + priority, + nice, + num_threads, + jiffies_to_clock_t(it_real_value), + start_time, + vsize, + mm ? get_mm_counter(mm, rss) : 0, /* you might want to shift this left 3 */ + rsslim, + mm ? mm->start_code : 0, + mm ? mm->end_code : 0, + mm ? mm->start_stack : 0, + esp, + eip, + /* The signal information here is obsolete. + * It must be decimal for Linux 2.0 compatibility. + * Use /proc/#/status for real-time signals. 
+ */ + task->pending.signal.sig[0] & 0x7fffffffUL, + task->blocked.sig[0] & 0x7fffffffUL, + sigign .sig[0] & 0x7fffffffUL, + sigcatch .sig[0] & 0x7fffffffUL, + wchan, + 0UL, + 0UL, + task->exit_signal, + task_cpu(task), + task->rt_priority, + task->policy); + if(mm) + mmput(mm); + return res; +} + +int proc_tid_stat(struct task_struct *task, char * buffer) +{ + return do_task_stat(task, buffer, 0); +} + +int proc_tgid_stat(struct task_struct *task, char * buffer) +{ + return do_task_stat(task, buffer, 1); +} + +int proc_pid_statm(struct task_struct *task, char *buffer) +{ + int size = 0, resident = 0, shared = 0, text = 0, lib = 0, data = 0; + struct mm_struct *mm = get_task_mm(task); + + if (mm) { + size = task_statm(mm, &shared, &text, &data, &resident); + mmput(mm); + } + + return sprintf(buffer,"%d %d %d %d %d %d %d\n", + size, resident, shared, text, lib, data, 0); +} diff --git a/fs/proc/base.c b/fs/proc/base.c new file mode 100644 index 00000000000..dad8ea4e00a --- /dev/null +++ b/fs/proc/base.c @@ -0,0 +1,2056 @@ +/* + * linux/fs/proc/base.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * proc base directory handling functions + * + * 1999, Al Viro. Rewritten. Now it covers the whole per-process part. + * Instead of using magical inumbers to determine the kind of object + * we allocate and fill in-core inodes upon lookup. They don't even + * go into icache. We cache the reference to task_struct upon lookup too. + * Eventually it should become a filesystem in its own. We don't use the + * rest of procfs anymore. 
+ */ + +#include <asm/uaccess.h> + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/time.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/init.h> +#include <linux/file.h> +#include <linux/string.h> +#include <linux/seq_file.h> +#include <linux/namei.h> +#include <linux/namespace.h> +#include <linux/mm.h> +#include <linux/smp_lock.h> +#include <linux/kallsyms.h> +#include <linux/mount.h> +#include <linux/security.h> +#include <linux/ptrace.h> +#include <linux/seccomp.h> +#include <linux/cpuset.h> +#include <linux/audit.h> +#include "internal.h" + +/* + * For hysterical raisins we keep the same inumbers as in the old procfs. + * Feel free to change the macro below - just keep the range distinct from + * inumbers of the rest of procfs (currently those are in 0x0000--0xffff). + * As soon as we'll get a separate superblock we will be able to forget + * about magical ranges too. + */ + +#define fake_ino(pid,ino) (((pid)<<16)|(ino)) + +enum pid_directory_inos { + PROC_TGID_INO = 2, + PROC_TGID_TASK, + PROC_TGID_STATUS, + PROC_TGID_MEM, +#ifdef CONFIG_SECCOMP + PROC_TGID_SECCOMP, +#endif + PROC_TGID_CWD, + PROC_TGID_ROOT, + PROC_TGID_EXE, + PROC_TGID_FD, + PROC_TGID_ENVIRON, + PROC_TGID_AUXV, + PROC_TGID_CMDLINE, + PROC_TGID_STAT, + PROC_TGID_STATM, + PROC_TGID_MAPS, + PROC_TGID_MOUNTS, + PROC_TGID_WCHAN, +#ifdef CONFIG_SCHEDSTATS + PROC_TGID_SCHEDSTAT, +#endif +#ifdef CONFIG_CPUSETS + PROC_TGID_CPUSET, +#endif +#ifdef CONFIG_SECURITY + PROC_TGID_ATTR, + PROC_TGID_ATTR_CURRENT, + PROC_TGID_ATTR_PREV, + PROC_TGID_ATTR_EXEC, + PROC_TGID_ATTR_FSCREATE, +#endif +#ifdef CONFIG_AUDITSYSCALL + PROC_TGID_LOGINUID, +#endif + PROC_TGID_FD_DIR, + PROC_TGID_OOM_SCORE, + PROC_TGID_OOM_ADJUST, + PROC_TID_INO, + PROC_TID_STATUS, + PROC_TID_MEM, +#ifdef CONFIG_SECCOMP + PROC_TID_SECCOMP, +#endif + PROC_TID_CWD, + PROC_TID_ROOT, + PROC_TID_EXE, + PROC_TID_FD, + PROC_TID_ENVIRON, + PROC_TID_AUXV, + PROC_TID_CMDLINE, + PROC_TID_STAT, + 
PROC_TID_STATM, + PROC_TID_MAPS, + PROC_TID_MOUNTS, + PROC_TID_WCHAN, +#ifdef CONFIG_SCHEDSTATS + PROC_TID_SCHEDSTAT, +#endif +#ifdef CONFIG_CPUSETS + PROC_TID_CPUSET, +#endif +#ifdef CONFIG_SECURITY + PROC_TID_ATTR, + PROC_TID_ATTR_CURRENT, + PROC_TID_ATTR_PREV, + PROC_TID_ATTR_EXEC, + PROC_TID_ATTR_FSCREATE, +#endif +#ifdef CONFIG_AUDITSYSCALL + PROC_TID_LOGINUID, +#endif + PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ + PROC_TID_OOM_SCORE, + PROC_TID_OOM_ADJUST, +}; + +struct pid_entry { + int type; + int len; + char *name; + mode_t mode; +}; + +#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} + +static struct pid_entry tgid_base_stuff[] = { + E(PROC_TGID_TASK, "task", S_IFDIR|S_IRUGO|S_IXUGO), + E(PROC_TGID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), + E(PROC_TGID_ENVIRON, "environ", S_IFREG|S_IRUSR), + E(PROC_TGID_AUXV, "auxv", S_IFREG|S_IRUSR), + E(PROC_TGID_STATUS, "status", S_IFREG|S_IRUGO), + E(PROC_TGID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), + E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), + E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), + E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), + E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), +#ifdef CONFIG_SECCOMP + E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), +#endif + E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO), + E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), + E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), + E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), +#ifdef CONFIG_SECURITY + E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), +#endif +#ifdef CONFIG_KALLSYMS + E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_SCHEDSTATS + E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_CPUSETS + E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO), +#endif + E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), + E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR), +#ifdef CONFIG_AUDITSYSCALL + E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), +#endif + 
{0,0,NULL,0} +}; +static struct pid_entry tid_base_stuff[] = { + E(PROC_TID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), + E(PROC_TID_ENVIRON, "environ", S_IFREG|S_IRUSR), + E(PROC_TID_AUXV, "auxv", S_IFREG|S_IRUSR), + E(PROC_TID_STATUS, "status", S_IFREG|S_IRUGO), + E(PROC_TID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), + E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), + E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), + E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), + E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), +#ifdef CONFIG_SECCOMP + E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), +#endif + E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO), + E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), + E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), + E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), +#ifdef CONFIG_SECURITY + E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), +#endif +#ifdef CONFIG_KALLSYMS + E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_SCHEDSTATS + E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO), +#endif +#ifdef CONFIG_CPUSETS + E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO), +#endif + E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), + E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR), +#ifdef CONFIG_AUDITSYSCALL + E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), +#endif + {0,0,NULL,0} +}; + +#ifdef CONFIG_SECURITY +static struct pid_entry tgid_attr_stuff[] = { + E(PROC_TGID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), + E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), + E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), + E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), + {0,0,NULL,0} +}; +static struct pid_entry tid_attr_stuff[] = { + E(PROC_TID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), + E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), + E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), + E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), + {0,0,NULL,0} +}; +#endif + +#undef E + 
+static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ + struct task_struct *task = proc_task(inode); + struct files_struct *files; + struct file *file; + int fd = proc_type(inode) - PROC_TID_FD_DIR; + + files = get_files_struct(task); + if (files) { + spin_lock(&files->file_lock); + file = fcheck_files(files, fd); + if (file) { + *mnt = mntget(file->f_vfsmnt); + *dentry = dget(file->f_dentry); + spin_unlock(&files->file_lock); + put_files_struct(files); + return 0; + } + spin_unlock(&files->file_lock); + put_files_struct(files); + } + return -ENOENT; +} + +static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ + struct fs_struct *fs; + int result = -ENOENT; + task_lock(proc_task(inode)); + fs = proc_task(inode)->fs; + if(fs) + atomic_inc(&fs->count); + task_unlock(proc_task(inode)); + if (fs) { + read_lock(&fs->lock); + *mnt = mntget(fs->pwdmnt); + *dentry = dget(fs->pwd); + read_unlock(&fs->lock); + result = 0; + put_fs_struct(fs); + } + return result; +} + +static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ + struct fs_struct *fs; + int result = -ENOENT; + task_lock(proc_task(inode)); + fs = proc_task(inode)->fs; + if(fs) + atomic_inc(&fs->count); + task_unlock(proc_task(inode)); + if (fs) { + read_lock(&fs->lock); + *mnt = mntget(fs->rootmnt); + *dentry = dget(fs->root); + read_unlock(&fs->lock); + result = 0; + put_fs_struct(fs); + } + return result; +} + +#define MAY_PTRACE(task) \ + (task == current || \ + (task->parent == current && \ + (task->ptrace & PT_PTRACED) && \ + (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ + security_ptrace(current,task) == 0)) + +static int may_ptrace_attach(struct task_struct *task) +{ + int retval = 0; + + task_lock(task); + + if (!task->mm) + goto out; + if (((current->uid != task->euid) || + (current->uid != task->suid) || + (current->uid != task->uid) || + (current->gid != 
task->egid) || + (current->gid != task->sgid) || + (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) + goto out; + rmb(); + if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) + goto out; + if (security_ptrace(current, task)) + goto out; + + retval = 1; +out: + task_unlock(task); + return retval; +} + +static int proc_pid_environ(struct task_struct *task, char * buffer) +{ + int res = 0; + struct mm_struct *mm = get_task_mm(task); + if (mm) { + unsigned int len = mm->env_end - mm->env_start; + if (len > PAGE_SIZE) + len = PAGE_SIZE; + res = access_process_vm(task, mm->env_start, buffer, len, 0); + if (!may_ptrace_attach(task)) + res = -ESRCH; + mmput(mm); + } + return res; +} + +static int proc_pid_cmdline(struct task_struct *task, char * buffer) +{ + int res = 0; + unsigned int len; + struct mm_struct *mm = get_task_mm(task); + if (!mm) + goto out; + if (!mm->arg_end) + goto out_mm; /* Shh! No looking before we're done */ + + len = mm->arg_end - mm->arg_start; + + if (len > PAGE_SIZE) + len = PAGE_SIZE; + + res = access_process_vm(task, mm->arg_start, buffer, len, 0); + + // If the nul at the end of args has been overwritten, then + // assume application is using setproctitle(3). 
+ if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { + len = strnlen(buffer, res); + if (len < res) { + res = len; + } else { + len = mm->env_end - mm->env_start; + if (len > PAGE_SIZE - res) + len = PAGE_SIZE - res; + res += access_process_vm(task, mm->env_start, buffer+res, len, 0); + res = strnlen(buffer, res); + } + } +out_mm: + mmput(mm); +out: + return res; +} + +static int proc_pid_auxv(struct task_struct *task, char *buffer) +{ + int res = 0; + struct mm_struct *mm = get_task_mm(task); + if (mm) { + unsigned int nwords = 0; + do + nwords += 2; + while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ + res = nwords * sizeof(mm->saved_auxv[0]); + if (res > PAGE_SIZE) + res = PAGE_SIZE; + memcpy(buffer, mm->saved_auxv, res); + mmput(mm); + } + return res; +} + + +#ifdef CONFIG_KALLSYMS +/* + * Provides a wchan file via kallsyms in a proper one-value-per-file format. + * Returns the resolved symbol. If that fails, simply return the address. + */ +static int proc_pid_wchan(struct task_struct *task, char *buffer) +{ + char *modname; + const char *sym_name; + unsigned long wchan, size, offset; + char namebuf[KSYM_NAME_LEN+1]; + + wchan = get_wchan(task); + + sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf); + if (sym_name) + return sprintf(buffer, "%s", sym_name); + return sprintf(buffer, "%lu", wchan); +} +#endif /* CONFIG_KALLSYMS */ + +#ifdef CONFIG_SCHEDSTATS +/* + * Provides /proc/PID/schedstat + */ +static int proc_pid_schedstat(struct task_struct *task, char *buffer) +{ + return sprintf(buffer, "%lu %lu %lu\n", + task->sched_info.cpu_time, + task->sched_info.run_delay, + task->sched_info.pcnt); +} +#endif + +/* The badness from the OOM killer */ +unsigned long badness(struct task_struct *p, unsigned long uptime); +static int proc_oom_score(struct task_struct *task, char *buffer) +{ + unsigned long points; + struct timespec uptime; + + do_posix_clock_monotonic_gettime(&uptime); + points = badness(task, uptime.tv_sec); + return 
sprintf(buffer, "%lu\n", points); +} + +/************************************************************************/ +/* Here the fs part begins */ +/************************************************************************/ + +/* permission checks */ + +static int proc_check_root(struct inode *inode) +{ + struct dentry *de, *base, *root; + struct vfsmount *our_vfsmnt, *vfsmnt, *mnt; + int res = 0; + + if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ + return -ENOENT; + read_lock(&current->fs->lock); + our_vfsmnt = mntget(current->fs->rootmnt); + base = dget(current->fs->root); + read_unlock(&current->fs->lock); + + spin_lock(&vfsmount_lock); + de = root; + mnt = vfsmnt; + + while (vfsmnt != our_vfsmnt) { + if (vfsmnt == vfsmnt->mnt_parent) + goto out; + de = vfsmnt->mnt_mountpoint; + vfsmnt = vfsmnt->mnt_parent; + } + + if (!is_subdir(de, base)) + goto out; + spin_unlock(&vfsmount_lock); + +exit: + dput(base); + mntput(our_vfsmnt); + dput(root); + mntput(mnt); + return res; +out: + spin_unlock(&vfsmount_lock); + res = -EACCES; + goto exit; +} + +static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + if (generic_permission(inode, mask, NULL) != 0) + return -EACCES; + return proc_check_root(inode); +} + +extern struct seq_operations proc_pid_maps_op; +static int maps_open(struct inode *inode, struct file *file) +{ + struct task_struct *task = proc_task(inode); + int ret = seq_open(file, &proc_pid_maps_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = task; + } + return ret; +} + +static struct file_operations proc_maps_operations = { + .open = maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +extern struct seq_operations mounts_op; +static int mounts_open(struct inode *inode, struct file *file) +{ + struct task_struct *task = proc_task(inode); + int ret = seq_open(file, &mounts_op); + + if (!ret) { + struct seq_file *m = file->private_data; + struct namespace *namespace; + 
task_lock(task); + namespace = task->namespace; + if (namespace) + get_namespace(namespace); + task_unlock(task); + + if (namespace) + m->private = namespace; + else { + seq_release(inode, file); + ret = -EINVAL; + } + } + return ret; +} |