diff options
Diffstat (limited to 'fs/proc/base.c')
| -rw-r--r-- | fs/proc/base.c | 2804 |
1 files changed, 1535 insertions, 1269 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index 81d7d145292..2d696b0c93b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -53,21 +53,27 @@ #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/stat.h> +#include <linux/task_io_accounting_ops.h> #include <linux/init.h> #include <linux/capability.h> #include <linux/file.h> +#include <linux/fdtable.h> #include <linux/string.h> #include <linux/seq_file.h> #include <linux/namei.h> #include <linux/mnt_namespace.h> #include <linux/mm.h> +#include <linux/swap.h> #include <linux/rcupdate.h> #include <linux/kallsyms.h> +#include <linux/stacktrace.h> #include <linux/resource.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/security.h> #include <linux/ptrace.h> +#include <linux/tracehook.h> +#include <linux/printk.h> #include <linux/cgroup.h> #include <linux/cpuset.h> #include <linux/audit.h> @@ -76,7 +82,17 @@ #include <linux/oom.h> #include <linux/elf.h> #include <linux/pid_namespace.h> +#include <linux/user_namespace.h> +#include <linux/fs_struct.h> +#include <linux/slab.h> +#include <linux/flex_array.h> +#include <linux/posix-timers.h> +#ifdef CONFIG_HARDWALL +#include <asm/hardwall.h> +#endif +#include <trace/events/oom.h> #include "internal.h" +#include "fd.h" /* NOTE: * Implementing inode permission operations in /proc is almost @@ -91,7 +107,7 @@ struct pid_entry { char *name; int len; - mode_t mode; + umode_t mode; const struct inode_operations *iop; const struct file_operations *fop; union proc_op op; @@ -106,168 +122,98 @@ struct pid_entry { .op = OP, \ } -#define DIR(NAME, MODE, OTYPE) \ - NOD(NAME, (S_IFDIR|(MODE)), \ - &proc_##OTYPE##_inode_operations, &proc_##OTYPE##_operations, \ - {} ) -#define LNK(NAME, OTYPE) \ +#define DIR(NAME, MODE, iops, fops) \ + NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} ) +#define LNK(NAME, get_link) \ NOD(NAME, (S_IFLNK|S_IRWXUGO), \ &proc_pid_link_inode_operations, NULL, \ - { .proc_get_link = &proc_##OTYPE##_link } ) -#define REG(NAME, MODE, OTYPE) \ - NOD(NAME, (S_IFREG|(MODE)), NULL, \ - &proc_##OTYPE##_operations, {}) -#define INF(NAME, MODE, OTYPE) \ + { .proc_get_link = get_link } ) +#define REG(NAME, MODE, fops) \ + NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) +#define INF(NAME, MODE, read) \ NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_info_file_operations, \ - { .proc_read = &proc_##OTYPE } ) -#define ONE(NAME, MODE, OTYPE) \ + { .proc_read = read } ) +#define ONE(NAME, MODE, show) \ NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_single_file_operations, \ - { .proc_show = &proc_##OTYPE } ) - -int maps_protect; -EXPORT_SYMBOL(maps_protect); - -static struct fs_struct *get_fs_struct(struct task_struct *task) -{ - struct fs_struct *fs; - task_lock(task); - fs = task->fs; - if(fs) - atomic_inc(&fs->count); - task_unlock(task); - return fs; -} + { .proc_show = show } ) -static int get_nr_threads(struct task_struct *tsk) +/* + * Count the number of hardlinks for the pid_entry table, excluding the . + * and .. links. + */ +static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, + unsigned int n) { - /* Must be called with the rcu_read_lock held */ - unsigned long flags; - int count = 0; + unsigned int i; + unsigned int count; - if (lock_task_sighand(tsk, &flags)) { - count = atomic_read(&tsk->signal->count); - unlock_task_sighand(tsk, &flags); + count = 0; + for (i = 0; i < n; ++i) { + if (S_ISDIR(entries[i].mode)) + ++count; } + return count; } -static int proc_cwd_link(struct inode *inode, struct path *path) +static int get_task_root(struct task_struct *task, struct path *root) { - struct task_struct *task = get_proc_task(inode); - struct fs_struct *fs = NULL; int result = -ENOENT; - if (task) { - fs = get_fs_struct(task); - put_task_struct(task); - } - if (fs) { - read_lock(&fs->lock); - *path = fs->pwd; - path_get(&fs->pwd); - read_unlock(&fs->lock); + task_lock(task); + if (task->fs) { + get_fs_root(task->fs, root); result = 0; - put_fs_struct(fs); } + task_unlock(task); return result; } -static int proc_root_link(struct inode *inode, struct path *path) +static int proc_cwd_link(struct dentry *dentry, struct path *path) { - struct task_struct *task = get_proc_task(inode); - struct fs_struct *fs = NULL; + struct task_struct *task = get_proc_task(dentry->d_inode); int result = -ENOENT; if (task) { - fs = get_fs_struct(task); + task_lock(task); + if (task->fs) { + get_fs_pwd(task->fs, path); + result = 0; + } + task_unlock(task); put_task_struct(task); } - if (fs) { - read_lock(&fs->lock); - *path = fs->root; - path_get(&fs->root); - read_unlock(&fs->lock); - result = 0; - put_fs_struct(fs); - } return result; } -#define MAY_PTRACE(task) \ - (task == current || \ - (task->parent == current && \ - (task->ptrace & PT_PTRACED) && \ - (task_is_stopped_or_traced(task)) && \ - security_ptrace(current,task) == 0)) - -struct mm_struct *mm_for_maps(struct task_struct *task) +static int proc_root_link(struct dentry *dentry, struct path *path) { - struct mm_struct *mm = get_task_mm(task); - if (!mm) - return NULL; - down_read(&mm->mmap_sem); - task_lock(task); - if (task->mm != mm) - goto out; - if (task->mm != current->mm && __ptrace_may_attach(task) < 0) - goto out; - task_unlock(task); - return mm; -out: - task_unlock(task); - up_read(&mm->mmap_sem); - mmput(mm); - return NULL; + struct task_struct *task = get_proc_task(dentry->d_inode); + int result = -ENOENT; + + if (task) { + result = get_task_root(task, path); + put_task_struct(task); + } + return result; } -static int proc_pid_cmdline(struct task_struct *task, char * buffer) +static int proc_pid_cmdline(struct task_struct *task, char *buffer) { - int res = 0; - unsigned int len; - struct mm_struct *mm = get_task_mm(task); - if (!mm) - goto out; - if (!mm->arg_end) - goto out_mm; /* Shh! No looking before we're done */ - - len = mm->arg_end - mm->arg_start; - - if (len > PAGE_SIZE) - len = PAGE_SIZE; - - res = access_process_vm(task, mm->arg_start, buffer, len, 0); - - // If the nul at the end of args has been overwritten, then - // assume application is using setproctitle(3). - if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { - len = strnlen(buffer, res); - if (len < res) { - res = len; - } else { - len = mm->env_end - mm->env_start; - if (len > PAGE_SIZE - res) - len = PAGE_SIZE - res; - res += access_process_vm(task, mm->env_start, buffer+res, len, 0); - res = strnlen(buffer, res); - } - } -out_mm: - mmput(mm); -out: - return res; + return get_cmdline(task, buffer, PAGE_SIZE); } static int proc_pid_auxv(struct task_struct *task, char *buffer) { - int res = 0; - struct mm_struct *mm = get_task_mm(task); - if (mm) { + struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ); + int res = PTR_ERR(mm); + if (mm && !IS_ERR(mm)) { unsigned int nwords = 0; - do + do { nwords += 2; - while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ + } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ res = nwords * sizeof(mm->saved_auxv[0]); if (res > PAGE_SIZE) res = PAGE_SIZE; @@ -291,12 +237,69 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) wchan = get_wchan(task); if (lookup_symbol_name(wchan, symname) < 0) - return sprintf(buffer, "%lu", wchan); + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + return 0; + else + return sprintf(buffer, "%lu", wchan); else return sprintf(buffer, "%s", symname); } #endif /* CONFIG_KALLSYMS */ +static int lock_trace(struct task_struct *task) +{ + int err = mutex_lock_killable(&task->signal->cred_guard_mutex); + if (err) + return err; + if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) { + mutex_unlock(&task->signal->cred_guard_mutex); + return -EPERM; + } + return 0; +} + +static void unlock_trace(struct task_struct *task) +{ + mutex_unlock(&task->signal->cred_guard_mutex); +} + +#ifdef CONFIG_STACKTRACE + +#define MAX_STACK_TRACE_DEPTH 64 + +static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) +{ + struct stack_trace trace; + unsigned long *entries; + int err; + int i; + + entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); + if (!entries) + return -ENOMEM; + + trace.nr_entries = 0; + trace.max_entries = MAX_STACK_TRACE_DEPTH; + trace.entries = entries; + trace.skip = 0; + + err = lock_trace(task); + if (!err) { + save_stack_trace_tsk(task, &trace); + + for (i = 0; i < trace.nr_entries; i++) { + seq_printf(m, "[<%pK>] %pS\n", + (void *)entries[i], (void *)entries[i]); + } + unlock_trace(task); + } + kfree(entries); + + return err; +} +#endif + #ifdef CONFIG_SCHEDSTATS /* * Provides /proc/PID/schedstat @@ -304,8 +307,8 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) static int proc_pid_schedstat(struct task_struct *task, char *buffer) { return sprintf(buffer, "%llu %llu %lu\n", - task->sched_info.cpu_time, - task->sched_info.run_delay, + (unsigned long long)task->se.sum_exec_runtime, + (unsigned long long)task->sched_info.run_delay, task->sched_info.pcount); } #endif @@ -321,26 +324,20 @@ static int lstats_show_proc(struct seq_file *m, void *v) return -ESRCH; seq_puts(m, "Latency Top version : v0.1\n"); for (i = 0; i < 32; i++) { - if (task->latency_record[i].backtrace[0]) { + struct latency_record *lr = &task->latency_record[i]; + if (lr->backtrace[0]) { int q; - seq_printf(m, "%i %li %li ", - task->latency_record[i].count, - task->latency_record[i].time, - task->latency_record[i].max); + seq_printf(m, "%i %li %li", + lr->count, lr->time, lr->max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { - char sym[KSYM_NAME_LEN]; - char *c; - if (!task->latency_record[i].backtrace[q]) + unsigned long bt = lr->backtrace[q]; + if (!bt) break; - if (task->latency_record[i].backtrace[q] == ULONG_MAX) + if (bt == ULONG_MAX) break; - sprint_symbol(sym, task->latency_record[i].backtrace[q]); - c = strchr(sym, '+'); - if (c) - *c = 0; - seq_printf(m, "%s ", sym); + seq_printf(m, " %ps", (void *)bt); } - seq_printf(m, "\n"); + seq_putc(m, '\n'); } } @@ -356,7 +353,7 @@ static int lstats_open(struct inode *inode, struct file *file) static ssize_t lstats_write(struct file *file, const char __user *buf, size_t count, loff_t *offs) { - struct task_struct *task = get_proc_task(file->f_dentry->d_inode); + struct task_struct *task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; @@ -376,16 +373,46 @@ static const struct file_operations proc_lstats_operations = { #endif -/* The badness from the OOM killer */ -unsigned long badness(struct task_struct *p, unsigned long uptime); +#ifdef CONFIG_CGROUPS +static int cgroup_open(struct inode *inode, struct file *file) +{ + struct pid *pid = PROC_I(inode)->pid; + return single_open(file, proc_cgroup_show, pid); +} + +static const struct file_operations proc_cgroup_operations = { + .open = cgroup_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + +#ifdef CONFIG_PROC_PID_CPUSET + +static int cpuset_open(struct inode *inode, struct file *file) +{ + struct pid *pid = PROC_I(inode)->pid; + return single_open(file, proc_cpuset_show, pid); +} + +static const struct file_operations proc_cpuset_operations = { + .open = cpuset_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + static int proc_oom_score(struct task_struct *task, char *buffer) { - unsigned long points; - struct timespec uptime; + unsigned long totalpages = totalram_pages + total_swap_pages; + unsigned long points = 0; - do_posix_clock_monotonic_gettime(&uptime); read_lock(&tasklist_lock); - points = badness(task, uptime.tv_sec); + if (pid_alive(task)) + points = oom_badness(task, NULL, NULL, totalpages) * + 1000 / totalpages; read_unlock(&tasklist_lock); return sprintf(buffer, "%lu\n", points); } @@ -396,7 +423,7 @@ struct limit_names { }; static const struct limit_names lnames[RLIM_NLIMITS] = { - [RLIMIT_CPU] = {"Max cpu time", "ms"}, + [RLIMIT_CPU] = {"Max cpu time", "seconds"}, [RLIMIT_FSIZE] = {"Max file size", "bytes"}, [RLIMIT_DATA] = {"Max data size", "bytes"}, [RLIMIT_STACK] = {"Max stack size", "bytes"}, @@ -424,14 +451,10 @@ static int proc_pid_limits(struct task_struct *task, char *buffer) struct rlimit rlim[RLIM_NLIMITS]; - rcu_read_lock(); - if (!lock_task_sighand(task,&flags)) { - rcu_read_unlock(); + if (!lock_task_sighand(task, &flags)) return 0; - } memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); unlock_task_sighand(task, &flags); - rcu_read_unlock(); /* * print the file header @@ -463,6 +486,30 @@ static int proc_pid_limits(struct task_struct *task, char *buffer) return count; } +#ifdef CONFIG_HAVE_ARCH_TRACEHOOK +static int proc_pid_syscall(struct task_struct *task, char *buffer) +{ + long nr; + unsigned long args[6], sp, pc; + int res = lock_trace(task); + if (res) + return res; + + if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) + res = sprintf(buffer, "running\n"); + else if (nr < 0) + res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); + else + res = sprintf(buffer, + "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + nr, + args[0], args[1], args[2], args[3], args[4], args[5], + sp, pc); + unlock_trace(task); + return res; +} +#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ + /************************************************************************/ /* Here the fs part begins */ /************************************************************************/ @@ -478,13 +525,13 @@ static int proc_fd_access_allowed(struct inode *inode) */ task = get_proc_task(inode); if (task) { - allowed = ptrace_may_attach(task); + allowed = ptrace_may_access(task, PTRACE_MODE_READ); put_task_struct(task); } return allowed; } -static int proc_setattr(struct dentry *dentry, struct iattr *attr) +int proc_setattr(struct dentry *dentry, struct iattr *attr) { int error; struct inode *inode = dentry->d_inode; @@ -493,133 +540,62 @@ static int proc_setattr(struct dentry *dentry, struct iattr *attr) return -EPERM; error = inode_change_ok(inode, attr); - if (!error) - error = inode_setattr(inode, attr); - return error; -} - -static const struct inode_operations proc_def_inode_operations = { - .setattr = proc_setattr, -}; - -extern const struct seq_operations mounts_op; -struct proc_mounts { - struct seq_file m; - int event; -}; - -static int mounts_open(struct inode *inode, struct file *file) -{ - struct task_struct *task = get_proc_task(inode); - struct nsproxy *nsp; - struct mnt_namespace *ns = NULL; - struct proc_mounts *p; - int ret = -EINVAL; - - if (task) { - rcu_read_lock(); - nsp = task_nsproxy(task); - if (nsp) { - ns = nsp->mnt_ns; - if (ns) - get_mnt_ns(ns); - } - rcu_read_unlock(); - - put_task_struct(task); - } - - if (ns) { - ret = -ENOMEM; - p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); - if (p) { - file->private_data = &p->m; - ret = seq_open(file, &mounts_op); - if (!ret) { - p->m.private = ns; - p->event = ns->event; - return 0; - } - kfree(p); - } - put_mnt_ns(ns); - } - return ret; -} + if (error) + return error; -static int mounts_release(struct inode *inode, struct file *file) -{ - struct seq_file *m = file->private_data; - struct mnt_namespace *ns = m->private; - put_mnt_ns(ns); - return seq_release(inode, file); + setattr_copy(inode, attr); + mark_inode_dirty(inode); + return 0; } -static unsigned mounts_poll(struct file *file, poll_table *wait) +/* + * May current process learn task's sched/cmdline info (for hide_pid_min=1) + * or euid/egid (for hide_pid_min=2)? + */ +static bool has_pid_permissions(struct pid_namespace *pid, + struct task_struct *task, + int hide_pid_min) { - struct proc_mounts *p = file->private_data; - struct mnt_namespace *ns = p->m.private; - unsigned res = 0; - - poll_wait(file, &ns->poll, wait); - - spin_lock(&vfsmount_lock); - if (p->event != ns->event) { - p->event = ns->event; - res = POLLERR; - } - spin_unlock(&vfsmount_lock); - - return res; + if (pid->hide_pid < hide_pid_min) + return true; + if (in_group_p(pid->pid_gid)) + return true; + return ptrace_may_access(task, PTRACE_MODE_READ); } -static const struct file_operations proc_mounts_operations = { - .open = mounts_open, - .read = seq_read, - .llseek = seq_lseek, - .release = mounts_release, - .poll = mounts_poll, -}; -extern const struct seq_operations mountstats_op; -static int mountstats_open(struct inode *inode, struct file *file) +static int proc_pid_permission(struct inode *inode, int mask) { - int ret = seq_open(file, &mountstats_op); - - if (!ret) { - struct seq_file *m = file->private_data; - struct nsproxy *nsp; - struct mnt_namespace *mnt_ns = NULL; - struct task_struct *task = get_proc_task(inode); + struct pid_namespace *pid = inode->i_sb->s_fs_info; + struct task_struct *task; + bool has_perms; - if (task) { - rcu_read_lock(); - nsp = task_nsproxy(task); - if (nsp) { - mnt_ns = nsp->mnt_ns; - if (mnt_ns) - get_mnt_ns(mnt_ns); - } - rcu_read_unlock(); + task = get_proc_task(inode); + if (!task) + return -ESRCH; + has_perms = has_pid_permissions(pid, task, 1); + put_task_struct(task); - put_task_struct(task); + if (!has_perms) { + if (pid->hide_pid == 2) { + /* + * Let's make getdents(), stat(), and open() + * consistent with each other. If a process + * may not stat() a file, it shouldn't be seen + * in procfs at all. + */ + return -ENOENT; } - if (mnt_ns) - m->private = mnt_ns; - else { - seq_release(inode, file); - ret = -EINVAL; - } + return -EPERM; } - return ret; + return generic_permission(inode, mask); } -static const struct file_operations proc_mountstats_operations = { - .open = mountstats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = mounts_release, + + +static const struct inode_operations proc_def_inode_operations = { + .setattr = proc_setattr, }; #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ @@ -627,7 +603,7 @@ static const struct file_operations proc_mountstats_operations = { static ssize_t proc_info_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { - struct inode * inode = file->f_path.dentry->d_inode; + struct inode * inode = file_inode(file); unsigned long page; ssize_t length; struct task_struct *task = get_proc_task(inode); @@ -656,6 +632,7 @@ out_no_task: static const struct file_operations proc_info_file_operations = { .read = proc_info_read, + .llseek = generic_file_llseek, }; static int proc_single_show(struct seq_file *m, void *v) @@ -680,14 +657,7 @@ static int proc_single_show(struct seq_file *m, void *v) static int proc_single_open(struct inode *inode, struct file *filp) { - int ret; - ret = single_open(filp, proc_single_show, NULL); - if (!ret) { - struct seq_file *m = filp->private_data; - - m->private = inode; - } - return ret; + return single_open(filp, proc_single_show, inode); } static const struct file_operations proc_single_file_operations = { @@ -697,130 +667,105 @@ static const struct file_operations proc_single_file_operations = { .release = single_release, }; -static int mem_open(struct inode* inode, struct file* file) -{ - file->private_data = (void*)((long)current->self_exec_id); - return 0; -} - -static ssize_t mem_read(struct file * file, char __user * buf, - size_t count, loff_t *ppos) +static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) { - struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); - char *page; - unsigned long src = *ppos; - int ret = -ESRCH; + struct task_struct *task = get_proc_task(file_inode(file)); struct mm_struct *mm; if (!task) - goto out_no_task; + return -ESRCH; - if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) - goto out; + mm = mm_access(task, mode); + put_task_struct(task); - ret = -ENOMEM; - page = (char *)__get_free_page(GFP_TEMPORARY); - if (!page) - goto out; + if (IS_ERR(mm)) + return PTR_ERR(mm); - ret = 0; - - mm = get_task_mm(task); - if (!mm) - goto out_free; + if (mm) { + /* ensure this mm_struct can't be freed */ + atomic_inc(&mm->mm_count); + /* but do not pin its memory */ + mmput(mm); + } - ret = -EIO; - - if (file->private_data != (void*)((long)current->self_exec_id)) - goto out_put; + file->private_data = mm; - ret = 0; - - while (count > 0) { - int this_len, retval; + return 0; +} - this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - retval = access_process_vm(task, src, page, this_len, 0); - if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { - if (!ret) - ret = -EIO; - break; - } +static int mem_open(struct inode *inode, struct file *file) +{ + int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH); - if (copy_to_user(buf, page, retval)) { - ret = -EFAULT; - break; - } - - ret += retval; - src += retval; - buf += retval; - count -= retval; - } - *ppos = src; + /* OK to pass negative loff_t, we can catch out-of-range */ + file->f_mode |= FMODE_UNSIGNED_OFFSET; -out_put: - mmput(mm); -out_free: - free_page((unsigned long) page); -out: - put_task_struct(task); -out_no_task: return ret; } -#define mem_write NULL - -#ifndef mem_write -/* This is a security hazard */ -static ssize_t mem_write(struct file * file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t mem_rw(struct file *file, char __user *buf, + size_t count, loff_t *ppos, int write) { - int copied; + struct mm_struct *mm = file->private_data; + unsigned long addr = *ppos; + ssize_t copied; char *page; - struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); - unsigned long dst = *ppos; - - copied = -ESRCH; - if (!task) - goto out_no_task; - if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) - goto out; + if (!mm) + return 0; - copied = -ENOMEM; page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) - goto out; + return -ENOMEM; copied = 0; + if (!atomic_inc_not_zero(&mm->mm_users)) + goto free; + while (count > 0) { - int this_len, retval; + int this_len = min_t(int, count, PAGE_SIZE); - this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - if (copy_from_user(page, buf, this_len)) { + if (write && copy_from_user(page, buf, this_len)) { copied = -EFAULT; break; } - retval = access_process_vm(task, dst, page, this_len, 1); - if (!retval) { + + this_len = access_remote_vm(mm, addr, page, this_len, write); + if (!this_len) { if (!copied) copied = -EIO; break; } - copied += retval; - buf += retval; - dst += retval; - count -= retval; + + if (!write && copy_to_user(buf, page, this_len)) { + copied = -EFAULT; + break; + } + + buf += this_len; + addr += this_len; + copied += this_len; + count -= this_len; } - *ppos = dst; + *ppos = addr; + + mmput(mm); +free: free_page((unsigned long) page); -out: - put_task_struct(task); -out_no_task: return copied; } -#endif + +static ssize_t mem_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return mem_rw(file, buf, count, ppos, 0); +} + +static ssize_t mem_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + return mem_rw(file, (char __user*)buf, count, ppos, 1); +} loff_t mem_lseek(struct file *file, loff_t offset, int orig) { @@ -838,51 +783,58 @@ loff_t mem_lseek(struct file *file, loff_t offset, int orig) return file->f_pos; } +static int mem_release(struct inode *inode, struct file *file) +{ + struct mm_struct *mm = file->private_data; + if (mm) + mmdrop(mm); + return 0; +} + static const struct file_operations proc_mem_operations = { .llseek = mem_lseek, .read = mem_read, .write = mem_write, .open = mem_open, + .release = mem_release, }; +static int environ_open(struct inode *inode, struct file *file) +{ + return __mem_open(inode, file, PTRACE_MODE_READ); +} + static ssize_t environ_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct task_struct *task = get_proc_task(file->f_dentry->d_inode); char *page; unsigned long src = *ppos; - int ret = -ESRCH; - struct mm_struct *mm; - - if (!task) - goto out_no_task; + int ret = 0; + struct mm_struct *mm = file->private_data; - if (!ptrace_may_attach(task)) - goto out; + if (!mm) + return 0; - ret = -ENOMEM; page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) - goto out; + return -ENOMEM; ret = 0; - - mm = get_task_mm(task); - if (!mm) - goto out_free; - + if (!atomic_inc_not_zero(&mm->mm_users)) + goto free; while (count > 0) { - int this_len, retval, max_len; - - this_len = mm->env_end - (mm->env_start + src); + size_t this_len, max_len; + int retval; - if (this_len <= 0) + if (src >= (mm->env_end - mm->env_start)) break; - max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - this_len = (this_len > max_len) ? max_len : this_len; + this_len = mm->env_end - (mm->env_start + src); + + max_len = min_t(size_t, PAGE_SIZE, count); + this_len = min(max_len, this_len); - retval = access_process_vm(task, (mm->env_start + src), + retval = access_remote_vm(mm, (mm->env_start + src), page, this_len, 0); if (retval <= 0) { @@ -901,73 +853,214 @@ static ssize_t environ_read(struct file *file, char __user *buf, count -= retval; } *ppos = src; - mmput(mm); -out_free: + +free: free_page((unsigned long) page); -out: - put_task_struct(task); -out_no_task: return ret; } static const struct file_operations proc_environ_operations = { + .open = environ_open, .read = environ_read, + .llseek = generic_file_llseek, + .release = mem_release, }; -static ssize_t oom_adjust_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) { - struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + struct task_struct *task = get_proc_task(file_inode(file)); char buffer[PROC_NUMBUF]; + int oom_adj = OOM_ADJUST_MIN; size_t len; - int oom_adjust; + unsigned long flags; if (!task) return -ESRCH; - oom_adjust = task->oomkilladj; + if (lock_task_sighand(task, &flags)) { + if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) + oom_adj = OOM_ADJUST_MAX; + else + oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / + OOM_SCORE_ADJ_MAX; + unlock_task_sighand(task, &flags); + } put_task_struct(task); - - len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); - + len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); return simple_read_from_buffer(buf, count, ppos, buffer, len); } -static ssize_t oom_adjust_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t oom_adj_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { struct task_struct *task; - char buffer[PROC_NUMBUF], *end; - int oom_adjust; + char buffer[PROC_NUMBUF]; + int oom_adj; + unsigned long flags; + int err; memset(buffer, 0, sizeof(buffer)); if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; - if (copy_from_user(buffer, buf, count)) - return -EFAULT; - oom_adjust = simple_strtol(buffer, &end, 0); - if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && - oom_adjust != OOM_DISABLE) - return -EINVAL; - if (*end == '\n') - end++; - task = get_proc_task(file->f_path.dentry->d_inode); + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &oom_adj); + if (err) + goto out; + if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && + oom_adj != OOM_DISABLE) { + err = -EINVAL; + goto out; + } + + task = get_proc_task(file_inode(file)); + if (!task) { + err = -ESRCH; + goto out; + } + + task_lock(task); + if (!task->mm) { + err = -EINVAL; + goto err_task_lock; + } + + if (!lock_task_sighand(task, &flags)) { + err = -ESRCH; + goto err_task_lock; + } + + /* + * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum + * value is always attainable. + */ + if (oom_adj == OOM_ADJUST_MAX) + oom_adj = OOM_SCORE_ADJ_MAX; + else + oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; + + if (oom_adj < task->signal->oom_score_adj && + !capable(CAP_SYS_RESOURCE)) { + err = -EACCES; + goto err_sighand; + } + + /* + * /proc/pid/oom_adj is provided for legacy purposes, ask users to use + * /proc/pid/oom_score_adj instead. + */ + pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", + current->comm, task_pid_nr(current), task_pid_nr(task), + task_pid_nr(task)); + + task->signal->oom_score_adj = oom_adj; + trace_oom_score_adj_update(task); +err_sighand: + unlock_task_sighand(task, &flags); +err_task_lock: + task_unlock(task); + put_task_struct(task); +out: + return err < 0 ? err : count; +} + +static const struct file_operations proc_oom_adj_operations = { + .read = oom_adj_read, + .write = oom_adj_write, + .llseek = generic_file_llseek, +}; + +static ssize_t oom_score_adj_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file_inode(file)); + char buffer[PROC_NUMBUF]; + short oom_score_adj = OOM_SCORE_ADJ_MIN; + unsigned long flags; + size_t len; + if (!task) return -ESRCH; - if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { - put_task_struct(task); - return -EACCES; + if (lock_task_sighand(task, &flags)) { + oom_score_adj = task->signal->oom_score_adj; + unlock_task_sighand(task, &flags); } - task->oomkilladj = oom_adjust; put_task_struct(task); - if (end - buffer == 0) - return -EIO; - return end - buffer; + len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj); + return simple_read_from_buffer(buf, count, ppos, buffer, len); +} + +static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + char buffer[PROC_NUMBUF]; + unsigned long flags; + int oom_score_adj; + int err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &oom_score_adj); + if (err) + goto out; + if (oom_score_adj < OOM_SCORE_ADJ_MIN || + oom_score_adj > OOM_SCORE_ADJ_MAX) { + err = -EINVAL; + goto out; + } + + task = get_proc_task(file_inode(file)); + if (!task) { + err = -ESRCH; + goto out; + } + + task_lock(task); + if (!task->mm) { + err = -EINVAL; + goto err_task_lock; + } + + if (!lock_task_sighand(task, &flags)) { + err = -ESRCH; + goto err_task_lock; + } + + if ((short)oom_score_adj < task->signal->oom_score_adj_min && + !capable(CAP_SYS_RESOURCE)) { + err = -EACCES; + goto err_sighand; + } + + task->signal->oom_score_adj = (short)oom_score_adj; + if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) + task->signal->oom_score_adj_min = (short)oom_score_adj; + trace_oom_score_adj_update(task); + +err_sighand: + unlock_task_sighand(task, &flags); +err_task_lock: + task_unlock(task); + put_task_struct(task); +out: + return err < 0 ? err : count; } -static const struct file_operations proc_oom_adjust_operations = { - .read = oom_adjust_read, - .write = oom_adjust_write, +static const struct file_operations proc_oom_score_adj_operations = { + .read = oom_score_adj_read, + .write = oom_score_adj_write, + .llseek = default_llseek, }; #ifdef CONFIG_AUDITSYSCALL @@ -975,7 +1068,7 @@ static const struct file_operations proc_oom_adjust_operations = { static ssize_t proc_loginuid_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { - struct inode * inode = file->f_path.dentry->d_inode; + struct inode * inode = file_inode(file); struct task_struct *task = get_proc_task(inode); ssize_t length; char tmpbuf[TMPBUFLEN]; @@ -983,7 +1076,8 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf, if (!task) return -ESRCH; length = scnprintf(tmpbuf, TMPBUFLEN, "%u", - audit_get_loginuid(task)); + from_kuid(file->f_cred->user_ns, + audit_get_loginuid(task))); put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -991,16 +1085,18 @@ static ssize_t proc_loginuid_read(struct file * file, char __user * buf, static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { - struct inode * inode = file->f_path.dentry->d_inode; + struct inode * inode = file_inode(file); char *page, *tmp; ssize_t length; uid_t loginuid; + kuid_t kloginuid; - if (!capable(CAP_AUDIT_CONTROL)) - return -EPERM; - - if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) + rcu_read_lock(); + if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { + rcu_read_unlock(); return -EPERM; + } + rcu_read_unlock(); if (count >= PAGE_SIZE) count = PAGE_SIZE - 1; @@ -1023,7 +1119,19 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, goto out_free_page; } - length = audit_set_loginuid(current, loginuid); + + /* is userspace tring to explicitly UNSET the loginuid? */ + if (loginuid == AUDIT_UID_UNSET) { + kloginuid = INVALID_UID; + } else { + kloginuid = make_kuid(file->f_cred->user_ns, loginuid); + if (!uid_valid(kloginuid)) { + length = -EINVAL; + goto out_free_page; + } + } + + length = audit_set_loginuid(kloginuid); if (likely(length == 0)) length = count; @@ -1035,12 +1143,13 @@ out_free_page: static const struct file_operations proc_loginuid_operations = { .read = proc_loginuid_read, .write = proc_loginuid_write, + .llseek = generic_file_llseek, }; static ssize_t proc_sessionid_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { - struct inode * inode = file->f_path.dentry->d_inode; + struct inode * inode = file_inode(file); struct task_struct *task = get_proc_task(inode); ssize_t length; char tmpbuf[TMPBUFLEN]; @@ -1055,6 +1164,7 @@ static ssize_t proc_sessionid_read(struct file * file, char __user * buf, static const struct file_operations proc_sessionid_operations = { .read = proc_sessionid_read, + .llseek = generic_file_llseek, }; #endif @@ -1062,7 +1172,7 @@ static const struct file_operations proc_sessionid_operations = { static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { - struct task_struct *task = get_proc_task(file->f_dentry->d_inode); + struct task_struct *task = get_proc_task(file_inode(file)); char buffer[PROC_NUMBUF]; size_t len; int make_it_fail; @@ -1091,22 +1201,25 @@ static ssize_t proc_fault_inject_write(struct file * file, count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; - make_it_fail = simple_strtol(buffer, &end, 0); - if (*end == '\n') - end++; - task = get_proc_task(file->f_dentry->d_inode); + make_it_fail = simple_strtol(strstrip(buffer), &end, 0); + if (*end) + return -EINVAL; + if (make_it_fail < 0 || make_it_fail > 1) + return -EINVAL; + + task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; task->make_it_fail = make_it_fail; put_task_struct(task); - if (end - buffer == 0) - return -EIO; - return end - buffer; + + return count; } static const struct file_operations proc_fault_inject_operations = { .read = proc_fault_inject_read, .write = proc_fault_inject_write, + .llseek = generic_file_llseek, }; #endif @@ -1120,8 +1233,6 @@ static int sched_show(struct seq_file *m, void *v) struct inode *inode = m->private; struct task_struct *p; - WARN_ON(!inode); - p = get_proc_task(inode); if (!p) return -ESRCH; @@ -1136,11 +1247,9 @@ static ssize_t sched_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct task_struct *p; - WARN_ON(!inode); - p = get_proc_task(inode); if (!p) return -ESRCH; @@ -1153,9 +1262,76 @@ sched_write(struct file *file, const char __user *buf, static int sched_open(struct inode *inode, struct file *filp) { + return single_open(filp, sched_show, inode); +} + +static const struct file_operations proc_pid_sched_operations = { + .open = sched_open, + .read = seq_read, + .write = sched_write, + .llseek = seq_lseek, + .release = single_release, +}; + +#endif + +#ifdef CONFIG_SCHED_AUTOGROUP +/* + * Print out autogroup related information: + */ +static int sched_autogroup_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + proc_sched_autogroup_show_task(p, m); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_autogroup_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + char buffer[PROC_NUMBUF]; + int nice; + int err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) + return -EFAULT; + + err = kstrtoint(strstrip(buffer), 0, &nice); + if (err < 0) + return err; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = proc_sched_autogroup_set_nice(p, nice); + if (err) + count = err; + + put_task_struct(p); + + return count; +} + +static int sched_autogroup_open(struct inode *inode, struct file *filp) +{ int ret; - ret = single_open(filp, sched_show, NULL); + ret = single_open(filp, sched_autogroup_show, NULL); if (!ret) { struct seq_file *m = filp->private_data; @@ -1164,30 +1340,113 @@ static int sched_open(struct inode *inode, struct file *filp) return ret; } -static const struct file_operations proc_pid_sched_operations = { - .open = sched_open, +static const struct file_operations proc_pid_sched_autogroup_operations = { + .open = sched_autogroup_open, .read = seq_read, - .write = sched_write, + .write = sched_autogroup_write, .llseek = seq_lseek, .release = single_release, }; -#endif +#endif /* CONFIG_SCHED_AUTOGROUP */ + +static ssize_t comm_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + char buffer[TASK_COMM_LEN]; + const size_t maxlen = sizeof(buffer) - 1; + + memset(buffer, 0, sizeof(buffer)); + if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count)) + return -EFAULT; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + if (same_thread_group(current, p)) + set_task_comm(p, buffer); + else + count = -EINVAL; + + put_task_struct(p); + + return count; +} + +static int comm_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + task_lock(p); + seq_printf(m, "%s\n", p->comm); + task_unlock(p); + + put_task_struct(p); + + return 0; +} + +static int comm_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, comm_show, inode); +} + +static const struct file_operations proc_pid_set_comm_operations = { + .open = comm_open, + .read = seq_read, + .write = comm_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static int proc_exe_link(struct dentry *dentry, struct path *exe_path) +{ + struct task_struct *task; + struct mm_struct *mm; + struct file *exe_file; + + task = get_proc_task(dentry->d_inode); + if (!task) + return -ENOENT; + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + return -ENOENT; + exe_file = get_mm_exe_file(mm); + mmput(mm); + if (exe_file) { + *exe_path = exe_file->f_path; + path_get(&exe_file->f_path); + fput(exe_file); + return 0; + } else + return -ENOENT; +} static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; + struct path path; int error = -EACCES; - /* We don't need a base pointer in the /proc filesystem */ - path_put(&nd->path); - /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); - nd->last_type = LAST_BIND; + error = PROC_I(inode)->op.proc_get_link(dentry, &path); + if (error) + goto out; + + nd_jump_link(nd, &path); + return NULL; out: return ERR_PTR(error); } @@ -1226,7 +1485,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(inode, &path); + error = PROC_I(inode)->op.proc_get_link(dentry, &path); if (error) goto out; @@ -1236,7 +1495,7 @@ out: return error; } -static const struct inode_operations proc_pid_link_inode_operations = { +const struct inode_operations proc_pid_link_inode_operations = { .readlink = proc_pid_readlink, .follow_link = proc_pid_follow_link, .setattr = proc_setattr, @@ -1245,26 +1504,11 @@ static const struct inode_operations proc_pid_link_inode_operations = { /* building an inode */ -static int task_dumpable(struct task_struct *task) -{ - int dumpable = 0; - struct mm_struct *mm; - - task_lock(task); - mm = task->mm; - if (mm) - dumpable = get_dumpable(mm); - task_unlock(task); - if(dumpable == 1) - return 1; - return 0; -} - - -static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) +struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) { struct inode * inode; struct proc_inode *ei; + const struct cred *cred; /* We need a new inode */ @@ -1274,6 +1518,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st /* Common stuff */ ei = PROC_I(inode); + inode->i_ino = get_next_ino(); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_op = &proc_def_inode_operations; @@ -1284,11 +1529,12 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st if (!ei->pid) goto out_unlock; - inode->i_uid = 0; - inode->i_gid = 0; if (task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); } security_task_to_inode(task, inode); @@ -1300,21 +1546,33 @@ out_unlock: return NULL; } -static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; struct task_struct *task; + const struct cred *cred; + struct pid_namespace *pid = dentry->d_sb->s_fs_info; + generic_fillattr(inode, stat); rcu_read_lock(); - stat->uid = 0; - stat->gid = 0; + stat->uid = GLOBAL_ROOT_UID; + stat->gid = GLOBAL_ROOT_GID; task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { + if (!has_pid_permissions(pid, task, 2)) { + rcu_read_unlock(); + /* + * This doesn't prevent learning whether PID exists, + * it only makes getattr() consistent with readdir(). + */ + return -ENOENT; + } if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - stat->uid = task->euid; - stat->gid = task->egid; + cred = __task_cred(task); + stat->uid = cred->euid; + stat->gid = cred->egid; } } rcu_read_unlock(); @@ -1338,18 +1596,29 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat * made this apply to all per process world readable and executable * directories. */ -static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) +int pid_revalidate(struct dentry *dentry, unsigned int flags) { - struct inode *inode = dentry->d_inode; - struct task_struct *task = get_proc_task(inode); + struct inode *inode; + struct task_struct *task; + const struct cred *cred; + + if (flags & LOOKUP_RCU) + return -ECHILD; + + inode = dentry->d_inode; + task = get_proc_task(inode); + if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); } else { - inode->i_uid = 0; - inode->i_gid = 0; + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; } inode->i_mode &= ~(S_ISUID | S_ISGID); security_task_to_inode(task, inode); @@ -1360,16 +1629,21 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) return 0; } -static int pid_delete_dentry(struct dentry * dentry) +static inline bool proc_inode_is_dead(struct inode *inode) +{ + return !proc_pid(inode)->tasks[PIDTYPE_PID].first; +} + +int pid_delete_dentry(const struct dentry *dentry) { /* Is the task we represent dead? * If so, then don't put the dentry on the lru list, * kill it immediately. */ - return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; + return proc_inode_is_dead(dentry->d_inode); } -static struct dentry_operations pid_dentry_operations = +const struct dentry_operations pid_dentry_operations = { .d_revalidate = pid_revalidate, .d_delete = pid_delete_dentry, @@ -1377,9 +1651,6 @@ static struct dentry_operations pid_dentry_operations = /* Lookups */ -typedef struct dentry *instantiate_t(struct inode *, struct dentry *, - struct task_struct *, const void *); - /* * Fill a directory entry. * @@ -1392,408 +1663,455 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *, * reported by readdir in sync with the inode numbers reported * by stat. */ -static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, - char *name, int len, +bool proc_fill_cache(struct file *file, struct dir_context *ctx, + const char *name, int len, instantiate_t instantiate, struct task_struct *task, const void *ptr) { - struct dentry *child, *dir = filp->f_path.dentry; + struct dentry *child, *dir = file->f_path.dentry; + struct qstr qname = QSTR_INIT(name, len); struct inode *inode; - struct qstr qname; - ino_t ino = 0; - unsigned type = DT_UNKNOWN; - - qname.name = name; - qname.len = len; - qname.hash = full_name_hash(name, len); + unsigned type; + ino_t ino; - child = d_lookup(dir, &qname); + child = d_hash_and_lookup(dir, &qname); if (!child) { - struct dentry *new; - new = d_alloc(dir, &qname); - if (new) { - child = instantiate(dir->d_inode, new, task, ptr); - if (child) - dput(new); - else - child = new; + child = d_alloc(dir, &qname); + if (!child) + goto end_instantiate; + if (instantiate(dir->d_inode, child, task, ptr) < 0) { + dput(child); + goto end_instantiate; } } - if (!child || IS_ERR(child) || !child->d_inode) - goto end_instantiate; inode = child->d_inode; - if (inode) { - ino = inode->i_ino; - type = inode->i_mode >> 12; - } + ino = inode->i_ino; + type = inode->i_mode >> 12; dput(child); + return dir_emit(ctx, name, len, ino, type); + end_instantiate: - if (!ino) - ino = find_inode_number(dir, &qname); - if (!ino) - ino = 1; - return filldir(dirent, name, len, filp->f_pos, ino, type); + return dir_emit(ctx, name, len, 1, DT_UNKNOWN); } -static unsigned name_to_int(struct dentry *dentry) +#ifdef CONFIG_CHECKPOINT_RESTORE + +/* + * dname_to_vma_addr - maps a dentry name into two unsigned longs + * which represent vma start and end addresses. + */ +static int dname_to_vma_addr(struct dentry *dentry, + unsigned long *start, unsigned long *end) { - const char *name = dentry->d_name.name; - int len = dentry->d_name.len; - unsigned n = 0; + if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2) + return -EINVAL; - if (len > 1 && *name == '0') - goto out; - while (len-- > 0) { - unsigned c = *name++ - '0'; - if (c > 9) - goto out; - if (n >= (~0U-9)/10) - goto out; - n *= 10; - n += c; - } - return n; -out: - return ~0U; + return 0; } -#define PROC_FDINFO_MAX 64 - -static int proc_fd_info(struct inode *inode, struct path *path, char *info) +static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) { - struct task_struct *task = get_proc_task(inode); - struct files_struct *files = NULL; - struct file *file; - int fd = proc_fd(inode); + unsigned long vm_start, vm_end; + bool exact_vma_exists = false; + struct mm_struct *mm = NULL; + struct task_struct *task; + const struct cred *cred; + struct inode *inode; + int status = 0; - if (task) { - files = get_files_struct(task); - put_task_struct(task); - } - if (files) { - /* - * We are not taking a ref to the file structure, so we must - * hold ->file_lock. - */ - spin_lock(&files->file_lock); - file = fcheck_files(files, fd); - if (file) { - if (path) { - *path = file->f_path; - path_get(&file->f_path); - } - if (info) - snprintf(info, PROC_FDINFO_MAX, - "pos:\t%lli\n" - "flags:\t0%o\n", - (long long) file->f_pos, - file->f_flags); - spin_unlock(&files->file_lock); - put_files_struct(files); - return 0; - } - spin_unlock(&files->file_lock); - put_files_struct(files); + if (flags & LOOKUP_RCU) + return -ECHILD; + + if (!capable(CAP_SYS_ADMIN)) { + status = -EPERM; + goto out_notask; } - return -ENOENT; -} -static int proc_fd_link(struct inode *inode, struct path *path) -{ - return proc_fd_info(inode, path, NULL); -} + inode = dentry->d_inode; + task = get_proc_task(inode); + if (!task) + goto out_notask; -static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) -{ - struct inode *inode = dentry->d_inode; - struct task_struct *task = get_proc_task(inode); - int fd = proc_fd(inode); - struct files_struct *files; + mm = mm_access(task, PTRACE_MODE_READ); + if (IS_ERR_OR_NULL(mm)) + goto out; - if (task) { - files = get_files_struct(task); - if (files) { + if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { + down_read(&mm->mmap_sem); + exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); + up_read(&mm->mmap_sem); + } + + mmput(mm); + + if (exact_vma_exists) { + if (task_dumpable(task)) { rcu_read_lock(); - if (fcheck_files(files, fd)) { - rcu_read_unlock(); - put_files_struct(files); - if (task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; - } else { - inode->i_uid = 0; - inode->i_gid = 0; - } - inode->i_mode &= ~(S_ISUID | S_ISGID); - security_task_to_inode(task, inode); - put_task_struct(task); - return 1; - } + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; rcu_read_unlock(); - put_files_struct(files); + } else { + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; } - put_task_struct(task); + security_task_to_inode(task, inode); + status = 1; } - d_drop(dentry); - return 0; + +out: + put_task_struct(task); + +out_notask: + if (status <= 0) + d_drop(dentry); + + return status; } -static struct dentry_operations tid_fd_dentry_operations = -{ - .d_revalidate = tid_fd_revalidate, +static const struct dentry_operations tid_map_files_dentry_operations = { + .d_revalidate = map_files_d_revalidate, .d_delete = pid_delete_dentry, }; -static struct dentry *proc_fd_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) +static int proc_map_files_get_link(struct dentry *dentry, struct path *path) +{ + unsigned long vm_start, vm_end; + struct vm_area_struct *vma; + struct task_struct *task; + struct mm_struct *mm; + int rc; + + rc = -ENOENT; + task = get_proc_task(dentry->d_inode); + if (!task) + goto out; + + mm = get_task_mm(task); + put_task_struct(task); + if (!mm) + goto out; + + rc = dname_to_vma_addr(dentry, &vm_start, &vm_end); + if (rc) + goto out_mmput; + + rc = -ENOENT; + down_read(&mm->mmap_sem); + vma = find_exact_vma(mm, vm_start, vm_end); + if (vma && vma->vm_file) { + *path = vma->vm_file->f_path; + path_get(path); + rc = 0; + } + up_read(&mm->mmap_sem); + +out_mmput: + mmput(mm); +out: + return rc; +} + +struct map_files_info { + fmode_t mode; + unsigned long len; + unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ +}; + +static int +proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, + struct task_struct *task, const void *ptr) { - unsigned fd = *(const unsigned *)ptr; - struct file *file; - struct files_struct *files; - struct inode *inode; - struct proc_inode *ei; - struct dentry *error = ERR_PTR(-ENOENT); + fmode_t mode = (fmode_t)(unsigned long)ptr; + struct proc_inode *ei; + struct inode *inode; inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) - goto out; - ei = PROC_I(inode); - ei->fd = fd; - files = get_files_struct(task); - if (!files) - goto out_iput; - inode->i_mode = S_IFLNK; + return -ENOENT; - /* - * We are not taking a ref to the file structure, so we must - * hold ->file_lock. - */ - spin_lock(&files->file_lock); - file = fcheck_files(files, fd); - if (!file) - goto out_unlock; - if (file->f_mode & 1) - inode->i_mode |= S_IRUSR | S_IXUSR; - if (file->f_mode & 2) - inode->i_mode |= S_IWUSR | S_IXUSR; - spin_unlock(&files->file_lock); - put_files_struct(files); + ei = PROC_I(inode); + ei->op.proc_get_link = proc_map_files_get_link; inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; - ei->op.proc_get_link = proc_fd_link; - dentry->d_op = &tid_fd_dentry_operations; + inode->i_mode = S_IFLNK; + + if (mode & FMODE_READ) + inode->i_mode |= S_IRUSR; + if (mode & FMODE_WRITE) + inode->i_mode |= S_IWUSR; + + d_set_d_op(dentry, &tid_map_files_dentry_operations); d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (tid_fd_revalidate(dentry, NULL)) - error = NULL; - out: - return error; -out_unlock: - spin_unlock(&files->file_lock); - put_files_struct(files); -out_iput: - iput(inode); - goto out; + return 0; } -static struct dentry *proc_lookupfd_common(struct inode *dir, - struct dentry *dentry, - instantiate_t instantiate) +static struct dentry *proc_map_files_lookup(struct inode *dir, + struct dentry *dentry, unsigned int flags) { - struct task_struct *task = get_proc_task(dir); - unsigned fd = name_to_int(dentry); - struct dentry *result = ERR_PTR(-ENOENT); + unsigned long vm_start, vm_end; + struct vm_area_struct *vma; + struct task_struct *task; + int result; + struct mm_struct *mm; + + result = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out; + result = -ENOENT; + task = get_proc_task(dir); if (!task) - goto out_no_task; - if (fd == ~0U) goto out; - result = instantiate(dir, dentry, task, &fd); -out: + result = -EACCES; + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out_put_task; + + result = -ENOENT; + if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) + goto out_put_task; + + mm = get_task_mm(task); + if (!mm) + goto out_put_task; + + down_read(&mm->mmap_sem); + vma = find_exact_vma(mm, vm_start, vm_end); + if (!vma) + goto out_no_vma; + + if (vma->vm_file) + result = proc_map_files_instantiate(dir, dentry, task, + (void *)(unsigned long)vma->vm_file->f_mode); + +out_no_vma: + up_read(&mm->mmap_sem); + mmput(mm); +out_put_task: put_task_struct(task); -out_no_task: - return result; +out: + return ERR_PTR(result); } -static int proc_readfd_common(struct file * filp, void * dirent, - filldir_t filldir, instantiate_t instantiate) +static const struct inode_operations proc_map_files_inode_operations = { + .lookup = proc_map_files_lookup, + .permission = proc_fd_permission, + .setattr = proc_setattr, +}; + +static int +proc_map_files_readdir(struct file *file, struct dir_context *ctx) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; - struct task_struct *p = get_proc_task(inode); - unsigned int fd, ino; - int retval; - struct files_struct * files; - struct fdtable *fdt; + struct vm_area_struct *vma; + struct task_struct *task; + struct mm_struct *mm; + unsigned long nr_files, pos, i; + struct flex_array *fa = NULL; + struct map_files_info info; + struct map_files_info *p; + int ret; - retval = -ENOENT; - if (!p) - goto out_no_task; - retval = 0; - - fd = filp->f_pos; - switch (fd) { - case 0: - if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) - goto out; - filp->f_pos++; - case 1: - ino = parent_ino(dentry); - if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) - goto out; - filp->f_pos++; - default: - files = get_files_struct(p); - if (!files) - goto out; - rcu_read_lock(); - fdt = files_fdtable(files); - for (fd = filp->f_pos-2; - fd < fdt->max_fds; - fd++, filp->f_pos++) { - char name[PROC_NUMBUF]; - int len; - - if (!fcheck_files(files, fd)) - continue; - rcu_read_unlock(); - - len = snprintf(name, sizeof(name), "%d", fd); - if (proc_fill_cache(filp, dirent, filldir, - name, len, instantiate, - p, &fd) < 0) { - rcu_read_lock(); - break; - } - rcu_read_lock(); - } - rcu_read_unlock(); - put_files_struct(files); + ret = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto out; + + ret = -ENOENT; + task = get_proc_task(file_inode(file)); + if (!task) + goto out; + + ret = -EACCES; + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out_put_task; + + ret = 0; + if (!dir_emit_dots(file, ctx)) + goto out_put_task; + + mm = get_task_mm(task); + if (!mm) + goto out_put_task; + down_read(&mm->mmap_sem); + + nr_files = 0; + + /* + * We need two passes here: + * + * 1) Collect vmas of mapped files with mmap_sem taken + * 2) Release mmap_sem and instantiate entries + * + * otherwise we get lockdep complained, since filldir() + * routine might require mmap_sem taken in might_fault(). + */ + + for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { + if (vma->vm_file && ++pos > ctx->pos) + nr_files++; } -out: - put_task_struct(p); -out_no_task: - return retval; -} -static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) -{ - return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); -} + if (nr_files) { + fa = flex_array_alloc(sizeof(info), nr_files, + GFP_KERNEL); + if (!fa || flex_array_prealloc(fa, 0, nr_files, + GFP_KERNEL)) { + ret = -ENOMEM; + if (fa) + flex_array_free(fa); + up_read(&mm->mmap_sem); + mmput(mm); + goto out_put_task; + } + for (i = 0, vma = mm->mmap, pos = 2; vma; + vma = vma->vm_next) { + if (!vma->vm_file) + continue; + if (++pos <= ctx->pos) + continue; + + info.mode = vma->vm_file->f_mode; + info.len = snprintf(info.name, + sizeof(info.name), "%lx-%lx", + vma->vm_start, vma->vm_end); + if (flex_array_put(fa, i++, &info, GFP_KERNEL)) + BUG(); + } + } + up_read(&mm->mmap_sem); -static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) -{ - return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); -} + for (i = 0; i < nr_files; i++) { + p = flex_array_get(fa, i); + if (!proc_fill_cache(file, ctx, + p->name, p->len, + proc_map_files_instantiate, + task, + (void *)(unsigned long)p->mode)) + break; + ctx->pos++; + } + if (fa) + flex_array_free(fa); + mmput(mm); -static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, - size_t len, loff_t *ppos) -{ - char tmp[PROC_FDINFO_MAX]; - int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); - if (!err) - err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); - return err; +out_put_task: + put_task_struct(task); +out: + return ret; } -static const struct file_operations proc_fdinfo_file_operations = { - .open = nonseekable_open, - .read = proc_fdinfo_read, +static const struct file_operations proc_map_files_operations = { + .read = generic_read_dir, + .iterate = proc_map_files_readdir, + .llseek = default_llseek, }; -static const struct file_operations proc_fd_operations = { - .read = generic_read_dir, - .readdir = proc_readfd, +struct timers_private { + struct pid *pid; + struct task_struct *task; + struct sighand_struct *sighand; + struct pid_namespace *ns; + unsigned long flags; }; -/* - * /proc/pid/fd needs a special permission handler so that a process can still - * access /proc/self/fd after it has executed a setuid(). - */ -static int proc_fd_permission(struct inode *inode, int mask, - struct nameidata *nd) +static void *timers_start(struct seq_file *m, loff_t *pos) { - int rv; + struct timers_private *tp = m->private; - rv = generic_permission(inode, mask, NULL); - if (rv == 0) - return 0; - if (task_pid(current) == proc_pid(inode)) - rv = 0; - return rv; + tp->task = get_pid_task(tp->pid, PIDTYPE_PID); + if (!tp->task) + return ERR_PTR(-ESRCH); + + tp->sighand = lock_task_sighand(tp->task, &tp->flags); + if (!tp->sighand) + return ERR_PTR(-ESRCH); + + return seq_list_start(&tp->task->signal->posix_timers, *pos); } -/* - * proc directories can do almost nothing.. - */ -static const struct inode_operations proc_fd_inode_operations = { - .lookup = proc_lookupfd, - .permission = proc_fd_permission, - .setattr = proc_setattr, -}; +static void *timers_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct timers_private *tp = m->private; + return seq_list_next(v, &tp->task->signal->posix_timers, pos); +} -static struct dentry *proc_fdinfo_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) +static void timers_stop(struct seq_file *m, void *v) { - unsigned fd = *(unsigned *)ptr; - struct inode *inode; - struct proc_inode *ei; - struct dentry *error = ERR_PTR(-ENOENT); + struct timers_private *tp = m->private; - inode = proc_pid_make_inode(dir->i_sb, task); - if (!inode) - goto out; - ei = PROC_I(inode); - ei->fd = fd; - inode->i_mode = S_IFREG | S_IRUSR; - inode->i_fop = &proc_fdinfo_file_operations; - dentry->d_op = &tid_fd_dentry_operations; - d_add(dentry, inode); - /* Close the race of the process dying before we return the dentry */ - if (tid_fd_revalidate(dentry, NULL)) - error = NULL; + if (tp->sighand) { + unlock_task_sighand(tp->task, &tp->flags); + tp->sighand = NULL; + } - out: - return error; + if (tp->task) { + put_task_struct(tp->task); + tp->task = NULL; + } } -static struct dentry *proc_lookupfdinfo(struct inode *dir, - struct dentry *dentry, - struct nameidata *nd) +static int show_timer(struct seq_file *m, void *v) { - return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); -} + struct k_itimer *timer; + struct timers_private *tp = m->private; + int notify; + static char *nstr[] = { + [SIGEV_SIGNAL] = "signal", + [SIGEV_NONE] = "none", + [SIGEV_THREAD] = "thread", + }; -static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) -{ - return proc_readfd_common(filp, dirent, filldir, - proc_fdinfo_instantiate); + timer = list_entry((struct list_head *)v, struct k_itimer, list); + notify = timer->it_sigev_notify; + + seq_printf(m, "ID: %d\n", timer->it_id); + seq_printf(m, "signal: %d/%p\n", timer->sigq->info.si_signo, + timer->sigq->info.si_value.sival_ptr); + seq_printf(m, "notify: %s/%s.%d\n", + nstr[notify & ~SIGEV_THREAD_ID], + (notify & SIGEV_THREAD_ID) ? "tid" : "pid", + pid_nr_ns(timer->it_pid, tp->ns)); + seq_printf(m, "ClockID: %d\n", timer->it_clock); + + return 0; } -static const struct file_operations proc_fdinfo_operations = { - .read = generic_read_dir, - .readdir = proc_readfdinfo, +static const struct seq_operations proc_timers_seq_ops = { + .start = timers_start, + .next = timers_next, + .stop = timers_stop, + .show = show_timer, }; -/* - * proc directories can do almost nothing.. - */ -static const struct inode_operations proc_fdinfo_inode_operations = { - .lookup = proc_lookupfdinfo, - .setattr = proc_setattr, -}; +static int proc_timers_open(struct inode *inode, struct file *file) +{ + struct timers_private *tp; + tp = __seq_open_private(file, &proc_timers_seq_ops, + sizeof(struct timers_private)); + if (!tp) + return -ENOMEM; -static struct dentry *proc_pident_instantiate(struct inode *dir, + tp->pid = proc_pid(inode); + tp->ns = inode->i_sb->s_fs_info; + return 0; +} + +static const struct file_operations proc_timers_operations = { + .open = proc_timers_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; +#endif /* CONFIG_CHECKPOINT_RESTORE */ + +static int proc_pident_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { const struct pid_entry *p = ptr; struct inode *inode; struct proc_inode *ei; - struct dentry *error = ERR_PTR(-EINVAL); inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) @@ -1802,19 +2120,19 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, ei = PROC_I(inode); inode->i_mode = p->mode; if (S_ISDIR(inode->i_mode)) - inode->i_nlink = 2; /* Use getattr to fix if necessary */ + set_nlink(inode, 2); /* Use getattr to fix if necessary */ if (p->iop) inode->i_op = p->iop; if (p->fop) inode->i_fop = p->fop; ei->op = p->op; - dentry->d_op = &pid_dentry_operations; + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, NULL)) - error = NULL; + if (pid_revalidate(dentry, 0)) + return 0; out: - return error; + return -ENOENT; } static struct dentry *proc_pident_lookup(struct inode *dir, @@ -1822,13 +2140,11 @@ static struct dentry *proc_pident_lookup(struct inode *dir, const struct pid_entry *ents, unsigned int nents) { - struct inode *inode; - struct dentry *error; + int error; struct task_struct *task = get_proc_task(dir); const struct pid_entry *p, *last; - error = ERR_PTR(-ENOENT); - inode = NULL; + error = -ENOENT; if (!task) goto out_no_task; @@ -1851,77 +2167,40 @@ static struct dentry *proc_pident_lookup(struct inode *dir, out: put_task_struct(task); out_no_task: - return error; -} - -static int proc_pident_fill_cache(struct file *filp, void *dirent, - filldir_t filldir, struct task_struct *task, const struct pid_entry *p) -{ - return proc_fill_cache(filp, dirent, filldir, p->name, p->len, - proc_pident_instantiate, task, p); + return ERR_PTR(error); } -static int proc_pident_readdir(struct file *filp, - void *dirent, filldir_t filldir, +static int proc_pident_readdir(struct file *file, struct dir_context *ctx, const struct pid_entry *ents, unsigned int nents) { - int i; - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; - struct task_struct *task = get_proc_task(inode); - const struct pid_entry *p, *last; - ino_t ino; - int ret; + struct task_struct *task = get_proc_task(file_inode(file)); + const struct pid_entry *p; - ret = -ENOENT; if (!task) - goto out_no_task; + return -ENOENT; - ret = 0; - i = filp->f_pos; - switch (i) { - case 0: - ino = inode->i_ino; - if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) - goto out; - i++; - filp->f_pos++; - /* fall through */ - case 1: - ino = parent_ino(dentry); - if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) - goto out; - i++; - filp->f_pos++; - /* fall through */ - default: - i -= 2; - if (i >= nents) { - ret = 1; - goto out; - } - p = ents + i; - last = &ents[nents - 1]; - while (p <= last) { - if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0) - goto out; - filp->f_pos++; - p++; - } - } + if (!dir_emit_dots(file, ctx)) + goto out; + + if (ctx->pos >= nents + 2) + goto out; - ret = 1; + for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) { + if (!proc_fill_cache(file, ctx, p->name, p->len, + proc_pident_instantiate, task, p)) + break; + ctx->pos++; + } out: put_task_struct(task); -out_no_task: - return ret; + return 0; } #ifdef CONFIG_SECURITY static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { - struct inode * inode = file->f_path.dentry->d_inode; + struct inode * inode = file_inode(file); char *p = NULL; ssize_t length; struct task_struct *task = get_proc_task(inode); @@ -1942,7 +2221,7 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { - struct inode * inode = file->f_path.dentry->d_inode; + struct inode * inode = file_inode(file); char *page; ssize_t length; struct task_struct *task = get_proc_task(inode); @@ -1967,9 +2246,15 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, if (copy_from_user(page, buf, count)) goto out_free; + /* Guard against adverse ptrace interaction */ + length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); + if (length < 0) + goto out_free; + length = security_setprocattr(task, (char*)file->f_path.dentry->d_name.name, (void*)page, count); + mutex_unlock(&task->signal->cred_guard_mutex); out_free: free_page((unsigned long) page); out: @@ -1981,31 +2266,32 @@ out_no_task: static const struct file_operations proc_pid_attr_operations = { .read = proc_pid_attr_read, .write = proc_pid_attr_write, + .llseek = generic_file_llseek, }; static const struct pid_entry attr_dir_stuff[] = { - REG("current", S_IRUGO|S_IWUGO, pid_attr), - REG("prev", S_IRUGO, pid_attr), - REG("exec", S_IRUGO|S_IWUGO, pid_attr), - REG("fscreate", S_IRUGO|S_IWUGO, pid_attr), - REG("keycreate", S_IRUGO|S_IWUGO, pid_attr), - REG("sockcreate", S_IRUGO|S_IWUGO, pid_attr), + REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations), + REG("prev", S_IRUGO, proc_pid_attr_operations), + REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations), + REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), + REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), + REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), }; -static int proc_attr_dir_readdir(struct file * filp, - void * dirent, filldir_t filldir) +static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) { - return proc_pident_readdir(filp,dirent,filldir, - attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff)); + return proc_pident_readdir(file, ctx, + attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); } static const struct file_operations proc_attr_dir_operations = { .read = generic_read_dir, - .readdir = proc_attr_dir_readdir, + .iterate = proc_attr_dir_readdir, + .llseek = default_llseek, }; static struct dentry *proc_attr_dir_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); @@ -2019,11 +2305,11 @@ static const struct inode_operations proc_attr_dir_inode_operations = { #endif -#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) +#ifdef CONFIG_ELF_CORE static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct task_struct *task = get_proc_task(file->f_dentry->d_inode); + struct task_struct *task = get_proc_task(file_inode(file)); struct mm_struct *mm; char buffer[PROC_NUMBUF]; size_t len; @@ -2075,7 +2361,7 @@ static ssize_t proc_coredump_filter_write(struct file *file, goto out_no_task; ret = -ESRCH; - task = get_proc_task(file->f_dentry->d_inode); + task = get_proc_task(file_inode(file)); if (!task) goto out_no_task; @@ -2101,183 +2387,157 @@ static ssize_t proc_coredump_filter_write(struct file *file, static const struct file_operations proc_coredump_filter_operations = { .read = proc_coredump_filter_read, .write = proc_coredump_filter_write, + .llseek = generic_file_llseek, }; #endif -/* - * /proc/self: - */ -static int proc_self_readlink(struct dentry *dentry, char __user *buffer, - int buflen) +#ifdef CONFIG_TASK_IO_ACCOUNTING +static int do_io_accounting(struct task_struct *task, char *buffer, int whole) { - struct pid_namespace *ns = dentry->d_sb->s_fs_info; - pid_t tgid = task_tgid_nr_ns(current, ns); - char tmp[PROC_NUMBUF]; - if (!tgid) - return -ENOENT; - sprintf(tmp, "%d", tgid); - return vfs_readlink(dentry,buffer,buflen,tmp); -} + struct task_io_accounting acct = task->ioac; + unsigned long flags; + int result; -static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct pid_namespace *ns = dentry->d_sb->s_fs_info; - pid_t tgid = task_tgid_nr_ns(current, ns); - char tmp[PROC_NUMBUF]; - if (!tgid) - return ERR_PTR(-ENOENT); - sprintf(tmp, "%d", task_tgid_nr_ns(current, ns)); - return ERR_PTR(vfs_follow_link(nd,tmp)); -} + result = mutex_lock_killable(&task->signal->cred_guard_mutex); + if (result) + return result; -static const struct inode_operations proc_self_inode_operations = { - .readlink = proc_self_readlink, - .follow_link = proc_self_follow_link, -}; + if (!ptrace_may_access(task, PTRACE_MODE_READ)) { + result = -EACCES; + goto out_unlock; + } -/* - * proc base - * - * These are the directory entries in the root directory of /proc - * that properly belong to the /proc filesystem, as they describe - * describe something that is process related. - */ -static const struct pid_entry proc_base_stuff[] = { - NOD("self", S_IFLNK|S_IRWXUGO, - &proc_self_inode_operations, NULL, {}), -}; + if (whole && lock_task_sighand(task, &flags)) { + struct task_struct *t = task; -/* - * Exceptional case: normally we are not allowed to unhash a busy - * directory. In this case, however, we can do it - no aliasing problems - * due to the way we treat inodes. - */ -static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd) -{ - struct inode *inode = dentry->d_inode; - struct task_struct *task = get_proc_task(inode); - if (task) { - put_task_struct(task); - return 1; + task_io_accounting_add(&acct, &task->signal->ioac); + while_each_thread(task, t) + task_io_accounting_add(&acct, &t->ioac); + + unlock_task_sighand(task, &flags); } - d_drop(dentry); - return 0; + result = sprintf(buffer, + "rchar: %llu\n" + "wchar: %llu\n" + "syscr: %llu\n" + "syscw: %llu\n" + "read_bytes: %llu\n" + "write_bytes: %llu\n" + "cancelled_write_bytes: %llu\n", + (unsigned long long)acct.rchar, + (unsigned long long)acct.wchar, + (unsigned long long)acct.syscr, + (unsigned long long)acct.syscw, + (unsigned long long)acct.read_bytes, + (unsigned long long)acct.write_bytes, + (unsigned long long)acct.cancelled_write_bytes); +out_unlock: + mutex_unlock(&task->signal->cred_guard_mutex); + return result; } -static struct dentry_operations proc_base_dentry_operations = +static int proc_tid_io_accounting(struct task_struct *task, char *buffer) { - .d_revalidate = proc_base_revalidate, - .d_delete = pid_delete_dentry, -}; + return do_io_accounting(task, buffer, 0); +} -static struct dentry *proc_base_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) +static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) { - const struct pid_entry *p = ptr; - struct inode *inode; - struct proc_inode *ei; - struct dentry *error = ERR_PTR(-EINVAL); + return do_io_accounting(task, buffer, 1); +} +#endif /* CONFIG_TASK_IO_ACCOUNTING */ - /* Allocate the inode */ - error = ERR_PTR(-ENOMEM); - inode = new_inode(dir->i_sb); - if (!inode) - goto out; +#ifdef CONFIG_USER_NS +static int proc_id_map_open(struct inode *inode, struct file *file, + struct seq_operations *seq_ops) +{ + struct user_namespace *ns = NULL; + struct task_struct *task; + struct seq_file *seq; + int ret = -EINVAL; - /* Initialize the inode */ - ei = PROC_I(inode); - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + task = get_proc_task(inode); + if (task) { + rcu_read_lock(); + ns = get_user_ns(task_cred_xxx(task, user_ns)); + rcu_read_unlock(); + put_task_struct(task); + } + if (!ns) + goto err; - /* - * grab the reference to the task. - */ - ei->pid = get_task_pid(task, PIDTYPE_PID); - if (!ei->pid) - goto out_iput; + ret = seq_open(file, seq_ops); + if (ret) + goto err_put_ns; - inode->i_uid = 0; - inode->i_gid = 0; - inode->i_mode = p->mode; - if (S_ISDIR(inode->i_mode)) - inode->i_nlink = 2; - if (S_ISLNK(inode->i_mode)) - inode->i_size = 64; - if (p->iop) - inode->i_op = p->iop; - if (p->fop) - inode->i_fop = p->fop; - ei->op = p->op; - dentry->d_op = &proc_base_dentry_operations; - d_add(dentry, inode); - error = NULL; -out: - return error; -out_iput: - iput(inode); - goto out; + seq = file->private_data; + seq->private = ns; + + return 0; +err_put_ns: + put_user_ns(ns); +err: + return ret; } -static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) +static int proc_id_map_release(struct inode *inode, struct file *file) { - struct dentry *error; - struct task_struct *task = get_proc_task(dir); - const struct pid_entry *p, *last; - - error = ERR_PTR(-ENOENT); - - if (!task) - goto out_no_task; - - /* Lookup the directory entry */ - last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1]; - for (p = proc_base_stuff; p <= last; p++) { - if (p->len != dentry->d_name.len) - continue; - if (!memcmp(dentry->d_name.name, p->name, p->len)) - break; - } - if (p > last) - goto out; + struct seq_file *seq = file->private_data; + struct user_namespace *ns = seq->private; + put_user_ns(ns); + return seq_release(inode, file); +} - error = proc_base_instantiate(dir, dentry, task, p); +static int proc_uid_map_open(struct inode *inode, struct file *file) +{ + return proc_id_map_open(inode, file, &proc_uid_seq_operations); +} -out: - put_task_struct(task); -out_no_task: - return error; +static int proc_gid_map_open(struct inode *inode, struct file *file) +{ + return proc_id_map_open(inode, file, &proc_gid_seq_operations); } -static int proc_base_fill_cache(struct file *filp, void *dirent, - filldir_t filldir, struct task_struct *task, const struct pid_entry *p) +static int proc_projid_map_open(struct inode *inode, struct file *file) { - return proc_fill_cache(filp, dirent, filldir, p->name, p->len, - proc_base_instantiate, task, p); + return proc_id_map_open(inode, file, &proc_projid_seq_operations); } -#ifdef CONFIG_TASK_IO_ACCOUNTING -static int proc_pid_io_accounting(struct task_struct *task, char *buffer) +static const struct file_operations proc_uid_map_operations = { + .open = proc_uid_map_open, + .write = proc_uid_map_write, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_id_map_release, +}; + +static const struct file_operations proc_gid_map_operations = { + .open = proc_gid_map_open, + .write = proc_gid_map_write, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_id_map_release, +}; + +static const struct file_operations proc_projid_map_operations = { + .open = proc_projid_map_open, + .write = proc_projid_map_write, + .read = seq_read, + .llseek = seq_lseek, + .release = proc_id_map_release, +}; +#endif /* CONFIG_USER_NS */ + +static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) { - return sprintf(buffer, -#ifdef CONFIG_TASK_XACCT - "rchar: %llu\n" - "wchar: %llu\n" - "syscr: %llu\n" - "syscw: %llu\n" -#endif - "read_bytes: %llu\n" - "write_bytes: %llu\n" - "cancelled_write_bytes: %llu\n", -#ifdef CONFIG_TASK_XACCT - (unsigned long long)task->rchar, - (unsigned long long)task->wchar, - (unsigned long long)task->syscr, - (unsigned long long)task->syscw, -#endif - (unsigned long long)task->ioac.read_bytes, - (unsigned long long)task->ioac.write_bytes, - (unsigned long long)task->ioac.cancelled_write_bytes); + int err = lock_trace(task); + if (!err) { + seq_printf(m, "%08x\n", task->personality); + unlock_trace(task); + } + return err; } -#endif /* * Thread groups @@ -2286,85 +2546,114 @@ static const struct file_operations proc_task_operations; static const struct inode_operations proc_task_inode_operations; static const struct pid_entry tgid_base_stuff[] = { - DIR("task", S_IRUGO|S_IXUGO, task), - DIR("fd", S_IRUSR|S_IXUSR, fd), - DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), + DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), + DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), +#ifdef CONFIG_CHECKPOINT_RESTORE + DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), +#endif + DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), + DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), #ifdef CONFIG_NET - DIR("net", S_IRUGO|S_IXUGO, net), + DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), #endif - REG("environ", S_IRUSR, environ), - INF("auxv", S_IRUSR, pid_auxv), - ONE("status", S_IRUGO, pid_status), - INF("limits", S_IRUSR, pid_limits), + REG("environ", S_IRUSR, proc_environ_operations), + INF("auxv", S_IRUSR, proc_pid_auxv), + ONE("status", S_IRUGO, proc_pid_status), + ONE("personality", S_IRUSR, proc_pid_personality), + INF("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG - REG("sched", S_IRUGO|S_IWUSR, pid_sched), + REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif - INF("cmdline", S_IRUGO, pid_cmdline), - ONE("stat", S_IRUGO, tgid_stat), - ONE("statm", S_IRUGO, pid_statm), - REG("maps", S_IRUGO, maps), +#ifdef CONFIG_SCHED_AUTOGROUP + REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), +#endif + REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), +#ifdef CONFIG_HAVE_ARCH_TRACEHOOK + INF("syscall", S_IRUSR, proc_pid_syscall), +#endif + INF("cmdline", S_IRUGO, proc_pid_cmdline), + ONE("stat", S_IRUGO, proc_tgid_stat), + ONE("statm", S_IRUGO, proc_pid_statm), + REG("maps", S_IRUGO, proc_pid_maps_operations), #ifdef CONFIG_NUMA - REG("numa_maps", S_IRUGO, numa_maps), + REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), #endif - REG("mem", S_IRUSR|S_IWUSR, mem), - LNK("cwd", cwd), - LNK("root", root), - LNK("exe", exe), - REG("mounts", S_IRUGO, mounts), - REG("mountstats", S_IRUSR, mountstats), + REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), + LNK("cwd", proc_cwd_link), + LNK("root", proc_root_link), + LNK("exe", proc_exe_link), + REG("mounts", S_IRUGO, proc_mounts_operations), + REG("mountinfo", S_IRUGO, proc_mountinfo_operations), + REG("mountstats", S_IRUSR, proc_mountstats_operations), #ifdef CONFIG_PROC_PAGE_MONITOR - REG("clear_refs", S_IWUSR, clear_refs), - REG("smaps", S_IRUGO, smaps), - REG("pagemap", S_IRUSR, pagemap), + REG("clear_refs", S_IWUSR, proc_clear_refs_operations), + REG("smaps", S_IRUGO, proc_pid_smaps_operations), + REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY - DIR("attr", S_IRUGO|S_IXUGO, attr_dir), + DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif #ifdef CONFIG_KALLSYMS - INF("wchan", S_IRUGO, pid_wchan), + INF("wchan", S_IRUGO, proc_pid_wchan), +#endif +#ifdef CONFIG_STACKTRACE + ONE("stack", S_IRUSR, proc_pid_stack), #endif #ifdef CONFIG_SCHEDSTATS - INF("schedstat", S_IRUGO, pid_schedstat), + INF("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP - REG("latency", S_IRUGO, lstats), + REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET - REG("cpuset", S_IRUGO, cpuset), + REG("cpuset", S_IRUGO, proc_cpuset_operations), #endif #ifdef CONFIG_CGROUPS - REG("cgroup", S_IRUGO, cgroup), + REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif - INF("oom_score", S_IRUGO, oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), + INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), + REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL - REG("loginuid", S_IWUSR|S_IRUGO, loginuid), - REG("sessionid", S_IRUSR, sessionid), + REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), + REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif #ifdef CONFIG_FAULT_INJECTION - REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), + REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), #endif -#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) - REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), +#ifdef CONFIG_ELF_CORE + REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING - INF("io", S_IRUGO, pid_io_accounting), + INF("io", S_IRUSR, proc_tgid_io_accounting), +#endif +#ifdef CONFIG_HARDWALL + INF("hardwall", S_IRUGO, proc_pid_hardwall), +#endif +#ifdef CONFIG_USER_NS + REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), + REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), + REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), +#endif +#ifdef CONFIG_CHECKPOINT_RESTORE + REG("timers", S_IRUGO, proc_timers_operations), #endif }; -static int proc_tgid_base_readdir(struct file * filp, - void * dirent, filldir_t filldir) +static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) { - return proc_pident_readdir(filp,dirent,filldir, - tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); + return proc_pident_readdir(file, ctx, + tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); } static const struct file_operations proc_tgid_base_operations = { .read = generic_read_dir, - .readdir = proc_tgid_base_readdir, + .iterate = proc_tgid_base_readdir, + .llseek = default_llseek, }; -static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ +static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ return proc_pident_lookup(dir, dentry, tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); } @@ -2373,6 +2662,7 @@ static const struct inode_operations proc_tgid_base_inode_operations = { .lookup = proc_tgid_base_lookup, .getattr = pid_getattr, .setattr = proc_setattr, + .permission = proc_pid_permission, }; static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) @@ -2383,17 +2673,14 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) name.name = buf; name.len = snprintf(buf, sizeof(buf), "%d", pid); + /* no ->d_hash() rejects on procfs */ dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { - if (!(current->flags & PF_EXITING)) - shrink_dcache_parent(dentry); + shrink_dcache_parent(dentry); d_drop(dentry); dput(dentry); } - if (tgid == 0) - goto out; - name.name = buf; name.len = snprintf(buf, sizeof(buf), "%d", tgid); leader = d_hash_and_lookup(mnt->mnt_root, &name); @@ -2450,29 +2737,23 @@ out: void proc_flush_task(struct task_struct *task) { int i; - struct pid *pid, *tgid = NULL; + struct pid *pid, *tgid; struct upid *upid; pid = task_pid(task); - if (thread_group_leader(task)) - tgid = task_tgid(task); + tgid = task_tgid(task); for (i = 0; i <= pid->level; i++) { upid = &pid->numbers[i]; proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, - tgid ? tgid->numbers[i].nr : 0); + tgid->numbers[i].nr); } - - upid = &pid->numbers[pid->level]; - if (upid->nr == 1) - pid_ns_release_proc(upid->ns); } -static struct dentry *proc_pid_instantiate(struct inode *dir, - struct dentry * dentry, - struct task_struct *task, const void *ptr) +static int proc_pid_instantiate(struct inode *dir, + struct dentry * dentry, + struct task_struct *task, const void *ptr) { - struct dentry *error = ERR_PTR(-ENOENT); struct inode *inode; inode = proc_pid_make_inode(dir->i_sb, task); @@ -2483,32 +2764,27 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; - inode->i_nlink = 5; -#ifdef CONFIG_SECURITY - inode->i_nlink += 1; -#endif - dentry->d_op = &pid_dentry_operations; + set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff, + ARRAY_SIZE(tgid_base_stuff))); + + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, NULL)) - error = NULL; + if (pid_revalidate(dentry, 0)) + return 0; out: - return error; + return -ENOENT; } -struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { - struct dentry *result = ERR_PTR(-ENOENT); + int result = 0; struct task_struct *task; unsigned tgid; struct pid_namespace *ns; - result = proc_base_lookup(dir, dentry); - if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) - goto out; - tgid = name_to_int(dentry); if (tgid == ~0U) goto out; @@ -2525,7 +2801,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct result = proc_pid_instantiate(dir, dentry, task, NULL); put_task_struct(task); out: - return result; + return ERR_PTR(result); } /* @@ -2571,50 +2847,44 @@ retry: return iter; } -#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) - -static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, - struct tgid_iter iter) -{ - char name[PROC_NUMBUF]; - int len = snprintf(name, sizeof(name), "%d", iter.tgid); - return proc_fill_cache(filp, dirent, filldir, name, len, - proc_pid_instantiate, iter.task, NULL); -} +#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1) /* for the /proc/ directory itself, after non-process stuff has been done */ -int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) +int proc_pid_readdir(struct file *file, struct dir_context *ctx) { - unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; - struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode); struct tgid_iter iter; - struct pid_namespace *ns; + struct pid_namespace *ns = file->f_dentry->d_sb->s_fs_info; + loff_t pos = ctx->pos; - if (!reaper) - goto out_no_task; + if (pos >= PID_MAX_LIMIT + TGID_OFFSET) + return 0; - for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { - const struct pid_entry *p = &proc_base_stuff[nr]; - if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) - goto out; + if (pos == TGID_OFFSET - 1) { + struct inode *inode = ns->proc_self->d_inode; + if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) + return 0; + iter.tgid = 0; + } else { + iter.tgid = pos - TGID_OFFSET; } - - ns = filp->f_dentry->d_sb->s_fs_info; iter.task = NULL; - iter.tgid = filp->f_pos - TGID_OFFSET; for (iter = next_tgid(ns, iter); iter.task; iter.tgid += 1, iter = next_tgid(ns, iter)) { - filp->f_pos = iter.tgid + TGID_OFFSET; - if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { + char name[PROC_NUMBUF]; + int len; + if (!has_pid_permissions(ns, iter.task, 2)) + continue; + + len = snprintf(name, sizeof(name), "%d", iter.tgid); + ctx->pos = iter.tgid + TGID_OFFSET; + if (!proc_fill_cache(file, ctx, name, len, + proc_pid_instantiate, iter.task, NULL)) { put_task_struct(iter.task); - goto out; + return 0; } } - filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; -out: - put_task_struct(reaper); -out_no_task: + ctx->pos = PID_MAX_LIMIT + TGID_OFFSET; return 0; } @@ -2622,76 +2892,102 @@ out_no_task: * Tasks */ static const struct pid_entry tid_base_stuff[] = { - DIR("fd", S_IRUSR|S_IXUSR, fd), - DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), - REG("environ", S_IRUSR, environ), - INF("auxv", S_IRUSR, pid_auxv), - ONE("status", S_IRUGO, pid_status), - INF("limits", S_IRUSR, pid_limits), + DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), + DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), + DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), + REG("environ", S_IRUSR, proc_environ_operations), + INF("auxv", S_IRUSR, proc_pid_auxv), + ONE("status", S_IRUGO, proc_pid_status), + ONE("personality", S_IRUSR, proc_pid_personality), + INF("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG - REG("sched", S_IRUGO|S_IWUSR, pid_sched), + REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), +#endif + REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), +#ifdef CONFIG_HAVE_ARCH_TRACEHOOK + INF("syscall", S_IRUSR, proc_pid_syscall), +#endif + INF("cmdline", S_IRUGO, proc_pid_cmdline), + ONE("stat", S_IRUGO, proc_tid_stat), + ONE("statm", S_IRUGO, proc_pid_statm), + REG("maps", S_IRUGO, proc_tid_maps_operations), +#ifdef CONFIG_CHECKPOINT_RESTORE + REG("children", S_IRUGO, proc_tid_children_operations), #endif - INF("cmdline", S_IRUGO, pid_cmdline), - ONE("stat", S_IRUGO, tid_stat), - ONE("statm", S_IRUGO, pid_statm), - REG("maps", S_IRUGO, maps), #ifdef CONFIG_NUMA - REG("numa_maps", S_IRUGO, numa_maps), + REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations), #endif - REG("mem", S_IRUSR|S_IWUSR, mem), - LNK("cwd", cwd), - LNK("root", root), - LNK("exe", exe), - REG("mounts", S_IRUGO, mounts), + REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), + LNK("cwd", proc_cwd_link), + LNK("root", proc_root_link), + LNK("exe", proc_exe_link), + REG("mounts", S_IRUGO, proc_mounts_operations), + REG("mountinfo", S_IRUGO, proc_mountinfo_operations), #ifdef CONFIG_PROC_PAGE_MONITOR - REG("clear_refs", S_IWUSR, clear_refs), - REG("smaps", S_IRUGO, smaps), - REG("pagemap", S_IRUSR, pagemap), + REG("clear_refs", S_IWUSR, proc_clear_refs_operations), + REG("smaps", S_IRUGO, proc_tid_smaps_operations), + REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY - DIR("attr", S_IRUGO|S_IXUGO, attr_dir), + DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif #ifdef CONFIG_KALLSYMS - INF("wchan", S_IRUGO, pid_wchan), + INF("wchan", S_IRUGO, proc_pid_wchan), +#endif +#ifdef CONFIG_STACKTRACE + ONE("stack", S_IRUSR, proc_pid_stack), #endif #ifdef CONFIG_SCHEDSTATS - INF("schedstat", S_IRUGO, pid_schedstat), + INF("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP - REG("latency", S_IRUGO, lstats), + REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET - REG("cpuset", S_IRUGO, cpuset), + REG("cpuset", S_IRUGO, proc_cpuset_operations), #endif #ifdef CONFIG_CGROUPS - REG("cgroup", S_IRUGO, cgroup), + REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif - INF("oom_score", S_IRUGO, oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), + INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), + REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL - REG("loginuid", S_IWUSR|S_IRUGO, loginuid), - REG("sessionid", S_IRUSR, sessionid), + REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), + REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif #ifdef CONFIG_FAULT_INJECTION - REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), + REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), +#endif +#ifdef CONFIG_TASK_IO_ACCOUNTING + INF("io", S_IRUSR, proc_tid_io_accounting), +#endif +#ifdef CONFIG_HARDWALL + INF("hardwall", S_IRUGO, proc_pid_hardwall), +#endif +#ifdef CONFIG_USER_NS + REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), + REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), + REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), #endif }; -static int proc_tid_base_readdir(struct file * filp, - void * dirent, filldir_t filldir) +static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) { - return proc_pident_readdir(filp,dirent,filldir, - tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); + return proc_pident_readdir(file, ctx, + tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); } -static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ +static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ return proc_pident_lookup(dir, dentry, tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); } static const struct file_operations proc_tid_base_operations = { .read = generic_read_dir, - .readdir = proc_tid_base_readdir, + .iterate = proc_tid_base_readdir, + .llseek = default_llseek, }; static const struct inode_operations proc_tid_base_inode_operations = { @@ -2700,10 +2996,9 @@ static const struct inode_operations proc_tid_base_inode_operations = { .setattr = proc_setattr, }; -static struct dentry *proc_task_instantiate(struct inode *dir, +static int proc_task_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { - struct dentry *error = ERR_PTR(-ENOENT); struct inode *inode; inode = proc_pid_make_inode(dir->i_sb, task); @@ -2713,24 +3008,23 @@ static struct dentry *proc_task_instantiate(struct inode *dir, inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; inode->i_flags|=S_IMMUTABLE; - inode->i_nlink = 4; -#ifdef CONFIG_SECURITY - inode->i_nlink += 1; -#endif - dentry->d_op = &pid_dentry_operations; + set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff, + ARRAY_SIZE(tid_base_stuff))); + + d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, NULL)) - error = NULL; + if (pid_revalidate(dentry, 0)) + return 0; out: - return error; + return -ENOENT; } -static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { - struct dentry *result = ERR_PTR(-ENOENT); + int result = -ENOENT; struct task_struct *task; struct task_struct *leader = get_proc_task(dir); unsigned tid; @@ -2760,7 +3054,7 @@ out_drop_task: out: put_task_struct(leader); out_no_task: - return result; + return ERR_PTR(result); } /* @@ -2775,34 +3069,42 @@ out_no_task: * In the case of a seek we start with the leader and walk nr * threads past it. */ -static struct task_struct *first_tid(struct task_struct *leader, - int tid, int nr, struct pid_namespace *ns) +static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos, + struct pid_namespace *ns) { - struct task_struct *pos; + struct task_struct *pos, *task; + unsigned long nr = f_pos; + + if (nr != f_pos) /* 32bit overflow? */ + return NULL; rcu_read_lock(); - /* Attempt to start with the pid of a thread */ - if (tid && (nr > 0)) { + task = pid_task(pid, PIDTYPE_PID); + if (!task) + goto fail; + + /* Attempt to start with the tid of a thread */ + if (tid && nr) { pos = find_task_by_pid_ns(tid, ns); - if (pos && (pos->group_leader == leader)) + if (pos && same_thread_group(pos, task)) goto found; } /* If nr exceeds the number of threads there is nothing todo */ - pos = NULL; - if (nr && nr >= get_nr_threads(leader)) - goto out; + if (nr >= get_nr_threads(task)) + goto fail; /* If we haven't found our starting place yet start * with the leader and walk nr threads forward. */ - for (pos = leader; nr > 0; --nr) { - pos = next_thread(pos); - if (pos == leader) { - pos = NULL; - goto out; - } - } + pos = task = task->group_leader; + do { + if (!nr--) + goto found; + } while_each_thread(task, pos); +fail: + pos = NULL; + goto out; found: get_task_struct(pos); out: @@ -2832,80 +3134,44 @@ static struct task_struct *next_tid(struct task_struct *start) return pos; } -static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir, - struct task_struct *task, int tid) -{ - char name[PROC_NUMBUF]; - int len = snprintf(name, sizeof(name), "%d", tid); - return proc_fill_cache(filp, dirent, filldir, name, len, - proc_task_instantiate, task, NULL); -} - /* for the /proc/TGID/task/ directories */ -static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) +static int proc_task_readdir(struct file *file, struct dir_context *ctx) { - struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; - struct task_struct *leader = NULL; + struct inode *inode = file_inode(file); struct task_struct *task; - int retval = -ENOENT; - ino_t ino; - int tid; - unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ struct pid_namespace *ns; + int tid; - task = get_proc_task(inode); - if (!task) - goto out_no_task; - rcu_read_lock(); - if (pid_alive(task)) { - leader = task->group_leader; - get_task_struct(leader); - } - rcu_read_unlock(); - put_task_struct(task); - if (!leader) - goto out_no_task; - retval = 0; + if (proc_inode_is_dead(inode)) + return -ENOENT; - switch (pos) { - case 0: - ino = inode->i_ino; - if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) - goto out; - pos++; - /* fall through */ - case 1: - ino = parent_ino(dentry); - if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) - goto out; - pos++; - /* fall through */ - } + if (!dir_emit_dots(file, ctx)) + return 0; /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. */ - ns = filp->f_dentry->d_sb->s_fs_info; - tid = (int)filp->f_version; - filp->f_version = 0; - for (task = first_tid(leader, tid, pos - 2, ns); + ns = file->f_dentry->d_sb->s_fs_info; + tid = (int)file->f_version; + file->f_version = 0; + for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); task; - task = next_tid(task), pos++) { + task = next_tid(task), ctx->pos++) { + char name[PROC_NUMBUF]; + int len; tid = task_pid_nr_ns(task, ns); - if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { + len = snprintf(name, sizeof(name), "%d", tid); + if (!proc_fill_cache(file, ctx, name, len, + proc_task_instantiate, task, NULL)) { /* returning this tgid failed, save it as the first * pid for the next readir call */ - filp->f_version = (u64)tid; + file->f_version = (u64)tid; put_task_struct(task); break; } } -out: - filp->f_pos = pos; - put_task_struct(leader); -out_no_task: - return retval; + + return 0; } static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) @@ -2915,9 +3181,7 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct generic_fillattr(inode, stat); if (p) { - rcu_read_lock(); stat->nlink += get_nr_threads(p); - rcu_read_unlock(); put_task_struct(p); } @@ -2928,9 +3192,11 @@ static const struct inode_operations proc_task_inode_operations = { .lookup = proc_task_lookup, .getattr = proc_task_getattr, .setattr = proc_setattr, + .permission = proc_pid_permission, }; static const struct file_operations proc_task_operations = { .read = generic_read_dir, - .readdir = proc_task_readdir, + .iterate = proc_task_readdir, + .llseek = default_llseek, }; |
