From e9720acd728a46cb40daa52c99a979f7c4ff195c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 7 Mar 2008 11:08:40 -0800 Subject: [NET]: Make /proc/net a symlink on /proc/self/net (v3) Current /proc/net is done with so called "shadows", but current implementation is broken and has little chances to get fixed. The problem is that dentries subtree of /proc/net directory has fancy revalidation rules to make processes living in different net namespaces see different entries in /proc/net subtree, but currently, tasks see in the /proc/net subdir the contents of any other namespace, depending on who opened the file first. The proposed fix is to turn /proc/net into a symlink, which points to /proc/self/net, which in turn shows what previously was in /proc/net - the network-related info, from the net namespace the appropriate task lives in. # ls -l /proc/net lrwxrwxrwx 1 root root 8 Mar 5 15:17 /proc/net -> self/net In other words - this behaves like /proc/mounts, but unlike "mounts", "net" is not a file, but a directory. Changes from v2: * Fixed discrepancy of /proc/net nlink count and selinux labeling screwup pointed out by Stephen. To get the correct nlink count the ->getattr callback for /proc/net is overridden to read one from the net->proc_net entry. To make selinux still work the net->proc_net entry is initialized properly, i.e. with the "net" name and the proc_net parent. Selinux fixes are Acked-by: Stephen Smalley Changes from v1: * Fixed a task_struct leak in get_proc_task_net, pointed out by Paul. Signed-off-by: Pavel Emelyanov Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- fs/proc/base.c | 1 + fs/proc/generic.c | 26 +++++++----- fs/proc/internal.h | 7 ++++ fs/proc/proc_net.c | 117 ++++++++++++++++++++++++++++++++++++++++------------- 4 files changed, 114 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 96ee899d650..cc43cf0c1fa 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2274,6 +2274,7 @@ static const struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, task), DIR("fd", S_IRUSR|S_IXUSR, fd), DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), + DIR("net", S_IRUGO|S_IXUSR, net), REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), ONE("status", S_IRUGO, pid_status), diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 68971e66cd4..a36ad3c75cf 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -377,15 +377,14 @@ static struct dentry_operations proc_dentry_operations = * Don't create negative dentries here, return -ENOENT by hand * instead. */ -struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, + struct dentry *dentry) { struct inode *inode = NULL; - struct proc_dir_entry * de; int error = -ENOENT; lock_kernel(); spin_lock(&proc_subdir_lock); - de = PDE(dir); if (de) { for (de = de->subdir; de ; de = de->next) { if (de->namelen != dentry->d_name.len) @@ -393,8 +392,6 @@ struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nam if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { unsigned int ino; - if (de->shadow_proc) - de = de->shadow_proc(current, de); ino = de->low_ino; de_get(de); spin_unlock(&proc_subdir_lock); @@ -417,6 +414,12 @@ out_unlock: return ERR_PTR(error); } +struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + return proc_lookup_de(PDE(dir), dir, dentry); +} + /* * This returns non-zero if at EOF, so that the /proc * root directory can use this and check if it should @@ -426,10 +429,9 @@ out_unlock: * value of the readdir() call, as long as it's non-negative * for success.. */ -int proc_readdir(struct file * filp, - void * dirent, filldir_t filldir) +int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, + filldir_t filldir) { - struct proc_dir_entry * de; unsigned int ino; int i; struct inode *inode = filp->f_path.dentry->d_inode; @@ -438,7 +440,6 @@ int proc_readdir(struct file * filp, lock_kernel(); ino = inode->i_ino; - de = PDE(inode); if (!de) { ret = -EINVAL; goto out; @@ -499,6 +500,13 @@ out: unlock_kernel(); return ret; } +int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + + return proc_readdir_de(PDE(inode), filp, dirent, filldir); +} + /* * These are the generic /proc directory operations. They * use the in-memory "struct proc_dir_entry" tree to parse diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 1c81c8f1aee..bc72f5c8c47 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -64,6 +64,8 @@ extern const struct file_operations proc_numa_maps_operations; extern const struct file_operations proc_smaps_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; +extern const struct file_operations proc_net_operations; +extern const struct inode_operations proc_net_inode_operations; void free_proc_entry(struct proc_dir_entry *de); @@ -83,3 +85,8 @@ static inline int proc_fd(struct inode *inode) { return PROC_I(inode)->fd; } + +struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, + struct dentry *dentry); +int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, + filldir_t filldir); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 14e9b5aaf86..4caa5f774fb 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -63,6 +63,82 @@ int seq_release_net(struct inode *ino, struct file *f) } EXPORT_SYMBOL_GPL(seq_release_net); +static struct net *get_proc_task_net(struct inode *dir) +{ + struct task_struct *task; + struct nsproxy *ns; + struct net *net = NULL; + + rcu_read_lock(); + task = pid_task(proc_pid(dir), PIDTYPE_PID); + if (task != NULL) { + ns = task_nsproxy(task); + if (ns != NULL) + net = get_net(ns->net_ns); + } + rcu_read_unlock(); + + return net; +} + +static struct dentry *proc_tgid_net_lookup(struct inode *dir, + struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *de; + struct net *net; + + de = ERR_PTR(-ENOENT); + net = get_proc_task_net(dir); + if (net != NULL) { + de = proc_lookup_de(net->proc_net, dir, dentry); + put_net(net); + } + return de; +} + +static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct net *net; + + net = get_proc_task_net(inode); + + generic_fillattr(inode, stat); + + if (net != NULL) { + stat->nlink = net->proc_net->nlink; + put_net(net); + } + + return 0; +} + +const struct inode_operations proc_net_inode_operations = { + .lookup = proc_tgid_net_lookup, + .getattr = proc_tgid_net_getattr, +}; + +static int proc_tgid_net_readdir(struct file *filp, void *dirent, + filldir_t filldir) +{ + int ret; + struct net *net; + + ret = -EINVAL; + net = get_proc_task_net(filp->f_path.dentry->d_inode); + if (net != NULL) { + ret = proc_readdir_de(net->proc_net, filp, dirent, filldir); + put_net(net); + } + return ret; +} + +const struct file_operations proc_net_operations = { + .read = generic_read_dir, + .readdir = proc_tgid_net_readdir, +}; + struct proc_dir_entry *proc_net_fops_create(struct net *net, const char *name, mode_t mode, const struct file_operations *fops) @@ -83,14 +159,6 @@ struct net *get_proc_net(const struct inode *inode) } EXPORT_SYMBOL_GPL(get_proc_net); -static struct proc_dir_entry *shadow_pde; - -static struct proc_dir_entry *proc_net_shadow(struct task_struct *task, - struct proc_dir_entry *de) -{ - return task->nsproxy->net_ns->proc_net; -} - struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, struct proc_dir_entry *parent) { @@ -104,45 +172,39 @@ EXPORT_SYMBOL_GPL(proc_net_mkdir); static __net_init int proc_net_ns_init(struct net *net) { - struct proc_dir_entry *root, *netd, *net_statd; + struct proc_dir_entry *netd, *net_statd; int err; err = -ENOMEM; - root = kzalloc(sizeof(*root), GFP_KERNEL); - if (!root) + netd = kzalloc(sizeof(*netd), GFP_KERNEL); + if (!netd) goto out; - err = -EEXIST; - netd = proc_net_mkdir(net, "net", root); - if (!netd) - goto free_root; + netd->data = net; + netd->nlink = 2; + netd->name = "net"; + netd->namelen = 3; + netd->parent = &proc_root; err = -EEXIST; net_statd = proc_net_mkdir(net, "stat", netd); if (!net_statd) goto free_net; - root->data = net; - - net->proc_net_root = root; net->proc_net = netd; net->proc_net_stat = net_statd; - err = 0; + return 0; +free_net: + kfree(netd); out: return err; -free_net: - remove_proc_entry("net", root); -free_root: - kfree(root); - goto out; } static __net_exit void proc_net_ns_exit(struct net *net) { remove_proc_entry("stat", net->proc_net); - remove_proc_entry("net", net->proc_net_root); - kfree(net->proc_net_root); + kfree(net->proc_net); } static struct pernet_operations __net_initdata proc_net_ns_ops = { @@ -152,8 +214,7 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = { int __init proc_net_init(void) { - shadow_pde = proc_mkdir("net", NULL); - shadow_pde->shadow_proc = proc_net_shadow; + proc_symlink("net", NULL, "self/net"); return register_pernet_subsys(&proc_net_ns_ops); } -- cgit v1.2.3-18-g5258 From b2211a361a4289c83971f89da53fe2eb9e72769d Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 11 Mar 2008 18:03:35 -0700 Subject: net: fix build with CONFIG_NET=n fs/built-in.o:(.rodata+0x1134): undefined reference to `proc_net_inode_operations' fs/built-in.o:(.rodata+0x1138): undefined reference to `proc_net_operations' Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- fs/proc/base.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index cc43cf0c1fa..3217774d269 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2274,7 +2274,9 @@ static const struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, task), DIR("fd", S_IRUSR|S_IXUSR, fd), DIR("fdinfo", S_IRUSR|S_IXUSR, fdinfo), +#ifdef CONFIG_NET DIR("net", S_IRUGO|S_IXUSR, net), +#endif REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), ONE("status", S_IRUGO, pid_status), -- cgit v1.2.3-18-g5258 From fb39380b8d683b55630ba5ba381f4e43e417420e Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 13 Mar 2008 12:32:35 -0700 Subject: pagemap: proper read error handling Fix pagemap_read() error handling by releasing acquired resources and checking for get_user_pages() partial failure. Signed-off-by: Marcelo Tosatti Acked-by: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 6dc0334815f..4206454734e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -640,17 +640,17 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, ret = -EACCES; if (!ptrace_may_attach(task)) - goto out; + goto out_task; ret = -EINVAL; /* file position must be aligned */ if (*ppos % PM_ENTRY_BYTES) - goto out; + goto out_task; ret = 0; mm = get_task_mm(task); if (!mm) - goto out; + goto out_task; ret = -ENOMEM; uaddr = (unsigned long)buf & PAGE_MASK; @@ -658,7 +658,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL); if (!pages) - goto out_task; + goto out_mm; down_read(¤t->mm->mmap_sem); ret = get_user_pages(current, current->mm, uaddr, pagecount, @@ -668,6 +668,12 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (ret < 0) goto out_free; + if (ret != pagecount) { + pagecount = ret; + ret = -EFAULT; + goto out_pages; + } + pm.out = buf; pm.end = buf + count; @@ -699,15 +705,17 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, ret = pm.out - buf; } +out_pages: for (; pagecount; pagecount--) { page = pages[pagecount-1]; if (!PageReserved(page)) SetPageDirty(page); page_cache_release(page); } - mmput(mm); out_free: kfree(pages); +out_mm: + mmput(mm); out_task: put_task_struct(task); out: -- cgit v1.2.3-18-g5258