diff options
Diffstat (limited to 'fs/proc/inode.c')
| -rw-r--r-- | fs/proc/inode.c | 410 | 
1 files changed, 197 insertions, 213 deletions
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 9c2b5f48487..0adbc02d60e 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -7,20 +7,23 @@  #include <linux/time.h>  #include <linux/proc_fs.h>  #include <linux/kernel.h> +#include <linux/pid_namespace.h>  #include <linux/mm.h>  #include <linux/string.h>  #include <linux/stat.h>  #include <linux/completion.h>  #include <linux/poll.h> +#include <linux/printk.h>  #include <linux/file.h>  #include <linux/limits.h>  #include <linux/init.h>  #include <linux/module.h> -#include <linux/smp_lock.h>  #include <linux/sysctl.h> +#include <linux/seq_file.h>  #include <linux/slab.h> +#include <linux/mount.h> +#include <linux/magic.h> -#include <asm/system.h>  #include <asm/uaccess.h>  #include "internal.h" @@ -28,9 +31,12 @@  static void proc_evict_inode(struct inode *inode)  {  	struct proc_dir_entry *de; +	struct ctl_table_header *head; +	const struct proc_ns_operations *ns_ops; +	void *ns; -	truncate_inode_pages(&inode->i_data, 0); -	end_writeback(inode); +	truncate_inode_pages_final(&inode->i_data); +	clear_inode(inode);  	/* Stop tracking associated processes */  	put_pid(PROC_I(inode)->pid); @@ -39,12 +45,18 @@ static void proc_evict_inode(struct inode *inode)  	de = PROC_I(inode)->pde;  	if (de)  		pde_put(de); -	if (PROC_I(inode)->sysctl) -		sysctl_head_put(PROC_I(inode)->sysctl); +	head = PROC_I(inode)->sysctl; +	if (head) { +		RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL); +		sysctl_head_put(head); +	} +	/* Release any associated namespace */ +	ns_ops = PROC_I(inode)->ns.ns_ops; +	ns = PROC_I(inode)->ns.ns; +	if (ns_ops && ns) +		ns_ops->put(ns);  } -struct vfsmount *proc_mnt; -  static struct kmem_cache * proc_inode_cachep;  static struct inode *proc_alloc_inode(struct super_block *sb) @@ -61,16 +73,24 @@ static struct inode *proc_alloc_inode(struct super_block *sb)  	ei->pde = NULL;  	ei->sysctl = NULL;  	ei->sysctl_entry = NULL; +	ei->ns.ns = NULL; +	ei->ns.ns_ops = NULL;  	inode = &ei->vfs_inode;  	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;  	return inode;  } -static void proc_destroy_inode(struct inode *inode) +static void proc_i_callback(struct rcu_head *head)  { +	struct inode *inode = container_of(head, struct inode, i_rcu);  	kmem_cache_free(proc_inode_cachep, PROC_I(inode));  } +static void proc_destroy_inode(struct inode *inode) +{ +	call_rcu(&inode->i_rcu, proc_i_callback); +} +  static void init_once(void *foo)  {  	struct proc_inode *ei = (struct proc_inode *) foo; @@ -87,194 +107,207 @@ void __init proc_init_inodecache(void)  					     init_once);  } +static int proc_show_options(struct seq_file *seq, struct dentry *root) +{ +	struct super_block *sb = root->d_sb; +	struct pid_namespace *pid = sb->s_fs_info; + +	if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID)) +		seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid)); +	if (pid->hide_pid != 0) +		seq_printf(seq, ",hidepid=%u", pid->hide_pid); + +	return 0; +} +  static const struct super_operations proc_sops = {  	.alloc_inode	= proc_alloc_inode,  	.destroy_inode	= proc_destroy_inode,  	.drop_inode	= generic_delete_inode,  	.evict_inode	= proc_evict_inode,  	.statfs		= simple_statfs, +	.remount_fs	= proc_remount, +	.show_options	= proc_show_options,  }; -static void __pde_users_dec(struct proc_dir_entry *pde) +enum {BIAS = -1U<<31}; + +static inline int use_pde(struct proc_dir_entry *pde)  { -	pde->pde_users--; -	if (pde->pde_unload_completion && pde->pde_users == 0) -		complete(pde->pde_unload_completion); +	return atomic_inc_unless_negative(&pde->in_use);  } -void pde_users_dec(struct proc_dir_entry *pde) +static void unuse_pde(struct proc_dir_entry *pde)  { -	spin_lock(&pde->pde_unload_lock); -	__pde_users_dec(pde); -	spin_unlock(&pde->pde_unload_lock); +	if (atomic_dec_return(&pde->in_use) == BIAS) +		complete(pde->pde_unload_completion);  } -static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) +/* pde is locked */ +static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)  { -	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); -	loff_t rv = -EINVAL; -	loff_t (*llseek)(struct file *, loff_t, int); - -	spin_lock(&pde->pde_unload_lock); -	/* -	 * remove_proc_entry() is going to delete PDE (as part of module -	 * cleanup sequence). No new callers into module allowed. -	 */ -	if (!pde->proc_fops) { +	if (pdeo->closing) { +		/* somebody else is doing that, just wait */ +		DECLARE_COMPLETION_ONSTACK(c); +		pdeo->c = &c; +		spin_unlock(&pde->pde_unload_lock); +		wait_for_completion(&c); +		spin_lock(&pde->pde_unload_lock); +	} else { +		struct file *file; +		pdeo->closing = 1;  		spin_unlock(&pde->pde_unload_lock); -		return rv; +		file = pdeo->file; +		pde->proc_fops->release(file_inode(file), file); +		spin_lock(&pde->pde_unload_lock); +		list_del_init(&pdeo->lh); +		if (pdeo->c) +			complete(pdeo->c); +		kfree(pdeo);  	} -	/* -	 * Bump refcount so that remove_proc_entry will wail for ->llseek to -	 * complete. -	 */ -	pde->pde_users++; -	/* -	 * Save function pointer under lock, to protect against ->proc_fops -	 * NULL'ifying right after ->pde_unload_lock is dropped. -	 */ -	llseek = pde->proc_fops->llseek; -	spin_unlock(&pde->pde_unload_lock); +} -	if (!llseek) -		llseek = default_llseek; -	rv = llseek(file, offset, whence); +void proc_entry_rundown(struct proc_dir_entry *de) +{ +	DECLARE_COMPLETION_ONSTACK(c); +	/* Wait until all existing callers into module are done. */ +	de->pde_unload_completion = &c; +	if (atomic_add_return(BIAS, &de->in_use) != BIAS) +		wait_for_completion(&c); + +	spin_lock(&de->pde_unload_lock); +	while (!list_empty(&de->pde_openers)) { +		struct pde_opener *pdeo; +		pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); +		close_pdeo(de, pdeo); +	} +	spin_unlock(&de->pde_unload_lock); +} -	pde_users_dec(pde); +static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) +{ +	struct proc_dir_entry *pde = PDE(file_inode(file)); +	loff_t rv = -EINVAL; +	if (use_pde(pde)) { +		loff_t (*llseek)(struct file *, loff_t, int); +		llseek = pde->proc_fops->llseek; +		if (!llseek) +			llseek = default_llseek; +		rv = llseek(file, offset, whence); +		unuse_pde(pde); +	}  	return rv;  }  static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)  { -	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); -	ssize_t rv = -EIO;  	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); - -	spin_lock(&pde->pde_unload_lock); -	if (!pde->proc_fops) { -		spin_unlock(&pde->pde_unload_lock); -		return rv; +	struct proc_dir_entry *pde = PDE(file_inode(file)); +	ssize_t rv = -EIO; +	if (use_pde(pde)) { +		read = pde->proc_fops->read; +		if (read) +			rv = read(file, buf, count, ppos); +		unuse_pde(pde);  	} -	pde->pde_users++; -	read = pde->proc_fops->read; -	spin_unlock(&pde->pde_unload_lock); - -	if (read) -		rv = read(file, buf, count, ppos); - -	pde_users_dec(pde);  	return rv;  }  static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)  { -	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); -	ssize_t rv = -EIO;  	ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); - -	spin_lock(&pde->pde_unload_lock); -	if (!pde->proc_fops) { -		spin_unlock(&pde->pde_unload_lock); -		return rv; +	struct proc_dir_entry *pde = PDE(file_inode(file)); +	ssize_t rv = -EIO; +	if (use_pde(pde)) { +		write = pde->proc_fops->write; +		if (write) +			rv = write(file, buf, count, ppos); +		unuse_pde(pde);  	} -	pde->pde_users++; -	write = pde->proc_fops->write; -	spin_unlock(&pde->pde_unload_lock); - -	if (write) -		rv = write(file, buf, count, ppos); - -	pde_users_dec(pde);  	return rv;  }  static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts)  { -	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); +	struct proc_dir_entry *pde = PDE(file_inode(file));  	unsigned int rv = DEFAULT_POLLMASK;  	unsigned int (*poll)(struct file *, struct poll_table_struct *); - -	spin_lock(&pde->pde_unload_lock); -	if (!pde->proc_fops) { -		spin_unlock(&pde->pde_unload_lock); -		return rv; +	if (use_pde(pde)) { +		poll = pde->proc_fops->poll; +		if (poll) +			rv = poll(file, pts); +		unuse_pde(pde);  	} -	pde->pde_users++; -	poll = pde->proc_fops->poll; -	spin_unlock(&pde->pde_unload_lock); - -	if (poll) -		rv = poll(file, pts); - -	pde_users_dec(pde);  	return rv;  }  static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)  { -	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); +	struct proc_dir_entry *pde = PDE(file_inode(file));  	long rv = -ENOTTY;  	long (*ioctl)(struct file *, unsigned int, unsigned long); - -	spin_lock(&pde->pde_unload_lock); -	if (!pde->proc_fops) { -		spin_unlock(&pde->pde_unload_lock); -		return rv; +	if (use_pde(pde)) { +		ioctl = pde->proc_fops->unlocked_ioctl; +		if (ioctl) +			rv = ioctl(file, cmd, arg); +		unuse_pde(pde);  	} -	pde->pde_users++; -	ioctl = pde->proc_fops->unlocked_ioctl; -	spin_unlock(&pde->pde_unload_lock); - -	if (ioctl) -		rv = ioctl(file, cmd, arg); - -	pde_users_dec(pde);  	return rv;  }  #ifdef CONFIG_COMPAT  static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)  { -	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); +	struct proc_dir_entry *pde = PDE(file_inode(file));  	long rv = -ENOTTY;  	long (*compat_ioctl)(struct file *, unsigned int, unsigned long); - -	spin_lock(&pde->pde_unload_lock); -	if (!pde->proc_fops) { -		spin_unlock(&pde->pde_unload_lock); -		return rv; +	if (use_pde(pde)) { +		compat_ioctl = pde->proc_fops->compat_ioctl; +		if (compat_ioctl) +			rv = compat_ioctl(file, cmd, arg); +		unuse_pde(pde);  	} -	pde->pde_users++; -	compat_ioctl = pde->proc_fops->compat_ioctl; -	spin_unlock(&pde->pde_unload_lock); - -	if (compat_ioctl) -		rv = compat_ioctl(file, cmd, arg); - -	pde_users_dec(pde);  	return rv;  }  #endif  static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)  { -	struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); +	struct proc_dir_entry *pde = PDE(file_inode(file));  	int rv = -EIO;  	int (*mmap)(struct file *, struct vm_area_struct *); - -	spin_lock(&pde->pde_unload_lock); -	if (!pde->proc_fops) { -		spin_unlock(&pde->pde_unload_lock); -		return rv; +	if (use_pde(pde)) { +		mmap = pde->proc_fops->mmap; +		if (mmap) +			rv = mmap(file, vma); +		unuse_pde(pde);  	} -	pde->pde_users++; -	mmap = pde->proc_fops->mmap; -	spin_unlock(&pde->pde_unload_lock); +	return rv; +} + +static unsigned long +proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, +			   unsigned long len, unsigned long pgoff, +			   unsigned long flags) +{ +	struct proc_dir_entry *pde = PDE(file_inode(file)); +	unsigned long rv = -EIO; -	if (mmap) -		rv = mmap(file, vma); +	if (use_pde(pde)) { +		typeof(proc_reg_get_unmapped_area) *get_area; -	pde_users_dec(pde); +		get_area = pde->proc_fops->get_unmapped_area; +#ifdef CONFIG_MMU +		if (!get_area) +			get_area = current->mm->get_unmapped_area; +#endif + +		if (get_area) +			rv = get_area(file, orig_addr, len, pgoff, flags); +		else +			rv = orig_addr; +		unuse_pde(pde); +	}  	return rv;  } @@ -296,91 +329,47 @@ static int proc_reg_open(struct inode *inode, struct file *file)  	 * by hand in remove_proc_entry(). For this, save opener's credentials  	 * for later.  	 */ -	pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); +	pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL);  	if (!pdeo)  		return -ENOMEM; -	spin_lock(&pde->pde_unload_lock); -	if (!pde->proc_fops) { -		spin_unlock(&pde->pde_unload_lock); +	if (!use_pde(pde)) {  		kfree(pdeo); -		return -EINVAL; +		return -ENOENT;  	} -	pde->pde_users++;  	open = pde->proc_fops->open;  	release = pde->proc_fops->release; -	spin_unlock(&pde->pde_unload_lock);  	if (open)  		rv = open(inode, file); -	spin_lock(&pde->pde_unload_lock);  	if (rv == 0 && release) {  		/* To know what to release. */ -		pdeo->inode = inode;  		pdeo->file = file;  		/* Strictly for "too late" ->release in proc_reg_release(). */ -		pdeo->release = release; +		spin_lock(&pde->pde_unload_lock);  		list_add(&pdeo->lh, &pde->pde_openers); +		spin_unlock(&pde->pde_unload_lock);  	} else  		kfree(pdeo); -	__pde_users_dec(pde); -	spin_unlock(&pde->pde_unload_lock); -	return rv; -} - -static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde, -					struct inode *inode, struct file *file) -{ -	struct pde_opener *pdeo; -	list_for_each_entry(pdeo, &pde->pde_openers, lh) { -		if (pdeo->inode == inode && pdeo->file == file) -			return pdeo; -	} -	return NULL; +	unuse_pde(pde); +	return rv;  }  static int proc_reg_release(struct inode *inode, struct file *file)  {  	struct proc_dir_entry *pde = PDE(inode); -	int rv = 0; -	int (*release)(struct inode *, struct file *);  	struct pde_opener *pdeo; -  	spin_lock(&pde->pde_unload_lock); -	pdeo = find_pde_opener(pde, inode, file); -	if (!pde->proc_fops) { -		/* -		 * Can't simply exit, __fput() will think that everything is OK, -		 * and move on to freeing struct file. remove_proc_entry() will -		 * find slacker in opener's list and will try to do non-trivial -		 * things with struct file. Therefore, remove opener from list. -		 * -		 * But if opener is removed from list, who will ->release it? -		 */ -		if (pdeo) { -			list_del(&pdeo->lh); -			spin_unlock(&pde->pde_unload_lock); -			rv = pdeo->release(inode, file); -			kfree(pdeo); -		} else -			spin_unlock(&pde->pde_unload_lock); -		return rv; -	} -	pde->pde_users++; -	release = pde->proc_fops->release; -	if (pdeo) { -		list_del(&pdeo->lh); -		kfree(pdeo); +	list_for_each_entry(pdeo, &pde->pde_openers, lh) { +		if (pdeo->file == file) { +			close_pdeo(pde, pdeo); +			break; +		}  	}  	spin_unlock(&pde->pde_unload_lock); - -	if (release) -		rv = release(inode, file); - -	pde_users_dec(pde); -	return rv; +	return 0;  }  static const struct file_operations proc_reg_file_ops = { @@ -393,6 +382,7 @@ static const struct file_operations proc_reg_file_ops = {  	.compat_ioctl	= proc_reg_compat_ioctl,  #endif  	.mmap		= proc_reg_mmap, +	.get_unmapped_area = proc_reg_get_unmapped_area,  	.open		= proc_reg_open,  	.release	= proc_reg_release,  }; @@ -405,22 +395,19 @@ static const struct file_operations proc_reg_file_ops_no_compat = {  	.poll		= proc_reg_poll,  	.unlocked_ioctl	= proc_reg_unlocked_ioctl,  	.mmap		= proc_reg_mmap, +	.get_unmapped_area = proc_reg_get_unmapped_area,  	.open		= proc_reg_open,  	.release	= proc_reg_release,  };  #endif -struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, -				struct proc_dir_entry *de) +struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)  { -	struct inode * inode; +	struct inode *inode = new_inode_pseudo(sb); -	inode = iget_locked(sb, ino); -	if (!inode) -		return NULL; -	if (inode->i_state & I_NEW) { +	if (inode) { +		inode->i_ino = de->low_ino;  		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; -		PROC_I(inode)->fd = 0;  		PROC_I(inode)->pde = de;  		if (de->mode) { @@ -431,9 +418,9 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,  		if (de->size)  			inode->i_size = de->size;  		if (de->nlink) -			inode->i_nlink = de->nlink; -		if (de->proc_iops) -			inode->i_op = de->proc_iops; +			set_nlink(inode, de->nlink); +		WARN_ON(!de->proc_iops); +		inode->i_op = de->proc_iops;  		if (de->proc_fops) {  			if (S_ISREG(inode->i_mode)) {  #ifdef CONFIG_COMPAT @@ -447,15 +434,14 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,  				inode->i_fop = de->proc_fops;  			}  		} -		unlock_new_inode(inode);  	} else  	       pde_put(de);  	return inode; -}			 +}  int proc_fill_super(struct super_block *s)  { -	struct inode * root_inode; +	struct inode *root_inode;  	s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;  	s->s_blocksize = 1024; @@ -465,19 +451,17 @@ int proc_fill_super(struct super_block *s)  	s->s_time_gran = 1;  	pde_get(&proc_root); -	root_inode = proc_get_inode(s, PROC_ROOT_INO, &proc_root); -	if (!root_inode) -		goto out_no_root; -	root_inode->i_uid = 0; -	root_inode->i_gid = 0; -	s->s_root = d_alloc_root(root_inode); -	if (!s->s_root) -		goto out_no_root; -	return 0; +	root_inode = proc_get_inode(s, &proc_root); +	if (!root_inode) { +		pr_err("proc_fill_super: get root inode failed\n"); +		return -ENOMEM; +	} + +	s->s_root = d_make_root(root_inode); +	if (!s->s_root) { +		pr_err("proc_fill_super: allocate dentry failed\n"); +		return -ENOMEM; +	} -out_no_root: -	printk("proc_read_super: get root inode failed\n"); -	iput(root_inode); -	pde_put(&proc_root); -	return -ENOMEM; +	return proc_setup_self(s);  }  | 
