From 8231f2f99a5e5fc45a25e8de09fd1ab9711babf1 Mon Sep 17 00:00:00 2001
From: Qinghuang Feng <qhfeng.kernel@gmail.com>
Date: Wed, 14 Jan 2009 15:45:13 +0800
Subject: SYSFS: use standard magic.h for sysfs

SYSFS_MAGIC has been added to magic.h, so use only the definition
in magic.h to avoid potential consistency problems.

Signed-off-by: Qinghuang Feng <qhfeng.kernel@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/mount.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index ab343e371d6..8133ca36ee0 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -17,11 +17,10 @@
 #include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/magic.h>
 
 #include "sysfs.h"
 
-/* Random magic number */
-#define SYSFS_MAGIC 0x62656572
 
 static struct vfsmount *sysfs_mount;
 struct super_block * sysfs_sb = NULL;
-- 
cgit v1.2.3-18-g5258


From 4a67a1bc0b3a0db017b560cee27370d141c58e25 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 21 Jan 2009 11:55:11 -0800
Subject: sysfs: Take sysfs_mutex when fetching the root inode.

sysfs_get_inode ultimately calls sysfs_count_nlink when a
directory inode is fetched.  sysfs_count_nlink needs to be
called under the sysfs_mutex to guard against the unlikely
but possible scenario that the root directory is changing
as we are counting the number of entries in it, and just in
general to be consistent.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/mount.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 8133ca36ee0..84ef378673a 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -52,7 +52,9 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
 	sysfs_sb = sb;
 
 	/* get root inode, initialize and unlock it */
+	mutex_lock(&sysfs_mutex);
 	inode = sysfs_get_inode(&sysfs_root);
+	mutex_unlock(&sysfs_mutex);
 	if (!inode) {
 		pr_debug("sysfs: could not get root inode\n");
 		return -ENOMEM;
-- 
cgit v1.2.3-18-g5258


From 425cb02912d1095febfeaf8d379af7b2ac9e4a89 Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Thu, 12 Feb 2009 10:56:59 -0700
Subject: sysfs: sysfs_add_one WARNs with full path to duplicate filename

sysfs: sysfs_add_one WARNs with full path to duplicate filename

As a debugging aid, it can be useful to know the full path to a
duplicate file being created in sysfs.

We now will display warnings such as:

	sysfs: cannot create duplicate filename '/foo'

when attempting to create multiple files named 'foo' in the sysfs
root, or:

	sysfs: cannot create duplicate filename '/bus/pci/slots/5/foo'

when attempting to create multiple files named 'foo' under a
given directory in sysfs.

The path displayed is always a relative path to sysfs_root. The
leading '/' in the path name refers to the sysfs_root mount
point, and should not be confused with the "real" '/'.

Thanks to Alex Williamson for essentially writing sysfs_pathname.

Cc: Alex Williamson <alex.williamson@hp.com>
Signed-off-by: Alex Chiang <achiang@hp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/dir.c | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 82d3b79d0e0..f13d852ab3c 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -433,6 +433,26 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
 	return 0;
 }
 
+/**
+ *	sysfs_pathname - return full path to sysfs dirent
+ *	@sd: sysfs_dirent whose path we want
+ *	@path: caller allocated buffer
+ *
+ *	Gives the name "/" to the sysfs_root entry; any path returned
+ *	is relative to wherever sysfs is mounted.
+ *
+ *	XXX: does no error checking on @path size
+ */
+static char *sysfs_pathname(struct sysfs_dirent *sd, char *path)
+{
+	if (sd->s_parent) {
+		sysfs_pathname(sd->s_parent, path);
+		strcat(path, "/");
+	}
+	strcat(path, sd->s_name);
+	return path;
+}
+
 /**
  *	sysfs_add_one - add sysfs_dirent to parent
  *	@acxt: addrm context to use
@@ -458,8 +478,16 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
 	int ret;
 
 	ret = __sysfs_add_one(acxt, sd);
-	WARN(ret == -EEXIST, KERN_WARNING "sysfs: duplicate filename '%s' "
-		       "can not be created\n", sd->s_name);
+	if (ret == -EEXIST) {
+		char *path = kzalloc(PATH_MAX, GFP_KERNEL);
+		WARN(1, KERN_WARNING
+		     "sysfs: cannot create duplicate filename '%s'\n",
+		     (path == NULL) ? sd->s_name :
+		     strcat(strcat(sysfs_pathname(acxt->parent_sd, path), "/"),
+		            sd->s_name));
+		kfree(path);
+	}
+
 	return ret;
 }
 
-- 
cgit v1.2.3-18-g5258


From 04256b4a8fc73f54cd14f20867882c299728a446 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@aristanetworks.com>
Date: Wed, 11 Feb 2009 13:20:23 -0800
Subject: sysfs: reference sysfs_dirent from sysfs inodes

The sysfs_dirent serves as both an inode and a directory entry
for sysfs.  To prevent the sysfs inode numbers from being freed
prematurely hold a reference to sysfs_dirent from the sysfs inode.

[akpm@linux-foundation.org: add comment]
Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/inode.c | 17 +++++++++++++++++
 fs/sysfs/mount.c |  1 +
 fs/sysfs/sysfs.h |  1 +
 3 files changed, 19 insertions(+)

(limited to 'fs')

diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index dfa3d94cfc7..555f0ff988d 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -147,6 +147,7 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
 {
 	struct bin_attribute *bin_attr;
 
+	inode->i_private = sysfs_get(sd);
 	inode->i_mapping->a_ops = &sysfs_aops;
 	inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
 	inode->i_op = &sysfs_inode_operations;
@@ -214,6 +215,22 @@ struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
 	return inode;
 }
 
+/*
+ * The sysfs_dirent serves as both an inode and a directory entry for sysfs.
+ * To prevent the sysfs inode numbers from being freed prematurely we take a
+ * reference to sysfs_dirent from the sysfs inode.  A
+ * super_operations.delete_inode() implementation is needed to drop that
+ * reference upon inode destruction.
+ */
+void sysfs_delete_inode(struct inode *inode)
+{
+	struct sysfs_dirent *sd  = inode->i_private;
+
+	truncate_inode_pages(&inode->i_data, 0);
+	clear_inode(inode);
+	sysfs_put(sd);
+}
+
 int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
 {
 	struct sysfs_addrm_cxt acxt;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 84ef378673a..49749955cca 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -29,6 +29,7 @@ struct kmem_cache *sysfs_dir_cachep;
 static const struct super_operations sysfs_ops = {
 	.statfs		= simple_statfs,
 	.drop_inode	= generic_delete_inode,
+	.delete_inode	= sysfs_delete_inode,
 };
 
 struct sysfs_dirent sysfs_root = {
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 93c6d6b27c4..9055d04e4ab 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -145,6 +145,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
  * inode.c
  */
 struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
+void sysfs_delete_inode(struct inode *inode);
 int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
 int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
 int sysfs_inode_init(void);
-- 
cgit v1.2.3-18-g5258


From e0edd3c65aa5b53e20280565a7ce11675eb7ed6b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@aristanetworks.com>
Date: Wed, 4 Mar 2009 11:57:20 -0800
Subject: sysfs: don't block indefinitely for unmapped files.

Modify sysfs bin files so that we can remove the bin file while they are
still mapped.  When the kobject is removed we unmap the bin file and
arrange for future accesses to the mapping to receive SIGBUS.

Implementing this prevents a nasty DoS when pci devices are hot plugged
and unplugged: if any of their resources were mmapped, the kernel
could not free up their pci resources or release their pci data
structures.

[akpm@linux-foundation.org: remove unused var]
Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/bin.c   | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 fs/sysfs/dir.c   |   1 +
 fs/sysfs/sysfs.h |   2 +
 3 files changed, 174 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index f2c478c3424..96cc2bf6a84 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -21,15 +21,28 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
+#include <linux/mm.h>
 
 #include <asm/uaccess.h>
 
 #include "sysfs.h"
 
+/*
+ * There's one bin_buffer for each open file.
+ *
+ * filp->private_data points to bin_buffer and
+ * sysfs_dirent->s_bin_attr.buffers points to the list of bin_buffers.
+ * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock
+ */
+static DEFINE_MUTEX(sysfs_bin_lock);
+
 struct bin_buffer {
-	struct mutex	mutex;
-	void		*buffer;
-	int		mmapped;
+	struct mutex			mutex;
+	void				*buffer;
+	int				mmapped;
+	struct vm_operations_struct 	*vm_ops;
+	struct file			*file;
+	struct hlist_node		list;
 };
 
 static int
@@ -168,29 +181,148 @@ out_free:
 	return count;
 }
 
+static void bin_vma_open(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct bin_buffer *bb = file->private_data;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+
+	if (!bb->vm_ops || !bb->vm_ops->open)
+		return;
+
+	if (!sysfs_get_active_two(attr_sd))
+		return;
+
+	bb->vm_ops->open(vma);
+
+	sysfs_put_active_two(attr_sd);
+}
+
+static void bin_vma_close(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct bin_buffer *bb = file->private_data;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+
+	if (!bb->vm_ops || !bb->vm_ops->close)
+		return;
+
+	if (!sysfs_get_active_two(attr_sd))
+		return;
+
+	bb->vm_ops->close(vma);
+
+	sysfs_put_active_two(attr_sd);
+}
+
+static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct file *file = vma->vm_file;
+	struct bin_buffer *bb = file->private_data;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	int ret;
+
+	if (!bb->vm_ops || !bb->vm_ops->fault)
+		return VM_FAULT_SIGBUS;
+
+	if (!sysfs_get_active_two(attr_sd))
+		return VM_FAULT_SIGBUS;
+
+	ret = bb->vm_ops->fault(vma, vmf);
+
+	sysfs_put_active_two(attr_sd);
+	return ret;
+}
+
+static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+	struct file *file = vma->vm_file;
+	struct bin_buffer *bb = file->private_data;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	int ret;
+
+	if (!bb->vm_ops || !bb->vm_ops->page_mkwrite)
+		return -EINVAL;
+
+	if (!sysfs_get_active_two(attr_sd))
+		return -EINVAL;
+
+	ret = bb->vm_ops->page_mkwrite(vma, page);
+
+	sysfs_put_active_two(attr_sd);
+	return ret;
+}
+
+static int bin_access(struct vm_area_struct *vma, unsigned long addr,
+		  void *buf, int len, int write)
+{
+	struct file *file = vma->vm_file;
+	struct bin_buffer *bb = file->private_data;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	int ret;
+
+	if (!bb->vm_ops || !bb->vm_ops->access)
+		return -EINVAL;
+
+	if (!sysfs_get_active_two(attr_sd))
+		return -EINVAL;
+
+	ret = bb->vm_ops->access(vma, addr, buf, len, write);
+
+	sysfs_put_active_two(attr_sd);
+	return ret;
+}
+
+static struct vm_operations_struct bin_vm_ops = {
+	.open		= bin_vma_open,
+	.close		= bin_vma_close,
+	.fault		= bin_fault,
+	.page_mkwrite	= bin_page_mkwrite,
+	.access		= bin_access,
+};
+
 static int mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct bin_buffer *bb = file->private_data;
 	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
 	struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
 	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
+	struct vm_operations_struct *vm_ops;
 	int rc;
 
 	mutex_lock(&bb->mutex);
 
 	/* need attr_sd for attr, its parent for kobj */
+	rc = -ENODEV;
 	if (!sysfs_get_active_two(attr_sd))
-		return -ENODEV;
+		goto out_unlock;
 
 	rc = -EINVAL;
-	if (attr->mmap)
-		rc = attr->mmap(kobj, attr, vma);
+	if (!attr->mmap)
+		goto out_put;
 
-	if (rc == 0 && !bb->mmapped)
-		bb->mmapped = 1;
-	else
-		sysfs_put_active_two(attr_sd);
+	rc = attr->mmap(kobj, attr, vma);
+	vm_ops = vma->vm_ops;
+	vma->vm_ops = &bin_vm_ops;
+	if (rc)
+		goto out_put;
 
+	rc = -EINVAL;
+	if (bb->mmapped && bb->vm_ops != vma->vm_ops)
+		goto out_put;
+
+#ifdef CONFIG_NUMA
+	rc = -EINVAL;
+	if (vm_ops && ((vm_ops->set_policy || vm_ops->get_policy || vm_ops->migrate)))
+		goto out_put;
+#endif
+
+	rc = 0;
+	bb->mmapped = 1;
+	bb->vm_ops = vm_ops;
+out_put:
+	sysfs_put_active_two(attr_sd);
+out_unlock:
 	mutex_unlock(&bb->mutex);
 
 	return rc;
@@ -223,8 +355,13 @@ static int open(struct inode * inode, struct file * file)
 		goto err_out;
 
 	mutex_init(&bb->mutex);
+	bb->file = file;
 	file->private_data = bb;
 
+	mutex_lock(&sysfs_bin_lock);
+	hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers);
+	mutex_unlock(&sysfs_bin_lock);
+
 	/* open succeeded, put active references */
 	sysfs_put_active_two(attr_sd);
 	return 0;
@@ -237,11 +374,12 @@ static int open(struct inode * inode, struct file * file)
 
 static int release(struct inode * inode, struct file * file)
 {
-	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
 	struct bin_buffer *bb = file->private_data;
 
-	if (bb->mmapped)
-		sysfs_put_active_two(attr_sd);
+	mutex_lock(&sysfs_bin_lock);
+	hlist_del(&bb->list);
+	mutex_unlock(&sysfs_bin_lock);
+
 	kfree(bb->buffer);
 	kfree(bb);
 	return 0;
@@ -256,6 +394,26 @@ const struct file_operations bin_fops = {
 	.release	= release,
 };
 
+
+void unmap_bin_file(struct sysfs_dirent *attr_sd)
+{
+	struct bin_buffer *bb;
+	struct hlist_node *tmp;
+
+	if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR)
+		return;
+
+	mutex_lock(&sysfs_bin_lock);
+
+	hlist_for_each_entry(bb, tmp, &attr_sd->s_bin_attr.buffers, list) {
+		struct inode *inode = bb->file->f_path.dentry->d_inode;
+
+		unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+	}
+
+	mutex_unlock(&sysfs_bin_lock);
+}
+
 /**
  *	sysfs_create_bin_file - create binary file for object.
  *	@kobj:	object.
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index f13d852ab3c..66aeb4fff0c 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -609,6 +609,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
 
 		sysfs_drop_dentry(sd);
 		sysfs_deactivate(sd);
+		unmap_bin_file(sd);
 		sysfs_put(sd);
 	}
 }
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 9055d04e4ab..3fa0d98481e 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -28,6 +28,7 @@ struct sysfs_elem_attr {
 
 struct sysfs_elem_bin_attr {
 	struct bin_attribute	*bin_attr;
+	struct hlist_head	buffers;
 };
 
 /*
@@ -164,6 +165,7 @@ int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
  * bin.c
  */
 extern const struct file_operations bin_fops;
+void unmap_bin_file(struct sysfs_dirent *attr_sd);
 
 /*
  * symlink.c
-- 
cgit v1.2.3-18-g5258


From f67f129e519fa87f8ebd236b6336fe43f31ee141 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Sun, 1 Mar 2009 21:10:49 +0800
Subject: Driver core: implement uevent suppress in kobject

This patch implements uevent suppress in kobject and removes it
from struct device, based on the following ideas:

1. Uevent sending should be an attribute of kobject, so suppressing it
in the kobject layer is more natural than in the device layer. This way,
we can do it for other objects that embed a kobject.

2. It may save several bytes for each instance of struct device. (On my
omap3 (32-bit ARM) based box, it saves 8 bytes per device object.)

This patch also introduces dev_set|get_uevent_suppress() helpers to
set and query the uevent_suppress attribute, to help make kobject
a private part of struct device in the future.

[This version is against Greg's latest driver-core patch set; please
ignore the last version.]

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/partitions/check.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6d720243f5f..38e337d51ce 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -400,7 +400,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	pdev->devt = devt;
 
 	/* delay uevent until 'holders' subdir is created */
-	pdev->uevent_suppress = 1;
+	dev_set_uevent_suppress(pdev, 1);
 	err = device_add(pdev);
 	if (err)
 		goto out_put;
@@ -410,7 +410,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	if (!p->holder_dir)
 		goto out_del;
 
-	pdev->uevent_suppress = 0;
+	dev_set_uevent_suppress(pdev, 0);
 	if (flags & ADDPART_FLAG_WHOLEDISK) {
 		err = device_create_file(pdev, &dev_attr_whole_disk);
 		if (err)
@@ -422,7 +422,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	rcu_assign_pointer(ptbl->part[partno], p);
 
 	/* suppress uevent if the disk supresses it */
-	if (!ddev->uevent_suppress)
+	if (!dev_get_uevent_suppress(ddev))
 		kobject_uevent(&pdev->kobj, KOBJ_ADD);
 
 	return p;
@@ -455,7 +455,7 @@ void register_disk(struct gendisk *disk)
 	dev_set_name(ddev, disk->disk_name);
 
 	/* delay uevents, until we scanned partition table */
-	ddev->uevent_suppress = 1;
+	dev_set_uevent_suppress(ddev, 1);
 
 	if (device_add(ddev))
 		return;
@@ -490,7 +490,7 @@ void register_disk(struct gendisk *disk)
 
 exit:
 	/* announce disk after possible partitions are created */
-	ddev->uevent_suppress = 0;
+	dev_set_uevent_suppress(ddev, 0);
 	kobject_uevent(&ddev->kobj, KOBJ_ADD);
 
 	/* announce possible partitions */
-- 
cgit v1.2.3-18-g5258


From 669420644c79c207f83fdf9105ae782867e2991f Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Fri, 13 Mar 2009 12:07:36 -0600
Subject: sysfs: only allow one scheduled removal callback per kobj

The only way for a sysfs attribute to remove itself (without
deadlock) is to use the sysfs_schedule_callback() interface.

Vegard Nossum discovered that a poorly written sysfs ->store
callback can repeatedly schedule remove callbacks on the same
device over and over, e.g.

	$ while true ; do echo 1 > /sys/devices/.../remove ; done

If the 'remove' attribute uses the sysfs_schedule_callback API
and also does not protect itself from concurrent accesses, its
callback handler will be called multiple times, and will
eventually attempt to perform operations on a freed kobject,
leading to many problems.

Instead of requiring all callers of sysfs_schedule_callback to
implement their own synchronization, provide the protection in
the infrastructure.

Now, sysfs_schedule_callback will only allow one scheduled
callback per kobject. On subsequent calls with the same kobject,
return -EAGAIN.

This is a short term fix. The long term fix is to allow sysfs
attributes to remove themselves directly, without any of this
callback hokey pokey.

[cornelia.huck@de.ibm.com: s390 ccwgroup bits]

Reported-by: vegard.nossum@gmail.com
Signed-off-by: Alex Chiang <achiang@hp.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/file.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1f4a3f87726..289c43a4726 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -659,13 +659,16 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
 EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
 
 struct sysfs_schedule_callback_struct {
-	struct kobject 		*kobj;
+	struct list_head	workq_list;
+	struct kobject		*kobj;
 	void			(*func)(void *);
 	void			*data;
 	struct module		*owner;
 	struct work_struct	work;
 };
 
+static DEFINE_MUTEX(sysfs_workq_mutex);
+static LIST_HEAD(sysfs_workq);
 static void sysfs_schedule_callback_work(struct work_struct *work)
 {
 	struct sysfs_schedule_callback_struct *ss = container_of(work,
@@ -674,6 +677,9 @@ static void sysfs_schedule_callback_work(struct work_struct *work)
 	(ss->func)(ss->data);
 	kobject_put(ss->kobj);
 	module_put(ss->owner);
+	mutex_lock(&sysfs_workq_mutex);
+	list_del(&ss->workq_list);
+	mutex_unlock(&sysfs_workq_mutex);
 	kfree(ss);
 }
 
@@ -695,15 +701,25 @@ static void sysfs_schedule_callback_work(struct work_struct *work)
  * until @func returns.
  *
  * Returns 0 if the request was submitted, -ENOMEM if storage could not
- * be allocated, -ENODEV if a reference to @owner isn't available.
+ * be allocated, -ENODEV if a reference to @owner isn't available,
+ * -EAGAIN if a callback has already been scheduled for @kobj.
  */
 int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
 		void *data, struct module *owner)
 {
-	struct sysfs_schedule_callback_struct *ss;
+	struct sysfs_schedule_callback_struct *ss, *tmp;
 
 	if (!try_module_get(owner))
 		return -ENODEV;
+	mutex_lock(&sysfs_workq_mutex);
+	list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list)
+		if (ss->kobj == kobj) {
+			module_put(owner);	/* undo try_module_get() above */
+			mutex_unlock(&sysfs_workq_mutex);
+			return -EAGAIN;
+		}
+	mutex_unlock(&sysfs_workq_mutex);
+
 	ss = kmalloc(sizeof(*ss), GFP_KERNEL);
 	if (!ss) {
 		module_put(owner);
@@ -715,6 +731,10 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
 	ss->data = data;
 	ss->owner = owner;
 	INIT_WORK(&ss->work, sysfs_schedule_callback_work);
+	INIT_LIST_HEAD(&ss->workq_list);
+	mutex_lock(&sysfs_workq_mutex);
+	list_add_tail(&ss->workq_list, &sysfs_workq);
+	mutex_unlock(&sysfs_workq_mutex);
 	schedule_work(&ss->work);
 	return 0;
 }
-- 
cgit v1.2.3-18-g5258


From 095160aee954688a9bad225952c4bee546541e19 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Mon, 23 Mar 2009 01:41:27 +0000
Subject: sysfs: fix some bin_vm_ops errors

Commit 86c9508eb1c0ce5aa07b5cf1d36b60c54efc3d7a
"sysfs: don't block indefinitely for unmapped files" in linux-next
crashes the PowerMac G5 when X starts up.  It's caught out by the way
powerpc's pci_mmap of legacy_mem uses shmem_zero_setup(), substituting
a new vma->vm_file whose private_data no longer points to the bin_buffer
(substitution done because some versions of X crash if that mmap fails).

The fix to this is straightforward: the original vm_file is fput() in
that case, so this mmap won't block sysfs at all, so just don't switch
over to bin_vm_ops if vm_file has changed.

But more fixes were made before realizing that was the problem:

It should not be an error if bin_page_mkwrite() finds no underlying
page_mkwrite().

Check that a file already mmap'ed has the same underlying vm_ops
_before_ pointing vma->vm_ops at bin_vm_ops.

If the file being mmap'ed is a shmem/tmpfs file, don't fail the mmap
on CONFIG_NUMA=y, just because that has a set_policy and get_policy:
provide bin_set_policy, bin_get_policy and bin_migrate.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Eric Biederman <ebiederm@aristanetworks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/bin.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 79 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 96cc2bf6a84..07703d3ff4a 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -241,9 +241,12 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
 	int ret;
 
-	if (!bb->vm_ops || !bb->vm_ops->page_mkwrite)
+	if (!bb->vm_ops)
 		return -EINVAL;
 
+	if (!bb->vm_ops->page_mkwrite)
+		return 0;
+
 	if (!sysfs_get_active_two(attr_sd))
 		return -EINVAL;
 
@@ -273,12 +276,78 @@ static int bin_access(struct vm_area_struct *vma, unsigned long addr,
 	return ret;
 }
 
+#ifdef CONFIG_NUMA
+static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
+{
+	struct file *file = vma->vm_file;
+	struct bin_buffer *bb = file->private_data;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	int ret;
+
+	if (!bb->vm_ops || !bb->vm_ops->set_policy)
+		return 0;
+
+	if (!sysfs_get_active_two(attr_sd))
+		return -EINVAL;
+
+	ret = bb->vm_ops->set_policy(vma, new);
+
+	sysfs_put_active_two(attr_sd);
+	return ret;
+}
+
+static struct mempolicy *bin_get_policy(struct vm_area_struct *vma,
+					unsigned long addr)
+{
+	struct file *file = vma->vm_file;
+	struct bin_buffer *bb = file->private_data;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	struct mempolicy *pol;
+
+	if (!bb->vm_ops || !bb->vm_ops->get_policy)
+		return vma->vm_policy;
+
+	if (!sysfs_get_active_two(attr_sd))
+		return vma->vm_policy;
+
+	pol = bb->vm_ops->get_policy(vma, addr);
+
+	sysfs_put_active_two(attr_sd);
+	return pol;
+}
+
+static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from,
+			const nodemask_t *to, unsigned long flags)
+{
+	struct file *file = vma->vm_file;
+	struct bin_buffer *bb = file->private_data;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
+	int ret;
+
+	if (!bb->vm_ops || !bb->vm_ops->migrate)
+		return 0;
+
+	if (!sysfs_get_active_two(attr_sd))
+		return 0;
+
+	ret = bb->vm_ops->migrate(vma, from, to, flags);
+
+	sysfs_put_active_two(attr_sd);
+	return ret;
+}
+#endif
+
 static struct vm_operations_struct bin_vm_ops = {
 	.open		= bin_vma_open,
 	.close		= bin_vma_close,
 	.fault		= bin_fault,
 	.page_mkwrite	= bin_page_mkwrite,
 	.access		= bin_access,
+#ifdef CONFIG_NUMA
+	.set_policy	= bin_set_policy,
+	.get_policy	= bin_get_policy,
+	.migrate	= bin_migrate,
+#endif
 };
 
 static int mmap(struct file *file, struct vm_area_struct *vma)
@@ -287,7 +356,6 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
 	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
 	struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
 	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
-	struct vm_operations_struct *vm_ops;
 	int rc;
 
 	mutex_lock(&bb->mutex);
@@ -302,24 +370,25 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
 		goto out_put;
 
 	rc = attr->mmap(kobj, attr, vma);
-	vm_ops = vma->vm_ops;
-	vma->vm_ops = &bin_vm_ops;
 	if (rc)
 		goto out_put;
 
-	rc = -EINVAL;
-	if (bb->mmapped && bb->vm_ops != vma->vm_ops)
+	/*
+	 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
+	 * to satisfy versions of X which crash if the mmap fails: that
+	 * substitutes a new vm_file, and we don't then want bin_vm_ops.
+	 */
+	if (vma->vm_file != file)
 		goto out_put;
 
-#ifdef CONFIG_NUMA
 	rc = -EINVAL;
-	if (vm_ops && ((vm_ops->set_policy || vm_ops->get_policy || vm_ops->migrate)))
+	if (bb->mmapped && bb->vm_ops != vma->vm_ops)
 		goto out_put;
-#endif
 
 	rc = 0;
 	bb->mmapped = 1;
-	bb->vm_ops = vm_ops;
+	bb->vm_ops = vma->vm_ops;
+	vma->vm_ops = &bin_vm_ops;
 out_put:
 	sysfs_put_active_two(attr_sd);
 out_unlock:
-- 
cgit v1.2.3-18-g5258