From 4e4d6d860b9393c5395ba5920edb5b4c5d43a3a3 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 18 Dec 2011 20:05:43 -0800
Subject: sysfs: Add s_hash to sysfs_dirent and order directory entries by hash

Compute a 31 bit hash of directory entries (that can fit in a signed
32bit off_t) and index the sysfs directory entries by that hash,
replacing the per directory indexes by name and by inode.  Because we
now only use a single rbtree this reduces the size of sysfs_dirent by 2
pointers.  Because we have fewer cases to deal with the code is now
simpler.

For now I use the simple hash that the dcache uses as that is easy to
use and seems simple enough.

In addition to makeing the code simpler using a hash for the file
position in readdir brings sysfs in line with other filesystems that
have non-trivial directory structures.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/dir.c   | 219 +++++++++++++++++++++++++++++--------------------------
 fs/sysfs/sysfs.h |   9 +--
 2 files changed, 120 insertions(+), 108 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 7fdf6a7b743..0daf255b7bf 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -22,76 +22,103 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/security.h>
+#include <linux/hash.h>
 #include "sysfs.h"
 
 DEFINE_MUTEX(sysfs_mutex);
 DEFINE_SPINLOCK(sysfs_assoc_lock);
 
+#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb);
+
 static DEFINE_SPINLOCK(sysfs_ino_lock);
 static DEFINE_IDA(sysfs_ino_ida);
 
 /**
- *	sysfs_link_sibling - link sysfs_dirent into sibling list
+ *	sysfs_name_hash
+ *	@ns:   Namespace tag to hash
+ *	@name: Null terminated string to hash
+ *
+ *	Returns 31 bit hash of ns + name (so it fits in an off_t )
+ */
+static unsigned int sysfs_name_hash(const void *ns, const char *name)
+{
+	unsigned long hash = init_name_hash();
+	unsigned int len = strlen(name);
+	while (len--)
+		hash = partial_name_hash(*name++, hash);
+	hash = ( end_name_hash(hash) ^ hash_ptr( (void *)ns, 31 ) );
+	hash &= 0x7fffffffU;
+	/* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
+	if (hash < 1)
+		hash += 2;
+	if (hash >= INT_MAX)
+		hash = INT_MAX - 1;
+	return hash;
+}
+
+static int sysfs_name_compare(unsigned int hash, const void *ns,
+	const char *name, const struct sysfs_dirent *sd)
+{
+	if (hash != sd->s_hash)
+		return hash - sd->s_hash;
+	if (ns != sd->s_ns)
+		return ns - sd->s_ns;
+	return strcmp(name, sd->s_name);
+}
+
+static int sysfs_sd_compare(const struct sysfs_dirent *left,
+			    const struct sysfs_dirent *right)
+{
+	return sysfs_name_compare(left->s_hash, left->s_ns, left->s_name,
+				  right);
+}
+
+/**
+ *	sysfs_link_subling - link sysfs_dirent into sibling rbtree
  *	@sd: sysfs_dirent of interest
  *
- *	Link @sd into its sibling list which starts from
+ *	Link @sd into its sibling rbtree which starts from
  *	sd->s_parent->s_dir.children.
  *
  *	Locking:
  *	mutex_lock(sysfs_mutex)
+ *
+ *	RETURNS:
+ *	0 on susccess -EEXIST on failure.
  */
-static void sysfs_link_sibling(struct sysfs_dirent *sd)
+static int sysfs_link_sibling(struct sysfs_dirent *sd)
 {
-	struct sysfs_dirent *parent_sd = sd->s_parent;
-
-	struct rb_node **p;
-	struct rb_node *parent;
+	struct rb_node **node = &sd->s_parent->s_dir.children.rb_node;
+	struct rb_node *parent = NULL;
 
 	if (sysfs_type(sd) == SYSFS_DIR)
-		parent_sd->s_dir.subdirs++;
-
-	p = &parent_sd->s_dir.inode_tree.rb_node;
-	parent = NULL;
-	while (*p) {
-		parent = *p;
-#define node	rb_entry(parent, struct sysfs_dirent, inode_node)
-		if (sd->s_ino < node->s_ino) {
-			p = &node->inode_node.rb_left;
-		} else if (sd->s_ino > node->s_ino) {
-			p = &node->inode_node.rb_right;
-		} else {
-			printk(KERN_CRIT "sysfs: inserting duplicate inode '%lx'\n",
-			       (unsigned long) sd->s_ino);
-			BUG();
-		}
-#undef node
-	}
-	rb_link_node(&sd->inode_node, parent, p);
-	rb_insert_color(&sd->inode_node, &parent_sd->s_dir.inode_tree);
-
-	p = &parent_sd->s_dir.name_tree.rb_node;
-	parent = NULL;
-	while (*p) {
-		int c;
-		parent = *p;
-#define node	rb_entry(parent, struct sysfs_dirent, name_node)
-		c = strcmp(sd->s_name, node->s_name);
-		if (c < 0) {
-			p = &node->name_node.rb_left;
-		} else {
-			p = &node->name_node.rb_right;
-		}
-#undef node
+		sd->s_parent->s_dir.subdirs++;
+
+	while (*node) {
+		struct sysfs_dirent *pos;
+		int result;
+
+		pos = to_sysfs_dirent(*node);
+		parent = *node;
+		result = sysfs_sd_compare(sd, pos);
+		if (result < 0)
+			node = &pos->s_rb.rb_left;
+		else if (result > 0)
+			node = &pos->s_rb.rb_right;
+		else
+			return -EEXIST;
 	}
-	rb_link_node(&sd->name_node, parent, p);
-	rb_insert_color(&sd->name_node, &parent_sd->s_dir.name_tree);
+	/* add new node and rebalance the tree */
+	rb_link_node(&sd->s_rb, parent, node);
+	rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children);
+	return 0;
 }
 
 /**
- *	sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
+ *	sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree
  *	@sd: sysfs_dirent of interest
  *
- *	Unlink @sd from its sibling list which starts from
+ *	Unlink @sd from its sibling rbtree which starts from
  *	sd->s_parent->s_dir.children.
  *
  *	Locking:
@@ -102,8 +129,7 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
 	if (sysfs_type(sd) == SYSFS_DIR)
 		sd->s_parent->s_dir.subdirs--;
 
-	rb_erase(&sd->inode_node, &sd->s_parent->s_dir.inode_tree);
-	rb_erase(&sd->name_node, &sd->s_parent->s_dir.name_tree);
+	rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
 }
 
 /**
@@ -402,6 +428,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
 int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
 {
 	struct sysfs_inode_attrs *ps_iattr;
+	int ret;
 
 	if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) {
 		WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
@@ -410,12 +437,12 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
 		return -EINVAL;
 	}
 
-	if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name))
-		return -EEXIST;
-
+	sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
 	sd->s_parent = sysfs_get(acxt->parent_sd);
 
-	sysfs_link_sibling(sd);
+	ret = sysfs_link_sibling(sd);
+	if (ret)
+		return ret;
 
 	/* Update timestamps on the parent */
 	ps_iattr = acxt->parent_sd->s_iattr;
@@ -565,8 +592,8 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
 				       const void *ns,
 				       const unsigned char *name)
 {
-	struct rb_node *p = parent_sd->s_dir.name_tree.rb_node;
-	struct sysfs_dirent *found = NULL;
+	struct rb_node *node = parent_sd->s_dir.children.rb_node;
+	unsigned int hash;
 
 	if (!!sysfs_ns_type(parent_sd) != !!ns) {
 		WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
@@ -575,33 +602,21 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
 		return NULL;
 	}
 
-	while (p) {
-		int c;
-#define node	rb_entry(p, struct sysfs_dirent, name_node)
-		c = strcmp(name, node->s_name);
-		if (c < 0) {
-			p = node->name_node.rb_left;
-		} else if (c > 0) {
-			p = node->name_node.rb_right;
-		} else {
-			found = node;
-			p = node->name_node.rb_left;
-		}
-#undef node
-	}
-
-	if (found) {
-		while (found->s_ns != ns) {
-			p = rb_next(&found->name_node);
-			if (!p)
-				return NULL;
-			found = rb_entry(p, struct sysfs_dirent, name_node);
-			if (strcmp(name, found->s_name))
-				return NULL;
-		}
+	hash = sysfs_name_hash(ns, name);
+	while (node) {
+		struct sysfs_dirent *sd;
+		int result;
+
+		sd = to_sysfs_dirent(node);
+		result = sysfs_name_compare(hash, ns, name, sd);
+		if (result < 0)
+			node = node->rb_left;
+		else if (result > 0)
+			node = node->rb_right;
+		else
+			return sd;
 	}
-
-	return found;
+	return NULL;
 }
 
 /**
@@ -804,9 +819,9 @@ static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
 
 	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
 	sysfs_addrm_start(&acxt, dir_sd);
-	pos = rb_first(&dir_sd->s_dir.inode_tree);
+	pos = rb_first(&dir_sd->s_dir.children);
 	while (pos) {
-		struct sysfs_dirent *sd = rb_entry(pos, struct sysfs_dirent, inode_node);
+		struct sysfs_dirent *sd = to_sysfs_dirent(pos);
 		pos = rb_next(pos);
 		if (sysfs_type(sd) != SYSFS_DIR)
 			sysfs_remove_one(&acxt, sd);
@@ -919,38 +934,36 @@ static int sysfs_dir_release(struct inode *inode, struct file *filp)
 }
 
 static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
-	struct sysfs_dirent *parent_sd,	ino_t ino, struct sysfs_dirent *pos)
+	struct sysfs_dirent *parent_sd,	loff_t hash, struct sysfs_dirent *pos)
 {
 	if (pos) {
 		int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
 			pos->s_parent == parent_sd &&
-			ino == pos->s_ino;
+			hash == pos->s_hash;
 		sysfs_put(pos);
 		if (!valid)
 			pos = NULL;
 	}
-	if (!pos && (ino > 1) && (ino < INT_MAX)) {
-		struct rb_node *p = parent_sd->s_dir.inode_tree.rb_node;
-		while (p) {
-#define node	rb_entry(p, struct sysfs_dirent, inode_node)
-			if (ino < node->s_ino) {
-				pos = node;
-				p = node->inode_node.rb_left;
-			} else if (ino > node->s_ino) {
-				p = node->inode_node.rb_right;
-			} else {
-				pos = node;
+	if (!pos && (hash > 1) && (hash < INT_MAX)) {
+		struct rb_node *node = parent_sd->s_dir.children.rb_node;
+		while (node) {
+			pos = to_sysfs_dirent(node);
+
+			if (hash < pos->s_hash)
+				node = node->rb_left;
+			else if (hash > pos->s_hash)
+				node = node->rb_right;
+			else
 				break;
-			}
-#undef node
 		}
 	}
+	/* Skip over entries in the wrong namespace */
 	while (pos && pos->s_ns != ns) {
-		struct rb_node *p = rb_next(&pos->inode_node);
-		if (!p)
+		struct rb_node *node = rb_next(&pos->s_rb);
+		if (!node)
 			pos = NULL;
 		else
-			pos = rb_entry(p, struct sysfs_dirent, inode_node);
+			pos = to_sysfs_dirent(node);
 	}
 	return pos;
 }
@@ -960,11 +973,11 @@ static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
 {
 	pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
 	if (pos) do {
-		struct rb_node *p = rb_next(&pos->inode_node);
-		if (!p)
+		struct rb_node *node = rb_next(&pos->s_rb);
+		if (!node)
 			pos = NULL;
 		else
-			pos = rb_entry(p, struct sysfs_dirent, inode_node);
+			pos = to_sysfs_dirent(node);
 	} while (pos && pos->s_ns != ns);
 	return pos;
 }
@@ -1006,7 +1019,7 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
 		len = strlen(name);
 		ino = pos->s_ino;
 		type = dt_type(pos);
-		filp->f_pos = ino;
+		filp->f_pos = pos->s_hash;
 		filp->private_data = sysfs_get(pos);
 
 		mutex_unlock(&sysfs_mutex);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 7484a36ee67..2b5c923b4b9 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -20,9 +20,8 @@ struct sysfs_elem_dir {
 	struct kobject		*kobj;
 
 	unsigned long		subdirs;
-
-	struct rb_root		inode_tree;
-	struct rb_root		name_tree;
+	/* children rbtree starts here and goes through sd->s_rb */
+	struct rb_root		children;
 };
 
 struct sysfs_elem_symlink {
@@ -62,8 +61,7 @@ struct sysfs_dirent {
 	struct sysfs_dirent	*s_parent;
 	const char		*s_name;
 
-	struct rb_node		inode_node;
-	struct rb_node		name_node;
+	struct rb_node		s_rb;
 
 	union {
 		struct completion	*completion;
@@ -71,6 +69,7 @@ struct sysfs_dirent {
 	} u;
 
 	const void		*s_ns; /* namespace tag */
+	unsigned int		s_hash; /* ns + name hash */
 	union {
 		struct sysfs_elem_dir		s_dir;
 		struct sysfs_elem_symlink	s_symlink;
-- 
cgit v1.2.3-70-g09d2


From 15a3382451e51925facfe430deeca63d90137f5d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 18 Dec 2011 20:07:23 -0800
Subject: sysfs: Reduce s_flags to an unsinged short so it packs well with
 s_mode

On 32bit this reduces sizeof(struct sysfs_dirent) by 2 bytes.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/sysfs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 2b5c923b4b9..19994948ac5 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -77,7 +77,7 @@ struct sysfs_dirent {
 		struct sysfs_elem_bin_attr	s_bin_attr;
 	};
 
-	unsigned int		s_flags;
+	unsigned short		s_flags;
 	umode_t 		s_mode;
 	ino_t			s_ino;
 	struct sysfs_inode_attrs *s_iattr;
@@ -94,11 +94,11 @@ struct sysfs_dirent {
 #define SYSFS_ACTIVE_REF		(SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR)
 
 /* identify any namespace tag on sysfs_dirents */
-#define SYSFS_NS_TYPE_MASK		0xff00
+#define SYSFS_NS_TYPE_MASK		0xf00
 #define SYSFS_NS_TYPE_SHIFT		8
 
 #define SYSFS_FLAG_MASK			~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK)
-#define SYSFS_FLAG_REMOVED		0x020000
+#define SYSFS_FLAG_REMOVED		0x02000
 
 static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
 {
-- 
cgit v1.2.3-70-g09d2


From cafa6b5dd7ce4f0e0a30be301be4efed587a7808 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 18 Dec 2011 20:08:16 -0800
Subject: sysfs: Store the sysfs inode in an unsigned int.

Store the sysfs inode number in an unsided int because
ida inode allocator can return at most a 31 bit number,
reducing the size of struct sysfs_dirent by 8 bytes
on 64bit platforms.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/dir.c   | 4 ++--
 fs/sysfs/sysfs.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 0daf255b7bf..0589c9a694b 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -224,7 +224,7 @@ static void sysfs_deactivate(struct sysfs_dirent *sd)
 	rwsem_release(&sd->dep_map, 1, _RET_IP_);
 }
 
-static int sysfs_alloc_ino(ino_t *pino)
+static int sysfs_alloc_ino(unsigned int *pino)
 {
 	int ino, rc;
 
@@ -243,7 +243,7 @@ static int sysfs_alloc_ino(ino_t *pino)
 	return rc;
 }
 
-static void sysfs_free_ino(ino_t ino)
+static void sysfs_free_ino(unsigned int ino)
 {
 	spin_lock(&sysfs_ino_lock);
 	ida_remove(&sysfs_ino_ida, ino);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 19994948ac5..661a9639570 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -79,7 +79,7 @@ struct sysfs_dirent {
 
 	unsigned short		s_flags;
 	umode_t 		s_mode;
-	ino_t			s_ino;
+	unsigned int		s_ino;
 	struct sysfs_inode_attrs *s_iattr;
 };
 
-- 
cgit v1.2.3-70-g09d2


From 524b6c5b39b931311dfe5a2f5abae2f5c9731676 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 18 Dec 2011 20:09:31 -0800
Subject: sysfs: Kill nlink counting.

Tracking the number of subdirectories requires an extra field that increases
the size of sysfs_dirent.  nlinks are not particularly interesting for sysfs
and the nlink counts are wrong when network namespaces are involved so stop
counting them, and always return nlink == 1.  Userspace already knows that
directories with nlink == 1 have an nlink count they can't use to count
subdirectories.

This reduces the size of sysfs_dirent by 8 bytes on 64bit platforms.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/dir.c   | 6 ------
 fs/sysfs/inode.c | 3 ---
 fs/sysfs/sysfs.h | 1 -
 3 files changed, 10 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 0589c9a694b..ea64d01400a 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -91,9 +91,6 @@ static int sysfs_link_sibling(struct sysfs_dirent *sd)
 	struct rb_node **node = &sd->s_parent->s_dir.children.rb_node;
 	struct rb_node *parent = NULL;
 
-	if (sysfs_type(sd) == SYSFS_DIR)
-		sd->s_parent->s_dir.subdirs++;
-
 	while (*node) {
 		struct sysfs_dirent *pos;
 		int result;
@@ -126,9 +123,6 @@ static int sysfs_link_sibling(struct sysfs_dirent *sd)
  */
 static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
 {
-	if (sysfs_type(sd) == SYSFS_DIR)
-		sd->s_parent->s_dir.subdirs--;
-
 	rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
 }
 
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 4a802b4a905..0ac3e1c1a7d 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -216,9 +216,6 @@ static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
 					    iattrs->ia_secdata,
 					    iattrs->ia_secdata_len);
 	}
-
-	if (sysfs_type(sd) == SYSFS_DIR)
-		set_nlink(inode, sd->s_dir.subdirs + 2);
 }
 
 int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 661a9639570..6289a00287d 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -19,7 +19,6 @@ struct sysfs_open_dirent;
 struct sysfs_elem_dir {
 	struct kobject		*kobj;
 
-	unsigned long		subdirs;
 	/* children rbtree starts here and goes through sd->s_rb */
 	struct rb_root		children;
 };
-- 
cgit v1.2.3-70-g09d2


From a4834c102f4a46808630cad1a545cb0706b3b0a2 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Thu, 5 Jan 2012 13:06:02 +0400
Subject: tty: move pty count limiting into devpts

Let's move this stuff to the better place, where we can account pty right in
tty-indexes managing code.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/tty/pty.c | 51 ---------------------------------------------------
 fs/devpts/inode.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 52 deletions(-)

(limited to 'fs')

diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 03147fa31d4..d505837b347 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -21,7 +21,6 @@
 #include <linux/major.h>
 #include <linux/mm.h>
 #include <linux/init.h>
-#include <linux/sysctl.h>
 #include <linux/device.h>
 #include <linux/uaccess.h>
 #include <linux/bitops.h>
@@ -439,55 +438,9 @@ static inline void legacy_pty_init(void) { }
 
 /* Unix98 devices */
 #ifdef CONFIG_UNIX98_PTYS
-/*
- * sysctl support for setting limits on the number of Unix98 ptys allocated.
- * Otherwise one can eat up all kernel memory by opening /dev/ptmx repeatedly.
- */
-int pty_limit = NR_UNIX98_PTY_DEFAULT;
-static int pty_limit_min;
-static int pty_limit_max = NR_UNIX98_PTY_MAX;
-static int pty_count;
 
 static struct cdev ptmx_cdev;
 
-static struct ctl_table pty_table[] = {
-	{
-		.procname	= "max",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.data		= &pty_limit,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &pty_limit_min,
-		.extra2		= &pty_limit_max,
-	}, {
-		.procname	= "nr",
-		.maxlen		= sizeof(int),
-		.mode		= 0444,
-		.data		= &pty_count,
-		.proc_handler	= proc_dointvec,
-	}, 
-	{}
-};
-
-static struct ctl_table pty_kern_table[] = {
-	{
-		.procname	= "pty",
-		.mode		= 0555,
-		.child		= pty_table,
-	},
-	{}
-};
-
-static struct ctl_table pty_root_table[] = {
-	{
-		.procname	= "kernel",
-		.mode		= 0555,
-		.child		= pty_kern_table,
-	},
-	{}
-};
-
-
 static int pty_unix98_ioctl(struct tty_struct *tty,
 			    unsigned int cmd, unsigned long arg)
 {
@@ -587,7 +540,6 @@ static int pty_unix98_install(struct tty_driver *driver, struct tty_struct *tty)
 	 */
 	tty_driver_kref_get(driver);
 	tty->count++;
-	pty_count++;
 	return 0;
 err_free_mem:
 	deinitialize_tty_struct(o_tty);
@@ -601,7 +553,6 @@ err_free_tty:
 
 static void ptm_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
 {
-	pty_count--;
 }
 
 static void pts_unix98_remove(struct tty_driver *driver, struct tty_struct *tty)
@@ -760,8 +711,6 @@ static void __init unix98_pty_init(void)
 	if (tty_register_driver(pts_driver))
 		panic("Couldn't register Unix98 pts driver");
 
-	register_sysctl_table(pty_root_table);
-
 	/* Now create the /dev/ptmx special device */
 	tty_default_fops(&ptmx_fops);
 	ptmx_fops.open = ptmx_open;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index c4e2a58a2e8..c2c7317d568 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -36,7 +36,52 @@
 #define DEVPTS_DEFAULT_PTMX_MODE 0000
 #define PTMX_MINOR	2
 
-extern int pty_limit;			/* Config limit on Unix98 ptys */
+/*
+ * sysctl support for setting limits on the number of Unix98 ptys allocated.
+ * Otherwise one can eat up all kernel memory by opening /dev/ptmx repeatedly.
+ */
+static int pty_limit = NR_UNIX98_PTY_DEFAULT;
+static int pty_limit_min;
+static int pty_limit_max = NR_UNIX98_PTY_MAX;
+static int pty_count;
+
+static struct ctl_table pty_table[] = {
+	{
+		.procname	= "max",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.data		= &pty_limit,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &pty_limit_min,
+		.extra2		= &pty_limit_max,
+	}, {
+		.procname	= "nr",
+		.maxlen		= sizeof(int),
+		.mode		= 0444,
+		.data		= &pty_count,
+		.proc_handler	= proc_dointvec,
+	},
+	{}
+};
+
+static struct ctl_table pty_kern_table[] = {
+	{
+		.procname	= "pty",
+		.mode		= 0555,
+		.child		= pty_table,
+	},
+	{}
+};
+
+static struct ctl_table pty_root_table[] = {
+	{
+		.procname	= "kernel",
+		.mode		= 0555,
+		.child		= pty_kern_table,
+	},
+	{}
+};
+
 static DEFINE_MUTEX(allocated_ptys_lock);
 
 static struct vfsmount *devpts_mnt;
@@ -451,6 +496,7 @@ retry:
 		mutex_unlock(&allocated_ptys_lock);
 		return -EIO;
 	}
+	pty_count++;
 	mutex_unlock(&allocated_ptys_lock);
 	return index;
 }
@@ -462,6 +508,7 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
 
 	mutex_lock(&allocated_ptys_lock);
 	ida_remove(&fsi->allocated_ptys, idx);
+	pty_count--;
 	mutex_unlock(&allocated_ptys_lock);
 }
 
@@ -558,11 +605,15 @@ void devpts_pty_kill(struct tty_struct *tty)
 static int __init init_devpts_fs(void)
 {
 	int err = register_filesystem(&devpts_fs_type);
+	struct ctl_table_header *table;
+
 	if (!err) {
+		table = register_sysctl_table(pty_root_table);
 		devpts_mnt = kern_mount(&devpts_fs_type);
 		if (IS_ERR(devpts_mnt)) {
 			err = PTR_ERR(devpts_mnt);
 			unregister_filesystem(&devpts_fs_type);
+			unregister_sysctl_table(table);
 		}
 	}
 	return err;
-- 
cgit v1.2.3-70-g09d2


From e9aba5158a80098447ff207a452a3418ae7ee386 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Thu, 5 Jan 2012 13:06:11 +0400
Subject: tty: rework pty count limiting

After adding devpts multiple-insrances sysctl kernel.pty.max limit pty count for
each devpts instance independently, while kernel.pty.nr shows total pty count.

This patch restores sysctl kernel.pty.max as global limit (4096 by default),
adds pty reseve for main devpts (mounted without "newinstance" argument),
and new sysctl to tune it: kernel.pty.reserve (1024 by default)

Also it adds devpts mount option "max=%d" to limit pty count for each devpts
instance independently. (by default NR_UNIX98_PTY_MAX == 2^20)

Thus devpts instances in containers cannot eat up all available pty even if we didn't
set any limits, while with "max" argument we can adjust limits more precisely.

Plus, now open("/dev/ptmx") return -ENOSPC in case lack of pty indexes,
this is more informative than -EIO.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/devpts/inode.c   | 34 ++++++++++++++++++++++++++++++----
 include/linux/tty.h |  1 +
 2 files changed, 31 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index c2c7317d568..1c6f908e38c 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -41,8 +41,9 @@
  * Otherwise one can eat up all kernel memory by opening /dev/ptmx repeatedly.
  */
 static int pty_limit = NR_UNIX98_PTY_DEFAULT;
+static int pty_reserve = NR_UNIX98_PTY_RESERVE;
 static int pty_limit_min;
-static int pty_limit_max = NR_UNIX98_PTY_MAX;
+static int pty_limit_max = INT_MAX;
 static int pty_count;
 
 static struct ctl_table pty_table[] = {
@@ -54,6 +55,14 @@ static struct ctl_table pty_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &pty_limit_min,
 		.extra2		= &pty_limit_max,
+	}, {
+		.procname	= "reserve",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.data		= &pty_reserve,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &pty_limit_min,
+		.extra2		= &pty_limit_max,
 	}, {
 		.procname	= "nr",
 		.maxlen		= sizeof(int),
@@ -94,10 +103,11 @@ struct pts_mount_opts {
 	umode_t mode;
 	umode_t ptmxmode;
 	int newinstance;
+	int max;
 };
 
 enum {
-	Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance,
+	Opt_uid, Opt_gid, Opt_mode, Opt_ptmxmode, Opt_newinstance,  Opt_max,
 	Opt_err
 };
 
@@ -108,6 +118,7 @@ static const match_table_t tokens = {
 #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
 	{Opt_ptmxmode, "ptmxmode=%o"},
 	{Opt_newinstance, "newinstance"},
+	{Opt_max, "max=%d"},
 #endif
 	{Opt_err, NULL}
 };
@@ -154,6 +165,7 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
 	opts->gid     = 0;
 	opts->mode    = DEVPTS_DEFAULT_MODE;
 	opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
+	opts->max     = NR_UNIX98_PTY_MAX;
 
 	/* newinstance makes sense only on initial mount */
 	if (op == PARSE_MOUNT)
@@ -197,6 +209,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
 			if (op == PARSE_MOUNT)
 				opts->newinstance = 1;
 			break;
+		case Opt_max:
+			if (match_int(&args[0], &option) ||
+			    option < 0 || option > NR_UNIX98_PTY_MAX)
+				return -EINVAL;
+			opts->max = option;
+			break;
 #endif
 		default:
 			printk(KERN_ERR "devpts: called with bogus options\n");
@@ -303,6 +321,8 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root)
 	seq_printf(seq, ",mode=%03o", opts->mode);
 #ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
 	seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
+	if (opts->max < NR_UNIX98_PTY_MAX)
+		seq_printf(seq, ",max=%d", opts->max);
 #endif
 
 	return 0;
@@ -483,6 +503,12 @@ retry:
 		return -ENOMEM;
 
 	mutex_lock(&allocated_ptys_lock);
+	if (pty_count >= pty_limit -
+			(fsi->mount_opts.newinstance ? pty_reserve : 0)) {
+		mutex_unlock(&allocated_ptys_lock);
+		return -ENOSPC;
+	}
+
 	ida_ret = ida_get_new(&fsi->allocated_ptys, &index);
 	if (ida_ret < 0) {
 		mutex_unlock(&allocated_ptys_lock);
@@ -491,10 +517,10 @@ retry:
 		return -EIO;
 	}
 
-	if (index >= pty_limit) {
+	if (index >= fsi->mount_opts.max) {
 		ida_remove(&fsi->allocated_ptys, index);
 		mutex_unlock(&allocated_ptys_lock);
-		return -EIO;
+		return -ENOSPC;
 	}
 	pty_count++;
 	mutex_unlock(&allocated_ptys_lock);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index d3ebd765b54..d4077418820 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -52,6 +52,7 @@
  * hardcoded at present.)
  */
 #define NR_UNIX98_PTY_DEFAULT	4096      /* Default maximum for Unix98 ptys */
+#define NR_UNIX98_PTY_RESERVE	1024	  /* Default reserve for main devpts */
 #define NR_UNIX98_PTY_MAX	(1 << MINORBITS) /* Absolute limit */
 
 /*
-- 
cgit v1.2.3-70-g09d2


From c56d8a7362665d165ba992b6b7a8d6c13a26eafc Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vitty@altlinux.ru>
Date: Tue, 17 Jan 2012 12:17:22 +0000
Subject: sysfs: change permissions for /sys from 0755 to 0555

There is a misleading difference between /proc and /sys permissions, /proc is 0555 and /sys is 0755. But
as it is impossible to create or unlink something in /sys it would be nice to have same permissions.

Signed-off-by: Vitaly Kuznetsov <vitty@altlinux.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/mount.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index e34f0d99ea4..140f26a3428 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -36,7 +36,7 @@ struct sysfs_dirent sysfs_root = {
 	.s_name		= "",
 	.s_count	= ATOMIC_INIT(1),
 	.s_flags	= SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT),
-	.s_mode		= S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
+	.s_mode		= S_IFDIR | S_IRUGO | S_IXUGO,
 	.s_ino		= 1,
 };
 
-- 
cgit v1.2.3-70-g09d2


From d6e486868cde585842d55ba3b6ec57af090fc343 Mon Sep 17 00:00:00 2001
From: Ludwig Nussel <ludwig.nussel@suse.de>
Date: Wed, 25 Jan 2012 11:52:28 +0100
Subject: debugfs: add mode, uid and gid options

Cautious admins may want to restrict access to debugfs. Currently a
manual chown/chmod e.g. in an init script is needed to achieve that.
Distributions that want to make the mount options configurable need
to add extra config files. By allowing to set the root inode's uid,
gid and mode via mount options no such hacks are needed anymore.
Instead configuration becomes straight forward via fstab.

Signed-off-by: Ludwig Nussel <ludwig.nussel@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/filesystems/debugfs.txt |   5 +-
 fs/debugfs/inode.c                    | 149 +++++++++++++++++++++++++++++++++-
 2 files changed, 152 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt
index 6872c91bce3..4e257587318 100644
--- a/Documentation/filesystems/debugfs.txt
+++ b/Documentation/filesystems/debugfs.txt
@@ -14,7 +14,10 @@ Debugfs is typically mounted with a command like:
 
     mount -t debugfs none /sys/kernel/debug
 
-(Or an equivalent /etc/fstab line). 
+(Or an equivalent /etc/fstab line).
+The debugfs root directory is accessible by anyone by default. To
+restrict access to the tree the "uid", "gid" and "mode" mount
+options can be used.
 
 Note that the debugfs API is exported GPL-only to modules.
 
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 956d5ddddf6..b80bc846a15 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -23,9 +23,13 @@
 #include <linux/debugfs.h>
 #include <linux/fsnotify.h>
 #include <linux/string.h>
+#include <linux/seq_file.h>
+#include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
 
+#define DEBUGFS_DEFAULT_MODE	0755
+
 static struct vfsmount *debugfs_mount;
 static int debugfs_mount_count;
 static bool debugfs_registered;
@@ -125,11 +129,154 @@ static inline int debugfs_positive(struct dentry *dentry)
 	return dentry->d_inode && !d_unhashed(dentry);
 }
 
+struct debugfs_mount_opts {
+	uid_t uid;
+	gid_t gid;
+	umode_t mode;
+};
+
+enum {
+	Opt_uid,
+	Opt_gid,
+	Opt_mode,
+	Opt_err
+};
+
+static const match_table_t tokens = {
+	{Opt_uid, "uid=%u"},
+	{Opt_gid, "gid=%u"},
+	{Opt_mode, "mode=%o"},
+	{Opt_err, NULL}
+};
+
+struct debugfs_fs_info {
+	struct debugfs_mount_opts mount_opts;
+};
+
+static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
+{
+	substring_t args[MAX_OPT_ARGS];
+	int option;
+	int token;
+	char *p;
+
+	opts->mode = DEBUGFS_DEFAULT_MODE;
+
+	while ((p = strsep(&data, ",")) != NULL) {
+		if (!*p)
+			continue;
+
+		token = match_token(p, tokens, args);
+		switch (token) {
+		case Opt_uid:
+			if (match_int(&args[0], &option))
+				return -EINVAL;
+			opts->uid = option;
+			break;
+		case Opt_gid:
+			if (match_octal(&args[0], &option))
+				return -EINVAL;
+			opts->gid = option;
+			break;
+		case Opt_mode:
+			if (match_octal(&args[0], &option))
+				return -EINVAL;
+			opts->mode = option & S_IALLUGO;
+			break;
+		/*
+		 * We might like to report bad mount options here;
+		 * but traditionally debugfs has ignored all mount options
+		 */
+		}
+	}
+
+	return 0;
+}
+
+static int debugfs_apply_options(struct super_block *sb)
+{
+	struct debugfs_fs_info *fsi = sb->s_fs_info;
+	struct inode *inode = sb->s_root->d_inode;
+	struct debugfs_mount_opts *opts = &fsi->mount_opts;
+
+	inode->i_mode &= ~S_IALLUGO;
+	inode->i_mode |= opts->mode;
+
+	inode->i_uid = opts->uid;
+	inode->i_gid = opts->gid;
+
+	return 0;
+}
+
+static int debugfs_remount(struct super_block *sb, int *flags, char *data)
+{
+	int err;
+	struct debugfs_fs_info *fsi = sb->s_fs_info;
+
+	err = debugfs_parse_options(data, &fsi->mount_opts);
+	if (err)
+		goto fail;
+
+	debugfs_apply_options(sb);
+
+fail:
+	return err;
+}
+
+static int debugfs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct debugfs_fs_info *fsi = root->d_sb->s_fs_info;
+	struct debugfs_mount_opts *opts = &fsi->mount_opts;
+
+	if (opts->uid != 0)
+		seq_printf(m, ",uid=%u", opts->uid);
+	if (opts->gid != 0)
+		seq_printf(m, ",gid=%u", opts->gid);
+	if (opts->mode != DEBUGFS_DEFAULT_MODE)
+		seq_printf(m, ",mode=%o", opts->mode);
+
+	return 0;
+}
+
+static const struct super_operations debugfs_super_operations = {
+	.statfs		= simple_statfs,
+	.remount_fs	= debugfs_remount,
+	.show_options	= debugfs_show_options,
+};
+
 static int debug_fill_super(struct super_block *sb, void *data, int silent)
 {
 	static struct tree_descr debug_files[] = {{""}};
+	struct debugfs_fs_info *fsi;
+	int err;
+
+	save_mount_options(sb, data);
+
+	fsi = kzalloc(sizeof(struct debugfs_fs_info), GFP_KERNEL);
+	sb->s_fs_info = fsi;
+	if (!fsi) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	err = debugfs_parse_options(data, &fsi->mount_opts);
+	if (err)
+		goto fail;
+
+	err  =  simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
+	if (err)
+		goto fail;
+
+	sb->s_op = &debugfs_super_operations;
+
+	debugfs_apply_options(sb);
+
+	return 0;
 
-	return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
+fail:
+	kfree(fsi);
+	sb->s_fs_info = NULL;
+	return err;
 }
 
 static struct dentry *debug_mount(struct file_system_type *fs_type,
-- 
cgit v1.2.3-70-g09d2


From d5c38b137ac8a6e3dbed13bc494d60df5b69dfc4 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 31 Jan 2012 06:40:26 -0800
Subject: sysfs: Update the name hash when renaming sysfs entries

This fixes a bug introduced with sysfs name hashes where renaming a
network device appears to succeed but silently makes the sysfs files for
that network device inaccessible.

In at least one configuration this bug has stopped networking from
coming up during boot.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Tested-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/dir.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index ea64d01400a..dd3779cf3a3 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -872,6 +872,7 @@ int sysfs_rename(struct sysfs_dirent *sd,
 
 		dup_name = sd->s_name;
 		sd->s_name = new_name;
+		sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
 	}
 
 	/* Move to the appropriate place in the appropriate directories rbtree. */
-- 
cgit v1.2.3-70-g09d2


From 8112b9830a056c3f42423e4e8e914ac9f7162dce Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Sun, 22 Jan 2012 23:27:00 +0900
Subject: reiserfs: fix printk typo in lbalance.c

Correct spelling "entry_cout" to "entry_count" in
fs/reiserfs/lbalance.c

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/reiserfs/lbalance.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 03d85cbf90b..b43d0155631 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -975,7 +975,7 @@ static int leaf_cut_entries(struct buffer_head *bh,
 	   remove */
 	RFALSE(!is_direntry_le_ih(ih), "10180: item is not directory item");
 	RFALSE(I_ENTRY_COUNT(ih) < from + del_count,
-	       "10185: item contains not enough entries: entry_cout = %d, from = %d, to delete = %d",
+	       "10185: item contains not enough entries: entry_count = %d, from = %d, to delete = %d",
 	       I_ENTRY_COUNT(ih), from, del_count);
 
 	if (del_count == 0)
-- 
cgit v1.2.3-70-g09d2


From 982a598ff68acad37647baba06668054568eee49 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Tue, 24 Jan 2012 02:29:36 +0900
Subject: ntfs: fix printk typos in mft.c

Correct two spelling errors "dealocate" to "deallocate"
in fs/ntfs/mft.c

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/ntfs/mft.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 382857f9c7d..862f7ff57b7 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1367,7 +1367,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
 			ntfs_error(vol->sb, "Failed to merge runlists for mft "
 					"bitmap.");
 			if (ntfs_cluster_free_from_rl(vol, rl2)) {
-				ntfs_error(vol->sb, "Failed to dealocate "
+				ntfs_error(vol->sb, "Failed to deallocate "
 						"allocated cluster.%s", es);
 				NVolSetErrors(vol);
 			}
@@ -1805,7 +1805,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
 		ntfs_error(vol->sb, "Failed to merge runlists for mft data "
 				"attribute.");
 		if (ntfs_cluster_free_from_rl(vol, rl2)) {
-			ntfs_error(vol->sb, "Failed to dealocate clusters "
+			ntfs_error(vol->sb, "Failed to deallocate clusters "
 					"from the mft data attribute.%s", es);
 			NVolSetErrors(vol);
 		}
-- 
cgit v1.2.3-70-g09d2


From 934e7d44b810691ae5aefa3308b97a402aac1a55 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Tue, 7 Feb 2012 22:21:45 +0900
Subject: btrfs: Fix typo in free-space-cache.c

Correct spelling "cace" to "cache" in
fs/btrfs/free-space-cache.c

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/btrfs/free-space-cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index c2f20594c9f..7f4f3025357 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1067,7 +1067,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		spin_unlock(&block_group->lock);
 		ret = 0;
 #ifdef DEBUG
-		printk(KERN_ERR "btrfs: failed to write free space cace "
+		printk(KERN_ERR "btrfs: failed to write free space cache "
 		       "for block group %llu\n", block_group->key.objectid);
 #endif
 	}
-- 
cgit v1.2.3-70-g09d2


From 42ea19790e82498e14a24e97b7cf2a83d89203b6 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Wed, 8 Feb 2012 20:39:39 +0900
Subject: jffs2: Fix typo in compr.c

Correct spelling "modul" to "module" in
fs/hffs2/compr.c

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/jffs2/compr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 5b6c9d1a2fb..96ed3c9ec3f 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -340,7 +340,7 @@ int jffs2_unregister_compressor(struct jffs2_compressor *comp)
 
 	if (comp->usecount) {
 		spin_unlock(&jffs2_compressor_list_lock);
-		printk(KERN_WARNING "JFFS2: Compressor modul is in use. Unregister failed.\n");
+		printk(KERN_WARNING "JFFS2: Compressor module is in use. Unregister failed.\n");
 		return -1;
 	}
 	list_del(&comp->list);
-- 
cgit v1.2.3-70-g09d2


From 3e93b8dfd9dd8735152e59913a2bde226f83d43e Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Sun, 5 Feb 2012 01:29:47 +0100
Subject: BTRFS: Don't include disk-io.h twice in check-integrity.c

Once should be enough.

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/btrfs/check-integrity.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index b669a7d8e49..064b29bd160 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -89,7 +89,6 @@
 #include "disk-io.h"
 #include "transaction.h"
 #include "extent_io.h"
-#include "disk-io.h"
 #include "volumes.h"
 #include "print-tree.h"
 #include "locking.h"
-- 
cgit v1.2.3-70-g09d2


From 0cc785ecbf6c04c1ef01c311accee859c856a6b9 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Sat, 11 Feb 2012 21:35:12 +0900
Subject: cramfs: Fix typo in inode.c

Correct spelling "endianess" to "endianness" in
fs/cramfs/inode.c

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/cramfs/inode.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index a2ee8f9f5a3..04d51f9333d 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -257,10 +257,10 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
 
 	/* Do sanity checks on the superblock */
 	if (super.magic != CRAMFS_MAGIC) {
-		/* check for wrong endianess */
+		/* check for wrong endianness */
 		if (super.magic == CRAMFS_MAGIC_WEND) {
 			if (!silent)
-				printk(KERN_ERR "cramfs: wrong endianess\n");
+				printk(KERN_ERR "cramfs: wrong endianness\n");
 			goto out;
 		}
 
@@ -270,7 +270,7 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent)
 		mutex_unlock(&read_mutex);
 		if (super.magic != CRAMFS_MAGIC) {
 			if (super.magic == CRAMFS_MAGIC_WEND && !silent)
-				printk(KERN_ERR "cramfs: wrong endianess\n");
+				printk(KERN_ERR "cramfs: wrong endianness\n");
 			else if (!silent)
 				printk(KERN_ERR "cramfs: wrong magic\n");
 			goto out;
-- 
cgit v1.2.3-70-g09d2


From a80581d0d1b11b2d4bbb9333c1cac5416714112d Mon Sep 17 00:00:00 2001
From: "Justin P. Mattock" <justinmattock@gmail.com>
Date: Sat, 11 Feb 2012 05:55:58 -0800
Subject: Typos: change aditional to additional.

The below patch fixes some typos "aditional" to "additional", and also fixes
a comment with another word mispelled.

Signed-off-by: Justin P. Mattock <justinmattock@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 arch/powerpc/include/asm/keylargo.h | 2 +-
 drivers/hwmon/tmp401.c              | 2 +-
 fs/ntfs/layout.h                    | 4 ++--
 sound/pci/hda/patch_conexant.c      | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/arch/powerpc/include/asm/keylargo.h b/arch/powerpc/include/asm/keylargo.h
index fc195d0b3c3..2156315d8a9 100644
--- a/arch/powerpc/include/asm/keylargo.h
+++ b/arch/powerpc/include/asm/keylargo.h
@@ -21,7 +21,7 @@
 #define KEYLARGO_FCR4		0x48
 #define KEYLARGO_FCR5		0x4c	/* Pangea only */
 
-/* K2 aditional FCRs */
+/* K2 additional FCRs */
 #define K2_FCR6			0x34
 #define K2_FCR7			0x30
 #define K2_FCR8			0x2c
diff --git a/drivers/hwmon/tmp401.c b/drivers/hwmon/tmp401.c
index 8b9a77486d5..951442adc06 100644
--- a/drivers/hwmon/tmp401.c
+++ b/drivers/hwmon/tmp401.c
@@ -624,7 +624,7 @@ static int tmp401_probe(struct i2c_client *client,
 			goto exit_remove;
 	}
 
-	/* Register aditional tmp411 sysfs hooks */
+	/* Register additional tmp411 sysfs hooks */
 	if (data->kind == tmp411) {
 		for (i = 0; i < ARRAY_SIZE(tmp411_attr); i++) {
 			err = device_create_file(&client->dev,
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index faece719086..809c0e6d8e0 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -2008,14 +2008,14 @@ typedef struct {
  *
  * When a directory is small enough to fit inside the index root then this
  * is the only attribute describing the directory. When the directory is too
- * large to fit in the index root, on the other hand, two aditional attributes
+ * large to fit in the index root, on the other hand, two additional attributes
  * are present: an index allocation attribute, containing sub-nodes of the B+
  * directory tree (see below), and a bitmap attribute, describing which virtual
  * cluster numbers (vcns) in the index allocation attribute are in use by an
  * index block.
  *
  * NOTE: The root directory (FILE_root) contains an entry for itself. Other
- * dircetories do not contain entries for themselves, though.
+ * directories do not contain entries for themselves, though.
  */
 typedef struct {
 	ATTR_TYPE type;			/* Type of the indexed attribute. Is
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index a7a5733aa4d..51e3ed4527c 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -1643,7 +1643,7 @@ static void cxt5051_update_speaker(struct hda_codec *codec)
 	pinctl = (!spec->hp_present && spec->cur_eapd) ? PIN_OUT : 0;
 	snd_hda_codec_write(codec, 0x1a, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
 			    pinctl);
-	/* on ideapad there is an aditional speaker (subwoofer) to mute */
+	/* on ideapad there is an additional speaker (subwoofer) to mute */
 	if (spec->ideapad)
 		snd_hda_codec_write(codec, 0x1b, 0,
 				    AC_VERB_SET_PIN_WIDGET_CONTROL,
-- 
cgit v1.2.3-70-g09d2


From 4ff16c25e2cc48cbe6956e356c38a25ac063a64d Mon Sep 17 00:00:00 2001
From: David Smith <dsmith@redhat.com>
Date: Tue, 7 Feb 2012 10:11:05 -0600
Subject: tracepoint, vfs, sched: Add exec() tracepoint

Added a minimal exec tracepoint. Exec is an important major event
in the life of a task, like fork(), clone() or exit(), all of
which we already trace.

[ We also do scheduling re-balancing during exec() - so it's useful
  from a scheduler instrumentation POV as well. ]

If you want to watch a task start up, when it gets exec'ed is a good place
to start.  With the addition of this tracepoint, exec's can be monitored
and better picture of general system activity can be obtained. This
tracepoint will also enable better process life tracking, allowing you to
answer questions like "what process keeps starting up binary X?".

This tracepoint can also be useful in ftrace filtering and trigger
conditions: i.e. starting or stopping filtering when exec is called.

Signed-off-by: David Smith <dsmith@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/4F314D19.7030504@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/exec.c                    |  9 ++++++---
 include/trace/events/sched.h | 27 +++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index aeb135c7ff5..d0d20809277 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -63,6 +63,8 @@
 #include <trace/events/task.h>
 #include "internal.h"
 
+#include <trace/events/sched.h>
+
 int core_uses_pid;
 char core_pattern[CORENAME_MAX_SIZE] = "core";
 unsigned int core_pipe_limit;
@@ -1401,9 +1403,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 			 */
 			bprm->recursion_depth = depth;
 			if (retval >= 0) {
-				if (depth == 0)
-					ptrace_event(PTRACE_EVENT_EXEC,
-							old_pid);
+				if (depth == 0) {
+					trace_sched_process_exec(current, old_pid, bprm);
+					ptrace_event(PTRACE_EVENT_EXEC, old_pid);
+				}
 				put_binfmt(fmt);
 				allow_write_access(bprm->file);
 				if (bprm->file)
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 6ba596b07a7..e61ddfe8fe9 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -6,6 +6,7 @@
 
 #include <linux/sched.h>
 #include <linux/tracepoint.h>
+#include <linux/binfmts.h>
 
 /*
  * Tracepoint for calling kthread_stop, performed to end a kthread:
@@ -275,6 +276,32 @@ TRACE_EVENT(sched_process_fork,
 		__entry->child_comm, __entry->child_pid)
 );
 
+/*
+ * Tracepoint for exec:
+ */
+TRACE_EVENT(sched_process_exec,
+
+	TP_PROTO(struct task_struct *p, pid_t old_pid,
+		 struct linux_binprm *bprm),
+
+	TP_ARGS(p, old_pid, bprm),
+
+	TP_STRUCT__entry(
+		__string(	filename,	bprm->filename	)
+		__field(	pid_t,		pid		)
+		__field(	pid_t,		old_pid		)
+	),
+
+	TP_fast_assign(
+		__assign_str(filename, bprm->filename);
+		__entry->pid		= p->pid;
+		__entry->old_pid	= p->pid;
+	),
+
+	TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
+		  __entry->pid, __entry->old_pid)
+);
+
 /*
  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
-- 
cgit v1.2.3-70-g09d2


From 93518dd2ebafcc761a8637b2877008cfd748c202 Mon Sep 17 00:00:00 2001
From: Masami Ichikawa <masami256@gmail.com>
Date: Tue, 21 Feb 2012 07:43:50 +0900
Subject: sysfs: Fix memory leak in sysfs_sd_setsecdata().

This patch fixies follwing two memory leak patterns that reported by kmemleak.
sysfs_sd_setsecdata() is called during sys_lsetxattr() operation.
It checks sd->s_iattr is NULL or not. Then if it is NULL, it calls
sysfs_init_inode_attrs() to allocate memory.
That code is this.

iattrs = sd->s_iattr;
if (!iattrs)
                iattrs = sysfs_init_inode_attrs(sd);

The iattrs recieves sysfs_init_inode_attrs()'s result,  but sd->s_iattr
doesn't know the address. so it needs to set correct address to
sd->s_iattr to free memory in other function.

unreferenced object 0xffff880250b73e60 (size 32):
  comm "systemd", pid 1, jiffies 4294683888 (age 94.553s)
  hex dump (first 32 bytes):
    73 79 73 74 65 6d 5f 75 3a 6f 62 6a 65 63 74 5f  system_u:object_
    72 3a 73 79 73 66 73 5f 74 3a 73 30 00 00 00 00  r:sysfs_t:s0....
  backtrace:
    [<ffffffff814cb1d0>] kmemleak_alloc+0x73/0x98
    [<ffffffff811270ab>] __kmalloc+0x100/0x12c
    [<ffffffff8120775a>] context_struct_to_string+0x106/0x210
    [<ffffffff81207cc1>] security_sid_to_context_core+0x10b/0x129
    [<ffffffff812090ef>] security_sid_to_context+0x10/0x12
    [<ffffffff811fb0da>] selinux_inode_getsecurity+0x7d/0xa8
    [<ffffffff811fb127>] selinux_inode_getsecctx+0x22/0x2e
    [<ffffffff811f4d62>] security_inode_getsecctx+0x16/0x18
    [<ffffffff81191dad>] sysfs_setxattr+0x96/0x117
    [<ffffffff811542f0>] __vfs_setxattr_noperm+0x73/0xd9
    [<ffffffff811543d9>] vfs_setxattr+0x83/0xa1
    [<ffffffff811544c6>] setxattr+0xcf/0x101
    [<ffffffff81154745>] sys_lsetxattr+0x6a/0x8f
    [<ffffffff814efda9>] system_call_fastpath+0x16/0x1b
    [<ffffffffffffffff>] 0xffffffffffffffff
unreferenced object 0xffff88024163c5a0 (size 96):
  comm "systemd", pid 1, jiffies 4294683888 (age 94.553s)
  hex dump (first 32 bytes):
    00 00 00 00 ed 41 00 00 00 00 00 00 00 00 00 00  .....A..........
    00 00 00 00 00 00 00 00 0c 64 42 4f 00 00 00 00  .........dBO....
  backtrace:
    [<ffffffff814cb1d0>] kmemleak_alloc+0x73/0x98
    [<ffffffff81127402>] kmem_cache_alloc_trace+0xc4/0xee
    [<ffffffff81191cbe>] sysfs_init_inode_attrs+0x2a/0x83
    [<ffffffff81191dd6>] sysfs_setxattr+0xbf/0x117
    [<ffffffff811542f0>] __vfs_setxattr_noperm+0x73/0xd9
    [<ffffffff811543d9>] vfs_setxattr+0x83/0xa1
    [<ffffffff811544c6>] setxattr+0xcf/0x101
    [<ffffffff81154745>] sys_lsetxattr+0x6a/0x8f
    [<ffffffff814efda9>] system_call_fastpath+0x16/0x1b
    [<ffffffffffffffff>] 0xffffffffffffffff
`

Signed-off-by: Masami Ichikawa <masami256@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/sysfs/inode.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 4291fd1617a..cc7ea5de2fd 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -136,12 +136,13 @@ static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *sec
 	void *old_secdata;
 	size_t old_secdata_len;
 
-	iattrs = sd->s_iattr;
-	if (!iattrs)
-		iattrs = sysfs_init_inode_attrs(sd);
-	if (!iattrs)
-		return -ENOMEM;
+	if (!sd->s_iattr) {
+		sd->s_iattr = sysfs_init_inode_attrs(sd);
+		if (!sd->s_iattr)
+			return -ENOMEM;
+	}
 
+	iattrs = sd->s_iattr;
 	old_secdata = iattrs->ia_secdata;
 	old_secdata_len = iattrs->ia_secdata_len;
 
-- 
cgit v1.2.3-70-g09d2


From fe316bf2d5847bc5dd975668671a7b1067603bc7 Mon Sep 17 00:00:00 2001
From: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Date: Fri, 2 Mar 2012 10:38:33 +0100
Subject: block: Fix NULL pointer dereference in sd_revalidate_disk

Since 2.6.39 (1196f8b), when a driver returns -ENOMEDIUM for open(),
__blkdev_get() calls rescan_partitions() to remove
in-kernel partition structures and raise KOBJ_CHANGE uevent.

However it ends up calling driver's revalidate_disk without open
and could cause oops.

In the case of SCSI:

  process A                  process B
  ----------------------------------------------
  sys_open
    __blkdev_get
      sd_open
        returns -ENOMEDIUM
                             scsi_remove_device
                               <scsi_device torn down>
      rescan_partitions
        sd_revalidate_disk
          <oops>
Oopses are reported here:
http://marc.info/?l=linux-scsi&m=132388619710052

This patch separates the partition invalidation from rescan_partitions()
and use it for -ENOMEDIUM case.

Reported-by: Huajun Li <huajun.li.lee@gmail.com>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: stable@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/partition-generic.c | 48 +++++++++++++++++++++++++++++++++++++++--------
 fs/block_dev.c            | 16 ++++++++++++----
 include/linux/genhd.h     |  1 +
 3 files changed, 53 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/block/partition-generic.c b/block/partition-generic.c
index d06ec1c829c..6df5d6928a4 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -389,17 +389,11 @@ static bool disk_unlock_native_capacity(struct gendisk *disk)
 	}
 }
 
-int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
+static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
 {
-	struct parsed_partitions *state = NULL;
 	struct disk_part_iter piter;
 	struct hd_struct *part;
-	int p, highest, res;
-rescan:
-	if (state && !IS_ERR(state)) {
-		kfree(state);
-		state = NULL;
-	}
+	int res;
 
 	if (bdev->bd_part_count)
 		return -EBUSY;
@@ -412,6 +406,24 @@ rescan:
 		delete_partition(disk, part->partno);
 	disk_part_iter_exit(&piter);
 
+	return 0;
+}
+
+int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
+{
+	struct parsed_partitions *state = NULL;
+	struct hd_struct *part;
+	int p, highest, res;
+rescan:
+	if (state && !IS_ERR(state)) {
+		kfree(state);
+		state = NULL;
+	}
+
+	res = drop_partitions(disk, bdev);
+	if (res)
+		return res;
+
 	if (disk->fops->revalidate_disk)
 		disk->fops->revalidate_disk(disk);
 	check_disk_size_change(disk, bdev);
@@ -515,6 +527,26 @@ rescan:
 	return 0;
 }
 
+int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
+{
+	int res;
+
+	if (!bdev->bd_invalidated)
+		return 0;
+
+	res = drop_partitions(disk, bdev);
+	if (res)
+		return res;
+
+	set_capacity(disk, 0);
+	check_disk_size_change(disk, bdev);
+	bdev->bd_invalidated = 0;
+	/* tell userspace that the media / partition table may have changed */
+	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
+
+	return 0;
+}
+
 unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
 {
 	struct address_space *mapping = bdev->bd_inode->i_mapping;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0e575d1304b..5e9f198f771 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1183,8 +1183,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			 * The latter is necessary to prevent ghost
 			 * partitions on a removed medium.
 			 */
-			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
-				rescan_partitions(disk, bdev);
+			if (bdev->bd_invalidated) {
+				if (!ret)
+					rescan_partitions(disk, bdev);
+				else if (ret == -ENOMEDIUM)
+					invalidate_partitions(disk, bdev);
+			}
 			if (ret)
 				goto out_clear;
 		} else {
@@ -1214,8 +1218,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			if (bdev->bd_disk->fops->open)
 				ret = bdev->bd_disk->fops->open(bdev, mode);
 			/* the same as first opener case, read comment there */
-			if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
-				rescan_partitions(bdev->bd_disk, bdev);
+			if (bdev->bd_invalidated) {
+				if (!ret)
+					rescan_partitions(bdev->bd_disk, bdev);
+				else if (ret == -ENOMEDIUM)
+					invalidate_partitions(bdev->bd_disk, bdev);
+			}
 			if (ret)
 				goto out_unlock_bdev;
 		}
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index fe23ee76858..e61d3192448 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -596,6 +596,7 @@ extern char *disk_name (struct gendisk *hd, int partno, char *buf);
 
 extern int disk_expand_part_tbl(struct gendisk *disk, int target);
 extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
+extern int invalidate_partitions(struct gendisk *disk, struct block_device *bdev);
 extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
 						     int partno, sector_t start,
 						     sector_t len, int flags,
-- 
cgit v1.2.3-70-g09d2


From 2e5b5b3a1b7768c89fbfeca18e75f8ee377e924c Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Thu, 23 Feb 2012 17:41:27 +0900
Subject: sched: Clean up parameter passing of proc_sched_autogroup_set_nice()

Pass nice as a value to proc_sched_autogroup_set_nice().

No side effect is expected, and the variable err will be overwritten with
the return value.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/4F45FBB7.5090607@ct.jp.nec.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/proc/base.c            |  3 +--
 include/linux/sched.h     |  2 +-
 kernel/sched/auto_group.c | 12 ++++++------
 3 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index d4548dd49b0..965d4bde3a3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1310,8 +1310,7 @@ sched_autogroup_write(struct file *file, const char __user *buf,
 	if (!p)
 		return -ESRCH;
 
-	err = nice;
-	err = proc_sched_autogroup_set_nice(p, &err);
+	err = proc_sched_autogroup_set_nice(p, nice);
 	if (err)
 		count = err;
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c628a915143..c298fb9cf5a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2065,7 +2065,7 @@ extern void sched_autogroup_fork(struct signal_struct *sig);
 extern void sched_autogroup_exit(struct signal_struct *sig);
 #ifdef CONFIG_PROC_FS
 extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
-extern int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice);
+extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
 #endif
 #else
 static inline void sched_autogroup_create_attach(struct task_struct *p) { }
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index e8a1f83ee0e..0984a21076a 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -195,20 +195,20 @@ __setup("noautogroup", setup_autogroup);
 
 #ifdef CONFIG_PROC_FS
 
-int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice)
+int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 {
 	static unsigned long next = INITIAL_JIFFIES;
 	struct autogroup *ag;
 	int err;
 
-	if (*nice < -20 || *nice > 19)
+	if (nice < -20 || nice > 19)
 		return -EINVAL;
 
-	err = security_task_setnice(current, *nice);
+	err = security_task_setnice(current, nice);
 	if (err)
 		return err;
 
-	if (*nice < 0 && !can_nice(current, *nice))
+	if (nice < 0 && !can_nice(current, nice))
 		return -EPERM;
 
 	/* this is a heavy operation taking global locks.. */
@@ -219,9 +219,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice)
 	ag = autogroup_task_get(p);
 
 	down_write(&ag->lock);
-	err = sched_group_set_shares(ag->tg, prio_to_weight[*nice + 20]);
+	err = sched_group_set_shares(ag->tg, prio_to_weight[nice + 20]);
 	if (!err)
-		ag->nice = *nice;
+		ag->nice = nice;
 	up_write(&ag->lock);
 
 	autogroup_kref_put(ag);
-- 
cgit v1.2.3-70-g09d2


From 4b32da2bcf1de2b7a196a0e48389d231b4472c36 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Sun, 4 Mar 2012 12:56:55 +0000
Subject: ppp: Replace uses of <linux/if_ppp.h> with <linux/ppp-ioctl.h>

Since all that include/linux/if_ppp.h does is #include <linux/ppp-ioctl.h>,
this replaces the occurrences of #include <linux/if_ppp.h> with
#include <linux/ppp-ioctl.h>.

It also corrects an error in Documentation/networking/l2tp.txt, where
it referenced include/linux/if_ppp.h as the source of some definitions
that are actually now defined in include/linux/if_pppol2tp.h.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/l2tp.txt | 2 +-
 drivers/isdn/capi/capi.c          | 2 +-
 drivers/net/ppp/ppp_async.c       | 2 +-
 drivers/net/ppp/ppp_synctty.c     | 2 +-
 drivers/net/ppp/pppoe.c           | 2 +-
 drivers/net/ppp/pppox.c           | 2 +-
 drivers/tty/ipwireless/network.c  | 2 +-
 drivers/tty/ipwireless/tty.c      | 2 +-
 fs/compat_ioctl.c                 | 2 +-
 include/linux/isdn.h              | 2 +-
 net/atm/pppoatm.c                 | 2 +-
 net/irda/irnet/irnet.h            | 2 +-
 net/l2tp/l2tp_ppp.c               | 2 +-
 13 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/Documentation/networking/l2tp.txt b/Documentation/networking/l2tp.txt
index e7bf3979fac..e63fc1f7bf8 100644
--- a/Documentation/networking/l2tp.txt
+++ b/Documentation/networking/l2tp.txt
@@ -111,7 +111,7 @@ When creating PPPoL2TP sockets, the application provides information
 to the driver about the socket in a socket connect() call. Source and
 destination tunnel and session ids are provided, as well as the file
 descriptor of a UDP socket. See struct pppol2tp_addr in
-include/linux/if_ppp.h. Note that zero tunnel / session ids are
+include/linux/if_pppol2tp.h. Note that zero tunnel / session ids are
 treated specially. When creating the per-tunnel PPPoL2TP management
 socket in Step 2 above, zero source and destination session ids are
 specified, which tells the driver to prepare the supplied UDP file
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index d33a70c4918..0cf05464bfb 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -25,7 +25,7 @@
 #include <linux/tty.h>
 #include <linux/netdevice.h>
 #include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
+#include <linux/ppp-ioctl.h>
 #include <linux/skbuff.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
index c6ba6438082..af95a98fd86 100644
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -26,7 +26,7 @@
 #include <linux/poll.h>
 #include <linux/crc-ccitt.h>
 #include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
+#include <linux/ppp-ioctl.h>
 #include <linux/ppp_channel.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c
index 736a39ee05b..55e466c511d 100644
--- a/drivers/net/ppp/ppp_synctty.c
+++ b/drivers/net/ppp/ppp_synctty.c
@@ -39,7 +39,7 @@
 #include <linux/netdevice.h>
 #include <linux/poll.h>
 #include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
+#include <linux/ppp-ioctl.h>
 #include <linux/ppp_channel.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index bc9a4bb3198..2fa1a9b6f49 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -72,7 +72,7 @@
 #include <linux/if_pppox.h>
 #include <linux/ppp_channel.h>
 #include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
+#include <linux/ppp-ioctl.h>
 #include <linux/notifier.h>
 #include <linux/file.h>
 #include <linux/proc_fs.h>
diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c
index 8c0d170dabc..2940e9fe351 100644
--- a/drivers/net/ppp/pppox.c
+++ b/drivers/net/ppp/pppox.c
@@ -28,7 +28,7 @@
 #include <linux/init.h>
 #include <linux/if_pppox.h>
 #include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
+#include <linux/ppp-ioctl.h>
 #include <linux/ppp_channel.h>
 #include <linux/kmod.h>
 
diff --git a/drivers/tty/ipwireless/network.c b/drivers/tty/ipwireless/network.c
index f7daeea598e..57c8b481113 100644
--- a/drivers/tty/ipwireless/network.c
+++ b/drivers/tty/ipwireless/network.c
@@ -22,7 +22,7 @@
 #include <linux/ppp_channel.h>
 #include <linux/ppp_defs.h>
 #include <linux/slab.h>
-#include <linux/if_ppp.h>
+#include <linux/ppp-ioctl.h>
 #include <linux/skbuff.h>
 
 #include "network.h"
diff --git a/drivers/tty/ipwireless/tty.c b/drivers/tty/ipwireless/tty.c
index ef92869502a..2ffa0b77770 100644
--- a/drivers/tty/ipwireless/tty.c
+++ b/drivers/tty/ipwireless/tty.c
@@ -21,7 +21,7 @@
 #include <linux/mutex.h>
 #include <linux/ppp_defs.h>
 #include <linux/if.h>
-#include <linux/if_ppp.h>
+#include <linux/ppp-ioctl.h>
 #include <linux/sched.h>
 #include <linux/serial.h>
 #include <linux/slab.h>
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a26bea10e81..10d8cd90ca6 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -34,7 +34,7 @@
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
+#include <linux/ppp-ioctl.h>
 #include <linux/if_pppox.h>
 #include <linux/mtio.h>
 #include <linux/auto_fs.h>
diff --git a/include/linux/isdn.h b/include/linux/isdn.h
index 4ccf95d681b..292f27a793d 100644
--- a/include/linux/isdn.h
+++ b/include/linux/isdn.h
@@ -187,7 +187,7 @@ typedef struct {
 #endif
 
 #include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
+#include <linux/ppp-ioctl.h>
 
 #include <linux/isdn_ppp.h>
 #endif
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index df35d9a3b5f..614d3fc47ed 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -44,7 +44,7 @@
 #include <linux/atmdev.h>
 #include <linux/capability.h>
 #include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
+#include <linux/ppp-ioctl.h>
 #include <linux/ppp_channel.h>
 #include <linux/atmppp.h>
 
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index 979ecb2435a..564eb0b8afa 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -254,7 +254,7 @@
 #include <linux/init.h>
 
 #include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
+#include <linux/ppp-ioctl.h>
 #include <linux/ppp_channel.h>
 
 #include <net/irda/irda.h>
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 8a90d756c90..96bc7a67585 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -82,7 +82,7 @@
 #include <net/sock.h>
 #include <linux/ppp_channel.h>
 #include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
+#include <linux/ppp-ioctl.h>
 #include <linux/file.h>
 #include <linux/hash.h>
 #include <linux/sort.h>
-- 
cgit v1.2.3-70-g09d2


From c097b2ca5140249abc3fb5ae9a545c35125ae8d0 Mon Sep 17 00:00:00 2001
From: Fengguang Wu <fengguang.wu@intel.com>
Date: Mon, 5 Mar 2012 15:08:06 -0800
Subject: writeback: fix fn name in writeback_inodes_sb_nr_if_idle() comment
 header

Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/fs-writeback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f855916657b..82e959da686 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1284,7 +1284,7 @@ int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason)
 EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
 
 /**
- * writeback_inodes_sb_if_idle	-	start writeback if none underway
+ * writeback_inodes_sb_nr_if_idle	-	start writeback if none underway
  * @sb: the superblock
  * @nr: the number of pages to write
  * @reason: reason why some writeback work was initiated
-- 
cgit v1.2.3-70-g09d2


From 54d20f006ceff1f2f1e69d0e54049b6c0765c039 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 8 Mar 2012 13:03:10 -0800
Subject: Revert "sysfs: Kill nlink counting."

This reverts commit 524b6c5b39b931311dfe5a2f5abae2f5c9731676.

It has shown to break userspace tools, which is not acceptable.

Reported-by: Jiri Slaby <jslaby@suse.cz>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/sysfs/dir.c   | 6 ++++++
 fs/sysfs/inode.c | 3 +++
 fs/sysfs/sysfs.h | 1 +
 3 files changed, 10 insertions(+)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index dd3779cf3a3..2a7a3f5d1ca 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -91,6 +91,9 @@ static int sysfs_link_sibling(struct sysfs_dirent *sd)
 	struct rb_node **node = &sd->s_parent->s_dir.children.rb_node;
 	struct rb_node *parent = NULL;
 
+	if (sysfs_type(sd) == SYSFS_DIR)
+		sd->s_parent->s_dir.subdirs++;
+
 	while (*node) {
 		struct sysfs_dirent *pos;
 		int result;
@@ -123,6 +126,9 @@ static int sysfs_link_sibling(struct sysfs_dirent *sd)
  */
 static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
 {
+	if (sysfs_type(sd) == SYSFS_DIR)
+		sd->s_parent->s_dir.subdirs--;
+
 	rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children);
 }
 
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index cc7ea5de2fd..feb2d69396c 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -217,6 +217,9 @@ static void sysfs_refresh_inode(struct sysfs_dirent *sd, struct inode *inode)
 					    iattrs->ia_secdata,
 					    iattrs->ia_secdata_len);
 	}
+
+	if (sysfs_type(sd) == SYSFS_DIR)
+		set_nlink(inode, sd->s_dir.subdirs + 2);
 }
 
 int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 6289a00287d..661a9639570 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -19,6 +19,7 @@ struct sysfs_open_dirent;
 struct sysfs_elem_dir {
 	struct kobject		*kobj;
 
+	unsigned long		subdirs;
 	/* children rbtree starts here and goes through sd->s_rb */
 	struct rb_root		children;
 };
-- 
cgit v1.2.3-70-g09d2


From 2f2d76cc3e938389feee671b46252dde6880b3b7 Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@suse.de>
Date: Thu, 8 Mar 2012 05:55:59 +0000
Subject: dlm: Do not allocate a fd for peeloff

avoids allocating a fd that a) propagates to every kernel thread and
usermodehelper b) is not properly released.

References: http://article.gmane.org/gmane.linux.network.drbd/22529
Signed-off-by: Benjamin Poirier <bpoirier@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/dlm/lowcomms.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 0b3109ee425..ca0c59a4246 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -52,6 +52,7 @@
 #include <linux/mutex.h>
 #include <linux/sctp.h>
 #include <linux/slab.h>
+#include <net/sctp/sctp.h>
 #include <net/sctp/user.h>
 #include <net/ipv6.h>
 
@@ -474,9 +475,6 @@ static void process_sctp_notification(struct connection *con,
 			int prim_len, ret;
 			int addr_len;
 			struct connection *new_con;
-			sctp_peeloff_arg_t parg;
-			int parglen = sizeof(parg);
-			int err;
 
 			/*
 			 * We get this before any data for an association.
@@ -525,23 +523,19 @@ static void process_sctp_notification(struct connection *con,
 				return;
 
 			/* Peel off a new sock */
-			parg.associd = sn->sn_assoc_change.sac_assoc_id;
-			ret = kernel_getsockopt(con->sock, IPPROTO_SCTP,
-						SCTP_SOCKOPT_PEELOFF,
-						(void *)&parg, &parglen);
+			sctp_lock_sock(con->sock->sk);
+			ret = sctp_do_peeloff(con->sock->sk,
+				sn->sn_assoc_change.sac_assoc_id,
+				&new_con->sock);
+			sctp_release_sock(con->sock->sk);
 			if (ret < 0) {
 				log_print("Can't peel off a socket for "
 					  "connection %d to node %d: err=%d",
-					  parg.associd, nodeid, ret);
-				return;
-			}
-			new_con->sock = sockfd_lookup(parg.sd, &err);
-			if (!new_con->sock) {
-				log_print("sockfd_lookup error %d", err);
+					  (int)sn->sn_assoc_change.sac_assoc_id,
+					  nodeid, ret);
 				return;
 			}
 			add_sock(new_con->sock, new_con);
-			sockfd_put(new_con->sock);
 
 			log_print("connecting to %d sctp association %d",
 				 nodeid, (int)sn->sn_assoc_change.sac_assoc_id);
-- 
cgit v1.2.3-70-g09d2


From bfcfaa77bdf0f775263e906015982a608df01c76 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 6 Mar 2012 11:16:17 -0800
Subject: vfs: use 'unsigned long' accesses for dcache name comparison and
 hashing

Ok, this is hacky, and only works on little-endian machines with goo
unaligned handling.  And even then only with CONFIG_DEBUG_PAGEALLOC
disabled, since it can access up to 7 bytes after the pathname.

But it runs like a bat out of hell.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/Kconfig |   1 +
 fs/Kconfig       |   4 ++
 fs/dcache.c      |  23 +++++++++++
 fs/namei.c       | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 150 insertions(+)

(limited to 'fs')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5bed94e189f..09675d3e0ac 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -82,6 +82,7 @@ config X86
 	select CLKEVT_I8253
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_IOMAP
+	select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS)
diff --git a/fs/Kconfig b/fs/Kconfig
index d621f02a3f9..aa195265362 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -4,6 +4,10 @@
 
 menu "File systems"
 
+# Use unaligned word dcache accesses
+config DCACHE_WORD_ACCESS
+       bool
+
 if BLOCK
 
 source "fs/ext2/Kconfig"
diff --git a/fs/dcache.c b/fs/dcache.c
index bcbdb33fcc2..ffd47a16d87 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -144,6 +144,28 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
 static inline int dentry_cmp(const unsigned char *cs, size_t scount,
 				const unsigned char *ct, size_t tcount)
 {
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+	unsigned long a,b,mask;
+
+	if (unlikely(scount != tcount))
+		return 1;
+
+	for (;;) {
+		a = *(unsigned long *)cs;
+		b = *(unsigned long *)ct;
+		if (tcount < sizeof(unsigned long))
+			break;
+		if (unlikely(a != b))
+			return 1;
+		cs += sizeof(unsigned long);
+		ct += sizeof(unsigned long);
+		tcount -= sizeof(unsigned long);
+		if (!tcount)
+			return 0;
+	}
+	mask = ~(~0ul << tcount*8);
+	return unlikely(!!((a ^ b) & mask));
+#else
 	if (scount != tcount)
 		return 1;
 
@@ -155,6 +177,7 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount,
 		tcount--;
 	} while (tcount);
 	return 0;
+#endif
 }
 
 static void __d_free(struct rcu_head *head)
diff --git a/fs/namei.c b/fs/namei.c
index e2ba62820a0..378497a744b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1374,6 +1374,126 @@ static inline int can_lookup(struct inode *inode)
 	return 1;
 }
 
+/*
+ * We can do the critical dentry name comparison and hashing
+ * operations one word at a time, but we are limited to:
+ *
+ * - Architectures with fast unaligned word accesses. We could
+ *   do a "get_unaligned()" if this helps and is sufficiently
+ *   fast.
+ *
+ * - Little-endian machines (so that we can generate the mask
+ *   of low bytes efficiently). Again, we *could* do a byte
+ *   swapping load on big-endian architectures if that is not
+ *   expensive enough to make the optimization worthless.
+ *
+ * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
+ *   do not trap on the (extremely unlikely) case of a page
+ *   crossing operation.
+ *
+ * - Furthermore, we need an efficient 64-bit compile for the
+ *   64-bit case in order to generate the "number of bytes in
+ *   the final mask". Again, that could be replaced with a
+ *   efficient population count instruction or similar.
+ */
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+
+#ifdef CONFIG_64BIT
+
+/*
+ * Jan Achrenius on G+: microoptimized version of
+ * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
+ * that works for the bytemasks without having to
+ * mask them first.
+ */
+static inline long count_masked_bytes(unsigned long mask)
+{
+	return mask*0x0001020304050608 >> 56;
+}
+
+static inline unsigned int fold_hash(unsigned long hash)
+{
+	hash += hash >> (8*sizeof(int));
+	return hash;
+}
+
+#else	/* 32-bit case */
+
+/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
+static inline long count_masked_bytes(long mask)
+{
+	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+	long a = (0x0ff0001+mask) >> 23;
+	/* Fix the 1 for 00 case */
+	return a & mask;
+}
+
+#define fold_hash(x) (x)
+
+#endif
+
+unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+{
+	unsigned long a, mask;
+	unsigned long hash = 0;
+
+	for (;;) {
+		a = *(unsigned long *)name;
+		hash *= 9;
+		if (len < sizeof(unsigned long))
+			break;
+		hash += a;
+		name += sizeof(unsigned long);
+		len -= sizeof(unsigned long);
+		if (!len)
+			goto done;
+	}
+	mask = ~(~0ul << len*8);
+	hash += mask & a;
+done:
+	return fold_hash(hash);
+}
+EXPORT_SYMBOL(full_name_hash);
+
+#define ONEBYTES	0x0101010101010101ul
+#define SLASHBYTES	0x2f2f2f2f2f2f2f2ful
+#define HIGHBITS	0x8080808080808080ul
+
+/* Return the high bit set in the first byte that is a zero */
+static inline unsigned long has_zero(unsigned long a)
+{
+	return ((a - ONEBYTES) & ~a) & HIGHBITS;
+}
+
+/*
+ * Calculate the length and hash of the path component, and
+ * return the length of the component;
+ */
+static inline unsigned long hash_name(const char *name, unsigned int *hashp)
+{
+	unsigned long a, mask, hash, len;
+
+	hash = a = 0;
+	len = -sizeof(unsigned long);
+	do {
+		hash = (hash + a) * 9;
+		len += sizeof(unsigned long);
+		a = *(unsigned long *)(name+len);
+		/* Do we have any NUL or '/' bytes in this word? */
+		mask = has_zero(a) | has_zero(a ^ SLASHBYTES);
+	} while (!mask);
+
+	/* The mask *below* the first high bit set */
+	mask = (mask - 1) & ~mask;
+	mask >>= 7;
+	hash += a & mask;
+	*hashp = fold_hash(hash);
+
+	return len + count_masked_bytes(mask);
+}
+
+#else
+
 unsigned int full_name_hash(const unsigned char *name, unsigned int len)
 {
 	unsigned long hash = init_name_hash();
@@ -1402,6 +1522,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp)
 	return len;
 }
 
+#endif
+
 /*
  * Name resolution.
  * This is the basic name resolution function, turning a pathname into
-- 
cgit v1.2.3-70-g09d2


From 2c724fb92732c0b2a5629eb8af74e82eb62ac947 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 16 Mar 2012 10:28:07 +0000
Subject: afs: Read of file returns EBADMSG

A read of a large file on an afs mount failed:

# cat junk.file > /dev/null
cat: junk.file: Bad message

Looking at the trace, call->offset wrapped since it is only an
unsigned short. In afs_extract_data:

        _enter("{%u},{%zu},%d,,%zu", call->offset, len, last, count);
...

        if (call->offset < count) {
                if (last) {
                        _leave(" = -EBADMSG [%d < %zu]", call->offset, count);
                        return -EBADMSG;
                }

Which matches the trace:

[cat   ] ==> afs_extract_data({65132},{524},1,,65536)
[cat   ] <== afs_extract_data() = -EBADMSG [0 < 65536]

call->offset went from 65132 to 0. Fix this by making call->offset an
unsigned int.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index d2b0888126d..a306bb6d88d 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -109,7 +109,7 @@ struct afs_call {
 	unsigned		reply_size;	/* current size of reply */
 	unsigned		first_offset;	/* offset into mapping[first] */
 	unsigned		last_to;	/* amount of mapping[last] */
-	unsigned short		offset;		/* offset into received data store */
+	unsigned		offset;		/* offset into received data store */
 	unsigned char		unmarshall;	/* unmarshalling phase */
 	bool			incoming;	/* T if incoming call */
 	bool			send_pages;	/* T if data from mapping should be sent */
-- 
cgit v1.2.3-70-g09d2


From c0173863528a8c9212c53e080d63a1aaae5ef4f4 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 16 Mar 2012 10:28:19 +0000
Subject: afs: Remote abort can cause BUG in rxrpc code

When writing files to afs I sometimes hit a BUG:

kernel BUG at fs/afs/rxrpc.c:179!

With a backtrace of:

	afs_free_call
	afs_make_call
	afs_fs_store_data
	afs_vnode_store_data
	afs_write_back_from_locked_page
	afs_writepages_region
	afs_writepages

The cause is:

	ASSERT(skb_queue_empty(&call->rx_queue));

Looking at a tcpdump of the session the abort happens because we
are exceeding our disk quota:

	rx abort fs reply store-data error diskquota exceeded (32)

So the abort error is valid. We hit the BUG because we haven't
freed all the resources for the call.

By freeing any skbs in call->rx_queue before calling afs_free_call
we avoid hitting leaking memory and avoid hitting the BUG.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/rxrpc.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index e45a323aebb..8ad8c2a0703 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -314,6 +314,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 	struct msghdr msg;
 	struct kvec iov[1];
 	int ret;
+	struct sk_buff *skb;
 
 	_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
 
@@ -380,6 +381,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
 
 error_do_abort:
 	rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
+	while ((skb = skb_dequeue(&call->rx_queue)))
+		afs_free_skb(skb);
 	rxrpc_kernel_end_call(rxcall);
 	call->rxcall = NULL;
 error_kill_call:
-- 
cgit v1.2.3-70-g09d2


From 3d777a64066f3b9db8a94834aaed6a9cf09808fd Mon Sep 17 00:00:00 2001
From: Haogang Chen <haogangchen@gmail.com>
Date: Fri, 16 Mar 2012 17:08:38 -0700
Subject: nilfs2: clamp ns_r_segments_percentage to [1, 99]

ns_r_segments_percentage is read from the disk.  Bogus or malicious
value could cause integer overflow and malfunction due to meaningless
disk usage calculation.  This patch reports error when mounting such
bogus volumes.

Signed-off-by: Haogang Chen <haogangchen@gmail.com>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nilfs2/the_nilfs.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index d3271409437..8a759016c2e 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -409,6 +409,12 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
 	nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
 	nilfs->ns_r_segments_percentage =
 		le32_to_cpu(sbp->s_r_segments_percentage);
+	if (nilfs->ns_r_segments_percentage < 1 ||
+	    nilfs->ns_r_segments_percentage > 99) {
+		printk(KERN_ERR "NILFS: invalid reserved segments percentage.\n");
+		return -EINVAL;
+	}
+
 	nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
 	nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From d7178c79d9b7c5518f9943188091a75fc6ce0675 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Fri, 16 Mar 2012 17:08:39 -0700
Subject: nilfs2: fix NULL pointer dereference in nilfs_load_super_block()

According to the report from Slicky Devil, nilfs caused kernel oops at
nilfs_load_super_block function during mount after he shrank the
partition without resizing the filesystem:

 BUG: unable to handle kernel NULL pointer dereference at 00000048
 IP: [<d0d7a08e>] nilfs_load_super_block+0x17e/0x280 [nilfs2]
 *pde = 00000000
 Oops: 0000 [#1] PREEMPT SMP
 ...
 Call Trace:
  [<d0d7a87b>] init_nilfs+0x4b/0x2e0 [nilfs2]
  [<d0d6f707>] nilfs_mount+0x447/0x5b0 [nilfs2]
  [<c0226636>] mount_fs+0x36/0x180
  [<c023d961>] vfs_kern_mount+0x51/0xa0
  [<c023ddae>] do_kern_mount+0x3e/0xe0
  [<c023f189>] do_mount+0x169/0x700
  [<c023fa9b>] sys_mount+0x6b/0xa0
  [<c04abd1f>] sysenter_do_call+0x12/0x28
 Code: 53 18 8b 43 20 89 4b 18 8b 4b 24 89 53 1c 89 43 24 89 4b 20 8b 43
 20 c7 43 2c 00 00 00 00 23 75 e8 8b 50 68 89 53 28 8b 54 b3 20 <8b> 72
 48 8b 7a 4c 8b 55 08 89 b3 84 00 00 00 89 bb 88 00 00 00
 EIP: [<d0d7a08e>] nilfs_load_super_block+0x17e/0x280 [nilfs2] SS:ESP 0068:ca9bbdcc
 CR2: 0000000000000048

This turned out due to a defect in an error path which runs if the
calculated location of the secondary super block was invalid.

This patch fixes it and eliminates the reported oops.

Reported-by: Slicky Devil <slicky.dvl@gmail.com>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Tested-by: Slicky Devil <slicky.dvl@gmail.com>
Cc: <stable@vger.kernel.org>	[2.6.30+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nilfs2/the_nilfs.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 8a759016c2e..501b7f8b739 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -521,6 +521,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
 		brelse(sbh[1]);
 		sbh[1] = NULL;
 		sbp[1] = NULL;
+		valid[1] = 0;
 		swp = 0;
 	}
 	if (!valid[swp]) {
-- 
cgit v1.2.3-70-g09d2


From 93dc6107a76daed81c07f50215fa6ae77691634f Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@redhat.com>
Date: Fri, 16 Mar 2012 16:34:03 -0400
Subject: Don't limit non-nested epoll paths

Commit 28d82dc1c4ed ("epoll: limit paths") that I did to limit the
number of possible wakeup paths in epoll is causing a few applications
to longer work (dovecot for one).

The original patch is really about limiting the amount of epoll nesting
(since epoll fds can be attached to other fds). Thus, we probably can
allow an unlimited number of paths of depth 1. My current patch limits
it at 1000. And enforce the limits on paths that have a greater depth.

This is captured in: https://bugzilla.redhat.com/show_bug.cgi?id=681578

Signed-off-by: Jason Baron <jbaron@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventpoll.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index ea54cdef04d..4d9d3a45e35 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -988,6 +988,10 @@ static int path_count[PATH_ARR_SIZE];
 
 static int path_count_inc(int nests)
 {
+	/* Allow an arbitrary number of depth 1 paths */
+	if (nests == 0)
+		return 0;
+
 	if (++path_count[nests] > path_limits[nests])
 		return -1;
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 6d7d1a0dc735ea8412769edae7154885021107a9 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 19 Mar 2012 16:19:53 -0700
Subject: vfs: get rid of batshit-insane pointless dentry hash calculations

For some odd historical reason, the final mixing round for the dentry
cache hash table lookup had an insane "xor with big constant" logic.  In
two places.

The big constant that is being xor'ed is GOLDEN_RATIO_PRIME, which is a
fairly random-looking number that is designed to be *multiplied* with so
that the bits get spread out over a whole long-word.

But xor'ing with it is insane.  It doesn't really even change the hash -
it really only shifts the hash around in the hash table.  To make
matters worse, the insane big constant is different on 32-bit and 64-bit
builds, even though the name hash bits we use are always 32-bit (and the
bits from the pointer we mix in effectively are too).

It's all total voodoo programming, in other words.

Now, some testing and analysis of the hash chains shows that the rest of
the hash function seems to be fairly good.  It does pick the right bits
of the parent dentry pointer, for example, and while it's generally a
bad idea to use an xor to mix down the upper bits (because if there is a
repeating pattern, the xor can cause "destructive interference"), it
seems to not have been a disaster.

For example, replacing the hash with the normal "hash_long()" code (that
uses the GOLDEN_RATIO_PRIME constant correctly, btw) actually just makes
the hash worse.  The hand-picked hash knew which bits of the pointer had
the highest entropy, and hash_long() ends up mixing bits less optimally
at least in some trivial tests.

So the hash function overall seems fine, it just has that really odd
"shift result around by a constant xor".

So get rid of the silly xor, and replace the down-mixing of the bits
with an add instead of an xor that tends to not have the same kind of
destructive interference issues.  Some stats on the resulting hash
chains shows that they look statistically identical before and after,
but the code is simpler and no longer makes you go "WTF?".

Also, the incoming hash really is just "unsigned int", not a long, and
there's no real point to worry about the high 26 bits of the dentry
pointer for the 64-bit case, because they are all going to be identical
anyway.

So also change the hashing to be done in the more natural 'unsigned int'
that is the real size of the actual hashed data anyway.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dcache.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index bcbdb33fcc2..5f00a6f63c9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -105,10 +105,10 @@ static unsigned int d_hash_shift __read_mostly;
 static struct hlist_bl_head *dentry_hashtable __read_mostly;
 
 static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
-					unsigned long hash)
+					unsigned int hash)
 {
-	hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
-	hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
+	hash += (unsigned long) parent / L1_CACHE_BYTES;
+	hash = hash + (hash >> D_HASHBITS);
 	return dentry_hashtable + (hash & D_HASHMASK);
 }
 
-- 
cgit v1.2.3-70-g09d2


From f1f996b66cc3908a8f5ffccc2ff41840e92f3b10 Mon Sep 17 00:00:00 2001
From: Laura Vasilescu <laura@rosedu.org>
Date: Mon, 19 Mar 2012 15:41:15 +0200
Subject: kcore: fix spelling in read_kcore() comment

Signed-off-by: Laura Vasilescu <laura@rosedu.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 fs/proc/kcore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d245cb23dd7..e5e69aff6c6 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -513,7 +513,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 
 				n = copy_to_user(buffer, (char *)start, tsz);
 				/*
-				 * We cannot distingush between fault on source
+				 * We cannot distinguish between fault on source
 				 * and fault on destination. When this happens
 				 * we clear too and hope it will trigger the
 				 * EFAULT again.
-- 
cgit v1.2.3-70-g09d2


From ad2a8e6078a16d3b61b530f1447110841c36ae56 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 20 Mar 2012 16:58:06 +0000
Subject: AFS: checking wrong bit in afs_readpages()

We should be testing "if (vnode->flags & (1 << 4))" instead of
"if (vnode->flags & 4) {".  The current test checks if the data was
modified instead of deleted.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/afs/file.c b/fs/afs/file.c
index 14d89fa58fe..8f6e9234d56 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -251,7 +251,7 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
 	ASSERT(key != NULL);
 
 	vnode = AFS_FS_I(mapping->host);
-	if (vnode->flags & AFS_VNODE_DELETED) {
+	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
 		_leave(" = -ESTALE");
 		return -ESTALE;
 	}
-- 
cgit v1.2.3-70-g09d2


From e636825346b36a07ccfc8e30946d52855e21f681 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 19 Mar 2012 17:03:22 +0100
Subject: exit_signal: simplify the "we have changed execution domain" logic

exit_notify() checks "tsk->self_exec_id != tsk->parent_exec_id"
to handle the "we have changed execution domain" case.

We can change do_thread() to always set ->exit_signal = SIGCHLD
and remove this check to simplify the code.

We could change setup_new_exec() instead, this looks more logical
because it increments ->self_exec_id. But note that de_thread()
already resets ->exit_signal if it changes the leader, let's keep
both changes close to each other.

Note that we change ->exit_signal lockless, this changes the rules.
Thereafter ->exit_signal is not stable under tasklist but this is
fine, the only possible change is OLDSIG -> SIGCHLD. This can race
with eligible_child() but the race is harmless. We can race with
reparent_leader() which changes our ->exit_signal in parallel, but
it does the same change to SIGCHLD.

The noticeable user-visible change is that the execing task is not
"visible" to do_wait()->eligible_child(__WCLONE) right after exec.
To me this looks more logical, and this is consistent with mt case.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c     | 3 +++
 kernel/exit.c | 7 +------
 2 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index b0695a9900e..1e94d2263ae 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -977,6 +977,9 @@ static int de_thread(struct task_struct *tsk)
 	sig->notify_count = 0;
 
 no_thread_group:
+	/* we have changed execution domain */
+	tsk->exit_signal = SIGCHLD;
+
 	if (current->mm)
 		setmax_mm_hiwater_rss(&sig->maxrss, current->mm);
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 752d2c0abd1..51ac4ced131 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -827,14 +827,9 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	 * If the parent exec id doesn't match the exec id we saved
 	 * when we started then we know the parent has changed security
 	 * domain.
-	 *
-	 * If our self_exec id doesn't match our parent_exec_id then
-	 * we have changed execution domain as these two values started
-	 * the same after a fork.
 	 */
 	if (thread_group_leader(tsk) && tsk->exit_signal != SIGCHLD &&
-	    (tsk->parent_exec_id != tsk->real_parent->self_exec_id ||
-	     tsk->self_exec_id != tsk->parent_exec_id))
+	    tsk->parent_exec_id != tsk->real_parent->self_exec_id)
 		tsk->exit_signal = SIGCHLD;
 
 	if (unlikely(tsk->ptrace)) {
-- 
cgit v1.2.3-70-g09d2


From 701085b219016d38f105b031381b9cee6200253a Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 19 Mar 2012 17:04:01 +0100
Subject: exec: move de_thread()->setmax_mm_hiwater_rss() into exec_mmap()

Minor cleanup. de_thread()->setmax_mm_hiwater_rss() looks a bit
strange, move it into exec_mmap() which plays with old_mm.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index 1e94d2263ae..95551c6da09 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -850,6 +850,7 @@ static int exec_mmap(struct mm_struct *mm)
 	if (old_mm) {
 		up_read(&old_mm->mmap_sem);
 		BUG_ON(active_mm != old_mm);
+		setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
 		mm_update_next_owner(old_mm);
 		mmput(old_mm);
 		return 0;
@@ -980,9 +981,6 @@ no_thread_group:
 	/* we have changed execution domain */
 	tsk->exit_signal = SIGCHLD;
 
-	if (current->mm)
-		setmax_mm_hiwater_rss(&sig->maxrss, current->mm);
-
 	exit_itimers(sig);
 	flush_itimer_signals();
 
-- 
cgit v1.2.3-70-g09d2