From 7c52b166c588c98cf3d2b2e7e6a0468a98e84d0d Mon Sep 17 00:00:00 2001
From: Robert Peterson <rpeterso@redhat.com>
Date: Fri, 16 Mar 2007 10:26:37 +0000
Subject: [GFS2] Add gfs2_tool lockdump support to gfs2 (bz 228540)

The attached patch resolves bz 228540.  This adds the capability
for gfs2 to dump gfs2 locks through the debugfs file system.
This used to exist in gfs1 as "gfs_tool lockdump" but it's missing from
gfs2 because all the ioctls were stripped out.  Please see the bugzilla
for more history about the fix.  This patch is also attached to the bugzilla
record.

The patch is against Steve Whitehouse's latest nmw git tree kernel
(2.6.21-rc1) and has been tested on system trin-10.

Signed-off-by: Robert Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c      | 314 ++++++++++++++++++++++++++++++++++++++++-----------
 fs/gfs2/glock.h      |   4 +
 fs/gfs2/incore.h     |   1 +
 fs/gfs2/main.c       |   3 +
 fs/gfs2/ops_fstype.c |   3 +
 5 files changed, 262 insertions(+), 63 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 12accb08fe0..9f203ef4da6 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -23,6 +23,8 @@
 #include <linux/module.h>
 #include <linux/rwsem.h>
 #include <asm/uaccess.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -40,14 +42,22 @@ struct gfs2_gl_hash_bucket {
         struct hlist_head hb_list;
 };
 
+struct glock_iter {
+	int hash;                     /* hash bucket index         */
+	struct gfs2_sbd *sdp;         /* incore superblock         */
+	struct gfs2_glock *gl;        /* current glock struct      */
+	struct hlist_head *hb_list;   /* current hash bucket ptr   */
+	struct seq_file *seq;         /* sequence file for debugfs */
+	char string[512];             /* scratch space             */
+};
+
 typedef void (*glock_examiner) (struct gfs2_glock * gl);
 
 static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
-static int dump_glock(struct gfs2_glock *gl);
-static int dump_inode(struct gfs2_inode *ip);
 static void gfs2_glock_xmote_th(struct gfs2_holder *gh);
 static void gfs2_glock_drop_th(struct gfs2_glock *gl);
 static DECLARE_RWSEM(gfs2_umount_flush_sem);
+static struct dentry *gfs2_root;
 
 #define GFS2_GL_HASH_SHIFT      15
 #define GFS2_GL_HASH_SIZE       (1 << GFS2_GL_HASH_SHIFT)
@@ -1109,6 +1119,20 @@ find_holder_by_owner(struct list_head *head, struct task_struct *owner)
 	return NULL;
 }
 
+static void print_dbg(struct glock_iter *gi, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	if (gi) {
+		vsprintf(gi->string, fmt, args);
+		seq_printf(gi->seq, gi->string);
+	}
+	else
+		vprintk(fmt, args);
+	va_end(args);
+}
+
 /**
  * add_to_queue - Add a holder to the wait queue (but look for recursion)
  * @gh: the holder structure to add
@@ -1849,31 +1873,32 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
  * Returns: 0 on success, -ENOBUFS when we run out of space
  */
 
-static int dump_holder(char *str, struct gfs2_holder *gh)
+static int dump_holder(struct glock_iter *gi, char *str,
+		       struct gfs2_holder *gh)
 {
 	unsigned int x;
-	int error = -ENOBUFS;
 
-	printk(KERN_INFO "  %s\n", str);
-	printk(KERN_INFO "    owner = %ld\n",
+	print_dbg(gi, "  %s\n", str);
+	print_dbg(gi, "    owner = %ld\n",
 		   (gh->gh_owner) ? (long)gh->gh_owner->pid : -1);
-	printk(KERN_INFO "    gh_state = %u\n", gh->gh_state);
-	printk(KERN_INFO "    gh_flags =");
+	print_dbg(gi, "    gh_state = %u\n", gh->gh_state);
+	print_dbg(gi, "    gh_flags =");
 	for (x = 0; x < 32; x++)
 		if (gh->gh_flags & (1 << x))
-			printk(" %u", x);
-	printk(" \n");
-	printk(KERN_INFO "    error = %d\n", gh->gh_error);
-	printk(KERN_INFO "    gh_iflags =");
+			print_dbg(gi, " %u", x);
+	print_dbg(gi, " \n");
+	print_dbg(gi, "    error = %d\n", gh->gh_error);
+	print_dbg(gi, "    gh_iflags =");
 	for (x = 0; x < 32; x++)
 		if (test_bit(x, &gh->gh_iflags))
-			printk(" %u", x);
-	printk(" \n");
-	print_symbol(KERN_INFO "    initialized at: %s\n", gh->gh_ip);
-
-	error = 0;
+			print_dbg(gi, " %u", x);
+	print_dbg(gi, " \n");
+	if (gi)
+		print_dbg(gi, "    initialized at: 0x%x\n", gh->gh_ip);
+	else
+		print_symbol(KERN_INFO "    initialized at: %s\n", gh->gh_ip);
 
-	return error;
+	return 0;
 }
 
 /**
@@ -1883,25 +1908,20 @@ static int dump_holder(char *str, struct gfs2_holder *gh)
  * Returns: 0 on success, -ENOBUFS when we run out of space
  */
 
-static int dump_inode(struct gfs2_inode *ip)
+static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip)
 {
 	unsigned int x;
-	int error = -ENOBUFS;
 
-	printk(KERN_INFO "  Inode:\n");
-	printk(KERN_INFO "    num = %llu %llu\n",
-		    (unsigned long long)ip->i_num.no_formal_ino,
-		    (unsigned long long)ip->i_num.no_addr);
-	printk(KERN_INFO "    type = %u\n", IF2DT(ip->i_inode.i_mode));
-	printk(KERN_INFO "    i_flags =");
+	print_dbg(gi, "  Inode:\n");
+	print_dbg(gi, "    num = %llu/%llu\n",
+		    ip->i_num.no_formal_ino, ip->i_num.no_addr);
+	print_dbg(gi, "    type = %u\n", IF2DT(ip->i_inode.i_mode));
+	print_dbg(gi, "    i_flags =");
 	for (x = 0; x < 32; x++)
 		if (test_bit(x, &ip->i_flags))
-			printk(" %u", x);
-	printk(" \n");
-
-	error = 0;
-
-	return error;
+			print_dbg(gi, " %u", x);
+	print_dbg(gi, " \n");
+	return 0;
 }
 
 /**
@@ -1912,7 +1932,7 @@ static int dump_inode(struct gfs2_inode *ip)
  * Returns: 0 on success, -ENOBUFS when we run out of space
  */
 
-static int dump_glock(struct gfs2_glock *gl)
+static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
 {
 	struct gfs2_holder *gh;
 	unsigned int x;
@@ -1920,66 +1940,66 @@ static int dump_glock(struct gfs2_glock *gl)
 
 	spin_lock(&gl->gl_spin);
 
-	printk(KERN_INFO "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type,
-	       (unsigned long long)gl->gl_name.ln_number);
-	printk(KERN_INFO "  gl_flags =");
+	print_dbg(gi, "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type,
+		   (unsigned long long)gl->gl_name.ln_number);
+	print_dbg(gi, "  gl_flags =");
 	for (x = 0; x < 32; x++) {
 		if (test_bit(x, &gl->gl_flags))
-			printk(" %u", x);
-	}
-	printk(" \n");
-	printk(KERN_INFO "  gl_ref = %d\n", atomic_read(&gl->gl_ref));
-	printk(KERN_INFO "  gl_state = %u\n", gl->gl_state);
-	printk(KERN_INFO "  gl_owner = %s\n", gl->gl_owner->comm);
-	print_symbol(KERN_INFO "  gl_ip = %s\n", gl->gl_ip);
-	printk(KERN_INFO "  req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no");
-	printk(KERN_INFO "  req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no");
-	printk(KERN_INFO "  lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
-	printk(KERN_INFO "  object = %s\n", (gl->gl_object) ? "yes" : "no");
-	printk(KERN_INFO "  le = %s\n",
+			print_dbg(gi, " %u", x);
+	}
+	print_dbg(gi, " \n");
+	print_dbg(gi, "  gl_ref = %d\n", atomic_read(&gl->gl_ref));
+	print_dbg(gi, "  gl_state = %u\n", gl->gl_state);
+	print_dbg(gi, "  gl_owner = %s\n", gl->gl_owner->comm);
+	print_dbg(gi, "  gl_ip = %lu\n", gl->gl_ip);
+	print_dbg(gi, "  req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no");
+	print_dbg(gi, "  req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no");
+	print_dbg(gi, "  lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
+	print_dbg(gi, "  object = %s\n", (gl->gl_object) ? "yes" : "no");
+	print_dbg(gi, "  le = %s\n",
 		   (list_empty(&gl->gl_le.le_list)) ? "no" : "yes");
-	printk(KERN_INFO "  reclaim = %s\n",
-		    (list_empty(&gl->gl_reclaim)) ? "no" : "yes");
+	print_dbg(gi, "  reclaim = %s\n",
+		   (list_empty(&gl->gl_reclaim)) ? "no" : "yes");
 	if (gl->gl_aspace)
-		printk(KERN_INFO "  aspace = 0x%p nrpages = %lu\n", gl->gl_aspace,
-		       gl->gl_aspace->i_mapping->nrpages);
+		print_dbg(gi, "  aspace = 0x%p nrpages = %lu\n", gl->gl_aspace,
+			   gl->gl_aspace->i_mapping->nrpages);
 	else
-		printk(KERN_INFO "  aspace = no\n");
-	printk(KERN_INFO "  ail = %d\n", atomic_read(&gl->gl_ail_count));
+		print_dbg(gi, "  aspace = no\n");
+	print_dbg(gi, "  ail = %d\n", atomic_read(&gl->gl_ail_count));
 	if (gl->gl_req_gh) {
-		error = dump_holder("Request", gl->gl_req_gh);
+		error = dump_holder(gi, "Request", gl->gl_req_gh);
 		if (error)
 			goto out;
 	}
 	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
-		error = dump_holder("Holder", gh);
+		error = dump_holder(gi, "Holder", gh);
 		if (error)
 			goto out;
 	}
 	list_for_each_entry(gh, &gl->gl_waiters1, gh_list) {
-		error = dump_holder("Waiter1", gh);
+		error = dump_holder(gi, "Waiter1", gh);
 		if (error)
 			goto out;
 	}
 	list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
-		error = dump_holder("Waiter2", gh);
+		error = dump_holder(gi, "Waiter2", gh);
 		if (error)
 			goto out;
 	}
 	list_for_each_entry(gh, &gl->gl_waiters3, gh_list) {
-		error = dump_holder("Waiter3", gh);
+		error = dump_holder(gi, "Waiter3", gh);
 		if (error)
 			goto out;
 	}
 	if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
 		if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
-		    list_empty(&gl->gl_holders)) {
-			error = dump_inode(gl->gl_object);
+			list_empty(&gl->gl_holders)) {
+			error = dump_inode(gi, gl->gl_object);
 			if (error)
 				goto out;
 		} else {
 			error = -ENOBUFS;
-			printk(KERN_INFO "  Inode: busy\n");
+			print_dbg(gi, "  Inode: busy\n");
 		}
 	}
 
@@ -2014,7 +2034,7 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
 			if (gl->gl_sbd != sdp)
 				continue;
 
-			error = dump_glock(gl);
+			error = dump_glock(NULL, gl);
 			if (error)
 				break;
 		}
@@ -2043,3 +2063,171 @@ int __init gfs2_glock_init(void)
 	return 0;
 }
 
+static int gfs2_glock_iter_next(struct glock_iter *gi)
+{
+	while (1) {
+		if (!gi->hb_list) {  /* If we don't have a hash bucket yet */
+			gi->hb_list = &gl_hash_table[gi->hash].hb_list;
+			if (hlist_empty(gi->hb_list)) {
+				gi->hash++;
+				gi->hb_list = NULL;
+				if (gi->hash >= GFS2_GL_HASH_SIZE)
+					return 1;
+				else
+					continue;
+			}
+			if (!hlist_empty(gi->hb_list)) {
+				gi->gl = list_entry(gi->hb_list->first,
+						    struct gfs2_glock,
+						    gl_list);
+			}
+		} else {
+			if (gi->gl->gl_list.next == NULL) {
+				gi->hash++;
+				gi->hb_list = NULL;
+				continue;
+			}
+			gi->gl = list_entry(gi->gl->gl_list.next,
+					    struct gfs2_glock, gl_list);
+		}
+		if (gi->gl)
+			break;
+	}
+	return 0;
+}
+
+static void gfs2_glock_iter_free(struct glock_iter *gi)
+{
+	kfree(gi);
+}
+
+static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp)
+{
+	struct glock_iter *gi;
+
+	gi = kmalloc(sizeof (*gi), GFP_KERNEL);
+	if (!gi)
+		return NULL;
+
+	gi->sdp = sdp;
+	gi->hash = 0;
+	gi->gl = NULL;
+	gi->hb_list = NULL;
+	gi->seq = NULL;
+	memset(gi->string, 0, sizeof(gi->string));
+
+	if (gfs2_glock_iter_next(gi)) {
+		gfs2_glock_iter_free(gi);
+		return NULL;
+	}
+
+	return gi;
+}
+
+static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos)
+{
+	struct glock_iter *gi;
+	loff_t n = *pos;
+
+	gi = gfs2_glock_iter_init(file->private);
+	if (!gi)
+		return NULL;
+
+	while (n--) {
+		if (gfs2_glock_iter_next(gi)) {
+			gfs2_glock_iter_free(gi);
+			return NULL;
+		}
+	}
+
+	return gi;
+}
+
+static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr,
+				 loff_t *pos)
+{
+	struct glock_iter *gi = iter_ptr;
+
+	(*pos)++;
+
+	if (gfs2_glock_iter_next(gi)) {
+		gfs2_glock_iter_free(gi);
+		return NULL;
+	}
+
+	return gi;
+}
+
+static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr)
+{
+	/* nothing for now */
+}
+
+static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr)
+{
+	struct glock_iter *gi = iter_ptr;
+
+	gi->seq = file;
+	dump_glock(gi, gi->gl);
+
+	return 0;
+}
+
+static struct seq_operations gfs2_glock_seq_ops = {
+	.start = gfs2_glock_seq_start,
+	.next  = gfs2_glock_seq_next,
+	.stop  = gfs2_glock_seq_stop,
+	.show  = gfs2_glock_seq_show,
+};
+
+static int gfs2_debugfs_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int ret;
+
+	ret = seq_open(file, &gfs2_glock_seq_ops);
+	if (ret)
+		return ret;
+
+	seq = file->private_data;
+	seq->private = inode->i_private;
+
+	return 0;
+}
+
+static const struct file_operations gfs2_debug_fops = {
+	.owner   = THIS_MODULE,
+	.open    = gfs2_debugfs_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
+int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
+{
+	sdp->debugfs_dentry = debugfs_create_file(sdp->sd_table_name,
+						  S_IFREG | S_IRUGO,
+						  gfs2_root, sdp,
+						  &gfs2_debug_fops);
+	if (!sdp->debugfs_dentry)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
+{
+	if (sdp->debugfs_dentry)
+		debugfs_remove(sdp->debugfs_dentry);
+}
+
+int gfs2_register_debugfs(void)
+{
+	gfs2_root = debugfs_create_dir("gfs2", NULL);
+	return gfs2_root ? 0 : -ENOMEM;
+}
+
+void gfs2_unregister_debugfs(void)
+{
+	debugfs_remove(gfs2_root);
+}
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index f50e40ceca4..d7cef740872 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -135,5 +135,9 @@ void gfs2_scand_internal(struct gfs2_sbd *sdp);
 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait);
 
 int __init gfs2_glock_init(void);
+int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
+void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
+int gfs2_register_debugfs(void);
+void gfs2_unregister_debugfs(void);
 
 #endif /* __GLOCK_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 49f0dbf40d8..7555261d911 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -611,6 +611,7 @@ struct gfs2_sbd {
 
 	unsigned long sd_last_warning;
 	struct vfsmount *sd_gfs2mnt;
+	struct dentry *debugfs_dentry; /* for debugfs */
 };
 
 #endif /* __INCORE_DOT_H__ */
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 6e8a59809ab..218395371db 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -103,6 +103,8 @@ static int __init init_gfs2_fs(void)
 	if (error)
 		goto fail_unregister;
 
+	gfs2_register_debugfs();
+
 	printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__);
 
 	return 0;
@@ -130,6 +132,7 @@ fail:
 
 static void __exit exit_gfs2_fs(void)
 {
+	gfs2_unregister_debugfs();
 	unregister_filesystem(&gfs2_fs_type);
 	unregister_filesystem(&gfs2meta_fs_type);
 
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index ee54cb66708..ecb8b18de0e 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -690,6 +690,8 @@ static int fill_super(struct super_block *sb, void *data, int silent)
 	if (error)
 		goto fail;
 
+	gfs2_create_debugfs_file(sdp);
+
 	error = gfs2_sys_fs_add(sdp);
 	if (error)
 		goto fail;
@@ -896,6 +898,7 @@ error:
 
 static void gfs2_kill_sb(struct super_block *sb)
 {
+	gfs2_delete_debugfs_file(sb->s_fs_info);
 	kill_block_super(sb);
 }
 
-- 
cgit v1.2.3-18-g5258


From 5c7342d894973636f03270673e1fb7b908a421a8 Mon Sep 17 00:00:00 2001
From: Josef Whiter <jwhiter@redhat.com>
Date: Wed, 7 Mar 2007 17:09:10 -0500
Subject: [GFS2] fix bz 231369, gfs2 will oops if you specify an invalid mount
 option

If you specify an invalid mount option when trying to mount a gfs2 filesystem,
gfs2 will oops.  The attached patch resolves this problem.

Signed-off-by: Josef Whiter <jwhiter@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 9f203ef4da6..a3a24f2e99d 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -2217,7 +2217,7 @@ int gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
 
 void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
 {
-	if (sdp->debugfs_dentry)
+	if (sdp && sdp->debugfs_dentry)
 		debugfs_remove(sdp->debugfs_dentry);
 }
 
-- 
cgit v1.2.3-18-g5258


From 89adc934f3f96600e7f31447426c7e99d62c5460 Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <pcaulfie@redhat.com>
Date: Tue, 13 Mar 2007 17:08:45 +0000
Subject: [DLM] Fix uninitialised variable in receiving

The length of the second element of the kvec array was not initialised before
being added to the first one. This could cause invalid lengths to be passed to
kernel_recvmsg

Signed-Off-By: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/lowcomms-tcp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
index 07e0a122c32..9bfe7fb721e 100644
--- a/fs/dlm/lowcomms-tcp.c
+++ b/fs/dlm/lowcomms-tcp.c
@@ -299,6 +299,7 @@ static int receive_from_sock(struct connection *con)
 	 */
 	iov[0].iov_len = con->cb.base - cbuf_data(&con->cb);
 	iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb);
+	iov[1].iov_len = 0;
 	nvec = 1;
 
 	/*
-- 
cgit v1.2.3-18-g5258


From 1de913909263ba7f7054debeda1b79771a7233db Mon Sep 17 00:00:00 2001
From: Josef Whiter <jwhiter@redhat.com>
Date: Mon, 12 Mar 2007 16:55:07 -0500
Subject: [GFS2] Fix bz 231380, unlock page before dequeing glocks in
 gfs2_commit_write

If we are writing a file, and in the middle of writing the file
another node attempts to get a shared lock on that file (by doing a du for
example) the process doing the writing will hang waiting on lock_page.  The
reason for this is because when we have waiters on a exclusive glock, we will go
through and flush out all dirty pages associated with that inode and release the
lock.  The problem is that when we flush the dirty pages, we could hit a page
that we have locked durring the generic_file_buffered_write part of this
operation.  This patch unlocks the page before we go to dequeue the lock and
locks it immediatly afterwards, since generic_file_buffered_write needs the page
locked when the commit_write is completed.  This patch resolves the problem,
however if somebody sees a better way to do this please don't hesistate to yell.

Signed-off-by: Josef Whiter <jwhiter@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/ops_address.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index b3b7e847535..90c287932d5 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -507,7 +507,9 @@ static int gfs2_commit_write(struct file *file, struct page *page,
 		gfs2_quota_unlock(ip);
 		gfs2_alloc_put(ip);
 	}
+	unlock_page(page);
 	gfs2_glock_dq_m(1, &ip->i_gh);
+	lock_page(page);
 	gfs2_holder_uninit(&ip->i_gh);
 	return 0;
 
@@ -520,7 +522,9 @@ fail_endtrans:
 		gfs2_quota_unlock(ip);
 		gfs2_alloc_put(ip);
 	}
+	unlock_page(page);
 	gfs2_glock_dq_m(1, &ip->i_gh);
+	lock_page(page);
 	gfs2_holder_uninit(&ip->i_gh);
 fail_nounlock:
 	ClearPageUptodate(page);
-- 
cgit v1.2.3-18-g5258


From 3b8249f6178cb2b68b9d683587797270125cc06a Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Fri, 16 Mar 2007 09:40:31 +0000
Subject: [GFS2] Fix bz 224480 and cleanup glock demotion code

This patch prevents the printing of a warning message in cases where
the fs is functioning normally by handing off responsibility for
unlinked, but still open inodes, to another node for eventual deallocation.
Also, there is now an improved system for ensuring that such requests
to other nodes do not get lost. The callback on the iopen lock is
only ever called when i_nlink == 0 and when a node is unable to deallocate
it due to it still being in use on another node. When a node receives
the callback therefore, it knows that i_nlink must be zero, so we mark
it as such (in gfs2_drop_inode) in order that it will then attempt
deallocation of the inode itself.

As an additional benefit, queuing a demote request no longer requires
a memory allocation. This simplifies the code for dealing with gfs2_holders
as it removes one special case.

There are two new fields in struct gfs2_glock. gl_demote_state is the
state which the remote node has requested and gl_demote_time is the
time when the request came in. Both fields are only valid when the
GLF_DEMOTE flag is set in gl_flags.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c     | 233 ++++++++++++++--------------------------------------
 fs/gfs2/glock.h     |   2 +-
 fs/gfs2/incore.h    |   8 +-
 fs/gfs2/main.c      |   1 -
 fs/gfs2/ops_super.c |  28 ++++++-
 5 files changed, 93 insertions(+), 179 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index a3a24f2e99d..e7075945b05 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -54,7 +54,7 @@ struct glock_iter {
 typedef void (*glock_examiner) (struct gfs2_glock * gl);
 
 static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
-static void gfs2_glock_xmote_th(struct gfs2_holder *gh);
+static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh);
 static void gfs2_glock_drop_th(struct gfs2_glock *gl);
 static DECLARE_RWSEM(gfs2_umount_flush_sem);
 static struct dentry *gfs2_root;
@@ -212,7 +212,6 @@ int gfs2_glock_put(struct gfs2_glock *gl)
 		gfs2_assert(sdp, list_empty(&gl->gl_reclaim));
 		gfs2_assert(sdp, list_empty(&gl->gl_holders));
 		gfs2_assert(sdp, list_empty(&gl->gl_waiters1));
-		gfs2_assert(sdp, list_empty(&gl->gl_waiters2));
 		gfs2_assert(sdp, list_empty(&gl->gl_waiters3));
 		glock_free(gl);
 		rv = 1;
@@ -399,7 +398,7 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *
 {
 	gh->gh_state = state;
 	gh->gh_flags = flags;
-	gh->gh_iflags &= 1 << HIF_ALLOCED;
+	gh->gh_iflags = 0;
 	gh->gh_ip = (unsigned long)__builtin_return_address(0);
 }
 
@@ -416,54 +415,8 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
 	gh->gh_ip = 0;
 }
 
-/**
- * gfs2_holder_get - get a struct gfs2_holder structure
- * @gl: the glock
- * @state: the state we're requesting
- * @flags: the modifier flags
- * @gfp_flags:
- *
- * Figure out how big an impact this function has.  Either:
- * 1) Replace it with a cache of structures hanging off the struct gfs2_sbd
- * 2) Leave it like it is
- *
- * Returns: the holder structure, NULL on ENOMEM
- */
-
-static struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl,
-					   unsigned int state,
-					   int flags, gfp_t gfp_flags)
-{
-	struct gfs2_holder *gh;
-
-	gh = kmalloc(sizeof(struct gfs2_holder), gfp_flags);
-	if (!gh)
-		return NULL;
-
-	gfs2_holder_init(gl, state, flags, gh);
-	set_bit(HIF_ALLOCED, &gh->gh_iflags);
-	gh->gh_ip = (unsigned long)__builtin_return_address(0);
-	return gh;
-}
-
-/**
- * gfs2_holder_put - get rid of a struct gfs2_holder structure
- * @gh: the holder structure
- *
- */
-
-static void gfs2_holder_put(struct gfs2_holder *gh)
-{
-	gfs2_holder_uninit(gh);
-	kfree(gh);
-}
-
-static void gfs2_holder_dispose_or_wake(struct gfs2_holder *gh)
+static void gfs2_holder_wake(struct gfs2_holder *gh)
 {
-	if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) {
-		gfs2_holder_put(gh);
-		return;
-	}
 	clear_bit(HIF_WAIT, &gh->gh_iflags);
 	smp_mb();
 	wake_up_bit(&gh->gh_iflags, HIF_WAIT);
@@ -529,7 +482,7 @@ static int rq_promote(struct gfs2_holder *gh)
 				gfs2_reclaim_glock(sdp);
 			}
 
-			gfs2_glock_xmote_th(gh);
+			gfs2_glock_xmote_th(gh->gh_gl, gh);
 			spin_lock(&gl->gl_spin);
 		}
 		return 1;
@@ -552,7 +505,7 @@ static int rq_promote(struct gfs2_holder *gh)
 	gh->gh_error = 0;
 	set_bit(HIF_HOLDER, &gh->gh_iflags);
 
-	gfs2_holder_dispose_or_wake(gh);
+	gfs2_holder_wake(gh);
 
 	return 0;
 }
@@ -564,32 +517,24 @@ static int rq_promote(struct gfs2_holder *gh)
  * Returns: 1 if the queue is blocked
  */
 
-static int rq_demote(struct gfs2_holder *gh)
+static int rq_demote(struct gfs2_glock *gl)
 {
-	struct gfs2_glock *gl = gh->gh_gl;
-
 	if (!list_empty(&gl->gl_holders))
 		return 1;
 
-	if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) {
-		list_del_init(&gh->gh_list);
-		gh->gh_error = 0;
-		spin_unlock(&gl->gl_spin);
-		gfs2_holder_dispose_or_wake(gh);
-		spin_lock(&gl->gl_spin);
-	} else {
-		gl->gl_req_gh = gh;
-		set_bit(GLF_LOCK, &gl->gl_flags);
-		spin_unlock(&gl->gl_spin);
-
-		if (gh->gh_state == LM_ST_UNLOCKED ||
-		    gl->gl_state != LM_ST_EXCLUSIVE)
-			gfs2_glock_drop_th(gl);
-		else
-			gfs2_glock_xmote_th(gh);
-
-		spin_lock(&gl->gl_spin);
+	if (gl->gl_state == gl->gl_demote_state ||
+	    gl->gl_state == LM_ST_UNLOCKED) {
+		clear_bit(GLF_DEMOTE, &gl->gl_flags);
+		return 0;
 	}
+	set_bit(GLF_LOCK, &gl->gl_flags);
+	spin_unlock(&gl->gl_spin);
+	if (gl->gl_demote_state == LM_ST_UNLOCKED ||
+	    gl->gl_state != LM_ST_EXCLUSIVE)
+		gfs2_glock_drop_th(gl);
+	else
+		gfs2_glock_xmote_th(gl, NULL);
+	spin_lock(&gl->gl_spin);
 
 	return 0;
 }
@@ -617,16 +562,8 @@ static void run_queue(struct gfs2_glock *gl)
 			else
 				gfs2_assert_warn(gl->gl_sbd, 0);
 
-		} else if (!list_empty(&gl->gl_waiters2) &&
-			   !test_bit(GLF_SKIP_WAITERS2, &gl->gl_flags)) {
-			gh = list_entry(gl->gl_waiters2.next,
-					struct gfs2_holder, gh_list);
-
-			if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
-				blocked = rq_demote(gh);
-			else
-				gfs2_assert_warn(gl->gl_sbd, 0);
-
+		} else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
+			blocked = rq_demote(gl);
 		} else if (!list_empty(&gl->gl_waiters3)) {
 			gh = list_entry(gl->gl_waiters3.next,
 					struct gfs2_holder, gh_list);
@@ -717,50 +654,24 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
 }
 
 /**
- * handle_callback - add a demote request to a lock's queue
+ * handle_callback - process a demote request
  * @gl: the glock
  * @state: the state the caller wants us to change to
  *
- * Note: This may fail sliently if we are out of memory.
+ * There are only two requests that we are going to see in actual
+ * practise: LM_ST_SHARED and LM_ST_UNLOCKED
  */
 
 static void handle_callback(struct gfs2_glock *gl, unsigned int state)
 {
-	struct gfs2_holder *gh, *new_gh = NULL;
-
-restart:
 	spin_lock(&gl->gl_spin);
-
-	list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
-		if (test_bit(HIF_DEMOTE, &gh->gh_iflags) &&
-		    gl->gl_req_gh != gh) {
-			if (gh->gh_state != state)
-				gh->gh_state = LM_ST_UNLOCKED;
-			goto out;
-		}
-	}
-
-	if (new_gh) {
-		list_add_tail(&new_gh->gh_list, &gl->gl_waiters2);
-		new_gh = NULL;
-	} else {
-		spin_unlock(&gl->gl_spin);
-
-		new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_NOFS);
-		if (!new_gh)
-			return;
-		set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
-		set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
-		set_bit(HIF_WAIT, &new_gh->gh_iflags);
-
-		goto restart;
+	if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) {
+		gl->gl_demote_state = state;
+		gl->gl_demote_time = jiffies;
+	} else if (gl->gl_demote_state != LM_ST_UNLOCKED) {
+		gl->gl_demote_state = state;
 	}
-
-out:
 	spin_unlock(&gl->gl_spin);
-
-	if (new_gh)
-		gfs2_holder_put(new_gh);
 }
 
 /**
@@ -820,56 +731,37 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
 
 	/*  Deal with each possible exit condition  */
 
-	if (!gh)
+	if (!gh) {
 		gl->gl_stamp = jiffies;
-	else if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
+		if (ret & LM_OUT_CANCELED)
+			op_done = 0;
+		else
+			clear_bit(GLF_DEMOTE, &gl->gl_flags);
+	} else {
 		spin_lock(&gl->gl_spin);
 		list_del_init(&gh->gh_list);
 		gh->gh_error = -EIO;
-		spin_unlock(&gl->gl_spin);
-	} else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) {
-		spin_lock(&gl->gl_spin);
-		list_del_init(&gh->gh_list);
-		if (gl->gl_state == gh->gh_state ||
-		    gl->gl_state == LM_ST_UNLOCKED) {
+		if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 
+			goto out;
+		gh->gh_error = GLR_CANCELED;
+		if (ret & LM_OUT_CANCELED) 
+			goto out;
+		if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
+			list_add_tail(&gh->gh_list, &gl->gl_holders);
 			gh->gh_error = 0;
-		} else {
-			if (gfs2_assert_warn(sdp, gh->gh_flags &
-					(LM_FLAG_TRY | LM_FLAG_TRY_1CB)) == -1)
-				fs_warn(sdp, "ret = 0x%.8X\n", ret);
-			gh->gh_error = GLR_TRYFAILED;
+			set_bit(HIF_HOLDER, &gh->gh_iflags);
+			set_bit(HIF_FIRST, &gh->gh_iflags);
+			op_done = 0;
+			goto out;
 		}
-		spin_unlock(&gl->gl_spin);
-
-		if (ret & LM_OUT_CANCELED)
-			handle_callback(gl, LM_ST_UNLOCKED);
-
-	} else if (ret & LM_OUT_CANCELED) {
-		spin_lock(&gl->gl_spin);
-		list_del_init(&gh->gh_list);
-		gh->gh_error = GLR_CANCELED;
-		spin_unlock(&gl->gl_spin);
-
-	} else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
-		spin_lock(&gl->gl_spin);
-		list_move_tail(&gh->gh_list, &gl->gl_holders);
-		gh->gh_error = 0;
-		set_bit(HIF_HOLDER, &gh->gh_iflags);
-		spin_unlock(&gl->gl_spin);
-
-		set_bit(HIF_FIRST, &gh->gh_iflags);
-
-		op_done = 0;
-
-	} else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
-		spin_lock(&gl->gl_spin);
-		list_del_init(&gh->gh_list);
 		gh->gh_error = GLR_TRYFAILED;
-		spin_unlock(&gl->gl_spin);
-
-	} else {
+		if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
+			goto out;
+		gh->gh_error = -EINVAL;
 		if (gfs2_assert_withdraw(sdp, 0) == -1)
 			fs_err(sdp, "ret = 0x%.8X\n", ret);
+out:
+		spin_unlock(&gl->gl_spin);
 	}
 
 	if (glops->go_xmote_bh)
@@ -887,7 +779,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
 	gfs2_glock_put(gl);
 
 	if (gh)
-		gfs2_holder_dispose_or_wake(gh);
+		gfs2_holder_wake(gh);
 }
 
 /**
@@ -898,12 +790,11 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
  *
  */
 
-void gfs2_glock_xmote_th(struct gfs2_holder *gh)
+void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh)
 {
-	struct gfs2_glock *gl = gh->gh_gl;
 	struct gfs2_sbd *sdp = gl->gl_sbd;
-	int flags = gh->gh_flags;
-	unsigned state = gh->gh_state;
+	int flags = gh ? gh->gh_flags : 0;
+	unsigned state = gh ? gh->gh_state : gl->gl_demote_state;
 	const struct gfs2_glock_operations *glops = gl->gl_ops;
 	int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
 				 LM_FLAG_NOEXP | LM_FLAG_ANY |
@@ -953,6 +844,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
 	gfs2_assert_warn(sdp, !ret);
 
 	state_change(gl, LM_ST_UNLOCKED);
+	clear_bit(GLF_DEMOTE, &gl->gl_flags);
 
 	if (glops->go_inval)
 		glops->go_inval(gl, DIO_METADATA);
@@ -974,7 +866,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
 	gfs2_glock_put(gl);
 
 	if (gh)
-		gfs2_holder_dispose_or_wake(gh);
+		gfs2_holder_wake(gh);
 }
 
 /**
@@ -1291,9 +1183,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 		if (glops->go_unlock)
 			glops->go_unlock(gh);
 
-		gl->gl_stamp = jiffies;
-
 		spin_lock(&gl->gl_spin);
+		gl->gl_stamp = jiffies;
 	}
 
 	clear_bit(GLF_LOCK, &gl->gl_flags);
@@ -1981,16 +1872,16 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
 		if (error)
 			goto out;
 	}
-	list_for_each_entry(gh, &gl->gl_waiters2, gh_list) {
-		error = dump_holder(gi, "Waiter2", gh);
-		if (error)
-			goto out;
-	}
 	list_for_each_entry(gh, &gl->gl_waiters3, gh_list) {
 		error = dump_holder(gi, "Waiter3", gh);
 		if (error)
 			goto out;
 	}
+	if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
+		print_dbg(gi, "  Demotion req to state %u (%llu uS ago)\n",
+			  gl->gl_demote_state,
+			  (u64)(jiffies - gl->gl_demote_time)*1000000/HZ);
+	}
 	if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
 		if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
 			list_empty(&gl->gl_holders)) {
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index d7cef740872..5e662eadc6f 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -67,7 +67,7 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
 {
 	int ret;
 	spin_lock(&gl->gl_spin);
-	ret = !list_empty(&gl->gl_waiters2) || !list_empty(&gl->gl_waiters3);
+	ret = test_bit(GLF_DEMOTE, &gl->gl_flags) || !list_empty(&gl->gl_waiters3);
 	spin_unlock(&gl->gl_spin);
 	return ret;
 }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 7555261d911..9c125823d76 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -115,11 +115,8 @@ enum {
 	/* Actions */
 	HIF_MUTEX		= 0,
 	HIF_PROMOTE		= 1,
-	HIF_DEMOTE		= 2,
 
 	/* States */
-	HIF_ALLOCED		= 4,
-	HIF_DEALLOC		= 5,
 	HIF_HOLDER		= 6,
 	HIF_FIRST		= 7,
 	HIF_ABORTED		= 9,
@@ -142,8 +139,8 @@ struct gfs2_holder {
 enum {
 	GLF_LOCK		= 1,
 	GLF_STICKY		= 2,
+	GLF_DEMOTE		= 3,
 	GLF_DIRTY		= 5,
-	GLF_SKIP_WAITERS2	= 6,
 };
 
 struct gfs2_glock {
@@ -156,11 +153,12 @@ struct gfs2_glock {
 
 	unsigned int gl_state;
 	unsigned int gl_hash;
+	unsigned int gl_demote_state; /* state requested by remote node */
+	unsigned long gl_demote_time; /* time of first demote request */
 	struct task_struct *gl_owner;
 	unsigned long gl_ip;
 	struct list_head gl_holders;
 	struct list_head gl_waiters1;	/* HIF_MUTEX */
-	struct list_head gl_waiters2;	/* HIF_DEMOTE */
 	struct list_head gl_waiters3;	/* HIF_PROMOTE */
 
 	const struct gfs2_glock_operations *gl_ops;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 218395371db..c4bb374eaf9 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -45,7 +45,6 @@ static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned
 		spin_lock_init(&gl->gl_spin);
 		INIT_LIST_HEAD(&gl->gl_holders);
 		INIT_LIST_HEAD(&gl->gl_waiters1);
-		INIT_LIST_HEAD(&gl->gl_waiters2);
 		INIT_LIST_HEAD(&gl->gl_waiters3);
 		gl->gl_lvb = NULL;
 		atomic_set(&gl->gl_lvb_count, 0);
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index b89999d3a76..485ce3d4992 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -283,6 +283,31 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
 	return error;
 }
 
+/**
+ * gfs2_drop_inode - Drop an inode (test for remote unlink)
+ * @inode: The inode to drop
+ *
+ * If we've received a callback on an iopen lock then its because a
+ * remote node tried to deallocate the inode but failed due to this node
+ * still having the inode open. Here we mark the link count zero
+ * since we know that it must have reached zero if the GLF_DEMOTE flag
+ * is set on the iopen glock. If we didn't do a disk read since the
+ * remote node removed the final link then we might otherwise miss
+ * this event. This check ensures that this node will deallocate the
+ * inode's blocks, or alternatively pass the baton on to another
+ * node for later deallocation.
+ */
+static void gfs2_drop_inode(struct inode *inode)
+{
+	if (inode->i_private && inode->i_nlink) {
+		struct gfs2_inode *ip = GFS2_I(inode);
+		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
+		if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags))
+			clear_nlink(inode);
+	}
+	generic_drop_inode(inode);
+}
+
 /**
  * gfs2_clear_inode - Deallocate an inode when VFS is done with it
  * @inode: The VFS inode
@@ -441,7 +466,7 @@ out_unlock:
 out_uninit:
 	gfs2_holder_uninit(&ip->i_iopen_gh);
 	gfs2_glock_dq_uninit(&gh);
-	if (error)
+	if (error && error != GLR_TRYFAILED)
 		fs_warn(sdp, "gfs2_delete_inode: %d\n", error);
 out:
 	truncate_inode_pages(&inode->i_data, 0);
@@ -481,6 +506,7 @@ const struct super_operations gfs2_super_ops = {
 	.statfs			= gfs2_statfs,
 	.remount_fs		= gfs2_remount_fs,
 	.clear_inode		= gfs2_clear_inode,
+	.drop_inode		= gfs2_drop_inode,
 	.show_options		= gfs2_show_options,
 };
 
-- 
cgit v1.2.3-18-g5258


From 420d2a1028b906f24e836e37089a6ad55ab1848f Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Sun, 18 Mar 2007 16:05:27 +0000
Subject: [GFS2] Fix a bug on i386 due to evaluation order

Since gcc didn't evaluate the last two terms of the expression in
glock.c:1881 as a constant expression, it resulted in an error on
i386 due to the lack of a 64bit divide instruction. This adds some
brackets to fix the problem.

This was reported by Andrew Morton.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 fs/gfs2/glock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index e7075945b05..b8aa816bb6e 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1880,7 +1880,7 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
 	if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
 		print_dbg(gi, "  Demotion req to state %u (%llu uS ago)\n",
 			  gl->gl_demote_state,
-			  (u64)(jiffies - gl->gl_demote_time)*1000000/HZ);
+			  (u64)(jiffies - gl->gl_demote_time)*(1000000/HZ));
 	}
 	if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
 		if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
-- 
cgit v1.2.3-18-g5258


From 254da030dfb1b13d42d07e4292a4790d88c6874f Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <pcaulfie@redhat.com>
Date: Wed, 21 Mar 2007 09:23:53 +0000
Subject: [DLM] Don't delete misc device if lockspace removal fails

Currently if the lockspace removal fails the misc device associated with a
lockspace is left deleted. After that there is no way to access the orphaned
lockspace from userland.

This patch recreates the misc device if th dlm_release_lockspace fails. I
believe this is better than attempting to remove the lockspace first because
that leaves an unattached device lying around. The potential gap in which there
is no access to the lockspace between removing the misc device and recreating it
is acceptable ... after all the application is trying to remove it, and only new
users of the lockspace will be affected.

Signed-Off-By: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/user.c | 58 ++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 36 insertions(+), 22 deletions(-)

diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 3870150b83a..27a75ce571c 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -286,11 +286,34 @@ static int device_user_unlock(struct dlm_user_proc *proc,
 	return error;
 }
 
+static int create_misc_device(struct dlm_ls *ls, char *name)
+{
+	int error, len;
+
+	error = -ENOMEM;
+	len = strlen(name) + strlen(name_prefix) + 2;
+	ls->ls_device.name = kzalloc(len, GFP_KERNEL);
+	if (!ls->ls_device.name)
+		goto fail;
+
+	snprintf((char *)ls->ls_device.name, len, "%s_%s", name_prefix,
+		 name);
+	ls->ls_device.fops = &device_fops;
+	ls->ls_device.minor = MISC_DYNAMIC_MINOR;
+
+	error = misc_register(&ls->ls_device);
+	if (error) {
+		kfree(ls->ls_device.name);
+	}
+fail:
+	return error;
+}
+
 static int device_create_lockspace(struct dlm_lspace_params *params)
 {
 	dlm_lockspace_t *lockspace;
 	struct dlm_ls *ls;
-	int error, len;
+	int error;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -304,29 +327,14 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
 	if (!ls)
 		return -ENOENT;
 
-	error = -ENOMEM;
-	len = strlen(params->name) + strlen(name_prefix) + 2;
-	ls->ls_device.name = kzalloc(len, GFP_KERNEL);
-	if (!ls->ls_device.name)
-		goto fail;
-	snprintf((char *)ls->ls_device.name, len, "%s_%s", name_prefix,
-		 params->name);
-	ls->ls_device.fops = &device_fops;
-	ls->ls_device.minor = MISC_DYNAMIC_MINOR;
-
-	error = misc_register(&ls->ls_device);
-	if (error) {
-		kfree(ls->ls_device.name);
-		goto fail;
-	}
-
-	error = ls->ls_device.minor;
+	error = create_misc_device(ls, params->name);
 	dlm_put_lockspace(ls);
-	return error;
 
- fail:
-	dlm_put_lockspace(ls);
-	dlm_release_lockspace(lockspace, 0);
+	if (error)
+		dlm_release_lockspace(lockspace, 0);
+	else
+		error = ls->ls_device.minor;
+
 	return error;
 }
 
@@ -343,6 +351,10 @@ static int device_remove_lockspace(struct dlm_lspace_params *params)
 	if (!ls)
 		return -ENOENT;
 
+	/* Deregister the misc device first, so we don't have
+	 * a device that's not attached to a lockspace. If
+	 * dlm_release_lockspace fails then we can recreate it
+	 */
 	error = misc_deregister(&ls->ls_device);
 	if (error) {
 		dlm_put_lockspace(ls);
@@ -361,6 +373,8 @@ static int device_remove_lockspace(struct dlm_lspace_params *params)
 
 	dlm_put_lockspace(ls);
 	error = dlm_release_lockspace(lockspace, force);
+	if (error)
+		create_misc_device(ls, ls->ls_name);
  out:
 	return error;
 }
-- 
cgit v1.2.3-18-g5258


From f35ac346bc48b2086aa94f031baf1f6237a89de6 Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Sun, 18 Mar 2007 17:04:15 +0000
Subject: [GFS2] Speed up lock_dlm's locking (move sprintf)

The following patch speeds up lock_dlm's locking by moving the sprintf
out from the lock acquisition path and into the lock creation path. This
reduces the amount of CPU time used in acquiring locks by a fair amount.

Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Acked-by: David Teigland <teigland@redhat.com>
---
 fs/gfs2/locking/dlm/lock.c     | 10 ++++------
 fs/gfs2/locking/dlm/lock_dlm.h |  1 +
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
index b167addf9fd..f9c8bda289a 100644
--- a/fs/gfs2/locking/dlm/lock.c
+++ b/fs/gfs2/locking/dlm/lock.c
@@ -151,7 +151,7 @@ static inline unsigned int make_flags(struct gdlm_lock *lp,
 
 /* make_strname - convert GFS lock numbers to a string */
 
-static inline void make_strname(struct lm_lockname *lockname,
+static inline void make_strname(const struct lm_lockname *lockname,
 				struct gdlm_strname *str)
 {
 	sprintf(str->name, "%8x%16llx", lockname->ln_type,
@@ -169,6 +169,7 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
 		return -ENOMEM;
 
 	lp->lockname = *name;
+	make_strname(name, &lp->strname);
 	lp->ls = ls;
 	lp->cur = DLM_LOCK_IV;
 	lp->lvb = NULL;
@@ -227,7 +228,6 @@ void gdlm_put_lock(void *lock)
 unsigned int gdlm_do_lock(struct gdlm_lock *lp)
 {
 	struct gdlm_ls *ls = lp->ls;
-	struct gdlm_strname str;
 	int error, bast = 1;
 
 	/*
@@ -249,8 +249,6 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp)
 	if (test_bit(LFL_NOBAST, &lp->flags))
 		bast = 0;
 
-	make_strname(&lp->lockname, &str);
-
 	set_bit(LFL_ACTIVE, &lp->flags);
 
 	log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type,
@@ -258,8 +256,8 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp)
 		  lp->cur, lp->req, lp->lkf);
 
 	error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
-			 str.name, str.namelen, 0, gdlm_ast, lp,
-			 bast ? gdlm_bast : NULL);
+			 lp->strname.name, lp->strname.namelen, 0, gdlm_ast,
+			 lp, bast ? gdlm_bast : NULL);
 
 	if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
 		lp->lksb.sb_status = -EAGAIN;
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
index a87c7bf3c56..6888bd49c31 100644
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -106,6 +106,7 @@ enum {
 struct gdlm_lock {
 	struct gdlm_ls		*ls;
 	struct lm_lockname	lockname;
+	struct gdlm_strname	strname;
 	char			*lvb;
 	struct dlm_lksb		lksb;
 
-- 
cgit v1.2.3-18-g5258


From 6883562588bc6c70776ecc396ee7eda36c2c8da9 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Fri, 23 Mar 2007 09:05:12 +0000
Subject: [GFS2] Fix log entry list corruption

When glock_lo_add and rg_lo_add attempt to add an element to the log, they
check to see if has already been added before locking the log. If another
process adds that element to the log in this window between the check and
locking the log, the element will be added to the list twice. This causes
the log element list to become corrupted in such a way that the log element
can never be successfully removed from the list. This patch pulls the
list_empty() check inside the log lock, to remove this window.

Signed-off-by: Benjamin E. Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/lops.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 16bb4b4561a..f82d84d05d2 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -33,16 +33,17 @@ static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
 
 	tr->tr_touched = 1;
 
-	if (!list_empty(&le->le_list))
-		return;
-
 	gl = container_of(le, struct gfs2_glock, gl_le);
 	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
 		return;
-	gfs2_glock_hold(gl);
-	set_bit(GLF_DIRTY, &gl->gl_flags);
 
 	gfs2_log_lock(sdp);
+	if (!list_empty(&le->le_list)){
+		gfs2_log_unlock(sdp);
+		return;
+	}
+	gfs2_glock_hold(gl);
+	set_bit(GLF_DIRTY, &gl->gl_flags);
 	sdp->sd_log_num_gl++;
 	list_add(&le->le_list, &sdp->sd_log_le_gl);
 	gfs2_log_unlock(sdp);
@@ -415,13 +416,14 @@ static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
 
 	tr->tr_touched = 1;
 
-	if (!list_empty(&le->le_list))
-		return;
-
 	rgd = container_of(le, struct gfs2_rgrpd, rd_le);
-	gfs2_rgrp_bh_hold(rgd);
 
 	gfs2_log_lock(sdp);
+	if (!list_empty(&le->le_list)){
+		gfs2_log_unlock(sdp);
+		return;
+	}
+	gfs2_rgrp_bh_hold(rgd);
 	sdp->sd_log_num_rg++;
 	list_add(&le->le_list, &sdp->sd_log_le_rg);
 	gfs2_log_unlock(sdp);
-- 
cgit v1.2.3-18-g5258


From 172e045a7fcc3ee647fa70dbd585a3c247b49cb2 Mon Sep 17 00:00:00 2001
From: Benjamin Marzinski <bmarzins@redhat.com>
Date: Fri, 23 Mar 2007 14:51:56 -0600
Subject: [GFS2] flush the log if a transaction can't allocate space

This is a fix for bz #208514. When GFS2 frees up space, the freed blocks
aren't available for reuse until the resource group is successfully written
to the ondisk journal. So in rare cases, GFS2 operations will fail, saying
that the filesystem is out of space, when in reality, you are just waiting for
a log flush. For instance, on a 1Gig filesystem, if I continually write 10 Mb
to a file, and then truncate it, after a hundred interations, the write will
fail with -ENOSPC, even though the filesystem is just 1% full.

The attached patch calls a log flush in these cases.  I tested this patch
fairly heavily to check if there were any locking issues that I missed, and
it seems to work just fine. Also, this patch only does the log flush if
get_local_rgrp makes a complete loop of resource groups without skipping
any do to locking issues. The code would be slightly simpler if it just always
did the log flush after the first failed pass, and you could only ever have
to go through the loop twice, instead of up to three times. However, I guessed
that failing to find a rg simply do to locking issues would be common enough
to skip the log flush in that case, but I'm not certain that this is the right
way to go. Either way, I don't suppose this code will be hit all that often.

Signed-off-by: Benjamin E. Marzinski <bmarzins@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/rgrp.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 8d9c08b5c4b..2ce48d4f246 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -27,6 +27,7 @@
 #include "trans.h"
 #include "ops_file.h"
 #include "util.h"
+#include "log.h"
 
 #define BFITNOENT ((u32)~0)
 
@@ -941,9 +942,13 @@ static int get_local_rgrp(struct gfs2_inode *ip)
 			rgd = gfs2_rgrpd_get_first(sdp);
 
 		if (rgd == begin) {
-			if (++loops >= 2 || !skipped)
+			if (++loops >= 3)
 				return -ENOSPC;
+			if (!skipped)
+				loops++;
 			flags = 0;
+			if (loops == 2)
+				gfs2_log_flush(sdp, NULL);
 		}
 	}
 
-- 
cgit v1.2.3-18-g5258


From 04b933f27bc8e7f3f6423020cec58a4eab3bb7a7 Mon Sep 17 00:00:00 2001
From: Robert Peterson <rpeterso@redhat.com>
Date: Fri, 23 Mar 2007 17:05:15 -0500
Subject: [GFS2] Red Hat bz 228540: owner references

In Testing the previously posted and accepted patch for
https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=228540
I uncovered some gfs2 badness.  It turns out that the current
gfs2 code saves off a process pointer when glocks is taken
in both the glock and glock holder structures.  Those
structures will persist in memory long after the process has
ended; pointers to poisoned memory.

This problem isn't caused by the 228540 fix; the new capability
introduced by the fix just uncovered the problem.

I wrote this patch that avoids saving process pointers
and instead saves off the process pid.  Rather than
referencing the bad pointers, it now does process lookups.
There is special code that makes the output nicer for
printing holder information for processes that have ended.

This patch also adds a stub for the new "sprint_symbol"
function that exists in Andrew Morton's -mm patch set, but
won't go into the base kernel until 2.6.22, since it adds
functionality but doesn't fix a bug.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/gfs2/glock.c  | 75 ++++++++++++++++++++++++++++++++++++++++++--------------
 fs/gfs2/glock.h  |  2 +-
 fs/gfs2/incore.h |  4 +--
 3 files changed, 59 insertions(+), 22 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b8aa816bb6e..d2e3094c40f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -25,6 +25,8 @@
 #include <asm/uaccess.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -54,6 +56,7 @@ struct glock_iter {
 typedef void (*glock_examiner) (struct gfs2_glock * gl);
 
 static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
+static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl);
 static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh);
 static void gfs2_glock_drop_th(struct gfs2_glock *gl);
 static DECLARE_RWSEM(gfs2_umount_flush_sem);
@@ -64,6 +67,7 @@ static struct dentry *gfs2_root;
 #define GFS2_GL_HASH_MASK       (GFS2_GL_HASH_SIZE - 1)
 
 static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE];
+static struct dentry *gfs2_root;
 
 /*
  * Despite what you might think, the numbers below are not arbitrary :-)
@@ -312,7 +316,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 	atomic_set(&gl->gl_ref, 1);
 	gl->gl_state = LM_ST_UNLOCKED;
 	gl->gl_hash = hash;
-	gl->gl_owner = NULL;
+	gl->gl_owner_pid = 0;
 	gl->gl_ip = 0;
 	gl->gl_ops = glops;
 	gl->gl_req_gh = NULL;
@@ -376,7 +380,7 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
 	INIT_LIST_HEAD(&gh->gh_list);
 	gh->gh_gl = gl;
 	gh->gh_ip = (unsigned long)__builtin_return_address(0);
-	gh->gh_owner = current;
+	gh->gh_owner_pid = current->pid;
 	gh->gh_state = state;
 	gh->gh_flags = flags;
 	gh->gh_error = 0;
@@ -601,7 +605,7 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl)
 	if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
 		list_add_tail(&gh.gh_list, &gl->gl_waiters1);
 	} else {
-		gl->gl_owner = current;
+		gl->gl_owner_pid = current->pid;
 		gl->gl_ip = (unsigned long)__builtin_return_address(0);
 		clear_bit(HIF_WAIT, &gh.gh_iflags);
 		smp_mb();
@@ -628,7 +632,7 @@ static int gfs2_glmutex_trylock(struct gfs2_glock *gl)
 	if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
 		acquired = 0;
 	} else {
-		gl->gl_owner = current;
+		gl->gl_owner_pid = current->pid;
 		gl->gl_ip = (unsigned long)__builtin_return_address(0);
 	}
 	spin_unlock(&gl->gl_spin);
@@ -646,7 +650,7 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
 {
 	spin_lock(&gl->gl_spin);
 	clear_bit(GLF_LOCK, &gl->gl_flags);
-	gl->gl_owner = NULL;
+	gl->gl_owner_pid = 0;
 	gl->gl_ip = 0;
 	run_queue(gl);
 	BUG_ON(!spin_is_locked(&gl->gl_spin));
@@ -999,12 +1003,12 @@ static int glock_wait_internal(struct gfs2_holder *gh)
 }
 
 static inline struct gfs2_holder *
-find_holder_by_owner(struct list_head *head, struct task_struct *owner)
+find_holder_by_owner(struct list_head *head, pid_t pid)
 {
 	struct gfs2_holder *gh;
 
 	list_for_each_entry(gh, head, gh_list) {
-		if (gh->gh_owner == owner)
+		if (gh->gh_owner_pid == pid)
 			return gh;
 	}
 
@@ -1036,24 +1040,24 @@ static void add_to_queue(struct gfs2_holder *gh)
 	struct gfs2_glock *gl = gh->gh_gl;
 	struct gfs2_holder *existing;
 
-	BUG_ON(!gh->gh_owner);
+	BUG_ON(!gh->gh_owner_pid);
 	if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
 		BUG();
 
-	existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner);
+	existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner_pid);
 	if (existing) {
 		print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
-		printk(KERN_INFO "pid : %d\n", existing->gh_owner->pid);
+		printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid);
 		printk(KERN_INFO "lock type : %d lock state : %d\n",
 				existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state);
 		print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
-		printk(KERN_INFO "pid : %d\n", gh->gh_owner->pid);
+		printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid);
 		printk(KERN_INFO "lock type : %d lock state : %d\n",
 				gl->gl_name.ln_type, gl->gl_state);
 		BUG();
 	}
 
-	existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner);
+	existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner_pid);
 	if (existing) {
 		print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip);
 		print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip);
@@ -1756,6 +1760,22 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
  *  Diagnostic routines to help debug distributed deadlock
  */
 
+static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt,
+                              unsigned long address)
+{
+/* when sprint_symbol becomes available in the new kernel, replace this */
+/* function with:
+        char buffer[KSYM_SYMBOL_LEN];
+
+        sprint_symbol(buffer, address);
+        print_dbg(gi, fmt, buffer);
+*/
+        if (gi)
+                print_dbg(gi, fmt, address);
+        else
+                print_symbol(fmt, address);
+}
+
 /**
  * dump_holder - print information about a glock holder
  * @str: a string naming the type of holder
@@ -1768,10 +1788,18 @@ static int dump_holder(struct glock_iter *gi, char *str,
 		       struct gfs2_holder *gh)
 {
 	unsigned int x;
+	struct task_struct *gh_owner;
 
 	print_dbg(gi, "  %s\n", str);
-	print_dbg(gi, "    owner = %ld\n",
-		   (gh->gh_owner) ? (long)gh->gh_owner->pid : -1);
+	if (gh->gh_owner_pid) {
+		print_dbg(gi, "    owner = %ld ", (long)gh->gh_owner_pid);
+		gh_owner = find_task_by_pid(gh->gh_owner_pid);
+		if (gh_owner)
+			print_dbg(gi, "(%s)\n", gh_owner->comm);
+		else
+			print_dbg(gi, "(ended)\n");
+	} else
+		print_dbg(gi, "    owner = -1\n");
 	print_dbg(gi, "    gh_state = %u\n", gh->gh_state);
 	print_dbg(gi, "    gh_flags =");
 	for (x = 0; x < 32; x++)
@@ -1784,10 +1812,7 @@ static int dump_holder(struct glock_iter *gi, char *str,
 		if (test_bit(x, &gh->gh_iflags))
 			print_dbg(gi, " %u", x);
 	print_dbg(gi, " \n");
-	if (gi)
-		print_dbg(gi, "    initialized at: 0x%x\n", gh->gh_ip);
-	else
-		print_symbol(KERN_INFO "    initialized at: %s\n", gh->gh_ip);
+        gfs2_print_symbol(gi, "    initialized at: %s\n", gh->gh_ip);
 
 	return 0;
 }
@@ -1828,6 +1853,7 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
 	struct gfs2_holder *gh;
 	unsigned int x;
 	int error = -ENOBUFS;
+	struct task_struct *gl_owner;
 
 	spin_lock(&gl->gl_spin);
 
@@ -1838,10 +1864,21 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
 		if (test_bit(x, &gl->gl_flags))
 			print_dbg(gi, " %u", x);
 	}
+	if (!test_bit(GLF_LOCK, &gl->gl_flags))
+		print_dbg(gi, " (unlocked)");
 	print_dbg(gi, " \n");
 	print_dbg(gi, "  gl_ref = %d\n", atomic_read(&gl->gl_ref));
 	print_dbg(gi, "  gl_state = %u\n", gl->gl_state);
-	print_dbg(gi, "  gl_owner = %s\n", gl->gl_owner->comm);
+	if (gl->gl_owner_pid) {
+		gl_owner = find_task_by_pid(gl->gl_owner_pid);
+		if (gl_owner)
+			print_dbg(gi, "  gl_owner = pid %d (%s)\n",
+				  gl->gl_owner_pid, gl_owner->comm);
+		else
+			print_dbg(gi, "  gl_owner = %d (ended)\n",
+				  gl->gl_owner_pid);
+	} else
+		print_dbg(gi, "  gl_owner = -1\n");
 	print_dbg(gi, "  gl_ip = %lu\n", gl->gl_ip);
 	print_dbg(gi, "  req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no");
 	print_dbg(gi, "  req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no");
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 5e662eadc6f..11477ca3a3c 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -38,7 +38,7 @@ static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
 	/* Look in glock's list of holders for one with current task as owner */
 	spin_lock(&gl->gl_spin);
 	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
-		if (gh->gh_owner == current) {
+		if (gh->gh_owner_pid == current->pid) {
 			locked = 1;
 			break;
 		}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 9c125823d76..fdf04705906 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -127,7 +127,7 @@ struct gfs2_holder {
 	struct list_head gh_list;
 
 	struct gfs2_glock *gh_gl;
-	struct task_struct *gh_owner;
+	pid_t gh_owner_pid;
 	unsigned int gh_state;
 	unsigned gh_flags;
 
@@ -155,7 +155,7 @@ struct gfs2_glock {
 	unsigned int gl_hash;
 	unsigned int gl_demote_state; /* state requested by remote node */
 	unsigned long gl_demote_time; /* time of first demote request */
-	struct task_struct *gl_owner;
+	pid_t gl_owner_pid;
 	unsigned long gl_ip;
 	struct list_head gl_holders;
 	struct list_head gl_waiters1;	/* HIF_MUTEX */
-- 
cgit v1.2.3-18-g5258


From 032067270295cfca11975c0f7b467244aa170c14 Mon Sep 17 00:00:00 2001
From: Patrick Caulfield <pcaulfie@redhat.com>
Date: Mon, 26 Mar 2007 09:56:00 +0100
Subject: [DLM] fix coverity-spotted stupidity

Replacement patch to remove redundant code rather than moving it around.

Signed-Off-By: Patrick Caulfield <pcaulfie@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/lowcomms-tcp.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
index 9bfe7fb721e..919e92a6aeb 100644
--- a/fs/dlm/lowcomms-tcp.c
+++ b/fs/dlm/lowcomms-tcp.c
@@ -319,8 +319,6 @@ static int receive_from_sock(struct connection *con)
 
 	if (ret <= 0)
 		goto out_close;
-	if (ret == -EAGAIN)
-		goto out_resched;
 
 	if (ret == len)
 		call_again_soon = 1;
-- 
cgit v1.2.3-18-g5258


From ef0c2bb05f40f9a0cd2deae63e199bfa62faa7fa Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Wed, 28 Mar 2007 09:56:46 -0500
Subject: [DLM] overlapping cancel and unlock

Full cancel and force-unlock support.  In the past, cancel and force-unlock
wouldn't work if there was another operation in progress on the lock.  Now,
both cancel and unlock-force can overlap an operation on a lock, meaning there
may be 2 or 3 operations in progress on a lock in parallel.  This support is
important not only because cancel and force-unlock are explicit operations
that an app can use, but both are used implicitly when a process exits while
holding locks.

Summary of changes:

- add-to and remove-from waiters functions were rewritten to handle situations
  with more than one remote operation outstanding on a lock

- validate_unlock_args detects when an overlapping cancel/unlock-force
  can be sent and when it needs to be delayed until a request/lookup
  reply is received

- processing request/lookup replies detects when cancel/unlock-force
  occured during the op, and carries out the delayed cancel/unlock-force

- manipulation of the "waiters" (remote operation) state of a lock moved under
  the standard rsb mutex that protects all the other lock state

- the two recovery routines related to locks on the waiters list changed
  according to the way lkb's are now locked before accessing waiters state

- waiters recovery detects when lkb's being recovered have overlapping
  cancel/unlock-force, and may not recover such locks

- revert_lock (cancel) returns a value to distinguish cases where it did
  nothing vs cases where it actually did a cancel; the cancel completion ast
  should only be done when cancel did something

- orphaned locks put on new list so they can be found later for purging

- cancel must be called on a lock when making it an orphan

- flag user locks (ENDOFLIFE) at the end of their useful life (to the
  application) so we can return an error for any further cancel/unlock-force

- we weren't setting COMP/BAST ast flags if one was already set, so we'd lose
  either a completion or blocking ast

- clear an unread bast on a lock that's become unlocked

Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
---
 fs/dlm/dlm_internal.h |  10 +-
 fs/dlm/lock.c         | 710 +++++++++++++++++++++++++++++++++++++-------------
 fs/dlm/lockspace.c    |   4 +-
 fs/dlm/user.c         |  77 +++---
 4 files changed, 577 insertions(+), 224 deletions(-)

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 61d93201e1b..178931cca67 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -210,6 +210,9 @@ struct dlm_args {
 #define DLM_IFL_MSTCPY		0x00010000
 #define DLM_IFL_RESEND		0x00020000
 #define DLM_IFL_DEAD		0x00040000
+#define DLM_IFL_OVERLAP_UNLOCK  0x00080000
+#define DLM_IFL_OVERLAP_CANCEL  0x00100000
+#define DLM_IFL_ENDOFLIFE	0x00200000
 #define DLM_IFL_USER		0x00000001
 #define DLM_IFL_ORPHAN		0x00000002
 
@@ -230,8 +233,8 @@ struct dlm_lkb {
 	int8_t			lkb_grmode;	/* granted lock mode */
 	int8_t			lkb_bastmode;	/* requested mode */
 	int8_t			lkb_highbast;	/* highest mode bast sent for */
-
 	int8_t			lkb_wait_type;	/* type of reply waiting for */
+	int8_t			lkb_wait_count;
 	int8_t			lkb_ast_type;	/* type of ast queued for */
 
 	struct list_head	lkb_idtbl_list;	/* lockspace lkbtbl */
@@ -440,6 +443,9 @@ struct dlm_ls {
 	struct mutex		ls_waiters_mutex;
 	struct list_head	ls_waiters;	/* lkbs needing a reply */
 
+	struct mutex		ls_orphans_mutex;
+	struct list_head	ls_orphans;
+
 	struct list_head	ls_nodes;	/* current nodes in ls */
 	struct list_head	ls_nodes_gone;	/* dead node list, recovery */
 	int			ls_num_nodes;	/* number of nodes in ls */
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index e725005fafd..b865a46059d 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -254,6 +254,22 @@ static inline int down_conversion(struct dlm_lkb *lkb)
 	return (!middle_conversion(lkb) && lkb->lkb_rqmode < lkb->lkb_grmode);
 }
 
+static inline int is_overlap_unlock(struct dlm_lkb *lkb)
+{
+	return lkb->lkb_flags & DLM_IFL_OVERLAP_UNLOCK;
+}
+
+static inline int is_overlap_cancel(struct dlm_lkb *lkb)
+{
+	return lkb->lkb_flags & DLM_IFL_OVERLAP_CANCEL;
+}
+
+static inline int is_overlap(struct dlm_lkb *lkb)
+{
+	return (lkb->lkb_flags & (DLM_IFL_OVERLAP_UNLOCK |
+				  DLM_IFL_OVERLAP_CANCEL));
+}
+
 static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
 {
 	if (is_master_copy(lkb))
@@ -267,6 +283,12 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
 	dlm_add_ast(lkb, AST_COMP);
 }
 
+static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
+{
+	queue_cast(r, lkb,
+		   is_overlap_unlock(lkb) ? -DLM_EUNLOCK : -DLM_ECANCEL);
+}
+
 static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
 {
 	if (is_master_copy(lkb))
@@ -547,6 +569,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
 	lkb->lkb_grmode = DLM_LOCK_IV;
 	kref_init(&lkb->lkb_ref);
 	INIT_LIST_HEAD(&lkb->lkb_ownqueue);
+	INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
 
 	get_random_bytes(&bucket, sizeof(bucket));
 	bucket &= (ls->ls_lkbtbl_size - 1);
@@ -735,23 +758,75 @@ static void move_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, int sts)
 	unhold_lkb(lkb);
 }
 
+static int msg_reply_type(int mstype)
+{
+	switch (mstype) {
+	case DLM_MSG_REQUEST:
+		return DLM_MSG_REQUEST_REPLY;
+	case DLM_MSG_CONVERT:
+		return DLM_MSG_CONVERT_REPLY;
+	case DLM_MSG_UNLOCK:
+		return DLM_MSG_UNLOCK_REPLY;
+	case DLM_MSG_CANCEL:
+		return DLM_MSG_CANCEL_REPLY;
+	case DLM_MSG_LOOKUP:
+		return DLM_MSG_LOOKUP_REPLY;
+	}
+	return -1;
+}
+
 /* add/remove lkb from global waiters list of lkb's waiting for
    a reply from a remote node */
 
-static void add_to_waiters(struct dlm_lkb *lkb, int mstype)
+static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
 {
 	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+	int error = 0;
 
 	mutex_lock(&ls->ls_waiters_mutex);
-	if (lkb->lkb_wait_type) {
-		log_print("add_to_waiters error %d", lkb->lkb_wait_type);
+
+	if (is_overlap_unlock(lkb) ||
+	    (is_overlap_cancel(lkb) && (mstype == DLM_MSG_CANCEL))) {
+		error = -EINVAL;
+		goto out;
+	}
+
+	if (lkb->lkb_wait_type || is_overlap_cancel(lkb)) {
+		switch (mstype) {
+		case DLM_MSG_UNLOCK:
+			lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
+			break;
+		case DLM_MSG_CANCEL:
+			lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
+			break;
+		default:
+			error = -EBUSY;
+			goto out;
+		}
+		lkb->lkb_wait_count++;
+		hold_lkb(lkb);
+
+		log_debug(ls, "add overlap %x cur %d new %d count %d flags %x",
+